add more tokens and fix identifier regex

vel 2024-10-23 02:26:47 -07:00
parent 4aabd97544
commit 3866859c3f
Signed by: velvox
GPG Key ID: 59D9762F674151DF
4 changed files with 41 additions and 7 deletions

@@ -8,7 +8,7 @@ extern char *yytext;
 int main(int argc, char *argv[]) {
     FILE *file;
-    const char *filename = "samples/sorting-snippet.cbl"; // Default filename
+    const char *filename = "samples/quadratic-snippet.cbl"; // Default filename
     // Check if a filename is provided as a command-line argument
     if (argc > 1) {

@@ -48,6 +48,16 @@ UTEST(scanner, assignment) {
     yy_delete_buffer(buffer);
 }

+UTEST(scanner, hello) {
+    struct token_st tokens[] = {
+        {TOKEN_IDENTIFICATION, "IDENTIFICATION"},
+        {TOKEN_PROGRAM_ID, "PROGRAM-ID. HELLO-WORLD."},
+        {TOKEN_PROCEDURE_DIVISION, "PROCEDURE DIVISION."},
+        {TOKEN_STRING, "Hello World!"},
+        {TOKEN_KEYWORD_PRINT, "DISPLAY"},
+        {TOKEN_EOF, "STOP RUN."},
+    };
+
 UTEST(scanner, sample) {
     struct token_st tokens[] = {
         {TOKEN_IDENT, "answer"},

@@ -1,11 +1,11 @@
 %{
 #include "token.h"
 %}
-DIGIT [0-9]
-LETTER [a-zA-Z]
-NAME [a-zA-Z0-9][a-zA-Z0-9_-]*[a-zA-Z0-9]
+NAME [a-zA-Z]([a-zA-Z0-9_-]*[a-zA-Z0-9])?
+DIGIT [0-9]+
 %%
 (" "|\t|\n) /* skip whitespace */
 \*>\ ?.* { return TOKEN_COMMENT; }
 IDENTIFICATION { return TOKEN_IDENTIFICATION; }
@@ -32,23 +32,34 @@ SPACE { return TOKEN_SPACE; }
 PIC { return TOKEN_PICTURE; }
 OCCURS { return TOKEN_KEYWORD_OCCURS; }
 VALUE { return TOKEN_KEYWORD_VALUE; }
+COMPUTE { return TOKEN_KEYWORD_COMPUTE; }
+FUNCTION { return TOKEN_KEYWORD_FUNCTION; }
 X { return TOKEN_ALPHANUMERIC; }
 S9 { return TOKEN_SIGNED_NUMERIC; }
 9 { return TOKEN_NUMERIC; }
+V9 { return TOKEN_IMPLIED_DECIMAL; }
+COMP { return TOKEN_COMPUTATION_LEVEL_0; }
+COMP-1 { return TOKEN_COMPUTATION_LEVEL_1; }
+COMP-2 { return TOKEN_COMPUTATION_LEVEL_2; }
+COMP-3 { return TOKEN_COMPUTATION_LEVEL_3; }
+{DIGIT} { return TOKEN_INTEGER; }
+{NAME} { return TOKEN_IDENT; }
 \+ { return TOKEN_ADD; }
 \- { return TOKEN_SUB; }
+\*\* { return TOKEN_EXPONENTIAL; }
+\* { return TOKEN_MULTIPLY; }
+\/ { return TOKEN_DIVIDE; }
 \> { return TOKEN_GREATER_THAN; }
 \< { return TOKEN_LESS_THAN; }
+\= { return TOKEN_EQUAL; }
 "\""[^"]*"\"" { return TOKEN_STRING; }
 "\'"[^']*"\'" { return TOKEN_STRING; }
 "(" { return TOKEN_LEFT_PARENTHESIS; }
 ")" { return TOKEN_RIGHT_PARENTHESIS; }
 \. { return TOKEN_DOT; }
-{NAME} { return TOKEN_IDENT; }
-{DIGIT} { return TOKEN_INTEGER; }
 %%
 int yywrap() { return 1; }
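
A note on the identifier fix in the rules above: the old NAME pattern allowed a name to start with a digit and end with a hyphen, and it required at least two characters, while DIGIT matched only a single digit, so a literal like 42 could be picked up by the {NAME} rule instead of {DIGIT}. The new definitions require a leading letter and a trailing alphanumeric, admit single-letter names, and let DIGIT consume a whole run of digits. The snippet below is a small standalone sketch, not part of this commit, that tries the same shape as a POSIX ERE; the sample identifiers are made up for illustration.

/* Minimal sketch (not part of the commit): the new NAME definition,
 * expressed as an equivalent POSIX ERE and tried against a few
 * made-up identifiers. Uses only the C standard library plus
 * POSIX <regex.h>. */
#include <regex.h>
#include <stdio.h>

int main(void) {
    /* Mirrors: NAME [a-zA-Z]([a-zA-Z0-9_-]*[a-zA-Z0-9])? */
    const char *pattern = "^[a-zA-Z]([a-zA-Z0-9_-]*[a-zA-Z0-9])?$";
    const char *samples[] = {
        "WS-ANSWER",  /* matches: starts with a letter, ends alphanumeric */
        "x",          /* matches: the trailing group is optional */
        "9TABLE",     /* rejected: may not start with a digit */
        "WS-ANSWER-", /* rejected: may not end with a hyphen */
    };
    regex_t re;
    if (regcomp(&re, pattern, REG_EXTENDED | REG_NOSUB) != 0)
        return 1;
    for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
        printf("%-12s %s\n", samples[i],
               regexec(&re, samples[i], 0, NULL, 0) == 0 ? "NAME" : "no match");
    regfree(&re);
    return 0;
}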

@@ -26,6 +26,8 @@ typedef enum {
     TOKEN_SPACE,
     TOKEN_KEYWORD_OCCURS,
     TOKEN_KEYWORD_VALUE,
+    TOKEN_KEYWORD_COMPUTE,
+    TOKEN_KEYWORD_FUNCTION,
     // Identifiers
     TOKEN_IDENT,
@@ -36,6 +38,13 @@ typedef enum {
     TOKEN_ALPHANUMERIC,
     TOKEN_NUMERIC,
     TOKEN_SIGNED_NUMERIC,
+    TOKEN_IMPLIED_DECIMAL,
+    // https://ibmmainframes.com/about393.html
+    TOKEN_COMPUTATION_LEVEL_0,
+    TOKEN_COMPUTATION_LEVEL_1,
+    TOKEN_COMPUTATION_LEVEL_2,
+    TOKEN_COMPUTATION_LEVEL_3,
     // Grammar
     TOKEN_LEFT_PARENTHESIS,
     TOKEN_RIGHT_PARENTHESIS,
@@ -45,6 +54,10 @@ typedef enum {
     // Operators
     TOKEN_ADD,
     TOKEN_SUB,
+    TOKEN_MULTIPLY,
+    TOKEN_DIVIDE,
+    TOKEN_EQUAL,
     TOKEN_GREATER_THAN,
     TOKEN_LESS_THAN,
+    TOKEN_EXPONENTIAL,
 } token_t;