From 3866859c3f11797ff8ab0e4ea0354656ad425b7c Mon Sep 17 00:00:00 2001 From: Riley Smith Date: Wed, 23 Oct 2024 02:26:47 -0700 Subject: [PATCH] add more tokens and fix identifier regex --- lab-3/main.c | 2 +- lab-3/main_test.c | 10 ++++++++++ lab-3/scanner.flex | 23 +++++++++++++++++------ lab-3/token.h | 13 +++++++++++++ 4 files changed, 41 insertions(+), 7 deletions(-) diff --git a/lab-3/main.c b/lab-3/main.c index 66a6605..0970031 100644 --- a/lab-3/main.c +++ b/lab-3/main.c @@ -8,7 +8,7 @@ extern char *yytext; int main(int argc, char *argv[]) { FILE *file; - const char *filename = "samples/sorting-snippet.cbl"; // Default filename + const char *filename = "samples/quadratic-snippet.cbl"; // Default filename // Check if a filename is provided as a command-line argument if (argc > 1) { diff --git a/lab-3/main_test.c b/lab-3/main_test.c index beb70df..32a4bbf 100644 --- a/lab-3/main_test.c +++ b/lab-3/main_test.c @@ -48,6 +48,16 @@ UTEST(scanner, assignment) { yy_delete_buffer(buffer); } +UTEST(scanner, hello) { + struct token_st tokens[] = { + {TOKEN_IDENTIFICATION, "IDENTIFICATION"}, + {TOKEN_PROGRAM_ID, "PROGRAM-ID. HELLO-WORLD."}, + {TOKEN_PROCEDURE_DIVISION, "PROCEDURE DIVISION."}, + {TOKEN_STRING, "Hello World!"}, + {TOKEN_KEYWORD_PRINT, "DISPLAY"}, + {TOKEN_EOF, "STOP RUN."}, + }; + UTEST(scanner, sample) { struct token_st tokens[] = { {TOKEN_IDENT, "answer"}, diff --git a/lab-3/scanner.flex b/lab-3/scanner.flex index b6824a4..e290bfc 100644 --- a/lab-3/scanner.flex +++ b/lab-3/scanner.flex @@ -1,11 +1,11 @@ %{ #include "token.h" %} -DIGIT [0-9] -LETTER [a-zA-Z] -NAME [a-zA-Z0-9][a-zA-Z0-9_-]*[a-zA-Z0-9] +NAME [a-zA-Z]([a-zA-Z0-9_-]*[a-zA-Z0-9])? 
+DIGIT [0-9]+ %% + (" "|\t|\n) /* skip whitespace */ \*>\ ?.* { return TOKEN_COMMENT; } IDENTIFICATION { return TOKEN_IDENTIFICATION; } @@ -32,23 +32,34 @@ SPACE { return TOKEN_SPACE; } PIC { return TOKEN_PICTURE; } OCCURS { return TOKEN_KEYWORD_OCCURS; } VALUE { return TOKEN_KEYWORD_VALUE; } +COMPUTE { return TOKEN_KEYWORD_COMPUTE; } +FUNCTION { return TOKEN_KEYWORD_FUNCTION; } X { return TOKEN_ALPHANUMERIC; } S9 { return TOKEN_SIGNED_NUMERIC; } 9 { return TOKEN_NUMERIC; } +V9 { return TOKEN_IMPLIED_DECIMAL; } +COMP { return TOKEN_COMPUTATION_LEVEL_0; } +COMP-1 { return TOKEN_COMPUTATION_LEVEL_1; } +COMP-2 { return TOKEN_COMPUTATION_LEVEL_2; } +COMP-3 { return TOKEN_COMPUTATION_LEVEL_3; } +{DIGIT} { return TOKEN_INTEGER; } +{NAME} { return TOKEN_IDENT; } \+ { return TOKEN_ADD; } \- { return TOKEN_SUB; } +\*\* { return TOKEN_EXPONENTIAL; } +\* { return TOKEN_MULTIPLY; } +\/ { return TOKEN_DIVIDE; } \> { return TOKEN_GREATER_THAN; } \< { return TOKEN_LESS_THAN; } +\= { return TOKEN_EQUAL;} "\""[^"]*"\"" { return TOKEN_STRING; } "\'"[^']*"\'" { return TOKEN_STRING; } "(" { return TOKEN_LEFT_PARENTHESIS; } ")" { return TOKEN_RIGHT_PARENTHESIS; } - \. 
{ return TOKEN_DOT; } -{NAME} { return TOKEN_IDENT; } -{DIGIT} { return TOKEN_INTEGER; } + %% int yywrap() { return 1; } diff --git a/lab-3/token.h b/lab-3/token.h index 155ca08..85bf604 100644 --- a/lab-3/token.h +++ b/lab-3/token.h @@ -26,6 +26,8 @@ typedef enum { TOKEN_SPACE, TOKEN_KEYWORD_OCCURS, TOKEN_KEYWORD_VALUE, + TOKEN_KEYWORD_COMPUTE, + TOKEN_KEYWORD_FUNCTION, // Identifiers TOKEN_IDENT, @@ -36,6 +38,13 @@ typedef enum { TOKEN_ALPHANUMERIC, TOKEN_NUMERIC, TOKEN_SIGNED_NUMERIC, + TOKEN_IMPLIED_DECIMAL, + // https://ibmmainframes.com/about393.html + TOKEN_COMPUTATION_LEVEL_0, + TOKEN_COMPUTATION_LEVEL_1, + TOKEN_COMPUTATION_LEVEL_2, + TOKEN_COMPUTATION_LEVEL_3, + // Grammar TOKEN_LEFT_PARENTHESIS, TOKEN_RIGHT_PARENTHESIS, @@ -45,6 +54,10 @@ typedef enum { // Operators TOKEN_ADD, TOKEN_SUB, + TOKEN_MULTIPLY, + TOKEN_DIVIDE, + TOKEN_EQUAL, TOKEN_GREATER_THAN, TOKEN_LESS_THAN, + TOKEN_EXPONENTIAL, } token_t; \ No newline at end of file