From de6ef0ac023f4dcdbc4cc0d17f1fa3b9c755653d Mon Sep 17 00:00:00 2001 From: Riley Smith Date: Tue, 22 Oct 2024 16:22:26 -0700 Subject: [PATCH 1/5] first pass at flex lexer Co-authored-by: Jenessyl --- lab-3/main.c | 2 +- lab-3/samples/hello-world.cbl | 3 +-- lab-3/samples/sorting-snippet.cbl | 2 +- lab-3/scanner.flex | 44 ++++++++++++++++++++++++++++--- lab-3/token.h | 32 +++++++++++++++++++++- 5 files changed, 74 insertions(+), 9 deletions(-) diff --git a/lab-3/main.c b/lab-3/main.c index 4a96a9e..66a6605 100644 --- a/lab-3/main.c +++ b/lab-3/main.c @@ -8,7 +8,7 @@ extern char *yytext; int main(int argc, char *argv[]) { FILE *file; - const char *filename = "samples/hello.py"; // Default filename + const char *filename = "samples/sorting-snippet.cbl"; // Default filename // Check if a filename is provided as a command-line argument if (argc > 1) { diff --git a/lab-3/samples/hello-world.cbl b/lab-3/samples/hello-world.cbl index 3c78e9b..737f19e 100644 --- a/lab-3/samples/hello-world.cbl +++ b/lab-3/samples/hello-world.cbl @@ -1,6 +1,5 @@ IDENTIFICATION DIVISION. PROGRAM-ID. HELLO-WORLD. PROCEDURE DIVISION. - DISPLAY "Hello World!" + DISPLAY 'Hello World!' STOP RUN. -if \ No newline at end of file diff --git a/lab-3/samples/sorting-snippet.cbl b/lab-3/samples/sorting-snippet.cbl index a0eae92..8324e47 100644 --- a/lab-3/samples/sorting-snippet.cbl +++ b/lab-3/samples/sorting-snippet.cbl @@ -13,7 +13,7 @@ WORKING-STORAGE SECTION. 05 WS-INDEX PIC S9(4) COMP. PROCEDURE DIVISION. -*> * Initialize test data +*> Initialize test data MOVE "30" TO WS-SORT-ROW(1) MOVE "10" TO WS-SORT-ROW(2) MOVE "50" TO WS-SORT-ROW(3) diff --git a/lab-3/scanner.flex b/lab-3/scanner.flex index 021db23..0838033 100644 --- a/lab-3/scanner.flex +++ b/lab-3/scanner.flex @@ -3,10 +3,46 @@ %} DIGIT [0-9] LETTER [a-zA-Z] -%% -(" "|\t|\n) /* skip whitespace */ -IDENTIFICATION { return TOKEN_IDENTIFICATION; } -DIVISION { return TOKEN_KEYWORD_DIVISION; } +NAME [a-zA-Z0-9][a-zA-Z0-9_-]*[a-zA-Z0-9] +%% +(" "|\t|\n) /* skip whitespace */ +\*>\ ?.* { return TOKEN_COMMENT; } +IDENTIFICATION { return TOKEN_IDENTIFICATION; } +DIVISION { return TOKEN_KEYWORD_DIVISION; } +PROGRAM-ID { return TOKEN_PROGRAM_ID; } +PROCEDURE { return TOKEN_PROCEDURE; } +DATA { return TOKEN_KEYWORD_DATA; } +SECTION { return TOKEN_KEYWORD_SECTION; } +WORKING-STORAGE { return TOKEN_WORKING_STORAGE; } +DISPLAY { return TOKEN_DISPLAY; } +STOP { return TOKEN_STOP; } +RUN { return TOKEN_RUN; } +MOVE { return TOKEN_MOVE; } +TO { return TOKEN_KEYWORD_TO; } +VARYING { return TOKEN_VARYING; } +FROM { return TOKEN_KEYWORD_FROM; } +BY { return TOKEN_KEYWORD_BY; } +UNTIL { return TOKEN_UNTIL; } +PERFORM { return TOKEN_PERFORM; } +END-PERFORM { return TOKEN_END_PERFORM; } +IF { return TOKEN_IF; } +END-IF { return TOKEN_END_IF; } +SPACE { return TOKEN_SPACE; } + +\+ { return TOKEN_ADD; } +\- { return TOKEN_SUB; } +\> { return TOKEN_GREATER_THAN; } +\< { return TOKEN_LESS_THAN; } + +"\""[^"]*"\"" { return TOKEN_STRING; } +"\'"[^']*"\'" { return TOKEN_STRING; } +"(" { return TOKEN_LEFT_PARENTHESIS; } +")" { return TOKEN_RIGHT_PARENTHESIS; } + + +\. { return TOKEN_DOT; } +{NAME} { return TOKEN_IDENT; } +{DIGIT} { return TOKEN_INTEGER; } %% int yywrap() { return 1; } diff --git a/lab-3/token.h b/lab-3/token.h index 993c7a1..7ffcc96 100644 --- a/lab-3/token.h +++ b/lab-3/token.h @@ -1,12 +1,42 @@ typedef enum { TOKEN_EOF = 0, + // Identification Keywords TOKEN_IDENTIFICATION, TOKEN_KEYWORD_DIVISION, + TOKEN_KEYWORD_DATA, + TOKEN_KEYWORD_SECTION, TOKEN_PROGRAM_ID, + TOKEN_WORKING_STORAGE, TOKEN_PROCEDURE, + + // Program Keywords TOKEN_DISPLAY, TOKEN_STOP, TOKEN_RUN, + TOKEN_MOVE, + TOKEN_KEYWORD_TO, + TOKEN_PERFORM, + TOKEN_VARYING, + TOKEN_KEYWORD_FROM, + TOKEN_KEYWORD_BY, + TOKEN_UNTIL, + TOKEN_END_PERFORM, + TOKEN_IF, + TOKEN_END_IF, + TOKEN_SPACE, + + // Identifiers + TOKEN_IDENT, TOKEN_STRING, - TOKEN_DOT + TOKEN_INTEGER, + TOKEN_LEFT_PARENTHESIS, + TOKEN_RIGHT_PARENTHESIS, + TOKEN_DOT, + TOKEN_COMMENT, + + // Operators + TOKEN_ADD, + TOKEN_SUB, + TOKEN_GREATER_THAN, + TOKEN_LESS_THAN, } token_t; \ No newline at end of file From 4aabd97544aa9002f39654bfb753b9e2607dae53 Mon Sep 17 00:00:00 2001 From: Riley Smith Date: Tue, 22 Oct 2024 16:31:46 -0700 Subject: [PATCH 2/5] scanner and tokens Co-authored-by: Jenessyl Co-authored-by: Joshua Garbi --- lab-3/scanner.flex | 6 ++++++ lab-3/token.h | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/lab-3/scanner.flex b/lab-3/scanner.flex index 0838033..b6824a4 100644 --- a/lab-3/scanner.flex +++ b/lab-3/scanner.flex @@ -29,6 +29,12 @@ END-PERFORM { return TOKEN_END_PERFORM; } IF { return TOKEN_IF; } END-IF { return TOKEN_END_IF; } SPACE { return TOKEN_SPACE; } +PIC { return TOKEN_PICTURE; } +OCCURS { return TOKEN_KEYWORD_OCCURS; } +VALUE { return TOKEN_KEYWORD_VALUE; } +X { return TOKEN_ALPHANUMERIC; } +S9 { return TOKEN_SIGNED_NUMERIC; } +9 { return TOKEN_NUMERIC; } \+ { return TOKEN_ADD; } \- { return TOKEN_SUB; } diff --git a/lab-3/token.h b/lab-3/token.h index 7ffcc96..155ca08 100644 --- a/lab-3/token.h +++ b/lab-3/token.h @@ -24,11 +24,19 @@ typedef enum { TOKEN_IF, TOKEN_END_IF, TOKEN_SPACE, + TOKEN_KEYWORD_OCCURS, + TOKEN_KEYWORD_VALUE, // Identifiers TOKEN_IDENT, + // Data types TOKEN_STRING, TOKEN_INTEGER, + TOKEN_PICTURE, + TOKEN_ALPHANUMERIC, + TOKEN_NUMERIC, + TOKEN_SIGNED_NUMERIC, + // Grammar TOKEN_LEFT_PARENTHESIS, TOKEN_RIGHT_PARENTHESIS, TOKEN_DOT, From 3866859c3f11797ff8ab0e4ea0354656ad425b7c Mon Sep 17 00:00:00 2001 From: Riley Smith Date: Wed, 23 Oct 2024 02:26:47 -0700 Subject: [PATCH 3/5] add more tokens and fix identifier regex --- lab-3/main.c | 2 +- lab-3/main_test.c | 10 ++++++++++ lab-3/scanner.flex | 23 +++++++++++++++++------ lab-3/token.h | 13 +++++++++++++ 4 files changed, 41 insertions(+), 7 deletions(-) diff --git a/lab-3/main.c b/lab-3/main.c index 66a6605..0970031 100644 --- a/lab-3/main.c +++ b/lab-3/main.c @@ -8,7 +8,7 @@ extern char *yytext; int main(int argc, char *argv[]) { FILE *file; - const char *filename = "samples/sorting-snippet.cbl"; // Default filename + const char *filename = "samples/quadratic-snippet.cbl"; // Default filename // Check if a filename is provided as a command-line argument if (argc > 1) { diff --git a/lab-3/main_test.c b/lab-3/main_test.c index beb70df..32a4bbf 100644 --- a/lab-3/main_test.c +++ b/lab-3/main_test.c @@ -48,6 +48,16 @@ UTEST(scanner, assignment) { yy_delete_buffer(buffer); } +UTEST(scanner, hello) { + struct token_st tokens[] = { + {TOKEN_IDENTIFICATION, "IDENTIFICATION"}, + {TOKEN_PROGRAM_ID, "PROGRAM-ID. HELLO-WORLD."} + {TOKEN_PROCEDURE_DIVISION, "PROCEDURE DIVISION."}, + {TOKEN_STRING, "Hello World!"}, + {TOKEN_KEYWORD_PRINT, "DISPLAY"}, + {TOKEN_EOF, "STOP RUN."}, + }; + UTEST(scanner, sample) { struct token_st tokens[] = { {TOKEN_IDENT, "answer"}, diff --git a/lab-3/scanner.flex b/lab-3/scanner.flex index b6824a4..e290bfc 100644 --- a/lab-3/scanner.flex +++ b/lab-3/scanner.flex @@ -1,11 +1,11 @@ %{ #include "token.h" %} -DIGIT [0-9] -LETTER [a-zA-Z] -NAME [a-zA-Z0-9][a-zA-Z0-9_-]*[a-zA-Z0-9] +NAME [a-zA-Z]([a-zA-Z0-9_-]*[a-zA-Z0-9])? +DIGIT [0-9]+ %% + (" "|\t|\n) /* skip whitespace */ \*>\ ?.* { return TOKEN_COMMENT; } IDENTIFICATION { return TOKEN_IDENTIFICATION; } @@ -32,23 +32,34 @@ SPACE { return TOKEN_SPACE; } PIC { return TOKEN_PICTURE; } OCCURS { return TOKEN_KEYWORD_OCCURS; } VALUE { return TOKEN_KEYWORD_VALUE; } +COMPUTE { return TOKEN_KEYWORD_COMPUTE; } +FUNCTION { return TOKEN_KEYWORD_FUNCTION; } X { return TOKEN_ALPHANUMERIC; } S9 { return TOKEN_SIGNED_NUMERIC; } 9 { return TOKEN_NUMERIC; } +V9 { return TOKEN_IMPLIED_DECIMAL; } +COMP { return TOKEN_COMPUTATION_LEVEL_0; } +COMP-1 { return TOKEN_COMPUTATION_LEVEL_1; } +COMP-2 { return TOKEN_COMPUTATION_LEVEL_2; } +COMP-3 { return TOKEN_COMPUTATION_LEVEL_3; } +{DIGIT} { return TOKEN_INTEGER; } +{NAME} { return TOKEN_IDENT; } \+ { return TOKEN_ADD; } \- { return TOKEN_SUB; } +\*\* { return TOKEN_EXPONENTIAL; } +\* { return TOKEN_MULTIPLY; } +\/ { return TOKEN_DIVIDE; } \> { return TOKEN_GREATER_THAN; } \< { return TOKEN_LESS_THAN; } +\= { return TOKEN_EQUAL;} "\""[^"]*"\"" { return TOKEN_STRING; } "\'"[^']*"\'" { return TOKEN_STRING; } "(" { return TOKEN_LEFT_PARENTHESIS; } ")" { return TOKEN_RIGHT_PARENTHESIS; } - \. { return TOKEN_DOT; } -{NAME} { return TOKEN_IDENT; } -{DIGIT} { return TOKEN_INTEGER; } + %% int yywrap() { return 1; } diff --git a/lab-3/token.h b/lab-3/token.h index 155ca08..85bf604 100644 --- a/lab-3/token.h +++ b/lab-3/token.h @@ -26,6 +26,8 @@ typedef enum { TOKEN_SPACE, TOKEN_KEYWORD_OCCURS, TOKEN_KEYWORD_VALUE, + TOKEN_KEYWORD_COMPUTE, + TOKEN_KEYWORD_FUNCTION, // Identifiers TOKEN_IDENT, @@ -36,6 +38,13 @@ typedef enum { TOKEN_ALPHANUMERIC, TOKEN_NUMERIC, TOKEN_SIGNED_NUMERIC, + TOKEN_IMPLIED_DECIMAL, + // https://ibmmainframes.com/about393.html + TOKEN_COMPUTATION_LEVEL_0, + TOKEN_COMPUTATION_LEVEL_1, + TOKEN_COMPUTATION_LEVEL_2, + TOKEN_COMPUTATION_LEVEL_3, + // Grammar TOKEN_LEFT_PARENTHESIS, TOKEN_RIGHT_PARENTHESIS, @@ -45,6 +54,10 @@ typedef enum { // Operators TOKEN_ADD, TOKEN_SUB, + TOKEN_MULTIPLY, + TOKEN_DIVIDE, + TOKEN_EQUAL, TOKEN_GREATER_THAN, TOKEN_LESS_THAN, + TOKEN_EXPONENTIAL, } token_t; \ No newline at end of file From abdcad6b3a77416ec0de149816544960d11e13df Mon Sep 17 00:00:00 2001 From: Joshua Garbi Date: Thu, 24 Oct 2024 10:53:28 -0700 Subject: [PATCH 4/5] Update file main_test.c --- lab-3/main_test.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/lab-3/main_test.c b/lab-3/main_test.c index 32a4bbf..5407c9f 100644 --- a/lab-3/main_test.c +++ b/lab-3/main_test.c @@ -51,11 +51,21 @@ UTEST(scanner, assignment) { UTEST(scanner, hello) { struct token_st tokens[] = { {TOKEN_IDENTIFICATION, "IDENTIFICATION"}, - {TOKEN_PROGRAM_ID, "PROGRAM-ID. HELLO-WORLD."} - {TOKEN_PROCEDURE_DIVISION, "PROCEDURE DIVISION."}, - {TOKEN_STRING, "Hello World!"}, - {TOKEN_KEYWORD_PRINT, "DISPLAY"}, - {TOKEN_EOF, "STOP RUN."}, + {TOKEN_KEYWORD_DIVISION, "DIVISION"}, + {TOKEN_DOT, "."}, + {TOKEN_PROGRAM_ID, "PROGRAM-ID"}, + {TOKEN_DOT, "."}, + {TOKEN_IDENT, "HELLO-WORLD"}, + {TOKEN_DOT, "."}, + {TOKEN_PROCEDURE, "PROCEDURE"}, + {TOKEN_KEYWORD_DIVISION, "DIVISION"}, + {TOKEN_DOT, "."}, + {TOKEN_DISPLAY, "DISPLAY"}, + {TOKEN_STRING, "'Hello World!'"}, + {TOKEN_STOP, "STOP"}, + {TOKEN_RUN, "RUN"}, + {TOKEN_DOT, "."}, + {TOKEN_EOF, ""}, }; UTEST(scanner, sample) { From d04c690652ef79910c7afcc12c5391e63c55d4a1 Mon Sep 17 00:00:00 2001 From: Riley Smith Date: Thu, 24 Oct 2024 11:20:41 -0700 Subject: [PATCH 5/5] fix tests --- lab-3/main.c | 2 +- lab-3/main_test.c | 45 ++------------------------------------------- lab-3/scanner.flex | 1 - lab-3/token.h | 1 + 4 files changed, 4 insertions(+), 45 deletions(-) diff --git a/lab-3/main.c b/lab-3/main.c index 0970031..59ddc86 100644 --- a/lab-3/main.c +++ b/lab-3/main.c @@ -8,7 +8,7 @@ extern char *yytext; int main(int argc, char *argv[]) { FILE *file; - const char *filename = "samples/quadratic-snippet.cbl"; // Default filename + const char *filename = "samples/hello-world.cbl"; // Default filename // Check if a filename is provided as a command-line argument if (argc > 1) { diff --git a/lab-3/main_test.c b/lab-3/main_test.c index 5407c9f..ac25b7e 100644 --- a/lab-3/main_test.c +++ b/lab-3/main_test.c @@ -18,36 +18,6 @@ struct token_st { char *p; }; -UTEST(scanner, identifier) { - token_t t; - // Must include the null character to terminate input - char string[] = "test\0"; - YY_BUFFER_STATE buffer = yy_scan_buffer(string, sizeof(string)); - - ASSERT_EQ(TOKEN_IDENT, (t = yylex())); - ASSERT_STREQ("test", yytext); - - ASSERT_EQ(TOKEN_EOF, (t = yylex())); - ASSERT_STREQ("", yytext); - - yy_delete_buffer(buffer); -} - -UTEST(scanner, assignment) { - token_t t; - // Must include the null character to terminate input - char string[] = "=\0"; - YY_BUFFER_STATE buffer = yy_scan_buffer(string, sizeof(string)); - - ASSERT_EQ(TOKEN_ASSIGNMENT, (t = yylex())); - ASSERT_STREQ("=", yytext); - - ASSERT_EQ(TOKEN_EOF, (t = yylex())); - ASSERT_STREQ("", yytext); - - yy_delete_buffer(buffer); -} - UTEST(scanner, hello) { struct token_st tokens[] = { {TOKEN_IDENTIFICATION, "IDENTIFICATION"}, @@ -68,23 +38,12 @@ UTEST(scanner, hello) { {TOKEN_EOF, ""}, }; -UTEST(scanner, sample) { - struct token_st tokens[] = { - {TOKEN_IDENT, "answer"}, - {TOKEN_ASSIGNMENT, "="}, - {TOKEN_NUMBER, "2020"}, - {TOKEN_ADD, "+"}, - {TOKEN_NUMBER, "4"}, - {TOKEN_EOF, ""} - }; - - yyin = fopen("samples/program.c", "r"); - yyrestart(yyin); + yyin = fopen("samples/hello-world.cbl", "r"); ASSERT_TRUE(yyin); - int index = 0; token_t t; do { + printf("index: %d token: %d text: %s\n", index, t, yytext); ASSERT_EQ(tokens[index].t, (t = yylex())); ASSERT_STREQ(tokens[index].p, yytext); ++index; diff --git a/lab-3/scanner.flex b/lab-3/scanner.flex index e290bfc..4a3e7e4 100644 --- a/lab-3/scanner.flex +++ b/lab-3/scanner.flex @@ -3,7 +3,6 @@ %} NAME [a-zA-Z]([a-zA-Z0-9_-]*[a-zA-Z0-9])? DIGIT [0-9]+ - %% (" "|\t|\n) /* skip whitespace */ diff --git a/lab-3/token.h b/lab-3/token.h index 85bf604..8b40b70 100644 --- a/lab-3/token.h +++ b/lab-3/token.h @@ -31,6 +31,7 @@ typedef enum { // Identifiers TOKEN_IDENT, + // Data types TOKEN_STRING, TOKEN_INTEGER,