diff --git a/lab-3/main.c b/lab-3/main.c
index 4a96a9e..59ddc86 100644
--- a/lab-3/main.c
+++ b/lab-3/main.c
@@ -8,7 +8,7 @@ extern char *yytext;
 int main(int argc, char *argv[]) {
     FILE *file;
 
-    const char *filename = "samples/hello.py"; // Default filename
+    const char *filename = "samples/hello-world.cbl"; // Default filename
 
     // Check if a filename is provided as a command-line argument
     if (argc > 1) {
diff --git a/lab-3/main_test.c b/lab-3/main_test.c
index beb70df..ac25b7e 100644
--- a/lab-3/main_test.c
+++ b/lab-3/main_test.c
@@ -18,53 +18,32 @@ struct token_st {
     char *p;
 };
 
-UTEST(scanner, identifier) {
-    token_t t;
-    // Must include the null character to terminate input
-    char string[] = "test\0";
-    YY_BUFFER_STATE buffer = yy_scan_buffer(string, sizeof(string));
-
-    ASSERT_EQ(TOKEN_IDENT, (t = yylex()));
-    ASSERT_STREQ("test", yytext);
-
-    ASSERT_EQ(TOKEN_EOF, (t = yylex()));
-    ASSERT_STREQ("", yytext);
-
-    yy_delete_buffer(buffer);
-}
-
-UTEST(scanner, assignment) {
-    token_t t;
-    // Must include the null character to terminate input
-    char string[] = "=\0";
-    YY_BUFFER_STATE buffer = yy_scan_buffer(string, sizeof(string));
-
-    ASSERT_EQ(TOKEN_ASSIGNMENT, (t = yylex()));
-    ASSERT_STREQ("=", yytext);
-
-    ASSERT_EQ(TOKEN_EOF, (t = yylex()));
-    ASSERT_STREQ("", yytext);
-
-    yy_delete_buffer(buffer);
-}
-
-UTEST(scanner, sample) {
+UTEST(scanner, hello) {
     struct token_st tokens[] = {
-        {TOKEN_IDENT, "answer"},
-        {TOKEN_ASSIGNMENT, "="},
-        {TOKEN_NUMBER, "2020"},
-        {TOKEN_ADD, "+"},
-        {TOKEN_NUMBER, "4"},
-        {TOKEN_EOF, ""}
+        {TOKEN_IDENTIFICATION, "IDENTIFICATION"},
+        {TOKEN_KEYWORD_DIVISION, "DIVISION"},
+        {TOKEN_DOT, "."},
+        {TOKEN_PROGRAM_ID, "PROGRAM-ID"},
+        {TOKEN_DOT, "."},
+        {TOKEN_IDENT, "HELLO-WORLD"},
+        {TOKEN_DOT, "."},
+        {TOKEN_PROCEDURE, "PROCEDURE"},
+        {TOKEN_KEYWORD_DIVISION, "DIVISION"},
+        {TOKEN_DOT, "."},
+        {TOKEN_DISPLAY, "DISPLAY"},
+        {TOKEN_STRING, "'Hello World!'"},
+        {TOKEN_STOP, "STOP"},
+        {TOKEN_RUN, "RUN"},
+        {TOKEN_DOT, "."},
+        {TOKEN_EOF, ""},
     };
 
-    yyin = fopen("samples/program.c", "r");
-    yyrestart(yyin);
+    yyin = fopen("samples/hello-world.cbl", "r");
     ASSERT_TRUE(yyin);
-
     int index = 0;
     token_t t;
     do {
+        printf("index: %d token: %d text: %s\n", index, t, yytext);
         ASSERT_EQ(tokens[index].t, (t = yylex()));
         ASSERT_STREQ(tokens[index].p, yytext);
         ++index;
diff --git a/lab-3/samples/hello-world.cbl b/lab-3/samples/hello-world.cbl
index 3c78e9b..737f19e 100644
--- a/lab-3/samples/hello-world.cbl
+++ b/lab-3/samples/hello-world.cbl
@@ -1,6 +1,5 @@
 IDENTIFICATION DIVISION.
 PROGRAM-ID. HELLO-WORLD.
 PROCEDURE DIVISION.
-    DISPLAY "Hello World!"
+    DISPLAY 'Hello World!'
     STOP RUN.
-if
\ No newline at end of file
diff --git a/lab-3/samples/sorting-snippet.cbl b/lab-3/samples/sorting-snippet.cbl
index a0eae92..8324e47 100644
--- a/lab-3/samples/sorting-snippet.cbl
+++ b/lab-3/samples/sorting-snippet.cbl
@@ -13,7 +13,7 @@ WORKING-STORAGE SECTION.
     05 WS-INDEX PIC S9(4) COMP.
 
 PROCEDURE DIVISION.
-*> * Initialize test data
+*> Initialize test data
     MOVE "30" TO WS-SORT-ROW(1)
     MOVE "10" TO WS-SORT-ROW(2)
     MOVE "50" TO WS-SORT-ROW(3)
diff --git a/lab-3/scanner.flex b/lab-3/scanner.flex
index 021db23..4a3e7e4 100644
--- a/lab-3/scanner.flex
+++ b/lab-3/scanner.flex
@@ -1,12 +1,64 @@
 %{
 #include "token.h"
 %}
-DIGIT [0-9]
-LETTER [a-zA-Z]
+NAME [a-zA-Z]([a-zA-Z0-9_-]*[a-zA-Z0-9])?
+DIGIT [0-9]+
 
 %%
+(" "|\t|\n) /* skip whitespace */
+\*>\ ?.* { return TOKEN_COMMENT; }
 IDENTIFICATION { return TOKEN_IDENTIFICATION; }
 DIVISION { return TOKEN_KEYWORD_DIVISION; }
+PROGRAM-ID { return TOKEN_PROGRAM_ID; }
+PROCEDURE { return TOKEN_PROCEDURE; }
+DATA { return TOKEN_KEYWORD_DATA; }
+SECTION { return TOKEN_KEYWORD_SECTION; }
+WORKING-STORAGE { return TOKEN_WORKING_STORAGE; }
+DISPLAY { return TOKEN_DISPLAY; }
+STOP { return TOKEN_STOP; }
+RUN { return TOKEN_RUN; }
+MOVE { return TOKEN_MOVE; }
+TO { return TOKEN_KEYWORD_TO; }
+VARYING { return TOKEN_VARYING; }
+FROM { return TOKEN_KEYWORD_FROM; }
+BY { return TOKEN_KEYWORD_BY; }
+UNTIL { return TOKEN_UNTIL; }
+PERFORM { return TOKEN_PERFORM; }
+END-PERFORM { return TOKEN_END_PERFORM; }
+IF { return TOKEN_IF; }
+END-IF { return TOKEN_END_IF; }
+SPACE { return TOKEN_SPACE; }
+PIC { return TOKEN_PICTURE; }
+OCCURS { return TOKEN_KEYWORD_OCCURS; }
+VALUE { return TOKEN_KEYWORD_VALUE; }
+COMPUTE { return TOKEN_KEYWORD_COMPUTE; }
+FUNCTION { return TOKEN_KEYWORD_FUNCTION; }
+X { return TOKEN_ALPHANUMERIC; }
+S9 { return TOKEN_SIGNED_NUMERIC; }
+9 { return TOKEN_NUMERIC; }
+V9 { return TOKEN_IMPLIED_DECIMAL; }
+COMP { return TOKEN_COMPUTATION_LEVEL_0; }
+COMP-1 { return TOKEN_COMPUTATION_LEVEL_1; }
+COMP-2 { return TOKEN_COMPUTATION_LEVEL_2; }
+COMP-3 { return TOKEN_COMPUTATION_LEVEL_3; }
+
+{DIGIT} { return TOKEN_INTEGER; }
+{NAME} { return TOKEN_IDENT; }
+\+ { return TOKEN_ADD; }
+\- { return TOKEN_SUB; }
+\*\* { return TOKEN_EXPONENTIAL; }
+\* { return TOKEN_MULTIPLY; }
+\/ { return TOKEN_DIVIDE; }
+\> { return TOKEN_GREATER_THAN; }
+\< { return TOKEN_LESS_THAN; }
+\= { return TOKEN_EQUAL; }
+
+"\""[^"]*"\"" { return TOKEN_STRING; }
+"\'"[^']*"\'" { return TOKEN_STRING; }
+"(" { return TOKEN_LEFT_PARENTHESIS; }
+")" { return TOKEN_RIGHT_PARENTHESIS; }
+
+\. { return TOKEN_DOT; }
 
 %%
 int yywrap() { return 1; }
diff --git a/lab-3/token.h b/lab-3/token.h
index 993c7a1..8b40b70 100644
--- a/lab-3/token.h
+++ b/lab-3/token.h
@@ -1,12 +1,64 @@
 typedef enum {
     TOKEN_EOF = 0,
+    // Identification Keywords
     TOKEN_IDENTIFICATION,
     TOKEN_KEYWORD_DIVISION,
+    TOKEN_KEYWORD_DATA,
+    TOKEN_KEYWORD_SECTION,
     TOKEN_PROGRAM_ID,
+    TOKEN_WORKING_STORAGE,
     TOKEN_PROCEDURE,
+
+    // Program Keywords
     TOKEN_DISPLAY,
     TOKEN_STOP,
     TOKEN_RUN,
+    TOKEN_MOVE,
+    TOKEN_KEYWORD_TO,
+    TOKEN_PERFORM,
+    TOKEN_VARYING,
+    TOKEN_KEYWORD_FROM,
+    TOKEN_KEYWORD_BY,
+    TOKEN_UNTIL,
+    TOKEN_END_PERFORM,
+    TOKEN_IF,
+    TOKEN_END_IF,
+    TOKEN_SPACE,
+    TOKEN_KEYWORD_OCCURS,
+    TOKEN_KEYWORD_VALUE,
+    TOKEN_KEYWORD_COMPUTE,
+    TOKEN_KEYWORD_FUNCTION,
+
+    // Identifiers
+    TOKEN_IDENT,
+
+    // Data types
     TOKEN_STRING,
-    TOKEN_DOT
+    TOKEN_INTEGER,
+    TOKEN_PICTURE,
+    TOKEN_ALPHANUMERIC,
+    TOKEN_NUMERIC,
+    TOKEN_SIGNED_NUMERIC,
+    TOKEN_IMPLIED_DECIMAL,
+    // https://ibmmainframes.com/about393.html
+    TOKEN_COMPUTATION_LEVEL_0,
+    TOKEN_COMPUTATION_LEVEL_1,
+    TOKEN_COMPUTATION_LEVEL_2,
+    TOKEN_COMPUTATION_LEVEL_3,
+
+    // Grammar
+    TOKEN_LEFT_PARENTHESIS,
+    TOKEN_RIGHT_PARENTHESIS,
+    TOKEN_DOT,
+    TOKEN_COMMENT,
+
+    // Operators
+    TOKEN_ADD,
+    TOKEN_SUB,
+    TOKEN_MULTIPLY,
+    TOKEN_DIVIDE,
+    TOKEN_EQUAL,
+    TOKEN_GREATER_THAN,
+    TOKEN_LESS_THAN,
+    TOKEN_EXPONENTIAL,
 } token_t;
\ No newline at end of file
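Note on test coverage: this patch drops the in-memory yy_scan_buffer tests in favour of a single file-driven test against samples/hello-world.cbl. If buffer-based unit tests are still wanted for the new COBOL tokens, a minimal sketch in the style of the removed tests could look like the following (the test name and input line are hypothetical, not part of the patch; it assumes the same flex declarations and utest.h macros already used in main_test.c):

UTEST(scanner, display_statement) {
    token_t t;
    // As in the removed tests: yy_scan_buffer() needs the input to end in two
    // NUL bytes, so the explicit "\0" plus the literal's own terminator is required.
    char string[] = "DISPLAY 'Hello World!'.\0";
    YY_BUFFER_STATE buffer = yy_scan_buffer(string, sizeof(string));

    ASSERT_EQ(TOKEN_DISPLAY, (t = yylex()));
    ASSERT_STREQ("DISPLAY", yytext);

    ASSERT_EQ(TOKEN_STRING, (t = yylex()));
    ASSERT_STREQ("'Hello World!'", yytext);

    ASSERT_EQ(TOKEN_DOT, (t = yylex()));
    ASSERT_STREQ(".", yytext);

    ASSERT_EQ(TOKEN_EOF, (t = yylex()));

    yy_delete_buffer(buffer);
}

Such a test exercises the new single-quoted string and DOT rules without depending on a sample file on disk, which keeps it independent of the hello test above.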