Merge branch 'main' of gitlab.cs.wallawalla.edu:lustje/language-interpreter-lab

This commit is contained in:
Jenessy Lustre 2024-10-24 11:23:04 -07:00
commit 86a2cc9293
6 changed files with 129 additions and 47 deletions

View File

@ -8,7 +8,7 @@ extern char *yytext;
int main(int argc, char *argv[]) {
FILE *file;
const char *filename = "samples/hello.py"; // Default filename
const char *filename = "samples/hello-world.cbl"; // Default filename
// Check if a filename is provided as a command-line argument
if (argc > 1) {

View File

@ -18,53 +18,32 @@ struct token_st {
char *p;
};
UTEST(scanner, identifier) {
    /* yy_scan_buffer requires the buffer to end with TWO NUL bytes:
       the explicit "\0" plus the string literal's implicit terminator. */
    char input[] = "test\0";
    YY_BUFFER_STATE state = yy_scan_buffer(input, sizeof(input));

    /* A bare word scans as an identifier, then the stream is exhausted. */
    token_t tok = yylex();
    ASSERT_EQ(TOKEN_IDENT, tok);
    ASSERT_STREQ("test", yytext);

    tok = yylex();
    ASSERT_EQ(TOKEN_EOF, tok);
    ASSERT_STREQ("", yytext);

    yy_delete_buffer(state);
}
UTEST(scanner, assignment) {
    /* yy_scan_buffer requires the buffer to end with TWO NUL bytes:
       the explicit "\0" plus the string literal's implicit terminator. */
    char input[] = "=\0";
    YY_BUFFER_STATE state = yy_scan_buffer(input, sizeof(input));

    /* A lone '=' scans as the assignment token, then the stream is exhausted. */
    token_t tok = yylex();
    ASSERT_EQ(TOKEN_ASSIGNMENT, tok);
    ASSERT_STREQ("=", yytext);

    tok = yylex();
    ASSERT_EQ(TOKEN_EOF, tok);
    ASSERT_STREQ("", yytext);

    yy_delete_buffer(state);
}
UTEST(scanner, sample) {
UTEST(scanner, hello) {
struct token_st tokens[] = {
{TOKEN_IDENT, "answer"},
{TOKEN_ASSIGNMENT, "="},
{TOKEN_NUMBER, "2020"},
{TOKEN_ADD, "+"},
{TOKEN_NUMBER, "4"},
{TOKEN_EOF, ""}
{TOKEN_IDENTIFICATION, "IDENTIFICATION"},
{TOKEN_KEYWORD_DIVISION, "DIVISION"},
{TOKEN_DOT, "."},
{TOKEN_PROGRAM_ID, "PROGRAM-ID"},
{TOKEN_DOT, "."},
{TOKEN_IDENT, "HELLO-WORLD"},
{TOKEN_DOT, "."},
{TOKEN_PROCEDURE, "PROCEDURE"},
{TOKEN_KEYWORD_DIVISION, "DIVISION"},
{TOKEN_DOT, "."},
{TOKEN_DISPLAY, "DISPLAY"},
{TOKEN_STRING, "'Hello World!'"},
{TOKEN_STOP, "STOP"},
{TOKEN_RUN, "RUN"},
{TOKEN_DOT, "."},
{TOKEN_EOF, ""},
};
yyin = fopen("samples/program.c", "r");
yyrestart(yyin);
yyin = fopen("samples/hello-world.cbl", "r");
ASSERT_TRUE(yyin);
int index = 0;
token_t t;
do {
printf("index: %d token: %d text: %s\n", index, t, yytext);
ASSERT_EQ(tokens[index].t, (t = yylex()));
ASSERT_STREQ(tokens[index].p, yytext);
++index;

View File

@ -1,6 +1,5 @@
IDENTIFICATION DIVISION.
PROGRAM-ID. HELLO-WORLD.
PROCEDURE DIVISION.
DISPLAY "Hello World!"
DISPLAY 'Hello World!'
STOP RUN.
if

View File

@ -13,7 +13,7 @@ WORKING-STORAGE SECTION.
05 WS-INDEX PIC S9(4) COMP.
PROCEDURE DIVISION.
*> * Initialize test data
*> Initialize test data
MOVE "30" TO WS-SORT-ROW(1)
MOVE "10" TO WS-SORT-ROW(2)
MOVE "50" TO WS-SORT-ROW(3)

View File

@ -1,12 +1,64 @@
%{
/* COBOL scanner: maps source text to the token_t values in token.h. */
#include "token.h"
%}
NAME [a-zA-Z]([a-zA-Z0-9_-]*[a-zA-Z0-9])?
DIGIT [0-9]+
%%
(" "|\t|\n) /* skip whitespace */
\*>\ ?.* { return TOKEN_COMMENT; }
IDENTIFICATION { return TOKEN_IDENTIFICATION; }
DIVISION { return TOKEN_KEYWORD_DIVISION; }
PROGRAM-ID { return TOKEN_PROGRAM_ID; }
PROCEDURE { return TOKEN_PROCEDURE; }
DATA { return TOKEN_KEYWORD_DATA; }
SECTION { return TOKEN_KEYWORD_SECTION; }
WORKING-STORAGE { return TOKEN_WORKING_STORAGE; }
DISPLAY { return TOKEN_DISPLAY; }
STOP { return TOKEN_STOP; }
RUN { return TOKEN_RUN; }
MOVE { return TOKEN_MOVE; }
TO { return TOKEN_KEYWORD_TO; }
VARYING { return TOKEN_VARYING; }
FROM { return TOKEN_KEYWORD_FROM; }
BY { return TOKEN_KEYWORD_BY; }
UNTIL { return TOKEN_UNTIL; }
PERFORM { return TOKEN_PERFORM; }
END-PERFORM { return TOKEN_END_PERFORM; }
IF { return TOKEN_IF; }
END-IF { return TOKEN_END_IF; }
SPACE { return TOKEN_SPACE; }
PIC { return TOKEN_PICTURE; }
OCCURS { return TOKEN_KEYWORD_OCCURS; }
VALUE { return TOKEN_KEYWORD_VALUE; }
COMPUTE { return TOKEN_KEYWORD_COMPUTE; }
FUNCTION { return TOKEN_KEYWORD_FUNCTION; }
X { return TOKEN_ALPHANUMERIC; }
S9 { return TOKEN_SIGNED_NUMERIC; }
9 { return TOKEN_NUMERIC; }
V9 { return TOKEN_IMPLIED_DECIMAL; }
COMP { return TOKEN_COMPUTATION_LEVEL_0; }
COMP-1 { return TOKEN_COMPUTATION_LEVEL_1; }
COMP-2 { return TOKEN_COMPUTATION_LEVEL_2; }
COMP-3 { return TOKEN_COMPUTATION_LEVEL_3; }
{DIGIT} { return TOKEN_INTEGER; }
{NAME} { return TOKEN_IDENT; }
\+ { return TOKEN_ADD; }
\- { return TOKEN_SUB; }
\*\* { return TOKEN_EXPONENTIAL; }
\* { return TOKEN_MULTIPLY; }
\/ { return TOKEN_DIVIDE; }
\> { return TOKEN_GREATER_THAN; }
\< { return TOKEN_LESS_THAN; }
\= { return TOKEN_EQUAL;}
"\""[^"]*"\"" { return TOKEN_STRING; }
"\'"[^']*"\'" { return TOKEN_STRING; }
"(" { return TOKEN_LEFT_PARENTHESIS; }
")" { return TOKEN_RIGHT_PARENTHESIS; }
\. { return TOKEN_DOT; }
%%
int yywrap() { return 1; }

View File

@ -1,12 +1,64 @@
/* Token kinds produced by the COBOL scanner. TOKEN_EOF is pinned to 0
 * because flex's yylex() returns 0 at end of input. */
typedef enum {
TOKEN_EOF = 0,
// Identification Keywords
TOKEN_IDENTIFICATION,
TOKEN_KEYWORD_DIVISION,
TOKEN_KEYWORD_DATA,
TOKEN_KEYWORD_SECTION,
TOKEN_PROGRAM_ID,
TOKEN_WORKING_STORAGE,
TOKEN_PROCEDURE,
// Program Keywords
TOKEN_DISPLAY,
TOKEN_STOP,
TOKEN_RUN,
TOKEN_MOVE,
TOKEN_KEYWORD_TO,
TOKEN_PERFORM,
TOKEN_VARYING,
TOKEN_KEYWORD_FROM,
TOKEN_KEYWORD_BY,
TOKEN_UNTIL,
TOKEN_END_PERFORM,
TOKEN_IF,
TOKEN_END_IF,
TOKEN_SPACE,
TOKEN_KEYWORD_OCCURS,
TOKEN_KEYWORD_VALUE,
TOKEN_KEYWORD_COMPUTE,
TOKEN_KEYWORD_FUNCTION,
// Identifiers
TOKEN_IDENT,
// Data types
TOKEN_STRING,
TOKEN_INTEGER,
TOKEN_PICTURE,
TOKEN_ALPHANUMERIC,
TOKEN_NUMERIC,
TOKEN_SIGNED_NUMERIC,
TOKEN_IMPLIED_DECIMAL,
// PIC usage clauses: https://ibmmainframes.com/about393.html
TOKEN_COMPUTATION_LEVEL_0,
TOKEN_COMPUTATION_LEVEL_1,
TOKEN_COMPUTATION_LEVEL_2,
TOKEN_COMPUTATION_LEVEL_3,
// Grammar
TOKEN_LEFT_PARENTHESIS,
TOKEN_RIGHT_PARENTHESIS,
TOKEN_DOT,
TOKEN_COMMENT,
// Operators
TOKEN_ADD,
TOKEN_SUB,
TOKEN_MULTIPLY,
TOKEN_DIVIDE,
TOKEN_EQUAL,
TOKEN_GREATER_THAN,
TOKEN_LESS_THAN,
TOKEN_EXPONENTIAL,
} token_t;