/*----------------------------------------------------------------------------*/ /* Context free grammar parser */ /* LOG32LLC Assignment 7 */ /* Sten M. Andersen */ /* */ /*----------------------------------------------------------------------------*/ #include #include #include #include #define MAX_NONTERMINALS 50 #define MAX_CODE 1000 #define MAX_STRING 100 #define ALFA_LENGTH 25 /* 16 */ typedef enum { false, true } BOOL; typedef enum { badchar,query,nonterminal,quote,altern,equal, lparen,rparen,lbrack,rbrack,lbrace,rbrace,semcol,period } symbol; typedef enum { tsy,nsy,alt,cat,rep,opt } operator; typedef struct { operator op; int left; int right; } node; /*----------------------------------------------------------------------------*/ /* Globals */ /*----------------------------------------------------------------------------*/ const char* emptyalfa = " "; // spaces struct { char name[ALFA_LENGTH]; int adr; } table[ MAX_NONTERMINALS ]; symbol sym; int lasttable, location; char ch; char al[ ALFA_LENGTH ]; node code[ MAX_CODE ]; int lastcode; char instring[ MAX_STRING ]; int sp,sl; BOOL tracing; int i; int num_parses; BOOL cont; jmp_buf start; // On error: empty stack and go to start /*----------------------------------------------------------------------------*/ /* Function declarations */ /*----------------------------------------------------------------------------*/ void factor(); void term(); void expression(); void gen( operator o, int l, int r ); void getysm(); void error( char* msg ); /*----------------------------------------------------------------------------*/ /* Input */ /*----------------------------------------------------------------------------*/ void get_char() { ch = getchar(); if ( ch <= ' ' ) ch = ' '; } void getsym() { int k; do get_char(); while ( ch <= ' ' ); if ( isalpha( ch ) ) { sym = nonterminal; k = 0; strcpy(al, emptyalfa); // Read a symbol do { if ( k < ALFA_LENGTH ) { k++; al[ k ] = ch; } get_char(); } while ( isalnum( ch ) || ch == '_' ); ungetc( ch, stdin ); // push back char strcpy( table[ 0 ].name, al);// Prepare sentinel search location = lasttable; while ( strcmp(table[ location ].name, al) ) location--; if ( !location ) { // It wasn't in the table from before lasttable++; // So insert it now strcpy( table[ lasttable ].name, al); table[ lasttable ].adr = 0; location = lasttable; } } else { switch ( ch ) { case '.': sym = period; break; case ';': sym = semcol; break; case '?': sym = query; break; case '=': sym = equal; break; case '\'':sym = quote; break; case '|': sym = altern; break; case '(': sym = lparen; break; case ')': sym = rparen; break; case '[': sym = lbrack; break; case ']': sym = rbrack; break; case '{': sym = lbrace; break; case '}': sym = rbrace; break; default: sym = badchar; error("This char is illegal."); } } } /*----------------------------------------------------------------------------*/ /* Application logic */ /*----------------------------------------------------------------------------*/ void gen( operator o, int l, int r ) { lastcode++; code[ lastcode ].op = o; code[ lastcode ].left = l; code[ lastcode ].right = r; } void factor() {; int left; switch ( sym ) { case nonterminal: gen( nsy, location, 0 /* fixed in main */ ); getsym(); break; case quote: get_char(); if ( ch == ' ' ) error ( "no white space as terminal"); if ( ch == '.' ) error ( "no . as terminal"); gen( tsy, ch, 0 ); getsym(); break; case lparen: getsym(); expression(); if ( sym == rparen ) getsym(); else error(") expected"); break; case lbrack: getsym(); expression(); if ( sym == rbrack ) getsym(); else error("] expected"); gen( rep, 0, lastcode ); break; case lbrace: getsym(); expression(); if ( sym == rbrace ) getsym(); else error("} expected"); gen( opt, 0, lastcode ); break; default: error("beginning of factor expected"); } } void term() { int left; factor(); if ( sym == nonterminal || sym == quote || sym == lparen || sym == lbrack || sym == lbrace ) { left = lastcode; term(); gen( cat, left, lastcode); } } void expression() { int left; term(); if ( sym == altern ) { // alternation getsym(); left = lastcode; expression(); gen( alt, left, lastcode ); } } /*----------------------------------------------------------------------------*/ /* Parse & show */ /*----------------------------------------------------------------------------*/ void parse(int t, void (*cp) (void) ) { void alsoright() { parse( code[ t ].right, cp ); } // used for concat (cat) void sameagain() { parse( t, cp ); } // used for rep [] / * switch ( code[ t ].op ) { case tsy: // terminal symbol if ( instring[ sp ] == code[ t ].left ) { sp++; cp(); sp--; } break; case nsy: // non-terminal symbol parse( code[ t ].right, cp ); break; case cat: // concat parse( code[ t ].left, alsoright ); break; case alt: // alternation | parse( code[ t ].left, cp ); parse( code[ t ].right, cp ); break; case rep: // repetition cp(); parse( code[ t ].right, sameagain ); break; case opt: // optional cp(); parse( code[ t ].right, cp ); break; } } void show() { if ( num_parses == 0 ) printf(" ... well-formed -\n"); num_parses++; printf("%d: %c", num_parses, 9 /* tab*/ ); for ( i = 1; i < sp; i++) printf( "%c", instring[i] ); printf(" "); for ( i = sp; i < sl; i++) printf( "%c", instring[i] ); printf("\n"); } /*----------------------------------------------------------------------------*/ /* Print out error */ /*----------------------------------------------------------------------------*/ void error(char* msg) { printf( "Error: Seen " ); if ( sym == nonterminal ) printf( "%s", al ); else printf ("%c", ch ); printf( " when %s\n", msg ); longjmp( start, 0 ); } /*----------------------------------------------------------------------------*/ /* Main */ /*----------------------------------------------------------------------------*/ int main() { setjmp( start ); do { // read a grammar getsym(); if ( sym != nonterminal ) error("non-terminal expected."); i = location; // of last non-terminal getsym(); if ( sym != equal ) error("= (equal) expected."); getsym(); expression(); table[ i ].adr = lastcode; } while ( sym == semcol ); if ( sym == query) tracing = true; else if ( sym == period ) tracing = false; else error(". (period) expected at end of grammar."); for ( i = 1; i <= lasttable; i++ ) if ( table[ i ].adr == 0 ) { // this non-terminal is undefined printf("Undefined non-terminal %s. Panicking.", table[ i ].name ); exit( 1 ); } for ( i = 1; i <= lastcode; i++ ) if ( code[ i ].op == nsy ) code[ i ].right = table[ code[ i ].left ].adr;//fixup if ( tracing ) { // output trace printf("non-terminals:\n"); for ( i = 1; i <= lasttable; i++ ) printf("%s : % d\n", table[ i ].name, table[ i ].adr); printf("code\n"); for ( i = 1; i <= lastcode; i++ ) printf("%d, op: %d l: %d r: %d\n", i, code[i].op, code[i].left ,code[i].right); printf("\n"); } cont = true; // fix this to be more user friendly do { // parse sentences sl = 0; do { do get_char(); while ( ch == ' ' ); sl++; instring[ sl ] = ch; } while ( ch != '.' ); printf("\n"); if ( sl > 1 ) { sp = 1; num_parses = 0; parse( table[1].adr, show ); if ( num_parses == 0 ) printf(" ... ill-formed\n"); } } while ( cont ); return 0; }