// Read cursor into the text being scanned. NULL until input has been loaded;
// next_char() advances it one character at a time.
static char *current;
// Line counter, starting at 1; next_char() bumps it on every '\n' it consumes.
static int line = 1;
// Input stream handed to lexer_init().
static FILE *file;
// Set to true by lexer_next() once it has produced the TOK_EOF token.
static bool eof_reached = false;
78
89void lexer_init (FILE * f ) {
910 file = f ;
1011 current = NULL ;
1112}
1213
1314// Helper function to read next character
14- static int next_char () {
15+ static int next_char () {
1516 if (current && * current ) {
1617 char c = * current ++ ;
1718 if (c == '\n' ) line ++ ;
@@ -143,6 +144,18 @@ static Token* read_string() {
143144 return tok ;
144145}
145146
147+ static Token * make_token (TokenType type , char * value , int line_num ) {
148+ Token * tok = malloc (sizeof (Token ));
149+ if (!tok ) {
150+ perror ("Failed to allocate token" );
151+ exit (1 );
152+ }
153+ tok -> type = type ;
154+ tok -> value = value ;
155+ tok -> line = line_num ;
156+ return tok ;
157+ }
158+
146159// Main lexer function
147160Token * lexer_next () {
148161 if (!current ) {
@@ -160,66 +173,59 @@ Token* lexer_next() {
160173
161174 int c = peek_char ();
162175 if (c == EOF ) {
163- Token * tok = malloc (sizeof (Token ));
164- tok -> type = TOK_EOF ;
165- tok -> value = NULL ;
166- tok -> line = line ;
167- return tok ;
176+ eof_reached = true;
177+ return make_token (TOK_EOF , NULL , line );
168178 }
169179
170180 // Single character tokens
171- if (c == '+' ) { next_char (); return & ( Token ){ TOK_PLUS , "+" , line } ; }
172- if (c == '-' ) { next_char (); return & ( Token ){ TOK_MINUS , "-" , line } ; }
173- if (c == '*' ) { next_char (); return & ( Token ){ TOK_TIMES , "*" , line } ; }
174- if (c == '/' ) { next_char (); return & ( Token ){ TOK_DIVIDE , "/" , line } ; }
175- if (c == '(' ) { next_char (); return & ( Token ){ TOK_LPAREN , "(" , line } ; }
176- if (c == ')' ) { next_char (); return & ( Token ){ TOK_RPAREN , ")" , line } ; }
177- if (c == '[' ) { next_char (); return & ( Token ){ TOK_LBRACKET , "[" , line } ; }
178- if (c == ']' ) { next_char (); return & ( Token ){ TOK_RBRACKET , "]" , line } ; }
179- if (c == '{' ) { next_char (); return & ( Token ){ TOK_LBRACE , "{" , line } ; }
180- if (c == '}' ) { next_char (); return & ( Token ){ TOK_RBRACE , "}" , line } ; }
181- if (c == ',' ) { next_char (); return & ( Token ){ TOK_COMMA , "," , line } ; }
182- if (c == ':' ) { next_char (); return & ( Token ){ TOK_COLON , ":" , line } ; }
181+ if (c == '+' ) { next_char (); return make_token ( TOK_PLUS , NULL , line ) ; }
182+ if (c == '-' ) { next_char (); return make_token ( TOK_MINUS , NULL , line ) ; }
183+ if (c == '*' ) { next_char (); return make_token ( TOK_TIMES , NULL , line ) ; }
184+ if (c == '/' ) { next_char (); return make_token ( TOK_DIVIDE , NULL , line ) ; }
185+ if (c == '(' ) { next_char (); return make_token ( TOK_LPAREN , NULL , line ) ; }
186+ if (c == ')' ) { next_char (); return make_token ( TOK_RPAREN , NULL , line ) ; }
187+ if (c == '[' ) { next_char (); return make_token ( TOK_LBRACKET , NULL , line ) ; }
188+ if (c == ']' ) { next_char (); return make_token ( TOK_RBRACKET , NULL , line ) ; }
189+ if (c == '{' ) { next_char (); return make_token ( TOK_LBRACE , NULL , line ) ; }
190+ if (c == '}' ) { next_char (); return make_token ( TOK_RBRACE , NULL , line ) ; }
191+ if (c == ',' ) { next_char (); return make_token ( TOK_COMMA , NULL , line ) ; }
192+ if (c == ':' ) { next_char (); return make_token ( TOK_COLON , NULL , line ) ; }
183193 if (c == '=' ) {
184194 next_char ();
185195 if (peek_char () == '=' ) {
186196 next_char ();
187- return & ( Token ){ TOK_EQ , "==" , line } ;
197+ return make_token ( TOK_EQ , NULL , line ) ;
188198 }
189- return & ( Token ){ TOK_ASSIGN , "=" , line } ;
199+ return make_token ( TOK_ASSIGN , NULL , line ) ;
190200 }
191201 if (c == '!' ) {
192202 next_char ();
193203 if (peek_char () == '=' ) {
194204 next_char ();
195- return & ( Token ){ TOK_NEQ , "!=" , line } ;
205+ return make_token ( TOK_NEQ , NULL , line ) ;
196206 }
197207 // Error: unexpected '!'
198- Token * tok = malloc (sizeof (Token ));
199- tok -> type = TOK_ERROR ;
200- tok -> value = strdup ("Unexpected '!'" );
201- tok -> line = line ;
202- return tok ;
208+ return make_token (TOK_ERROR , strdup ("Unexpected '!'" ), line );
203209 }
204210 if (c == '<' ) {
205211 next_char ();
206212 if (peek_char () == '=' ) {
207213 next_char ();
208- return & ( Token ){ TOK_LE , "<=" , line } ;
214+ return make_token ( TOK_LE , NULL , line ) ;
209215 }
210- return & ( Token ){ TOK_LT , "<" , line } ;
216+ return make_token ( TOK_LT , NULL , line ) ;
211217 }
212218 if (c == '>' ) {
213219 next_char ();
214220 if (peek_char () == '=' ) {
215221 next_char ();
216- return & ( Token ){ TOK_GE , ">=" , line } ;
222+ return make_token ( TOK_GE , NULL , line ) ;
217223 }
218- return & ( Token ){ TOK_GT , ">" , line } ;
224+ return make_token ( TOK_GT , NULL , line ) ;
219225 }
220226 if (c == '\n' ) {
221227 next_char ();
222- return & ( Token ){ TOK_NEWLINE , "\n" , line } ;
228+ return make_token ( TOK_NEWLINE , NULL , line ) ;
223229 }
224230
225231 // Multi-character tokens
@@ -234,12 +240,82 @@ Token* lexer_next() {
234240 }
235241
236242 // Unknown character
237- Token * tok = malloc (sizeof (Token ));
238- tok -> type = TOK_ERROR ;
239243 char msg [100 ];
240244 sprintf (msg , "Unknown character: %c" , c );
241- tok -> value = strdup (msg );
242- tok -> line = line ;
243245 next_char (); // consume it
244- return tok ;
246+ return make_token (TOK_ERROR , strdup (msg ), line );
247+ }
248+
249+ // Add this at the bottom of lexer.c (or anywhere after lexer_next is defined)
250+ void print_tokens () {
251+ Token * tok ;
252+ do {
253+ tok = lexer_next ();
254+ switch (tok -> type ) {
255+ case TOK_EOF :
256+ printf ("EOF\n" );
257+ free (tok );
258+ break ;
259+ case TOK_ERROR :
260+ printf ("ERROR (line %d): %s\n" , tok -> line , tok -> value );
261+ break ;
262+ case TOK_NUMBER :
263+ printf ("NUMBER (line %d): %s\n" , tok -> line , tok -> value );
264+ break ;
265+ case TOK_STRING :
266+ printf ("STRING (line %d): \"%s\"\n" , tok -> line , tok -> value );
267+ break ;
268+ case TOK_ID :
269+ printf ("IDENTIFIER (line %d): %s\n" , tok -> line , tok -> value );
270+ break ;
271+ case TOK_NEWLINE :
272+ printf ("NEWLINE (line %d)\n" , tok -> line );
273+ break ;
274+ case TOK_PLUS : printf ("PLUS (line %d)\n" , tok -> line ); break ;
275+ case TOK_MINUS : printf ("MINUS (line %d)\n" , tok -> line ); break ;
276+ case TOK_TIMES : printf ("TIMES (line %d)\n" , tok -> line ); break ;
277+ case TOK_DIVIDE : printf ("DIVIDE (line %d)\n" , tok -> line ); break ;
278+ case TOK_ASSIGN : printf ("ASSIGN (line %d)\n" , tok -> line ); break ;
279+ case TOK_EQ : printf ("EQ (line %d)\n" , tok -> line ); break ;
280+ case TOK_NEQ : printf ("NEQ (line %d)\n" , tok -> line ); break ;
281+ case TOK_LT : printf ("LT (line %d)\n" , tok -> line ); break ;
282+ case TOK_LE : printf ("LE (line %d)\n" , tok -> line ); break ;
283+ case TOK_GT : printf ("GT (line %d)\n" , tok -> line ); break ;
284+ case TOK_GE : printf ("GE (line %d)\n" , tok -> line ); break ;
285+ case TOK_LPAREN : printf ("LPAREN (line %d)\n" , tok -> line ); break ;
286+ case TOK_RPAREN : printf ("RPAREN (line %d)\n" , tok -> line ); break ;
287+ case TOK_LBRACKET : printf ("LBRACKET (line %d)\n" , tok -> line ); break ;
288+ case TOK_RBRACKET : printf ("RBRACKET (line %d)\n" , tok -> line ); break ;
289+ case TOK_LBRACE : printf ("LBRACE (line %d)\n" , tok -> line ); break ;
290+ case TOK_RBRACE : printf ("RBRACE (line %d)\n" , tok -> line ); break ;
291+ case TOK_COMMA : printf ("COMMA (line %d)\n" , tok -> line ); break ;
292+ case TOK_COLON : printf ("COLON (line %d)\n" , tok -> line ); break ;
293+ case TOK_END : printf ("END (line %d)\n" , tok -> line ); break ;
294+
295+ // Keywords
296+ case KW_LOOP : printf ("KW_LOOP (line %d)\n" , tok -> line ); break ;
297+ case KW_EACH : printf ("KW_EACH (line %d)\n" , tok -> line ); break ;
298+ case KW_IN : printf ("KW_IN (line %d)\n" , tok -> line ); break ;
299+ case KW_STOP : printf ("KW_STOP (line %d)\n" , tok -> line ); break ;
300+ case KW_NEXT : printf ("KW_NEXT (line %d)\n" , tok -> line ); break ;
301+ case KW_GIVE : printf ("KW_GIVE (line %d)\n" , tok -> line ); break ;
302+ case KW_IF : printf ("KW_IF (line %d)\n" , tok -> line ); break ;
303+ case KW_ELIF : printf ("KW_ELIF (line %d)\n" , tok -> line ); break ;
304+ case KW_ELSE : printf ("KW_ELSE (line %d)\n" , tok -> line ); break ;
305+ case KW_DEF : printf ("KW_DEF (line %d)\n" , tok -> line ); break ;
306+ case KW_TRUE : printf ("KW_TRUE (line %d)\n" , tok -> line ); break ;
307+ case KW_FALSE : printf ("KW_FALSE (line %d)\n" , tok -> line ); break ;
308+ case KW_NULL : printf ("KW_NULL (line %d)\n" , tok -> line ); break ;
309+ case KW_PRINT : printf ("KW_PRINT (line %d)\n" , tok -> line ); break ;
310+
311+ default :
312+ printf ("UNKNOWN TOKEN (line %d): %s\n" , tok -> line , tok -> value ? tok -> value : "(null)" );
313+ break ;
314+ }
315+
316+ // Free token memory if you're using malloc in lexer_next
317+ if (tok -> value ) free (tok -> value );
318+ free (tok );
319+
320+ } while (tok -> type != TOK_EOF );
245321}
0 commit comments