scripts/genksyms/lex.l - maze/linux - Git at Google

 /* Lexical analysis for genksyms.
    Copyright 1996, 1997 Linux International.

    New implementation contributed by Richard Henderson <rth@tamu.edu>
    Based on original work by Bjorn Ekwall <bj0rn@blox.se>

    Taken from Linux modutils 2.4.22.

    This program is free software; you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by the
    Free Software Foundation; either version 2 of the License, or (at your
    option) any later version.

    This program is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software Foundation,
    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */


 %{

 #include <limits.h>
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>

 #include "genksyms.h"
 #include "parse.h"

 /* We've got a two-level lexer here.  We let flex do basic tokenization
    and then we categorize those basic tokens in the second stage.

    YY_DECL is the declaration flex uses for the scanner it generates.
    Overriding it gives that scanner the file-local name yylex1(), which
    leaves the name yylex() free for the second-stage function defined
    after the rules section.  */
 #define YY_DECL		static int yylex1(void)

 %}

 /* Identifiers: a letter, underscore or '$' followed by any number of
    letters, digits, underscores or '$'.  */
 IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

 /* Integer constants: octal, decimal or hexadecimal digits with an
    optional suffix of U, L, UL or LU in either case.
    NOTE(review): I_SUF has no alternative for a doubled L (e.g. ULL),
    so such a suffix is not matched as part of INT -- confirm whether
    that is intended.  */
 O_INT			0[0-7]*
 D_INT			[1-9][0-9]*
 X_INT			0[Xx][0-9A-Fa-f]+
 I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
 INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

 /* Floating constants: either a fraction with optional exponent, or
    digits with a mandatory exponent; an F or L suffix may follow.  */
 FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
 EXP			[Ee][+-]?[0-9]+
 F_SUF			[FfLl]
 REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

 /* String and character literals, optionally prefixed with L.  The
    body is any run of characters other than backslash and the closing
    quote, interleaved with backslash-plus-one-character escapes.  */
 STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
 CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

 /* Two-character operators: any of ~ % ^ & * + = | < > / - followed by
    '=', plus && || -> << and >>.  */
 MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

 /* We don't do multiple input files.  */
 %option noyywrap

 %option noinput

 %%


  /* Keep track of our location in the original source files.
     A line of the form `# <int> "<file>" ...' is returned as FILENAME;
     yylex() then reads the file name and line number out of yytext.
     Any other line starting with '#' is dropped.  The last two rules
     only count the line in cur_line.  */
 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
 ^#.*\n					cur_line++;
 \n					cur_line++;

  /* Ignore all other whitespace.  */
 [ \t\f\v\r]+				;


  /* Literals and identifiers are returned as single tokens.  Reserved
     word and typedef classification of IDENT is done in yylex().  */
 {STRING}				return STRING;
 {CHAR}					return CHAR;
 {IDENT}					return IDENT;

  /* The Pedant requires that the other C multi-character tokens be
     recognized as tokens.  We don't actually use them since we don't
     parse expressions, but we do want whitespace to be arranged
     around them properly.  */
 {MC_TOKEN}				return OTHER;
 {INT}					return INT;
 {REAL}					return REAL;

  /* The ellipsis gets its own token code.  */
 "..."					return DOTS;

  /* All other tokens are single characters.  The token code is the
     character itself.  */
 .					return yytext[0];


 %%

 /* Bring in the keyword recognizer.  */

 #include "keywords.c"


 /* Macros to append to our phrase collection list.

    _APP(T, L) fills the pre-allocated node `next_node' with a copy of
    the L+1 bytes starting at T (so T[L] is expected to be the
    terminating NUL), tags it SYM_NORMAL and leaves it in `cur_node'.
    It then allocates a fresh, unfilled `next_node' whose `next' link
    points back at the node just filled.  Both variables must be in
    scope where the macro is used.

    The arguments are parenthesized so that an expression such as
    `a ? b : c' can be passed safely; note that L is still evaluated
    twice.

    APP appends the token flex has just matched (yytext / yyleng).  */

 #define _APP(T,L)	do {						   \
 			  cur_node = next_node;				   \
 			  next_node = xmalloc(sizeof(*next_node));	   \
 			  next_node->next = cur_node;			   \
 			  cur_node->string = memcpy(xmalloc((L)+1), (T), (L)+1); \
 			  cur_node->tag = SYM_NORMAL;			   \
 			} while (0)

 #define APP		_APP(yytext, yyleng)


 /* The second stage lexer.  Here we incorporate knowledge of the state
    of the parser to tailor the tokens that are returned.

    Each call reads one or more raw tokens from yylex1() and returns a
    single token code.  Raw tokens that are kept are appended with APP
    to the phrase collection list.  Bracketed text, braced text, the
    text after an attribute or asm keyword, and the text after '=' or
    ':' are gathered over several raw tokens and reported as one
    *_PHRASE token.

    Returns 0 as soon as yylex1() returns 0.  On every other return
    yylval is set to the address of the `next' link that points at the
    most recently appended node.  */

 int
 yylex(void)
 {
   /* Scanner mode.  Static, so it is remembered from one call to the
      next.  */
   static enum {
     ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE,
     ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
     ST_TABLE_5, ST_TABLE_6
   } lexstate = ST_NOTSTARTED;

   /* Countdown counters, decremented once for each token returned
      through `fini'.  While suppress_type_lookup is non-zero an
      identifier is not looked up as a typedef name; while
      dont_want_brace_phrase is non-zero a '{' is returned on its own
      instead of starting a brace phrase.  */
   static int suppress_type_lookup, dont_want_brace_phrase;
   /* Pre-allocated node that the next APP will fill.  Its `next' link
      points at the most recently appended node.  */
   static struct string_list *next_node;

   /* count is the nesting depth used by the collecting states.  It is
      an automatic variable, so it starts at 0 on every call.  */
   int token, count = 0;
   struct string_list *cur_node;

   /* First call only: create the initial empty node.  */
   if (lexstate == ST_NOTSTARTED)
     {
       next_node = xmalloc(sizeof(*next_node));
       next_node->next = NULL;
       lexstate = ST_NORMAL;
     }

   /* Jump back here whenever the raw token just read does not by itself
      produce a token for the caller.  */
 repeat:
   token = yylex1();

   if (token == 0)
     return 0;
   else if (token == FILENAME)
     {
       char *file, *e;

       /* Save the filename and line number for later error messages.  */

       if (cur_filename)
 	free(cur_filename);

       /* The FILENAME rule only matches text containing a pair of
 	 double quotes, so both strchr calls find one.  The name is
 	 the text between them; it is terminated in place and copied
 	 including the NUL.  */
       file = strchr(yytext, '\"')+1;
       e = strchr(file, '\"');
       *e = '\0';
       cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
       /* yytext[0] is '#' and yytext[1] is a blank; the number follows.  */
       cur_line = atoi(yytext+2);

       goto repeat;
     }

   switch (lexstate)
     {
     case ST_NORMAL:
       switch (token)
 	{
 	case IDENT:
 	  APP;
 	  {
 	    const struct resword *r = is_reserved_word(yytext, yyleng);
 	    if (r)
 	      {
 		/* token takes the reserved word's own code for every
 		   reserved word, not only for the cases listed here.  */
 		switch (token = r->token)
 		  {
 		  /* Attribute and asm keywords start a collecting state;
 		     nothing is returned until the phrase is complete.  */
 		  case ATTRIBUTE_KEYW:
 		    lexstate = ST_ATTRIBUTE;
 		    count = 0;
 		    goto repeat;
 		  case ASM_KEYW:
 		    lexstate = ST_ASM;
 		    count = 0;
 		    goto repeat;

 		  /* After struct/union a '{' among the next two tokens
 		     is returned as itself (the counter is decremented
 		     once for the keyword at `fini').  */
 		  case STRUCT_KEYW:
 		  case UNION_KEYW:
 		    dont_want_brace_phrase = 3;
 		    /* FALLTHRU */
 		  /* After struct/union/enum the next token is not
 		     looked up as a typedef name.  `goto fini' also
 		     skips the lookup for the keyword itself.  */
 		  case ENUM_KEYW:
 		    suppress_type_lookup = 2;
 		    goto fini;

 		  case EXPORT_SYMBOL_KEYW:
 		      goto fini;
 		  }
 	      }
 	    /* Any identifier that find_symbol reports as a typedef is
 	       returned as TYPE, unless lookup is currently suppressed.  */
 	    if (!suppress_type_lookup)
 	      {
 		struct symbol *sym = find_symbol(yytext, SYM_TYPEDEF);
 		if (sym && sym->type == SYM_TYPEDEF)
 		  token = TYPE;
 	      }
 	  }
 	  break;

 	/* '[' always starts a bracket phrase, already one level deep.  */
 	case '[':
 	  APP;
 	  lexstate = ST_BRACKET;
 	  count = 1;
 	  goto repeat;

 	/* '{' starts a brace phrase unless a recent struct/union keyword
 	   asked for it to be returned as a plain '{'.  */
 	case '{':
 	  APP;
 	  if (dont_want_brace_phrase)
 	    break;
 	  lexstate = ST_BRACE;
 	  count = 1;
 	  goto repeat;

 	/* '=' and ':' are returned as themselves; the following call
 	   then collects the text after them in ST_EXPRESSION.  */
 	case '=': case ':':
 	  APP;
 	  lexstate = ST_EXPRESSION;
 	  break;

 	/* Everything else is appended and returned unchanged.  */
 	case DOTS:
 	default:
 	  APP;
 	  break;
 	}
       break;

     /* ST_ATTRIBUTE, ST_ASM, ST_BRACKET and ST_BRACE all work the same
        way: append every token, track the nesting depth of their own
        delimiter pair, and return the phrase token when the depth drops
        back to 0.  */
     case ST_ATTRIBUTE:
       APP;
       switch (token)
 	{
 	case '(':
 	  ++count;
 	  goto repeat;
 	case ')':
 	  if (--count == 0)
 	    {
 	      lexstate = ST_NORMAL;
 	      token = ATTRIBUTE_PHRASE;
 	      break;
 	    }
 	  goto repeat;
 	default:
 	  goto repeat;
 	}
       break;

     case ST_ASM:
       APP;
       switch (token)
 	{
 	case '(':
 	  ++count;
 	  goto repeat;
 	case ')':
 	  if (--count == 0)
 	    {
 	      lexstate = ST_NORMAL;
 	      token = ASM_PHRASE;
 	      break;
 	    }
 	  goto repeat;
 	default:
 	  goto repeat;
 	}
       break;

     case ST_BRACKET:
       APP;
       switch (token)
 	{
 	case '[':
 	  ++count;
 	  goto repeat;
 	case ']':
 	  if (--count == 0)
 	    {
 	      lexstate = ST_NORMAL;
 	      token = BRACKET_PHRASE;
 	      break;
 	    }
 	  goto repeat;
 	default:
 	  goto repeat;
 	}
       break;

     case ST_BRACE:
       APP;
       switch (token)
 	{
 	case '{':
 	  ++count;
 	  goto repeat;
 	case '}':
 	  if (--count == 0)
 	    {
 	      lexstate = ST_NORMAL;
 	      token = BRACE_PHRASE;
 	      break;
 	    }
 	  goto repeat;
 	default:
 	  goto repeat;
 	}
       break;

     /* Collect everything up to a ',' or ';' that is not nested inside
        any kind of bracket.  All three bracket kinds share one depth
        counter.  */
     case ST_EXPRESSION:
       switch (token)
 	{
 	case '(': case '[': case '{':
 	  ++count;
 	  APP;
 	  goto repeat;
 	case ')': case ']': case '}':
 	  --count;
 	  APP;
 	  goto repeat;
 	case ',': case ';':
 	  if (count == 0)
 	    {
 	      /* Put back the token we just read so's we can find it again
 		 after registering the expression.  */
 	      unput(token);

 	      lexstate = ST_NORMAL;
 	      token = EXPRESSION_PHRASE;
 	      break;
 	    }
 	  APP;
 	  goto repeat;
 	default:
 	  APP;
 	  goto repeat;
 	}
       break;

     /* NOTE(review): lexstate is local to this function and none of the
        states above ever assigns an ST_TABLE_* value, so the ST_TABLE_*
        cases below appear to be unreachable -- confirm before relying
        on or removing them.  */

     /* Discards every token.  */
     case ST_TABLE_1:
       goto repeat;

     /* An identifier that is exactly "X" is returned as
        EXPORT_SYMBOL_KEYW; anything else is skipped via ST_TABLE_6.  */
     case ST_TABLE_2:
       if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
 	{
 	  token = EXPORT_SYMBOL_KEYW;
 	  lexstate = ST_TABLE_5;
 	  APP;
 	  break;
 	}
       lexstate = ST_TABLE_6;
       /* FALLTHRU */

     /* Discard tokens, without appending them, up to a ',' at nesting
        depth 0, then go back to ST_TABLE_2.  */
     case ST_TABLE_6:
       switch (token)
 	{
 	case '{': case '[': case '(':
 	  ++count;
 	  break;
 	case '}': case ']': case ')':
 	  --count;
 	  break;
 	case ',':
 	  if (count == 0)
 	    lexstate = ST_TABLE_2;
 	  break;
 	};
       goto repeat;

     /* Discards every token.  */
     case ST_TABLE_3:
       goto repeat;

     /* Discards tokens up to and including the next ';'.  */
     case ST_TABLE_4:
       if (token == ';')
 	lexstate = ST_NORMAL;
       goto repeat;

     /* Tokens are appended and returned unchanged, except that ',' is
        returned as ';' (the appended text is still yytext) and moves
        back to ST_TABLE_2.  */
     case ST_TABLE_5:
       switch (token)
 	{
 	case ',':
 	  token = ';';
 	  lexstate = ST_TABLE_2;
 	  APP;
 	  break;
 	default:
 	  APP;
 	  break;
 	}
       break;

     /* Only reached if lexstate holds a value with no case above.  */
     default:
       exit(1);
     }
   /* Common exit for every token handed back to the caller, except the
      0 returned directly above.  */
 fini:

   if (suppress_type_lookup > 0)
     --suppress_type_lookup;
   if (dont_want_brace_phrase > 0)
     --dont_want_brace_phrase;

   yylval = &next_node->next;

   return token;
 }
	/* Lexical analysis for genksyms.
	Copyright 1996, 1997 Linux International.

	New implementation contributed by Richard Henderson <rth@tamu.edu>
	Based on original work by Bjorn Ekwall <bj0rn@blox.se>

	Taken from Linux modutils 2.4.22.

	This program is free software; you can redistribute it and/or modify it
	under the terms of the GNU General Public License as published by the
	Free Software Foundation; either version 2 of the License, or (at your
	option) any later version.

	This program is distributed in the hope that it will be useful, but
	WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program; if not, write to the Free Software Foundation,
	Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */


	%{

	#include <limits.h>
	#include <stdlib.h>
	#include <string.h>
	#include <ctype.h>

	#include "genksyms.h"
	#include "parse.h"

	/* We've got a two-level lexer here. We let flex do basic tokenization
	and then we categorize those basic tokens in the second stage.

	YY_DECL is the declaration flex uses for the scanner it generates.
	Overriding it gives that scanner the file-local name yylex1(), which
	leaves the name yylex() free for the second-stage function defined
	after the rules section. */
	#define YY_DECL static int yylex1(void)

	%}

	/* Identifiers: a letter, underscore or '$' followed by any number of
	letters, digits, underscores or '$'. */
	IDENT [A-Za-z_\$][A-Za-z0-9_\$]*

	/* Integer constants: octal, decimal or hexadecimal digits with an
	optional suffix of U, L, UL or LU in either case. The alternatives
	are separated by an unescaped '|'; an escaped one would match a
	literal '|' character instead. */
	O_INT 0[0-7]*
	D_INT [1-9][0-9]*
	X_INT 0[Xx][0-9A-Fa-f]+
	I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
	INT ({O_INT}|{D_INT}|{X_INT}){I_SUF}?

	/* Floating constants: either a fraction with optional exponent, or
	digits with a mandatory exponent; an F or L suffix may follow. */
	FRAC ([0-9]*\.[0-9]+)|([0-9]+\.)
	EXP [Ee][+-]?[0-9]+
	F_SUF [FfLl]
	REAL ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

	/* String and character literals, optionally prefixed with L. The
	body is any run of characters other than backslash and the closing
	quote, interleaved with any number of backslash-plus-one-character
	escapes. */
	STRING L?\"([^\\\"]*\\.)*[^\\\"]*\"
	CHAR L?\'([^\\\']*\\.)*[^\\\']*\'

	/* Two-character operators: any of ~ % ^ & * + = | < > / - followed by
	'=', plus && || -> << and >>. */
	MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

	/* We don't do multiple input files. */
	%option noyywrap

	%option noinput

	%%


	/* Keep track of our location in the original source files.
	A line of the form `# <int> "<file>" ...' is returned as FILENAME;
	yylex() then reads the file name and line number out of yytext.
	Any other line starting with '#' is dropped. The last two rules
	only count the line in cur_line. */
	^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return FILENAME;
	^#.*\n cur_line++;
	\n cur_line++;

	/* Ignore all other whitespace. */
	[ \t\f\v\r]+ ;


	/* Literals and identifiers are returned as single tokens. Reserved
	word and typedef classification of IDENT is done in yylex(). */
	{STRING} return STRING;
	{CHAR} return CHAR;
	{IDENT} return IDENT;

	/* The Pedant requires that the other C multi-character tokens be
	recognized as tokens. We don't actually use them since we don't
	parse expressions, but we do want whitespace to be arranged
	around them properly. */
	{MC_TOKEN} return OTHER;
	{INT} return INT;
	{REAL} return REAL;

	/* The ellipsis gets its own token code. */
	"..." return DOTS;

	/* All other tokens are single characters. The token code is the
	character itself. */
	. return yytext[0];


	%%

	/* Bring in the keyword recognizer. */

	#include "keywords.c"


	/* Macros to append to our phrase collection list.

	_APP(T, L) fills the pre-allocated node `next_node' with a copy of
	the L+1 bytes starting at T (so T[L] is expected to be the
	terminating NUL), tags it SYM_NORMAL and leaves it in `cur_node'.
	It then allocates a fresh, unfilled `next_node' whose `next' link
	points back at the node just filled. Both variables must be in
	scope where the macro is used.

	The arguments are parenthesized so that an expression such as
	`a ? b : c' can be passed safely; note that L is still evaluated
	twice.

	APP appends the token flex has just matched (yytext / yyleng). */

	#define _APP(T,L) do { \
	  cur_node = next_node; \
	  next_node = xmalloc(sizeof(*next_node)); \
	  next_node->next = cur_node; \
	  cur_node->string = memcpy(xmalloc((L)+1), (T), (L)+1); \
	  cur_node->tag = SYM_NORMAL; \
	} while (0)

	#define APP _APP(yytext, yyleng)


	/* The second stage lexer. Here we incorporate knowledge of the state
	   of the parser to tailor the tokens that are returned.

	   Each call reads one or more raw tokens from yylex1() and returns a
	   single token code. Raw tokens that are kept are appended with APP
	   to the phrase collection list. Bracketed text, braced text, the
	   text after an attribute or asm keyword, and the text after '=' or
	   ':' are gathered over several raw tokens and reported as one
	   *_PHRASE token.

	   Returns 0 as soon as yylex1() returns 0. On every other return
	   yylval is set to the address of the `next' link that points at the
	   most recently appended node. */

	int
	yylex(void)
	{
	  /* Scanner mode. Static, so it is remembered between calls. */
	  static enum {
	    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE,
	    ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
	    ST_TABLE_5, ST_TABLE_6
	  } lexstate = ST_NOTSTARTED;

	  /* Countdown counters, decremented once for each token returned
	     through `fini'. While suppress_type_lookup is non-zero an
	     identifier is not looked up as a typedef name; while
	     dont_want_brace_phrase is non-zero a '{' is returned on its own
	     instead of starting a brace phrase. */
	  static int suppress_type_lookup, dont_want_brace_phrase;
	  /* Pre-allocated node that the next APP will fill. Its `next' link
	     points at the most recently appended node. */
	  static struct string_list *next_node;

	  /* count is the nesting depth used by the collecting states. It is
	     an automatic variable, so it starts at 0 on every call. */
	  int token, count = 0;
	  struct string_list *cur_node;

	  /* First call only: create the initial empty node. */
	  if (lexstate == ST_NOTSTARTED)
	    {
	      next_node = xmalloc(sizeof(*next_node));
	      next_node->next = NULL;
	      lexstate = ST_NORMAL;
	    }

	  /* Jump back here whenever the raw token just read does not by
	     itself produce a token for the caller. */
	repeat:
	  token = yylex1();

	  if (token == 0)
	    return 0;
	  else if (token == FILENAME)
	    {
	      /* Both must be pointers into yytext: they receive strchr()
	         results and `e' is written through. */
	      char *file, *e;

	      /* Save the filename and line number for later error messages.
	         free() accepts a null pointer, so no guard is needed. */
	      free(cur_filename);

	      /* The FILENAME rule only matches text containing a pair of
	         double quotes, so both strchr calls find one. The name is
	         the text between them; it is terminated in place and copied
	         including the NUL. */
	      file = strchr(yytext, '\"')+1;
	      e = strchr(file, '\"');
	      *e = '\0';
	      cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
	      /* yytext[0] is '#' and yytext[1] is a blank; the number follows. */
	      cur_line = atoi(yytext+2);

	      goto repeat;
	    }

	  switch (lexstate)
	    {
	    case ST_NORMAL:
	      switch (token)
	        {
	        case IDENT:
	          APP;
	          {
	            const struct resword *r = is_reserved_word(yytext, yyleng);
	            if (r)
	              {
	                /* token takes the reserved word's own code for every
	                   reserved word, not only for the cases listed here. */
	                switch (token = r->token)
	                  {
	                  /* Attribute and asm keywords start a collecting
	                     state; nothing is returned until the phrase is
	                     complete. */
	                  case ATTRIBUTE_KEYW:
	                    lexstate = ST_ATTRIBUTE;
	                    count = 0;
	                    goto repeat;
	                  case ASM_KEYW:
	                    lexstate = ST_ASM;
	                    count = 0;
	                    goto repeat;

	                  /* After struct/union a '{' among the next two
	                     tokens is returned as itself (the counter is
	                     decremented once for the keyword at `fini'). */
	                  case STRUCT_KEYW:
	                  case UNION_KEYW:
	                    dont_want_brace_phrase = 3;
	                    /* FALLTHRU */
	                  /* After struct/union/enum the next token is not
	                     looked up as a typedef name. `goto fini' also
	                     skips the lookup for the keyword itself. */
	                  case ENUM_KEYW:
	                    suppress_type_lookup = 2;
	                    goto fini;

	                  case EXPORT_SYMBOL_KEYW:
	                    goto fini;
	                  }
	              }
	            /* Any identifier that find_symbol reports as a typedef is
	               returned as TYPE, unless lookup is currently suppressed. */
	            if (!suppress_type_lookup)
	              {
	                struct symbol *sym = find_symbol(yytext, SYM_TYPEDEF);
	                if (sym && sym->type == SYM_TYPEDEF)
	                  token = TYPE;
	              }
	          }
	          break;

	        /* '[' always starts a bracket phrase, already one level deep. */
	        case '[':
	          APP;
	          lexstate = ST_BRACKET;
	          count = 1;
	          goto repeat;

	        /* '{' starts a brace phrase unless a recent struct/union
	           keyword asked for it to be returned as a plain '{'. */
	        case '{':
	          APP;
	          if (dont_want_brace_phrase)
	            break;
	          lexstate = ST_BRACE;
	          count = 1;
	          goto repeat;

	        /* '=' and ':' are returned as themselves; the following call
	           then collects the text after them in ST_EXPRESSION. */
	        case '=': case ':':
	          APP;
	          lexstate = ST_EXPRESSION;
	          break;

	        /* Everything else is appended and returned unchanged. */
	        case DOTS:
	        default:
	          APP;
	          break;
	        }
	      break;

	    /* ST_ATTRIBUTE, ST_ASM, ST_BRACKET and ST_BRACE all work the same
	       way: append every token, track the nesting depth of their own
	       delimiter pair, and return the phrase token when the depth
	       drops back to 0. */
	    case ST_ATTRIBUTE:
	      APP;
	      switch (token)
	        {
	        case '(':
	          ++count;
	          goto repeat;
	        case ')':
	          if (--count == 0)
	            {
	              lexstate = ST_NORMAL;
	              token = ATTRIBUTE_PHRASE;
	              break;
	            }
	          goto repeat;
	        default:
	          goto repeat;
	        }
	      break;

	    case ST_ASM:
	      APP;
	      switch (token)
	        {
	        case '(':
	          ++count;
	          goto repeat;
	        case ')':
	          if (--count == 0)
	            {
	              lexstate = ST_NORMAL;
	              token = ASM_PHRASE;
	              break;
	            }
	          goto repeat;
	        default:
	          goto repeat;
	        }
	      break;

	    case ST_BRACKET:
	      APP;
	      switch (token)
	        {
	        case '[':
	          ++count;
	          goto repeat;
	        case ']':
	          if (--count == 0)
	            {
	              lexstate = ST_NORMAL;
	              token = BRACKET_PHRASE;
	              break;
	            }
	          goto repeat;
	        default:
	          goto repeat;
	        }
	      break;

	    case ST_BRACE:
	      APP;
	      switch (token)
	        {
	        case '{':
	          ++count;
	          goto repeat;
	        case '}':
	          if (--count == 0)
	            {
	              lexstate = ST_NORMAL;
	              token = BRACE_PHRASE;
	              break;
	            }
	          goto repeat;
	        default:
	          goto repeat;
	        }
	      break;

	    /* Collect everything up to a ',' or ';' that is not nested inside
	       any kind of bracket. All three bracket kinds share one depth
	       counter. */
	    case ST_EXPRESSION:
	      switch (token)
	        {
	        case '(': case '[': case '{':
	          ++count;
	          APP;
	          goto repeat;
	        case ')': case ']': case '}':
	          --count;
	          APP;
	          goto repeat;
	        case ',': case ';':
	          if (count == 0)
	            {
	              /* Put back the token we just read so's we can find it
	                 again after registering the expression. */
	              unput(token);

	              lexstate = ST_NORMAL;
	              token = EXPRESSION_PHRASE;
	              break;
	            }
	          APP;
	          goto repeat;
	        default:
	          APP;
	          goto repeat;
	        }
	      break;

	    /* NOTE(review): lexstate is local to this function and none of
	       the states above ever assigns an ST_TABLE_* value, so the
	       ST_TABLE_* cases below appear to be unreachable -- confirm
	       before relying on or removing them. */

	    /* Discards every token. */
	    case ST_TABLE_1:
	      goto repeat;

	    /* An identifier that is exactly "X" is returned as
	       EXPORT_SYMBOL_KEYW; anything else is skipped via ST_TABLE_6. */
	    case ST_TABLE_2:
	      if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
	        {
	          token = EXPORT_SYMBOL_KEYW;
	          lexstate = ST_TABLE_5;
	          APP;
	          break;
	        }
	      lexstate = ST_TABLE_6;
	      /* FALLTHRU */

	    /* Discard tokens, without appending them, up to a ',' at nesting
	       depth 0, then go back to ST_TABLE_2. */
	    case ST_TABLE_6:
	      switch (token)
	        {
	        case '{': case '[': case '(':
	          ++count;
	          break;
	        case '}': case ']': case ')':
	          --count;
	          break;
	        case ',':
	          if (count == 0)
	            lexstate = ST_TABLE_2;
	          break;
	        };
	      goto repeat;

	    /* Discards every token. */
	    case ST_TABLE_3:
	      goto repeat;

	    /* Discards tokens up to and including the next ';'. */
	    case ST_TABLE_4:
	      if (token == ';')
	        lexstate = ST_NORMAL;
	      goto repeat;

	    /* Tokens are appended and returned unchanged, except that ',' is
	       returned as ';' (the appended text is still yytext) and moves
	       back to ST_TABLE_2. */
	    case ST_TABLE_5:
	      switch (token)
	        {
	        case ',':
	          token = ';';
	          lexstate = ST_TABLE_2;
	          APP;
	          break;
	        default:
	          APP;
	          break;
	        }
	      break;

	    /* Only reached if lexstate holds a value with no case above. */
	    default:
	      exit(1);
	    }

	  /* Common exit for every token handed back to the caller, except
	     the 0 returned directly above. */
	fini:

	  if (suppress_type_lookup > 0)
	    --suppress_type_lookup;
	  if (dont_want_brace_phrase > 0)
	    --dont_want_brace_phrase;

	  yylval = &next_node->next;

	  return token;
	}