%{ 
/* 
 *  c2gr.l:
 *    Translate C to MIF or another formatter for inclusion in documents
 *    Similar in function to tgrind, but so far without a vgrindefs-like
 *        pattern definition language.  Current emphasis is to 
 *        find the right layers to support multiple languages and 
 *         multiple document formatting systems.  In addition to 
 *        framemaker, we should be aiming for LaTeX, html, and Chimera.
 *    Author: Michal Young, July 1994
 * $Id: c2gr.l,v 1.37 1997/02/07 22:07:42 young Exp $ 
 * 
 * Note to C/C++ users:  I'm looking to replace this, but so far have
 * not found a suitable piece of base code. The approach here does not
 * extend cleanly to C++; it barely works for ANSI C.  To my surprise,
 * most of the pretty-printer sources I've found on the net are even 
 * more restrictive in their recognition of declarations.  Apparently a
 * parser (LL or LR) is needed to handle full C++ declarations with 
 * templates, etc., and even then the lexing part probably has to be hacked
 * with context dependencies (what a syntax!). 
 *
 * I punt.  I'd love it if someone will volunteer to write a recognizer/
 * grind front end for C++, but I'm now going to turn my attention to 
 * better facilities for Ada95 and coverage of a wider variety of languages.
 * 
 */

#include <ctype.h>
#include <string.h>

#include "groptions.h"
#include "gr.h"
#include "glyphcodes.h"

/* Under flex version 2.4.7,apparently we must declare
 * yywrap. I am not sure why this differs from earlier flex versions.
 * Defined as a macro that always yields 1.
 */
#define yywrap() 1

/* Text appended after a declared name when it is marked; filled in by
 * the declaration rules and consumed by the <CDecl>/<Decl> name rules.
 */
char Marker_Suffix_Buf[100] = " (oops)";
/* Complete marker text: "<name> <suffix>". */
char Marker_Buf[200] = "!!!"; 
/* SRC and TYPE values collected from an <IMG ...> pseudo-comment and
 * passed to gr_figure() when the tag closes.
 */
char Image_Name[100] = "!undefined figure!";
char Image_MIME_Type[100] = ""; /* Null means not specified */
/* Copied into Image_MIME_Type when an <IMG ...> pseudo-comment opens. */
char Default_MIME_Type[] = "unknown"; 
/* NAME and CATEGORY values collected from an <INDEX ...> pseudo-comment;
 * the category is appended to the name before mark_declaration().
 */
char Index_Name[200] = "!uninitialized!";
char Index_Category[200] ="!uninitialized!";
/* Brace nesting depth: bumped by "{", dropped by "}", clamped at 0. */
static int  blocklevel = 0; 
/* Set by the class rule; cleared when blocklevel returns to 0. */
static int  classcontext = 0; /* 1 in class declaration */

/* 
 * In several patterns, it is most convenient to match 
 * whitespace and a keyword together.  I'll sort out the 
 * components below. 
 */
void W_keyword(char *S); 

%}

	/* Basic lexical pieces.  Ident allows embedded "::" so that
	 * qualified C++ names are matched as a single identifier.
	 */
Blank   [ ]
OptB    [ \t]*
Quote   ["]
Ident   [_a-zA-Z]((::)|[a-zA-Z0-9_])*	
	/* Case-insensitive tag and attribute names used by the
	 * <IMG ...> and <INDEX ...> pseudo-comments.  URL is any run of
	 * characters other than blank, tab, newline, ">" and double quote.
	 */
SRC	[Ss][Rr][Cc]
IMG	[Ii][Mm][Gg]
URL	[^ \t\n>"]+
TYPE	[Tt][Yy][Pp][Ee]
MIME_Type [_a-zA-Z]([-/a-zA-Z0-9_])*
INDEX   [Ii][Nn][Dd][Ee][Xx]
NAME    [Nn][Aa][Mm][Ee]
CATEGORY [Cc][Aa][Tt][Ee][Gg][Oo][Rr][Yy]	
	/* White space including newlines: W is one or more, OptW is
	 * zero or more.
	 */
W       [ \t\n]+
OptW    [ \t\n]*
	/* "#" introducing a preprocessor directive.  HH anchors it at the
	 * start of a line; no rule in this file refers to HH.
	 */
HH	^[\t ]*#[\t ]*
H	(#[ ]*)
  /* Comment begin, end, body */
CB	([/][*])
CE	([*][/])
CL	([/][/])
	/* CommentText useful *only* with CE,
	 * otherwise can't get line ends right
	 * (consider a line ending with *).
	 */
CommentText ([^*\n]|([*][^/\n]))*
	/* StorageClass words may precede a type mark.  TypeMark is "void",
	 * or an optional "struct" followed by a name that may contain dots.
	 */
StorageClass (static|const|auto|volatile|register|extern|virtual|local|inline)
TypeMark (void)|((struct{W})?([_a-zA-Z][a-zA-Z0-9_.]*))
	/* %x states are exclusive: only rules tagged with the state are
	 * active.  %s states (Decl, CDecl) are inclusive: untagged rules
	 * stay active as well.
	 */
%x CplusBlockComment EmbeddedComment
%x Figure Link RawContent
%x URL MIME_Type
%x Index Index_Name_Text Index_Category_Text
%s Decl CDecl 

%%

  /* Class declarations (but no templates) */

^{OptW}class{W} {
	 /* Only a class header at brace level 0 is treated as a
	  * declaration; deeper in, hand the text back to other rules.
	  */
	 if (blocklevel > 0) REJECT; 
	 /* Echo the leading white space and the "class" keyword. */
	 W_keyword(yytext); 
	 /* Suffix used when the <CDecl>{Ident} rule marks the class name. */
	 strcpy(Marker_Suffix_Buf, "  Class declaration");
	 classcontext = 1; /* To recognize member functions */ 
	 BEGIN(CDecl); 
 }

<CDecl>{StorageClass} { W_keyword(yytext); }

<CDecl>{Ident} {
	/* The first identifier seen in CDecl state is the declared name:
	 * emit it as a declaration, build "<name> <suffix>" in Marker_Buf
	 * and mark it, then return to normal scanning.
	 */
	declaration(yytext); 
	/* Bound the name so that name + blank + suffix + NUL always fits
	 * in Marker_Buf, however long the identifier is.
	 */
	sprintf(Marker_Buf, "%.*s %s",
		(int)(sizeof(Marker_Buf) - sizeof(Marker_Suffix_Buf) - 1),
		yytext, Marker_Suffix_Buf); 
	if (! optblock.ctags_mode) {
	  mark_declaration(Marker_Buf); 
	}  /* Otherwise we leave it to ctags ... */
	BEGIN(INITIAL); 
 }

  /*
   * Structure declarations
   */
^{OptW}(typedef{W})?struct{W}/{Ident}{OptW}[{]  {
	/* The trailing context requires "name {" to follow, so only
	 * structure definitions with a tag and a body match here.
	 * Accepted at brace level 0, or at level 1 while classcontext
	 * is set; otherwise the text is handed back.
	 */
	if (blocklevel - classcontext > 0) REJECT;
	W_keyword(yytext);
	/* Suffix used when the <CDecl>{Ident} rule marks the tag name. */
	strcpy(Marker_Suffix_Buf, "  (Structure declaration)");
	BEGIN(CDecl);
  }

  /*
   * Enumeration declarations
   */
^{OptW}(typedef{W})?enum{W}/{Ident}{OptW}"{" {
	/* Same shape and same level test as the structure rule above. */
	if (blocklevel - classcontext > 0) REJECT;
	W_keyword(yytext);
	strcpy(Marker_Suffix_Buf, "  (Enumeration)");
	BEGIN(CDecl);
  }


  /* Function declarations 
   * Prior bug:  because we don't reject keywords as possible
   *       typemarks, we can match something like 
   *       return foo(bar) as a `function returning "return"'
   * Workaround:  count braces (variable "blocklevel"), 
   *       match declarations only at 
   *       level 0 or level 1 within a class declaration.
   */

  /* Experiment: Can we separate declaration from definition by
   * looking for ";" and "{" ? 
   *
   * We can still be fooled by macro definitions (e.g., the output of
   * flex has a "prototype" of function "if" returning type "else"!).
   * 
   * Additional bug: we can scan too far forward looking for "{", especially
   * in header files (but potentially in non-header files with long function bodies, 
   * such as those generated by a UI builder).  A potentially unbounded scan can 
   * overflow the input buffer, which Flex can't expand because we are using REJECT.
   * Hack semi-fix:  terminate scan forward at ";".  This could still break for very
   * long procedure declarations or calls, or anything that would cause us to scan over
   * a long comment. Workaround would be a semicolon (even if in a comment).
   */


^{OptW}({StorageClass}{W})*{TypeMark}(({OptW}[*]+{OptW})|{W})/{Ident}{OptW}"("[^{;]*")"{OptW};   {
	 /* Prototype: storage classes and a type mark, followed (as
	  * trailing context) by "name ( ... ) ;".  Accepted at brace
	  * level 0, or at level 1 while classcontext is set.
	  */
	 if ((blocklevel - classcontext) > 0) REJECT; 
	 W_keyword(yytext); /* print the type mark */
	 /* yytext holds leading white space, storage classes and the type
	  * mark, and can be arbitrarily long; cap it so the text plus NUL
	  * always fits in Marker_Suffix_Buf.
	  */
	 sprintf(Marker_Suffix_Buf, "(prototype) returns %.*s",
		 (int)(sizeof(Marker_Suffix_Buf) - sizeof("(prototype) returns ")),
		 yytext);
	 BEGIN(Decl); 
 }

  /*
   * Constructors and destructors don't have type-marks.  They occur only at 
   * block level 1, in class context.  (Otherwise "if", "while",  etc.
   * look like constructor functions).
   */
^{W}{Ident}{OptW}"(" {
	/* Only an indented "name (" at brace level 1 inside a class
	 * is treated as a constructor.
	 */
	if ((blocklevel != 1) || (classcontext !=  1)) REJECT;
	sprintf(Marker_Suffix_Buf, "(constructor)");
	/* Nothing is echoed here: switch to Decl and REJECT so the same
	 * text is matched again by other rules -- presumably so that the
	 * name reaches the <Decl>{Ident} rule.
	 */
	BEGIN(Decl);
	REJECT;
 }

^{W}\~{Ident}{OptW}"(" {
	/* NOTE(review): this level test is looser than the constructor's
	 * (it also passes at level 0 outside a class) -- confirm intended.
	 */
	if (blocklevel - classcontext > 0) REJECT;
	sprintf(Marker_Suffix_Buf, "(destructor)"); 
	/* Same BEGIN-then-REJECT hand-off as the constructor rule. */
	BEGIN(Decl);
	REJECT;
}
  
  /***********************************************/
  /* End of experiment on recognizing prototypes */
  /***********************************************/

^({StorageClass}{W})*{TypeMark}(({OptW}[*]+{OptW})|{W})/{Ident}{OptW}"("   {
	 /* Function header starting in column one: accepted only at
	  * brace level 0.
	  */
	 if (blocklevel > 0) REJECT; 
	 W_keyword(yytext); /* print the type mark */
	 /* Cap the copied text so it always fits in Marker_Suffix_Buf. */
	 sprintf(Marker_Suffix_Buf, "(function) returns %.*s",
		 (int)(sizeof(Marker_Suffix_Buf) - sizeof("(function) returns ")),
		 yytext); 
	 BEGIN(Decl); 
 }

^{W}({StorageClass}{W})*{TypeMark}(({OptW}[*]+{OptW})|{W})/{Ident}{OptW}"("   {
	 /* Indented function header: accepted at brace level 0, or at
	  * level 1 while classcontext is set (member functions).
	  */
	 if ((blocklevel - classcontext) > 0) REJECT; 
	 W_keyword(yytext); /* print the type mark */
	 /* Cap the copied text so it always fits in Marker_Suffix_Buf. */
	 sprintf(Marker_Suffix_Buf, "(function) returns %.*s",
		 (int)(sizeof(Marker_Suffix_Buf) - sizeof("(function) returns ")),
		 yytext); 
	 BEGIN(Decl); 
 }


<Decl>{Ident} {
	/* The first identifier seen in Decl state is the declared name.
	 * Build "<name> <suffix>", bounding the name so that name + blank
	 * + suffix + NUL always fits in Marker_Buf.
	 */
	sprintf(Marker_Buf, "%.*s %s",
		(int)(sizeof(Marker_Buf) - sizeof(Marker_Suffix_Buf) - 1),
		yytext, Marker_Suffix_Buf); 
	/* Unless ctags supplies the marks, bracket the emitted name
	 * with begin/end mark calls.
	 */
	if (! optblock.ctags_mode) 
	  { 
	    begin_mark_declaration(Marker_Buf); 
	  }
	declaration(yytext); 
	if (! optblock.ctags_mode)
	  {
	    end_mark_declaration();
	  }
	BEGIN(INITIAL); 
}
	

  /* Symbols before keywords, so that if a keyword is defined both
   * places, the symbol definition takes precedence. 
   * Note: backslash must be printed in the NORMAL font, not symbol font
   *
   * Flex prefers the longest match, so "==" is still recognized even
   * though "=" is listed first.
   */

"!="   { echo_symbol(GLYPH_not_equal, yytext); }
"->"   { echo_symbol(GLYPH_rightarrow, yytext); }
"="	{ echo_symbol(GLYPH_fortran_assign, yytext); }
"=="	{ echo_symbol(GLYPH_Cequal, yytext); }
"<"	{ echo_symbol(GLYPH_less_than, yytext); }
">"	{ echo_symbol(GLYPH_greater, yytext); }
"<="	{ echo_symbol(GLYPH_less_equal, yytext); }
">="	{ echo_symbol(GLYPH_greater_equal, yytext); }
"#"	{ echo_symbol(GLYPH_hash, yytext); }
"+"	{ echo_symbol(GLYPH_plus, yytext); }
"-"	{ echo_symbol(GLYPH_minus,yytext); }

"!" |
"." |                        
":" |
"/" |
"*"  { 
	/* These single characters are echoed as ordinary characters. */
	echo_char(yytext[0]); 
  }


"{"				{ /* One level deeper. */
				  ++blocklevel; keyword(yytext); }
"}"				{ --blocklevel; keyword(yytext); 
				  /* Back at (or below) the outermost level: clamp
				   * the count and leave any class context.
				   */
			          if (blocklevel <= 0) {
				     blocklevel = 0; 
				     classcontext = 0; 
			          }
				}



 /* Keywords: 
  *   Include phrases like "or else" to override the symbol definition
  *   of "or" 
  */ 

asm|auto|break|case|char|continue|default|do|double|else|enum |
extern|float|for|fortran|goto|if|int|long|register|return|short |
sizeof|static|struct|switch|typedef|union|unsigned|void|while {
			/* C reserved words are passed to keyword().  A longer
			 * identifier that merely starts with one of these words
			 * is matched by the {Ident} rule instead (longest match).
			 */
   			keyword(yytext); }

{H}?(define|elif|else|endif|if|ifdef|ifndef|include|undef) |
{H}  {
			/* Preprocessor directive names, with or without the
			 * leading "#", and a bare "#" followed by blanks.  Each
			 * name is listed once; the matched set is the same as
			 * the earlier, partly duplicated list.
			 */
   			keyword(yytext); }

   /* Additional keywords for C++ */

class|const|delete|friend|inline|new|operator|overload|private |
protected|public|virtual|local   {
			/* C++ additions, handled exactly like the C keywords. */
			keyword(yytext);  }

   /* Quoted strings, identifiers, strings of blanks */

{Ident}  { /* Any other name.  On a tie in length the keyword rules
	    * win because they are listed first.
	    */
	   identifier(yytext); }


 /* Pseudo-comments:  Embedded images, links, escape to formatting system.
  * We use a restricted version of the  HTML style for recognition, 
  * with the idea of converting as possible to other output formats
  * (except RawContent comes out the back end with no conversion ---
  *  therefore the existence of RawContent in a source file ties you
  *  to a single output format.) 
  * 
  * Current experimental version:  Only the html back-end exists, so
  * further variations and restrictions are likely in future versions
  * (especially as regards identifying file type, which the URL format
  *  does only implicitly through file name conventions.)
  * 
  * /*<IMG SRC=filename TYPE=mime-type >
  */
"/*"{OptB}"<"{IMG}{W} |
"//"{OptB}"<"{IMG}{W} {
		/* Start of an <IMG ...> pseudo-comment: reset both
		 * attributes to placeholder values, then collect SRC and
		 * TYPE in the Figure state.
		 */
	 	strcpy(Image_MIME_Type, Default_MIME_Type); 
		strcpy(Image_Name, "!NO_IMAGE_SPECIFIED"); 
		BEGIN Figure; 
	     }

<Figure>{SRC}=  { BEGIN URL; }

<URL>\"{URL}\"	{
	/* Quoted value: copy the text between the quotes, truncating
	 * it if needed so it always fits in Image_Name.
	 */
	size_t len = (size_t)yyleng - 2;
	if (len >= sizeof(Image_Name))
	  len = sizeof(Image_Name) - 1;
	memcpy(Image_Name, yytext+1, len);
	Image_Name[len] = 0;
	BEGIN Figure;
	}

<URL>{URL} {
	/* Unquoted value: same bounded copy, of the whole match. */
	size_t len = (size_t)yyleng;
	if (len >= sizeof(Image_Name))
	  len = sizeof(Image_Name) - 1;
	memcpy(Image_Name, yytext, len);
	Image_Name[len] = 0;
	BEGIN Figure;
	}

<URL>{W}	{ ; }
<URL>.		{ gr_error("Unrecognized characters in URL context", yytext);
		  BEGIN Figure;
		}
	
<Figure>{TYPE}= { BEGIN MIME_Type; }
<MIME_Type>\"{MIME_Type}\"	{
	/* Quoted value: copy the text between the quotes, truncating
	 * it if needed so it always fits in Image_MIME_Type.
	 */
	size_t len = (size_t)yyleng - 2;
	if (len >= sizeof(Image_MIME_Type))
	  len = sizeof(Image_MIME_Type) - 1;
	memcpy(Image_MIME_Type, yytext+1, len);
	Image_MIME_Type[len] = 0;
	BEGIN Figure;
	}

<MIME_Type>{MIME_Type} {
	/* Unquoted value: same bounded copy, of the whole match. */
	size_t len = (size_t)yyleng;
	if (len >= sizeof(Image_MIME_Type))
	  len = sizeof(Image_MIME_Type) - 1;
	memcpy(Image_MIME_Type, yytext, len);
	Image_MIME_Type[len] = 0;
	BEGIN Figure;
	}

<MIME_Type>{W}	{ ; }
<MIME_Type>.		{ gr_error("Unrecognized characters"
				   " in MIME Type context", yytext);
			  BEGIN Figure;
			}

<Figure>">"{W}?"*/" |
<Figure>">"	{  /* End of the tag (and of the enclosing block comment,
		    * if present): emit the figure, resume normal scanning.
		    */
		   gr_figure(Image_MIME_Type, Image_Name); 
		   BEGIN INITIAL;
		}

	/* Figure is an exclusive state, so without the two rules below
	 * the blanks between attributes (and any stray character) would
	 * fall through to flex's default rule and be copied raw to the
	 * output.  Handle them the same way the Index state does.
	 */
<Figure>{W}	{ ; }
<Figure>.	{ gr_error("Unrecognized characters in Figure context", yytext); }

  /**
   *  Index entries from an external cross-reference tool
   *
   *
   * //<INDEX  NAME="text for index" CATEGORY="category if known">
   *
   * The custom tag INDEX is used because it will be ignored in 
   * straight html, and unlike <A> it does not require a closing tag
   * and a non-empty content model. (The content model requirement is 
   * the real gotcha for <A>; leaving a space, as we do in the html 
   * output, is a bogus hack.) 
   */

"/*"{OptB}"<"{INDEX}{W} |
"//"{OptB}"<"{INDEX}{W} {
		/* Start of an <INDEX ...> pseudo-comment: reset the name
		 * to a placeholder and the category to empty, then collect
		 * the attributes in the Index state.
		 */
		strcpy(Index_Name,  "!NO_NAME_SPECIFIED"); 
		strcpy(Index_Category, ""); 
		BEGIN Index;
	     }

<Index>{NAME}{OptW}=  { BEGIN Index_Name_Text; }

<Index_Name_Text>\"[^\"\n]+\"	{
	/* The name is any non-empty quoted text on one line; blanks are
	 * allowed, as in NAME="text for index".  Copy the text between
	 * the quotes, truncating it to fit in Index_Name.
	 */
	size_t len = (size_t)yyleng - 2;
	if (len >= sizeof(Index_Name))
	  len = sizeof(Index_Name) - 1;
	memcpy(Index_Name, yytext+1, len);
	Index_Name[len] = 0;
	BEGIN Index;
	}

<Index>{CATEGORY}{OptW}=  { BEGIN Index_Category_Text; }

<Index_Category_Text>\"[^\"]*\"	{
	/* Same bounded copy for the (possibly empty) category. */
	size_t len = (size_t)yyleng - 2;
	if (len >= sizeof(Index_Category))
	  len = sizeof(Index_Category) - 1;
	memcpy(Index_Category, yytext+1, len);
	Index_Category[len] = 0;
	BEGIN Index;
	}

	/* Both value states are exclusive.  Skip white space before the
	 * value, and on anything else report it and go back to the Index
	 * state instead of falling through to flex's default echo rule.
	 */
<Index_Name_Text,Index_Category_Text>{W}	{ ; }
<Index_Name_Text,Index_Category_Text>.	{
	gr_error("Unrecognized characters in Index attribute value", yytext);
	BEGIN Index;
	}

<Index>{W}	{ ; }
<Index>.	{ gr_error("Unrecognized characters in Index context", yytext);
		  BEGIN Index;
		}
	

<Index>">"{W}?"*/" |
<Index>">"	{ 
                   /* End of the tag: append the category to the name and
                    * mark the result.  strncat's count is the number of
                    * characters that may be appended, so pass the space
                    * left in Index_Name, not the size of the buffer.
                    */
                   strncat(Index_Name, Index_Category,
                           sizeof(Index_Name) - strlen(Index_Name) - 1); 
                   mark_declaration(Index_Name); 
		   BEGIN INITIAL;
		}


 /*******************************************
  *******************************************
  END OF SPECIAL COMMENTS
  *******************************************
  *******************************************
  */


 /* Single line and embedded comments  - no condition tag */

  /* {CB}[^\n<]{CommentText}{CE}	{  comment(yytext); } */

 /* If we didn't match to CE, we must have broken across a line */
{CB}	{ 
	  /* Start of an ordinary comment.  The pseudo-comment openers are
	   * longer matches and therefore win over this rule.
	   */
	  comment(yytext); 
	  BEGIN EmbeddedComment;  
  	}

<EmbeddedComment>{CommentText}{CE} {
  /* Rest of the comment up to and including the closing delimiter on
   * this line: emit it and resume normal scanning.
   */
  comment(yytext); 
  BEGIN INITIAL;
}

<EmbeddedComment>([^*\n]|([*]*[*][^/\n*]))*  { 
	/* Comment text with no closing delimiter in it: emit it and
	 * stay in the comment.
	 */
	comment(yytext); 
	}

  /* Line-ending comment, in a block */

^[ \t]*{CL}([^\n<].*)?$ { 
  /* A line holding only a "//" comment opens a block of such lines.
   * A "<" right after "//" is excluded so pseudo-comments are not
   * swallowed here.
   */
  begin_block_comment(); 
  comment(yytext); 
  BEGIN CplusBlockComment;
}

<CplusBlockComment>^[ \t]*{CL}([^\n<].*)?$	{ 
  /* Another comment-only line: the block continues. */
  comment(yytext); 
}

<CplusBlockComment>. {
  /* Anything else ends the block; push the character back so it is
   * rescanned in the INITIAL state.
   */
  end_block_comment(); 
  BEGIN INITIAL;
  unput(yytext[0]);
}

<CplusBlockComment,EmbeddedComment>"\n"   {
   /* Line breaks inside either kind of comment go through newline(). */
   newline();
}

<EmbeddedComment>.  {
  /* Fallback for a character no other EmbeddedComment rule accepts. */
  comment(yytext);
}

 /* Line-ending comment, not in a block (must follow block comment) */ 

{CL}([^\n<].*)$		 { 
		  /* A "//" comment after code on the same line.  At least
		   * one character other than "<" must follow the "//".
		   */
		  comment(yytext);
		 }

	/* THIS NEEDS WORK for special characters */

{Quote}((\\.)|([^"\\\n]))*{Quote}	{ quoted(yytext); }
	/* Character constants with an escape: a letter escape, an octal
	 * escape of one to three digits, a hexadecimal escape, or a
	 * backslash followed by any single character.
	 */
['](("\\"[a-zA-Z])|("\\"[0-7]{1,3})|("\\"x[0-9a-fA-F]+)|("\\".))['] { quoted(yytext); }
['][^\\]['] { quoted(yytext); }

{Blank}{Blank}*			{ /* A run of spaces is passed on as one unit. */
				  blanks(yytext); }

   /* New line is not echoed - it starts a new paragraph */

"\n"				{  newline(); }


   /* Echo everything else.  If a character needs to be escaped, 
    * it will be.  The driver will build up multi-character strings
    * when it can. 
    */
.				{ /* Last resort for any single character not
				   * claimed by an earlier rule (tabs included).
				   */
				  echo_char(yytext[0]); }



%%

/* 
 * W_keyword: emit the current match (yytext/yyleng) piece by piece.
 *
 * Several patterns match white space and one or more keywords
 * together.  The match is treated as alternating spans: white space
 * (blank -> blanks(" "), newline -> newline(), anything else ->
 * echo_char()), then a run of non-white-space characters, which is
 * passed to keyword().
 *
 * The argument S is not used; the text is always taken from yytext.
 * Every caller in this file passes yytext.
 *
 * A token longer than the local buffer is passed to keyword() in
 * several consecutive pieces instead of overrunning the buffer.
 *
 * NOTE(review): when the match ends in white space, keyword() is
 * called once more with an empty string.  That is unchanged from the
 * earlier code; confirm whether keyword("") is harmless.
 */
void W_keyword(char *S) {
  char buf[200];
  int i,j;

  i = 0; 
  while (i < yyleng) {

    /* A span of zero or more white-space characters ... */
    /* (cast: <ctype.h> functions need an unsigned char value) */
    while (i < yyleng && isspace((unsigned char) yytext[i]))
    {
      if (yytext[i] == ' ') {
         blanks(" "); 
      } else if (yytext[i] == '\n') {
         newline(); 
      } else {
         echo_char(yytext[i]); 
      }
      ++i; 
     }

    /* A non white-space token */ 
    j = 0; 
    while (i < yyleng && ! isspace((unsigned char) yytext[i]))
    {
      if (j == (int) sizeof(buf) - 1) {
        /* Buffer full: flush this piece and start the next one. */
        buf[j] = '\0';
        keyword(buf);
        j = 0;
      }
      buf[j++] = yytext[i++]; 
    }
    buf[j] = '\0';
    keyword(buf); 

  } /* End of while (i < yyleng) */

}

/* Driver moved to flexdriver.l */















