/**************************************************************************

  Tiny Self Lexical Analysis - (C) Jecel 20/08/1993

**************************************************************************/

#define LEX_BODY
#include "lex.h"
#include "scan.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>

/* Result type of the character-class predicates below. */
typedef unsigned boolean;

/* Truth constants.  The predicates in this file return the 0/1 result of a
   C comparison, never TRUE itself, so test their result for non-zero rather
   than comparing it with TRUE. */
#define TRUE  (-1)
#define FALSE (0)

/* RCS identification string. */
static char rcsid[] = "$Id: lex.c,v 1.3 1995/11/30 02:02:04 root Exp $";

/* lt: one-token pushback slot; its ltype is notValid while the slot is empty
       (written by backToken(), consumed by nextToken()).
   tk: the token that nextToken() fills in and returns a pointer to. */
struct token lt = { notValid, "INVALID" }, tk;   /* global tokens */

/* Non-zero (1) when c is one of the decimal digits '0'..'9', else 0. */
boolean digit (char c)
  {
  /* a single unsigned comparison covers both ends of the range:
     anything below '0' wraps around to a huge value */
  return (unsigned)(c - '0') < 10u;
  }

/* Non-zero (1) when c lies in 'a'..'z', else 0. */
boolean smallLetter (char c)
  {
  return !( c < 'a' || c > 'z' );
  }

/* Non-zero (1) when c lies in 'A'..'Z', else 0. */
boolean capLetter (char c)
  {
  if ( c < 'A' )
      return 0;
  return c <= 'Z';
  }

/* Non-zero (1) when c is a letter of either case, as decided by
   smallLetter() and capLetter(); else 0. */
boolean letter (char c)
  {
  boolean smallLetter(char), capLetter(char);

  if ( smallLetter(c) )
      return 1;
  return capLetter(c) ? 1 : 0;
  }

/* Non-zero (1) when c is one of the characters an operator token may be
   built from, else 0. */
boolean opChar (char c)
  {
  switch ( c )
      {
      case '!': case '@': case '#': case '$':
      case '%': case '^': case '&': case '*':
      case '-': case '+': case '=': case '~':
      case '/': case '?': case '<': case '>':
      case ',': case ';': case '|': case '`':
      case '\\':
          return 1;
      default:
          return 0;
      }
  }

/* Non-zero (1) when c is a single-character delimiter, else 0.
   The last entry, ':', is a delimiter only in certain places. */
boolean delimChar (char c)
  {
  static const char delimiters[] = { '(', ')', '[', ']',
                                     '{', '}',
                                     '.', ':' };
  unsigned i;

  for ( i = 0; i < sizeof delimiters; i++ )
      if ( delimiters[i] == c )
          return 1;
  return 0;
  }

/* Skip spaces, tabs and "..." comments in the input.  The first character
   that is none of these is handed back with backChar(), so the next
   nextChar() call returns it.  Newlines are not skipped. */
void deblank ()
    {
    char ch;

    for (;;)
        {
        /* eat a run of spaces and tabs */
        do
            ch = nextChar();
        while ( ch == ' ' || ch == '\t' );

        if ( ch != '"' )
            break;

        /* inside a comment: discard up to the closing quote (or a '\0'),
           then go round again for whatever follows it */
        do
            ch = nextChar();
        while ( ch != '"' && ch != '\0' );
        }

    backChar(ch);
    }

/* Value of the hexadecimal digit character d ('0'-'9', 'a'-'f' or 'A'-'F').
   Any other character counts as 0. */
static int hexDigitValue ( int d )
    {
    if ( d >= '0' && d <= '9' ) return d - '0';
    if ( d >= 'a' && d <= 'f' ) return d - 'a' + 10;
    if ( d >= 'A' && d <= 'F' ) return d - 'A' + 10;
    return 0;
    }

/* Produce the next token and return a pointer to the global `tk`.

   If a token was pushed back with backToken() it is copied from `lt` and
   the pushback slot is emptied.  Otherwise blanks and comments are skipped
   with deblank() and a token is assembled from nextChar():

     '\0'                    -> end
     '\n'                    -> empty, with an empty string
     digit (or '-' digit)    -> num   (integer, radix "r", fixed or exponent)
     delimiter character     -> delim (always one character)
     operator characters     -> op    ("|" and "^" alone become delim)
     capital letter ...      -> capKey   (must finish with ':', else exit)
     small letter or '_' ... -> ident, or smallKey when it finishes with ':'
     ' ... '                 -> lstring, quotes removed, escapes decoded
     anything else           -> empty, string holds just that character

   NOTE(review): nothing here bounds the writes into tk.str; its capacity
   is declared outside the code visible in this file. */
struct token * nextToken ()
    {
    char c, *cp;

    if ( lt.ltype != notValid )
        {
        /* hand back the token rejected earlier */
        tk.ltype = lt.ltype;
        strcpy ( tk.str, lt.str );
        lt.ltype = notValid;
        return(&tk);
        }

    tk.ltype = empty;
    cp = tk.str;
    deblank();
    *cp++ = c = nextChar();
    if ( c == '\0' )
        {
        tk.ltype = end;       /* tk.str[0] is already the terminator */
        }
    else if ( c == '\n' )
        *--cp = '\0';  /* empty tokens between lines */
    else if ( digit(c) )
        {
        /* Also entered from the operator branch below once "-<digit>" has
           been stored, so that a leading minus becomes part of the number. */
doNumber:
        tk.ltype = num;
        while ( digit(c = nextChar()) )
            *cp++ = c;
        if ( c == 'r' || c == 'R' )  /* radix integer */
            {
            *cp++ = c;
            while ( digit(c = nextChar()) || letter(c) )
                *cp++ = c;
            }
        else if ( c == '.' || c == 'e' || c == 'E' )
            {      /* fixed or floating point */
            if ( c == '.' )
                {
                *cp++ = c;
                if ( !digit(c = nextChar()) && c != 'e' && c != 'E' )
                    {
                    /* '.' is a delimiter: drop it from the token and push
                       back the character after it; the '.' itself is
                       pushed back by the common backChar() below */
                    --cp;
                    backChar(c);
                    c = '.';
                    }
                else if ( c != 'e' && c != 'E' )
                    {
                    *cp++ = c;
                    while ( digit(c = nextChar()) )
                        *cp++ = c;
                    }
                }
            if ( c == 'e' || c == 'E' )  /* exponent */
                {
                *cp++ = c;
                c = nextChar();
                if ( digit(c) || c == '+' || c == '-' )
                    {
                    *cp++ = c;
                    while ( digit(c = nextChar()) )
                        *cp++ = c;
                    }
                }
            }
        *cp = '\0';
        backChar(c);          /* first character that is not part of the number */
        }
    else if ( delimChar(c) )
        {
        tk.ltype = delim;
        *cp = '\0';  /* delimiters are only one char long */
        }
    else if ( opChar(c) )
        {
        tk.ltype = op;
        *cp++ = (c = nextChar());
        if ( cp[-2] == '-' && digit(c) ) goto doNumber;
        while ( opChar(c) )
            *cp++ = (c = nextChar());
        backChar(c);
        *--cp = '\0';  /* the loop stored one character too many */
        if ( !strcmp(tk.str,"|") ) tk.ltype = delim;
        if ( !strcmp(tk.str,"^") ) tk.ltype = delim;
        }
    else if ( capLetter(c) )
        {
        tk.ltype = capKey;
        *cp++ = (c = nextChar());
        while ( letter(c) || digit(c) || c == '_' )
            *cp++ = (c = nextChar());
        if ( c != ':' )
            {
            fprintf(stderr, ": expected");
            exit ( -1 );
            }
        *cp = '\0';           /* the ':' stays part of the keyword */
        }
    else if ( smallLetter(c) || c == '_' )
        {
        tk.ltype = ident;
        *cp++ = (c = nextChar());
        while ( letter(c) || digit(c) || c == '_' )
            *cp++ = (c = nextChar());
        if ( c != ':' )
            {
            backChar(c);
            *--cp = '\0';     /* drop the look-ahead character */
            }
        else
            {
            tk.ltype = smallKey;
            *cp = '\0';       /* the ':' stays part of the keyword */
            }
        }
    else if ( c == '\'' )
        {
        tk.ltype = lstring;
        -- cp; /* don't include the opening "'" */
        while ( (c = nextChar()) != '\0' && c != '\'' )
            {
            if ( c == '\\' )
                {
                int d;

                c = nextChar();
                switch ( c )
                    {
                    case 't' : c = '\t'; break;
                    case 'b' : c = '\b'; break;    /* backspace */
                    case 'n' : c = '\n'; break;
                    case 'f' : c = '\014'; break;
                    case 'r' : c = '\015'; break;
                    case 'v' : c = '\013'; break;  /* vertical tab */
                    case 'a' : c = '\007'; break;
                    case '0' : c = 0; break;
                    case '\\' : c = '\\'; break;
                    case '\'' : c = '\''; break;
                    case '"' : c = '"'; break;
                    case '?' : c = '?'; break;
                    case 'x' : /* two hexadecimal digits */
                               d = nextChar();
                               c = hexDigitValue(d);
                               d = nextChar();
                               c = 16*c + hexDigitValue(d);
                               break;
                    case 'd' : /* three decimal digits */
                               d = nextChar();
                               c = d - '0';
                               d = nextChar();
                               c = 10*c + d - '0';
                               d = nextChar();
                               c = 10*c + d - '0';
                               break;
                    case 'o' : /* three octal digits */
                               d = nextChar();
                               c = d - '0';
                               d = nextChar();
                               c = 8*c + d - '0';
                               d = nextChar();
                               c = 8*c + d - '0';
                               break;
                    case '\n': /* ignore escape-newlines: store nothing and
                                  let the loop examine the next character */
                               continue;
                    default  : fprintf(stderr, "\n** invalid escape character %c **\n", c);
                               break;
                    }
                }
            *cp++ = c;
            }
        *cp = 0;
        }
    else
        *cp = '\0';   /* character in no class: keep just that character */
    return(&tk);
    }

/* Push `rejected` back so that the next nextToken() call returns it.
   There is room for a single pushed-back token only: calling this again
   before nextToken() has consumed the first one prints a message on
   stderr and exits. */
void backToken ( struct token * rejected )
    {
    if ( lt.ltype != notValid )
        {
        fprintf (stderr, "\n** back token called twice **\n" );
        exit ( -1 );
        }

    strcpy ( lt.str, rejected->str );
    lt.ltype = rejected->ltype;
    }
