/*
 * SansParser.java
 *
 * Created on December 3, 2002, 4:18 PM
 *
 * This software is copyright (c) 2002 Board of Regents, University of Wisconsin.
 * All Rights Reserved.
 *
 * FILE:        $Source: /cvs_archive/cvs/sansj/src/EDU/bmrb/sansj/SansParser.java,v $
 * 
 * AUTHOR:      $Author: dmaziuk $
 * DATE:        $Date: 2006/04/03 22:40:41 $
 * 
 * UPDATE HISTORY:
 * ---------------
 * $Log: SansParser.java,v $
 * Revision 1.1  2006/04/03 22:40:41  dmaziuk
 * bug fix in lex specs.
 *
 * Revision 1.6  2003/07/16 21:53:21  dmaziuk
 * added another parser
 *
 */

package EDU.bmrb.sansj;

/**
 * Validating NMR-STAR parser.
 * This parser accepts tokens from <CODE>STARLexer</CODE> and calls appropriate
 * methods of <CODE>ContentHandler</CODE> and <CODE>ErrorHandler</CODE> objects.
 * <P>
 * Parse errors:
 * <UL>
 *   <LI>Lexer error (should never happen).
 *   <LI>Global block(s) in input.
 *   <LI>Anything other than comments or data block at top-level.
 *   <LI>Anything other than comments or saveframes in data block.
 *   <LI>Anything other than comments, loops, <CODE>save_</CODE>, or free tags 
 *       (tag/value pairs) in saveframe.
 *   <LI>Anything other than comments, <CODE>stop_</CODE>, tags, or values in loop
 *   <LI>Loops with no values.
 *   <LI>Loops with no tags.
 *   <LI>Premature end of file: EOF is legal only inside a data block, EOF inside
 *       a saveframe or loop is an error.
 * </UL>
 * Error reporting is not very detailed at the moment: parser simply reports
 * <CODE>Invalid token: <EM>token</EM></CODE>. <BR>
 * Parser assumes that first data block continues to the end of input. If there
 * is more than one data block in input, parser will see the second <CODE>data_</CODE>
 * as being inside the current data block and it will report that as invalid token 
 * (only comments and saveframes are allowed in data block).
 * <P>
 * Parse warnings:
 * <UL>
 *   <LI>NMR-STAR keyword inside quoted value. This warning is generated by the
 *       lexer; its purpose is to catch semicolon-delimited values where the
 *       closing semicolon is missing.
 *   <LI>Loop count error. This warning is generated when number of values in the
 *       loop is not an exact multiple of the number of tags. Parser makes "best
 *       effort" to report the line number where the value is missing: in a 
 *       well-formatted loop it will report [first] line number where a value is
 *       missing, otherwise it'll report the line that contains <CODE>stop_</CODE>
 *       or anything in between. Note also that if there are as many values missing
 *       as there are columns in the loop, parser will not see that as an error.
 * </UL>
 * @see ContentHandler
 * @see ErrorHandler
 * @see STARLexer
 * @author  dmaziuk
 * @version 1
 */
public class SansParser {
    /** scanner error */
    public static final String ERR_LEXER = "Lexer error: ";
    /** global blocks are not allowed */
    public static final String ERR_GLOBAL = "Global blocks are illegal in NMR-STAR";
    /** invalid token */
    public static final String ERR_TOKEN = "Invalid token: ";
    /** premature EOF */
    public static final String ERR_EOF = "Premature end of file";
    /** loop with no values */
    public static final String ERR_EMPTYLOOP = "Loop with no values";
    /** loop with no tags */
    public static final String ERR_LOOPNOTAGS = "Loop with no tags";
    /** parse warning */
    public static final String WARN_KEYWORD = "Keyword in value: ";
    /** loop count error */
    public static final String WARN_LOOPCOUNT = "Loop count error";
    /** token */
    public static final String TOKEN = "Token ";
    /** is not allowed outside of a data block */
    public static final String NOTINDATABLOCK = " is not allowed outside of a data block";
    /** is not allowed inside */
    public static final String ILLEGAL = " is not allowed inside a";
    /** data block */
    public static final String DATA_BLOCK = " data block";
    /** saveframe */
    public static final String SAVEFRAME = " saveframe";
    /** loop */
    public static final String LOOP = " loop";
    /* content handler object; must be set before any parse method is called */
    private ContentHandler fCh = null;
    /* error handler object; must be set before any parse method is called */
    private ErrorHandler fEh = null;
    /* scanner */
    STARLexer fLex = null;
//*******************************************************************************
    /** Creates new SansParser.
     * Content and error handlers are left unset; install them with
     * {@link #setContentHandler} and {@link #setErrorHandler} before parsing.
     * @param lex scanner
     */
    public SansParser( STARLexer lex ) {
        fLex = lex;
    } //*************************************************************************
    /** Creates new SansParser.
     * @param lex scanner
     * @param ch content handler object
     * @param eh error handler object
     */
    public SansParser( STARLexer lex, ContentHandler ch, ErrorHandler eh ) {
        fLex = lex;
        fCh = ch;
        fEh = eh;
    } //*************************************************************************
    /** Returns content handler.
     * @return content handler object
     */
    public ContentHandler getContentHandler() {
        return fCh;
    } //*************************************************************************
    /** Sets content handler.
     * @param ch content handler object
     */
    public void setContentHandler( ContentHandler ch ) {
        fCh = ch;
    } //*************************************************************************
    /** Returns error handler.
     * @return error handler object
     */
    public ErrorHandler getErrorHandler() {
        return fEh;
    } //*************************************************************************
    /** Sets error handler.
     * @param eh error handler object
     */
    public void setErrorHandler( ErrorHandler eh ) {
        fEh = eh;
    } //*************************************************************************
    /** Returns scanner object.
     * @return scanner
     */
    public STARLexer getScanner() {
        return fLex;
    } //*************************************************************************
    /** Sets scanner object.
     * @param lex scanner
     */
    public void setScanner( STARLexer lex ) {
        fLex = lex;
    } //*************************************************************************
    /** Reports a lexer error at the scanner's current position. */
    private void reportLexerError() {
        fEh.error( fLex.getLine(), fLex.getColumn(), ERR_LEXER + fLex.yytext() );
    } //*************************************************************************
    /** Reports the current token as illegal in the given context.
     * @param context message suffix, e.g. <CODE>ILLEGAL + LOOP</CODE>
     */
    private void reportInvalidToken( String context ) {
        fEh.error( fLex.getLine(), fLex.getColumn(), TOKEN + fLex.yytext()
        + context );
    } //*************************************************************************
    /** Stores the value of the current token in a data item.
     * Shared by saveframe and loop parsing so that both treat every delimiter
     * type the same way.
     * @param item data item to update
     * @param tok value token type returned by the scanner
     */
    private void setItemValue( DataItemNode item, int tok ) {
        switch( tok ) {
            case STARLexer.DVNSEMICOLON : // strip leading \n, if any
                // length check: an empty value must not throw
                if( (fLex.getText().length() > 0)
                && (fLex.getText().charAt( 0 ) == '\n') )
                    item.setValue( fLex.getText().substring( 1 ) );
                else item.setValue( fLex.getText() );
                break;
            case STARLexer.DVNFRAMECODE : // strip $
                item.setValue( fLex.yytext().substring( 1 ) );
                break;
            case STARLexer.DVNNON :
                item.setValue( fLex.yytext() );
                break;
            default : // single- or double-quoted value
                item.setValue( fLex.getText() );
        }
    } //*************************************************************************
    /** Parses input file.
     * Reads top-level tokens until the first data block, hands the rest of
     * the input to {@link #parseDataBlock}, and returns. Parsing also stops
     * when a handler callback returns <CODE>true</CODE> or an error is
     * reported. If end of input is reached without any data block,
     * <CODE>endData</CODE> is still called, with a <CODE>null</CODE> block id.
     * Exceptions thrown while parsing are printed to standard error.
     */
    public void parse() {
        String dataid = null;
        try {
            for( ;; ) {
                int tok = fLex.yylex();
                switch( tok ) {
                    case STARLexer.ERROR :
                        reportLexerError();
                        return;
                    case STARLexer.COMMENT :
                        if( fCh.comment( fLex.getLine(), fLex.yytext() ) ) return;
                        break;
                    case STARLexer.DATASTART : // parse data block
                        dataid = fLex.yytext().substring( 5 ); // strip data_
                        if( fCh.startData( fLex.getLine(), dataid ) ) return;
                        if( parseDataBlock( dataid ) ) return;
                        break;
                    case STARLexer.YYEOF : // fake end of data block
                        fCh.endData( fLex.getLine(), dataid );
                        return;
                    default : // invalid token
                        reportInvalidToken( NOTINDATABLOCK );
                        return;
                }
            }
        }
        catch( Exception e ) { e.printStackTrace(); }
    } //*************************************************************************
    /** Parses data block.
     * Consumes comments and saveframes until end of input, which is treated
     * as the end of the data block.
     * @param dataid data block id, passed to <CODE>endData</CODE>
     * @return <CODE>true</CODE> to stop parsing. Every exit path returns
     *         <CODE>true</CODE>: end of input, an error, an exception, or a
     *         handler request to stop.
     */
    public boolean parseDataBlock( String dataid ) {
        try {
            for( ;; ) {
                int tok = fLex.yylex();
                switch( tok ) {
                    case STARLexer.ERROR :
                        reportLexerError();
                        return true;
                    case STARLexer.COMMENT :
                        if( fCh.comment( fLex.getLine(), fLex.yytext() ) )
                            return true;
                        break;
                    case STARLexer.SAVESTART : // parse saveframe
                        String sfname = fLex.yytext().substring( 5 ); // strip save_
                        if( fCh.startSaveFrame( fLex.getLine(), sfname ) )
                            return true;
                        if( parseSaveFrame( sfname ) ) return true;
                        break;
                    case STARLexer.YYEOF : // fake end of data block
                        fCh.endData( fLex.getLine(), dataid );
                        return true;
                    default : // invalid token
                        reportInvalidToken( ILLEGAL + DATA_BLOCK );
                        return true;
                }
            }
        }
        catch( Exception e ) {
            e.printStackTrace();
            return true;
        }
    } //*************************************************************************
    /** Parses saveframe.
     * Consumes comments, loops, and free tag/value pairs up to the closing
     * <CODE>save_</CODE>. A value that is not preceded by its own tag is
     * reported as an invalid token.
     * @param sfname saveframe name, passed to <CODE>endSaveFrame</CODE>
     * @return <CODE>true</CODE> to stop parsing (error, exception, or handler
     *         request), <CODE>false</CODE> if the saveframe was closed normally
     */
    public boolean parseSaveFrame( String sfname ) {
        String tag = null; // pending tag, null once its value was delivered
        int tagline = -1;
        try {
            for( ;; ) {
                int tok = fLex.yylex();
                switch( tok ) {
                    case STARLexer.ERROR :
                        reportLexerError();
                        return true;
                    case STARLexer.WARNING :
                        if( fEh.warning( fLex.getLine(), fLex.getColumn(), WARN_KEYWORD
                        + fLex.yytext() ) ) return true;
                        break;
                    case STARLexer.SAVEEND : // exit point
                        return fCh.endSaveFrame( fLex.getLine(), sfname );
                    case STARLexer.COMMENT :
                        if( fCh.comment( fLex.getLine(), fLex.yytext() ) )
                            return true;
                        break;
                    case STARLexer.LOOPSTART :
                        if( fCh.startLoop( fLex.getLine() ) ) return true;
                        if( parseLoop() ) return true;
                        break;
                    case STARLexer.TAGNAME : // save tag
                        tag = fLex.yytext();
                        tagline = fLex.getLine();
                        break;
                    case STARLexer.DVNSINGLE :
                    case STARLexer.DVNDOUBLE :
                    case STARLexer.DVNSEMICOLON :
                    case STARLexer.DVNFRAMECODE :
                    case STARLexer.DVNNON :
                        if( tag == null ) { // value without a tag of its own
                            reportInvalidToken( ILLEGAL + SAVEFRAME );
                            return true;
                        }
                        DataItemNode item = new DataItemNode( tagline, tag );
                        item.setDelimType( tok );
                        item.setValueLine( fLex.getLine() );
                        setItemValue( item, tok );
                        item.setLoopFlag( false );
                        // tag is consumed: a second value must not reuse it
                        tag = null;
                        tagline = -1;
                        if( fCh.data( item ) ) return true;
                        break;
                    case STARLexer.YYEOF : // error: no closing save_
                        fEh.error( fLex.getLine(), fLex.getColumn(), ERR_EOF );
                        return true;
                    default : // invalid token
                        reportInvalidToken( ILLEGAL + SAVEFRAME );
                        return true;
                }
            }
        }
        catch( Exception e ) {
            e.printStackTrace();
            return true;
        }
    } //*************************************************************************
    /** Parses a loop.
     * Collects tags, then assigns each value to the tag of its column. A loop
     * without tags or without values is an error. When the loop ends and the
     * number of values is not a multiple of the number of tags, a loop count
     * warning is issued.
     * @return <CODE>true</CODE> to stop parsing (error, exception, or handler
     *         request), <CODE>false</CODE> if the loop was closed normally
     */
    public boolean parseLoop() {
        int col = 0;        // column of the next value
        int lastline = -1;  // line of the previous value
        int wrongline = -1; // first line suspected of missing a value
        int wrongcol = -1;
        int numvals = 0;
        java.util.Vector tags = new java.util.Vector();
        try {
            for( ;; ) {
                int tok = fLex.yylex();
                switch( tok ) {
                    case STARLexer.ERROR :
                        reportLexerError();
                        return true;
                    case STARLexer.WARNING :
                        if( fEh.warning( fLex.getLine(), fLex.getColumn(), WARN_KEYWORD
                        + fLex.yytext() ) ) return true;
                        break;
                    case STARLexer.COMMENT :
                        if( fCh.comment( fLex.getLine(), fLex.yytext() ) )
                            return true;
                        break;
                    // NOTE(review): save_ ends the loop just like stop_ does,
                    // but the enclosing saveframe does not see that token.
                    // Confirm this is intended.
                    case STARLexer.SAVEEND :
                    case STARLexer.STOP :
                        if( tags.size() < 1 ) {
                            fEh.error( fLex.getLine(), fLex.getColumn(), ERR_LOOPNOTAGS );
                            return true;
                        }
                        if( numvals == 0 ) {
                            fEh.error( fLex.getLine(), fLex.getColumn(), ERR_EMPTYLOOP );
                            return true;
                        }
                        boolean rc = false;
                        if( (numvals % tags.size()) != 0 )
                            rc = fEh.warning( ( (wrongline >= 0) ? wrongline : fLex.getLine() ),
                            ( (wrongcol >= 0) ? wrongcol : -1 ), WARN_LOOPCOUNT );
                        // endLoop is skipped when the warning handler says stop
                        return ( rc || fCh.endLoop( fLex.getLine() ) );
                    case STARLexer.TAGNAME : // save tag
                        tags.addElement( new IntStringPair( fLex.getLine(),
                        fLex.yytext() ) );
                        break;
                    case STARLexer.DVNSINGLE :
                    case STARLexer.DVNDOUBLE :
                    case STARLexer.DVNSEMICOLON :
                    case STARLexer.DVNFRAMECODE :
                    case STARLexer.DVNNON :
                        if( tags.size() < 1 ) {
                            fEh.error( fLex.getLine(), fLex.getColumn(), ERR_LOOPNOTAGS );
                            return true;
                        }
                        DataItemNode item = new DataItemNode(
                        (IntStringPair)tags.elementAt( col ) );
                        item.setDelimType( tok );
                        item.setValueLine( fLex.getLine() );
                        setItemValue( item, tok );
                        item.setLoopFlag( true );
// check # values in row
                        col++;
                        numvals++;
                        // a row whose last value is on a later line than the
                        // previous value: remember the first such position
                        if( (col == tags.size()) && (lastline < fLex.getLine())
                        && (wrongline < 0) ) {
                            wrongline = fLex.getLine();
                            wrongcol = fLex.getColumn();
                        }
                        lastline = fLex.getLine();
                        if( col == tags.size() ) col = 0;
                        if( fCh.data( item ) ) return true;
                        break;
                    case STARLexer.YYEOF : // error: no closing stop_
                        fEh.error( fLex.getLine(), fLex.getColumn(), ERR_EOF );
                        return true;
                    default : // invalid token
                        reportInvalidToken( ILLEGAL + LOOP );
                        return true;
                }
            }
        }
        catch( Exception e ) {
            e.printStackTrace();
            return true;
        }
    } //*************************************************************************
    /**
     * Prints the token stream of a file (or of standard input when no file
     * name is given).
     * @param args the command line arguments
     */
    public static void main (String args[]) {
        java.io.InputStream in = null;
        try {
            if( args.length < 1 ) in = System.in;
            else in = new java.io.FileInputStream( args[0] );
            java.io.BufferedReader reader = new java.io.BufferedReader(
            new java.io.InputStreamReader( in ) );
            STARLexer lex = new STARLexer( reader );
            SansParser p = new SansParser( lex );
            p.test_parse();
        }
        catch( Exception e ) { e.printStackTrace(); }
        finally {
            // close the input file; standard input is left open
            if( (in != null) && (in != System.in) ) {
                try { in.close(); }
                catch( java.io.IOException e ) { e.printStackTrace(); }
            }
        }
    } //*************************************************************************
    /** Prints every token with its type, line, and column to standard output.
     * Handlers are not used.
     */
    public void test_parse() {
        try {
            int tok = fLex.yylex();
            while( tok != STARLexer.YYEOF ) {
                System.out.print( STARLexer.TOKEN_TYPES[tok] + "("
                + fLex.getLine() + ":" + fLex.getColumn() + "): " );
                switch( tok ) {
                    case STARLexer.DVNSINGLE :
                    case STARLexer.DVNDOUBLE :
                    case STARLexer.DVNSEMICOLON :
                        System.out.println( fLex.getText() );
                        break;
                    default :
                        System.out.println( fLex.yytext() );
                }
                tok = fLex.yylex();
            }
            System.out.println( "End of data_ (EOF)" );
        }
        catch( Exception e ) { e.printStackTrace(); }
    } //*************************************************************************
}
