/* * @(#)RTFParser.java 1.11 03/12/19 * * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. */ package javax.swing.text.rtf; import java.io.*; import java.lang.*; /** * RTFParser is a subclass of AbstractFilter which understands basic RTF syntax * and passes a stream of control words, text, and begin/end group * indications to its subclass. * * Normally programmers will only use RTFFilter, a subclass of this class that knows what to * do with the tokens this class parses. * * @see AbstractFilter * @see RTFFilter */ abstract class RTFParser extends AbstractFilter { /** The current RTF group nesting level. */ public int level; private int state; private StringBuffer currentCharacters; private String pendingKeyword; // where keywords go while we // read their parameters private int pendingCharacter; // for the \'xx construct private long binaryBytesLeft; // in a \bin blob? ByteArrayOutputStream binaryBuf; private boolean[] savedSpecials; /** A stream to which to write warnings and debugging information * while parsing. This is set to System.out to log * any anomalous information to stdout. */ protected PrintStream warnings; // value for the 'state' variable private final int S_text = 0; // reading random text private final int S_backslashed = 1; // read a backslash, waiting for next private final int S_token = 2; // reading a multicharacter token private final int S_parameter = 3; // reading a token's parameter private final int S_aftertick = 4; // after reading \' private final int S_aftertickc = 5; // after reading \'x private final int S_inblob = 6; // in a \bin blob /** Implemented by subclasses to interpret a parameter-less RTF keyword. * The keyword is passed without the leading '/' or any delimiting * whitespace. */ public abstract boolean handleKeyword(String keyword); /** Implemented by subclasses to interpret a keyword with a parameter. * @param keyword The keyword, as with handleKeyword(String). * @param parameter The parameter following the keyword. */ public abstract boolean handleKeyword(String keyword, int parameter); /** Implemented by subclasses to interpret text from the RTF stream. */ public abstract void handleText(String text); public void handleText(char ch) { handleText(String.valueOf(ch)); } /** Implemented by subclasses to handle the contents of the \bin keyword. */ public abstract void handleBinaryBlob(byte[] data); /** Implemented by subclasses to react to an increase * in the nesting level. */ public abstract void begingroup(); /** Implemented by subclasses to react to the end of a group. */ public abstract void endgroup(); // table of non-text characters in rtf static final boolean rtfSpecialsTable[]; static { rtfSpecialsTable = (boolean[])noSpecialsTable.clone(); rtfSpecialsTable['\n'] = true; rtfSpecialsTable['\r'] = true; rtfSpecialsTable['{'] = true; rtfSpecialsTable['}'] = true; rtfSpecialsTable['\\'] = true; } public RTFParser() { currentCharacters = new StringBuffer(); state = S_text; pendingKeyword = null; level = 0; //warnings = System.out; specialsTable = rtfSpecialsTable; } // TODO: Handle wrapup at end of file correctly. public void writeSpecial(int b) throws IOException { write((char)b); } protected void warning(String s) { if (warnings != null) { warnings.println(s); } } public void write(String s) throws IOException { if (state != S_text) { int index = 0; int length = s.length(); while(index < length && state != S_text) { write(s.charAt(index)); index ++; } if(index >= length) return; s = s.substring(index); } if (currentCharacters.length() > 0) currentCharacters.append(s); else handleText(s); } public void write(char ch) throws IOException { boolean ok; switch (state) { case S_text: if (ch == '\n' || ch == '\r') { break; // unadorned newlines are ignored } else if (ch == '{') { if (currentCharacters.length() > 0) { handleText(currentCharacters.toString()); currentCharacters = new StringBuffer(); } level ++; begingroup(); } else if(ch == '}') { if (currentCharacters.length() > 0) { handleText(currentCharacters.toString()); currentCharacters = new StringBuffer(); } if (level == 0) throw new IOException("Too many close-groups in RTF text"); endgroup(); level --; } else if(ch == '\\') { if (currentCharacters.length() > 0) { handleText(currentCharacters.toString()); currentCharacters = new StringBuffer(); } state = S_backslashed; } else { currentCharacters.append(ch); } break; case S_backslashed: if (ch == '\'') { state = S_aftertick; break; } if (!Character.isLetter(ch)) { char newstring[] = new char[1]; newstring[0] = ch; if (!handleKeyword(new String(newstring))) { warning("Unknown keyword: " + newstring + " (" + (byte)ch + ")"); } state = S_text; pendingKeyword = null; /* currentCharacters is already an empty stringBuffer */ break; } state = S_token; /* FALL THROUGH */ case S_token: if (Character.isLetter(ch)) { currentCharacters.append(ch); } else { pendingKeyword = currentCharacters.toString(); currentCharacters = new StringBuffer(); // Parameter following? if (Character.isDigit(ch) || (ch == '-')) { state = S_parameter; currentCharacters.append(ch); } else { ok = handleKeyword(pendingKeyword); if (!ok) warning("Unknown keyword: " + pendingKeyword); pendingKeyword = null; state = S_text; // Non-space delimiters get included in the text if (!Character.isWhitespace(ch)) write(ch); } } break; case S_parameter: if (Character.isDigit(ch)) { currentCharacters.append(ch); } else { /* TODO: Test correct behavior of \bin keyword */ if (pendingKeyword.equals("bin")) { /* magic layer-breaking kwd */ long parameter = Long.parseLong(currentCharacters.toString()); pendingKeyword = null; state = S_inblob; binaryBytesLeft = parameter; if (binaryBytesLeft > Integer.MAX_VALUE) binaryBuf = new ByteArrayOutputStream(Integer.MAX_VALUE); else binaryBuf = new ByteArrayOutputStream((int)binaryBytesLeft); savedSpecials = specialsTable; specialsTable = allSpecialsTable; break; } int parameter = Integer.parseInt(currentCharacters.toString()); ok = handleKeyword(pendingKeyword, parameter); if (!ok) warning("Unknown keyword: " + pendingKeyword + " (param " + currentCharacters + ")"); pendingKeyword = null; currentCharacters = new StringBuffer(); state = S_text; // Delimiters here are interpreted as text too if (!Character.isWhitespace(ch)) write(ch); } break; case S_aftertick: if (Character.digit(ch, 16) == -1) state = S_text; else { pendingCharacter = Character.digit(ch, 16); state = S_aftertickc; } break; case S_aftertickc: state = S_text; if (Character.digit(ch, 16) != -1) { pendingCharacter = pendingCharacter * 16 + Character.digit(ch, 16); ch = translationTable[pendingCharacter]; if (ch != 0) handleText(ch); } break; case S_inblob: binaryBuf.write(ch); binaryBytesLeft --; if (binaryBytesLeft == 0) { state = S_text; specialsTable = savedSpecials; savedSpecials = null; handleBinaryBlob(binaryBuf.toByteArray()); binaryBuf = null; } } } /** Flushes any buffered but not yet written characters. * Subclasses which override this method should call this * method before flushing * any of their own buffers. */ public void flush() throws IOException { super.flush(); if (state == S_text && currentCharacters.length() > 0) { handleText(currentCharacters.toString()); currentCharacters = new StringBuffer(); } } /** Closes the parser. Currently, this simply does a flush(), * followed by some minimal consistency checks. */ public void close() throws IOException { flush(); if (state != S_text || level > 0) { warning("Truncated RTF file."); /* TODO: any sane way to handle termination in a non-S_text state? */ /* probably not */ /* this will cause subclasses to behave more reasonably some of the time */ while (level > 0) { endgroup(); level --; } } super.close(); } }