/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999-2004 The Apache Software Foundation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
*
* This component requires the following features and properties from the * component manager that uses it: *
*
* [23] XMLDecl ::= ''
* [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
* | ('"' ('yes' | 'no') '"'))
*
* [77] TextDecl ::= ''
*
*
* @param scanningTextDecl True if a text declaration is to
* be scanned instead of an XML
* declaration.
* @param pseudoAttributeValues An array of size 3 to return the version,
* encoding and standalone pseudo attribute values
* (in that order).
*
* Note: This method uses fString, anything in it
* at the time of calling is lost.
*/
protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
String[] pseudoAttributeValues)
throws IOException, XNIException {
// pseudo-attribute values
String version = null;
String encoding = null;
String standalone = null;
// scan pseudo-attributes
final int STATE_VERSION = 0;
final int STATE_ENCODING = 1;
final int STATE_STANDALONE = 2;
final int STATE_DONE = 3;
int state = STATE_VERSION;
boolean dataFoundForTarget = false;
boolean sawSpace = fEntityScanner.skipDeclSpaces();
// since pseudoattributes are *not* attributes,
// their quotes don't need to be preserved in external parameter entities.
// the XMLEntityScanner#scanLiteral method will continue to
// emit -1 in such cases when it finds a quote; this is
// fine for other methods that parse scanned entities,
// but not for the scanning of pseudoattributes. So,
// temporarily, we must mark the current entity as not being "literal"
XMLEntityManager.ScannedEntity currEnt = fEntityManager.getCurrentEntity();
boolean currLiteral = currEnt.literal;
currEnt.literal = false;
while (fEntityScanner.peekChar() != '?') {
dataFoundForTarget = true;
String name = scanPseudoAttribute(scanningTextDecl, fString);
switch (state) {
case STATE_VERSION: {
if (name == fVersionSymbol) {
if (!sawSpace) {
reportFatalError(scanningTextDecl
? "SpaceRequiredBeforeVersionInTextDecl"
: "SpaceRequiredBeforeVersionInXMLDecl",
null);
}
version = fString.toString();
state = STATE_ENCODING;
if (!versionSupported(version)) {
reportFatalError(getVersionNotSupportedKey(),
new Object[]{version});
}
}
else if (name == fEncodingSymbol) {
if (!scanningTextDecl) {
reportFatalError("VersionInfoRequired", null);
}
if (!sawSpace) {
reportFatalError(scanningTextDecl
? "SpaceRequiredBeforeEncodingInTextDecl"
: "SpaceRequiredBeforeEncodingInXMLDecl",
null);
}
encoding = fString.toString();
state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
}
else {
if (scanningTextDecl) {
reportFatalError("EncodingDeclRequired", null);
}
else {
reportFatalError("VersionInfoRequired", null);
}
}
break;
}
case STATE_ENCODING: {
if (name == fEncodingSymbol) {
if (!sawSpace) {
reportFatalError(scanningTextDecl
? "SpaceRequiredBeforeEncodingInTextDecl"
: "SpaceRequiredBeforeEncodingInXMLDecl",
null);
}
encoding = fString.toString();
state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
// TODO: check encoding name; set encoding on
// entity scanner
}
else if (!scanningTextDecl && name == fStandaloneSymbol) {
if (!sawSpace) {
reportFatalError("SpaceRequiredBeforeStandalone",
null);
}
standalone = fString.toString();
state = STATE_DONE;
if (!standalone.equals("yes") && !standalone.equals("no")) {
reportFatalError("SDDeclInvalid", new Object[] {standalone});
}
}
else {
reportFatalError("EncodingDeclRequired", null);
}
break;
}
case STATE_STANDALONE: {
if (name == fStandaloneSymbol) {
if (!sawSpace) {
reportFatalError("SpaceRequiredBeforeStandalone",
null);
}
standalone = fString.toString();
state = STATE_DONE;
if (!standalone.equals("yes") && !standalone.equals("no")) {
reportFatalError("SDDeclInvalid", new Object[] {standalone});
}
}
else {
reportFatalError("EncodingDeclRequired", null);
}
break;
}
default: {
reportFatalError("NoMorePseudoAttributes", null);
}
}
sawSpace = fEntityScanner.skipDeclSpaces();
}
// restore original literal value
if(currLiteral)
currEnt.literal = true;
// REVISIT: should we remove this error reporting?
if (scanningTextDecl && state != STATE_DONE) {
reportFatalError("MorePseudoAttributes", null);
}
// If there is no data in the xml or text decl then we fail to report error
// for version or encoding info above.
if (scanningTextDecl) {
if (!dataFoundForTarget && encoding == null) {
reportFatalError("EncodingDeclRequired", null);
}
}
else {
if (!dataFoundForTarget && version == null) {
reportFatalError("VersionInfoRequired", null);
}
}
// end
if (!fEntityScanner.skipChar('?')) {
reportFatalError("XMLDeclUnterminated", null);
}
if (!fEntityScanner.skipChar('>')) {
reportFatalError("XMLDeclUnterminated", null);
}
// fill in return array
pseudoAttributeValues[0] = version;
pseudoAttributeValues[1] = encoding;
pseudoAttributeValues[2] = standalone;
} // scanXMLDeclOrTextDecl(boolean)
/**
* Scans a pseudo attribute.
*
* @param scanningTextDecl True if scanning this pseudo-attribute for a
* TextDecl; false if scanning XMLDecl. This
* flag is needed to report the correct type of
* error.
* @param value The string to fill in with the attribute
* value.
*
* @return The name of the attribute
*
* Note: This method uses fStringBuffer2, anything in it
* at the time of calling is lost.
*/
public String scanPseudoAttribute(boolean scanningTextDecl,
XMLString value)
throws IOException, XNIException {
// REVISIT: This method is used for generic scanning of
// pseudo attributes, but since there are only three such
// attributes: version, encoding, and standalone there are
// for performant ways of scanning them. Every decl must
// have a version, and in TextDecls this version must
// be followed by an encoding declaration. Also the
// methods we invoke on the scanners allow non-ASCII
// characters to be parsed in the decls, but since
// we don't even know what the actual encoding of the
// document is until we scan the encoding declaration
// you cannot reliably read any characters outside
// of the ASCII range here. -- mrglavas
String name = fEntityScanner.scanName();
XMLEntityManager.print(fEntityManager.getCurrentEntity());
if (name == null) {
reportFatalError("PseudoAttrNameExpected", null);
}
fEntityScanner.skipDeclSpaces();
if (!fEntityScanner.skipChar('=')) {
reportFatalError(scanningTextDecl ? "EqRequiredInTextDecl"
: "EqRequiredInXMLDecl", new Object[]{name});
}
fEntityScanner.skipDeclSpaces();
int quote = fEntityScanner.peekChar();
if (quote != '\'' && quote != '"') {
reportFatalError(scanningTextDecl ? "QuoteRequiredInTextDecl"
: "QuoteRequiredInXMLDecl" , new Object[]{name});
}
fEntityScanner.scanChar();
int c = fEntityScanner.scanLiteral(quote, value);
if (c != quote) {
fStringBuffer2.clear();
do {
fStringBuffer2.append(value);
if (c != -1) {
if (c == '&' || c == '%' || c == '<' || c == ']') {
fStringBuffer2.append((char)fEntityScanner.scanChar());
}
// REVISIT: Even if you could reliably read non-ASCII chars
// why bother scanning for surrogates here? Only ASCII chars
// match the productions in XMLDecls and TextDecls. -- mrglavas
else if (XMLChar.isHighSurrogate(c)) {
scanSurrogates(fStringBuffer2);
}
else if (isInvalidLiteral(c)) {
String key = scanningTextDecl
? "InvalidCharInTextDecl" : "InvalidCharInXMLDecl";
reportFatalError(key,
new Object[] {Integer.toString(c, 16)});
fEntityScanner.scanChar();
}
}
c = fEntityScanner.scanLiteral(quote, value);
} while (c != quote);
fStringBuffer2.append(value);
value.setValues(fStringBuffer2);
}
if (!fEntityScanner.skipChar(quote)) {
reportFatalError(scanningTextDecl ? "CloseQuoteMissingInTextDecl"
: "CloseQuoteMissingInXMLDecl",
new Object[]{name});
}
// return
return name;
} // scanPseudoAttribute(XMLString):String
/**
* Scans a processing instruction.
* *
* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
*
* Note: This method uses fString, anything in it
* at the time of calling is lost.
*/
protected void scanPI() throws IOException, XNIException {
// target
fReportEntity = false;
String target = null;
if(fNamespaces) {
target = fEntityScanner.scanNCName();
} else {
target = fEntityScanner.scanName();
}
if (target == null) {
reportFatalError("PITargetRequired", null);
}
// scan data
scanPIData(target, fString);
fReportEntity = true;
} // scanPI()
/**
* Scans a processing data. This is needed to handle the situation
* where a document starts with a processing instruction whose
* target name starts with "xml". (e.g. xmlfoo)
*
* Note: This method uses fStringBuffer, anything in it
* at the time of calling is lost.
*
* @param target The PI target
* @param data The string to fill in with the data
*/
protected void scanPIData(String target, XMLString data)
throws IOException, XNIException {
// check target
if (target.length() == 3) {
char c0 = Character.toLowerCase(target.charAt(0));
char c1 = Character.toLowerCase(target.charAt(1));
char c2 = Character.toLowerCase(target.charAt(2));
if (c0 == 'x' && c1 == 'm' && c2 == 'l') {
reportFatalError("ReservedPITarget", null);
}
}
// spaces
if (!fEntityScanner.skipSpaces()) {
if (fEntityScanner.skipString("?>")) {
// we found the end, there is no data
data.clear();
return;
}
else {
if(fNamespaces && fEntityScanner.peekChar() == ':') {
fEntityScanner.scanChar();
XMLStringBuffer colonName = new XMLStringBuffer(target);
colonName.append(":");
String str = fEntityScanner.scanName();
if (str != null)
colonName.append(str);
reportFatalError("ColonNotLegalWithNS", new Object[] {colonName.toString()});
fEntityScanner.skipSpaces();
} else {
// if there is data there should be some space
reportFatalError("SpaceRequiredInPI", null);
}
}
}
fStringBuffer.clear();
// data
if (fEntityScanner.scanData("?>", fStringBuffer)) {
do {
int c = fEntityScanner.peekChar();
if (c != -1) {
if (XMLChar.isHighSurrogate(c)) {
scanSurrogates(fStringBuffer);
}
else if (isInvalidLiteral(c)) {
reportFatalError("InvalidCharInPI",
new Object[]{Integer.toHexString(c)});
fEntityScanner.scanChar();
}
}
} while (fEntityScanner.scanData("?>", fStringBuffer));
}
data.setValues(fStringBuffer);
} // scanPIData(String,XMLString)
/**
* Scans a comment.
* *
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
*
* * Note: Called after scanning past '<!--' * Note: This method uses fString, anything in it * at the time of calling is lost. * * @param text The buffer to fill in with the text. */ protected void scanComment(XMLStringBuffer text) throws IOException, XNIException { // text // REVISIT: handle invalid character, eof text.clear(); while (fEntityScanner.scanData("--", text)) { int c = fEntityScanner.peekChar(); if (c != -1) { if (XMLChar.isHighSurrogate(c)) { scanSurrogates(text); } else if (isInvalidLiteral(c)) { reportFatalError("InvalidCharInComment", new Object[] { Integer.toHexString(c) }); fEntityScanner.scanChar(); } } } if (!fEntityScanner.skipChar('>')) { reportFatalError("DashDashInComment", null); } } // scanComment() /** * Scans an attribute value and normalizes whitespace converting all * whitespace characters to space characters. * * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" * * @param value The XMLString to fill in with the value. * @param nonNormalizedValue The XMLString to fill in with the * non-normalized value. * @param atName The name of the attribute being parsed (for error msgs). * @param checkEntities true if undeclared entities should be reported as VC violation, * false if undeclared entities should be reported as WFC violation. * @param eleName The name of element to which this attribute belongs. * * Note: This method uses fStringBuffer2, anything in it * at the time of calling is lost. **/ protected void scanAttributeValue(XMLString value, XMLString nonNormalizedValue, String atName, boolean checkEntities,String eleName) throws IOException, XNIException { // quote int quote = fEntityScanner.peekChar(); if (quote != '\'' && quote != '"') { reportFatalError("OpenQuoteExpected", new Object[]{eleName,atName}); } fEntityScanner.scanChar(); int entityDepth = fEntityDepth; int c = fEntityScanner.scanLiteral(quote, value); if (DEBUG_ATTR_NORMALIZATION) { System.out.println("** scanLiteral -> \"" + value.toString() + "\""); } fStringBuffer2.clear(); fStringBuffer2.append(value); normalizeWhitespace(value); if (DEBUG_ATTR_NORMALIZATION) { System.out.println("** normalizeWhitespace -> \"" + value.toString() + "\""); } if (c != quote) { fScanningAttribute = true; fStringBuffer.clear(); do { fStringBuffer.append(value); if (DEBUG_ATTR_NORMALIZATION) { System.out.println("** value2: \"" + fStringBuffer.toString() + "\""); } if (c == '&') { fEntityScanner.skipChar('&'); if (entityDepth == fEntityDepth) { fStringBuffer2.append('&'); } if (fEntityScanner.skipChar('#')) { if (entityDepth == fEntityDepth) { fStringBuffer2.append('#'); } int ch = scanCharReferenceValue(fStringBuffer, fStringBuffer2); if (ch != -1) { if (DEBUG_ATTR_NORMALIZATION) { System.out.println("** value3: \"" + fStringBuffer.toString() + "\""); } } } else { String entityName = fEntityScanner.scanName(); if (entityName == null) { reportFatalError("NameRequiredInReference", null); } else if (entityDepth == fEntityDepth) { fStringBuffer2.append(entityName); } if (!fEntityScanner.skipChar(';')) { reportFatalError("SemicolonRequiredInReference", new Object []{entityName}); } else if (entityDepth == fEntityDepth) { fStringBuffer2.append(';'); } if (entityName == fAmpSymbol) { fStringBuffer.append('&'); if (DEBUG_ATTR_NORMALIZATION) { System.out.println("** value5: \"" + fStringBuffer.toString() + "\""); } } else if (entityName == fAposSymbol) { fStringBuffer.append('\''); if (DEBUG_ATTR_NORMALIZATION) { System.out.println("** value7: \"" + fStringBuffer.toString() + "\""); } } else if (entityName == fLtSymbol) { fStringBuffer.append('<'); if (DEBUG_ATTR_NORMALIZATION) { System.out.println("** value9: \"" + fStringBuffer.toString() + "\""); } } else if (entityName == fGtSymbol) { fStringBuffer.append('>'); if (DEBUG_ATTR_NORMALIZATION) { System.out.println("** valueB: \"" + fStringBuffer.toString() + "\""); } } else if (entityName == fQuotSymbol) { fStringBuffer.append('"'); if (DEBUG_ATTR_NORMALIZATION) { System.out.println("** valueD: \"" + fStringBuffer.toString() + "\""); } } else { if (fEntityManager.isExternalEntity(entityName)) { reportFatalError("ReferenceToExternalEntity", new Object[] { entityName }); } else { if (!fEntityManager.isDeclaredEntity(entityName)) { //WFC & VC: Entity Declared if (checkEntities) { if (fValidation) { fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, "EntityNotDeclared", new Object[]{entityName}, XMLErrorReporter.SEVERITY_ERROR); } } else { reportFatalError("EntityNotDeclared", new Object[]{entityName}); } } fEntityManager.startEntity(entityName, true); } } } } else if (c == '<') { reportFatalError("LessthanInAttValue", new Object[] { eleName, atName }); fEntityScanner.scanChar(); if (entityDepth == fEntityDepth) { fStringBuffer2.append((char)c); } } else if (c == '%' || c == ']') { fEntityScanner.scanChar(); fStringBuffer.append((char)c); if (entityDepth == fEntityDepth) { fStringBuffer2.append((char)c); } if (DEBUG_ATTR_NORMALIZATION) { System.out.println("** valueF: \"" + fStringBuffer.toString() + "\""); } } else if (c == '\n' || c == '\r') { fEntityScanner.scanChar(); fStringBuffer.append(' '); if (entityDepth == fEntityDepth) { fStringBuffer2.append('\n'); } } else if (c != -1 && XMLChar.isHighSurrogate(c)) { fStringBuffer3.clear(); if (scanSurrogates(fStringBuffer3)) { fStringBuffer.append(fStringBuffer3); if (entityDepth == fEntityDepth) { fStringBuffer2.append(fStringBuffer3); } if (DEBUG_ATTR_NORMALIZATION) { System.out.println("** valueI: \"" + fStringBuffer.toString() + "\""); } } } else if (c != -1 && isInvalidLiteral(c)) { reportFatalError("InvalidCharInAttValue", new Object[] {eleName, atName, Integer.toString(c, 16)}); fEntityScanner.scanChar(); if (entityDepth == fEntityDepth) { fStringBuffer2.append((char)c); } } c = fEntityScanner.scanLiteral(quote, value); if (entityDepth == fEntityDepth) { fStringBuffer2.append(value); } normalizeWhitespace(value); } while (c != quote || entityDepth != fEntityDepth); fStringBuffer.append(value); if (DEBUG_ATTR_NORMALIZATION) { System.out.println("** valueN: \"" + fStringBuffer.toString() + "\""); } value.setValues(fStringBuffer); fScanningAttribute = false; } nonNormalizedValue.setValues(fStringBuffer2); // quote int cquote = fEntityScanner.scanChar(); if (cquote != quote) { reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName}); } } // scanAttributeValue() /** * Scans External ID and return the public and system IDs. * * @param identifiers An array of size 2 to return the system id, * and public id (in that order). * @param optionalSystemId Specifies whether the system id is optional. * * Note: This method uses fString and fStringBuffer, * anything in them at the time of calling is lost. */ protected void scanExternalID(String[] identifiers, boolean optionalSystemId) throws IOException, XNIException { String systemId = null; String publicId = null; if (fEntityScanner.skipString("PUBLIC")) { if (!fEntityScanner.skipSpaces()) { reportFatalError("SpaceRequiredAfterPUBLIC", null); } scanPubidLiteral(fString); publicId = fString.toString(); if (!fEntityScanner.skipSpaces() && !optionalSystemId) { reportFatalError("SpaceRequiredBetweenPublicAndSystem", null); } } if (publicId != null || fEntityScanner.skipString("SYSTEM")) { if (publicId == null && !fEntityScanner.skipSpaces()) { reportFatalError("SpaceRequiredAfterSYSTEM", null); } int quote = fEntityScanner.peekChar(); if (quote != '\'' && quote != '"') { if (publicId != null && optionalSystemId) { // looks like we don't have any system id // simply return the public id identifiers[0] = null; identifiers[1] = publicId; return; } reportFatalError("QuoteRequiredInSystemID", null); } fEntityScanner.scanChar(); XMLString ident = fString; if (fEntityScanner.scanLiteral(quote, ident) != quote) { fStringBuffer.clear(); do { fStringBuffer.append(ident); int c = fEntityScanner.peekChar(); if (XMLChar.isMarkup(c) || c == ']') { fStringBuffer.append((char)fEntityScanner.scanChar()); } } while (fEntityScanner.scanLiteral(quote, ident) != quote); fStringBuffer.append(ident); ident = fStringBuffer; } systemId = ident.toString(); if (!fEntityScanner.skipChar(quote)) { reportFatalError("SystemIDUnterminated", null); } } // store result in array identifiers[0] = systemId; identifiers[1] = publicId; } /** * Scans public ID literal. * * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] * * The returned string is normalized according to the following rule, * from http://www.w3.org/TR/REC-xml#dt-pubid: * * Before a match is attempted, all strings of white space in the public * identifier must be normalized to single space characters (#x20), and * leading and trailing white space must be removed. * * @param literal The string to fill in with the public ID literal. * @return True on success. * * Note: This method uses fStringBuffer, anything in it at * the time of calling is lost. */ protected boolean scanPubidLiteral(XMLString literal) throws IOException, XNIException { int quote = fEntityScanner.scanChar(); if (quote != '\'' && quote != '"') { reportFatalError("QuoteRequiredInPublicID", null); return false; } fStringBuffer.clear(); // skip leading whitespace boolean skipSpace = true; boolean dataok = true; while (true) { int c = fEntityScanner.scanChar(); if (c == ' ' || c == '\n' || c == '\r') { if (!skipSpace) { // take the first whitespace as a space and skip the others fStringBuffer.append(' '); skipSpace = true; } } else if (c == quote) { if (skipSpace) { // if we finished on a space let's trim it fStringBuffer.length--; } literal.setValues(fStringBuffer); break; } else if (XMLChar.isPubid(c)) { fStringBuffer.append((char)c); skipSpace = false; } else if (c == -1) { reportFatalError("PublicIDUnterminated", null); return false; } else { dataok = false; reportFatalError("InvalidCharInPublicID", new Object[]{Integer.toHexString(c)}); } } return dataok; } /** * Normalize whitespace in an XMLString converting all whitespace * characters to space characters. */ protected void normalizeWhitespace(XMLString value) { int end = value.offset + value.length; for (int i = value.offset; i < end; i++) { int c = value.ch[i]; // Performance: For XML 1.0 documents take advantage of // the fact that the only legal characters below 0x20 // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've // already determined the well-formedness of these // characters it is sufficient (and safe) to check // against 0x20. -- mrglavas if (c < 0x20) { value.ch[i] = ' '; } } } // // XMLEntityHandler methods // /** * This method notifies of the start of an entity. The document entity * has the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]" * parameter entity names start with '%'; and general entities are just * specified by their name. * * @param name The name of the entity. * @param identifier The resource identifier. * @param encoding The auto-detected IANA encoding name of the entity * stream. This value will be null in those situations * where the entity encoding is not auto-detected (e.g. * internal entities or a document entity that is * parsed from a java.io.Reader). * @param augs Additional information that may include infoset augmentations * * @throws XNIException Thrown by handler to signal an error. */ public void startEntity(String name, XMLResourceIdentifier identifier, String encoding, Augmentations augs) throws XNIException { // keep track of the entity depth fEntityDepth++; // must reset entity scanner fEntityScanner = fEntityManager.getEntityScanner(); } // startEntity(String,XMLResourceIdentifier,String) /** * This method notifies the end of an entity. The document entity has * the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]" * parameter entity names start with '%'; and general entities are just * specified by their name. * * @param name The name of the entity. * @param augs Additional information that may include infoset augmentations * * @throws XNIException Thrown by handler to signal an error. */ public void endEntity(String name, Augmentations augs) throws XNIException { // keep track of the entity depth fEntityDepth--; } // endEntity(String) /** * Scans a character reference and append the corresponding chars to the * specified buffer. * *
*
* [66] CharRef ::= '' [0-9]+ ';' | '' [0-9a-fA-F]+ ';'
*
*
* Note: This method uses fStringBuffer, anything in it
* at the time of calling is lost.
*
* @param buf the character buffer to append chars to
* @param buf2 the character buffer to append non-normalized chars to
*
* @return the character value or (-1) on conversion failure
*/
protected int scanCharReferenceValue(XMLStringBuffer buf, XMLStringBuffer buf2)
throws IOException, XNIException {
// scan hexadecimal value
boolean hex = false;
if (fEntityScanner.skipChar('x')) {
if (buf2 != null) { buf2.append('x'); }
hex = true;
fStringBuffer3.clear();
boolean digit = true;
int c = fEntityScanner.peekChar();
digit = (c >= '0' && c <= '9') ||
(c >= 'a' && c <= 'f') ||
(c >= 'A' && c <= 'F');
if (digit) {
if (buf2 != null) { buf2.append((char)c); }
fEntityScanner.scanChar();
fStringBuffer3.append((char)c);
do {
c = fEntityScanner.peekChar();
digit = (c >= '0' && c <= '9') ||
(c >= 'a' && c <= 'f') ||
(c >= 'A' && c <= 'F');
if (digit) {
if (buf2 != null) { buf2.append((char)c); }
fEntityScanner.scanChar();
fStringBuffer3.append((char)c);
}
} while (digit);
}
else {
reportFatalError("HexdigitRequiredInCharRef", null);
}
}
// scan decimal value
else {
fStringBuffer3.clear();
boolean digit = true;
int c = fEntityScanner.peekChar();
digit = c >= '0' && c <= '9';
if (digit) {
if (buf2 != null) { buf2.append((char)c); }
fEntityScanner.scanChar();
fStringBuffer3.append((char)c);
do {
c = fEntityScanner.peekChar();
digit = c >= '0' && c <= '9';
if (digit) {
if (buf2 != null) { buf2.append((char)c); }
fEntityScanner.scanChar();
fStringBuffer3.append((char)c);
}
} while (digit);
}
else {
reportFatalError("DigitRequiredInCharRef", null);
}
}
// end
if (!fEntityScanner.skipChar(';')) {
reportFatalError("SemicolonRequiredInCharRef", null);
}
if (buf2 != null) { buf2.append(';'); }
// convert string to number
int value = -1;
try {
value = Integer.parseInt(fStringBuffer3.toString(),
hex ? 16 : 10);
// character reference must be a valid XML character
if (isInvalid(value)) {
StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1);
if (hex) errorBuf.append('x');
errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length);
reportFatalError("InvalidCharRef",
new Object[]{errorBuf.toString()});
}
}
catch (NumberFormatException e) {
// Conversion failed, let -1 value drop through.
// If we end up here, the character reference was invalid.
StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1);
if (hex) errorBuf.append('x');
errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length);
reportFatalError("InvalidCharRef",
new Object[]{errorBuf.toString()});
}
// append corresponding chars to the given buffer
if (!XMLChar.isSupplemental(value)) {
buf.append((char) value);
}
else {
// character is supplemental, split it into surrogate chars
buf.append(XMLChar.highSurrogate(value));
buf.append(XMLChar.lowSurrogate(value));
}
// char refs notification code
if (fNotifyCharRefs && value != -1) {
String literal = "#" + (hex ? "x" : "") + fStringBuffer3.toString();
if (!fScanningAttribute) {
fCharRefLiteral = literal;
}
}
return value;
}
// returns true if the given character is not
// valid with respect to the version of
// XML understood by this scanner.
protected boolean isInvalid(int value) {
return (XMLChar.isInvalid(value));
} // isInvalid(int): boolean
// returns true if the given character is not
// valid or may not be used outside a character reference
// with respect to the version of XML understood by this scanner.
protected boolean isInvalidLiteral(int value) {
return (XMLChar.isInvalid(value));
} // isInvalidLiteral(int): boolean
// returns true if the given character is
// a valid nameChar with respect to the version of
// XML understood by this scanner.
protected boolean isValidNameChar(int value) {
return (XMLChar.isName(value));
} // isValidNameChar(int): boolean
// returns true if the given character is
// a valid nameStartChar with respect to the version of
// XML understood by this scanner.
protected boolean isValidNameStartChar(int value) {
return (XMLChar.isNameStart(value));
} // isValidNameStartChar(int): boolean
// returns true if the given character is
// a valid NCName character with respect to the version of
// XML understood by this scanner.
protected boolean isValidNCName(int value) {
return (XMLChar.isNCName(value));
} // isValidNCName(int): boolean
// returns true if the given character is
// a valid high surrogate for a nameStartChar
// with respect to the version of XML understood
// by this scanner.
protected boolean isValidNameStartHighSurrogate(int value) {
return false;
} // isValidNameStartHighSurrogate(int): boolean
protected boolean versionSupported(String version ) {
return version.equals("1.0");
} // version Supported
// returns the error message key for unsupported
// versions of XML with respect to the version of
// XML understood by this scanner.
protected String getVersionNotSupportedKey () {
return "VersionNotSupported";
} // getVersionNotSupportedKey: String
/**
* Scans surrogates and append them to the specified buffer.
* * Note: This assumes the current char has already been * identified as a high surrogate. * * @param buf The StringBuffer to append the read surrogates to. * @return True if it succeeded. */ protected boolean scanSurrogates(XMLStringBuffer buf) throws IOException, XNIException { int high = fEntityScanner.scanChar(); int low = fEntityScanner.peekChar(); if (!XMLChar.isLowSurrogate(low)) { reportFatalError("InvalidCharInContent", new Object[] {Integer.toString(high, 16)}); return false; } fEntityScanner.scanChar(); // convert surrogates to supplemental character int c = XMLChar.supplemental((char)high, (char)low); // supplemental character must be a valid XML character if (isInvalid(c)) { reportFatalError("InvalidCharInContent", new Object[]{Integer.toString(c, 16)}); return false; } // fill in the buffer buf.append((char)high); buf.append((char)low); return true; } // scanSurrogates():boolean /** * Convenience function used in all XML scanners. */ protected void reportFatalError(String msgId, Object[] args) throws XNIException { fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, msgId, args, XMLErrorReporter.SEVERITY_FATAL_ERROR); } // private methods private void init() { fEntityScanner = null; // initialize vars fEntityDepth = 0; fReportEntity = true; fResourceIdentifier.clear(); } } // class XMLScanner