/* * Copyright 1999-2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * $Id: Lexer.java,v 1.14 2004/02/17 04:32:49 minchau Exp $ */ package com.sun.org.apache.xpath.internal.compiler; import java.util.Vector; import com.sun.org.apache.xml.internal.utils.PrefixResolver; import com.sun.org.apache.xpath.internal.res.XPATHErrorResources; /** * This class is in charge of lexical processing of the XPath * expression into tokens. */ class Lexer { /** * The target XPath. */ private Compiler m_compiler; /** * The prefix resolver to map prefixes to namespaces in the XPath. */ PrefixResolver m_namespaceContext; /** * The XPath processor object. */ XPathParser m_processor; /** * This value is added to each element name in the TARGETEXTRA * that is a 'target' (right-most top-level element name). */ static final int TARGETEXTRA = 10000; /** * Ignore this, it is going away. * This holds a map to the m_tokenQueue that tells where the top-level elements are. * It is used for pattern matching so the m_tokenQueue can be walked backwards. * Each element that is a 'target', (right-most top level element name) has * TARGETEXTRA added to it. * */ private int m_patternMap[] = new int[100]; /** * Ignore this, it is going away. * The number of elements that m_patternMap maps; */ private int m_patternMapSize; /** * Create a Lexer object. * * @param compiler The owning compiler for this lexer. * @param resolver The prefix resolver for mapping qualified name prefixes * to namespace URIs. * @param xpathProcessor The parser that is processing strings to opcodes. */ Lexer(Compiler compiler, PrefixResolver resolver, XPathParser xpathProcessor) { m_compiler = compiler; m_namespaceContext = resolver; m_processor = xpathProcessor; } /** * Walk through the expression and build a token queue, and a map of the top-level * elements. * @param pat XSLT Expression. * * @throws javax.xml.transform.TransformerException */ void tokenize(String pat) throws javax.xml.transform.TransformerException { tokenize(pat, null); } /** * Walk through the expression and build a token queue, and a map of the top-level * elements. * @param pat XSLT Expression. * @param targetStrings Vector to hold Strings, may be null. * * @throws javax.xml.transform.TransformerException */ void tokenize(String pat, Vector targetStrings) throws javax.xml.transform.TransformerException { m_compiler.m_currentPattern = pat; m_patternMapSize = 0; // This needs to grow too. m_compiler.m_opMap = new OpMapVector(OpMap.MAXTOKENQUEUESIZE * 5, OpMap.BLOCKTOKENQUEUESIZE * 5, OpMap.MAPINDEX_LENGTH); int nChars = pat.length(); int startSubstring = -1; int posOfNSSep = -1; boolean isStartOfPat = true; boolean isAttrName = false; boolean isNum = false; // Nesting of '[' so we can know if the given element should be // counted inside the m_patternMap. int nesting = 0; // char[] chars = pat.toCharArray(); for (int i = 0; i < nChars; i++) { char c = pat.charAt(i); switch (c) { case '\"' : { if (startSubstring != -1) { isNum = false; isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); isAttrName = false; if (-1 != posOfNSSep) { posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i); } else { addToTokenQueue(pat.substring(startSubstring, i)); } } startSubstring = i; for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\"'); i++); if (c == '\"' && i < nChars) { addToTokenQueue(pat.substring(startSubstring, i + 1)); startSubstring = -1; } else { m_processor.error(XPATHErrorResources.ER_EXPECTED_DOUBLE_QUOTE, null); //"misquoted literal... expected double quote!"); } } break; case '\'' : if (startSubstring != -1) { isNum = false; isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); isAttrName = false; if (-1 != posOfNSSep) { posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i); } else { addToTokenQueue(pat.substring(startSubstring, i)); } } startSubstring = i; for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\''); i++); if (c == '\'' && i < nChars) { addToTokenQueue(pat.substring(startSubstring, i + 1)); startSubstring = -1; } else { m_processor.error(XPATHErrorResources.ER_EXPECTED_SINGLE_QUOTE, null); //"misquoted literal... expected single quote!"); } break; case 0x0A : case 0x0D : case ' ' : case '\t' : if (startSubstring != -1) { isNum = false; isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); isAttrName = false; if (-1 != posOfNSSep) { posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i); } else { addToTokenQueue(pat.substring(startSubstring, i)); } startSubstring = -1; } break; case '@' : isAttrName = true; // fall-through on purpose case '-' : if ('-' == c) { if (!(isNum || (startSubstring == -1))) { break; } isNum = false; } // fall-through on purpose case '(' : case '[' : case ')' : case ']' : case '|' : case '/' : case '*' : case '+' : case '=' : case ',' : case '\\' : // Unused at the moment case '^' : // Unused at the moment case '!' : // Unused at the moment case '$' : case '<' : case '>' : if (startSubstring != -1) { isNum = false; isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); isAttrName = false; if (-1 != posOfNSSep) { posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i); } else { addToTokenQueue(pat.substring(startSubstring, i)); } startSubstring = -1; } else if (('/' == c) && isStartOfPat) { isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); } else if ('*' == c) { isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); isAttrName = false; } if (0 == nesting) { if ('|' == c) { if (null != targetStrings) { recordTokenString(targetStrings); } isStartOfPat = true; } } if ((')' == c) || (']' == c)) { nesting--; } else if (('(' == c) || ('[' == c)) { nesting++; } addToTokenQueue(pat.substring(i, i + 1)); break; case ':' : if (i>0) { if (posOfNSSep == (i - 1)) { if (startSubstring != -1) { if (startSubstring < (i - 1)) addToTokenQueue(pat.substring(startSubstring, i - 1)); } isNum = false; isAttrName = false; startSubstring = -1; posOfNSSep = -1; addToTokenQueue(pat.substring(i - 1, i + 1)); break; } else { posOfNSSep = i; } } // fall through on purpose default : if (-1 == startSubstring) { startSubstring = i; isNum = Character.isDigit(c); } else if (isNum) { isNum = Character.isDigit(c); } } } if (startSubstring != -1) { isNum = false; isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); if ((-1 != posOfNSSep) || ((m_namespaceContext != null) && (m_namespaceContext.handlesNullPrefixes()))) { posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, nChars); } else { addToTokenQueue(pat.substring(startSubstring, nChars)); } } if (0 == m_compiler.getTokenQueueSize()) { m_processor.error(XPATHErrorResources.ER_EMPTY_EXPRESSION, null); //"Empty expression!"); } else if (null != targetStrings) { recordTokenString(targetStrings); } m_processor.m_queueMark = 0; } /** * Record the current position on the token queue as long as * this is a top-level element. Must be called before the * next token is added to the m_tokenQueue. * * @param nesting The nesting count for the pattern element. * @param isStart true if this is the start of a pattern. * @param isAttrName true if we have determined that this is an attribute name. * * @return true if this is the start of a pattern. */ private boolean mapPatternElemPos(int nesting, boolean isStart, boolean isAttrName) { if (0 == nesting) { if(m_patternMapSize >= m_patternMap.length) { int patternMap[] = m_patternMap; int len = m_patternMap.length; m_patternMap = new int[m_patternMapSize + 100]; System.arraycopy(patternMap, 0, m_patternMap, 0, len); } if (!isStart) { m_patternMap[m_patternMapSize - 1] -= TARGETEXTRA; } m_patternMap[m_patternMapSize] = (m_compiler.getTokenQueueSize() - (isAttrName ? 1 : 0)) + TARGETEXTRA; m_patternMapSize++; isStart = false; } return isStart; } /** * Given a map pos, return the corresponding token queue pos. * * @param i The index in the m_patternMap. * * @return the token queue position. */ private int getTokenQueuePosFromMap(int i) { int pos = m_patternMap[i]; return (pos >= TARGETEXTRA) ? (pos - TARGETEXTRA) : pos; } /** * Reset token queue mark and m_token to a * given position. * @param mark The new position. */ private final void resetTokenMark(int mark) { int qsz = m_compiler.getTokenQueueSize(); m_processor.m_queueMark = (mark > 0) ? ((mark <= qsz) ? mark - 1 : mark) : 0; if (m_processor.m_queueMark < qsz) { m_processor.m_token = (String) m_compiler.getTokenQueue().elementAt(m_processor.m_queueMark++); m_processor.m_tokenChar = m_processor.m_token.charAt(0); } else { m_processor.m_token = null; m_processor.m_tokenChar = 0; } } /** * Given a string, return the corresponding keyword token. * * @param key The keyword. * * @return An opcode value. */ final int getKeywordToken(String key) { int tok; try { Integer itok = (Integer) Keywords.m_keywords.get(key); tok = (null != itok) ? itok.intValue() : 0; } catch (NullPointerException npe) { tok = 0; } catch (ClassCastException cce) { tok = 0; } return tok; } /** * Record the current token in the passed vector. * * @param targetStrings Vector of string. */ private void recordTokenString(Vector targetStrings) { int tokPos = getTokenQueuePosFromMap(m_patternMapSize - 1); resetTokenMark(tokPos + 1); if (m_processor.lookahead('(', 1)) { int tok = getKeywordToken(m_processor.m_token); switch (tok) { case OpCodes.NODETYPE_COMMENT : targetStrings.addElement(PsuedoNames.PSEUDONAME_COMMENT); break; case OpCodes.NODETYPE_TEXT : targetStrings.addElement(PsuedoNames.PSEUDONAME_TEXT); break; case OpCodes.NODETYPE_NODE : targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY); break; case OpCodes.NODETYPE_ROOT : targetStrings.addElement(PsuedoNames.PSEUDONAME_ROOT); break; case OpCodes.NODETYPE_ANYELEMENT : targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY); break; case OpCodes.NODETYPE_PI : targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY); break; default : targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY); } } else { if (m_processor.tokenIs('@')) { tokPos++; resetTokenMark(tokPos + 1); } if (m_processor.lookahead(':', 1)) { tokPos += 2; } targetStrings.addElement(m_compiler.getTokenQueue().elementAt(tokPos)); } } /** * Add a token to the token queue. * * * @param s The token. */ private final void addToTokenQueue(String s) { m_compiler.getTokenQueue().addElement(s); } /** * When a seperator token is found, see if there's a element name or * the like to map. * * @param pat The XPath name string. * @param startSubstring The start of the name string. * @param posOfNSSep The position of the namespace seperator (':'). * @param posOfScan The end of the name index. * * @throws javax.xml.transform.TransformerException * * @return -1 always. */ private int mapNSTokens(String pat, int startSubstring, int posOfNSSep, int posOfScan) throws javax.xml.transform.TransformerException { String prefix = ""; if ((startSubstring >= 0) && (posOfNSSep >= 0)) { prefix = pat.substring(startSubstring, posOfNSSep); } String uName; if ((null != m_namespaceContext) &&!prefix.equals("*") &&!prefix.equals("xmlns")) { try { if (prefix.length() > 0) uName = ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix( prefix); else { // Assume last was wildcard. This is not legal according // to the draft. Set the below to true to make namespace // wildcards work. if (false) { addToTokenQueue(":"); String s = pat.substring(posOfNSSep + 1, posOfScan); if (s.length() > 0) addToTokenQueue(s); return -1; } else { uName = ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix( prefix); } } } catch (ClassCastException cce) { uName = m_namespaceContext.getNamespaceForPrefix(prefix); } } else { uName = prefix; } if ((null != uName) && (uName.length() > 0)) { addToTokenQueue(uName); addToTokenQueue(":"); String s = pat.substring(posOfNSSep + 1, posOfScan); if (s.length() > 0) addToTokenQueue(s); } else { // error("Could not locate namespace for prefix: "+prefix); m_processor.error(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE, new String[] {prefix}); //"Prefix must resolve to a namespace: {0}"; /*** Old code commented out 10-Jan-2001 addToTokenQueue(prefix); addToTokenQueue(":"); String s = pat.substring(posOfNSSep + 1, posOfScan); if (s.length() > 0) addToTokenQueue(s); ***/ } return -1; } }