/*
 * @(#)StringTokenizer.java 1.34 04/05/05
 *
 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
 * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
 */

package java.util;

import java.lang.*;

/**
 * The string tokenizer class allows an application to break a
 * string into tokens. The tokenization method is much simpler than
 * the one used by the <code>StreamTokenizer</code> class. The
 * <code>StringTokenizer</code> methods do not distinguish among
 * identifiers, numbers, and quoted strings, nor do they recognize
 * and skip comments.
 * <p>
 * The set of delimiters (the characters that separate tokens) may
 * be specified either at creation time or on a per-token basis.
 * <p>
 * An instance of <code>StringTokenizer</code> behaves in one of two
 * ways, depending on whether it was created with the
 * <code>returnDelims</code> flag having the value <code>true</code>
 * or <code>false</code>:
 * <ul>
 * <li>If the flag is <code>false</code>, delimiter characters serve to
 *     separate tokens. A token is a maximal sequence of consecutive
 *     characters that are not delimiters.
 * <li>If the flag is <code>true</code>, delimiter characters are themselves
 *     considered to be tokens. A token is thus either one delimiter
 *     character, or a maximal sequence of consecutive characters that are
 *     not delimiters.
 * </ul>
 * <p>
 * A <code>StringTokenizer</code> object internally maintains a current
 * position within the string to be tokenized. Some operations advance this
 * current position past the characters processed.
 * <p>
 * A token is returned by taking a substring of the string that was used to
 * create the <code>StringTokenizer</code> object.
 * <p>
 * The following is one example of the use of the tokenizer. The code:
 * <blockquote><pre>
 *     StringTokenizer st = new StringTokenizer("this is a test");
 *     while (st.hasMoreTokens()) {
 *         System.out.println(st.nextToken());
 *     }
 * </pre></blockquote>
 * <p>
 * prints the following output:
 * <blockquote><pre>
 *     this
 *     is
 *     a
 *     test
 * </pre></blockquote>
 *
 * <p>
 * <code>StringTokenizer</code> is a legacy class that is retained for
 * compatibility reasons although its use is discouraged in new code. It is
 * recommended that anyone seeking this functionality use the
 * <code>split</code> method of <code>String</code> or the
 * <code>java.util.regex</code> package instead.
 * <p>
 * The following example illustrates how the <code>String.split</code>
 * method can be used to break up a string into its basic tokens:
 * <blockquote><pre>
 *     String[] result = "this is a test".split("\\s");
 *     for (int x=0; x&lt;result.length; x++)
 *         System.out.println(result[x]);
 * </pre></blockquote>
 * <p>
 * prints the following output:
 * <blockquote><pre>
 *     this
 *     is
 *     a
 *     test
 * </pre></blockquote>
 *
 * @author  unascribed
 * @version 1.34, 05/05/04
 * @see     java.io.StreamTokenizer
 * @since   JDK1.0
 */
public class StringTokenizer implements Enumeration<Object> {
    /** Index in <code>str</code> where the next scan starts. */
    private int currentPosition;

    /**
     * Position pre-computed by <code>hasMoreTokens()</code>, or -1 when no
     * pre-computed position is available.
     */
    private int newPosition;

    /** Length of <code>str</code>; scanning never goes past this index. */
    private int maxPosition;

    /** The string being tokenized. */
    private String str;

    /** The current delimiter set; may be <code>null</code>. */
    private String delimiters;

    /** Whether delimiters are themselves returned as tokens. */
    private boolean retDelims;

    /**
     * Set by <code>nextToken(String)</code> so that the following
     * <code>nextToken()</code> ignores any position cached in
     * <code>newPosition</code>.
     */
    private boolean delimsChanged;

    /**
     * maxDelimCodePoint stores the value of the delimiter character with the
     * highest value. It is used to optimize the detection of delimiter
     * characters.
     *
     * It is unlikely to provide any optimization benefit in the
     * hasSurrogates case because most string characters will be
     * smaller than the limit, but we keep it so that the two code
     * paths remain similar.
     */
    private int maxDelimCodePoint;

    /**
     * If delimiters include any surrogates (including surrogate
     * pairs), hasSurrogates is true and the tokenizer uses the
     * different code path. This is because String.indexOf(int)
     * doesn't handle unpaired surrogates as a single character.
     */
    private boolean hasSurrogates = false;

    /**
     * When hasSurrogates is true, delimiters are converted to code
     * points and isDelimiter(int) is used to determine if the given
     * codepoint is a delimiter. The contents are only meaningful while
     * hasSurrogates is true.
     */
    private int[] delimiterCodePoints;

    /**
     * Set maxDelimCodePoint to the highest char in the delimiter set, and
     * recompute hasSurrogates and delimiterCodePoints for the current
     * delimiter string.
     */
    private void setMaxDelimCodePoint() {
        // Recompute the flag from scratch on every call: a tokenizer whose
        // previous delimiter set contained surrogates must not stay on the
        // code-point path once nextToken(String) installs a plain set.
        hasSurrogates = false;

        if (delimiters == null) {
            maxDelimCodePoint = 0;
            return;
        }

        int m = 0;
        int c;
        int count = 0;
        for (int i = 0; i < delimiters.length(); i += Character.charCount(c)) {
            c = delimiters.charAt(i);
            if (c >= Character.MIN_HIGH_SURROGATE && c <= Character.MAX_LOW_SURROGATE) {
                // Any surrogate (paired or not) forces the code-point path.
                c = delimiters.codePointAt(i);
                hasSurrogates = true;
            }
            if (m < c) {
                m = c;
            }
            count++;
        }
        maxDelimCodePoint = m;

        if (hasSurrogates) {
            // count is the number of code points walked by the loop above.
            delimiterCodePoints = new int[count];
            for (int i = 0, j = 0; i < count; i++, j += Character.charCount(c)) {
                c = delimiters.codePointAt(j);
                delimiterCodePoints[i] = c;
            }
        }
    }

    /**
     * Constructs a string tokenizer for the specified string. All
     * characters in the <code>delim</code> argument are the delimiters
     * for separating tokens.
     * <p>
     * If the <code>returnDelims</code> flag is <code>true</code>, then
     * the delimiter characters are also returned as tokens. Each
     * delimiter is returned as a string consisting of a single Unicode
     * code point of the delimiter (one or two <code>char</code>s). If the
     * flag is <code>false</code>, the delimiter characters are skipped and
     * only serve as separators between tokens.
     * <p>
     * Note that if <code>delim</code> is <code>null</code>, this constructor
     * does not throw an exception. However, trying to invoke other methods on
     * the resulting <code>StringTokenizer</code> may result in a
     * <code>NullPointerException</code>.
     *
     * @param   str            a string to be parsed.
     * @param   delim          the delimiters.
     * @param   returnDelims   flag indicating whether to return the delimiters
     *                         as tokens.
     * @exception NullPointerException if str is <code>null</code>
     */
    public StringTokenizer(String str, String delim, boolean returnDelims) {
        currentPosition = 0;
        newPosition = -1;
        delimsChanged = false;
        this.str = str;
        maxPosition = str.length();
        delimiters = delim;
        retDelims = returnDelims;
        setMaxDelimCodePoint();
    }

    /**
     * Constructs a string tokenizer for the specified string. The
     * characters in the <code>delim</code> argument are the delimiters
     * for separating tokens. Delimiter characters themselves will not
     * be treated as tokens.
     * <p>
     * Note that if <code>delim</code> is <code>null</code>, this constructor
     * does not throw an exception. However, trying to invoke other methods on
     * the resulting <code>StringTokenizer</code> may result in a
     * <code>NullPointerException</code>.
     *
     * @param   str     a string to be parsed.
     * @param   delim   the delimiters.
     * @exception NullPointerException if str is <code>null</code>
     */
    public StringTokenizer(String str, String delim) {
        this(str, delim, false);
    }

    /**
     * Constructs a string tokenizer for the specified string. The
     * tokenizer uses the default delimiter set, which is
     * <code>"&nbsp;&#92;t&#92;n&#92;r&#92;f"</code>: the space character,
     * the tab character, the newline character, the carriage-return character,
     * and the form-feed character. Delimiter characters themselves will
     * not be treated as tokens.
     *
     * @param   str   a string to be parsed.
     * @exception NullPointerException if str is <code>null</code>
     */
    public StringTokenizer(String str) {
        this(str, " \t\n\r\f", false);
    }

    /**
     * Skips delimiters starting from the specified position. If retDelims
     * is false, returns the index of the first non-delimiter character at or
     * after startPos. If retDelims is true, startPos is returned.
     *
     * @exception NullPointerException if the current delimiter set is
     *            <code>null</code>
     */
    private int skipDelimiters(int startPos) {
        if (delimiters == null) {
            throw new NullPointerException();
        }

        int position = startPos;
        while (!retDelims && position < maxPosition) {
            if (!hasSurrogates) {
                char c = str.charAt(position);
                // The max-code-point test is a cheap pre-filter before indexOf.
                if ((c > maxDelimCodePoint) || (delimiters.indexOf(c) < 0)) {
                    break;
                }
                position++;
            } else {
                int c = str.codePointAt(position);
                if ((c > maxDelimCodePoint) || !isDelimiter(c)) {
                    break;
                }
                position += Character.charCount(c);
            }
        }
        return position;
    }

    /**
     * Skips ahead from startPos and returns the index of the next delimiter
     * character encountered, or maxPosition if no such delimiter is found.
     * When retDelims is true and startPos itself is a delimiter, the index
     * just past that single delimiter is returned instead, so that the
     * delimiter forms a token of its own.
     */
    private int scanToken(int startPos) {
        int position = startPos;
        while (position < maxPosition) {
            if (!hasSurrogates) {
                char c = str.charAt(position);
                if ((c <= maxDelimCodePoint) && (delimiters.indexOf(c) >= 0)) {
                    break;
                }
                position++;
            } else {
                int c = str.codePointAt(position);
                if ((c <= maxDelimCodePoint) && isDelimiter(c)) {
                    break;
                }
                position += Character.charCount(c);
            }
        }

        // Nothing was consumed: the scan started on a delimiter. In
        // retDelims mode that delimiter is the token, so step over it.
        if (retDelims && (startPos == position)) {
            if (!hasSurrogates) {
                char c = str.charAt(position);
                if ((c <= maxDelimCodePoint) && (delimiters.indexOf(c) >= 0)) {
                    position++;
                }
            } else {
                int c = str.codePointAt(position);
                if ((c <= maxDelimCodePoint) && isDelimiter(c)) {
                    position += Character.charCount(c);
                }
            }
        }
        return position;
    }

    /**
     * Returns true if the given code point is one of the entries of
     * delimiterCodePoints. Only used on the hasSurrogates code path.
     */
    private boolean isDelimiter(int codePoint) {
        for (int i = 0; i < delimiterCodePoints.length; i++) {
            if (delimiterCodePoints[i] == codePoint) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tests if there are more tokens available from this tokenizer's string.
     * If this method returns <tt>true</tt>, then a subsequent call to
     * <tt>nextToken</tt> with no argument will successfully return a token.
     *
     * @return  <code>true</code> if and only if there is at least one token
     *          in the string after the current position; <code>false</code>
     *          otherwise.
     */
    public boolean hasMoreTokens() {
        /*
         * Temporarily store this position and use it in the following
         * nextToken() method only if the delimiters haven't been changed in
         * that nextToken() invocation.
         */
        newPosition = skipDelimiters(currentPosition);
        return (newPosition < maxPosition);
    }

    /**
     * Returns the next token from this string tokenizer.
     *
     * @return     the next token from this string tokenizer.
     * @exception  NoSuchElementException  if there are no more tokens in this
     *               tokenizer's string.
     */
    public String nextToken() {
        /*
         * If the next position was already computed in hasMoreTokens() and
         * the delimiters have NOT changed between that computation and this
         * invocation, then use the computed value; otherwise skip
         * delimiters again using the current delimiter set.
         */
        currentPosition = (newPosition >= 0 && !delimsChanged)
            ? newPosition
            : skipDelimiters(currentPosition);

        /* Reset these anyway */
        delimsChanged = false;
        newPosition = -1;

        if (currentPosition >= maxPosition) {
            throw new NoSuchElementException();
        }
        int start = currentPosition;
        currentPosition = scanToken(currentPosition);
        return str.substring(start, currentPosition);
    }

    /**
     * Returns the next token in this string tokenizer's string. First,
     * the set of characters considered to be delimiters by this
     * <tt>StringTokenizer</tt> object is changed to be the characters in
     * the string <tt>delim</tt>. Then the next token in the string
     * after the current position is returned. The current position is
     * advanced beyond the recognized token. The new delimiter set
     * remains the default after this call.
     *
     * @param      delim   the new delimiters.
     * @return     the next token, after switching to the new delimiter set.
     * @exception  NoSuchElementException  if there are no more tokens in this
     *               tokenizer's string.
     * @exception NullPointerException if delim is <CODE>null</CODE>
     */
    public String nextToken(String delim) {
        delimiters = delim;

        /* delimiter string specified, so set the appropriate flag. */
        delimsChanged = true;

        setMaxDelimCodePoint();
        return nextToken();
    }

    /**
     * Returns the same value as the <code>hasMoreTokens</code>
     * method. It exists so that this class can implement the
     * <code>Enumeration</code> interface.
     *
     * @return  <code>true</code> if there are more tokens;
     *          <code>false</code> otherwise.
     * @see     java.util.Enumeration
     * @see     java.util.StringTokenizer#hasMoreTokens()
     */
    public boolean hasMoreElements() {
        return hasMoreTokens();
    }

    /**
     * Returns the same value as the <code>nextToken</code> method,
     * except that its declared return value is <code>Object</code> rather than
     * <code>String</code>. It exists so that this class can implement the
     * <code>Enumeration</code> interface.
     *
     * @return     the next token in the string.
     * @exception  NoSuchElementException  if there are no more tokens in this
     *               tokenizer's string.
     * @see        java.util.Enumeration
     * @see        java.util.StringTokenizer#nextToken()
     */
    public Object nextElement() {
        return nextToken();
    }

    /**
     * Calculates the number of times that this tokenizer's
     * <code>nextToken</code> method can be called before it generates an
     * exception. The current position is not advanced.
     *
     * @return  the number of tokens remaining in the string using the current
     *          delimiter set.
     * @see     java.util.StringTokenizer#nextToken()
     */
    public int countTokens() {
        int count = 0;
        // Walk a local cursor so the tokenizer's own position is untouched.
        int currpos = currentPosition;
        while (currpos < maxPosition) {
            currpos = skipDelimiters(currpos);
            if (currpos >= maxPosition) {
                break;
            }
            currpos = scanToken(currpos);
            count++;
        }
        return count;
    }
}