/* * @(#)URLDecoder.java 1.27 04/05/18 * * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. */ package java.net; import java.io.*; /** * Utility class for HTML form decoding. This class contains static methods * for decoding a String from the application/x-www-form-urlencoded * MIME format. *

* To conversion process is the reverse of that used by the URLEncoder class. It is assumed * that all characters in the encoded string are one of the following: * "a" through "z", * "A" through "Z", * "0" through "9", and * "-", "_", * ".", and "*". The * character "%" is allowed but is interpreted * as the start of a special escaped sequence. *

* The following rules are applied in the conversion: *

The alphanumeric characters "a" through * "z", "A" through * "Z" and "0" * through "9" remain the same. *
The special characters ".", * "-", "*", and * "_" remain the same. *
The plus sign "+" is converted into a * space character " " . *
A sequence of the form "%xy" will be * treated as representing a byte where xy is the two-digit * hexadecimal representation of the 8 bits. Then, all substrings * that contain one or more of these byte sequences consecutively * will be replaced by the character(s) whose encoding would result * in those consecutive bytes. * The encoding scheme used to decode these characters may be specified, * or if unspecified, the default encoding of the platform will be used. *

* There are two possible ways in which this decoder could deal with * illegal strings. It could either leave illegal characters alone or * it could throw an {@link java.lang.IllegalArgumentException}. * Which approach the decoder takes is left to the * implementation. * * @author Mark Chamness * @author Michael McCloskey * @version 1.27, 05/18/04 * @since 1.2 */ public class URLDecoder { // The platform default encoding static String dfltEncName = URLEncoder.dfltEncName; /** * Decodes a x-www-form-urlencoded string. * The platform's default encoding is used to determine what characters * are represented by any consecutive sequences of the form * "%xy". * @param s the String to decode * @deprecated The resulting string may vary depending on the platform's * default encoding. Instead, use the decode(String,String) method * to specify the encoding. * @return the newly decoded String */ @Deprecated public static String decode(String s) { String str = null; try { str = decode(s, dfltEncName); } catch (UnsupportedEncodingException e) { // The system should always have the platform default } return str; } /** * Decodes a application/x-www-form-urlencoded string using a specific * encoding scheme. * The supplied encoding is used to determine * what characters are represented by any consecutive sequences of the * form "%xy". *

* Note: The * World Wide Web Consortium Recommendation states that * UTF-8 should be used. Not doing so may introduce * incompatibilites. * * @param s the String to decode * @param enc The name of a supported * character * encoding. * @return the newly decoded String * @exception UnsupportedEncodingException * If character encoding needs to be consulted, but * named character encoding is not supported * @see URLEncoder#encode(java.lang.String, java.lang.String) * @since 1.4 */ public static String decode(String s, String enc) throws UnsupportedEncodingException{ boolean needToChange = false; int numChars = s.length(); StringBuffer sb = new StringBuffer(numChars > 500 ? numChars / 2 : numChars); int i = 0; if (enc.length() == 0) { throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter"); } char c; byte[] bytes = null; while (i < numChars) { c = s.charAt(i); switch (c) { case '+': sb.append(' '); i++; needToChange = true; break; case '%': /* * Starting with this instance of %, process all * consecutive substrings of the form %xy. Each * substring %xy will yield a byte. Convert all * consecutive bytes obtained this way to whatever * character(s) they represent in the provided * encoding. */ try { // (numChars-i)/3 is an upper bound for the number // of remaining bytes if (bytes == null) bytes = new byte[(numChars-i)/3]; int pos = 0; while ( ((i+2) < numChars) && (c=='%')) { bytes[pos++] = (byte)Integer.parseInt(s.substring(i+1,i+3),16); i+= 3; if (i < numChars) c = s.charAt(i); } // A trailing, incomplete byte encoding such as // "%x" will cause an exception to be thrown if ((i < numChars) && (c=='%')) throw new IllegalArgumentException( "URLDecoder: Incomplete trailing escape (%) pattern"); sb.append(new String(bytes, 0, pos, enc)); } catch (NumberFormatException e) { throw new IllegalArgumentException( "URLDecoder: Illegal hex characters in escape (%) pattern - " + e.getMessage()); } needToChange = true; break; default: sb.append(c); i++; break; } } return (needToChange? sb.toString() : s); } }