View Javadoc

1   /* 
2    * Copyright 2002-2004 The Apache Software Foundation
3    * Licensed  under the  Apache License,  Version 2.0  (the "License");
4    * you may not use  this file  except in  compliance with the License.
5    * You may obtain a copy of the License at 
6    * 
7    *   http://www.apache.org/licenses/LICENSE-2.0
8    * 
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed  under the  License is distributed on an "AS IS" BASIS,
11   * WITHOUT  WARRANTIES OR CONDITIONS  OF ANY KIND, either  express  or
12   * implied.
13   * 
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.struts.flow.core.source;
18  
19  import java.io.ByteArrayOutputStream;
20  import java.io.File;
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.io.OutputStream;
24  import java.io.OutputStreamWriter;
25  import java.io.UnsupportedEncodingException;
26  import java.util.BitSet;
27  import java.util.Iterator;
28  
29  /***
30   *
31   * Utility class for source resolving.
32   *
33   * @author <a href="mailto:dev@avalon.apache.org">Avalon Development Team</a>
34   * @version CVS $Revision: 1.5 $ $Date: 2004/02/28 11:47:26 $
35   */
36  public final class SourceUtil
37  {
38      private static final char[] alphabet = new char[]
39      {
40          'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', // 0 to 7
41          'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', // 8 to 15
42          'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', // 16 to 23
43          'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', // 24 to 31
44          'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', // 32 to 39
45          'o', 'p', 'q', 'r', 's', 't', 'u', 'v', // 40 to 47
46          'w', 'x', 'y', 'z', '0', '1', '2', '3', // 48 to 55
47          '4', '5', '6', '7', '8', '9', '+', '/'}; // 56 to 63
48  
49  
50      /***
51       * Append parameters to the uri
52       * Each parameter is appended to the uri with "parameter=value",
53       * the parameters are separated by "&".
54       */
55      public static String appendParameters( String uri,
56                                             SourceParameters parameters )
57      {
58          if( parameters != null )
59          {
60              StringBuffer buffer = new StringBuffer( uri );
61              Iterator keys = parameters.getParameterNames();
62              String current;
63              char separator = ( uri.indexOf( "?" ) == -1 ? '?' : '&' );
64              Iterator values;
65  
66              while( keys.hasNext() == true )
67              {
68                  current = (String)keys.next();
69                  values = parameters.getParameterValues( current );
70                  while( values.hasNext() == true )
71                  {
72                      buffer.append( separator )
73                          .append( current )
74                          .append( '=' )
75                          .append( SourceUtil.encode( (String)values.next() ) );
76                      separator = '&';
77                  }
78              }
79              return buffer.toString();
80          }
81  
82          return uri;
83      }
84  
85      /***
86       * BASE 64 encoding.
87       * See also RFC 1421
88       */
89      public static String encodeBASE64( String s )
90      {
91          return encodeBASE64( s.getBytes() );
92      }
93  
94      /***
95       * BASE 64 encoding.
96       * See also RFC 1421
97       */
98      public static String encodeBASE64( byte[] octetString )
99      {
100         int bits24;
101         int bits6;
102 
103         char[] out = new char[ ( ( octetString.length - 1 ) / 3 + 1 ) * 4 ];
104 
105         int outIndex = 0;
106         int i = 0;
107 
108         while( ( i + 3 ) <= octetString.length )
109         {
110             // store the octets
111             bits24 = ( octetString[ i++ ] & 0xFF ) << 16;
112             bits24 |= ( octetString[ i++ ] & 0xFF ) << 8;
113             bits24 |= ( octetString[ i++ ] & 0xFF ) << 0;
114 
115             bits6 = ( bits24 & 0x00FC0000 ) >> 18;
116             out[ outIndex++ ] = alphabet[ bits6 ];
117             bits6 = ( bits24 & 0x0003F000 ) >> 12;
118             out[ outIndex++ ] = alphabet[ bits6 ];
119             bits6 = ( bits24 & 0x00000FC0 ) >> 6;
120             out[ outIndex++ ] = alphabet[ bits6 ];
121             bits6 = ( bits24 & 0x0000003F );
122             out[ outIndex++ ] = alphabet[ bits6 ];
123         }
124 
125         if( octetString.length - i == 2 )
126         {
127             // store the octets
128             bits24 = ( octetString[ i ] & 0xFF ) << 16;
129             bits24 |= ( octetString[ i + 1 ] & 0xFF ) << 8;
130 
131             bits6 = ( bits24 & 0x00FC0000 ) >> 18;
132             out[ outIndex++ ] = alphabet[ bits6 ];
133             bits6 = ( bits24 & 0x0003F000 ) >> 12;
134             out[ outIndex++ ] = alphabet[ bits6 ];
135             bits6 = ( bits24 & 0x00000FC0 ) >> 6;
136             out[ outIndex++ ] = alphabet[ bits6 ];
137 
138             // padding
139             out[ outIndex++ ] = '=';
140         }
141         else if( octetString.length - i == 1 )
142         {
143             // store the octets
144             bits24 = ( octetString[ i ] & 0xFF ) << 16;
145 
146             bits6 = ( bits24 & 0x00FC0000 ) >> 18;
147             out[ outIndex++ ] = alphabet[ bits6 ];
148             bits6 = ( bits24 & 0x0003F000 ) >> 12;
149             out[ outIndex++ ] = alphabet[ bits6 ];
150 
151             // padding
152             out[ outIndex++ ] = '=';
153             out[ outIndex++ ] = '=';
154         }
155 
156         return new String( out );
157     }
158 
159     /*** A BitSet defining the characters which don't need encoding */
160     static BitSet charactersDontNeedingEncoding;
161     static final int characterCaseDiff = ( 'a' - 'A' );
162 
163     /*** Initialize the BitSet */
164     static
165     {
166         charactersDontNeedingEncoding = new BitSet( 256 );
167         int i;
168         for( i = 'a'; i <= 'z'; i++ )
169         {
170             charactersDontNeedingEncoding.set( i );
171         }
172         for( i = 'A'; i <= 'Z'; i++ )
173         {
174             charactersDontNeedingEncoding.set( i );
175         }
176         for( i = '0'; i <= '9'; i++ )
177         {
178             charactersDontNeedingEncoding.set( i );
179         }
180         charactersDontNeedingEncoding.set( '-' );
181         charactersDontNeedingEncoding.set( '_' );
182         charactersDontNeedingEncoding.set( '.' );
183         charactersDontNeedingEncoding.set( '*' );
184         charactersDontNeedingEncoding.set( '"' );
185     }
186 
187     /***
188      * Translates a string into <code>x-www-form-urlencoded</code> format.
189      *
190      * @param   s   <code>String</code> to be translated.
191      * @return  the translated <code>String</code>.
192      */
193     public static String encode( String s )
194     {
195         final StringBuffer out = new StringBuffer( s.length() );
196         final ByteArrayOutputStream buf = new ByteArrayOutputStream( 32 );
197         final OutputStreamWriter writer = new OutputStreamWriter( buf );
198         for( int i = 0; i < s.length(); i++ )
199         {
200             int c = s.charAt( i );
201             if( charactersDontNeedingEncoding.get( c ) )
202             {
203                 out.append( (char)c );
204             }
205             else
206             {
207                 try
208                 {
209                     writer.write( c );
210                     writer.flush();
211                 }
212                 catch( IOException e )
213                 {
214                     buf.reset();
215                     continue;
216                 }
217                 byte[] ba = buf.toByteArray();
218                 for( int j = 0; j < ba.length; j++ )
219                 {
220                     out.append( '%' );
221                     char ch = Character.forDigit( ( ba[ j ] >> 4 ) & 0xF, 16 );
222                     // converting to use uppercase letter as part of
223                     // the hex value if ch is a letter.
224                     if( Character.isLetter( ch ) )
225                     {
226                         ch -= characterCaseDiff;
227                     }
228                     out.append( ch );
229                     ch = Character.forDigit( ba[ j ] & 0xF, 16 );
230                     if( Character.isLetter( ch ) )
231                     {
232                         ch -= characterCaseDiff;
233                     }
234                     out.append( ch );
235                 }
236                 buf.reset();
237             }
238         }
239 
240         return out.toString();
241     }
242 
243     /***
244      * Translates a string into <code>x-www-form-urlencoded</code> format
245      * with specified encoding
246      *
247      * @param   s   <code>String</code> to be translated.
248      * @param	enc The name of a supported charset
249      * @return  the translated <code>String</code>.
250      * @throws UnsupportedEncodingException
251      */
252     public static String encode( String s, String enc ) throws UnsupportedEncodingException
253     {
254         // Why not use the java.net.URLEncoder for this purpose?
255         final StringBuffer out = new StringBuffer( s.length() );
256         final ByteArrayOutputStream buf = new ByteArrayOutputStream( 32 );
257         final OutputStreamWriter writer = new OutputStreamWriter( buf, enc );
258         for( int i = 0; i < s.length(); i++ )
259         {
260             int c = s.charAt( i );
261             if( charactersDontNeedingEncoding.get( c ) )
262             {
263                 out.append( (char)c );
264             }
265             else
266             {
267                 try
268                 {
269                     writer.write( c );
270                     writer.flush();
271                 }
272                 catch( IOException e )
273                 {
274                     buf.reset();
275                     continue;
276                 }
277                 byte[] ba = buf.toByteArray();
278                 for( int j = 0; j < ba.length; j++ )
279                 {
280                     out.append( '%' );
281                     char ch = Character.forDigit( ( ba[ j ] >> 4 ) & 0xF, 16 );
282                     // converting to use uppercase letter as part of
283                     // the hex value if ch is a letter.
284                     if( Character.isLetter( ch ) )
285                     {
286                         ch -= characterCaseDiff;
287                     }
288                     out.append( ch );
289                     ch = Character.forDigit( ba[ j ] & 0xF, 16 );
290                     if( Character.isLetter( ch ) )
291                     {
292                         ch -= characterCaseDiff;
293                     }
294                     out.append( ch );
295                 }
296                 buf.reset();
297             }
298         }
299 
300         return out.toString();
301     }
302 
303     /***
304      * Return a <code>File</code> object associated with the <code>Source</code> object.
305      *
306      * @return The corresponding <code>File</code> object or null if the
307      *         <code>Source</code> object does not point to a file URI.
308      */
309     public static File getFile( Source source )
310     {
311         final String systemId = source.getURI();
312         if( systemId.startsWith( "file:" ) )
313         {
314             return new File( systemId.substring( 5 ) );
315         }
316         return null;
317     }
318 
319     /***
320      * Move the source to a specified destination.
321      *
322      * @param source Source of the source.
323      * @param destination Destination of the source.
324      *
325      * @throws SourceException If an exception occurs during
326      *                         the move.
327      */
328     static public void move(Source source,
329                               Source destination)
330     throws SourceException
331     {
332         if (source instanceof MoveableSource
333             && source.getClass().equals(destination.getClass()))
334         {
335             ((MoveableSource)source).moveTo(destination);
336         }
337         else if (source instanceof ModifiableSource)
338         {
339             copy(source, destination);
340             ((ModifiableSource) source).delete();
341         }
342         else
343         {
344             throw new SourceException("Source '"+source.getURI()+ "' is not writeable");
345         }
346     }
347 
348     /***
349      * Get the position of the scheme-delimiting colon in an absolute URI, as specified
350      * by <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>, appendix A. This method is
351      * primarily useful for {@link Source} implementors that want to separate
352      * the scheme part from the specific part of an URI.
353      * <p>
354      * Use this method when you need both the scheme and the scheme-specific part of an URI,
355      * as calling successively {@link #getScheme(String)} and {@link #getSpecificPart(String)}
356      * will call this method twice, and as such won't be efficient.
357      *
358      * @param uri the URI
359      * @return int the scheme-delimiting colon, or <code>-1</code> if not found.
360      */
361     public static int indexOfSchemeColon(String uri)
362     {
363         // absoluteURI   = scheme ":" ( hier_part | opaque_part )
364         //
365         // scheme        = alpha *( alpha | digit | "+" | "-" | "." )
366         //
367         // alpha         = lowalpha | upalpha
368         //
369         // lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
370         //            "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
371         //            "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
372         //
373         // upalpha  = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
374         //            "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
375         //            "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
376         //
377         // digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
378         //            "8" | "9"
379 
380         // Must have at least one character followed by a colon
381         if (uri == null || uri.length() < 2)
382         {
383             return -1;
384         }
385 
386         // Check that first character is alpha
387         // (lowercase first since it's the most common case)
388         char ch = uri.charAt(0);
389         if ( (ch < 'a' || ch > 'z') &&
390              (ch < 'A' || ch > 'Z') )
391         {
392             // Invalid first character
393             return -1;
394         }
395 
396         int pos = uri.indexOf(':');
397         if (pos != -1)
398         {
399             // Check that every character before the colon is in the allowed range
400             // (the first one was tested above)
401             for (int i = 1; i < pos; i++)
402             {
403                 ch = uri.charAt(i);
404                 if ( (ch < 'a' || ch > 'z') &&
405                      (ch < 'A' || ch > 'Z') &&
406                      (ch < '0' || ch > '9') &&
407                      ch != '+' && ch != '-' && ch != '.')
408                 {
409                     return -1;
410                 }
411             }
412         }
413 
414         return pos;
415     }
416 
417     /***
418      * Get the scheme of an absolute URI.
419      *
420      * @param uri the absolute URI
421      * @return the URI scheme
422      */
423     public static String getScheme(String uri)
424     {
425         int pos = indexOfSchemeColon(uri);
426         return (pos == -1) ? null : uri.substring(0, pos);
427     }
428 
429     /***
430      * Get the scheme-specific part of an absolute URI. Note that this includes everything
431      * after the separating colon, including the fragment, if any (RFC 2396 separates it
432      * from the scheme-specific part).
433      *
434      * @param uri the absolute URI
435      * @return the scheme-specific part of the URI
436      */
437     public static String getSpecificPart(String uri)
438     {
439         int pos = indexOfSchemeColon(uri);
440         return (pos == -1) ? null : uri.substring(pos+1);
441     }
442 
443     /***
444      * Copy the source to a specified destination.
445      *
446      * @param source Source of the source.
447      * @param destination Destination of the source.
448      *
449      * @throws SourceException If an exception occurs during
450      *                         the copy.
451      */
452     static public void copy(Source source,
453                             Source destination)
454         throws SourceException
455     {
456         if (source instanceof MoveableSource
457             && source.getClass().equals(destination.getClass()))
458         {
459             ((MoveableSource) source).copyTo(destination);
460         }
461         else
462         {
463             if ( !(destination instanceof ModifiableSource)) {
464                 throw new SourceException("Source '"+
465                                           destination.getURI()+
466                                           "' is not writeable");
467             }
468             
469             IOException firstE = null;
470             ModifiableSource modDestination = (ModifiableSource)destination;
471             try
472             {
473                 InputStream in = source.getInputStream();
474                 try
475                 {
476                     OutputStream out = modDestination.getOutputStream();
477                     try
478                     {
479                         try
480                         {
481                             copy(in, out);
482                         }
483                         catch ( IOException e )
484                         {
485                             // Remebver the original exception in case there are problems closing
486                             //  any streams.
487                             firstE = e;
488                             
489                             // If possible, cancel the destination.
490                             if ( modDestination.canCancel( out ) )
491                             {
492                                 modDestination.cancel( out );
493                                 out = null;
494                             }
495                         }
496                     }
497                     finally
498                     {
499                         // out may have already been closed if there was a problem.
500                         if ( out != null )
501                         {
502                             out.close();
503                         }
504                     }
505                 }
506                 finally
507                 {
508                     in.close();
509                 }
510             } catch (IOException ioe) {
511                 if ( firstE == null )
512                 {
513                     firstE = ioe;
514                 }
515             }
516             
517             // If there were any problems then wrap the original exception in a SourceException.
518             if ( firstE != null )
519             {
520                 throw new SourceException("Could not copy source '"+
521                                           source.getURI()+"' to '"+
522                                           destination.getURI()+"' :"+
523                                           firstE.getMessage(), firstE);
524             }
525         }
526     }
527 
528     /***
529      * Copy the contents of an <code>InputStream</code> to an <code>OutputStream</code>.
530      *
531      * @param in
532      * @param out
533      * @throws IOException
534      */
535     static public void copy(InputStream in, OutputStream out) throws IOException
536     {
537         byte[] buffer = new byte[8192];
538         int length = -1;
539 
540         while ((length = in.read(buffer))>-1) {
541             out.write(buffer, 0, length);
542         }
543         in.close();
544         out.flush();
545         out.close();
546     }
547 
548     /***
549      * Calls absolutize(url1, url2, false).
550      */
551     public static String absolutize(String url1, String url2)
552     {
553         return absolutize(url1, url2, false, true);
554     }
555 
556     /***
557      * Calls absolutize(url1, url2, false, true).
558      */
559     public static String absolutize(String url1, String url2, boolean treatAuthorityAsBelongingToPath)
560     {
561         return absolutize(url1, url2, treatAuthorityAsBelongingToPath, true);
562     }
563 
564     /***
565      * Applies a location to a baseURI. This is done as described in RFC 2396 section 5.2.
566      *
567      * @param url1 the baseURI
568      * @param url2 the location
569      * @param treatAuthorityAsBelongingToPath considers the authority to belong to the path. These
570      * special kind of URIs are used in the Apache Cocoon project.
571      * @param normalizePath should the path be normalized, i.e. remove ../ and /./ etc.
572      */
573     public static String absolutize(String url1, String url2, boolean treatAuthorityAsBelongingToPath, boolean normalizePath)
574     {
575         if (url1 == null)
576             return url2;
577 
578         // If the URL contains a scheme (and thus is already absolute), don't do any further work
579         if (getScheme(url2) != null)
580             return url2;
581 
582         // parse the urls into parts
583         // if the second url contains a scheme, it is not relative so return it right away (part 3 of the algorithm)
584         String[] url1Parts = parseUrl(url1);
585         String[] url2Parts = parseUrl(url2);
586 
587         if (treatAuthorityAsBelongingToPath)
588             return absolutizeWithoutAuthority(url1Parts, url2Parts);
589 
590         // check if it is a reference to the current document (part 2 of the algorithm)
591         if (url2Parts[PATH].equals("") && url2Parts[QUERY] == null && url2Parts[AUTHORITY] == null)
592             return makeUrl(url1Parts[SCHEME], url1Parts[AUTHORITY], url1Parts[PATH], url1Parts[QUERY], url2Parts[FRAGMENT]);
593 
594         // it is a network reference (part 4 of the algorithm)
595         if (url2Parts[AUTHORITY] != null)
596             return makeUrl(url1Parts[SCHEME], url2Parts[AUTHORITY], url2Parts[PATH], url2Parts[QUERY], url2Parts[QUERY]);
597 
598         String url1Path = url1Parts[PATH];
599         String url2Path = url2Parts[PATH];
600 
601         // if the path starts with a slash (part 5 of the algorithm)
602         if (url2Path != null && url2Path.length() > 0 && url2Path.charAt(0) == '/')
603             return makeUrl(url1Parts[SCHEME], url1Parts[AUTHORITY], url2Parts[PATH], url2Parts[QUERY], url2Parts[QUERY]);
604 
605         // combine the 2 paths
606         String path = stripLastSegment(url1Path);
607         path = path + (path.endsWith("/") ? "" : "/") + url2Path;
608         if (normalizePath)
609             path = normalize(path);
610 
611         return makeUrl(url1Parts[SCHEME], url1Parts[AUTHORITY], path, url2Parts[QUERY], url2Parts[FRAGMENT]);
612     }
613 
614     /***
615      * Absolutizes URIs whereby the authority part is considered to be a part of the path.
616      * This special kind of URIs is used in the Apache Cocoon project for the cocoon and context protocols.
617      * This method is internally used by {@link #absolutize(String, String, boolean, boolean)}.
618      */
619     private static String absolutizeWithoutAuthority(String[] url1Parts, String[] url2Parts)
620     {
621         String authority1 = url1Parts[AUTHORITY];
622         String authority2 = url2Parts[AUTHORITY];
623 
624         String path1 = url1Parts[PATH];
625         String path2 = url2Parts[PATH];
626 
627         if (authority1 != null)
628             path1 = "//" + authority1 + path1;
629         if (authority2 != null)
630             path2 = "//" + authority2 + path2;
631 
632         String path = stripLastSegment(path1);
633         path = path + (path.endsWith("/") ? "" : "/") + path2;
634         path = normalize(path);
635 
636         String scheme = url1Parts[SCHEME];
637         return scheme + ":" + path;
638     }
639 
640     private static String stripLastSegment(String path)
641     {
642         int i = path.lastIndexOf('/');
643         if(i > -1)
644             return path.substring(0, i + 1);
645         return path;
646     }
647 
648     /***
649      * Removes things like &lt;segment&gt;/../ or ./, as described in RFC 2396 in
650      * step 6 of section 5.2.
651      */
652     private static String normalize(String path)
653     {
654         // replace all /./ with /
655         int i = path.indexOf("/./");
656         while (i > -1)
657         {
658             path = path.substring(0, i + 1) + path.substring(i + 3);
659             i = path.indexOf("/./");
660         }
661 
662         if (path.endsWith("/."))
663             path = path.substring(0, path.length() - 1);
664 
665         int f = path.indexOf("/../");
666         while (f > 0)
667         {
668             int sb = path.lastIndexOf("/", f - 1);
669             if (sb > - 1)
670                 path = path.substring(0, sb + 1) + (path.length() >= f + 4 ? path.substring(f + 4) : "");
671             f = path.indexOf("/../");
672         }
673 
674         if (path.length() > 3 && path.endsWith("/.."))
675         {
676             int sb = path.lastIndexOf("/", path.length() - 4);
677             String segment = path.substring(sb, path.length() - 3);
678             if (!segment.equals(".."))
679             {
680                 path = path.substring(0, sb + 1);
681             }
682         }
683 
684         return path;
685     }
686 
687     /***
688      * Assembles an URL from the given URL parts, each of these parts can be null.
689      * Used internally by {@link #absolutize(String, String, boolean, boolean)}.
690      */
691     private static String makeUrl(String scheme, String authority, String path, String query, String fragment)
692     {
693         StringBuffer url = new StringBuffer();
694         if (scheme != null)
695             url.append(scheme).append(':');
696 
697         if (authority != null)
698             url.append("//").append(authority);
699 
700         if (path != null)
701             url.append(path);
702 
703         if (query != null)
704             url.append('?').append(query);
705 
706         if (fragment != null)
707             url.append('#').append(fragment);
708 
709         return url.toString();
710     }
711 
712     public static final int SCHEME = 0;
713     public static final int AUTHORITY = 1;
714     public static final int PATH = 2;
715     public static final int QUERY = 3;
716     public static final int FRAGMENT = 4;
717 
718     /***
719      * Parses an URL into the following parts: scheme, authority, path, query and fragment identifier.
720      *
721      * <p>The parsing is designed to be robust in the sense that it will never fail, even when an invalid
722      * URL is given. The parser will simply look for the most important delimiter characters. Basically
723      * it does the same as what would be achieved using the following regular expression (from RFC 2396):
724      * <pre>
725      * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
726      *  12            3  4          5       6  7        8 9
727      * </pre>
728      * but without actually using the regular expression.
729      *
730      * <p>The result is returned as a string array, use the constants SCHEME, AUTHORITY, PATH,
731      * QUERY and FRAGMENT_IDENTIFIER to access the different parts.
732      *
733      * <p>If a part is missing, its corresponding entry in the array will be null, except for the
734      * path, which will never be null.
735      */
736     public static String[] parseUrl(String url) {
737         char[] urlchars = url.toCharArray();
738 
739         int pos = 0;
740 
741         String scheme = null;
742         String authority = null;
743         String path = null;
744         String query = null;
745         String fragid = null;
746 
747         //  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
748 
749         // the scheme
750         boolean keepgoing = true;
751         while (keepgoing && pos < urlchars.length)
752         {
753             switch (urlchars[pos])
754             {
755                 case ':':
756                     if (pos >= 1)
757                     {
758                         scheme = new String(urlchars, 0, pos);
759                         keepgoing = false;
760                         pos++;
761                         break;
762                     }
763                 case '/':
764                 case '?':
765                 case '#':
766                     keepgoing = false;
767                     break;
768                 default:
769                     pos++;
770             }
771         }
772 
773         if (scheme == null)
774             pos = 0;
775 
776         //  the authority
777         if (pos + 1 < urlchars.length && urlchars[pos] == '/' && urlchars[pos+1] == '/')
778         {
779             pos += 2;
780             int authorityBeginPos = pos;
781             keepgoing = true;
782             while (keepgoing && pos < urlchars.length)
783             {
784                 switch (urlchars[pos])
785                 {
786                     case '/':
787                     case '?':
788                     case '#':
789                         keepgoing = false;
790                         break;
791                     default:
792                         pos++;
793                 }
794             }
795             authority = new String(urlchars, authorityBeginPos, pos - authorityBeginPos);
796         }
797 
798         //  the path
799         int pathBeginPos = pos;
800         keepgoing = true;
801         while (keepgoing && pos < urlchars.length)
802         {
803             switch (urlchars[pos])
804             {
805                 case '?':
806                 case '#':
807                     keepgoing = false;
808                     break;
809                 default:
810                     pos++;
811             }
812         }
813         path = new String(urlchars, pathBeginPos, pos - pathBeginPos);
814 
815         // the query
816         if (pos < urlchars.length && urlchars[pos] == '?')
817         {
818             pos++;
819             int queryBeginPos = pos;
820             keepgoing = true;
821             while (keepgoing && pos < urlchars.length)
822             {
823                 switch (urlchars[pos])
824                 {
825                     case '#':
826                         keepgoing = false;
827                         break;
828                     default:
829                         pos++;
830                 }
831             }
832             query = new String(urlchars, queryBeginPos, pos - queryBeginPos);
833         }
834 
835         // the fragment identifier
836         pos++;
837         if (pos < urlchars.length)
838             fragid = new String(urlchars, pos, urlchars.length - pos);
839 
840         return new String[] {scheme, authority, path, query, fragid};
841     }
842 
843     /***
844      * Decode a path.
845      *
846      * <p>Interprets %XX (where XX is hexadecimal number) as UTF-8 encoded bytes.
847      * <p>The validity of the input path is not checked (i.e. characters that
848      * were not encoded will not be reported as errors).
849      * <p>This method differs from URLDecoder.decode in that it always uses UTF-8
850      * (while URLDecoder uses the platform default encoding, often ISO-8859-1),
851      * and doesn't translate + characters to spaces.
852      *
853      * @param path the path to decode
854      * @return the decoded path
855      */
856     public static String decodePath(String path) {
857         StringBuffer translatedPath = new StringBuffer(path.length());
858         byte[] encodedchars = new byte[path.length() / 3];
859         int i = 0;
860         int length = path.length();
861         int encodedcharsLength = 0;
862         while (i < length) {
863             if (path.charAt(i) == '%') {
864                 // we must process all consecutive %-encoded characters in one go, because they represent
865                 // an UTF-8 encoded string, and in UTF-8 one character can be encoded as multiple bytes
866                 while (i < length && path.charAt(i) == '%') {
867                     if (i + 2 < length) {
868                         try {
869                             byte x = (byte)Integer.parseInt(path.substring(i + 1, i + 3), 16);
870                             encodedchars[encodedcharsLength] = x;
871                         } catch (NumberFormatException e) {
872                             throw new IllegalArgumentException("Illegal hex characters in pattern %" + path.substring(i + 1, i + 3));
873                         }
874                         encodedcharsLength++;
875                         i += 3;
876                     } else {
877                         throw new IllegalArgumentException("% character should be followed by 2 hexadecimal characters.");
878                     }
879                 }
880                 try {
881                     String translatedPart = new String(encodedchars, 0, encodedcharsLength, "UTF-8");
882                     translatedPath.append(translatedPart);
883                 } catch (UnsupportedEncodingException e) {
884                     // the situation that UTF-8 is not supported is quite theoretical, so throw a runtime exception
885                     throw new RuntimeException("Problem in decodePath: UTF-8 encoding not supported.");
886                 }
887                 encodedcharsLength = 0;
888             } else {
889                 // a normal character
890                 translatedPath.append(path.charAt(i));
891                 i++;
892             }
893         }
894         return translatedPath.toString();
895     }
896 
897 
898 }