GNU Classpath (0.17) | ||
Frames | No Frames |
1: /* InputStreamReader.java -- Reader than transforms bytes to chars 2: Copyright (C) 1998, 1999, 2001, 2003, 2004, 2005 Free Software Foundation, Inc. 3: 4: This file is part of GNU Classpath. 5: 6: GNU Classpath is free software; you can redistribute it and/or modify 7: it under the terms of the GNU General Public License as published by 8: the Free Software Foundation; either version 2, or (at your option) 9: any later version. 10: 11: GNU Classpath is distributed in the hope that it will be useful, but 12: WITHOUT ANY WARRANTY; without even the implied warranty of 13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14: General Public License for more details. 15: 16: You should have received a copy of the GNU General Public License 17: along with GNU Classpath; see the file COPYING. If not, write to the 18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19: 02110-1301 USA. 20: 21: Linking this library statically or dynamically with other modules is 22: making a combined work based on this library. Thus, the terms and 23: conditions of the GNU General Public License cover the whole 24: combination. 25: 26: As a special exception, the copyright holders of this library give you 27: permission to link this library with independent modules to produce an 28: executable, regardless of the license terms of these independent 29: modules, and to copy and distribute the resulting executable under 30: terms of your choice, provided that you also meet, for each linked 31: independent module, the terms and conditions of the license of that 32: module. An independent module is a module which is not derived from 33: or based on this library. If you modify this library, you may extend 34: this exception to your version of the library, but you are not 35: obligated to do so. If you do not wish to do so, delete this 36: exception statement from your version. */ 37: 38: 39: package java.io; 40: 41: import java.nio.charset.UnsupportedCharsetException; 42: import java.nio.charset.CharacterCodingException; 43: import java.nio.charset.IllegalCharsetNameException; 44: import java.nio.charset.CoderResult; 45: import java.nio.charset.CodingErrorAction; 46: import java.nio.charset.Charset; 47: import java.nio.charset.CharsetDecoder; 48: import java.nio.CharBuffer; 49: import java.nio.ByteBuffer; 50: import gnu.java.nio.charset.EncodingHelper; 51: 52: /** 53: * This class reads characters from a byte input stream. The characters 54: * read are converted from bytes in the underlying stream by a 55: * decoding layer. The decoding layer transforms bytes to chars according 56: * to an encoding standard. There are many available encodings to choose 57: * from. The desired encoding can either be specified by name, or if no 58: * encoding is selected, the system default encoding will be used. The 59: * system default encoding name is determined from the system property 60: * <code>file.encoding</code>. The only encodings that are guaranteed to 61: * be availalbe are "8859_1" (the Latin-1 character set) and "UTF8". 62: * Unforunately, Java does not provide a mechanism for listing the 63: * ecodings that are supported in a given implementation. 64: * <p> 65: * Here is a list of standard encoding names that may be available: 66: * <p> 67: * <ul> 68: * <li>8859_1 (ISO-8859-1/Latin-1)</li> 69: * <li>8859_2 (ISO-8859-2/Latin-2)</li> 70: * <li>8859_3 (ISO-8859-3/Latin-3)</li> 71: * <li>8859_4 (ISO-8859-4/Latin-4)</li> 72: * <li>8859_5 (ISO-8859-5/Latin-5)</li> 73: * <li>8859_6 (ISO-8859-6/Latin-6)</li> 74: * <li>8859_7 (ISO-8859-7/Latin-7)</li> 75: * <li>8859_8 (ISO-8859-8/Latin-8)</li> 76: * <li>8859_9 (ISO-8859-9/Latin-9)</li> 77: * <li>ASCII (7-bit ASCII)</li> 78: * <li>UTF8 (UCS Transformation Format-8)</li> 79: * <li>More later</li> 80: * </ul> 81: * <p> 82: * It is recommended that applications do not use 83: * <code>InputStreamReader</code>'s 84: * directly. Rather, for efficiency purposes, an object of this class 85: * should be wrapped by a <code>BufferedReader</code>. 86: * <p> 87: * Due to a deficiency the Java class library design, there is no standard 88: * way for an application to install its own byte-character encoding. 89: * 90: * @see BufferedReader 91: * @see InputStream 92: * 93: * @author Robert Schuster 94: * @author Aaron M. Renn (arenn@urbanophile.com) 95: * @author Per Bothner (bothner@cygnus.com) 96: * @date April 22, 1998. 97: */ 98: public class InputStreamReader extends Reader 99: { 100: /** 101: * The input stream. 102: */ 103: private InputStream in; 104: 105: /** 106: * The charset decoder. 107: */ 108: private CharsetDecoder decoder; 109: 110: /** 111: * End of stream reached. 112: */ 113: private boolean isDone = false; 114: 115: /** 116: * Need this. 117: */ 118: private float maxBytesPerChar; 119: 120: /** 121: * Buffer holding surplus loaded bytes (if any) 122: */ 123: private ByteBuffer byteBuffer; 124: 125: /** 126: * java.io canonical name of the encoding. 127: */ 128: private String encoding; 129: 130: /** 131: * We might decode to a 2-char UTF-16 surrogate, which won't fit in the 132: * output buffer. In this case we need to save the surrogate char. 133: */ 134: private char savedSurrogate; 135: private boolean hasSavedSurrogate = false; 136: 137: /** 138: * This method initializes a new instance of <code>InputStreamReader</code> 139: * to read from the specified stream using the default encoding. 140: * 141: * @param in The <code>InputStream</code> to read from 142: */ 143: public InputStreamReader(InputStream in) 144: { 145: if (in == null) 146: throw new NullPointerException(); 147: this.in = in; 148: try 149: { 150: encoding = System.getProperty("file.encoding"); 151: // Don't use NIO if avoidable 152: if(EncodingHelper.isISOLatin1(encoding)) 153: { 154: encoding = "ISO8859_1"; 155: maxBytesPerChar = 1f; 156: decoder = null; 157: return; 158: } 159: Charset cs = EncodingHelper.getCharset(encoding); 160: decoder = cs.newDecoder(); 161: encoding = EncodingHelper.getOldCanonical(cs.name()); 162: try { 163: maxBytesPerChar = cs.newEncoder().maxBytesPerChar(); 164: } catch(UnsupportedOperationException _){ 165: maxBytesPerChar = 1f; 166: } 167: decoder.onMalformedInput(CodingErrorAction.REPLACE); 168: decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 169: decoder.reset(); 170: } catch(RuntimeException e) { 171: encoding = "ISO8859_1"; 172: maxBytesPerChar = 1f; 173: decoder = null; 174: } catch(UnsupportedEncodingException e) { 175: encoding = "ISO8859_1"; 176: maxBytesPerChar = 1f; 177: decoder = null; 178: } 179: } 180: 181: /** 182: * This method initializes a new instance of <code>InputStreamReader</code> 183: * to read from the specified stream using a caller supplied character 184: * encoding scheme. Note that due to a deficiency in the Java language 185: * design, there is no way to determine which encodings are supported. 186: * 187: * @param in The <code>InputStream</code> to read from 188: * @param encoding_name The name of the encoding scheme to use 189: * 190: * @exception UnsupportedEncodingException If the encoding scheme 191: * requested is not available. 192: */ 193: public InputStreamReader(InputStream in, String encoding_name) 194: throws UnsupportedEncodingException 195: { 196: if (in == null 197: || encoding_name == null) 198: throw new NullPointerException(); 199: 200: this.in = in; 201: // Don't use NIO if avoidable 202: if(EncodingHelper.isISOLatin1(encoding_name)) 203: { 204: encoding = "ISO8859_1"; 205: maxBytesPerChar = 1f; 206: decoder = null; 207: return; 208: } 209: try { 210: Charset cs = EncodingHelper.getCharset(encoding_name); 211: try { 212: maxBytesPerChar = cs.newEncoder().maxBytesPerChar(); 213: } catch(UnsupportedOperationException _){ 214: maxBytesPerChar = 1f; 215: } 216: 217: decoder = cs.newDecoder(); 218: decoder.onMalformedInput(CodingErrorAction.REPLACE); 219: decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 220: decoder.reset(); 221: 222: // The encoding should be the old name, if such exists. 223: encoding = EncodingHelper.getOldCanonical(cs.name()); 224: } catch(RuntimeException e) { 225: encoding = "ISO8859_1"; 226: maxBytesPerChar = 1f; 227: decoder = null; 228: } 229: } 230: 231: /** 232: * Creates an InputStreamReader that uses a decoder of the given 233: * charset to decode the bytes in the InputStream into 234: * characters. 235: */ 236: public InputStreamReader(InputStream in, Charset charset) { 237: this.in = in; 238: decoder = charset.newDecoder(); 239: 240: decoder.onMalformedInput(CodingErrorAction.REPLACE); 241: decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 242: decoder.reset(); 243: encoding = EncodingHelper.getOldCanonical(charset.name()); 244: } 245: 246: /** 247: * Creates an InputStreamReader that uses the given charset decoder 248: * to decode the bytes in the InputStream into characters. 249: */ 250: public InputStreamReader(InputStream in, CharsetDecoder decoder) { 251: this.in = in; 252: this.decoder = decoder; 253: 254: try { 255: maxBytesPerChar = decoder.charset().newEncoder().maxBytesPerChar(); 256: } catch(UnsupportedOperationException _){ 257: maxBytesPerChar = 1f; 258: } 259: 260: decoder.onMalformedInput(CodingErrorAction.REPLACE); 261: decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 262: decoder.reset(); 263: encoding = EncodingHelper.getOldCanonical(decoder.charset().name()); 264: } 265: 266: /** 267: * This method closes this stream, as well as the underlying 268: * <code>InputStream</code>. 269: * 270: * @exception IOException If an error occurs 271: */ 272: public void close() throws IOException 273: { 274: synchronized (lock) 275: { 276: // Makes sure all intermediate data is released by the decoder. 277: if (decoder != null) 278: decoder.reset(); 279: if (in != null) 280: in.close(); 281: in = null; 282: isDone = true; 283: decoder = null; 284: } 285: } 286: 287: /** 288: * This method returns the name of the encoding that is currently in use 289: * by this object. If the stream has been closed, this method is allowed 290: * to return <code>null</code>. 291: * 292: * @return The current encoding name 293: */ 294: public String getEncoding() 295: { 296: return in != null ? encoding : null; 297: } 298: 299: /** 300: * This method checks to see if the stream is ready to be read. It 301: * will return <code>true</code> if is, or <code>false</code> if it is not. 302: * If the stream is not ready to be read, it could (although is not required 303: * to) block on the next read attempt. 304: * 305: * @return <code>true</code> if the stream is ready to be read, 306: * <code>false</code> otherwise 307: * 308: * @exception IOException If an error occurs 309: */ 310: public boolean ready() throws IOException 311: { 312: if (in == null) 313: throw new IOException("Reader has been closed"); 314: 315: return in.available() != 0; 316: } 317: 318: /** 319: * This method reads up to <code>length</code> characters from the stream into 320: * the specified array starting at index <code>offset</code> into the 321: * array. 322: * 323: * @param buf The character array to recieve the data read 324: * @param offset The offset into the array to start storing characters 325: * @param length The requested number of characters to read. 326: * 327: * @return The actual number of characters read, or -1 if end of stream. 328: * 329: * @exception IOException If an error occurs 330: */ 331: public int read(char[] buf, int offset, int length) throws IOException 332: { 333: if (in == null) 334: throw new IOException("Reader has been closed"); 335: if (isDone) 336: return -1; 337: if(decoder != null){ 338: int totalBytes = (int)((double)length * maxBytesPerChar); 339: byte[] bytes = new byte[totalBytes]; 340: 341: int remaining = 0; 342: if(byteBuffer != null) 343: { 344: remaining = byteBuffer.remaining(); 345: byteBuffer.get(bytes, 0, remaining); 346: } 347: int read; 348: if(totalBytes - remaining > 0) 349: { 350: read = in.read(bytes, remaining, totalBytes - remaining); 351: if(read == -1){ 352: read = remaining; 353: isDone = true; 354: } else 355: read += remaining; 356: } else 357: read = remaining; 358: byteBuffer = ByteBuffer.wrap(bytes, 0, read); 359: CharBuffer cb = CharBuffer.wrap(buf, offset, length); 360: int startPos = cb.position(); 361: 362: if(hasSavedSurrogate){ 363: hasSavedSurrogate = false; 364: cb.put(savedSurrogate); 365: read++; 366: } 367: 368: CoderResult cr = decoder.decode(byteBuffer, cb, isDone); 369: decoder.reset(); 370: // 1 char remains which is the first half of a surrogate pair. 371: if(cr.isOverflow() && cb.hasRemaining()){ 372: CharBuffer overflowbuf = CharBuffer.allocate(2); 373: cr = decoder.decode(byteBuffer, overflowbuf, isDone); 374: overflowbuf.flip(); 375: if(overflowbuf.hasRemaining()) 376: { 377: cb.put(overflowbuf.get()); 378: savedSurrogate = overflowbuf.get(); 379: hasSavedSurrogate = true; 380: isDone = false; 381: } 382: } 383: 384: if(byteBuffer.hasRemaining()) { 385: byteBuffer.compact(); 386: byteBuffer.flip(); 387: isDone = false; 388: } else 389: byteBuffer = null; 390: 391: read = cb.position() - startPos; 392: return (read <= 0) ? -1 : read; 393: } else { 394: byte[] bytes = new byte[length]; 395: int read = in.read(bytes); 396: for(int i=0;i<read;i++) 397: buf[offset+i] = (char)(bytes[i]&0xFF); 398: return read; 399: } 400: } 401: 402: /** 403: * Reads an char from the input stream and returns it 404: * as an int in the range of 0-65535. This method also will return -1 if 405: * the end of the stream has been reached. 406: * <p> 407: * This method will block until the char can be read. 408: * 409: * @return The char read or -1 if end of stream 410: * 411: * @exception IOException If an error occurs 412: */ 413: public int read() throws IOException 414: { 415: char[] buf = new char[1]; 416: int count = read(buf, 0, 1); 417: return count > 0 ? buf[0] : -1; 418: } 419: 420: /** 421: * Skips the specified number of chars in the stream. It 422: * returns the actual number of chars skipped, which may be less than the 423: * requested amount. 424: * 425: * @param count The requested number of chars to skip 426: * 427: * @return The actual number of chars skipped. 428: * 429: * @exception IOException If an error occurs 430: */ 431: public long skip(long count) throws IOException 432: { 433: if (in == null) 434: throw new IOException("Reader has been closed"); 435: 436: return super.skip(count); 437: } 438: }
GNU Classpath (0.17) |