java - Inquiry about method readUTF() of class DataInputStream -
Does anyone know how it works under the hood? I have read the API documentation, but it's not clear to me. Could someone put it in a simpler way? Thanks in advance.
- First, an unsigned short is read; this is the length of the encoded string in bytes.
- Then the following step is repeated until that many bytes have been consumed:
- Read a byte. If the byte matches the bit pattern 0xxxxxxx, it is a complete 1-byte character. If the byte matches the bit pattern 110xxxxx, the character consists of 2 bytes. If the byte matches the bit pattern 1110xxxx, the character consists of 3 bytes. Once a new character has been assembled, it is appended to the end of the string that will be returned.
Seeing the code behind the function may help:
/**
 * Reads a string encoded in Java's modified UTF-8 format from {@code in}.
 *
 * <p>The input starts with an unsigned 16-bit value giving the number of
 * encoded bytes that follow. Each character then occupies one, two, or
 * three bytes, identified by the high bits of its first byte:
 * <ul>
 *   <li>{@code 0xxxxxxx} - one byte</li>
 *   <li>{@code 110xxxxx 10xxxxxx} - two bytes</li>
 *   <li>{@code 1110xxxx 10xxxxxx 10xxxxxx} - three bytes</li>
 * </ul>
 *
 * <p>NOTE(review): the snippet this was restored from reused scratch buffers
 * held in fields of {@code DataInputStream} when {@code in} was one. Those
 * fields are not accessible outside {@code java.io}, so this standalone
 * version always allocates fresh buffers; the decoded result is the same.
 *
 * @param in the source to read the length prefix and encoded bytes from
 * @return the decoded string
 * @throws UTFDataFormatException if the bytes are not valid modified UTF-8
 * @throws IOException if reading from {@code in} fails
 */
public final static String readUTF(DataInput in) throws IOException {
    int utflen = in.readUnsignedShort();
    // Every character needs at least one byte, so utflen chars is always enough.
    byte[] bytearr = new byte[utflen];
    char[] chararr = new char[utflen];

    int c, char2, char3;
    int count = 0;
    int chararr_count = 0;

    in.readFully(bytearr, 0, utflen);

    // Fast path: copy the leading run of single-byte (<= 127) characters.
    while (count < utflen) {
        c = (int) bytearr[count] & 0xff;
        if (c > 127) {
            break;
        }
        count++;
        chararr[chararr_count++] = (char) c;
    }

    // General path: dispatch on the top four bits of the lead byte.
    while (count < utflen) {
        c = (int) bytearr[count] & 0xff;
        switch (c >> 4) {
            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                /* 0xxxxxxx */
                count++;
                chararr[chararr_count++] = (char) c;
                break;
            case 12: case 13:
                /* 110x xxxx   10xx xxxx */
                count += 2;
                if (count > utflen) {
                    throw new UTFDataFormatException(
                        "malformed input: partial character at end");
                }
                char2 = (int) bytearr[count - 1];
                if ((char2 & 0xC0) != 0x80) {
                    throw new UTFDataFormatException(
                        "malformed input around byte " + count);
                }
                chararr[chararr_count++] = (char) (((c & 0x1F) << 6)
                                                   | (char2 & 0x3F));
                break;
            case 14:
                /* 1110 xxxx   10xx xxxx   10xx xxxx */
                count += 3;
                if (count > utflen) {
                    throw new UTFDataFormatException(
                        "malformed input: partial character at end");
                }
                char2 = (int) bytearr[count - 2];
                char3 = (int) bytearr[count - 1];
                if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) {
                    throw new UTFDataFormatException(
                        "malformed input around byte " + (count - 1));
                }
                chararr[chararr_count++] = (char) (((c & 0x0F) << 12)
                                                   | ((char2 & 0x3F) << 6)
                                                   | ((char3 & 0x3F) << 0));
                break;
            default:
                /* 10xx xxxx, 1111 xxxx: not a valid lead byte */
                throw new UTFDataFormatException(
                    "malformed input around byte " + count);
        }
    }
    // The number of chars produced may be less than utflen.
    return new String(chararr, 0, chararr_count);
}
Comments
Post a Comment