java - Inquiry about method readUTF() of class DataInputStream -


Does anyone know how it works under the hood? I have read the API documentation, but it's not clear to me. Could someone put it down in a more simplistic way? Thanks in advance.

  1. First, an unsigned short is read; this is the length of the encoded string in bytes.
  2. Repeat the following step until that many bytes have been consumed:
  3. Read a byte. If the byte matches the bit pattern 0xxxxxxx, it is a 1-byte character. If the byte matches the bit pattern 110xxxxx, the character consists of 2 bytes (Unicode). If the byte matches the bit pattern 1110xxxx, the character consists of 3 bytes. When a new character has been assembled, it is appended to the end of the string that will be returned.

Seeing the code behind the function may help:

 /**
  * Reads a string encoded in modified UTF-8 from {@code in}.
  *
  * <p>The encoded form starts with an unsigned 16-bit length prefix giving the
  * number of encoded bytes that follow. Those bytes are then decoded into
  * chars: a byte of the form {@code 0xxxxxxx} is one char on its own,
  * {@code 110xxxxx 10xxxxxx} is a two-byte char and
  * {@code 1110xxxx 10xxxxxx 10xxxxxx} is a three-byte char.
  *
  * @param in the source to read the length prefix and encoded bytes from
  * @return the decoded string; it may contain fewer chars than the number of
  *         encoded bytes, because multi-byte sequences collapse to one char
  * @throws UTFDataFormatException if a sequence is cut off by the end of the
  *         data, a continuation byte is not of the form {@code 10xxxxxx}, or
  *         a lead byte is of the form {@code 10xxxxxx} or {@code 1111xxxx}
  * @throws IOException if reading from {@code in} fails
  */
 public static final String readUTF(DataInput in) throws IOException {
     int utflen = in.readUnsignedShort();
     byte[] bytearr = null;
     char[] chararr = null;
     if (in instanceof DataInputStream) {
         // Reuse the stream's own scratch buffers; when they are too small,
         // replace them with buffers twice the size currently needed.
         DataInputStream dis = (DataInputStream) in;
         if (dis.bytearr.length < utflen) {
             dis.bytearr = new byte[utflen * 2];
             dis.chararr = new char[utflen * 2];
         }
         chararr = dis.chararr;
         bytearr = dis.bytearr;
     } else {
         bytearr = new byte[utflen];
         chararr = new char[utflen];
     }

     int c, char2, char3;
     int count = 0;          // index of the next encoded byte to consume
     int chararr_count = 0;  // number of chars decoded so far

     in.readFully(bytearr, 0, utflen);

     // Fast path: copy the leading run of single-byte (0xxxxxxx) chars and
     // stop at the first byte that has its high bit set.
     while (count < utflen) {
         c = (int) bytearr[count] & 0xff;
         if (c > 127) {
             break;
         }
         count++;
         chararr[chararr_count++] = (char) c;
     }

     // General path: the top four bits of the lead byte select the length
     // of the sequence.
     while (count < utflen) {
         c = (int) bytearr[count] & 0xff;
         switch (c >> 4) {
             case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                 /* 0xxxxxxx */
                 count++;
                 chararr[chararr_count++] = (char) c;
                 break;
             case 12: case 13:
                 /* 110x xxxx   10xx xxxx */
                 count += 2;
                 if (count > utflen) {
                     throw new UTFDataFormatException(
                         "malformed input: partial character at end");
                 }
                 char2 = (int) bytearr[count - 1];
                 if ((char2 & 0xC0) != 0x80) {
                     throw new UTFDataFormatException(
                         "malformed input around byte " + count);
                 }
                 chararr[chararr_count++] = (char) (((c & 0x1F) << 6)
                                                  | (char2 & 0x3F));
                 break;
             case 14:
                 /* 1110 xxxx  10xx xxxx  10xx xxxx */
                 count += 3;
                 if (count > utflen) {
                     throw new UTFDataFormatException(
                         "malformed input: partial character at end");
                 }
                 char2 = (int) bytearr[count - 2];
                 char3 = (int) bytearr[count - 1];
                 if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) {
                     throw new UTFDataFormatException(
                         "malformed input around byte " + (count - 1));
                 }
                 chararr[chararr_count++] = (char) (((c     & 0x0F) << 12)
                                                  | ((char2 & 0x3F) << 6)
                                                  | ((char3 & 0x3F) << 0));
                 break;
             default:
                 /* 10xx xxxx,  1111 xxxx */
                 throw new UTFDataFormatException(
                     "malformed input around byte " + count);
         }
     }
     // The number of chars produced may be less than utflen.
     return new String(chararr, 0, chararr_count);
 }


Comments

Popular posts from this blog

basic authentication with http post params android -

vb.net - Virtual Keyboard commands -

css - Firefox for ubuntu renders wrong colors -