forked from bigjosh/subethasmtp
-
Notifications
You must be signed in to change notification settings - Fork 40
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
cd65eac
commit 1b8a42c
Showing
8 changed files
with
251 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
124 changes: 124 additions & 0 deletions
124
src/main/java/org/subethamail/smtp/internal/io/Utf8InputStreamReader.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
package org.subethamail.smtp.internal.io; | ||
|
||
import java.io.EOFException; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.io.Reader; | ||
import java.nio.ByteBuffer; | ||
import java.nio.CharBuffer; | ||
import java.nio.charset.CharsetDecoder; | ||
import java.nio.charset.StandardCharsets; | ||
|
||
/**
 * No-buffering, no-locking InputStream Reader for UTF-8 encoded strings. This
 * class exists mainly because {@code java.io.InputStreamReader} does more
 * buffering than strictly necessary (for performance reasons) and this stuffs
 * up passing the underlying InputStream from command to command.
 *
 * <p>
 * Exactly the bytes of one UTF-8 encoded code point are consumed from the
 * underlying stream per decoded character; nothing is read ahead. A code point
 * outside the Basic Multilingual Plane is returned as two consecutive chars
 * (high surrogate first); the low surrogate is held in this reader until the
 * next read.
 *
 * <p>
 * A single instance is not thread-safe. Separate instances share no state, so
 * they may be used from different threads at the same time.
 */
public final class Utf8InputStreamReader extends Reader {

    private final InputStream in;

    // A CharsetDecoder is stateful and must not be used by several threads at
    // once, so every reader owns its decoder rather than sharing a static one.
    private final CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder();

    // Holds the bytes of the multi-byte character currently being decoded
    // (a UTF-8 encoded code point is at most 4 bytes long).
    private final ByteBuffer bb = ByteBuffer.allocate(4);

    // Pending low surrogate of a supplementary code point, or -1 if none.
    private int leftOver = -1;

    /**
     * Creates a reader that decodes UTF-8 from the given stream.
     *
     * @param in the stream to read bytes from; closed by {@link #close()}
     */
    public Utf8InputStreamReader(InputStream in) {
        this.in = in;
    }

    /**
     * Reads up to {@code len} chars into {@code cbuf} by calling {@link #read()}
     * repeatedly. The call returns only once {@code len} chars have been stored
     * or the end of the stream has been reached.
     *
     * @param cbuf destination buffer
     * @param off  index in {@code cbuf} of the first char to store
     * @param len  maximum number of chars to read
     * @return the number of chars stored, or -1 if the end of the stream was
     *         reached before any char was read
     * @throws IndexOutOfBoundsException if {@code off} or {@code len} is
     *                                   negative, or {@code len} is greater
     *                                   than {@code cbuf.length - off}
     * @throws IOException               if the bytes are not valid UTF-8 or the
     *                                   underlying stream fails
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
        // Validate before consuming anything so that a bad call cannot silently
        // swallow characters from the stream or report a negative count.
        if (off < 0 || len < 0 || len > cbuf.length - off) {
            throw new IndexOutOfBoundsException(
                    "off=" + off + ", len=" + len + ", cbuf.length=" + cbuf.length);
        }
        for (int i = off; i < off + len; i++) {
            int c = read();
            if (c == -1) {
                return i == off ? -1 : i - off;
            }
            cbuf[i] = (char) c;
        }
        return len;
    }

    /**
     * Reads a single char, consuming only the bytes of the UTF-8 character it
     * belongs to.
     *
     * @return the char read, or -1 if the end of the stream has been reached
     * @throws EOFException if the stream ends in the middle of a multi-byte
     *                      character
     * @throws IOException  if the bytes are not valid UTF-8 or the underlying
     *                      stream fails
     */
    @Override
    public int read() throws IOException {
        if (leftOver != -1) {
            int pending = leftOver;
            leftOver = -1;
            return pending;
        }
        int first = in.read();
        if (first == -1) {
            return -1;
        }
        int numBytes = numBytes(first);
        if (numBytes == 1) {
            return first;
        }
        bb.clear();
        bb.put((byte) first);
        for (int i = 1; i < numBytes; i++) {
            // Compare the int against -1 BEFORE narrowing to byte: a real 0xFF
            // byte also narrows to -1 and must not be mistaken for end of stream.
            int next = in.read();
            if (next == -1) {
                throw new EOFException();
            }
            if (!isContinuation(next)) {
                throw new IOException(
                        "wrong continuation bits, bytes after first in a UTF-8 character must start with bits 10");
            }
            bb.put((byte) next);
        }
        bb.flip();
        // decode() resets the decoder and, with the default REPORT actions,
        // throws a CharacterCodingException (an IOException) on malformed input
        // such as overlong encodings.
        CharBuffer decoded = decoder.decode(bb);
        int result = decoded.get();
        if (decoded.hasRemaining()) {
            // supplementary code point: keep the low surrogate for the next read
            leftOver = decoded.get();
        }
        return result;
    }

    /**
     * Closes the underlying stream.
     *
     * @throws IOException if closing the underlying stream fails
     */
    @Override
    public void close() throws IOException {
        in.close();
    }

    // True if the low 8 bits of the value have the form 10xxxxxx.
    private static boolean isContinuation(int a) {
        return (a & 0xC0) == 0x80;
    }

    /**
     * Returns the length in bytes of the UTF-8 character announced by its first
     * byte. Only the low 8 bits of the argument are inspected.
     *
     * @param a first byte of a UTF-8 encoded character
     * @return 1, 2, 3 or 4
     * @throws IOException if the byte is a continuation byte (10xxxxxx) or
     *                     starts with five one-bits (11111xxx)
     */
    // VisibleForTesting
    static int numBytes(int a) throws IOException {
        if ((a & 0x80) == 0) {
            return 1; // 0xxxxxxx
        }
        if ((a & 0x40) == 0) {
            throw new IOException("leading bits 10 illegal for first byte of UTF-8 character");
        }
        if ((a & 0x20) == 0) {
            return 2; // 110xxxxx
        }
        if ((a & 0x10) == 0) {
            return 3; // 1110xxxx
        }
        if ((a & 0x08) == 0) {
            return 4; // 11110xxx
        }
        throw new IOException("leading bits 11111 illegal for first byte of UTF-8 character");
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
106 changes: 106 additions & 0 deletions
106
src/test/java/org/subethamail/smtp/internal/io/Utf8InputStreamReaderTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
package org.subethamail.smtp.internal.io; | ||
|
||
import static org.junit.Assert.assertEquals; | ||
import static org.junit.Assert.assertThrows; | ||
import static org.subethamail.smtp.internal.io.Utf8InputStreamReader.numBytes; | ||
|
||
import java.io.ByteArrayInputStream; | ||
import java.io.EOFException; | ||
import java.io.IOException; | ||
import java.io.Reader; | ||
import java.nio.charset.StandardCharsets; | ||
|
||
import org.junit.Test; | ||
|
||
public class Utf8InputStreamReaderTest { | ||
|
||
@Test | ||
public void test() throws IOException { | ||
final char[] chars = Character.toChars(0x1F701); | ||
assertEquals(2, chars.length); | ||
final String str = new String(chars); | ||
String s = "$£Иह€한薠" + str; | ||
try (Reader r = reader(s)) { | ||
assertEquals('$', (char) r.read()); | ||
assertEquals('£', (char) r.read()); | ||
assertEquals('И', (char) r.read()); | ||
assertEquals('ह', (char) r.read()); | ||
assertEquals('€', (char) r.read()); | ||
assertEquals('한', (char) r.read()); | ||
assertEquals('薠', (char) r.read()); | ||
char[] chrs = new char[2]; | ||
assertEquals(2, r.read(chrs)); | ||
assertEquals(55357, chrs[0]); | ||
assertEquals(57089, chrs[1]); | ||
assertEquals(-1, r.read()); | ||
} | ||
} | ||
|
||
@Test | ||
public void testReadIntoArray() throws IOException { | ||
final char[] chars = Character.toChars(0x1F701); | ||
final String str = new String(chars); | ||
String s = "$£Иह€한薠" + str; | ||
try (Reader r = reader(s)) { | ||
char[] chrs = new char[1000]; | ||
int n = r.read(chrs, 0, 2); | ||
n += r.read(chrs, 2, 1000); | ||
assertEquals(9, n); | ||
assertEquals(-1, r.read(chrs)); | ||
} | ||
} | ||
|
||
@Test | ||
public void testEarlyEof() throws IOException { | ||
byte[] bytes = "£".getBytes(StandardCharsets.UTF_8); | ||
byte[] b = new byte[] { bytes[0] }; | ||
try (Reader r = new Utf8InputStreamReader(new ByteArrayInputStream(b))) { | ||
assertThrows(EOFException.class, () -> r.read()); | ||
} | ||
} | ||
|
||
@Test | ||
public void testNotContinuation() throws IOException { | ||
byte[] bytes = "£".getBytes(StandardCharsets.UTF_8); | ||
byte[] b = new byte[] { bytes[0], bytes[0] }; | ||
try (Reader r = new Utf8InputStreamReader(new ByteArrayInputStream(b))) { | ||
assertThrows(IOException.class, () -> r.read()); | ||
} | ||
} | ||
|
||
@Test | ||
public void testNotContinuation2() throws IOException { | ||
byte[] bytes = "£".getBytes(StandardCharsets.UTF_8); | ||
byte[] b = new byte[] { bytes[0], '$' }; | ||
try (Reader r = new Utf8InputStreamReader(new ByteArrayInputStream(b))) { | ||
assertThrows(IOException.class, () -> r.read()); | ||
} | ||
} | ||
|
||
@Test | ||
public void testContinuationByteCannotBeFirstByte() throws IOException { | ||
byte[] bytes = "£".getBytes(StandardCharsets.UTF_8); | ||
byte[] b = new byte[] { bytes[1] }; | ||
try (Reader r = new Utf8InputStreamReader(new ByteArrayInputStream(b))) { | ||
assertThrows(IOException.class, () -> r.read()); | ||
} | ||
} | ||
|
||
@Test | ||
public void testUtf8ByteHasTooManyLeadingOnes() throws IOException { | ||
byte[] b = new byte[] { (byte) 248 }; | ||
try (Reader r = new Utf8InputStreamReader(new ByteArrayInputStream(b))) { | ||
assertThrows(IOException.class, () -> r.read()); | ||
} | ||
} | ||
|
||
@Test | ||
public void testNumBytes() throws IOException { | ||
assertEquals(1, numBytes('$')); | ||
} | ||
|
||
private static Utf8InputStreamReader reader(String s) { | ||
return new Utf8InputStreamReader(new ByteArrayInputStream(s.getBytes(StandardCharsets.UTF_8))); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters