support SMTPUTF8 #132 (#133)

davidmoten · May 14, 2024 · 1b8a42c · 1b8a42c
1 parent cd65eac
commit 1b8a42c
Show file tree

Hide file tree

Showing 8 changed files with 251 additions and 9 deletions.
diff --git a/src/main/java/org/subethamail/smtp/internal/command/EhloCommand.java b/src/main/java/org/subethamail/smtp/internal/command/EhloCommand.java
@@ -70,6 +70,9 @@ public void execute(String commandString, Session sess) throws IOException
 		// Chunking (BDAT) support
 		response.append("\r\n250-CHUNKING");
 
+		// SMTPUTF8 support
+		response.append("\r\n250-SMTPUTF8");
+
 		// Check to see if we support authentication
 		Optional<AuthenticationHandlerFactory> authFact = sess.getServer().getAuthenticationHandlerFactory();
         final boolean displayAuth;

diff --git a/src/main/java/org/subethamail/smtp/internal/io/CRLFTerminatedReader.java b/src/main/java/org/subethamail/smtp/internal/io/CRLFTerminatedReader.java
@@ -73,7 +73,7 @@ public MaxLineLengthException(String s)
 		}
 	}
 
-	private final InputStream in;
+	private final Reader in;
 
 	/**
 	 * Constructs this CRLFTerminatedReader.
@@ -86,7 +86,7 @@ public MaxLineLengthException(String s)
 
 	public CRLFTerminatedReader(InputStream in)
 	{
-		this.in = in;
+		this.in = new Utf8InputStreamReader(in);
 	}
 
 	private final StringBuffer lineBuffer = new StringBuffer();
@@ -189,13 +189,13 @@ public int read() throws IOException
 	@Override
 	public boolean ready() throws IOException
 	{
-		return this.in.available() > 0;
+		return this.in.ready();
 	}
 
 	@Override
 	public int read(char[] cbuf, int off, int len) throws IOException
 	{
-		byte[] temp = new byte[len];
+		char[] temp = new char[len];
 		int result = this.in.read(temp, 0, len);
 		for (int i = 0; i < result; i++)
 			cbuf[i] = (char) temp[i];

diff --git a/src/main/java/org/subethamail/smtp/internal/io/Utf8InputStreamReader.java b/src/main/java/org/subethamail/smtp/internal/io/Utf8InputStreamReader.java
@@ -0,0 +1,124 @@
+package org.subethamail.smtp.internal.io;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Unbuffered {@link Reader} that decodes UTF-8 from an {@link InputStream} one
+ * character at a time.
+ *
+ * <p>
+ * This class exists mainly because {@code java.io.InputStreamReader} does more
+ * buffering than strictly necessary (for performance reasons), which breaks
+ * passing the underlying InputStream from command to command. This reader only
+ * consumes the bytes of the character it is returning.
+ *
+ * <p>
+ * No locking is performed, so a single instance is not thread-safe. Each
+ * instance owns its own decoder, so distinct instances do not share mutable
+ * state with each other.
+ */
+public final class Utf8InputStreamReader extends Reader {
+
+    // Per instance on purpose: CharsetDecoder is stateful and must not be
+    // shared between readers that are used from different threads.
+    private final CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder();
+
+    private final InputStream in;
+
+    // Holds the bytes of one multi-byte character (at most 4 per numBytes).
+    private final ByteBuffer bb = ByteBuffer.allocate(4);
+
+    // Second char of a decoded two-char result (surrogate pair) waiting to be
+    // returned by the next read(), or -1 when nothing is pending.
+    private int leftOver = -1;
+
+    /**
+     * Creates a reader over the given stream.
+     *
+     * @param in source of UTF-8 encoded bytes; closed by {@link #close()}
+     */
+    public Utf8InputStreamReader(InputStream in) {
+        this.in = in;
+    }
+
+    /**
+     * Reads characters into {@code cbuf} by calling {@link #read()} repeatedly
+     * until {@code len} characters have been stored or end of stream is hit.
+     *
+     * @param cbuf destination array
+     * @param off  index of the first element to write
+     * @param len  maximum number of characters to read
+     * @return the number of characters stored, or -1 if end of stream was
+     *         reached before any character was stored
+     * @throws IOException if the underlying stream fails or the input is not
+     *                     well-formed UTF-8
+     */
+    @Override
+    public int read(char[] cbuf, int off, int len) throws IOException {
+        for (int i = off; i < off + len; i++) {
+            int a = read();
+            if (a == -1) {
+                if (i == off) {
+                    return -1;
+                } else {
+                    return i - off;
+                }
+            }
+            cbuf[i] = (char) a;
+        }
+        return len;
+    }
+
+    /**
+     * Reads a single UTF-16 char. A code point that decodes to two chars is
+     * returned over two consecutive calls.
+     *
+     * @return the char read, or -1 at end of stream
+     * @throws EOFException if the stream ends in the middle of a multi-byte
+     *                      character
+     * @throws IOException  if the underlying stream fails or the bytes are not
+     *                      well-formed UTF-8
+     */
+    @Override
+    public int read() throws IOException {
+        if (leftOver != -1) {
+            int v = leftOver;
+            leftOver = -1;
+            return v;
+        }
+        int first = in.read();
+        if (first == -1) {
+            return -1;
+        }
+        int numBytes = numBytes(first);
+        if (numBytes == 1) {
+            return first;
+        }
+        bb.clear();
+        bb.put((byte) first);
+        for (int i = 1; i < numBytes; i++) {
+            // Test for end of stream on the int value: after narrowing to byte
+            // a genuine 0xFF byte would be indistinguishable from -1.
+            int next = in.read();
+            if (next == -1) {
+                throw new EOFException();
+            }
+            if (!isContinuation(next)) {
+                throw new IOException(
+                        "wrong continuation bits, bytes after first in a UTF-8 character must start with bits 10");
+            }
+            bb.put((byte) next);
+        }
+        bb.flip();
+        CharBuffer decoded = decoder.decode(bb);
+        int v = decoded.get();
+        if (decoded.hasRemaining()) {
+            leftOver = decoded.get();
+        }
+        return v;
+    }
+
+    /**
+     * Reports whether input appears to be available. Without this override the
+     * inherited {@link Reader#ready()} always answers {@code false}.
+     *
+     * <p>
+     * This is best effort: it is true when a char is already pending or the
+     * underlying stream reports available bytes. If only part of a multi-byte
+     * character has arrived, the next read may still block.
+     *
+     * @return true if a pending char exists or the stream has bytes available
+     * @throws IOException if the underlying stream fails
+     */
+    @Override
+    public boolean ready() throws IOException {
+        return leftOver != -1 || in.available() > 0;
+    }
+
+    /**
+     * Closes the underlying stream.
+     *
+     * @throws IOException if closing the underlying stream fails
+     */
+    @Override
+    public void close() throws IOException {
+        in.close();
+    }
+
+    // A continuation byte has the bit pattern 10xxxxxx.
+    private static boolean isContinuation(int a) {
+        return (a & 0xC0) == 0x80;
+    }
+
+    /**
+     * Returns the length in bytes of the UTF-8 sequence introduced by the given
+     * first byte, judged from its leading bits. Only the low 8 bits of
+     * {@code a} are examined.
+     *
+     * @param a first byte of a UTF-8 encoded character
+     * @return 1, 2, 3 or 4
+     * @throws IOException if the leading bits are 10 (a continuation byte) or
+     *                     11111
+     */
+    // VisibleForTesting
+    static int numBytes(int a) throws IOException {
+        if ((a & 0x80) == 0) {
+            return 1;
+        }
+        if ((a & 0x40) == 0) {
+            throw new IOException("leading bits 10 illegal for first byte of UTF-8 character");
+        }
+        if ((a & 0x20) == 0) {
+            return 2;
+        }
+        if ((a & 0x10) == 0) {
+            return 3;
+        }
+        if ((a & 0x08) == 0) {
+            return 4;
+        }
+        throw new IOException("leading bits 11111 illegal for first byte of UTF-8 character");
+    }
+}
diff --git a/src/test/java/org/subethamail/smtp/client/SmartClientTest.java b/src/test/java/org/subethamail/smtp/client/SmartClientTest.java
@@ -28,10 +28,11 @@ public void test() throws InterruptedException, UnknownHostException, SMTPExcept
             assertEquals("clientHeloHost", client.getHeloHost());
             assertEquals(0, client.getRecipientCount());
             Assert.assertFalse(client.getAuthenticator().isPresent());
-            assertEquals(3, client.getExtensions().size());
+            assertEquals(4, client.getExtensions().size());
             Set<String> set = client.getExtensions().keySet();
             assertTrue(set.contains("8BITMIME"));
             assertTrue(set.contains("CHUNKING"));
+            assertTrue(set.contains("SMTPUTF8"));
             //TODO why is OK in client.getExtensions?
         } finally {
             server.stop();

diff --git a/src/test/java/org/subethamail/smtp/command/EhloCommandTest.java b/src/test/java/org/subethamail/smtp/command/EhloCommandTest.java
@@ -5,6 +5,7 @@
 
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.net.ServerSocket;
 import java.net.Socket;
 import java.nio.charset.StandardCharsets;
@@ -29,6 +30,7 @@ public void testEhloWhenTlsRequiredAuthShouldNotAdvertisedBeforeTlsStarted() thr
         System.out.println(output);
         assertTrue(output.contains("250-STARTTLS"));
         assertFalse(output.contains("250-AUTH PLAIN"));
+        assertTrue(output.contains("250-SMTPUTF8"));
     }
 
     @Test
@@ -62,6 +64,8 @@ private String getOutput(boolean isTlsStarted, boolean showAuthBeforeSTARTTLS) t
             ByteArrayOutputStream out = new ByteArrayOutputStream();
             Socket socket = Mockito.mock(Socket.class);
             Mockito.when(socket.getOutputStream()).thenReturn(out);
+            InputStream in = Mockito.mock(InputStream.class);
+            Mockito.when(socket.getInputStream()).thenReturn(in);
             SMTPServer server = SMTPServer //
                     .port(ss.getLocalPort()) //
                     .serverSocketFactory(() -> ss) //
@@ -84,8 +88,7 @@ public AuthenticationHandler create() {
             Session session = new Session(server, new ServerThread(server, ss, ProxyHandler.NOP), socket, ProxyHandler.NOP);
             session.setTlsStarted(isTlsStarted);
             ec.execute("EHLO me.com", session);
-            String output = new String(out.toByteArray(), StandardCharsets.UTF_8);
-            return output;
+            return new String(out.toByteArray(), StandardCharsets.UTF_8);
         }
     }
 

diff --git a/src/test/java/org/subethamail/smtp/internal/io/Utf8InputStreamReaderTest.java b/src/test/java/org/subethamail/smtp/internal/io/Utf8InputStreamReaderTest.java
@@ -0,0 +1,106 @@
+package org.subethamail.smtp.internal.io;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThrows;
+import static org.subethamail.smtp.internal.io.Utf8InputStreamReader.numBytes;
+
+import java.io.ByteArrayInputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.Reader;
+import java.nio.charset.StandardCharsets;
+
+import org.junit.Test;
+
+/**
+ * Unit tests for {@link Utf8InputStreamReader}: decoding of characters of
+ * various encoded lengths, bulk reads, and rejection of malformed input.
+ */
+public class Utf8InputStreamReaderTest {
+
+    /** UTF-8 encoding of the pound sign, used to build malformed inputs. */
+    private static final byte[] POUND = "£".getBytes(StandardCharsets.UTF_8);
+
+    @Test
+    public void test() throws IOException {
+        char[] surrogatePair = Character.toChars(0x1F701);
+        assertEquals(2, surrogatePair.length);
+        String singleCharText = "$£Иह€한薠";
+        String text = singleCharText + String.valueOf(surrogatePair);
+        try (Reader in = reader(text)) {
+            // each of these decodes to exactly one char
+            for (char expected : singleCharText.toCharArray()) {
+                assertEquals(expected, (char) in.read());
+            }
+            // the supplementary code point comes back as two chars
+            char[] pair = new char[2];
+            assertEquals(2, in.read(pair));
+            assertEquals(55357, pair[0]);
+            assertEquals(57089, pair[1]);
+            assertEquals(-1, in.read());
+        }
+    }
+
+    @Test
+    public void testReadIntoArray() throws IOException {
+        String text = "$£Иह€한薠" + String.valueOf(Character.toChars(0x1F701));
+        try (Reader in = reader(text)) {
+            char[] buffer = new char[1000];
+            int firstCount = in.read(buffer, 0, 2);
+            int secondCount = in.read(buffer, 2, 1000);
+            assertEquals(9, firstCount + secondCount);
+            assertEquals(-1, in.read(buffer));
+        }
+    }
+
+    @Test
+    public void testEarlyEof() throws IOException {
+        // lead byte of a two-byte character with nothing after it
+        assertReadThrows(EOFException.class, POUND[0]);
+    }
+
+    @Test
+    public void testNotContinuation() throws IOException {
+        // lead byte followed by another lead byte
+        assertReadThrows(IOException.class, POUND[0], POUND[0]);
+    }
+
+    @Test
+    public void testNotContinuation2() throws IOException {
+        // lead byte followed by a plain ASCII byte
+        assertReadThrows(IOException.class, POUND[0], (byte) '$');
+    }
+
+    @Test
+    public void testContinuationByteCannotBeFirstByte() throws IOException {
+        assertReadThrows(IOException.class, POUND[1]);
+    }
+
+    @Test
+    public void testUtf8ByteHasTooManyLeadingOnes() throws IOException {
+        assertReadThrows(IOException.class, (byte) 248);
+    }
+
+    @Test
+    public void testNumBytes() throws IOException {
+        int width = numBytes('$');
+        assertEquals(1, width);
+    }
+
+    /**
+     * Asserts that a single {@code read()} on a reader over {@code input}
+     * throws the given exception type, then closes the reader.
+     */
+    private static void assertReadThrows(Class<? extends Throwable> expected, byte... input) throws IOException {
+        try (Reader in = new Utf8InputStreamReader(new ByteArrayInputStream(input))) {
+            assertThrows(expected, () -> in.read());
+        }
+    }
+
+    /** Creates a reader over the UTF-8 bytes of {@code s}. */
+    private static Utf8InputStreamReader reader(String s) {
+        byte[] utf8 = s.getBytes(StandardCharsets.UTF_8);
+        return new Utf8InputStreamReader(new ByteArrayInputStream(utf8));
+    }
+
+}
diff --git a/src/test/java/org/subethamail/smtp/server/MessageHandlerTest.java b/src/test/java/org/subethamail/smtp/server/MessageHandlerTest.java
@@ -39,7 +39,7 @@ public void testCompletedMailTransaction() throws Exception {
         try {
             SmartClient client = SmartClient.createAndConnect("localhost", server.getPort(), "localhost");
             client.from("john@example.com");
-            client.to("jane@example.com");
+            client.to("eñe@example.com");
             client.dataStart();
             client.dataWrite(TextUtils.getAsciiBytes("body"), 4);
             client.dataEnd();
@@ -50,7 +50,7 @@ public void testCompletedMailTransaction() throws Exception {
         InOrder o = Mockito.inOrder(f, h);
         o.verify(f).create(ArgumentMatchers.any(MessageContext.class));
         o.verify(h).from("john@example.com");
-        o.verify(h).recipient("jane@example.com");
+        o.verify(h).recipient("eñe@example.com");
         o.verify(h).data(ArgumentMatchers.any(InputStream.class));
         o.verify(h).done();
         Mockito.verifyNoMoreInteractions(f, h);

diff --git a/src/test/java/org/subethamail/smtp/util/EmailUtilsTest.java b/src/test/java/org/subethamail/smtp/util/EmailUtilsTest.java
@@ -24,6 +24,11 @@ public void testSpaceAddressIsNotValid() {
     public void testBlankAddressIsValid() {
         Assert.assertTrue(EmailUtils.isValidEmailAddress("", true));
     }
+
+    /**
+     * Checks that an address whose local part is a non-ASCII character is
+     * reported as valid.
+     *
+     * NOTE(review): the method name says "Invalid" but the assertion expects
+     * the address to be valid; consider renaming.
+     */
+    @Test
+    public void testSpecialUtf8CharacterIsInvalidInEmailAddress() {
+        boolean valid = EmailUtils.isValidEmailAddress("ñ@abc.com", true);
+        Assert.assertTrue(valid);
+    }
 
     @Test
     public void testExtract() {