From 44f04c95a1eed09cf2e8125475d0c480917da753 Mon Sep 17 00:00:00 2001 From: Jean Aurambault Date: Wed, 6 Mar 2024 15:20:01 -0800 Subject: [PATCH] Support unicode escape with \uXXXX format in Android strings (#1006) As per doc https://developer.android.com/guide/topics/resources/string-resource#FormattingAndStyling Note on the output, whatever is used as input: \u00A0,  ,   or   the output will be   --- .../mojito/okapi/filters/AndroidFilter.java | 1 + .../mojito/okapi/filters/UnescapeUtils.java | 18 ++++- .../okapi/filters/AndroidFilterTest.java | 4 + .../l10n/mojito/service/tm/TMServiceTest.java | 76 ++++++++++++++++++- 4 files changed, 96 insertions(+), 3 deletions(-) diff --git a/common/src/main/java/com/box/l10n/mojito/okapi/filters/AndroidFilter.java b/common/src/main/java/com/box/l10n/mojito/okapi/filters/AndroidFilter.java index 884bddebbd..f60b6ed9ff 100644 --- a/common/src/main/java/com/box/l10n/mojito/okapi/filters/AndroidFilter.java +++ b/common/src/main/java/com/box/l10n/mojito/okapi/filters/AndroidFilter.java @@ -157,6 +157,7 @@ String unescape(String sourceString) { String unescapedSourceString; unescapedSourceString = sourceString.trim(); + unescapedSourceString = unescapeUtils.replaceEscapedUnicode(unescapedSourceString); if (StringUtils.startsWith(unescapedSourceString, "\"") && StringUtils.endsWith(unescapedSourceString, "\"")) { diff --git a/common/src/main/java/com/box/l10n/mojito/okapi/filters/UnescapeUtils.java b/common/src/main/java/com/box/l10n/mojito/okapi/filters/UnescapeUtils.java index 857173a25b..02d1e73a8e 100644 --- a/common/src/main/java/com/box/l10n/mojito/okapi/filters/UnescapeUtils.java +++ b/common/src/main/java/com/box/l10n/mojito/okapi/filters/UnescapeUtils.java @@ -19,6 +19,7 @@ public class UnescapeUtils { private static final Pattern ESCAPED_QUOTES = Pattern.compile("\\\\(\"|')"); private static final Pattern ESCAPED_BACKQUOTES = Pattern.compile("\\\\(`)"); private static final Pattern ESCAPED_CHARACTERS = 
Pattern.compile("\\\\(.)?"); + private static final Pattern ESCAPED_UNICODE = Pattern.compile("\\\\u([0-9a-fA-F]{4})"); private static final Pattern SPACES = Pattern.compile("\\s+"); private static final Pattern LINE_FEED = Pattern.compile("\n"); @@ -63,10 +64,25 @@ String replaceEscapedBackquotes(String text) { return ESCAPED_BACKQUOTES.matcher(text).replaceAll("$1"); } + /** + * Replace unicode escape character of the form \\uXXXX. + * + *

Must be called before any other method that would unescape the "u" letter, like {@link + * #replaceEscapedCharacters(String)}. A decoded "$" or "\" is inserted literally. + * + * @param text the text that may contain \\uXXXX escape sequences + * @return the text with each \\uXXXX escape sequence replaced by the character it encodes + */ + String replaceEscapedUnicode(String text) { + return ESCAPED_UNICODE.matcher(text).replaceAll( + match -> java.util.regex.Matcher.quoteReplacement( + new String(Character.toChars(Integer.parseInt(match.group(1), 16))))); + } + /** * Replace other escape character with the character itself. * - *

Must be call after replacing espace sequence that need a different treatment like {@link + *

Must be call after replacing escape sequence that need a different treatment like {@link * #replaceEscapedLineFeed(String)} * * @param text diff --git a/common/src/test/java/com/box/l10n/mojito/okapi/filters/AndroidFilterTest.java b/common/src/test/java/com/box/l10n/mojito/okapi/filters/AndroidFilterTest.java index b4ebdef105..29a7187e91 100644 --- a/common/src/test/java/com/box/l10n/mojito/okapi/filters/AndroidFilterTest.java +++ b/common/src/test/java/com/box/l10n/mojito/okapi/filters/AndroidFilterTest.java @@ -96,6 +96,10 @@ public void testUnescaping() { // multi lines and spaces testUnescaping("\n line1 \n line2 \n", "line1 line2"); + + // unicode escape + var str = "Unicode\\u00A0escape"; + testUnescaping("Unicode\\u00A0escape", "Unicode\u00A0escape"); } void testUnescaping(String input, String expected) { diff --git a/webapp/src/test/java/com/box/l10n/mojito/service/tm/TMServiceTest.java b/webapp/src/test/java/com/box/l10n/mojito/service/tm/TMServiceTest.java index 0903f0caa5..f6741ed0c5 100644 --- a/webapp/src/test/java/com/box/l10n/mojito/service/tm/TMServiceTest.java +++ b/webapp/src/test/java/com/box/l10n/mojito/service/tm/TMServiceTest.java @@ -1699,7 +1699,79 @@ public void testLocalizeAndroidCommentWithTranslatableFalse() throws Exception { InheritanceMode.USE_PARENT, null); logger.debug("localized=\n{}", localizedAsset); - assertEquals(localizedAsset, localizedAsset); + assertEquals(assetContent, localizedAsset); + } + + @Test + public void testLocalizeAndroidUnicodeEscape() throws Exception { + + Repository repo = repositoryService.createRepository(testIdWatcher.getEntityName("repository")); + RepositoryLocale repoLocale = repositoryService.addRepositoryLocale(repo, "en-GB"); + + String assetContent = + """ + + + + A string with\\u00A0Unicode Escape + A string with Unicode Escape + A string with Unicode Escape + A string with Unicode Escape + """; + + asset = + assetService.createAssetWithContent(repo.getId(), "res/values/strings.xml", 
assetContent); + asset = assetRepository.findById(asset.getId()).orElse(null); + assetId = asset.getId(); + tmId = repo.getTm().getId(); + + PollableFuture assetResult = + assetService.addOrUpdateAssetAndProcessIfNeeded( + repo.getId(), asset.getPath(), assetContent, false, null, null, null, null, null, null); + try { + pollableTaskService.waitForPollableTask(assetResult.getPollableTask().getId()); + } catch (PollableTaskException | InterruptedException e) { + throw new RuntimeException(e); + } + assetResult.get(); + + TextUnitSearcherParameters textUnitSearcherParameters = new TextUnitSearcherParameters(); + textUnitSearcherParameters.setRepositoryIds(repo.getId()); + textUnitSearcherParameters.setStatusFilter(StatusFilter.FOR_TRANSLATION); + List textUnitDTOs = textUnitSearcher.search(textUnitSearcherParameters); + for (TextUnitDTO textUnitDTO : textUnitDTOs) { + logger.debug("comment=[{}]", textUnitDTO.getComment()); + } + assertEquals(4, textUnitDTOs.size()); + assertEquals("A string with\u00A0Unicode Escape", textUnitDTOs.get(0).getSource()); + assertEquals("Test Unicode Escapes", textUnitDTOs.get(0).getComment()); + assertEquals("A string with\u00A0Unicode Escape", textUnitDTOs.get(1).getSource()); + assertEquals("A string with\u00A0Unicode Escape", textUnitDTOs.get(2).getSource()); + assertEquals("A string with\u00A0Unicode Escape", textUnitDTOs.get(3).getSource()); + + String localizedAsset = + tmService.generateLocalized( + asset, + assetContent, + repoLocale, + "en-GB", + null, + null, + Status.ALL, + InheritanceMode.USE_PARENT, + null); + logger.error("localized=\n{}", localizedAsset); + String expectedLocalizedAsset = + """ + + + + A string with Unicode Escape + A string with Unicode Escape + A string with Unicode Escape + A string with Unicode Escape + """; + assertEquals(expectedLocalizedAsset, localizedAsset); } @Test @@ -2042,7 +2114,7 @@ public void testLocalizeAndroidStringsRemoveUntranslatedSingleItem() throws Exce 
InheritanceMode.REMOVE_UNTRANSLATED, null); logger.debug("localized=\n{}", localizedAsset); - assertEquals(localizedAsset, localizedAsset); + assertEquals(expectedLocalized, localizedAsset); } /**