Skip to content

Commit

Permalink
Merge pull request #70 from hmlendea/normalisation
Browse files Browse the repository at this point in the history
Improved normalisation of Arabic names
  • Loading branch information
hmlendea authored Dec 4, 2023
2 parents 1ba5ae0 + e1cc7d2 commit a03e1db
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ public void SetUp()
[TestCase("Ǧuzur al-Īǧẗ", "Ğuzur al-Īğah")]
[TestCase("Ḥadīṯẗ", "Hadīthah")]
[TestCase("Ȟaȟáwakpa", "Ĥaĥáwakpa")]
[TestCase("Ḥamāẗ", "Hamāh")]
[TestCase("H̱rūnīnġn", "Khrūnīnġn")]
[TestCase("Ins Br̥k", "Ins Bruk")]
[TestCase("Iṉspruk", "Iņspruk")]
Expand All @@ -93,6 +94,7 @@ public void SetUp()
[TestCase("Kȁzahstān", "Kàzahstān")]
[TestCase("Khar‌gōn", "Khargōn")]
[TestCase("K‍ragujevak", "Kragujevak")]
[TestCase("Lablaẗ", "Lablah")]
[TestCase("Lāip‌ॎsiśa", "Lāipsiśa")]
[TestCase("Lėnkėjė", "Lėnkėjė")]
[TestCase("Likṟṟaṉ‌sṟṟaiṉ", "Likrransrrain")]
Expand Down Expand Up @@ -150,11 +152,11 @@ public void SetUp()
[TestCase("Užhorod", "Užhorod")]
[TestCase("Vialikaja Poĺšča", "Vialikaja Poĺšča")]
[TestCase("Vюrцby’rg", "Viurcby’rg")]
[TestCase("پwyrṭūrīkū", "Bwyrțūrīkū")]
[TestCase("Ẇel‌ś‌", "Ẃelś")]
[TestCase("Вуллонгонг", "Vullongong")]
[TestCase("Эstoniья", "Estoni'ia")]
[TestCase("Юli’h", "Iuli’h")]
[TestCase("پwyrṭūrīkū", "Bwyrțūrīkū")]
public void WhenNormalisingForCK3_ReturnsTheExpectedNormalisedName(
string name,
string expectedResult)
Expand Down Expand Up @@ -218,6 +220,7 @@ public void WhenNormalisingForCK3_ReturnsTheExpectedNormalisedName(
[TestCase("G‍roseṭō", "Groseţō")]
[TestCase("Ḥadīṯẗ", "Hadīthah")]
[TestCase("Ȟaȟáwakpa", "Ĥaĥáwakpa")]
[TestCase("Ḥamāẗ", "Hamāh")]
[TestCase("H̱rūnīnġn", "Khrūnīnġn")]
[TestCase("Ins Br̥k", "Ins Bruk")]
[TestCase("Iṉspruk", "Iņspruk")]
Expand All @@ -232,6 +235,7 @@ public void WhenNormalisingForCK3_ReturnsTheExpectedNormalisedName(
[TestCase("Kȁzahstān", "Kàzahstān")]
[TestCase("Khar‌gōn", "Khargōn")]
[TestCase("K‍ragujevak", "Kragujevak")]
[TestCase("Lablaẗ", "Lablah")]
[TestCase("Lāip‌ॎsiśa", "Lāipsiśa")]
[TestCase("Lėnkėjė", "Lėnkėjė")]
[TestCase("Likṟṟaṉ‌sṟṟaiṉ", "Likrransrrain")]
Expand Down Expand Up @@ -355,6 +359,7 @@ public void WhenNormalisingForHOI4City_ReturnsTheExpectedNormalisedName(
[TestCase("G‍roseṭō", "Grosetõ")]
[TestCase("Ḥadīṯẗ", "Hadïthah")]
[TestCase("Ȟaȟáwakpa", "Haháwakpa")]
[TestCase("Ḥamāẗ", "Hamãh")]
[TestCase("H̱rūnīnġn", "Khrünïnghn")]
[TestCase("Ins Br̥k", "Ins Bruk")]
[TestCase("Iṉspruk", "Inspruk")]
Expand All @@ -369,6 +374,7 @@ public void WhenNormalisingForHOI4City_ReturnsTheExpectedNormalisedName(
[TestCase("Kȁzahstān", "Kàzahstãn")]
[TestCase("Khar‌gōn", "Khargõn")]
[TestCase("K‍ragujevak", "Kragujevak")]
[TestCase("Lablaẗ", "Lablah")]
[TestCase("Lāip‌ॎsiśa", "Lãipsisa")]
[TestCase("Lėnkėjė", "Lénkéjé")]
[TestCase("Likṟṟaṉ‌sṟṟaiṉ", "Likrransrrain")]
Expand Down Expand Up @@ -493,6 +499,7 @@ public void WhenNormalisingForWindow1252_ReturnsTheExpectedNormalisedName(
[TestCase("G‍roseṭō", "Grosetō")]
[TestCase("Ḥadīṯẗ", "Hadīthah")]
[TestCase("Ȟaȟáwakpa", "Haháwakpa")]
[TestCase("Ḥamāẗ", "Hamāh")]
[TestCase("H̱rūnīnġn", "Khrūnīnghn")]
[TestCase("Ins Br̥k", "Ins Bruk")]
[TestCase("Iṉspruk", "Inspruk")]
Expand All @@ -507,6 +514,7 @@ public void WhenNormalisingForWindow1252_ReturnsTheExpectedNormalisedName(
[TestCase("Kȁzahstān", "Kàzahstān")]
[TestCase("Khar‌gōn", "Khargōn")]
[TestCase("K‍ragujevak", "Kragujevak")]
[TestCase("Lablaẗ", "Lablah")]
[TestCase("Lāip‌ॎsiśa", "Lāipsisa")]
[TestCase("Lėnkėjė", "Lénkéjé")]
[TestCase("Likṟṟaṉ‌sṟṟaiṉ", "Likrransrrain")]
Expand Down
12 changes: 8 additions & 4 deletions MoreCulturalNamesBuilder/Service/NameNormaliser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ public string ToCK3Charset(string name)
processedName = Regex.Replace(processedName, "[ả]", "à");
processedName = Regex.Replace(processedName, "[ǟ]", "ä");
processedName = Regex.Replace(processedName, "[ậ]", "â");
processedName = Regex.Replace(processedName, "[ẗ]", "ah");
processedName = Regex.Replace(processedName, "ā[ẗ]", "āh");
processedName = Regex.Replace(processedName, "[a]*[ẗ]", "ah");
processedName = Regex.Replace(processedName, "[ḃḅ]", "b");
processedName = Regex.Replace(processedName, "[ḏḍɗɖḑ]", "d");
processedName = Regex.Replace(processedName, "[ẹ]", "e");
Expand Down Expand Up @@ -204,7 +205,8 @@ public string ToHOI4CityCharset(string name)
processedName = Regex.Replace(processedName, "[ả]", "à");
processedName = Regex.Replace(processedName, "[ǟ]", "ä");
processedName = Regex.Replace(processedName, "[ậ]", "â");
processedName = Regex.Replace(processedName, "[ẗ]", "ah");
processedName = Regex.Replace(processedName, "ā[ẗ]", "āh");
processedName = Regex.Replace(processedName, "[a]*[ẗ]", "ah");
processedName = Regex.Replace(processedName, "[ḃḅ]", "b");
processedName = Regex.Replace(processedName, "[ḏḍɗɖḑ]", "d");
processedName = Regex.Replace(processedName, "[ẹ]", "e");
Expand Down Expand Up @@ -420,7 +422,8 @@ public string ToImperatorRomeCharset(string name)
processedName = Regex.Replace(processedName, "[ǟ]", "ä");
processedName = Regex.Replace(processedName, "[ậ]", "â");
processedName = Regex.Replace(processedName, "[ả]", "à");
processedName = Regex.Replace(processedName, "[ẗ]", "ah");
processedName = Regex.Replace(processedName, "ā[ẗ]", "āh");
processedName = Regex.Replace(processedName, "[a]*[ẗ]", "ah");
processedName = Regex.Replace(processedName, "[č]", "ch");
processedName = Regex.Replace(processedName, "[ćĉċ]", "c");
processedName = Regex.Replace(processedName, "[ď]", "d");
Expand Down Expand Up @@ -552,7 +555,8 @@ public string ToWindows1252(string name)
processedName = Regex.Replace(processedName, "[Ż]", "Ž");
processedName = Regex.Replace(processedName, "[ǣ]", "æ");
processedName = Regex.Replace(processedName, "[ạəą]", "a");
processedName = Regex.Replace(processedName, "[ẗ]", "ah");
processedName = Regex.Replace(processedName, "ā[ẗ]", "āh");
processedName = Regex.Replace(processedName, "[a]*[ẗ]", "ah");
processedName = Regex.Replace(processedName, "[ả]", "à");
processedName = Regex.Replace(processedName, "[ậ]", "â");
processedName = Regex.Replace(processedName, "[ăā]", "ã");
Expand Down

0 comments on commit a03e1db

Please sign in to comment.