Skip to content

Commit

Permalink
[librptext] conversion_win32.cpp, cpN_to_utf8(): Call cpN_to_utf16().
Browse files (browse the repository at this point in the history)
Windows uses UTF-16 natively, so the UTF-8 conversion function was
essentially cpN_to_utf16() followed by "convert to UTF-8".

Rewrite the function to actually call cpN_to_utf16() so we can
de-duplicate a fairly big chunk of code.
  • Loading branch information
GerbilSoft committed Oct 24, 2024
1 parent 4e6f03c commit 6605d38
Showing 1 changed file with 1 addition and 53 deletions.
54 changes: 1 addition & 53 deletions src/librptext/conversion_win32.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,60 +101,8 @@ string cpN_to_utf8(unsigned int cp, const char *str, int len, unsigned int flags
return cpRP_to_utf8(cp, str, len);
}

len = check_NULL_terminator(str, len);
DWORD dwFlags = 0;
if (flags & TEXTCONV_FLAG_CP1252_FALLBACK) {
// Fallback is enabled.
// Fail on invalid characters in the first pass.
dwFlags = MB_ERR_INVALID_CHARS;
}

// Convert from `cp` to UTF-16.
u16string s_wcs;

if ((flags & TEXTCONV_FLAG_JIS_X_0208) && len >= 1) {
// Check if the string might be JIS X 0208.
// If it is, make it EUC-JP compatible, then convert it.
bool is0208 = false;
// Heuristic: First character should be 0x21-0x24.
if (*str >= 0x21 && *str <= 0x24) {
is0208 = true;
const char *const p_end = str + len;
for (const char *p = str + 1; p < p_end; p++) {
const uint8_t chr = static_cast<uint8_t>(*p);
if (chr == 0) {
// End of string
break;
} else if (chr & 0x80) {
// High bit cannot be set
is0208 = false;
}
}
}

if (is0208) {
// Make the string EUC-JP compatible.
string eucJP(str, 0, len);
for (char &c : eucJP) {
c |= 0x80;
}
if (W32U_mbs_to_UTF16(s_wcs, eucJP.c_str(), eucJP.size(), 20932, dwFlags) != 0) {
s_wcs.clear();
}
}
}

if (s_wcs.empty()) {
if (W32U_mbs_to_UTF16(s_wcs, str, len, cp, dwFlags) != 0) {
if (flags & TEXTCONV_FLAG_CP1252_FALLBACK) {
// Try again using cp1252.
if (W32U_mbs_to_UTF16(s_wcs, str, len, 1252, 0) != 0) {
// Failed.
s_wcs.clear();
}
}
}
}
u16string s_wcs = cpN_to_utf16(cp, str, len, flags);

string s_mbs;
if (!s_wcs.empty()) {
Expand Down

0 comments on commit 6605d38

Please sign in to comment.