From 57d0678090d6d496214bb6111f862b4ab1670b5c Mon Sep 17 00:00:00 2001 From: elasota Date: Fri, 7 Feb 2020 22:08:52 -0500 Subject: [PATCH] Transcode special chars via UTF-8 --- MacRomanConversion/MacRomanConversion.h | 2 +- PortabilityLayer/DialogManager.cpp | 12 +- PortabilityLayer/PLCore.cpp | 2 + PortabilityLayer/PLCore.h | 3 + PortabilityLayer/PortabilityLayer.vcxproj | 2 + .../PortabilityLayer.vcxproj.filters | 6 + PortabilityLayer/UTF8.cpp | 146 ++++++++++++++++++ PortabilityLayer/UTF8.h | 15 ++ gpr2gpa/gpr2gpa.cpp | 100 +++++------- gpr2gpa/gpr2gpa.vcxproj | 4 + 10 files changed, 222 insertions(+), 70 deletions(-) create mode 100644 PortabilityLayer/UTF8.cpp create mode 100644 PortabilityLayer/UTF8.h diff --git a/MacRomanConversion/MacRomanConversion.h b/MacRomanConversion/MacRomanConversion.h index ccf1130..1bf3412 100644 --- a/MacRomanConversion/MacRomanConversion.h +++ b/MacRomanConversion/MacRomanConversion.h @@ -3,6 +3,6 @@ #include namespace MacRoman -{ +{ uint16_t ToUnicode(uint8_t character); } diff --git a/PortabilityLayer/DialogManager.cpp b/PortabilityLayer/DialogManager.cpp index a7e5f71..e9c072c 100644 --- a/PortabilityLayer/DialogManager.cpp +++ b/PortabilityLayer/DialogManager.cpp @@ -23,6 +23,7 @@ #include "QDPixMap.h" #include "ResTypeID.h" #include "SharedTypes.h" +#include "UTF8.h" #include "WindowDef.h" #include "WindowManager.h" @@ -145,12 +146,11 @@ namespace PortabilityLayer const rapidjson::Value &nameValue = itemData["name"]; if (nameValue.IsString()) { - uint8_t *destName = item.m_name; - size_t nameLength = nameValue.GetStringLength(); - if (nameLength > 255) - nameLength = 255; - destName[0] = static_cast(nameLength); - memcpy(destName + 1, nameValue.GetString(), nameLength); + size_t strSize; + if (UTF8Processor::DecodeToMacRomanPascalStr(reinterpret_cast(nameValue.GetString()), nameValue.GetStringLength(), item.m_name + 1, sizeof(item.m_name) - 1, strSize)) + item.m_name[0] = static_cast(strSize); + else + item.m_name[0] = 0; } 
else if (nameValue.IsArray()) { diff --git a/PortabilityLayer/PLCore.cpp b/PortabilityLayer/PLCore.cpp index 08f807d..4f888f8 100644 --- a/PortabilityLayer/PLCore.cpp +++ b/PortabilityLayer/PLCore.cpp @@ -713,6 +713,8 @@ Window::Window() , m_wmY(0) , m_widgets(nullptr) , m_numWidgets(0) + , m_widgetWithFocus(0) + , m_haveFocus(false) { } diff --git a/PortabilityLayer/PLCore.h b/PortabilityLayer/PLCore.h index ac9fb5a..fa1e65e 100644 --- a/PortabilityLayer/PLCore.h +++ b/PortabilityLayer/PLCore.h @@ -96,6 +96,9 @@ protected: PortabilityLayer::Widget **m_widgets; size_t m_numWidgets; + + size_t m_widgetWithFocus; + bool m_haveFocus; }; struct DateTimeRec diff --git a/PortabilityLayer/PortabilityLayer.vcxproj b/PortabilityLayer/PortabilityLayer.vcxproj index c618a54..ee14890 100644 --- a/PortabilityLayer/PortabilityLayer.vcxproj +++ b/PortabilityLayer/PortabilityLayer.vcxproj @@ -224,6 +224,7 @@ + @@ -378,6 +379,7 @@ + diff --git a/PortabilityLayer/PortabilityLayer.vcxproj.filters b/PortabilityLayer/PortabilityLayer.vcxproj.filters index 19160cb..0fe804a 100644 --- a/PortabilityLayer/PortabilityLayer.vcxproj.filters +++ b/PortabilityLayer/PortabilityLayer.vcxproj.filters @@ -471,6 +471,9 @@ Header Files + + Header Files + @@ -740,5 +743,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/PortabilityLayer/UTF8.cpp b/PortabilityLayer/UTF8.cpp new file mode 100644 index 0000000..c6a267a --- /dev/null +++ b/PortabilityLayer/UTF8.cpp @@ -0,0 +1,146 @@ +#include "UTF8.h" +#include "MacRomanConversion.h" + +namespace PortabilityLayer +{ /* DecodeCodePoint: decodes one UTF-8 sequence from the start of characters. Returns false for empty input, an invalid lead byte, a truncated sequence, a bad continuation byte, an overlong form, a value above 0x10ffff, or a surrogate (0xd800-0xdfff). The out-params are written only on success. */ + bool UTF8Processor::DecodeCodePoint(const uint8_t *characters, size_t availableCharacters, size_t &outCharactersDigested, uint32_t &outCodePoint) + { + if (availableCharacters <= 0) + return false; + + if ((characters[0] & 0x80) == 0x00) + { + outCharactersDigested = 1; + outCodePoint = characters[0]; + return true; + } + + size_t sz = 0; + uint32_t codePoint = 0; + uint32_t minCodePoint = 0; + if ((characters[0] &
0xe0) == 0xc0) + { + sz = 2; + minCodePoint = 0x80; + codePoint = (characters[0] & 0x1f); + } + else if ((characters[0] & 0xf0) == 0xe0) + { + sz = 3; + minCodePoint = 0x800; + codePoint = (characters[0] & 0x0f); + } + else if ((characters[0] & 0xf8) == 0xf0) + { + sz = 4; + minCodePoint = 0x10000; + codePoint = (characters[0] & 0x07); + } + else + return false; + + if (availableCharacters < sz) + return false; + + for (size_t auxByte = 1; auxByte < sz; auxByte++) + { + if ((characters[auxByte] & 0xc0) != 0x80) + return false; + + codePoint = (codePoint << 6) | (characters[auxByte] & 0x3f); + } + + if (codePoint < minCodePoint || codePoint > 0x10ffff) + return false; + + if (codePoint >= 0xd800 && codePoint <= 0xdfff) + return false; + + outCodePoint = codePoint; + outCharactersDigested = sz; + + return true; + } + /* EncodeCodePoint: writes the UTF-8 encoding of codePoint (masked to 21 bits) into characters and reports the byte count, 1 to 4, in outCharactersEmitted. No range or surrogate validation is performed. */ + void UTF8Processor::EncodeCodePoint(uint8_t *characters, size_t &outCharactersEmitted, uint32_t codePoint) + { + codePoint &= 0x1fffff; + + uint8_t signalBits = 0; + size_t numBytes = 0; + if (codePoint < 0x0080) + { + numBytes = 1; + signalBits = 0; + } + else if (codePoint < 0x0800) + { + numBytes = 2; + signalBits = 0xc0; + } + else if (codePoint < 0x10000) + { + numBytes = 3; + signalBits = 0xe0; + } + else + { + numBytes = 4; + signalBits = 0xf0; + } + + characters[0] = static_cast((codePoint >> (6 * (numBytes - 1))) | signalBits); + + for (size_t i = 1; i < numBytes; i++) + { + const uint32_t isolate = ((codePoint >> (6 * (numBytes - 1 - i))) & 0x3f) | 0x80; + characters[i] = static_cast(isolate); + } + + outCharactersEmitted = numBytes; + } + /* DecodeToMacRomanPascalStr: converts UTF-8 input to one output byte per code point, stopping when the input is consumed or maxOutSize bytes have been written. Returns false, leaving outSizeRef untouched, if any sequence fails to decode. */ + bool UTF8Processor::DecodeToMacRomanPascalStr(const uint8_t *inChars, size_t inSize, uint8_t *outChars, size_t maxOutSize, size_t &outSizeRef) + { + size_t outSize = 0; + while (inSize > 0 && outSize < maxOutSize) + { + size_t digestedChars = 0; + uint32_t codePoint = 0; + if (!DecodeCodePoint(inChars, inSize, digestedChars, codePoint)) + return false; + + inChars += digestedChars; + inSize -=
digestedChars; + /* Code points 0x11-0x14, and code points of 0x80 or above with no matching byte, are written as '?'. */ + uint8_t macRomanChar = 0; + if (codePoint >= 0x11 && codePoint <= 0x14) + macRomanChar = static_cast('?'); + else if (codePoint < 0x80) + macRomanChar = static_cast(codePoint); + else + { + for (uint16_t c = 0x80; c <= 0xff; c++) /* NOTE(review): only bytes 0x80-0xff are searched; confirm MacRoman::ToUnicode maps no lower byte to a non-ASCII code point, otherwise that character is written as '?' */ + { + uint16_t decodedCP = MacRoman::ToUnicode(c); + if (decodedCP == codePoint) + { + macRomanChar = c; + break; + } + } + + if (macRomanChar == 0) + macRomanChar = static_cast('?'); + } + + *outChars = macRomanChar; + outChars++; + outSize++; + } + + outSizeRef = outSize; + + return true; + } +} diff --git a/PortabilityLayer/UTF8.h b/PortabilityLayer/UTF8.h new file mode 100644 index 0000000..1cd9a64 --- /dev/null +++ b/PortabilityLayer/UTF8.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +namespace PortabilityLayer +{ /* UTF8Processor: static helpers that decode and encode single UTF-8 code points and convert UTF-8 text to MacRoman bytes. */ + class UTF8Processor + { + public: + static bool DecodeCodePoint(const uint8_t *characters, size_t availableCharacters, size_t &outCharactersDigested, uint32_t &outCodePoint); + static void EncodeCodePoint(uint8_t *characters, size_t &outCharactersEmitted, uint32_t codePoint); + + static bool DecodeToMacRomanPascalStr(const uint8_t *inChars, size_t inSize, uint8_t *outChars, size_t maxOutSize, size_t &outSize); + }; +} diff --git a/gpr2gpa/gpr2gpa.cpp b/gpr2gpa/gpr2gpa.cpp index 85bb196..171fffc 100644 --- a/gpr2gpa/gpr2gpa.cpp +++ b/gpr2gpa/gpr2gpa.cpp @@ -2,6 +2,7 @@ #include "CFileStream.h" #include "CombinedTimestamp.h" #include "GPArchive.h" +#include "MacRomanConversion.h" #include "MemReaderStream.h" #include "QDPictDecoder.h" #include "QDPictEmitContext.h" @@ -10,6 +11,7 @@ #include "ResourceFile.h" #include "ResourceCompiledTypeList.h" #include "SharedTypes.h" +#include "UTF8.h" #include "ZipFile.h" #include "WaveFormat.h" @@ -83,6 +85,16 @@ void AppendFmt(std::vector &array, const char *fmt, ...)
va_end(args); } +void AppendUTF8(std::vector &array, uint32_t codePoint) /* Appends the UTF-8 encoding of codePoint to array, one byte at a time. */ +{ + uint8_t bytes[5]; + size_t sz; + + PortabilityLayer::UTF8Processor::EncodeCodePoint(bytes, sz, codePoint); + for (size_t i = 0; i < sz; i++) + array.push_back(bytes[i]); +} + template void VectorAppend(std::vector &vec, const T *items, size_t numItems) { @@ -895,85 +907,47 @@ bool ImportDialogItemTemplate(std::vector &outTXT, const void *inData, nameLength = 0; } - bool isAsciiSafe = true; + AppendStr(outTXT, "\n\t\t\t\"name\" : \""); + for (size_t i = 0; i < nameLength; i++) { uint8_t nameByte = nameBytes[i]; switch (nameByte) { + case '\"': + AppendStr(outTXT, "\\\""); + break; + case '\\': + AppendStr(outTXT, "\\\\"); + break; case '\b': + AppendStr(outTXT, "\\b"); + break; case '\f': + AppendStr(outTXT, "\\f"); + break; case '\n': + AppendStr(outTXT, "\\n"); + break; case '\r': + AppendStr(outTXT, "\\r"); + break; case '\t': + AppendStr(outTXT, "\\t"); /* a tab must be written with the JSON tab escape, not the carriage-return escape */ break; default: - if (nameByte < ' ' || nameByte > 127) - { - isAsciiSafe = false; - break; - } - }; - - if (!isAsciiSafe) - break; - } - - AppendStr(outTXT, "\n\t\t\t\"name\" : "); - if (isAsciiSafe) - { - outTXT.push_back('\"'); - - for (size_t i = 0; i < nameLength; i++) - { - uint8_t nameByte = nameBytes[i]; - switch (nameByte) - { - case '\"': - AppendStr(outTXT, "\\\""); - break; - case '\\': - AppendStr(outTXT, "\\\\"); - break; - case '\b': - AppendStr(outTXT, "\\b"); - break; - case '\f': - AppendStr(outTXT, "\\f"); - break; - case '\n': - AppendStr(outTXT, "\\n"); - break; - case '\r': - AppendStr(outTXT, "\\r"); - break; - case '\t': - AppendStr(outTXT, "\\r"); - break; - default: + uint16_t unicodeCodePoint = MacRoman::ToUnicode(nameByte); /* Remaining bytes are transcoded: code points below 0x20 and 0x7f become JSON unicode escapes, code points above 0x7f become UTF-8, everything else is copied verbatim. */ + if (unicodeCodePoint < 0x20 || unicodeCodePoint == 0x7f) + AppendFmt(outTXT, "\\u%04x", static_cast(unicodeCodePoint)); + else if (unicodeCodePoint > 0x7f) + AppendUTF8(outTXT, unicodeCodePoint); + else outTXT.push_back(nameByte); - break; - } + break; } - - outTXT.push_back('\"'); } -
else - { - AppendStr(outTXT, "[ "); - for (size_t i = 0; i < nameLength; i++) - { - if (i != 0) - AppendStr(outTXT, ", "); - - uint8_t nameByte = nameBytes[i]; - AppendFmt(outTXT, "%i", static_cast(nameByte)); - - } - - AppendStr(outTXT, " ]"); - } + outTXT.push_back('\"'); AppendStr(outTXT, ",\n\t\t\t\"itemType\" : "); diff --git a/gpr2gpa/gpr2gpa.vcxproj b/gpr2gpa/gpr2gpa.vcxproj index 8f6d615..99002c4 100644 --- a/gpr2gpa/gpr2gpa.vcxproj +++ b/gpr2gpa/gpr2gpa.vcxproj @@ -62,6 +62,7 @@ + @@ -69,6 +70,7 @@ + @@ -76,6 +78,7 @@ + @@ -83,6 +86,7 @@ +