Transcode special chars via UTF-8

This commit is contained in:
elasota
2020-02-07 22:08:52 -05:00
parent 7c3dc7d1a3
commit 57d0678090
10 changed files with 222 additions and 70 deletions

View File

@@ -3,6 +3,6 @@
#include <stdint.h>
namespace MacRoman
{
{
uint16_t ToUnicode(uint8_t character);
}

View File

@@ -23,6 +23,7 @@
#include "QDPixMap.h"
#include "ResTypeID.h"
#include "SharedTypes.h"
#include "UTF8.h"
#include "WindowDef.h"
#include "WindowManager.h"
@@ -145,12 +146,11 @@ namespace PortabilityLayer
const rapidjson::Value &nameValue = itemData["name"];
if (nameValue.IsString())
{
uint8_t *destName = item.m_name;
size_t nameLength = nameValue.GetStringLength();
if (nameLength > 255)
nameLength = 255;
destName[0] = static_cast<uint8_t>(nameLength);
memcpy(destName + 1, nameValue.GetString(), nameLength);
size_t strSize;
if (UTF8Processor::DecodeToMacRomanPascalStr(reinterpret_cast<const uint8_t*>(nameValue.GetString()), nameValue.GetStringLength(), item.m_name + 1, sizeof(item.m_name) - 1, strSize))
item.m_name[0] = static_cast<uint8_t>(strSize);
else
item.m_name[0] = 0;
}
else if (nameValue.IsArray())
{

View File

@@ -713,6 +713,8 @@ Window::Window()
, m_wmY(0)
, m_widgets(nullptr)
, m_numWidgets(0)
, m_widgetWithFocus(0)
, m_haveFocus(false)
{
}

View File

@@ -96,6 +96,9 @@ protected:
PortabilityLayer::Widget **m_widgets;
size_t m_numWidgets;
size_t m_widgetWithFocus;
bool m_haveFocus;
};
struct DateTimeRec

View File

@@ -224,6 +224,7 @@
<ClInclude Include="PLRadioButtonWidget.h" />
<ClInclude Include="PLScrollBarWidget.h" />
<ClInclude Include="PLWidgets.h" />
<ClInclude Include="UTF8.h" />
<ClInclude Include="ZipFileProxy.h" />
<ClInclude Include="SimpleImage.h" />
<ClInclude Include="PLKeyEncoding.h" />
@@ -378,6 +379,7 @@
<ClCompile Include="ScanlineMaskIterator.cpp" />
<ClCompile Include="SimpleGraphic.cpp" />
<ClCompile Include="PLHandle.cpp" />
<ClCompile Include="UTF8.cpp" />
<ClCompile Include="WindowDef.cpp" />
<ClCompile Include="WindowManager.cpp" />
<ClCompile Include="XModemCRC.cpp" />

View File

@@ -471,6 +471,9 @@
<ClInclude Include="PLEditboxWidget.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="UTF8.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="CFileStream.cpp">
@@ -740,5 +743,8 @@
<ClCompile Include="PLEditboxWidget.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="UTF8.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

146
PortabilityLayer/UTF8.cpp Normal file
View File

@@ -0,0 +1,146 @@
#include "UTF8.h"
#include "MacRomanConversion.h"
namespace PortabilityLayer
{
// Decodes one UTF-8 encoded code point from the start of a byte buffer.
//
// characters             - buffer to read from.
// availableCharacters    - number of readable bytes in the buffer.
// outCharactersDigested  - on success, the number of bytes consumed (1-4).
// outCodePoint           - on success, the decoded code point.
//
// Returns false (leaving both out parameters untouched) when the buffer is
// empty, the lead byte is not a valid sequence start, the sequence is cut
// short, a trail byte is malformed, the encoding is overlong, or the value
// is a surrogate or lies above U+10FFFF.
bool UTF8Processor::DecodeCodePoint(const uint8_t *characters, size_t availableCharacters, size_t &outCharactersDigested, uint32_t &outCodePoint)
{
	if (availableCharacters == 0)
		return false;

	const uint8_t leadByte = characters[0];

	// 0xxxxxxx: plain ASCII, nothing further to validate.
	if (leadByte < 0x80)
	{
		outCharactersDigested = 1;
		outCodePoint = leadByte;
		return true;
	}

	size_t sequenceLength = 0;
	uint32_t smallestAllowed = 0;	// Lowest value that legitimately needs this length.
	uint32_t accumulated = 0;

	if ((leadByte >> 5) == 0x06)		// 110xxxxx
	{
		sequenceLength = 2;
		smallestAllowed = 0x80;
		accumulated = leadByte & 0x1f;
	}
	else if ((leadByte >> 4) == 0x0e)	// 1110xxxx
	{
		sequenceLength = 3;
		smallestAllowed = 0x800;
		accumulated = leadByte & 0x0f;
	}
	else if ((leadByte >> 3) == 0x1e)	// 11110xxx
	{
		sequenceLength = 4;
		smallestAllowed = 0x10000;
		accumulated = leadByte & 0x07;
	}
	else
	{
		// Stray continuation byte or a lead byte for a 5+ byte form.
		return false;
	}

	if (sequenceLength > availableCharacters)
		return false;

	// Fold in the payload of every trail byte, each of which must be 10xxxxxx.
	const uint8_t *const sequenceEnd = characters + sequenceLength;
	for (const uint8_t *trail = characters + 1; trail != sequenceEnd; ++trail)
	{
		if ((*trail >> 6) != 0x02)
			return false;

		accumulated = (accumulated << 6) | (*trail & 0x3f);
	}

	const bool isOverlong = (accumulated < smallestAllowed);
	const bool isBeyondUnicode = (accumulated > 0x10ffff);
	const bool isSurrogate = (accumulated >= 0xd800 && accumulated <= 0xdfff);
	if (isOverlong || isBeyondUnicode || isSurrogate)
		return false;

	outCharactersDigested = sequenceLength;
	outCodePoint = accumulated;
	return true;
}
// Writes the UTF-8 encoding of a code point into a caller-supplied buffer.
//
// characters            - destination; up to 4 bytes are written.
// outCharactersEmitted  - receives the number of bytes written (1-4).
// codePoint             - value to encode; only its low 21 bits are used.
//
// No validation is performed: surrogates and values above U+10FFFF that
// survive the 21-bit mask are encoded as-is.
void UTF8Processor::EncodeCodePoint(uint8_t *characters, size_t &outCharactersEmitted, uint32_t codePoint)
{
	const uint32_t maskedCodePoint = codePoint & 0x1fffff;

	size_t length = 0;
	uint8_t leadMarker = 0;

	if (maskedCodePoint <= 0x7f)
	{
		length = 1;
		leadMarker = 0x00;
	}
	else if (maskedCodePoint <= 0x7ff)
	{
		length = 2;
		leadMarker = 0xc0;
	}
	else if (maskedCodePoint <= 0xffff)
	{
		length = 3;
		leadMarker = 0xe0;
	}
	else
	{
		length = 4;
		leadMarker = 0xf0;
	}

	// Fill the trail bytes from last to first, peeling six payload bits off
	// the bottom each time; whatever is left over belongs in the lead byte.
	uint32_t remainingBits = maskedCodePoint;
	for (size_t pos = length - 1; pos > 0; pos--)
	{
		characters[pos] = static_cast<uint8_t>(0x80 | (remainingBits & 0x3f));
		remainingBits >>= 6;
	}

	characters[0] = static_cast<uint8_t>(remainingBits | leadMarker);

	outCharactersEmitted = length;
}
// Transcodes a UTF-8 byte string into MacRoman characters.
//
// inChars / inSize  - UTF-8 input and its length in bytes.
// outChars          - destination for the MacRoman bytes. Only character
//                     data is written here; no length prefix is stored, so
//                     the caller is responsible for the Pascal length byte.
// maxOutSize        - capacity of outChars; conversion stops silently once
//                     this many characters have been produced.
// outSizeRef        - on success, the number of characters written.
//
// Returns false as soon as the input fails to decode as UTF-8; in that case
// outSizeRef is not written. Code points with no MacRoman equivalent become
// '?'.
bool UTF8Processor::DecodeToMacRomanPascalStr(const uint8_t *inChars, size_t inSize, uint8_t *outChars, size_t maxOutSize, size_t &outSizeRef)
{
	const uint8_t kReplacement = static_cast<uint8_t>('?');

	const uint8_t *cursor = inChars;
	size_t bytesLeft = inSize;
	size_t written = 0;

	for (; bytesLeft > 0 && written < maxOutSize; written++)
	{
		size_t consumed = 0;
		uint32_t unicodeChar = 0;
		if (!DecodeCodePoint(cursor, bytesLeft, consumed, unicodeChar))
			return false;

		cursor += consumed;
		bytesLeft -= consumed;

		// Start from the replacement character and overwrite it only when a
		// real mapping exists.
		uint8_t translated = kReplacement;

		if (unicodeChar < 0x80)
		{
			// The low half passes through unchanged, except 0x11-0x14 which
			// are always replaced.
			// NOTE(review): presumably those byte values are reserved for
			// special glyphs in the Mac character set - confirm.
			if (unicodeChar < 0x11 || unicodeChar > 0x14)
				translated = static_cast<uint8_t>(unicodeChar);
		}
		else
		{
			// Reverse lookup: scan the upper half of MacRoman for a byte
			// whose Unicode mapping equals the decoded code point.
			for (uint16_t candidate = 0x80; candidate <= 0xff; candidate++)
			{
				const uint16_t candidateUnicode = MacRoman::ToUnicode(static_cast<uint8_t>(candidate));
				if (candidateUnicode == unicodeChar)
				{
					translated = static_cast<uint8_t>(candidate);
					break;
				}
			}
		}

		outChars[written] = translated;
	}

	outSizeRef = written;
	return true;
}
}

15
PortabilityLayer/UTF8.h Normal file
View File

@@ -0,0 +1,15 @@
#pragma once
#include <stdint.h>
namespace PortabilityLayer
{
// Static helpers for converting between UTF-8 byte sequences, Unicode code
// points and MacRoman characters.
class UTF8Processor
{
public:
// Decodes one code point from the first bytes of 'characters'
// ('availableCharacters' bytes are readable). On success returns true and
// sets 'outCharactersDigested' to the number of bytes consumed (1-4) and
// 'outCodePoint' to the decoded value. Returns false for empty input,
// invalid lead or continuation bytes, truncated sequences, overlong
// encodings, surrogates and values above U+10FFFF.
static bool DecodeCodePoint(const uint8_t *characters, size_t availableCharacters, size_t &outCharactersDigested, uint32_t &outCodePoint);
// Encodes 'codePoint' (masked to its low 21 bits, otherwise unvalidated) as
// UTF-8 into 'characters', which must have room for 4 bytes, and stores the
// number of bytes written in 'outCharactersEmitted'.
static void EncodeCodePoint(uint8_t *characters, size_t &outCharactersEmitted, uint32_t codePoint);
// Converts 'inSize' bytes of UTF-8 at 'inChars' to MacRoman characters in
// 'outChars', writing at most 'maxOutSize' characters and storing the count
// in 'outSize'. Unmappable code points become '?'. Returns false if the
// input is not valid UTF-8. No length prefix byte is written.
static bool DecodeToMacRomanPascalStr(const uint8_t *inChars, size_t inSize, uint8_t *outChars, size_t maxOutSize, size_t &outSize);
};
}

View File

@@ -2,6 +2,7 @@
#include "CFileStream.h"
#include "CombinedTimestamp.h"
#include "GPArchive.h"
#include "MacRomanConversion.h"
#include "MemReaderStream.h"
#include "QDPictDecoder.h"
#include "QDPictEmitContext.h"
@@ -10,6 +11,7 @@
#include "ResourceFile.h"
#include "ResourceCompiledTypeList.h"
#include "SharedTypes.h"
#include "UTF8.h"
#include "ZipFile.h"
#include "WaveFormat.h"
@@ -83,6 +85,16 @@ void AppendFmt(std::vector<uint8_t> &array, const char *fmt, ...)
va_end(args);
}
// Appends the UTF-8 encoding of 'codePoint' to the end of 'array'.
void AppendUTF8(std::vector<uint8_t> &array, uint32_t codePoint)
{
	uint8_t encoded[5];
	size_t encodedLength;
	PortabilityLayer::UTF8Processor::EncodeCodePoint(encoded, encodedLength, codePoint);

	// Copy the freshly encoded bytes onto the tail of the vector in one go.
	array.insert(array.end(), encoded, encoded + encodedLength);
}
template<class T>
void VectorAppend(std::vector<T> &vec, const T *items, size_t numItems)
{
@@ -895,85 +907,47 @@ bool ImportDialogItemTemplate(std::vector<uint8_t> &outTXT, const void *inData,
nameLength = 0;
}
bool isAsciiSafe = true;
AppendStr(outTXT, "\n\t\t\t\"name\" : \"");
for (size_t i = 0; i < nameLength; i++)
{
uint8_t nameByte = nameBytes[i];
switch (nameByte)
{
case '\"':
AppendStr(outTXT, "\\\"");
break;
case '\\':
AppendStr(outTXT, "\\\\");
break;
case '\b':
AppendStr(outTXT, "\\b");
break;
case '\f':
AppendStr(outTXT, "\\f");
break;
case '\n':
AppendStr(outTXT, "\\n");
break;
case '\r':
AppendStr(outTXT, "\\r");
break;
case '\t':
AppendStr(outTXT, "\\r");
break;
default:
if (nameByte < ' ' || nameByte > 127)
{
isAsciiSafe = false;
break;
}
};
if (!isAsciiSafe)
break;
}
AppendStr(outTXT, "\n\t\t\t\"name\" : ");
if (isAsciiSafe)
{
outTXT.push_back('\"');
for (size_t i = 0; i < nameLength; i++)
{
uint8_t nameByte = nameBytes[i];
switch (nameByte)
{
case '\"':
AppendStr(outTXT, "\\\"");
break;
case '\\':
AppendStr(outTXT, "\\\\");
break;
case '\b':
AppendStr(outTXT, "\\b");
break;
case '\f':
AppendStr(outTXT, "\\f");
break;
case '\n':
AppendStr(outTXT, "\\n");
break;
case '\r':
AppendStr(outTXT, "\\r");
break;
case '\t':
AppendStr(outTXT, "\\r");
break;
default:
uint16_t unicodeCodePoint = MacRoman::ToUnicode(nameByte);
if (unicodeCodePoint < 0x20 || unicodeCodePoint == 0x7f)
AppendFmt(outTXT, "\\u%04x", static_cast<int>(unicodeCodePoint));
else if (unicodeCodePoint > 0x7f)
AppendUTF8(outTXT, unicodeCodePoint);
else
outTXT.push_back(nameByte);
break;
}
break;
}
outTXT.push_back('\"');
}
else
{
AppendStr(outTXT, "[ ");
for (size_t i = 0; i < nameLength; i++)
{
if (i != 0)
AppendStr(outTXT, ", ");
uint8_t nameByte = nameBytes[i];
AppendFmt(outTXT, "%i", static_cast<int>(nameByte));
}
AppendStr(outTXT, " ]");
}
outTXT.push_back('\"');
AppendStr(outTXT, ",\n\t\t\t\"itemType\" : ");

View File

@@ -62,6 +62,7 @@
<Import Project="..\zlib.props" />
<Import Project="..\Common.props" />
<Import Project="..\GpCommon.props" />
<Import Project="..\MacRomanConversion.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
@@ -69,6 +70,7 @@
<Import Project="..\zlib.props" />
<Import Project="..\Common.props" />
<Import Project="..\GpCommon.props" />
<Import Project="..\MacRomanConversion.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
@@ -76,6 +78,7 @@
<Import Project="..\zlib.props" />
<Import Project="..\Common.props" />
<Import Project="..\GpCommon.props" />
<Import Project="..\MacRomanConversion.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
@@ -83,6 +86,7 @@
<Import Project="..\zlib.props" />
<Import Project="..\Common.props" />
<Import Project="..\GpCommon.props" />
<Import Project="..\MacRomanConversion.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup />