diff --git a/Aerofoil/GpSystemServices_Win32.cpp b/Aerofoil/GpSystemServices_Win32.cpp index 8758638..f2af474 100644 --- a/Aerofoil/GpSystemServices_Win32.cpp +++ b/Aerofoil/GpSystemServices_Win32.cpp @@ -5,8 +5,7 @@ #include "IGpClipboardContents.h" -#include "UTF16.h" -#include "UTF8.h" +#include "GpUnicode.h" #include #include diff --git a/AerofoilSDL/GpDisplayDriver_SDL_GL2.cpp b/AerofoilSDL/GpDisplayDriver_SDL_GL2.cpp index fe48b9b..eee9354 100644 --- a/AerofoilSDL/GpDisplayDriver_SDL_GL2.cpp +++ b/AerofoilSDL/GpDisplayDriver_SDL_GL2.cpp @@ -8,6 +8,7 @@ #include "GpRingBuffer.h" #include "GpInputDriver_SDL_Gamepad.h" #include "GpSDL.h" +#include "GpUnicode.h" #include "IGpCursor.h" #include "IGpDisplayDriverSurface.h" #include "IGpLogDriver.h" @@ -52,68 +53,6 @@ struct GpDisplayDriver_SDL_GL2_Prefs bool m_isFullScreen; }; -namespace DeleteMe -{ - bool DecodeCodePoint(const uint8_t *characters, size_t availableCharacters, size_t &outCharactersDigested, uint32_t &outCodePoint) - { - if (availableCharacters <= 0) - return false; - - if ((characters[0] & 0x80) == 0x00) - { - outCharactersDigested = 1; - outCodePoint = characters[0]; - return true; - } - - size_t sz = 0; - uint32_t codePoint = 0; - uint32_t minCodePoint = 0; - if ((characters[0] & 0xe0) == 0xc0) - { - sz = 2; - minCodePoint = 0x80; - codePoint = (characters[0] & 0x1f); - } - else if ((characters[0] & 0xf0) == 0xe0) - { - sz = 3; - minCodePoint = 0x800; - codePoint = (characters[0] & 0x0f); - } - else if ((characters[0] & 0xf8) == 0xf0) - { - sz = 4; - minCodePoint = 0x10000; - codePoint = (characters[0] & 0x07); - } - else - return false; - - if (availableCharacters < sz) - return false; - - for (size_t auxByte = 1; auxByte < sz; auxByte++) - { - if ((characters[auxByte] & 0xc0) != 0x80) - return false; - - codePoint = (codePoint << 6) | (characters[auxByte] & 0x3f); - } - - if (codePoint < minCodePoint || codePoint > 0x10ffff) - return false; - - if (codePoint >= 0xd800 && codePoint <= 0xdfff) - return false; - - outCodePoint = codePoint; - outCharactersDigested = sz; - - return true; - } -} - namespace GpBinarizedShaders { extern const char *g_drawQuadV_GL2; @@ -2041,7 +1980,7 @@ void GpDisplayDriver_SDL_GL2::TranslateSDLMessage(const SDL_Event *msg, IGpVOSEv { uint32_t codePoint; size_t numDigested; - DeleteMe::DecodeCodePoint(reinterpret_cast(teEvt->text) + parseOffset, lenUTF8 - parseOffset, numDigested, codePoint); + GpUnicode::UTF8::Decode(reinterpret_cast(teEvt->text) + parseOffset, lenUTF8 - parseOffset, numDigested, codePoint); parseOffset += numDigested; diff --git a/CMakeLists.txt b/CMakeLists.txt index 592cb6b..a3480d5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -108,7 +108,6 @@ add_library(PortabilityLayer STATIC PortabilityLayer/SimpleGraphic.cpp PortabilityLayer/TextPlacer.cpp PortabilityLayer/UTF8.cpp - PortabilityLayer/UTF16.cpp PortabilityLayer/WindowDef.cpp PortabilityLayer/WindowManager.cpp PortabilityLayer/WorkerThread.cpp diff --git a/GpCommon/GpUnicode.h b/GpCommon/GpUnicode.h new file mode 100644 index 0000000..786b4b5 --- /dev/null +++ b/GpCommon/GpUnicode.h @@ -0,0 +1,165 @@ +#pragma once + +#include +#include + +namespace GpUnicode +{ + namespace UTF8 + { + static const unsigned int kMaxEncodedBytes = 4; + + inline bool Decode(const uint8_t *characters, size_t availableCharacters, size_t &outCharactersDigested, uint32_t &outCodePoint) + { + if (availableCharacters <= 0) + return false; + + if ((characters[0] & 0x80) == 0x00) + { + outCharactersDigested = 1; + outCodePoint = characters[0]; + return true; + } + + size_t sz = 0; + uint32_t codePoint = 0; + uint32_t minCodePoint = 0; + if ((characters[0] & 0xe0) == 0xc0) + { + sz = 2; + minCodePoint = 0x80; + codePoint = (characters[0] & 0x1f); + } + else if ((characters[0] & 0xf0) == 0xe0) + { + sz = 3; + minCodePoint = 0x800; + codePoint = (characters[0] & 0x0f); + } + else if ((characters[0] & 0xf8) == 0xf0) + { + sz = 4; + minCodePoint = 0x10000; + codePoint = (characters[0] & 0x07); + } + else + return false; + + if (availableCharacters < sz) + return false; + + for (size_t auxByte = 1; auxByte < sz; auxByte++) + { + if ((characters[auxByte] & 0xc0) != 0x80) + return false; + + codePoint = (codePoint << 6) | (characters[auxByte] & 0x3f); + } + + if (codePoint < minCodePoint || codePoint > 0x10ffff) + return false; + + if (codePoint >= 0xd800 && codePoint <= 0xdfff) + return false; + + outCodePoint = codePoint; + outCharactersDigested = sz; + + return true; + } + + inline void Encode(uint8_t *characters, size_t &outCharactersEmitted, uint32_t codePoint) + { + codePoint &= 0x1fffff; + + uint8_t signalBits = 0; + size_t numBytes = 0; + if (codePoint < 0x0080) + { + numBytes = 1; + signalBits = 0; + } + else if (codePoint < 0x0800) + { + numBytes = 2; + signalBits = 0xc0; + } + else if (codePoint < 0x10000) + { + numBytes = 3; + signalBits = 0xe0; + } + else + { + numBytes = 4; + signalBits = 0xf0; + } + + characters[0] = static_cast((codePoint >> (6 * (numBytes - 1))) | signalBits); + + for (size_t i = 1; i < numBytes; i++) + { + const uint32_t isolate = ((codePoint >> (6 * (numBytes - 1 - i))) & 0x3f) | 0x80; + characters[i] = static_cast(isolate); + } + + outCharactersEmitted = numBytes; + } + } + + namespace UTF16 + { + inline bool Decode(const uint16_t *characters, size_t availableCharacters, size_t &outCharactersDigested, uint32_t &outCodePoint) + { + if (availableCharacters <= 0) + return false; + + if ((characters[0] & 0xff80) == 0x00) + { + outCharactersDigested = 1; + outCodePoint = characters[0]; + return true; + } + + if (characters[0] <= 0xd7ff || characters[0] >= 0xe000) + { + outCharactersDigested = 1; + outCodePoint = characters[0]; + return true; + } + + // Surrogate pair + if (characters[0] >= 0xdc00 || availableCharacters < 2) + return false; + + if (characters[1] < 0xdc00 || characters[1] >= 0xe000) + return false; + + uint16_t highBits = (characters[0] & 0x3ff); + uint16_t lowBits = (characters[1] & 0x3ff); + + outCharactersDigested = 2; + outCodePoint = (highBits << 10) + lowBits + 0x10000; + + return true; + } + + inline void Encode(uint16_t *characters, size_t &outCharactersEmitted, uint32_t codePoint) + { + if (codePoint <= 0xd7ff || codePoint >= 0xe000) + { + outCharactersEmitted = 1; + characters[0] = static_cast(codePoint); + return; + } + + uint32_t codePointBits = (codePoint - 0x10000) & 0xfffff; + uint16_t lowBits = (codePointBits & 0x3ff); + uint16_t highBits = ((codePointBits >> 10) & 0x3ff); + + outCharactersEmitted = 2; + characters[0] = (0xd800 + highBits); + characters[1] = (0xdc00 + lowBits); + } + } +} diff --git a/HouseTool/HouseTool.cpp b/HouseTool/HouseTool.cpp index bb052f7..4278ad4 100644 --- a/HouseTool/HouseTool.cpp +++ b/HouseTool/HouseTool.cpp @@ -2,7 +2,7 @@ #include "PLCore.h" #include "PLBigEndian.h" #include "MacRomanConversion.h" -#include "UTF8.h" +#include "GpUnicode.h" #include "WindowsUnicodeToolShim.h" #include @@ -376,122 +376,122 @@ static const size_t houseSize = sizeof(houseType); EnumDef g_objectTypeEnum[] = { { -1, "kObjectIsEmpty" }, - ENUM_VALUE_DEF(kFloorVent), - ENUM_VALUE_DEF(kCeilingVent), - ENUM_VALUE_DEF(kFloorBlower), - ENUM_VALUE_DEF(kCeilingBlower), - ENUM_VALUE_DEF(kSewerGrate), - ENUM_VALUE_DEF(kLeftFan), - ENUM_VALUE_DEF(kRightFan), - ENUM_VALUE_DEF(kTaper), - ENUM_VALUE_DEF(kCandle), - ENUM_VALUE_DEF(kStubby), - ENUM_VALUE_DEF(kTiki), - ENUM_VALUE_DEF(kBBQ), - ENUM_VALUE_DEF(kInvisBlower), - ENUM_VALUE_DEF(kGrecoVent), - ENUM_VALUE_DEF(kSewerBlower), - ENUM_VALUE_DEF(kLiftArea), - ENUM_VALUE_DEF(kTable), - ENUM_VALUE_DEF(kShelf), - ENUM_VALUE_DEF(kCabinet), - ENUM_VALUE_DEF(kFilingCabinet), - ENUM_VALUE_DEF(kWasteBasket), - ENUM_VALUE_DEF(kMilkCrate), - ENUM_VALUE_DEF(kCounter), - ENUM_VALUE_DEF(kDresser), - ENUM_VALUE_DEF(kDeckTable), - ENUM_VALUE_DEF(kStool), - ENUM_VALUE_DEF(kTrunk), - ENUM_VALUE_DEF(kInvisObstacle), - ENUM_VALUE_DEF(kManhole), - ENUM_VALUE_DEF(kBooks), - ENUM_VALUE_DEF(kInvisBounce), - ENUM_VALUE_DEF(kRedClock), - ENUM_VALUE_DEF(kBlueClock), - ENUM_VALUE_DEF(kYellowClock), - ENUM_VALUE_DEF(kCuckoo), - ENUM_VALUE_DEF(kPaper), - ENUM_VALUE_DEF(kBattery), - ENUM_VALUE_DEF(kBands), - ENUM_VALUE_DEF(kGreaseRt), - ENUM_VALUE_DEF(kGreaseLf), - ENUM_VALUE_DEF(kFoil), - ENUM_VALUE_DEF(kInvisBonus), - ENUM_VALUE_DEF(kStar), - ENUM_VALUE_DEF(kSparkle), - ENUM_VALUE_DEF(kHelium), - ENUM_VALUE_DEF(kSlider), - ENUM_VALUE_DEF(kUpStairs), - ENUM_VALUE_DEF(kDownStairs), - ENUM_VALUE_DEF(kMailboxLf), - ENUM_VALUE_DEF(kMailboxRt), - ENUM_VALUE_DEF(kFloorTrans), - ENUM_VALUE_DEF(kCeilingTrans), - ENUM_VALUE_DEF(kDoorInLf), - ENUM_VALUE_DEF(kDoorInRt), - ENUM_VALUE_DEF(kDoorExRt), - ENUM_VALUE_DEF(kDoorExLf), - ENUM_VALUE_DEF(kWindowInLf), - ENUM_VALUE_DEF(kWindowInRt), - ENUM_VALUE_DEF(kWindowExRt), - ENUM_VALUE_DEF(kWindowExLf), - ENUM_VALUE_DEF(kInvisTrans), - ENUM_VALUE_DEF(kDeluxeTrans), - ENUM_VALUE_DEF(kLightSwitch), - ENUM_VALUE_DEF(kMachineSwitch), - ENUM_VALUE_DEF(kThermostat), - ENUM_VALUE_DEF(kPowerSwitch), - ENUM_VALUE_DEF(kKnifeSwitch), - ENUM_VALUE_DEF(kInvisSwitch), - ENUM_VALUE_DEF(kTrigger), - ENUM_VALUE_DEF(kLgTrigger), - ENUM_VALUE_DEF(kSoundTrigger), - ENUM_VALUE_DEF(kCeilingLight), - ENUM_VALUE_DEF(kLightBulb), - ENUM_VALUE_DEF(kTableLamp), - ENUM_VALUE_DEF(kHipLamp), - ENUM_VALUE_DEF(kDecoLamp), - ENUM_VALUE_DEF(kFlourescent), - ENUM_VALUE_DEF(kTrackLight), - ENUM_VALUE_DEF(kInvisLight), - ENUM_VALUE_DEF(kShredder), - ENUM_VALUE_DEF(kToaster), - ENUM_VALUE_DEF(kMacPlus), - ENUM_VALUE_DEF(kGuitar), - ENUM_VALUE_DEF(kTV), - ENUM_VALUE_DEF(kCoffee), - ENUM_VALUE_DEF(kOutlet), - ENUM_VALUE_DEF(kVCR), - ENUM_VALUE_DEF(kStereo), - ENUM_VALUE_DEF(kMicrowave), - ENUM_VALUE_DEF(kCinderBlock), - ENUM_VALUE_DEF(kFlowerBox), - ENUM_VALUE_DEF(kCDs), - ENUM_VALUE_DEF(kCustomPict), - ENUM_VALUE_DEF(kBalloon), - ENUM_VALUE_DEF(kCopterLf), - ENUM_VALUE_DEF(kCopterRt), - ENUM_VALUE_DEF(kDartLf), - ENUM_VALUE_DEF(kDartRt), - ENUM_VALUE_DEF(kBall), - ENUM_VALUE_DEF(kDrip), - ENUM_VALUE_DEF(kFish), - ENUM_VALUE_DEF(kCobweb), - ENUM_VALUE_DEF(kOzma), - ENUM_VALUE_DEF(kMirror), - ENUM_VALUE_DEF(kMousehole), - ENUM_VALUE_DEF(kFireplace), - ENUM_VALUE_DEF(kFlower), - ENUM_VALUE_DEF(kWallWindow), - ENUM_VALUE_DEF(kBear), - ENUM_VALUE_DEF(kCalendar), - ENUM_VALUE_DEF(kVase1), - ENUM_VALUE_DEF(kVase2), - ENUM_VALUE_DEF(kBulletin), - ENUM_VALUE_DEF(kCloud), - ENUM_VALUE_DEF(kFaucet), - ENUM_VALUE_DEF(kRug), + ENUM_VALUE_DEF(kFloorVent), + ENUM_VALUE_DEF(kCeilingVent), + ENUM_VALUE_DEF(kFloorBlower), + ENUM_VALUE_DEF(kCeilingBlower), + ENUM_VALUE_DEF(kSewerGrate), + ENUM_VALUE_DEF(kLeftFan), + ENUM_VALUE_DEF(kRightFan), + ENUM_VALUE_DEF(kTaper), + ENUM_VALUE_DEF(kCandle), + ENUM_VALUE_DEF(kStubby), + ENUM_VALUE_DEF(kTiki), + ENUM_VALUE_DEF(kBBQ), + ENUM_VALUE_DEF(kInvisBlower), + ENUM_VALUE_DEF(kGrecoVent), + ENUM_VALUE_DEF(kSewerBlower), + ENUM_VALUE_DEF(kLiftArea), + ENUM_VALUE_DEF(kTable), + ENUM_VALUE_DEF(kShelf), + ENUM_VALUE_DEF(kCabinet), + ENUM_VALUE_DEF(kFilingCabinet), + ENUM_VALUE_DEF(kWasteBasket), + ENUM_VALUE_DEF(kMilkCrate), + ENUM_VALUE_DEF(kCounter), + ENUM_VALUE_DEF(kDresser), + ENUM_VALUE_DEF(kDeckTable), + ENUM_VALUE_DEF(kStool), + ENUM_VALUE_DEF(kTrunk), + ENUM_VALUE_DEF(kInvisObstacle), + ENUM_VALUE_DEF(kManhole), + ENUM_VALUE_DEF(kBooks), + ENUM_VALUE_DEF(kInvisBounce), + ENUM_VALUE_DEF(kRedClock), + ENUM_VALUE_DEF(kBlueClock), + ENUM_VALUE_DEF(kYellowClock), + ENUM_VALUE_DEF(kCuckoo), + ENUM_VALUE_DEF(kPaper), + ENUM_VALUE_DEF(kBattery), + ENUM_VALUE_DEF(kBands), + ENUM_VALUE_DEF(kGreaseRt), + ENUM_VALUE_DEF(kGreaseLf), + ENUM_VALUE_DEF(kFoil), + ENUM_VALUE_DEF(kInvisBonus), + ENUM_VALUE_DEF(kStar), + ENUM_VALUE_DEF(kSparkle), + ENUM_VALUE_DEF(kHelium), + ENUM_VALUE_DEF(kSlider), + ENUM_VALUE_DEF(kUpStairs), + ENUM_VALUE_DEF(kDownStairs), + ENUM_VALUE_DEF(kMailboxLf), + ENUM_VALUE_DEF(kMailboxRt), + ENUM_VALUE_DEF(kFloorTrans), + ENUM_VALUE_DEF(kCeilingTrans), + ENUM_VALUE_DEF(kDoorInLf), + ENUM_VALUE_DEF(kDoorInRt), + ENUM_VALUE_DEF(kDoorExRt), + ENUM_VALUE_DEF(kDoorExLf), + ENUM_VALUE_DEF(kWindowInLf), + ENUM_VALUE_DEF(kWindowInRt), + ENUM_VALUE_DEF(kWindowExRt), + ENUM_VALUE_DEF(kWindowExLf), + ENUM_VALUE_DEF(kInvisTrans), + ENUM_VALUE_DEF(kDeluxeTrans), + ENUM_VALUE_DEF(kLightSwitch), + ENUM_VALUE_DEF(kMachineSwitch), + ENUM_VALUE_DEF(kThermostat), + ENUM_VALUE_DEF(kPowerSwitch), + ENUM_VALUE_DEF(kKnifeSwitch), + ENUM_VALUE_DEF(kInvisSwitch), + ENUM_VALUE_DEF(kTrigger), + ENUM_VALUE_DEF(kLgTrigger), + ENUM_VALUE_DEF(kSoundTrigger), + ENUM_VALUE_DEF(kCeilingLight), + ENUM_VALUE_DEF(kLightBulb), + ENUM_VALUE_DEF(kTableLamp), + ENUM_VALUE_DEF(kHipLamp), + ENUM_VALUE_DEF(kDecoLamp), + ENUM_VALUE_DEF(kFlourescent), + ENUM_VALUE_DEF(kTrackLight), + ENUM_VALUE_DEF(kInvisLight), + ENUM_VALUE_DEF(kShredder), + ENUM_VALUE_DEF(kToaster), + ENUM_VALUE_DEF(kMacPlus), + ENUM_VALUE_DEF(kGuitar), + ENUM_VALUE_DEF(kTV), + ENUM_VALUE_DEF(kCoffee), + ENUM_VALUE_DEF(kOutlet), + ENUM_VALUE_DEF(kVCR), + ENUM_VALUE_DEF(kStereo), + ENUM_VALUE_DEF(kMicrowave), + ENUM_VALUE_DEF(kCinderBlock), + ENUM_VALUE_DEF(kFlowerBox), + ENUM_VALUE_DEF(kCDs), + ENUM_VALUE_DEF(kCustomPict), + ENUM_VALUE_DEF(kBalloon), + ENUM_VALUE_DEF(kCopterLf), + ENUM_VALUE_DEF(kCopterRt), + ENUM_VALUE_DEF(kDartLf), + ENUM_VALUE_DEF(kDartRt), + ENUM_VALUE_DEF(kBall), + ENUM_VALUE_DEF(kDrip), + ENUM_VALUE_DEF(kFish), + ENUM_VALUE_DEF(kCobweb), + ENUM_VALUE_DEF(kOzma), + ENUM_VALUE_DEF(kMirror), + ENUM_VALUE_DEF(kMousehole), + ENUM_VALUE_DEF(kFireplace), + ENUM_VALUE_DEF(kFlower), + ENUM_VALUE_DEF(kWallWindow), + ENUM_VALUE_DEF(kBear), + ENUM_VALUE_DEF(kCalendar), + ENUM_VALUE_DEF(kVase1), + ENUM_VALUE_DEF(kVase2), + ENUM_VALUE_DEF(kBulletin), + ENUM_VALUE_DEF(kCloud), + ENUM_VALUE_DEF(kFaucet), + ENUM_VALUE_DEF(kRug), ENUM_VALUE_DEF(kChimes), }; @@ -1124,7 +1124,7 @@ void PatchVisitor::VisitLPStr(uint8_t &length, uint8_t *chars, int capacity) { uint32_t codePoint = 0; size_t charsDigested = 0; - if (!PortabilityLayer::UTF8Processor::DecodeCodePoint(replacementUTF8, rLen, charsDigested, codePoint)) + if (!GpUnicode::UTF8::Decode(replacementUTF8, rLen, charsDigested, codePoint)) break; rLen -= charsDigested; @@ -1382,9 +1382,9 @@ bool PatchVisitor::DecodeQuotedString(const std::string &scopeStr, size_t startP } const uint16_t unicodeCodePoint = (nibbles[0] << 12) + (nibbles[1] << 8) + (nibbles[2] << 4) + nibbles[3]; - uint8_t encoded[PortabilityLayer::UTF8Processor::kMaxEncodedBytes]; + uint8_t encoded[GpUnicode::UTF8::kMaxEncodedBytes]; size_t emitted = 0; - PortabilityLayer::UTF8Processor::EncodeCodePoint(encoded, emitted, unicodeCodePoint); + GpUnicode::UTF8::Encode(encoded, emitted, unicodeCodePoint); for (size_t ei = 0; ei < emitted; ei++) decoded.push_back(static_cast(encoded[ei])); diff --git a/PortabilityLayer/PLCore.cpp b/PortabilityLayer/PLCore.cpp index 88772f6..53a4e77 100644 --- a/PortabilityLayer/PLCore.cpp +++ b/PortabilityLayer/PLCore.cpp @@ -46,7 +46,7 @@ #include "PLTimeTaggedVOSEvent.h" #include "PLWidgets.h" -#include "UTF8.h" +#include "GpUnicode.h" #include #include @@ -691,12 +691,12 @@ PLClipboardContentsText *PLClipboardContentsText::CreateFromMacRomanStr(const ui for (size_t i = 0; i < length; i++) { - uint8_t utf8Bytes[PortabilityLayer::UTF8Processor::kMaxEncodedBytes]; + uint8_t utf8Bytes[GpUnicode::UTF8::kMaxEncodedBytes]; uint16_t codePoint = MacRoman::ToUnicode(chars[i]); size_t numBytesEmitted = 0; - PortabilityLayer::UTF8Processor::EncodeCodePoint(utf8Bytes, numBytesEmitted, codePoint); + GpUnicode::UTF8::Encode(utf8Bytes, numBytesEmitted, codePoint); numUTF8Bytes += numBytesEmitted; } @@ -715,7 +715,7 @@ PLClipboardContentsText *PLClipboardContentsText::CreateFromMacRomanStr(const ui uint16_t codePoint = MacRoman::ToUnicode(chars[i]); size_t numBytesEmitted = 0; - PortabilityLayer::UTF8Processor::EncodeCodePoint(utf8Bytes + numUTF8Bytes, numBytesEmitted, codePoint); + GpUnicode::UTF8::Encode(utf8Bytes + numUTF8Bytes, numBytesEmitted, codePoint); numUTF8Bytes += numBytesEmitted; } diff --git a/PortabilityLayer/PLEditboxWidget.cpp b/PortabilityLayer/PLEditboxWidget.cpp index 8995548..180c35c 100644 --- a/PortabilityLayer/PLEditboxWidget.cpp +++ b/PortabilityLayer/PLEditboxWidget.cpp @@ -12,7 +12,7 @@ #include "ResolveCachingColor.h" #include "Rect2i.h" #include "TextPlacer.h" -#include "UTF8.h" +#include "GpUnicode.h" #include "PLDrivers.h" #include "PLKeyEncoding.h" @@ -721,7 +721,7 @@ namespace PortabilityLayer { uint32_t codePoint = 0; size_t numDigested = 0; - if (!UTF8Processor::DecodeCodePoint(utf8Bytes + i, utf8Size - i, numDigested, codePoint)) + if (!GpUnicode::UTF8::Decode(utf8Bytes + i, utf8Size - i, numDigested, codePoint)) { clipboardContents->Destroy(); return; @@ -745,7 +745,7 @@ namespace PortabilityLayer { uint32_t codePoint = 0; size_t numDigested = 0; - if (!UTF8Processor::DecodeCodePoint(utf8Bytes + i, utf8Size - i, numDigested, codePoint)) + if (!GpUnicode::UTF8::Decode(utf8Bytes + i, utf8Size - i, numDigested, codePoint)) { clipboardContents->Destroy(); return; diff --git a/PortabilityLayer/PortabilityLayer.vcxproj b/PortabilityLayer/PortabilityLayer.vcxproj index 956bbaa..faacf9f 100644 --- a/PortabilityLayer/PortabilityLayer.vcxproj +++ b/PortabilityLayer/PortabilityLayer.vcxproj @@ -157,7 +157,6 @@ - @@ -302,7 +301,6 @@ - diff --git a/PortabilityLayer/PortabilityLayer.vcxproj.filters b/PortabilityLayer/PortabilityLayer.vcxproj.filters index bab3226..94f7687 100644 --- a/PortabilityLayer/PortabilityLayer.vcxproj.filters +++ b/PortabilityLayer/PortabilityLayer.vcxproj.filters @@ -399,9 +399,6 @@ Header Files - - Header Files - Header Files @@ -671,9 +668,6 @@ Source Files - - Source Files - Source Files diff --git a/PortabilityLayer/PortabilityLayer_Combined.cpp b/PortabilityLayer/PortabilityLayer_Combined.cpp index 8f68695..29c8afb 100644 --- a/PortabilityLayer/PortabilityLayer_Combined.cpp +++ b/PortabilityLayer/PortabilityLayer_Combined.cpp @@ -78,7 +78,6 @@ #include "SimpleGraphic.cpp" #include "TextPlacer.cpp" #include "UTF8.cpp" -#include "UTF16.cpp" #include "WindowDef.cpp" #include "WindowManager.cpp" #include "WorkerThread.cpp" diff --git a/PortabilityLayer/UTF16.cpp b/PortabilityLayer/UTF16.cpp deleted file mode 100644 index 7757236..0000000 --- a/PortabilityLayer/UTF16.cpp +++ /dev/null @@ -1,57 +0,0 @@ -#include "UTF16.h" - -namespace PortabilityLayer -{ - bool UTF16Processor::DecodeCodePoint(const uint16_t *characters, size_t availableCharacters, size_t &outCharactersDigested, uint32_t &outCodePoint) - { - if (availableCharacters <= 0) - return false; - - if ((characters[0] & 0xff80) == 0x00) - { - outCharactersDigested = 1; - outCodePoint = characters[0]; - return true; - } - - if (characters[0] <= 0xd7ff || characters[0] >= 0xe000) - { - outCharactersDigested = 1; - outCodePoint = characters[0]; - return true; - } - - // Surrogate pair - if (characters[0] >= 0xdc00 || availableCharacters < 2) - return false; - - if (characters[1] < 0xdc00 || characters[1] >= 0xe000) - return false; - - uint16_t highBits = (characters[0] & 0x3ff); - uint16_t lowBits = (characters[1] & 0x3ff); - - outCharactersDigested = 2; - outCodePoint = (highBits << 10) + lowBits + 0x10000; - - return true; - } - - void UTF16Processor::EncodeCodePoint(uint16_t *characters, size_t &outCharactersEmitted, uint32_t codePoint) - { - if (codePoint <= 0xd7ff || codePoint >= 0xe000) - { - outCharactersEmitted = 1; - characters[0] = static_cast(codePoint); - return; - } - - uint32_t codePointBits = (codePoint - 0x10000) & 0xfffff; - uint16_t lowBits = (codePointBits & 0x3ff); - uint16_t highBits = ((codePointBits >> 10) & 0x3ff); - - outCharactersEmitted = 2; - characters[0] = (0xd800 + highBits); - characters[1] = (0xdc00 + lowBits); - } -} diff --git a/PortabilityLayer/UTF16.h b/PortabilityLayer/UTF16.h deleted file mode 100644 index 4de6ab9..0000000 --- a/PortabilityLayer/UTF16.h +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -#include -#include - -namespace PortabilityLayer -{ - class UTF16Processor - { - public: - static bool DecodeCodePoint(const uint16_t *characters, size_t availableCharacters, size_t &outCharactersDigested, uint32_t &outCodePoint); - static void EncodeCodePoint(uint16_t *characters, size_t &outCharactersEmitted, uint32_t codePoint); - }; -} diff --git a/PortabilityLayer/UTF8.cpp b/PortabilityLayer/UTF8.cpp index c6a267a..12775df 100644 --- a/PortabilityLayer/UTF8.cpp +++ b/PortabilityLayer/UTF8.cpp @@ -1,105 +1,9 @@ #include "UTF8.h" +#include "GpUnicode.h" #include "MacRomanConversion.h" namespace PortabilityLayer { - bool UTF8Processor::DecodeCodePoint(const uint8_t *characters, size_t availableCharacters, size_t &outCharactersDigested, uint32_t &outCodePoint) - { - if (availableCharacters <= 0) - return false; - - if ((characters[0] & 0x80) == 0x00) - { - outCharactersDigested = 1; - outCodePoint = characters[0]; - return true; - } - - size_t sz = 0; - uint32_t codePoint = 0; - uint32_t minCodePoint = 0; - if ((characters[0] & 0xe0) == 0xc0) - { - sz = 2; - minCodePoint = 0x80; - codePoint = (characters[0] & 0x1f); - } - else if ((characters[0] & 0xf0) == 0xe0) - { - sz = 3; - minCodePoint = 0x800; - codePoint = (characters[0] & 0x0f); - } - else if ((characters[0] & 0xf8) == 0xf0) - { - sz = 4; - minCodePoint = 0x10000; - codePoint = (characters[0] & 0x07); - } - else - return false; - - if (availableCharacters < sz) - return false; - - for (size_t auxByte = 1; auxByte < sz; auxByte++) - { - if ((characters[auxByte] & 0xc0) != 0x80) - return false; - - codePoint = (codePoint << 6) | (characters[auxByte] & 0x3f); - } - - if (codePoint < minCodePoint || codePoint > 0x10ffff) - return false; - - if (codePoint >= 0xd800 && codePoint <= 0xdfff) - return false; - - outCodePoint = codePoint; - outCharactersDigested = sz; - - return true; - } - - void UTF8Processor::EncodeCodePoint(uint8_t *characters, size_t &outCharactersEmitted, uint32_t codePoint) - { - codePoint &= 0x1fffff; - - uint8_t signalBits = 0; - size_t numBytes = 0; - if (codePoint < 0x0080) - { - numBytes = 1; - signalBits = 0; - } - else if (codePoint < 0x0800) - { - numBytes = 2; - signalBits = 0xc0; - } - else if (codePoint < 0x10000) - { - numBytes = 3; - signalBits = 0xe0; - } - else - { - numBytes = 4; - signalBits = 0xf0; - } - - characters[0] = static_cast((codePoint >> (6 * (numBytes - 1))) | signalBits); - - for (size_t i = 1; i < numBytes; i++) - { - const uint32_t isolate = ((codePoint >> (6 * (numBytes - 1 - i))) & 0x3f) | 0x80; - characters[i] = static_cast(isolate); - } - - outCharactersEmitted = numBytes; - } - bool UTF8Processor::DecodeToMacRomanPascalStr(const uint8_t *inChars, size_t inSize, uint8_t *outChars, size_t maxOutSize, size_t &outSizeRef) { size_t outSize = 0; @@ -107,7 +11,7 @@ namespace PortabilityLayer { size_t digestedChars = 0; uint32_t codePoint = 0; - if (!DecodeCodePoint(inChars, inSize, digestedChars, codePoint)) + if (!GpUnicode::UTF8::Decode(inChars, inSize, digestedChars, codePoint)) return false; inChars += digestedChars; @@ -122,10 +26,10 @@ namespace PortabilityLayer { for (uint16_t c = 0x80; c <= 0xff; c++) { - uint16_t decodedCP = MacRoman::ToUnicode(c); + uint16_t decodedCP = MacRoman::ToUnicode(static_cast(c)); if (decodedCP == codePoint) { - macRomanChar = c; + macRomanChar = static_cast(c); break; } } diff --git a/PortabilityLayer/UTF8.h b/PortabilityLayer/UTF8.h index 2c9412a..a668092 100644 --- a/PortabilityLayer/UTF8.h +++ b/PortabilityLayer/UTF8.h @@ -8,11 +8,6 @@ namespace PortabilityLayer class UTF8Processor { public: - static bool DecodeCodePoint(const uint8_t *characters, size_t availableCharacters, size_t &outCharactersDigested, uint32_t &outCodePoint); - static void EncodeCodePoint(uint8_t *characters, size_t &outCharactersEmitted, uint32_t codePoint); - static bool DecodeToMacRomanPascalStr(const uint8_t *inChars, size_t inSize, uint8_t *outChars, size_t maxOutSize, size_t &outSize); - - static const unsigned int kMaxEncodedBytes = 4; }; } diff --git a/WindowsUnicodeToolShim/WindowsUnicodeToolShim.cpp b/WindowsUnicodeToolShim/WindowsUnicodeToolShim.cpp index ceed94a..a2c4bd7 100644 --- a/WindowsUnicodeToolShim/WindowsUnicodeToolShim.cpp +++ b/WindowsUnicodeToolShim/WindowsUnicodeToolShim.cpp @@ -1,7 +1,6 @@ #include -#include "UTF8.h" -#include "UTF16.h" +#include "GpUnicode.h" #include #include @@ -20,13 +19,13 @@ static std::string ConvertWStringToUTF8(const wchar_t *str) size_t charsDigested = 0; uint32_t codePoint = 0; uint8_t asUTF8[4]; - if (!PortabilityLayer::UTF16Processor::DecodeCodePoint(reinterpret_cast(str) + i, strLength - i, charsDigested, codePoint)) + if (!GpUnicode::UTF16::Decode(reinterpret_cast(str) + i, strLength - i, charsDigested, codePoint)) return ""; i += charsDigested; size_t bytesEmitted = 0; - PortabilityLayer::UTF8Processor::EncodeCodePoint(asUTF8, bytesEmitted, codePoint); + GpUnicode::UTF8::Encode(asUTF8, bytesEmitted, codePoint); result.append(reinterpret_cast(asUTF8), bytesEmitted); } @@ -45,13 +44,13 @@ static std::wstring ConvertUTF8ToWString(const char *str) size_t charsDigested = 0; uint32_t codePoint = 0; uint16_t asUTF16[4]; - if (!PortabilityLayer::UTF8Processor::DecodeCodePoint(reinterpret_cast(str) + i, strLength - i, charsDigested, codePoint)) + if (!GpUnicode::UTF8::Decode(reinterpret_cast(str) + i, strLength - i, charsDigested, codePoint)) return L""; i += charsDigested; size_t codePointsEmitted = 0; - PortabilityLayer::UTF16Processor::EncodeCodePoint(asUTF16, codePointsEmitted, codePoint); + GpUnicode::UTF16::Encode(asUTF16, codePointsEmitted, codePoint); result.append(reinterpret_cast(asUTF16), codePointsEmitted); } diff --git a/WindowsUnicodeToolShim/WindowsUnicodeToolShim.vcxproj b/WindowsUnicodeToolShim/WindowsUnicodeToolShim.vcxproj index 2267d6a..c4bd6b2 100644 --- a/WindowsUnicodeToolShim/WindowsUnicodeToolShim.vcxproj +++ b/WindowsUnicodeToolShim/WindowsUnicodeToolShim.vcxproj @@ -41,6 +41,7 @@ + @@ -48,6 +49,7 @@ + diff --git a/gpr2gpa/gpr2gpa.cpp b/gpr2gpa/gpr2gpa.cpp index 18a7680..bc329dd 100644 --- a/gpr2gpa/gpr2gpa.cpp +++ b/gpr2gpa/gpr2gpa.cpp @@ -14,6 +14,7 @@ #include "UTF8.h" #include "ZipFile.h" #include "WaveFormat.h" +#include "GpUnicode.h" #include "zlib.h" @@ -116,7 +117,7 @@ void AppendUTF8(std::vector &array, uint32_t codePoint) uint8_t bytes[5]; size_t sz; - PortabilityLayer::UTF8Processor::EncodeCodePoint(bytes, sz, codePoint); + GpUnicode::UTF8::Encode(bytes, sz, codePoint); for (size_t i = 0; i < sz; i++) array.push_back(bytes[i]); } diff --git a/unpacktool/StringCommon.cpp b/unpacktool/StringCommon.cpp index c6b2b1e..36c3209 100644 --- a/unpacktool/StringCommon.cpp +++ b/unpacktool/StringCommon.cpp @@ -1,7 +1,7 @@ #include "StringCommon.h" #include "MacRomanConversion.h" -#include "UTF8.h" +#include "GpUnicode.h" void StringCommon::ConvertMacRomanFileName(std::vector &utf8FileName, const uint8_t *macRomanName, size_t macRomanLength) { @@ -9,7 +9,7 @@ void StringCommon::ConvertMacRomanFileName(std::vector &utf8FileName, c { uint8_t bytes[8]; size_t bytesEmitted; - PortabilityLayer::UTF8Processor::EncodeCodePoint(bytes, bytesEmitted, MacRoman::ToUnicode(macRomanName[i])); + GpUnicode::UTF8::Encode(bytes, bytesEmitted, MacRoman::ToUnicode(macRomanName[i])); for (size_t bi = 0; bi < bytesEmitted; bi++) utf8FileName.push_back(bytes[bi]); diff --git a/unpacktool/unpacktool.cpp b/unpacktool/unpacktool.cpp index 9633a89..630b3cb 100644 --- a/unpacktool/unpacktool.cpp +++ b/unpacktool/unpacktool.cpp @@ -7,8 +7,7 @@ #include "CompactProParser.h" #include "CFileStream.h" -#include "UTF8.h" -#include "UTF16.h" +#include "GpUnicode.h" #include "ArchiveDescription.h" #include "IDecompressor.h"