From 0df94405f8c6b904e413e0f157a6b089e080fa51 Mon Sep 17 00:00:00 2001
From: elasota
Date: Sat, 27 Mar 2021 03:01:43 -0400
Subject: [PATCH] Faster AA table generation

---
 PortabilityLayer/AntiAliasTable.cpp    | 150 +++++++++++++++++++++++++
 PortabilityLayer/AntiAliasTable.h      |   3 +
 PortabilityLayer/QDStandardPalette.cpp |   4 +-
 3 files changed, 155 insertions(+), 2 deletions(-)

diff --git a/PortabilityLayer/AntiAliasTable.cpp b/PortabilityLayer/AntiAliasTable.cpp
index c062703..66fef53 100644
--- a/PortabilityLayer/AntiAliasTable.cpp
+++ b/PortabilityLayer/AntiAliasTable.cpp
@@ -6,2 +6,3 @@
 #include "PLBigEndian.h"
 #include "PLDrivers.h"
+#include "QDStandardPalette.h"
@@ -12,6 +13,34 @@
 
 namespace PortabilityLayer
 {
+	// Returns the index of the scale bucket that value falls in.
+	// tree[k] is the decision threshold between scale[k] and scale[k + 1], so tree
+	// must hold TScaleSize - 1 ascending entries (both callers pass midpoints).
+	// scale is not read here; it is passed only so TScaleSize can be deduced.
+	// A value exactly on a threshold maps to the upper of the two entries.
+	template<size_t TTreeSize, size_t TScaleSize>
+	unsigned int BinTreeQuantize(const uint32_t(&tree)[TTreeSize], const uint32_t(&scale)[TScaleSize], uint32_t value)
+	{
+		unsigned int minIndexInclusive = 0;
+		unsigned int maxIndexExclusive = TScaleSize;
+
+		while (maxIndexExclusive - minIndexInclusive > 1)
+		{
+			const unsigned int partitionIndex = (maxIndexExclusive + minIndexInclusive) / 2;
+			const uint32_t partitionValue = tree[partitionIndex - 1];
+
+			if (value < partitionValue)
+				maxIndexExclusive = partitionIndex;
+			else if (value > partitionValue)
+				minIndexInclusive = partitionIndex;
+			else
+				return partitionIndex;
+		}
+
+		return minIndexInclusive;
+	}
+
+
 	bool AntiAliasTable::LoadFromCache(const char *cacheFileName)
 	{
 		GpIOStream *stream = PLDrivers::GetFileSystem()->OpenFile(PortabilityLayer::VirtualDirectories::kFontCache, cacheFileName, false, GpFileCreationDispositions::kOpenExisting);
@@ -45,6 +74,126 @@ namespace PortabilityLayer
 		stream->Close();
 	}
 
+	// Builds the blend table for one foreground color.  m_aaTranslate[i][b] receives
+	// the MapColorAnalyticTruncated() result chosen for blending baseColorRef into
+	// standard-palette entry i at step b of kDivisions - 1 (b == 0 keeps entry i).
+	// Channels are blended as squared 8-bit values, and the candidate quantization
+	// with the smallest summed squared error is selected.
+	void AntiAliasTable::GenerateForPaletteFast(const RGBAColor &baseColorRef)
+	{
+		const RGBAColor baseColor = baseColorRef;
+		const unsigned int kDivisions = 16;
+		const unsigned int kMaxBlend = kDivisions - 1;
+		const unsigned int kNumChannels = 3;
+		const unsigned int numColors = 256;
+
+		// Both scales are pre-multiplied by kMaxBlend so they are directly comparable
+		// with the blended values, which are sums of kMaxBlend weighted squares.
+		uint32_t rgbScaleLinear[6];
+		for (unsigned int i = 0; i < 6; i++)
+		{
+			const unsigned int upscaled = i * 51;
+			rgbScaleLinear[i] = upscaled * upscaled * kMaxBlend;
+		}
+
+		uint32_t rgbScaleTree[5];
+		for (unsigned int i = 0; i < 5; i++)
+			rgbScaleTree[i] = (rgbScaleLinear[i] + rgbScaleLinear[i + 1]) / 2;
+
+		uint32_t toneScaleLinear[16];
+		for (unsigned int i = 0; i < 16; i++)
+		{
+			const unsigned int upscaled = i * 17;
+			toneScaleLinear[i] = upscaled * upscaled * kMaxBlend;
+		}
+
+		uint32_t toneScaleTree[15];
+		for (unsigned int i = 0; i < 15; i++)
+			toneScaleTree[i] = (toneScaleLinear[i] + toneScaleLinear[i + 1]) / 2;
+
+		const unsigned int baseCh[kNumChannels] = { baseColor.r, baseColor.g, baseColor.b };
+		unsigned int baseChLinear[kNumChannels];
+		for (unsigned int ch = 0; ch < kNumChannels; ch++)
+			baseChLinear[ch] = baseCh[ch] * baseCh[ch];
+
+		for (size_t colorIndex = 0; colorIndex < numColors; colorIndex++)
+		{
+			const RGBAColor existingColor = StandardPalette::GetInstance()->GetColors()[colorIndex];
+
+			const unsigned int existingCh[kNumChannels] = { existingColor.r, existingColor.g, existingColor.b };
+			unsigned int existingChLinear[kNumChannels];
+			for (unsigned int ch = 0; ch < kNumChannels; ch++)
+				existingChLinear[ch] = existingCh[ch] * existingCh[ch];
+
+			// 0 alpha is always the same color
+			m_aaTranslate[colorIndex][0] = static_cast<uint8_t>(colorIndex);
+
+			for (unsigned int b = 1; b < kDivisions; b++)
+			{
+				uint32_t newChLinear[kNumChannels];
+				unsigned int toneIndexes[kNumChannels];
+				unsigned int rgbIndexes[kNumChannels];
+
+				for (unsigned int ch = 0; ch < kNumChannels; ch++)
+				{
+					newChLinear[ch] = (kMaxBlend - b) * existingChLinear[ch] + b * baseChLinear[ch];
+					toneIndexes[ch] = BinTreeQuantize(toneScaleTree, toneScaleLinear, newChLinear[ch]);
+					rgbIndexes[ch] = BinTreeQuantize(rgbScaleTree, rgbScaleLinear, newChLinear[ch]);
+				}
+
+				uint64_t toneZeroError[kNumChannels];
+				uint64_t toneQuantizedError[kNumChannels];
+				uint64_t rgbError = 0;
+				for (unsigned int ch = 0; ch < kNumChannels; ch++)
+				{
+					// Squared error if this channel is output as zero.
+					toneZeroError[ch] = static_cast<uint64_t>(newChLinear[ch]);
+					toneZeroError[ch] *= toneZeroError[ch];
+
+					const int64_t toneDelta = static_cast<int64_t>(toneScaleLinear[toneIndexes[ch]]) - static_cast<int64_t>(newChLinear[ch]);
+					toneQuantizedError[ch] = static_cast<uint64_t>(toneDelta * toneDelta);
+
+					// rgbScaleLinear has only 6 entries, so it must be indexed with
+					// rgbIndexes (0-5), never with toneIndexes (0-15).
+					const int64_t rgbDelta = static_cast<int64_t>(rgbScaleLinear[rgbIndexes[ch]]) - static_cast<int64_t>(newChLinear[ch]);
+					rgbError += static_cast<uint64_t>(rgbDelta * rgbDelta);
+				}
+
+				// Candidates 0-2 keep a single channel at tone resolution and zero the
+				// other two, 3 keeps all channels at tone resolution, and 4 uses the
+				// 6-level scale (its steps of 51 equal 3 tone steps of 17, hence "* 3").
+				uint64_t possibleErrors[5];
+				possibleErrors[0] = toneQuantizedError[0] + toneZeroError[1] + toneZeroError[2];
+				possibleErrors[1] = toneZeroError[0] + toneQuantizedError[1] + toneZeroError[2];
+				possibleErrors[2] = toneZeroError[0] + toneZeroError[1] + toneQuantizedError[2];
+				possibleErrors[3] = toneQuantizedError[0] + toneQuantizedError[1] + toneQuantizedError[2];
+				possibleErrors[4] = rgbError;
+
+				int bestErrorIndex = 0;
+				for (int candidate = 1; candidate < 5; candidate++)
+				{
+					if (possibleErrors[candidate] < possibleErrors[bestErrorIndex])
+						bestErrorIndex = candidate;
+				}
+
+				uint8_t bestColor = 0;
+				if (bestErrorIndex == 0)
+					bestColor = StandardPalette::GetInstance()->MapColorAnalyticTruncated(toneIndexes[0], 0, 0);
+				else if (bestErrorIndex == 1)
+					bestColor = StandardPalette::GetInstance()->MapColorAnalyticTruncated(0, toneIndexes[1], 0);
+				else if (bestErrorIndex == 2)
+					bestColor = StandardPalette::GetInstance()->MapColorAnalyticTruncated(0, 0, toneIndexes[2]);
+				else if (bestErrorIndex == 3)
+					bestColor = StandardPalette::GetInstance()->MapColorAnalyticTruncated(toneIndexes[0], toneIndexes[1], toneIndexes[2]);
+				else //if (bestErrorIndex == 4)
+					bestColor = StandardPalette::GetInstance()->MapColorAnalyticTruncated(rgbIndexes[0] * 3, rgbIndexes[1] * 3, rgbIndexes[2] * 3);
+
+				m_aaTranslate[colorIndex][b] = bestColor;
+			}
+		}
+	}
+
+#if 0
 	void AntiAliasTable::GenerateForPalette(const RGBAColor &baseColorRef, const RGBAColor *colors, size_t numColors, bool cacheable)
 	{
 		char cacheFileName[256];
@@ -117,6 +266,7 @@ namespace PortabilityLayer
 		if (cacheable)
 			SaveToCache(cacheFileName);
 	}
+#endif
 
 	void AntiAliasTable::GenerateForSimpleScale(uint8_t colorChannel, bool cacheable)
 	{
diff --git a/PortabilityLayer/AntiAliasTable.h b/PortabilityLayer/AntiAliasTable.h
index 148f1f5..ffde3f4 100644
--- a/PortabilityLayer/AntiAliasTable.h
+++ b/PortabilityLayer/AntiAliasTable.h
@@ -12,7 +12,10 @@ namespace PortabilityLayer
 		// Striped 256x16 because constant background color is more likely than constant sample
 		uint8_t m_aaTranslate[256][16];
 
+#if 0
 		void GenerateForPalette(const RGBAColor &baseColor, const RGBAColor *colors, size_t numColors, bool cacheable);
+#endif
+		void GenerateForPaletteFast(const RGBAColor &baseColor);
 		void GenerateForSimpleScale(uint8_t colorChannel, bool cacheable);
 
 	private:
diff --git a/PortabilityLayer/QDStandardPalette.cpp b/PortabilityLayer/QDStandardPalette.cpp
index 1a6a096..9add49e 100644
--- a/PortabilityLayer/QDStandardPalette.cpp
+++ b/PortabilityLayer/QDStandardPalette.cpp
@@ -262,7 +262,7 @@ namespace PortabilityLayer
 		if (mutex)
 			mutex->Unlock();
 
-		entry.m_aaTable.GenerateForPalette(color, m_colors, 256, true);
+		entry.m_aaTable.GenerateForPaletteFast(color);
 
 		return entry.m_aaTable;
 	}
@@ -292,7 +292,7 @@ namespace PortabilityLayer
 		if (mutex)
 			mutex->Unlock();
 
-		entry.m_aaTable.GenerateForSimpleScale(tone, true);
+		entry.m_aaTable.GenerateForSimpleScale(tone, false);
 
 		return entry.m_aaTable;
 	}