// Aerofoil/ImportCharSet/ImportCharSet.cpp

#include <string>
#include <stdio.h>

// Lookup tables produced by this tool.  Each one has 256 entries and is indexed by character code.

// Unicode value for each character code.  main() initializes every entry to 0xffff before
// ProcessLine() fills in the codes listed in the mapping file.
uint16_t g_toUnicode[256];
// Character code of the capital form of each character.  Starts as the identity mapping.
uint8_t g_toUpper[256];
// Character code of the small form of each character.  Starts as the identity mapping.
uint8_t g_toLower[256];
// Character code of a same-case character with a shorter description.  Starts as the identity mapping.
uint8_t g_stripDiacritic[256];

// Description text taken from the '#' comment on each mapping-file line.  main() compares
// these descriptions to derive the case and diacritic tables above.
std::string g_charDescs[256];

// Emits a 256-element array definition as C++ source text.
//
//   outF      - stream to write to
//   buf       - table to dump; exactly 256 elements are read
//   hexFormat - printf-style format used for each element (the element is passed as an int)
//   title     - element type and variable name, written between "const " and "[256]"
//
// The values are laid out as 16 rows of 16, every value followed by a comma.
template<class T>
void DumpListing(FILE *outF, const T *buf, const char *hexFormat, const char *title)
{
	const int kNumEntries = 256;
	const int kEntriesPerRow = 16;

	fprintf(outF, "\t\tconst %s[256] =\n", title);
	fputs("\t\t{", outF);

	for (int rowStart = 0; rowStart < kNumEntries; rowStart += kEntriesPerRow)
	{
		// Each row begins on a fresh, indented line
		fputs("\n\t\t\t", outF);

		for (int col = 0; col < kEntriesPerRow; col++)
		{
			fprintf(outF, hexFormat, static_cast<int>(buf[rowStart + col]));
			fputc(',', outF);

			// Values within a row are space-separated; the last one gets no trailing space
			if (col + 1 < kEntriesPerRow)
				fputc(' ', outF);
		}
	}

	fputs("\n\t\t};\n", outF);
}

// Parses a hexadecimal number written with a lowercase "0x" prefix.
//
//   str - text to parse, e.g. "0x00E4"
//
// Returns the parsed value, or 0 if str does not start with "0x".  Upper- and lower-case
// hex digits are accepted.  Any other character after the prefix (such as a trailing tab)
// is skipped and does not affect the result.
uint32_t ParseHexCode(const std::string &str)
{
	if (str.compare(0, 2, "0x") != 0)
		return 0;

	uint32_t result = 0;
	for (size_t i = 2; i < str.length(); i++)
	{
		const char c = str[i];

		uint32_t digit = 0;
		if (c >= 'a' && c <= 'f')
			digit = static_cast<uint32_t>(c - 'a') + 0xA;
		else if (c >= 'A' && c <= 'F')
			digit = static_cast<uint32_t>(c - 'A') + 0xA;
		else if (c >= '0' && c <= '9')
			digit = static_cast<uint32_t>(c - '0');
		else
			continue;	// Not a hex digit: leave the accumulated value untouched

		// Shift only when a digit is actually consumed.  Shifting first and dividing back
		// on a bad character would lose the top nibble of values >= 0x10000000.
		result = result * 16 + digit;
	}

	return result;
}

// Splits a character description of the form "LATIN [SMALL |CAPITAL ]<rest>".
//
//   str       - description to examine
//   isSmall   - set to true when "SMALL " follows the "LATIN " prefix
//   isCapital - set to true when "CAPITAL " follows the "LATIN " prefix
//   remainder - receives whatever follows the recognized prefix words
//
// Returns false, leaving all three outputs untouched, when str does not begin with "LATIN ".
bool ParseLatinDesc(const std::string &str, bool &isSmall, bool &isCapital, std::string &remainder)
{
	const size_t kLatinLen = 6;		// strlen("LATIN ")
	const size_t kSmallLen = 6;		// strlen("SMALL ")
	const size_t kCapitalLen = 8;	// strlen("CAPITAL ")

	if (str.compare(0, kLatinLen, "LATIN ") != 0)
		return false;

	const bool foundSmall = (str.compare(kLatinLen, kSmallLen, "SMALL ") == 0);
	const bool foundCapital = !foundSmall && (str.compare(kLatinLen, kCapitalLen, "CAPITAL ") == 0);

	// Work out how much of the front of the string the recognized words occupy
	size_t prefixLen = kLatinLen;
	if (foundSmall)
		prefixLen += kSmallLen;
	else if (foundCapital)
		prefixLen += kCapitalLen;

	isCapital = foundCapital;
	isSmall = foundSmall;
	remainder = str.substr(prefixLen);

	return true;
}

// Parses one line of the mapping file and records it in g_toUnicode and g_charDescs.
//
//   lineStr - a single line, without its terminating '\n'
//
// A data line has the shape "<char code>\t<unicode code>\t# <description>", where both codes
// are "0x"-prefixed hex.  Everything from the first '#' onward is treated as the comment.
// The line is ignored when:
//   - it has fewer than two tabs before the comment,
//   - the character code column does not start with "0x", or
//   - the character code does not fit the 256-entry tables.
// A data line without a comment is stored with an empty description.
void ProcessLine(const std::string &lineStr)
{
	std::string comment;
	std::string contents;

	const size_t commentPos = lineStr.find('#');
	if (commentPos == std::string::npos)
		contents = lineStr;
	else
	{
		// Keep only the text after the '#'
		comment = lineStr.substr(commentPos + 1);
		contents = lineStr.substr(0, commentPos);
	}

	const size_t tabLoc = contents.find('\t');
	if (tabLoc == std::string::npos)
		return;

	const size_t secondTabLoc = contents.find('\t', tabLoc + 1);
	if (secondTabLoc == std::string::npos)
		return;

	const std::string pageCode = contents.substr(0, tabLoc);
	// The column ends just before the second tab
	const std::string unicodeCode = contents.substr(tabLoc + 1, secondTabLoc - tabLoc - 1);

	// ParseHexCode reports a missing "0x" prefix as 0, which is also a valid table index.
	// Reject such lines here so they cannot overwrite entry 0.
	if (pageCode.compare(0, 2, "0x") != 0)
		return;

	const uint32_t decodedUnicodeCode = ParseHexCode(unicodeCode);
	const uint32_t decodedCharCode = ParseHexCode(pageCode);

	// The tables only hold 256 entries; anything larger would write out of bounds
	if (decodedCharCode >= 256)
		return;

	// Skip whitespace/control characters between the '#' and the description
	size_t descStart = 0;
	while (descStart < comment.length() && comment[descStart] <= ' ')
		descStart++;

	// Drop trailing whitespace.  The file is read in binary mode, so a CRLF-terminated line
	// would otherwise leave a '\r' on the end of the description.
	const size_t descLast = comment.find_last_not_of(" \t\r\n");

	std::string description;
	if (descLast != std::string::npos && descLast >= descStart)
		description = comment.substr(descStart, descLast - descStart + 1);

	// g_toUnicode is 16 bits wide, so only the low 16 bits of the parsed value are kept
	g_toUnicode[decodedCharCode] = static_cast<uint16_t>(decodedUnicodeCode);
	g_charDescs[decodedCharCode] = description;
}

// Tool entry point.
//
// Reads the character-set mapping from "MiscData/MacRoman.txt", derives the upper-case,
// lower-case and diacritic-stripping tables from the character descriptions, and writes all
// four tables as C++ source to "PortabilityLayer/MacRoman.cpp".  Both paths are relative to
// the current working directory.
//
// Returns 0 on success, the error code from fopen_s if either file cannot be opened, or 1 if
// reading the input or writing the output fails.  The command-line arguments are unused.
int main(int argc, const char **argv)
{
	(void)argc;
	(void)argv;

	// Defaults: no Unicode mapping, and every character maps to itself
	for (int i = 0; i < 256; i++)
	{
		g_toUnicode[i] = 0xffff;
		g_toUpper[i] = static_cast<uint8_t>(i);
		g_toLower[i] = static_cast<uint8_t>(i);
		g_stripDiacritic[i] = static_cast<uint8_t>(i);
	}

	FILE *f = nullptr;

	if (errno_t err = fopen_s(&f, "MiscData/MacRoman.txt", "rb"))
		return err;

	std::string currentLine;

	// fgetc returns an int so that EOF can be told apart from every valid byte.  Testing the
	// return value (rather than feof) also ends the loop if a read error occurs.
	int ch = 0;
	while ((ch = fgetc(f)) != EOF)
	{
		if (ch == '\n')
		{
			ProcessLine(currentLine);
			currentLine.clear();
		}
		else
			currentLine += static_cast<char>(ch);
	}

	// The last line may not be newline-terminated
	if (!currentLine.empty())
		ProcessLine(currentLine);

	const bool readFailed = (ferror(f) != 0);
	fclose(f);

	if (readFailed)
		return 1;

	// Fill unlisted codes
	for (int i = 0; i < 0x20; i++)
		g_toUnicode[i] = static_cast<uint16_t>(i);

	g_toUnicode[0x11] = 0x2318;
	g_toUnicode[0x12] = 0x21e7;
	g_toUnicode[0x13] = 0x2325;
	g_toUnicode[0x14] = 0x2303;

	// Pair up characters by comparing their "LATIN ..." descriptions
	for (int i = 0; i < 256; i++)
	{
		bool isSmall = false;
		bool isCapital = false;
		std::string remainder;

		if (!ParseLatinDesc(g_charDescs[i], isSmall, isCapital, remainder))
			continue;

		for (int j = 0; j < 256; j++)
		{
			bool otherIsSmall = false;
			bool otherIsCapital = false;
			std::string otherRemainder;

			if (!ParseLatinDesc(g_charDescs[j], otherIsSmall, otherIsCapital, otherRemainder))
				continue;

			// Same description apart from CAPITAL vs. SMALL: the two are a case pair
			if (isCapital && otherIsSmall && remainder == otherRemainder)
			{
				g_toLower[i] = static_cast<uint8_t>(j);
				g_toUpper[j] = static_cast<uint8_t>(i);
			}

			// Same case, and the other description is a strict prefix of this one: treat the
			// other character as this one's base form.
			// NOTE(review): the prefix test is purely textual, so a description that merely
			// starts with another's letters also matches - confirm this is intended.
			if (isSmall == otherIsSmall && isCapital == otherIsCapital && otherRemainder.length() < remainder.length() && remainder.compare(0, otherRemainder.length(), otherRemainder) == 0)
				g_stripDiacritic[i] = static_cast<uint8_t>(j);
		}
	}

	FILE *outF = nullptr;
	if (errno_t err = fopen_s(&outF, "PortabilityLayer/MacRoman.cpp", "wb"))
		return err;

	fprintf(outF, "#include \"MacRoman.h\"\n");
	fprintf(outF, "\n");
	fprintf(outF, "// This file is automatically generated by the ImportCharSet tool.  DO NOT MODIFY THIS BY HAND.\n");
	fprintf(outF, "namespace PortabilityLayer\n");
	fprintf(outF, "{\n");
	fprintf(outF, "\tnamespace MacRoman\n");
	fprintf(outF, "\t{\n");
	DumpListing(outF, g_toUnicode, "0x%04x", "uint16_t g_toUnicode");
	fprintf(outF, "\n");
	DumpListing(outF, g_toUpper, "0x%02x", "uint8_t g_toUpper");
	fprintf(outF, "\n");
	DumpListing(outF, g_toLower, "0x%02x", "uint8_t g_toLower");
	fprintf(outF, "\n");
	DumpListing(outF, g_stripDiacritic, "0x%02x", "uint8_t g_stripDiacritic");
	fprintf(outF, "\t}\n");
	fprintf(outF, "}\n");

	// Report a failed write instead of silently leaving a truncated generated file
	const bool writeFailed = (ferror(outF) != 0);
	if (fclose(outF) != 0 || writeFailed)
		return 1;

	return 0;
}