From e2fcd7851df5ea7488a8b3536cda7a84e49f7b64 Mon Sep 17 00:00:00 2001
From: Diomendius <42310725+Diomendius@users.noreply.github.com>
Date: Mon, 5 Aug 2024 15:46:24 +1200
Subject: [PATCH] Add UnixUnicodeToolShim.cpp

This is the counterpart to WindowsUnicodeToolShim.cpp for *nix systems.
---
 .../UnixUnicodeToolShim.cpp                   | 99 +++++++++++++++++++
 .../WindowsUnicodeToolShim.h                  |  9 ++
 2 files changed, 108 insertions(+)
 create mode 100644 WindowsUnicodeToolShim/UnixUnicodeToolShim.cpp
diff --git a/WindowsUnicodeToolShim/UnixUnicodeToolShim.cpp b/WindowsUnicodeToolShim/UnixUnicodeToolShim.cpp
new file mode 100644
index 0000000..65f09c6
--- /dev/null
+++ b/WindowsUnicodeToolShim/UnixUnicodeToolShim.cpp
@@ -0,0 +1,99 @@
+#include "WindowsUnicodeToolShim.h"
+
+#include <cstdio>
+#include <cstring>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <dirent.h>
+#include <sys/stat.h>
+
+// Linux and macOS (and probably other non-Windows systems in general) do not
+// have separate file system or command line APIs for different text encodings.
+// On these systems UTF-8 is commonly the system encoding and, if so, argv will
+// typically be UTF-8 encoded, though it is the calling process's responsibility
+// to ensure this, as argv is passed verbatim as a sequence of byte strings.
+//
+// Filesystems on Unix-like systems are typically encoding-unaware, in which
+// case the actual encoding used should match the system encoding, or else
+// manual intervention is required.
+//
+// On macOS, HFS+ and APFS filenames are encoded as UTF-16 and UTF-8
+// respectively, and the C filesystem API accepts UTF-8 strings. There are some
+// gotchas relating to Unicode normalization, where HFS+ enforces a specific
+// form of normalization at the filesystem layer but APFS does not, and using
+// the C API to access the filesystem may avoid automatic normalization that
+// higher-level macOS API functions may perform.
+//
+// In summary, text encoding is still a hairy problem on every computer system,
+// though on the major non-Windows systems, assuming UTF-8 encoding is
+// reasonable. For now, this file simply maps the WindowsUnicodeToolShim
+// functions to their regular C API counterparts.
+int toolMain(int argc, const char **argv); // the real entry point; not defined in this file
+int main(int argc, const char **argv) { const int exitCode = toolMain(argc, argv); return exitCode; }
+
+FILE *fopen_utf8(const char *path, const char *options) { return ::fopen(path, options); } // path bytes are forwarded unchanged
+int fputs_utf8(const char *str, FILE *f) { return ::fputs(str, f); } // string bytes are written unchanged
+int mkdir_utf8(const char *path) { return ::mkdir(path, S_IRWXU | S_IRWXG | S_IRWXO); } // rwx for user, group and others (0777)
+
+// Appends a trailing '/' to `path` unless it already ends with one; an empty
+// string becomes "/".
+void TerminateDirectoryPath(std::string &path)
+{
+	if (path.empty() || path.back() != '/')
+		path += '/';
+}
+
+// Reports whether `ent`, an entry read from directory `dirPath`, is a directory.
+// Some filesystems leave d_type as DT_UNKNOWN; in that case fall back to lstat(),
+// which (like d_type) classifies a symlink as a link rather than its target.
+static bool IsDirectoryEntry(const std::string &dirPath, const dirent *ent)
+{
+	if (ent->d_type != DT_UNKNOWN)
+		return ent->d_type == DT_DIR;
+
+	struct stat st;
+	const std::string fullPath = dirPath + "/" + ent->d_name;
+	return lstat(fullPath.c_str(), &st) == 0 && S_ISDIR(st.st_mode);
+}
+
+// Appends to `outPaths` the path ("<path>/<name>") of every entry of directory
+// `path` whose name ends with `ending`. When `recursive` is true, subdirectories
+// are descended into instead of being matched. "." and ".." are skipped, and a
+// directory that cannot be opened contributes nothing.
+void ScanDirectoryForExtension
+(
+	std::vector<std::string> &outPaths,
+	const char *path,
+	const char *ending,
+	bool recursive
+) {
+	DIR *dir = opendir(path);
+	if (!dir)
+		return;
+
+	const std::string dirPath(path);
+	const size_t endingLen = strlen(ending);
+
+	while (dirent *ent = readdir(dir))
+	{
+		const char *name = ent->d_name;
+		if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
+			continue;
+
+		if (recursive && IsDirectoryEntry(dirPath, ent))
+		{
+			const std::string subDir = dirPath + "/" + name;
+			ScanDirectoryForExtension(outPaths, subDir.c_str(), ending, recursive);
+			continue;
+		}
+
+		// Suffix comparison on the raw bytes of the entry name.
+		const size_t nameLen = strlen(name);
+		if (endingLen <= nameLen
+			&& memcmp(name + nameLen - endingLen, ending, endingLen) == 0)
+			outPaths.push_back(dirPath + "/" + name);
+	}
+	closedir(dir);
+}
diff --git a/WindowsUnicodeToolShim/WindowsUnicodeToolShim.h b/WindowsUnicodeToolShim/WindowsUnicodeToolShim.h
index 8d94ab5..2396c69 100644
--- a/WindowsUnicodeToolShim/WindowsUnicodeToolShim.h
+++ b/WindowsUnicodeToolShim/WindowsUnicodeToolShim.h
@@ -8,6 +8,7 @@ int mkdir_utf8(const char *path);
 void TerminateDirectoryPath(std::string &path);
 void ScanDirectoryForExtension(std::vector<std::string>& outPaths, const char *path, const char *ending, bool recursive);
 
+#ifdef _WIN32
 struct DirectoryScanContext;
 struct DirectoryScanEntry
 {
@@ -17,3 +18,11 @@ struct DirectoryScanEntry
 DirectoryScanContext *opendir_utf8(const char *name);
 DirectoryScanEntry *readdir_utf8(DirectoryScanContext *dir);
 void closedir_utf8(DirectoryScanContext *context);
+
+#else
+
+// MSVC-named 64-bit seek. fseeko() takes off_t, which can be 64-bit on
+// platforms where fseek()'s long offset is only 32 bits.
+inline int _fseeki64(FILE *stream, off_t offset, int whence) { return fseeko(stream, offset, whence); }
+inline off_t _ftelli64(FILE *stream) { return ftello(stream); } // off_t can be 64-bit where ftell()'s long is not
+#endif