From e2fcd7851df5ea7488a8b3536cda7a84e49f7b64 Mon Sep 17 00:00:00 2001 From: Diomendius <42310725+Diomendius@users.noreply.github.com> Date: Mon, 5 Aug 2024 15:46:24 +1200 Subject: [PATCH] Add UnixUnicodeToolShim.cpp This is the counterpart to WindowsUnicodeToolShim.cpp for *nix systems. --- .../UnixUnicodeToolShim.cpp | 99 +++++++++++++++++++ .../WindowsUnicodeToolShim.h | 9 ++ 2 files changed, 108 insertions(+) create mode 100644 WindowsUnicodeToolShim/UnixUnicodeToolShim.cpp diff --git a/WindowsUnicodeToolShim/UnixUnicodeToolShim.cpp b/WindowsUnicodeToolShim/UnixUnicodeToolShim.cpp new file mode 100644 index 0000000..65f09c6 --- /dev/null +++ b/WindowsUnicodeToolShim/UnixUnicodeToolShim.cpp @@ -0,0 +1,99 @@ +#include "WindowsUnicodeToolShim.h" + +#include +#include +#include +#include +#include + +#include +#include + +// Linux and macOS (and probably other non-Windows systems in general) do not +// have separate file system or command line APIs for different text encodings. +// On these systems UTF-8 is commonly the system encoding and, if so, argv will +// typically be UTF-8 encoded, though it is the calling process's responsibility +// to ensure this, as argv is passed verbatim as a sequence of byte strings. +// +// Filesystems on Unix-like systems are typically encoding-unaware, in which +// case the actual encoding used should match the system encoding, or else +// manual intervention is required. +// +// On macOS, HFS+ and APFS filenames are encoded as UTF-16 and UTF-8 +// respectively, and the C filesystem API accepts UTF-8 strings. There are some +// gotchas relating to Unicode normalization, where HFS+ enforces a specific +// form of normalization at the filesystem layer but APFS does not, and using +// the C API to access the filesystem may avoid automatic normalization that +// higher-level macOS API functions may perform. +// +// In summary, text encoding is still a hairy problem on every computer system, +// though on the major non-Windows systems, assuming UTF-8 encoding is +// reasonable. For now, this header simply maps the WindowsUnicodeToolShim +// functions to their regular C API counterparts. +int toolMain(int argc, const char **argv); +int main(int argc, const char **argv) { return toolMain(argc, argv); } + +FILE *fopen_utf8(const char *path, const char *options) { return fopen(path, options); } +int fputs_utf8(const char *str, FILE *f) { return fputs(str, f); } +int mkdir_utf8(const char *path) { return mkdir(path, 0777); } + +void TerminateDirectoryPath(std::string &path) +{ + const size_t len = path.length(); + + if (len == 0 || path[len - 1] != '/') + path.push_back('/'); +} + +void ScanDirectoryForExtension +( + std::vector &outPaths, + const char *path, + const char *ending, + bool recursive +) { + DIR *dir = opendir(path); + if (!dir) + { + return; + } + + size_t endingLen = strlen(ending); + + dirent *ent; + while ((ent = readdir(dir))) + { + if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) + continue; + else if (recursive && ent->d_type == DT_DIR) + { + std::string tmpPath(path); + tmpPath.append("/"); + tmpPath.append(ent->d_name); + ScanDirectoryForExtension( + outPaths, + tmpPath.c_str(), + ending, + recursive + ); + } + else + { + size_t nameLen = strlen(ent->d_name); + + if (endingLen <= nameLen && memcmp + ( + ent->d_name + nameLen - endingLen, + ending, + endingLen + ) == 0 + ) { + std::string tmpPath(path); + tmpPath.append("/"); + tmpPath.append(ent->d_name); + outPaths.push_back(std::move(tmpPath)); + } + } + } + closedir(dir); +} diff --git a/WindowsUnicodeToolShim/WindowsUnicodeToolShim.h b/WindowsUnicodeToolShim/WindowsUnicodeToolShim.h index 8d94ab5..2396c69 100644 --- a/WindowsUnicodeToolShim/WindowsUnicodeToolShim.h +++ b/WindowsUnicodeToolShim/WindowsUnicodeToolShim.h @@ -8,6 +8,7 @@ int mkdir_utf8(const char *path); void TerminateDirectoryPath(std::string &path); void ScanDirectoryForExtension(std::vector& outPaths, const char *path, const char *ending, bool recursive); +#ifdef _WIN32 struct DirectoryScanContext; struct DirectoryScanEntry { @@ -17,3 +18,11 @@ struct DirectoryScanEntry DirectoryScanContext *opendir_utf8(const char *name); DirectoryScanEntry *readdir_utf8(DirectoryScanContext *dir); void closedir_utf8(DirectoryScanContext *context); + +#else + +inline int _fseeki64(FILE *stream, long offset, int whence) { + return fseek(stream, offset, whence); +} +inline long _ftelli64(FILE *stream) { return ftell(stream); }; +#endif