Bulk import script early version + StuffIt 5 fixes.

This commit is contained in:
elasota
2021-06-30 00:45:53 -04:00
parent 66cce6bcd4
commit c27d78d329
15 changed files with 2228 additions and 156 deletions

View File

@@ -6,6 +6,7 @@
#include "PLBigEndian.h"
#include <vector>
#include <unordered_map>
#include "CSInputBuffer.h"
@@ -101,9 +102,15 @@ struct StuffIt5Block
std::vector<uint8_t> m_filename;
std::vector<StuffIt5Block> m_children;
int m_numChildren;
bool Read(IFileReader &reader)
int64_t m_endPos;
bool Read(IFileReader &reader, bool &outIsDirectoryAppendage)
{
outIsDirectoryAppendage = false;
int64_t headerPos = reader.GetPosition();
if (!reader.ReadExact(&m_header, sizeof(m_header)))
return false;
@@ -145,13 +152,12 @@ struct StuffIt5Block
if (commentLength > m_header.m_headerSize - sizeWithOnlyNameAndPasswordInfo - 4)
return false;
m_commentSize = commentLength;
m_commentPos = reader.GetPosition();
if (commentLength)
{
if (reader.SeekCurrent(commentLength))
if (!reader.SeekCurrent(commentLength))
return false;
}
@@ -166,6 +172,13 @@ struct StuffIt5Block
if (!reader.SeekCurrent(m_header.m_headerSize - sizeWithCommentData))
return false;
if (m_header.m_dataForkDesc.m_uncompressedSize == static_cast<uint32_t>(0xffffffff))
{
outIsDirectoryAppendage = true;
m_endPos = reader.GetPosition();
return true;
}
if (!reader.ReadExact(&m_annex1, sizeof(m_annex1)))
return false;
@@ -199,21 +212,13 @@ struct StuffIt5Block
{
int numFiles = (m_header.m_dataForkDesc.m_algorithm_dirNumFilesHigh << 8) | (m_header.m_dataForkDesc.m_passwordDataLength_dirNumFilesLow);
m_children.resize(numFiles);
for (int i = 0; i < numFiles; i++)
{
if (i != 0)
{
if (!reader.SeekStart(m_children[i - 1].m_header.m_nextEntryOffset))
return false;
}
if (!m_children[i].Read(reader))
return false;
}
m_numChildren = numFiles;
m_endPos = reader.GetPosition();
}
else
{
m_numChildren = 0;
if (m_hasResourceFork)
{
m_resForkPos = reader.GetPosition();
@@ -221,6 +226,8 @@ struct StuffIt5Block
}
else
m_dataForkPos = reader.GetPosition();
m_endPos = m_dataForkPos + m_header.m_dataForkDesc.m_compressedSize;
}
return true;
@@ -304,6 +311,34 @@ bool StuffIt5Parser::Check(IFileReader &reader)
return (*match) == '\0';
}
static bool RecursiveBuildTree(std::vector<StuffIt5Block> &dirBlocks, uint32_t dirPos, const std::vector<StuffIt5Block> &flatBlocks, const std::unordered_map<uint32_t, size_t> &filePosToDirectoryBlock, const std::unordered_map<size_t, uint32_t> &directoryBlockToFilePos, const std::unordered_map<uint32_t, std::vector<size_t>> &entryChildren, int depth)
{
if (depth == 16)
return false;
std::unordered_map<uint32_t, std::vector<size_t>>::const_iterator children = entryChildren.find(dirPos);
if (children == entryChildren.end())
return true;
for (size_t childIndex : children->second)
{
StuffIt5Block block = flatBlocks[childIndex];
if (block.m_isDirectory)
{
std::unordered_map<size_t, uint32_t>::const_iterator directoryFilePosIt = directoryBlockToFilePos.find(childIndex);
if (directoryFilePosIt == directoryBlockToFilePos.end())
return false;
if (!RecursiveBuildTree(block.m_children, directoryFilePosIt->second, flatBlocks, filePosToDirectoryBlock, directoryBlockToFilePos, entryChildren, depth + 1))
return false;
}
dirBlocks.push_back(static_cast<StuffIt5Block&&>(block));
}
return true;
}
ArchiveItemList *StuffIt5Parser::Parse(IFileReader &reader)
{
reader.SeekStart(0);
@@ -317,17 +352,52 @@ ArchiveItemList *StuffIt5Parser::Parse(IFileReader &reader)
if (!reader.SeekStart(header.m_rootDirFirstEntryOffset))
return nullptr;
std::vector<StuffIt5Block> rootDirBlocks;
rootDirBlocks.resize(numRootDirEntries);
size_t totalBlocks = numRootDirEntries;
std::vector<StuffIt5Block> flatBlocks;
for (int i = 0; i < numRootDirEntries; i++)
std::unordered_map<size_t, uint32_t> directoryBlockToFilePos;
std::unordered_map<uint32_t, size_t> filePosToDirectoryBlock;
// Unfortunately StuffIt 5 archive next/prev entry chains seem to be meaningless.
// The only real way to determine directory structure is after the fact.
for (int i = 0; i < totalBlocks; i++)
{
if (i != 0)
reader.SeekStart(rootDirBlocks[i - 1].m_header.m_nextEntryOffset);
int64_t fpos = reader.GetPosition();
if (!rootDirBlocks[i].Read(reader))
bool isAppendage = false;
StuffIt5Block flatBlock;
if (!flatBlock.Read(reader, isAppendage))
return nullptr;
if (isAppendage)
{
totalBlocks++;
continue;
}
if (flatBlock.m_isDirectory)
{
totalBlocks += flatBlock.m_numChildren;
directoryBlockToFilePos[flatBlocks.size()] = static_cast<uint32_t>(fpos);
filePosToDirectoryBlock[static_cast<uint32_t>(fpos)] = flatBlocks.size();
}
if (i != totalBlocks - 1)
{
if (!reader.SeekStart(flatBlock.m_endPos))
return nullptr;
}
flatBlocks.push_back(flatBlock);
}
std::unordered_map<uint32_t, std::vector<size_t>> entryChildren;
for (size_t i = 0; i < flatBlocks.size(); i++)
entryChildren[flatBlocks[i].m_header.m_dirEntryOffset].push_back(i);
std::vector<StuffIt5Block> rootDirBlocks;
RecursiveBuildTree(rootDirBlocks, 0, flatBlocks, filePosToDirectoryBlock, directoryBlockToFilePos, entryChildren, 0);
return ConvertToItemList(rootDirBlocks);
}

View File

@@ -95,7 +95,12 @@ StuffItParser g_stuffItParser;
StuffIt5Parser g_stuffIt5Parser;
CompactProParser g_compactProParser;
std::string LegalizeWindowsFileName(const std::string &path)
// Returns true when c is a forward slash or a backslash.
static bool IsSeparator(char c)
{
	switch (c)
	{
	case '/':
	case '\\':
		return true;
	default:
		return false;
	}
}
std::string LegalizeWindowsFileName(const std::string &path, bool paranoid)
{
const size_t length = path.length();
@@ -115,6 +120,9 @@ std::string LegalizeWindowsFileName(const std::string &path)
isLegalChar = false;
}
if (paranoid && isLegalChar)
isLegalChar = c == '_' || c == ' ' || c == '.' || c == ',' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
if (isLegalChar)
legalizedPath.append(&c, 1);
else
@@ -209,7 +217,7 @@ void MakeIntermediateDirectories(const std::string &path)
}
}
int RecursiveExtractFiles(int depth, ArchiveItemList *itemList, const std::string &path, IFileReader &reader, const PortabilityLayer::CombinedTimestamp &ts);
int RecursiveExtractFiles(int depth, ArchiveItemList *itemList, const std::string &path, bool pathParanoid, IFileReader &reader, const PortabilityLayer::CombinedTimestamp &ts);
int ExtractSingleFork(const ArchiveCompressedChunkDesc &chunkDesc, const std::string &path, IFileReader &reader)
{
@@ -361,7 +369,7 @@ int ExtractFile(const ArchiveItem &item, const std::string &path, IFileReader &r
return 0;
}
int ExtractItem(int depth, const ArchiveItem &item, const std::string &dirPath, IFileReader &reader, const PortabilityLayer::CombinedTimestamp &ts)
int ExtractItem(int depth, const ArchiveItem &item, const std::string &dirPath, bool pathParanoid, IFileReader &reader, const PortabilityLayer::CombinedTimestamp &ts)
{
std::string path(reinterpret_cast<const char*>(item.m_fileNameUTF8.data()), item.m_fileNameUTF8.size());
@@ -371,7 +379,7 @@ int ExtractItem(int depth, const ArchiveItem &item, const std::string &dirPath,
fputs_utf8(path.c_str(), stdout);
printf("\n");
path = LegalizeWindowsFileName(path);
path = LegalizeWindowsFileName(path, pathParanoid);
path = dirPath + path;
@@ -381,7 +389,7 @@ int ExtractItem(int depth, const ArchiveItem &item, const std::string &dirPath,
path.append("\\");
int returnCode = RecursiveExtractFiles(depth + 1, item.m_children, path, reader, ts);
int returnCode = RecursiveExtractFiles(depth + 1, item.m_children, path, pathParanoid, reader, ts);
if (returnCode)
return returnCode;
@@ -391,14 +399,14 @@ int ExtractItem(int depth, const ArchiveItem &item, const std::string &dirPath,
return ExtractFile(item, path, reader, ts);
}
int RecursiveExtractFiles(int depth, ArchiveItemList *itemList, const std::string &path, IFileReader &reader, const PortabilityLayer::CombinedTimestamp &ts)
int RecursiveExtractFiles(int depth, ArchiveItemList *itemList, const std::string &path, bool pathParanoid, IFileReader &reader, const PortabilityLayer::CombinedTimestamp &ts)
{
const std::vector<ArchiveItem> &items = itemList->m_items;
const size_t numChildren = items.size();
for (size_t i = 0; i < numChildren; i++)
{
int returnCode = ExtractItem(depth, items[i], path, reader, ts);
int returnCode = ExtractItem(depth, items[i], path, pathParanoid, reader, ts);
if (returnCode)
return returnCode;
}
@@ -406,22 +414,25 @@ int RecursiveExtractFiles(int depth, ArchiveItemList *itemList, const std::strin
return 0;
}
int toolMain(int argc, const char **argv)
int PrintUsage()
{
if (argc != 4)
{
fprintf(stderr, "Usage: unpacktool <archive file> <timestamp.ts> <destination>");
return -1;
}
fprintf(stderr, "Usage: unpacktool <archive file> <timestamp.ts> <destination> [options]");
fprintf(stderr, "Usage: unpacktool -bulk <timestamp.ts> <archive files>");
return -1;
}
FILE *inputArchive = fopen_utf8(argv[1], "rb");
int decompMain(int argc, const char **argv)
{
for (int i = 0; i < argc; i++)
printf("%s\n", argv[i]);
if (!inputArchive)
{
fprintf(stderr, "Could not open input archive");
return -1;
}
if (argc < 4)
return PrintUsage();
bool isBulkMode = !strcmp(argv[1], "-bulk");
if (!isBulkMode && argc < 4)
return PrintUsage();
FILE *tsFile = fopen_utf8(argv[2], "rb");
@@ -440,45 +451,111 @@ int toolMain(int argc, const char **argv)
fclose(tsFile);
CFileReader reader(inputArchive);
int arcArg = 1;
int numArgArcs = 1;
IArchiveParser *parsers[] =
if (isBulkMode)
{
&g_compactProParser,
&g_stuffItParser,
&g_stuffIt5Parser
};
arcArg = 3;
numArgArcs = argc - 3;
}
ArchiveItemList *archiveItemList = nullptr;
printf("Reading archive...\n");
for (IArchiveParser *parser : parsers)
bool pathParanoid = false;
if (!isBulkMode)
{
if (parser->Check(reader))
for (int optArgIndex = 4; optArgIndex < argc; )
{
archiveItemList = parser->Parse(reader);
break;
const char *optArg = argv[optArgIndex++];
if (!strcmp(optArg, "-paranoid"))
pathParanoid = true;
else
{
fprintf(stderr, "Unknown option %s\n", optArg);
return -1;
}
}
}
if (!archiveItemList)
for (int arcArgIndex = 0; arcArgIndex < numArgArcs; arcArgIndex++)
{
fprintf(stderr, "Failed to open archive");
return -1;
const char *arcPath = argv[arcArg + arcArgIndex];
FILE *inputArchive = fopen_utf8(arcPath, "rb");
std::string destPath;
if (isBulkMode)
{
destPath = arcPath;
size_t lastSepIndex = 0;
for (size_t i = 1; i < destPath.size(); i++)
{
if (destPath[i] == '/' || destPath[i] == '\\')
lastSepIndex = i;
}
destPath = destPath.substr(0, lastSepIndex);
}
else
destPath = argv[3];
if (!inputArchive)
{
fprintf(stderr, "Could not open input archive");
return -1;
}
CFileReader reader(inputArchive);
IArchiveParser *parsers[] =
{
&g_compactProParser,
&g_stuffItParser,
&g_stuffIt5Parser
};
ArchiveItemList *archiveItemList = nullptr;
printf("Reading archive '%s'...\n", arcPath);
for (IArchiveParser *parser : parsers)
{
if (parser->Check(reader))
{
archiveItemList = parser->Parse(reader);
break;
}
}
if (!archiveItemList)
{
fprintf(stderr, "Failed to open archive");
return -1;
}
printf("Decompressing files...\n");
std::string currentPath = destPath;
TerminateDirectoryPath(currentPath);
MakeIntermediateDirectories(currentPath);
int returnCode = RecursiveExtractFiles(0, archiveItemList, currentPath, pathParanoid, reader, ts);
if (returnCode != 0)
{
fprintf(stderr, "Error decompressing archive");
return returnCode;
}
delete archiveItemList;
}
printf("Decompressing files...\n");
std::string currentPath = argv[3];
TerminateDirectoryPath(currentPath);
MakeIntermediateDirectories(currentPath);
int returnCode = RecursiveExtractFiles(0, archiveItemList, currentPath, reader, ts);
delete archiveItemList;
return returnCode;
return 0;
}
// Tool entry point: passes the command line to decompMain unchanged and
// returns whatever result code decompMain produces.
int toolMain(int argc, const char **argv)
{
	return decompMain(argc, argv);
}