cdc/source/app.d
2025-09-04 21:51:23 +03:00

272 lines
6.3 KiB
D
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

module app;
import std.stdio : writeln, writefln, File;
import std.file : exists, mkdirRecurse, read, write, readText;
import std.path : baseName, buildPath, absolutePath;
import std.getopt : getopt;
import std.string : strip, split, splitLines;
import std.algorithm.searching : startsWith;
import std.conv : to;
import std.datetime : Clock;
import std.exception : enforce;
import std.digest.sha : sha256Of;
import fastcdc; // project-local FastCDC module
// ---------- utilities ----------
/// Encodes `bytes` as a lowercase hexadecimal string, two characters per byte.
///
/// The parameter is `scope` so that a slice of a local (stack) array, such as
/// a digest buffer, can be passed in safely.
///
/// Returns: a newly allocated immutable string of length `bytes.length * 2`.
@safe pure
string toHex(scope const(ubyte)[] bytes)
{
    static immutable string digits = "0123456789abcdef";

    auto encoded = new char[](bytes.length * 2);
    foreach (idx, octet; bytes)
    {
        // High nibble first, then low nibble.
        encoded[2 * idx] = digits[octet >> 4];
        encoded[2 * idx + 1] = digits[octet & 0x0F];
    }
    return encoded.idup;
}
/// Returns the fan-out location of a chunk inside the store:
/// `<storeDir>/chunks/<aa>/<bb>/<hashHex>.bin`, where `aa` is the first two
/// characters of `hashHex` and `bb` the next two.
///
/// Params:
///   storeDir = root directory of the chunk store
///   hashHex  = hexadecimal chunk hash; must be at least four characters long
///
/// Throws: `Exception` when `hashHex` is too short to derive both fan-out
/// directories (previously this surfaced as an unrecoverable range error).
@safe
string chunkPath(string storeDir, string hashHex)
{
    enforce(hashHex.length >= 4, "Слишком короткий хеш чанка: " ~ hashHex);
    return buildPath(storeDir, "chunks", hashHex[0 .. 2], hashHex[2 .. 4], hashHex ~ ".bin");
}
/// Builds the manifest location `<storeDir>/manifests/<name>.<epoch>.manifest`,
/// where `<name>` is the base name of `srcPath`.
///
/// Params:
///   storeDir = root directory of the chunk store
///   srcPath  = path of the file the manifest describes
///   epoch    = number embedded in the file name (the caller passes a Unix time)
@safe
string manifestPath(string storeDir, string srcPath, long epoch)
{
    immutable fileName = baseName(srcPath) ~ "." ~ epoch.to!string ~ ".manifest";
    return buildPath(storeDir, "manifests", fileName);
}
/// Creates the `chunks` and `manifests` subdirectories under `storeDir`
/// (including any missing parent directories).
@safe
void ensureDirs(string storeDir)
{
    foreach (subdir; ["chunks", "manifests"])
        mkdirRecurse(buildPath(storeDir, subdir));
}
/// Reads the entire file at `path` and returns its contents as raw bytes.
///
/// Marked `@trusted` because the untyped `void[]` buffer returned by
/// `std.file.read` is reinterpreted as `ubyte[]`.
@trusted ubyte[] readBytes(string path)
{
    return cast(ubyte[]) read(path);
}
// ---------- split ----------
/// Options for the `split` command.
struct SplitOpts
{
    /// Root directory of the chunk store.
    string storeDir;
    /// Path of the file to split.
    string filePath;

    // Chunk-size settings handed to FastCDCParams by cmdSplit.
    size_t minSize = 8 << 10;   // 8 KiB
    size_t avgSize = 64 << 10;  // 64 KiB
    size_t maxSize = 256 << 10; // 256 KiB
}
/// Splits `opt.filePath` into chunks with FastCDC, stores every chunk under
/// `<store>/chunks/<aa>/<bb>/<sha256>.bin` and writes a manifest listing the
/// chunks in order.
///
/// Chunks that already exist in the store are not rewritten.
///
/// Params:
///   opt = store directory, source file and chunk-size settings
///
/// Returns: 0 on success.
///
/// Throws: `Exception` if the source file does not exist; file-system errors
/// from the underlying std.file / std.stdio calls propagate unchanged.
@safe
int cmdSplit(SplitOpts opt)
{
    import std.file : rename;

    enforce(exists(opt.filePath), "Файл не найден: " ~ opt.filePath);
    // Creates both <store>/chunks and <store>/manifests.
    ensureDirs(opt.storeDir);

    // The whole file is loaded into memory. std.file.read yields void[];
    // readBytes reinterprets that buffer as ubyte[].
    ubyte[] data = readBytes(opt.filePath);

    FastCDCParams p = {opt.minSize, opt.avgSize, opt.maxSize};
    p.normalize();

    size_t chunkCount = 0;
    immutable size_t totalBytes = data.length;

    auto epoch = Clock.currTime().toUnixTime();
    auto mfPath = manifestPath(opt.storeDir, opt.filePath, epoch);
    auto mf = File(mfPath, "w");

    // Manifest header.
    mf.writeln("# FastCDC manifest");
    mf.writefln("path\t%s", absolutePath(opt.filePath));
    mf.writefln("size\t%s", to!string(totalBytes));
    mf.writefln("algo\tsha256");
    mf.writefln("min\t%u", cast(uint) p.minSize);
    mf.writefln("avg\t%u", cast(uint) p.avgSize);
    mf.writefln("max\t%u", cast(uint) p.maxSize);
    mf.writeln("ord\thash\tsize");

    processStream(data, p, (size_t start, size_t len) @safe {
        auto slice = data[start .. start + len];
        auto digest = sha256Of(slice); // ubyte[32] on the stack
        auto hex = toHex(digest[]);    // fine: toHex takes a scope parameter

        auto cpath = chunkPath(opt.storeDir, hex);
        if (!exists(cpath))
        {
            // mkdirRecurse creates the intermediate <aa> directory as well.
            mkdirRecurse(buildPath(opt.storeDir, "chunks", hex[0 .. 2], hex[2 .. 4]));
            // Write to a temporary name and rename afterwards, so that an
            // interrupted run never leaves a truncated chunk under its final
            // name (which the exists() check above would then keep forever).
            immutable tmpPath = cpath ~ ".tmp";
            write(tmpPath, slice);
            rename(tmpPath, cpath);
        }

        // size_t values are formatted directly: casting to uint would
        // truncate anything at or above 4 GiB.
        mf.writefln("%u\t%s\t%u", chunkCount, hex, len);
        ++chunkCount;
        return 0;
    });

    mf.flush();
    mf.close();

    writefln("split: %s", opt.filePath);
    writefln("store: %s", opt.storeDir);
    writefln("manifest: %s", mfPath);
    writefln("chunks: %u, bytes: %u", chunkCount, totalBytes);
    return 0;
}
// ---------- restore ----------
/// Options for the `restore` command.
struct RestoreOpts
{
    string storeDir;     /// root directory of the chunk store
    string manifestFile; /// manifest describing the file to rebuild
    string outFile;      /// path the rebuilt file is written to
}
/// Rebuilds a file from a manifest by concatenating the listed chunks, in
/// manifest order, into `opt.outFile`.
///
/// Every data row (`ord<TAB>hash<TAB>size`) is validated before its chunk is
/// written: the hash must be hexadecimal, and the stored chunk must match both
/// the recorded size and the recorded SHA-256 (the only algorithm the `split`
/// command writes).
///
/// Params:
///   opt = store directory, manifest path and output path
///
/// Returns: 0 on success.
///
/// Throws: `Exception` if the manifest is missing or malformed, a chunk is
/// missing, or a chunk does not match its manifest entry. Note that the output
/// file is created before the rows are processed, so a failure can leave a
/// partially written file behind.
@safe
int cmdRestore(RestoreOpts opt)
{
    import std.algorithm.searching : all;
    import std.ascii : isHexDigit;
    import std.string : toLower;

    enforce(exists(opt.manifestFile), "Манифест не найден: " ~ opt.manifestFile);
    auto lines = splitLines(readText(opt.manifestFile));

    // Skip the header up to and including the "ord\thash\tsize" line.
    size_t i = 0;
    while (i < lines.length && !lines[i].strip.startsWith("ord"))
        ++i;
    enforce(i < lines.length, "Не найден заголовок секции данных в манифесте");
    ++i;

    auto dst = File(opt.outFile, "wb");
    size_t count = 0;
    for (; i < lines.length; ++i)
    {
        auto ln = lines[i].strip;
        if (ln.length == 0 || ln[0] == '#')
            continue;

        auto cols = ln.split('\t');
        enforce(cols.length == 3, "Строка манифеста повреждена: " ~ ln);

        // The hash comes from a file on disk and is used to build a path, so
        // reject anything that is not plain hex (too short for the fan-out
        // directories, or containing path separators / "..").
        auto hashHex = cols[1];
        enforce(hashHex.length >= 4 && all!isHexDigit(hashHex),
            "Некорректный хеш в манифесте: " ~ ln);
        // to!size_t throws ConvException (an Exception) on a non-numeric size.
        immutable expectedSize = to!size_t(cols[2]);

        auto cpath = chunkPath(opt.storeDir, hashHex);
        enforce(exists(cpath), "Чанк не найден: " ~ cpath);
        ubyte[] chunkData = readBytes(cpath);

        // Integrity checks: never emit a truncated or altered chunk silently.
        enforce(chunkData.length == expectedSize,
            "Размер чанка не совпадает с манифестом: " ~ cpath);
        auto digest = sha256Of(chunkData);
        enforce(toHex(digest[]) == toLower(hashHex),
            "Хеш чанка не совпадает с манифестом: " ~ cpath);

        dst.rawWrite(chunkData);
        ++count;
    }
    dst.close();

    // count is formatted as size_t; a cast to uint would truncate it.
    writefln("restore: %s <- %s (chunks: %u)",
        opt.outFile, opt.manifestFile, count);
    return 0;
}
// ---------- CLI ----------
/// Prints the usage summary for both sub-commands to standard output.
///
/// Params:
///   prog = program name shown at the start of each usage line
@safe
void printHelp(string prog)
{
    immutable prefix = " " ~ prog;
    writeln("Usage:");
    writeln(prefix, " split --store <dir> <file> [--min N] [--avg N] [--max N]");
    writeln(prefix, " restore --store <dir> <manifest> <out_file>");
}
/// Program entry point: dispatches to the `split` or `restore` sub-command.
///
/// Usage errors (missing sub-command, missing `--store`, missing positional
/// arguments) print the help text and return 1. Failures raised while parsing
/// options or running a command are reported on stderr as a one-line message
/// and also return 1, instead of escaping as an uncaught exception.
///
/// Returns: 0 on success (or when help was requested), 1 on any error.
int main(string[] args) // not @safe: getopt takes the addresses of local variables
{
    import std.stdio : stderr;

    if (args.length < 2)
    {
        printHelp(args[0]);
        return 1;
    }

    try
    {
        switch (args[1])
        {
        case "split":
            {
                SplitOpts opt;
                string store;
                size_t minS = 0, avgS = 0, maxS = 0;
                auto res = getopt(args,
                    "store", &store,
                    "min", &minS,
                    "avg", &avgS,
                    "max", &maxS
                );
                if (res.helpWanted)
                {
                    printHelp(args[0]);
                    return 0;
                }
                // getopt removes the options it consumed; positionals remain in args.
                if (args.length < 3 || store.length == 0)
                {
                    printHelp(args[0]);
                    return 1;
                }
                opt.storeDir = store;
                opt.filePath = args[2];
                // A value of 0 means "not given": keep the SplitOpts default.
                if (minS)
                    opt.minSize = minS;
                if (avgS)
                    opt.avgSize = avgS;
                if (maxS)
                    opt.maxSize = maxS;
                return cmdSplit(opt);
            }
        case "restore":
            {
                RestoreOpts opt;
                string store;
                auto res = getopt(args, "store", &store);
                if (res.helpWanted)
                {
                    printHelp(args[0]);
                    return 0;
                }
                if (args.length < 4 || store.length == 0)
                {
                    printHelp(args[0]);
                    return 1;
                }
                opt.storeDir = store;
                opt.manifestFile = args[2];
                opt.outFile = args[3];
                return cmdRestore(opt);
            }
        default:
            printHelp(args[0]);
            return 1;
        }
    }
    catch (Exception e)
    {
        // Covers getopt failures (unknown option, non-numeric value) as well
        // as enforce / I/O errors raised by the commands.
        stderr.writeln("Ошибка: ", e.msg);
        return 1;
    }
}