forked from dlang/cdcdb

Added English-language DDoc

Alexander Zhirov 2025-09-13 03:15:22 +03:00
parent 85aa3c8f53
commit 5d0cf228d6
Signed by: alexander
GPG key ID: C8D8BE544A27C511
4 changed files with 173 additions and 27 deletions

View file

@@ -5,12 +5,12 @@ import std.digest.sha : SHA256, digest;
 struct Chunk
 {
     size_t index; // 1..N
-    size_t offset; // смещение в исходном буфере
-    size_t size; // размер чанка
-    immutable(ubyte)[32] sha256; // hex(SHA-256) содержимого
+    size_t offset; // offset in the source buffer
+    size_t size; // chunk size
+    immutable(ubyte)[32] sha256; // SHA-256 of the content (32 raw bytes)
 }

-// Change Data Capture (Захват изменения данных)
+// Change Data Capture (CDC)
 final class CDC
 {
 private:
@@ -36,13 +36,13 @@ private:
     ulong fingerprint = 0;
     size_t index;

-    // инициализация без cut-check
+    // initialization without a cut-check
     while (index < _minSize)
     {
         fingerprint = (fingerprint << 1) + _gear[src[index]];
         ++index;
     }

-    // строгая маска
+    // strict mask
     while (index < normalSize)
     {
         fingerprint = (fingerprint << 1) + _gear[src[index]];
@@ -50,7 +50,7 @@ private:
             return index;
         ++index;
     }

-    // слабая маска
+    // weak mask
     while (index < size)
     {
         fingerprint = (fingerprint << 1) + _gear[src[index]];
@@ -65,7 +65,7 @@ public:
     this(size_t minSize, size_t normalSize, size_t maxSize, ulong maskS, ulong maskL) @safe @nogc nothrow
     {
         assert(minSize > 0 && minSize < normalSize && normalSize < maxSize,
-            "Неверные размеры: требуется min < normal < max и min > 0");
+            "Invalid sizes: require min < normal < max and min > 0");
         _minSize = minSize;
         _normalSize = normalSize;
         _maxSize = maxSize;
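The three loops above are FastCDC's normalization: no cut point before `minSize`, a strict mask up to `normalSize`, then a weak mask up to `maxSize`. A minimal sketch of driving the chunker directly — the module path `cdcdb.cdc` is an assumption, and the sizes/masks simply mirror the `Storage` DDoc example further down:

---
import cdcdb.cdc : CDC, Chunk; // assumed module path
import std.stdio : writefln;

void demo(const(ubyte)[] data)
{
    // min=4096, normal=8192, max=16384; the strict mask (0x3FFF, 14 bits)
    // makes cuts rarer before normalSize, the weak mask (0x03FF, 10 bits)
    // makes them likelier after it, centering chunk sizes on normalSize.
    auto cdc = new CDC(4096, 8192, 16384, 0x3FFF, 0x03FF);
    Chunk[] chunks = cdc.split(data);
    foreach (c; chunks)
        writefln("chunk #%s offset=%s size=%s", c.index, c.offset, c.size);
}
---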

View file

@@ -87,7 +87,7 @@ private:
         if (msg.toLower.canFind("locked", "busy")) {
             if (--tryNo == 0) {
                 throw new Exception(
-                    "Не удалось выполнить подключение к базе данных после %d неудачных попыток: %s"
+                    "Failed to connect to the database after %d failed attempts: %s"
                     .format(_maxRetries, msg)
                 );
             }
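For context, the retry wrapper around this error handling presumably looks like the sketch below; only `tryNo`, `_maxRetries`, and the busy/locked match are taken from the diff — the helper itself and the backoff delay are assumptions:

---
import core.thread : Thread;
import core.time : msecs;
import std.algorithm : canFind;
import std.string : format, toLower;

// Hypothetical helper: retry `op` while SQLite reports "locked"/"busy".
void withRetry(size_t _maxRetries, void delegate() op)
{
    size_t tryNo = _maxRetries;
    while (true)
    {
        try { op(); return; }
        catch (Exception e)
        {
            string msg = e.msg;
            if (!msg.toLower.canFind("locked", "busy"))
                throw new Exception(msg);
            if (--tryNo == 0)
                throw new Exception(
                    "Failed to connect to the database after %d failed attempts: %s"
                    .format(_maxRetries, msg));
            Thread.sleep(100.msecs); // assumed backoff; the actual code may differ
        }
    }
}
---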
@@ -99,7 +99,7 @@ private:
         throw new Exception(msg);
     }

-    // Проверка БД на наличие существующих в ней необходимых таблиц
+    // Check that the database contains the required tables; otherwise create them
     void check()
     {
         SqliteResult queryResult = sql(
@@ -123,7 +123,7 @@ private:
         }
         enforce(missingTables.length == 0 || missingTables.length == 3,
-            "База данных повреждена. Отсутствуют таблицы: " ~ missingTables.join(", ")
+            "Database is corrupted. Missing tables: " ~ missingTables.join(", ")
         );
         if (missingTables.length == 3)
@@ -221,7 +221,7 @@ public:
         );
         if (queryResult.empty()) {
-            throw new Exception("Ошибка при добавлении нового снимока в базу данных");
+            throw new Exception("Error adding a new snapshot to the database");
         }
         return queryResult.front()["id"].to!long;

View file

@@ -8,6 +8,28 @@ import std.digest.sha : SHA256, digest;
 import std.datetime : DateTime;
 import std.exception : enforce;

+/**
+ * Snapshot reader and lifecycle helper.
+ *
+ * This class reconstructs full file content from the chunked storage persisted
+ * via `DBLite`, verifies integrity (per-chunk SHA-256 and the final file hash),
+ * and provides a safe way to remove a snapshot record.
+ *
+ * Usage:
+ * ---
+ * auto s1 = new Snapshot(db, snapshotId);
+ * auto bytes = s1.data(); // materialize the full content in memory
+ *
+ * // or stream into a sink to avoid large allocations:
+ * s1.data((const(ubyte)[] part) {
+ *     // consume part
+ * });
+ * ---
+ *
+ * Notes:
+ * - All integrity checks are enforced; any mismatch throws.
+ * - `data(void delegate(...))` is preferred for very large files.
+ */
 final class Snapshot
 {
 private:
@@ -19,32 +41,50 @@ private:
         ubyte[] bytes;
         if (chunk.zstd)
         {
-            enforce(chunk.zSize == chunk.content.length, "Размер сжатого фрагмента не соответствует ожидаемому");
+            enforce(chunk.zSize == chunk.content.length, "Compressed chunk size does not match the expected value");
             bytes = cast(ubyte[]) uncompress(chunk.content);
         }
         else
         {
             bytes = chunk.content.dup;
         }
-        enforce(chunk.size == bytes.length, "Оригинальный размер не соответствует ожидаемому");
-        enforce(chunk.sha256 == digest!SHA256(bytes), "Хеш-сумма фрагмента не совпадает");
+        enforce(chunk.size == bytes.length, "Original size does not match the expected value");
+        enforce(chunk.sha256 == digest!SHA256(bytes), "Chunk hash does not match");
         return bytes;
     }

 public:
+    /// Construct a `Snapshot` from an already fetched `DBSnapshot` row.
+    ///
+    /// Params:
+    ///     dblite = database handle
+    ///     dbSnapshot = snapshot row (metadata) previously retrieved
     this(DBLite dblite, DBSnapshot dbSnapshot)
     {
         _db = dblite;
         _snapshot = dbSnapshot;
     }

+    /// Construct a `Snapshot` by loading metadata from the database.
+    ///
+    /// Params:
+    ///     dblite = database handle
+    ///     idSnapshot = snapshot id to load
     this(DBLite dblite, long idSnapshot)
     {
         _db = dblite;
         _snapshot = _db.getSnapshot(idSnapshot);
     }

+    /// Materialize the full file content in memory.
+    ///
+    /// Reassembles all chunks in order, validating each chunk's SHA-256 and the
+    /// final file SHA-256 (`snapshots.sha256`).
+    ///
+    /// Returns: the full file content as a newly allocated `ubyte[]`
+    ///
+    /// Throws: Exception on any integrity check failure
     ubyte[] data()
     {
         auto chunks = _db.getChunks(_snapshot.id);
@@ -60,11 +100,20 @@ public:
             fctx.put(bytes);
         }
-        enforce(_snapshot.sha256 == fctx.finish(), "Хеш-сумма файла не совпадает");
+        enforce(_snapshot.sha256 == fctx.finish(), "File hash does not match");
         return content;
     }
+    /// Stream the full file content into a caller-provided sink.
+    ///
+    /// This variant avoids allocating a single large buffer. Chunks are
+    /// decoded, verified, and passed to `sink` in order.
+    ///
+    /// Params:
+    ///     sink = delegate invoked for each verified chunk (may be called many times)
+    ///
+    /// Throws: Exception on any integrity check failure
     void data(void delegate(const(ubyte)[]) sink)
     {
         auto chunks = _db.getChunks(_snapshot.id);
@@ -77,9 +126,17 @@ public:
             fctx.put(bytes);
         }
-        enforce(_snapshot.sha256 == fctx.finish(), "Хеш-сумма файла не совпадает");
+        enforce(_snapshot.sha256 == fctx.finish(), "File hash does not match");
     }
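For large files the sink overload can stream straight to disk without a whole-file buffer; a sketch, assuming `snap` is a loaded `Snapshot` (the file name is illustrative):

---
import std.stdio : File;

auto f = File("restored.bin", "wb");
scope(exit) f.close();
snap.data((const(ubyte)[] part) {
    f.rawWrite(part); // each part is already decompressed and hash-verified
});
---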
+    /// Remove this snapshot from the database inside a transaction.
+    ///
+    /// Starts an IMMEDIATE transaction, deletes the snapshot row, and commits.
+    /// On any failure it rolls back.
+    ///
+    /// Returns: `true` if the snapshot row was deleted, `false` otherwise
+    ///
+    /// Note: does not garbage-collect unreferenced blobs; perform that separately.
     bool remove()
     {
         _db.beginImmediate();
@@ -103,31 +160,37 @@ public:
         return _snapshot.id == idDeleted;
     }
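Because `remove` reports whether this snapshot's own row was the one deleted, callers can branch on the result; blob cleanup stays a separate step, as the note above says:

---
import std.stdio : stderr;

if (!snap.remove())
    stderr.writeln("snapshot was not deleted: ", snap.id);
---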
+    /// Snapshot id (primary key).
     @property long id() const nothrow @safe
     {
         return _snapshot.id;
     }

+    /// User-defined label.
     @property string label() const @safe
     {
         return _snapshot.label;
     }

+    /// Creation timestamp (UTC) from the database.
     @property DateTime created() const @safe
     {
         return _snapshot.createdUtc;
     }

+    /// Original file length in bytes.
     @property long length() const nothrow @safe
     {
         return _snapshot.sourceLength;
     }

+    /// Expected SHA-256 of the full file (32 raw bytes).
     @property ubyte[32] sha256() const nothrow @safe
     {
         return _snapshot.sha256;
     }

+    /// Snapshot status as a string (enum converted to string).
     @property string status() const
     {
         import std.conv : to;
@@ -135,6 +198,7 @@ public:
         return _snapshot.status.to!string;
     }

+    /// Optional human-readable description.
     @property string description() const nothrow @safe
     {
         return _snapshot.description;

View file

@@ -6,14 +6,35 @@ import cdcdb.snapshot;
 import zstd : compress, Level;

+/**
+ * High-level storage facade: splits data into CDC chunks, stores chunks/blobs
+ * in SQLite via `DBLite`, links them into snapshots, and returns `Snapshot`
+ * objects for retrieval and deletion.
+ *
+ * Features:
+ * - FastCDC-based content-defined chunking (configurable sizes/masks)
+ * - Optional Zstandard compression (configurable level)
+ * - Idempotent snapshot creation: skipped if the content is identical to the
+ *   latest snapshot for the label
+ *
+ * Typical usage:
+ * ---
+ * auto store = new Storage("cdc.sqlite", true, Level.default_);
+ * store.setupCDC(4096, 8192, 16384, 0x3FFF, 0x03FF);
+ *
+ * auto snap = store.newSnapshot("my.txt", data, "initial import");
+ * auto bytes = snap.data(); // retrieve
+ *
+ * auto removed = store.removeSnapshots("my.txt"); // remove by label
+ * ---
+ */
 final class Storage
 {
 private:
-    // Параметры работы с базой данных
+    // Database parameters
     DBLite _db;
     bool _zstd;
     int _level;

-    // Настройки CDC механизма
+    // CDC settings
     CDC _cdc;
     size_t _minSize;
     size_t _normalSize;
@@ -29,11 +50,19 @@ private:
         _maxSize = maxSize;
         _maskS = maskS;
         _maskL = maskL;

-        // CDC не хранит динамически выделенных данных, переинициализация безопасна
+        // CDC holds no dynamically allocated state; reinitialization is safe
         _cdc = new CDC(_minSize, _normalSize, _maxSize, _maskS, _maskL);
     }

 public:
+    /// Construct the storage facade and open (or create) the database.
+    ///
+    /// Params:
+    ///     database = path to the SQLite file
+    ///     zstd = enable Zstandard compression for stored blobs
+    ///     level = Zstd compression level (see `zstd.Level`)
+    ///     busyTimeout = SQLite busy timeout in milliseconds
+    ///     maxRetries = maximum retries on SQLITE_BUSY/LOCKED errors
     this(string database, bool zstd = false, int level = Level.base, size_t busyTimeout = 3000, size_t maxRetries = 3)
     {
         _db = new DBLite(database, busyTimeout, maxRetries);
@@ -42,23 +71,44 @@ public:
         initCDC();
     }

+    /// Reconfigure CDC parameters (takes effect for subsequent snapshots).
+    ///
+    /// Params:
+    ///     minSize, normalSize, maxSize, maskS, maskL = FastCDC parameters
     void setupCDC(size_t minSize, size_t normalSize, size_t maxSize, size_t maskS, size_t maskL)
     {
         initCDC(minSize, normalSize, maxSize, maskS, maskL);
     }
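Reconfiguring only changes how later snapshots are chunked; existing ones are untouched. For instance (values are illustrative and must keep min < normal < max):

---
store.setupCDC(2048, 4096, 8192, 0x3FFF, 0x03FF); // smaller average chunks
auto snap2 = store.newSnapshot("my.txt", newData); // chunked with the new settings
---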
+    /// Create a new snapshot from raw data.
+    ///
+    /// - Splits the data with FastCDC using the current settings.
+    /// - Optionally compresses chunks with Zstd.
+    /// - Stores unique blobs and links them to the created snapshot.
+    /// - If the latest snapshot for `label` already has the same file SHA-256,
+    ///   returns `null` (idempotent).
+    ///
+    /// Params:
+    ///     label = user-provided snapshot label (file identifier)
+    ///     data = raw file bytes
+    ///     description = optional human-readable description
+    ///
+    /// Returns: a `Snapshot` instance for the created snapshot, or `null`
+    ///
+    /// Throws: Exception if `data` is empty or on database/storage errors
     Snapshot newSnapshot(string label, const(ubyte)[] data, string description = string.init)
     {
         if (data.length == 0)
         {
-            throw new Exception("Данные имеют нулевой размер");
+            throw new Exception("Data has zero length");
         }

         import std.digest.sha : SHA256, digest;
         ubyte[32] sha256 = digest!SHA256(data);

-        // Если последний снимок файла соответствует текущему состоянию
+        // If the latest snapshot for the label matches the current content
         if (_db.isLast(label, sha256))
             return null;
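So a repeated call with unchanged bytes is a cheap no-op; a sketch, assuming `store` and `data` as in the class DDoc above:

---
auto first  = store.newSnapshot("my.txt", data);
auto second = store.newSnapshot("my.txt", data); // same bytes, same label
assert(first !is null);
assert(second is null); // skipped: the latest snapshot already matches
---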
@@ -95,10 +145,10 @@
         dbBlob.zstd = _zstd;

-        // Разбить на фрагменты
+        // Split into chunks
         Chunk[] chunks = _cdc.split(data);

-        // Запись фрагментов в БД
+        // Write chunks to the database
         foreach (chunk; chunks)
         {
             dbBlob.sha256 = chunk.sha256;
@@ -118,7 +168,7 @@
             dbBlob.content = content.dup;
         }

-        // Запись фрагментов
+        // Store/ensure the blob
         _db.addBlob(dbBlob);

         dbSnapshotChunk.snapshotId = idSnapshot;
@@ -126,7 +176,7 @@
         dbSnapshotChunk.offset = chunk.offset;
         dbSnapshotChunk.sha256 = chunk.sha256;

-        // Привязка фрагментов к снимку
+        // Link the chunk to the snapshot
         _db.addSnapshotChunk(dbSnapshotChunk);
     }
@@ -137,23 +187,52 @@
         return snapshot;
     }

-    // Удаляет снимок по метке, возвращает количество удаленных снимков
+    /// Delete snapshots by label.
+    ///
+    /// Params:
+    ///     label = snapshot label
+    ///
+    /// Returns: the number of deleted snapshots
     long removeSnapshots(string label) {
         return _db.deleteSnapshot(label);
     }

+    /// Delete a specific snapshot instance.
+    ///
+    /// Params:
+    ///     snapshot = `Snapshot` to remove
+    ///
+    /// Returns: `true` on success, `false` otherwise
     bool removeSnapshots(Snapshot snapshot) {
         return removeSnapshots(snapshot.id);
     }

+    /// Delete a snapshot by id.
+    ///
+    /// Params:
+    ///     idSnapshot = snapshot id
+    ///
+    /// Returns: `true` if the row was deleted
     bool removeSnapshots(long idSnapshot) {
         return _db.deleteSnapshot(idSnapshot) == idSnapshot;
     }

+    /// Get a `Snapshot` object by id.
+    ///
+    /// Params:
+    ///     idSnapshot = snapshot id
+    ///
+    /// Returns: a `Snapshot` handle (metadata is loaded by the constructor)
     Snapshot getSnapshot(long idSnapshot) {
         return new Snapshot(_db, idSnapshot);
     }

+    /// List snapshots (optionally filtered by label).
+    ///
+    /// Params:
+    ///     label = filter by exact label; an empty string returns all
+    ///
+    /// Returns: array of `Snapshot` handles
     Snapshot[] getSnapshots(string label = string.init) {
         Snapshot[] snapshots;
@@ -164,6 +243,9 @@ public:
         return snapshots;
     }
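Listing is then just iterating the returned handles; a sketch using the properties documented in `Snapshot`:

---
import std.stdio : writefln;

foreach (s; store.getSnapshots("my.txt"))
    writefln("id=%s created=%s length=%s status=%s",
        s.id, s.created, s.length, s.status);
---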
+    /// Library version string.
+    ///
+    /// Returns: the semantic version of the `cdcdb` library
     string getVersion() const @safe nothrow
     {
         import cdcdb.version_ : cdcdbVersion;