1
0
Fork 0
forked from dlang/cdcdb

Добавлен англоязычный DDoc

This commit is contained in:
Alexander Zhirov 2025-09-13 03:15:22 +03:00
parent 85aa3c8f53
commit 5d0cf228d6
Signed by: alexander
GPG key ID: C8D8BE544A27C511
4 changed files with 173 additions and 27 deletions

View file

@@ -5,12 +5,12 @@ import std.digest.sha : SHA256, digest;
struct Chunk
{
size_t index; // 1..N
size_t offset; // смещение в исходном буфере
size_t size; // размер чанка
immutable(ubyte)[32] sha256; // hex(SHA-256) содержимого
size_t offset; // offset in the source buffer
size_t size; // chunk size
immutable(ubyte)[32] sha256; // hex(SHA-256) of the content
}
// Change Data Capture (Захват изменения данных)
// Change Data Capture (CDC)
final class CDC
{
private:
@@ -36,13 +36,13 @@ private:
ulong fingerprint = 0;
size_t index;
// инициализация без cut-check
// initialization without a cut-check
while (index < _minSize)
{
fingerprint = (fingerprint << 1) + _gear[src[index]];
++index;
}
// строгая маска
// strict mask
while (index < normalSize)
{
fingerprint = (fingerprint << 1) + _gear[src[index]];
@@ -50,7 +50,7 @@ private:
return index;
++index;
}
// слабая маска
// weak mask
while (index < size)
{
fingerprint = (fingerprint << 1) + _gear[src[index]];
@@ -65,7 +65,7 @@ public:
this(size_t minSize, size_t normalSize, size_t maxSize, ulong maskS, ulong maskL) @safe @nogc nothrow
{
assert(minSize > 0 && minSize < normalSize && normalSize < maxSize,
"Неверные размеры: требуется min < normal < max и min > 0");
"Invalid sizes: require min < normal < max and min > 0");
_minSize = minSize;
_normalSize = normalSize;
_maxSize = maxSize;

View file

@@ -87,7 +87,7 @@ private:
if (msg.toLower.canFind("locked", "busy")) {
if (--tryNo == 0) {
throw new Exception(
"Не удалось выполнить подключение к базе данных после %d неудачных попыток: %s"
"Failed to connect to the database after %d failed attempts: %s"
.format(_maxRetries, msg)
);
}
@@ -99,7 +99,7 @@ private:
throw new Exception(msg);
}
// Проверка БД на наличие существующих в ней необходимых таблиц
// Check that the database contains the required tables; otherwise create them
void check()
{
SqliteResult queryResult = sql(
@@ -123,7 +123,7 @@ private:
}
enforce(missingTables.length == 0 || missingTables.length == 3,
"База данных повреждена. Отсутствуют таблицы: " ~ missingTables.join(", ")
"Database is corrupted. Missing tables: " ~ missingTables.join(", ")
);
if (missingTables.length == 3)
@@ -221,7 +221,7 @@ public:
);
if (queryResult.empty()) {
throw new Exception("Ошибка при добавлении нового снимока в базу данных");
throw new Exception("Error adding a new snapshot to the database");
}
return queryResult.front()["id"].to!long;

View file

@@ -8,6 +8,28 @@ import std.digest.sha : SHA256, digest;
import std.datetime : DateTime;
import std.exception : enforce;
/**
* Snapshot reader and lifecycle helper.
*
* This class reconstructs full file content from chunked storage persisted
* via `DBLite`, verifies integrity (per-chunk SHA-256 and final file hash),
* and provides a safe way to remove a snapshot record.
*
* Usage:
* ---
* auto s1 = new Snapshot(db, snapshotId);
* auto bytes = s1.data(); // materialize full content in memory
*
* // or stream into a sink to avoid large allocations:
* s1.data((const(ubyte)[] part) {
* // consume part
* });
* ---
*
* Notes:
* - All integrity checks are enforced; any mismatch throws.
* - `data(void delegate(...))` is preferred for very large files.
*/
final class Snapshot
{
private:
@@ -19,32 +41,50 @@ private:
ubyte[] bytes;
if (chunk.zstd)
{
enforce(chunk.zSize == chunk.content.length, "Размер сжатого фрагмента не соответствует ожидаемому");
enforce(chunk.zSize == chunk.content.length, "Compressed chunk size does not match the expected value");
bytes = cast(ubyte[]) uncompress(chunk.content);
}
else
{
bytes = chunk.content.dup;
}
enforce(chunk.size == bytes.length, "Оригинальный размер не соответствует ожидаемому");
enforce(chunk.sha256 == digest!SHA256(bytes), "Хеш-сумма фрагмента не совпадает");
enforce(chunk.size == bytes.length, "Original size does not match the expected value");
enforce(chunk.sha256 == digest!SHA256(bytes), "Chunk hash does not match");
return bytes;
}
public:
/// Construct a `Snapshot` from an already fetched `DBSnapshot` row.
///
/// Params:
/// dblite = database handle
/// dbSnapshot = snapshot row (metadata) previously retrieved
this(DBLite dblite, DBSnapshot dbSnapshot)
{
_db = dblite;
_snapshot = dbSnapshot;
}
/// Construct a `Snapshot` by loading metadata from the database.
///
/// Params:
/// dblite = database handle
/// idSnapshot = snapshot id to load
this(DBLite dblite, long idSnapshot)
{
_db = dblite;
_snapshot = _db.getSnapshot(idSnapshot);
}
/// Materialize the full file content in memory.
///
/// Reassembles all chunks in order, validates each chunk SHA-256 and the
/// final file SHA-256 (`snapshots.sha256`).
///
/// Returns: full file content as a newly allocated `ubyte[]`
///
/// Throws: Exception on any integrity check failure
ubyte[] data()
{
auto chunks = _db.getChunks(_snapshot.id);
@@ -60,11 +100,20 @@ public:
fctx.put(bytes);
}
enforce(_snapshot.sha256 == fctx.finish(), "Хеш-сумма файла не совпадает");
enforce(_snapshot.sha256 == fctx.finish(), "File hash does not match");
return content;
}
/// Stream the full file content into a caller-provided sink.
///
/// This variant avoids allocating a single large buffer. Chunks are
/// decoded, verified, and passed to `sink` in order.
///
/// Params:
/// sink = delegate invoked for each verified chunk (may be called many times)
///
/// Throws: Exception on any integrity check failure
void data(void delegate(const(ubyte)[]) sink)
{
auto chunks = _db.getChunks(_snapshot.id);
@@ -77,9 +126,17 @@ public:
fctx.put(bytes);
}
enforce(_snapshot.sha256 == fctx.finish(), "Хеш-сумма файла не совпадает");
enforce(_snapshot.sha256 == fctx.finish(), "File hash does not match");
}
/// Remove this snapshot from the database inside a transaction.
///
/// Starts an IMMEDIATE transaction, deletes the snapshot row, and commits.
/// On any failure it rolls back.
///
/// Returns: `true` if the snapshot row was deleted, `false` otherwise
///
/// Note: Does not garbage-collect unreferenced blobs; perform that separately.
bool remove()
{
_db.beginImmediate();
@@ -103,31 +160,37 @@ public:
return _snapshot.id == idDeleted;
}
/// Snapshot id (primary key).
@property long id() const nothrow @safe
{
return _snapshot.id;
}
/// User-defined label.
@property string label() const @safe
{
return _snapshot.label;
}
/// Creation timestamp (UTC) from the database.
@property DateTime created() const @safe
{
return _snapshot.createdUtc;
}
/// Original file length in bytes.
@property long length() const nothrow @safe
{
return _snapshot.sourceLength;
}
/// Expected SHA-256 of the full file (32 raw bytes).
@property ubyte[32] sha256() const nothrow @safe
{
return _snapshot.sha256;
}
/// Snapshot status as a string (enum to string).
@property string status() const
{
import std.conv : to;
@@ -135,6 +198,7 @@ public:
return _snapshot.status.to!string;
}
/// Optional human-readable description.
@property string description() const nothrow @safe
{
return _snapshot.description;

View file

@@ -6,14 +6,35 @@ import cdcdb.snapshot;
import zstd : compress, Level;
/**
* High-level storage facade: splits data into CDC chunks, stores chunks/blobs
* into SQLite via `DBLite`, links them into snapshots, and returns `Snapshot`
* objects for retrieval and deletion.
*
* Features:
* - FastCDC-based content-defined chunking (configurable sizes/masks)
* - Optional Zstandard compression (level configurable)
* - Idempotent snapshot creation: skips if identical to the latest for label
*
* Typical usage:
* ---
* auto store = new Storage("cdc.sqlite", true, Level.default_);
* store.setupCDC(4096, 8192, 16384, 0x3FFF, 0x03FF);
*
* auto snap = store.newSnapshot("my.txt", data, "initial import");
* auto bytes = snap.data(); // retrieve
*
* auto removed = store.removeSnapshots("my.txt"); // remove by label
* ---
*/
final class Storage
{
private:
// Параметры работы с базой данных
// Database parameters
DBLite _db;
bool _zstd;
int _level;
// Настройки CDC механизма
// CDC settings
CDC _cdc;
size_t _minSize;
size_t _normalSize;
@@ -29,11 +50,19 @@ private:
_maxSize = maxSize;
_maskS = maskS;
_maskL = maskL;
// CDC не хранит динамически выделенных данных, переинициализация безопасна
// CDC holds no dynamically allocated state; reinitialization is safe
_cdc = new CDC(_minSize, _normalSize, _maxSize, _maskS, _maskL);
}
public:
/// Construct the storage facade and open (or create) the database.
///
/// Params:
/// database = path to SQLite file
/// zstd = enable Zstandard compression for stored blobs
/// level = Zstd compression level (see `zstd.Level`)
/// busyTimeout = SQLite busy timeout in milliseconds
/// maxRetries = max retries on SQLITE_BUSY/LOCKED errors
this(string database, bool zstd = false, int level = Level.base, size_t busyTimeout = 3000, size_t maxRetries = 3)
{
_db = new DBLite(database, busyTimeout, maxRetries);
@@ -42,23 +71,44 @@ public:
initCDC();
}
/// Reconfigure CDC parameters (takes effect for subsequent snapshots).
///
/// Params:
/// minSize, normalSize, maxSize, maskS, maskL = FastCDC parameters
void setupCDC(size_t minSize, size_t normalSize, size_t maxSize, size_t maskS, size_t maskL)
{
initCDC(minSize, normalSize, maxSize, maskS, maskL);
}
/// Create a new snapshot from raw data.
///
/// - Splits data with FastCDC using current settings.
/// - Optionally compresses chunks with Zstd.
/// - Stores unique blobs and links them to the created snapshot.
/// - If the latest snapshot for `label` already has the same file SHA-256,
/// returns `null` (idempotent).
///
/// Params:
/// label = user-provided snapshot label (file identifier)
/// data = raw file bytes
/// description = optional human-readable description
///
/// Returns: a `Snapshot` instance for the created snapshot, or `null`
///
/// Throws:
/// Exception if `data` is empty or on database/storage errors
Snapshot newSnapshot(string label, const(ubyte)[] data, string description = string.init)
{
if (data.length == 0)
{
throw new Exception("Данные имеют нулевой размер");
throw new Exception("Data has zero length");
}
import std.digest.sha : SHA256, digest;
ubyte[32] sha256 = digest!SHA256(data);
// Если последний снимок файла соответствует текущему состоянию
// If the last snapshot for the label matches current content
if (_db.isLast(label, sha256))
return null;
@@ -95,10 +145,10 @@ public:
dbBlob.zstd = _zstd;
// Разбить на фрагменты
// Split into chunks
Chunk[] chunks = _cdc.split(data);
// Запись фрагментов в БД
// Write chunks to DB
foreach (chunk; chunks)
{
dbBlob.sha256 = chunk.sha256;
@@ -118,7 +168,7 @@ public:
dbBlob.content = content.dup;
}
// Запись фрагментов
// Store/ensure blob
_db.addBlob(dbBlob);
dbSnapshotChunk.snapshotId = idSnapshot;
@@ -126,7 +176,7 @@ public:
dbSnapshotChunk.offset = chunk.offset;
dbSnapshotChunk.sha256 = chunk.sha256;
// Привязка фрагментов к снимку
// Link chunk to snapshot
_db.addSnapshotChunk(dbSnapshotChunk);
}
@@ -137,23 +187,52 @@ public:
return snapshot;
}
// Удаляет снимок по метке, возвращает количество удаленных снимков
/// Delete snapshots by label.
///
/// Params:
/// label = snapshot label
///
/// Returns: number of deleted snapshots
long removeSnapshots(string label) {
return _db.deleteSnapshot(label);
}
/// Delete a specific snapshot instance.
///
/// Params:
/// snapshot = `Snapshot` to remove
///
/// Returns: `true` on success, `false` otherwise
bool removeSnapshots(Snapshot snapshot) {
return removeSnapshots(snapshot.id);
}
/// Delete a snapshot by id.
///
/// Params:
/// idSnapshot = snapshot id
///
/// Returns: `true` if the row was deleted
bool removeSnapshots(long idSnapshot) {
return _db.deleteSnapshot(idSnapshot) == idSnapshot;
}
/// Get a `Snapshot` object by id.
///
/// Params:
/// idSnapshot = snapshot id
///
/// Returns: `Snapshot` handle (metadata loaded lazily via constructor)
Snapshot getSnapshot(long idSnapshot) {
return new Snapshot(_db, idSnapshot);
}
/// List snapshots (optionally filtered by label).
///
/// Params:
/// label = filter by exact label; empty string returns all
///
/// Returns: array of `Snapshot` handles
Snapshot[] getSnapshots(string label = string.init) {
Snapshot[] snapshots;
@@ -164,6 +243,9 @@ public:
return snapshots;
}
/// Library version string.
///
/// Returns: semantic version of the `cdcdb` library
string getVersion() const @safe nothrow
{
import cdcdb.version_ : cdcdbVersion;