forked from dlang/cdcdb
Added English-language DDoc

parent 85aa3c8f53
commit 5d0cf228d6
4 changed files with 173 additions and 27 deletions
@@ -5,12 +5,12 @@ import std.digest.sha : SHA256, digest;
 struct Chunk
 {
     size_t index; // 1..N
-    size_t offset; // смещение в исходном буфере
-    size_t size; // размер чанка
-    immutable(ubyte)[32] sha256; // hex(SHA-256) содержимого
+    size_t offset; // offset in the source buffer
+    size_t size; // chunk size
+    immutable(ubyte)[32] sha256; // hex(SHA-256) of the content
 }

-// Change Data Capture (Захват изменения данных)
+// Change Data Capture (CDC)
 final class CDC
 {
 private:
@@ -36,13 +36,13 @@ private:
     ulong fingerprint = 0;
     size_t index;

-    // инициализация без cut-check
+    // initialization without a cut-check
     while (index < _minSize)
     {
         fingerprint = (fingerprint << 1) + _gear[src[index]];
         ++index;
     }
-    // строгая маска
+    // strict mask
     while (index < normalSize)
     {
         fingerprint = (fingerprint << 1) + _gear[src[index]];
@@ -50,7 +50,7 @@ private:
             return index;
         ++index;
     }
-    // слабая маска
+    // weak mask
     while (index < size)
     {
         fingerprint = (fingerprint << 1) + _gear[src[index]];
@@ -65,7 +65,7 @@ public:
     this(size_t minSize, size_t normalSize, size_t maxSize, ulong maskS, ulong maskL) @safe @nogc nothrow
     {
         assert(minSize > 0 && minSize < normalSize && normalSize < maxSize,
-            "Неверные размеры: требуется min < normal < max и min > 0");
+            "Invalid sizes: require min < normal < max and min > 0");
         _minSize = minSize;
         _normalSize = normalSize;
         _maxSize = maxSize;
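The hunks above document the `Chunk` record and the `CDC` splitter, whose constructor requires min < normal < max. A minimal sketch of driving the splitter directly follows; the parameter values and the `splitExample` helper are illustrative only, and it assumes `CDC.split` returns `Chunk[]` as used later in this diff:
---
void splitExample(const(ubyte)[] data)
{
    // Sizes must satisfy min > 0 and min < normal < max; the two masks steer cut points.
    auto cdc = new CDC(2048, 4096, 8192, 0x3FFF, 0x03FF);

    Chunk[] chunks = cdc.split(data);
    foreach (c; chunks)
    {
        // Each Chunk records its 1-based index, offset and size in the source
        // buffer, and the SHA-256 of its content.
        assert(c.size > 0 && c.offset + c.size <= data.length);
    }
}
---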
@@ -87,7 +87,7 @@ private:
         if (msg.toLower.canFind("locked", "busy")) {
             if (--tryNo == 0) {
                 throw new Exception(
-                    "Не удалось выполнить подключение к базе данных после %d неудачных попыток: %s"
+                    "Failed to connect to the database after %d failed attempts: %s"
                     .format(_maxRetries, msg)
                 );
             }
@@ -99,7 +99,7 @@ private:
         throw new Exception(msg);
     }

-    // Проверка БД на наличие существующих в ней необходимых таблиц
+    // Check that the database contains the required tables; otherwise create them
     void check()
     {
         SqliteResult queryResult = sql(
@@ -123,7 +123,7 @@ private:
         }

         enforce(missingTables.length == 0 || missingTables.length == 3,
-            "База данных повреждена. Отсутствуют таблицы: " ~ missingTables.join(", ")
+            "Database is corrupted. Missing tables: " ~ missingTables.join(", ")
         );

         if (missingTables.length == 3)
@@ -221,7 +221,7 @@ public:
         );

         if (queryResult.empty()) {
-            throw new Exception("Ошибка при добавлении нового снимока в базу данных");
+            throw new Exception("Error adding a new snapshot to the database");
         }

         return queryResult.front()["id"].to!long;
@@ -8,6 +8,28 @@ import std.digest.sha : SHA256, digest;
 import std.datetime : DateTime;
 import std.exception : enforce;

+/**
+ * Snapshot reader and lifecycle helper.
+ *
+ * This class reconstructs full file content from chunked storage persisted
+ * via `DBLite`, verifies integrity (per-chunk SHA-256 and final file hash),
+ * and provides a safe way to remove a snapshot record.
+ *
+ * Usage:
+ * ---
+ * auto s1 = new Snapshot(db, snapshotId);
+ * auto bytes = s1.data(); // materialize full content in memory
+ *
+ * // or stream into a sink to avoid large allocations:
+ * s1.data((const(ubyte)[] part) {
+ *     // consume part
+ * });
+ * ---
+ *
+ * Notes:
+ * - All integrity checks are enforced; any mismatch throws.
+ * - `data(void delegate(...))` is preferred for very large files.
+ */
 final class Snapshot
 {
 private:
@@ -19,32 +41,50 @@ private:
         ubyte[] bytes;
         if (chunk.zstd)
         {
-            enforce(chunk.zSize == chunk.content.length, "Размер сжатого фрагмента не соответствует ожидаемому");
+            enforce(chunk.zSize == chunk.content.length, "Compressed chunk size does not match the expected value");
             bytes = cast(ubyte[]) uncompress(chunk.content);
         }
         else
         {
             bytes = chunk.content.dup;
         }
-        enforce(chunk.size == bytes.length, "Оригинальный размер не соответствует ожидаемому");
-        enforce(chunk.sha256 == digest!SHA256(bytes), "Хеш-сумма фрагмента не совпадает");
+        enforce(chunk.size == bytes.length, "Original size does not match the expected value");
+        enforce(chunk.sha256 == digest!SHA256(bytes), "Chunk hash does not match");

         return bytes;
     }

 public:
+    /// Construct a `Snapshot` from an already fetched `DBSnapshot` row.
+    ///
+    /// Params:
+    ///     dblite = database handle
+    ///     dbSnapshot = snapshot row (metadata) previously retrieved
     this(DBLite dblite, DBSnapshot dbSnapshot)
     {
         _db = dblite;
         _snapshot = dbSnapshot;
     }

+    /// Construct a `Snapshot` by loading metadata from the database.
+    ///
+    /// Params:
+    ///     dblite = database handle
+    ///     idSnapshot = snapshot id to load
     this(DBLite dblite, long idSnapshot)
     {
         _db = dblite;
         _snapshot = _db.getSnapshot(idSnapshot);
     }

+    /// Materialize the full file content in memory.
+    ///
+    /// Reassembles all chunks in order, validates each chunk SHA-256 and the
+    /// final file SHA-256 (`snapshots.sha256`).
+    ///
+    /// Returns: full file content as a newly allocated `ubyte[]`
+    ///
+    /// Throws: Exception on any integrity check failure
     ubyte[] data()
     {
         auto chunks = _db.getChunks(_snapshot.id);
@@ -60,11 +100,20 @@ public:
             fctx.put(bytes);
         }

-        enforce(_snapshot.sha256 == fctx.finish(), "Хеш-сумма файла не совпадает");
+        enforce(_snapshot.sha256 == fctx.finish(), "File hash does not match");

         return content;
     }

+    /// Stream the full file content into a caller-provided sink.
+    ///
+    /// This variant avoids allocating a single large buffer. Chunks are
+    /// decoded, verified, and passed to `sink` in order.
+    ///
+    /// Params:
+    ///     sink = delegate invoked for each verified chunk (may be called many times)
+    ///
+    /// Throws: Exception on any integrity check failure
     void data(void delegate(const(ubyte)[]) sink)
     {
         auto chunks = _db.getChunks(_snapshot.id);
@@ -77,9 +126,17 @@ public:
             fctx.put(bytes);
         }

-        enforce(_snapshot.sha256 == fctx.finish(), "Хеш-сумма файла не совпадает");
+        enforce(_snapshot.sha256 == fctx.finish(), "File hash does not match");
     }

+    /// Remove this snapshot from the database inside a transaction.
+    ///
+    /// Starts an IMMEDIATE transaction, deletes the snapshot row, and commits.
+    /// On any failure it rolls back.
+    ///
+    /// Returns: `true` if the snapshot row was deleted, `false` otherwise
+    ///
+    /// Note: Does not garbage-collect unreferenced blobs; perform that separately.
     bool remove()
     {
         _db.beginImmediate();
@@ -103,31 +160,37 @@ public:
         return _snapshot.id == idDeleted;
     }

+    /// Snapshot id (primary key).
     @property long id() const nothrow @safe
     {
         return _snapshot.id;
     }

+    /// User-defined label.
     @property string label() const @safe
     {
         return _snapshot.label;
     }

+    /// Creation timestamp (UTC) from the database.
     @property DateTime created() const @safe
     {
         return _snapshot.createdUtc;
     }

+    /// Original file length in bytes.
     @property long length() const nothrow @safe
     {
         return _snapshot.sourceLength;
     }

+    /// Expected SHA-256 of the full file (32 raw bytes).
     @property ubyte[32] sha256() const nothrow @safe
     {
         return _snapshot.sha256;
     }

+    /// Snapshot status as a string (enum to string).
     @property string status() const
     {
         import std.conv : to;
@@ -135,6 +198,7 @@ public:
         return _snapshot.status.to!string;
     }

+    /// Optional human-readable description.
     @property string description() const nothrow @safe
     {
         return _snapshot.description;
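The new `Snapshot` DDoc above documents two retrieval modes: `data()` materializes the whole file, while `data(sink)` streams verified chunks. A minimal sketch of the streaming variant writing a snapshot to disk follows; the `dumpSnapshot` helper, its parameters, and the assumption that `import cdcdb.snapshot` brings the needed symbols into scope are illustrative only (the module layout is not shown in this diff):
---
import cdcdb.snapshot;      // assumed to provide Snapshot (and DBLite); adjust to the real layout
import std.stdio : File;

void dumpSnapshot(DBLite db, long snapshotId, string outPath)
{
    // Second constructor documented above: load snapshot metadata by id.
    auto snap = new Snapshot(db, snapshotId);

    auto outFile = File(outPath, "wb");

    // Stream verified chunks straight to disk, avoiding one large allocation;
    // any per-chunk or whole-file SHA-256 mismatch throws before completion.
    snap.data((const(ubyte)[] part) {
        outFile.rawWrite(part);
    });
}
---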
@@ -6,14 +6,35 @@ import cdcdb.snapshot;

 import zstd : compress, Level;

+/**
+ * High-level storage facade: splits data into CDC chunks, stores chunks/blobs
+ * into SQLite via `DBLite`, links them into snapshots, and returns `Snapshot`
+ * objects for retrieval and deletion.
+ *
+ * Features:
+ * - FastCDC-based content-defined chunking (configurable sizes/masks)
+ * - Optional Zstandard compression (level configurable)
+ * - Idempotent snapshot creation: skips if identical to the latest for label
+ *
+ * Typical usage:
+ * ---
+ * auto store = new Storage("cdc.sqlite", true, Level.default_);
+ * store.setupCDC(4096, 8192, 16384, 0x3FFF, 0x03FF);
+ *
+ * auto snap = store.newSnapshot("my.txt", data, "initial import");
+ * auto bytes = snap.data(); // retrieve
+ *
+ * auto removed = store.removeSnapshots("my.txt"); // remove by label
+ * ---
+ */
 final class Storage
 {
 private:
-    // Параметры работы с базой данных
+    // Database parameters
     DBLite _db;
     bool _zstd;
     int _level;
-    // Настройки CDC механизма
+    // CDC settings
     CDC _cdc;
     size_t _minSize;
     size_t _normalSize;
@@ -29,11 +50,19 @@ private:
         _maxSize = maxSize;
         _maskS = maskS;
         _maskL = maskL;
-        // CDC не хранит динамически выделенных данных, переинициализация безопасна
+        // CDC holds no dynamically allocated state; reinitialization is safe
         _cdc = new CDC(_minSize, _normalSize, _maxSize, _maskS, _maskL);
     }

 public:
+    /// Construct the storage facade and open (or create) the database.
+    ///
+    /// Params:
+    ///     database = path to SQLite file
+    ///     zstd = enable Zstandard compression for stored blobs
+    ///     level = Zstd compression level (see `zstd.Level`)
+    ///     busyTimeout = SQLite busy timeout in milliseconds
+    ///     maxRetries = max retries on SQLITE_BUSY/LOCKED errors
     this(string database, bool zstd = false, int level = Level.base, size_t busyTimeout = 3000, size_t maxRetries = 3)
     {
         _db = new DBLite(database, busyTimeout, maxRetries);
@@ -42,23 +71,44 @@ public:
         initCDC();
     }

+    /// Reconfigure CDC parameters (takes effect for subsequent snapshots).
+    ///
+    /// Params:
+    ///     minSize, normalSize, maxSize, maskS, maskL = FastCDC parameters
     void setupCDC(size_t minSize, size_t normalSize, size_t maxSize, size_t maskS, size_t maskL)
     {
         initCDC(minSize, normalSize, maxSize, maskS, maskL);
     }

+    /// Create a new snapshot from raw data.
+    ///
+    /// - Splits data with FastCDC using current settings.
+    /// - Optionally compresses chunks with Zstd.
+    /// - Stores unique blobs and links them to the created snapshot.
+    /// - If the latest snapshot for `label` already has the same file SHA-256,
+    ///   returns `null` (idempotent).
+    ///
+    /// Params:
+    ///     label = user-provided snapshot label (file identifier)
+    ///     data = raw file bytes
+    ///     description = optional human-readable description
+    ///
+    /// Returns: a `Snapshot` instance for the created snapshot, or `null`
+    ///
+    /// Throws:
+    ///     Exception if `data` is empty or on database/storage errors
     Snapshot newSnapshot(string label, const(ubyte)[] data, string description = string.init)
     {
         if (data.length == 0)
         {
-            throw new Exception("Данные имеют нулевой размер");
+            throw new Exception("Data has zero length");
         }

         import std.digest.sha : SHA256, digest;

         ubyte[32] sha256 = digest!SHA256(data);

-        // Если последний снимок файла соответствует текущему состоянию
+        // If the last snapshot for the label matches current content
         if (_db.isLast(label, sha256))
             return null;

@@ -95,10 +145,10 @@

         dbBlob.zstd = _zstd;

-        // Разбить на фрагменты
+        // Split into chunks
         Chunk[] chunks = _cdc.split(data);

-        // Запись фрагментов в БД
+        // Write chunks to DB
         foreach (chunk; chunks)
         {
             dbBlob.sha256 = chunk.sha256;
@@ -118,7 +168,7 @@ public:
                 dbBlob.content = content.dup;
             }

-            // Запись фрагментов
+            // Store/ensure blob
             _db.addBlob(dbBlob);

             dbSnapshotChunk.snapshotId = idSnapshot;
@@ -126,7 +176,7 @@ public:
             dbSnapshotChunk.offset = chunk.offset;
             dbSnapshotChunk.sha256 = chunk.sha256;

-            // Привязка фрагментов к снимку
+            // Link chunk to snapshot
             _db.addSnapshotChunk(dbSnapshotChunk);
         }

@@ -137,23 +187,52 @@ public:
         return snapshot;
     }

-    // Удаляет снимок по метке, возвращает количество удаленных снимков
+    /// Delete snapshots by label.
+    ///
+    /// Params:
+    ///     label = snapshot label
+    ///
+    /// Returns: number of deleted snapshots
     long removeSnapshots(string label) {
         return _db.deleteSnapshot(label);
     }

+    /// Delete a specific snapshot instance.
+    ///
+    /// Params:
+    ///     snapshot = `Snapshot` to remove
+    ///
+    /// Returns: `true` on success, `false` otherwise
     bool removeSnapshots(Snapshot snapshot) {
         return removeSnapshots(snapshot.id);
     }

+    /// Delete a snapshot by id.
+    ///
+    /// Params:
+    ///     idSnapshot = snapshot id
+    ///
+    /// Returns: `true` if the row was deleted
     bool removeSnapshots(long idSnapshot) {
         return _db.deleteSnapshot(idSnapshot) == idSnapshot;
     }

+    /// Get a `Snapshot` object by id.
+    ///
+    /// Params:
+    ///     idSnapshot = snapshot id
+    ///
+    /// Returns: `Snapshot` handle (metadata loaded lazily via constructor)
     Snapshot getSnapshot(long idSnapshot) {
         return new Snapshot(_db, idSnapshot);
     }

+    /// List snapshots (optionally filtered by label).
+    ///
+    /// Params:
+    ///     label = filter by exact label; empty string returns all
+    ///
+    /// Returns: array of `Snapshot` handles
     Snapshot[] getSnapshots(string label = string.init) {
         Snapshot[] snapshots;

@@ -164,6 +243,9 @@ public:
         return snapshots;
     }

+    /// Library version string.
+    ///
+    /// Returns: semantic version of the `cdcdb` library
     string getVersion() const @safe nothrow
     {
         import cdcdb.version_ : cdcdbVersion;
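Taken together, the new DDoc describes the full write/read/delete cycle on `Storage`. A short end-to-end sketch assembled from the usage blocks in the comments above; the package-level `import cdcdb`, the sample bytes, and the round-trip `assert` are assumptions added for illustration, while "cdc.sqlite" and "my.txt" are the placeholders used in the DDoc examples:
---
import cdcdb;           // assumed package import; exact module names are not shown in this diff
import zstd : Level;

void main()
{
    // Open (or create) the SQLite store with Zstandard compression enabled.
    auto store = new Storage("cdc.sqlite", true, Level.base);

    // Optionally tune FastCDC chunking: min/normal/max sizes and the two masks.
    store.setupCDC(4096, 8192, 16384, 0x3FFF, 0x03FF);

    ubyte[] data = cast(ubyte[]) "hello, cdcdb".dup;

    // Idempotent: returns null when the latest snapshot for this label is identical.
    auto snap = store.newSnapshot("my.txt", data, "initial import");
    if (snap !is null)
    {
        auto roundTrip = snap.data(); // reassembled and hash-verified
        assert(roundTrip == data);
    }

    // Remove every snapshot stored under this label.
    store.removeSnapshots("my.txt");
}
---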