From 4f735abae7be486ce95954541030fe4f0ad5c306 Mon Sep 17 00:00:00 2001 From: Alexander Zhirov Date: Fri, 12 Sep 2025 13:16:31 +0300 Subject: [PATCH 1/2] =?UTF-8?q?=D0=9D=D0=B5=D0=B1=D0=BE=D0=BB=D1=8C=D1=88?= =?UTF-8?q?=D0=B0=D1=8F=20=D0=BE=D0=BF=D1=82=D0=B8=D0=BC=D0=B8=D0=B7=D0=B0?= =?UTF-8?q?=D1=86=D0=B8=D1=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- source/cdcdb/cdc/cas.d | 76 +++++++++++++++++++++++++--------------- source/cdcdb/db/dblite.d | 2 +- test/app.d | 8 ++--- 3 files changed, 52 insertions(+), 34 deletions(-) diff --git a/source/cdcdb/cdc/cas.d b/source/cdcdb/cdc/cas.d index bcde2af..02c072c 100644 --- a/source/cdcdb/cdc/cas.d +++ b/source/cdcdb/cdc/cas.d @@ -22,44 +22,42 @@ private: DBLite _db; bool _zstd; - ubyte[] buildContent(const ref Snapshot snapshot) - { - auto dataChunks = _db.getChunks(snapshot.id); - ubyte[] content; - - foreach (chunk; dataChunks) { - ubyte[] bytes; - if (chunk.zstd) { - enforce(chunk.zSize == chunk.content.length, "Размер сжатого фрагмента не соответствует ожидаемому"); - bytes = cast(ubyte[]) uncompress(chunk.content); - } else { - bytes = chunk.content; - } - enforce(chunk.size == bytes.length, "Оригинальный размер не соответствует ожидаемому"); - content ~= bytes; - } - enforce(snapshot.fileSha256 == digest!SHA256(content), "Хеш-сумма файла не совпадает"); - - return content; - } + size_t _minSize; + size_t _normalSize; + size_t _maxSize; + size_t _maskS; + size_t _maskL; + CDC _cdc; public: - this(string database, bool zstd = false) - { + this( + string database, + bool zstd = false, + size_t minSize = 256, + size_t normalSize = 512, + size_t maxSize = 1024, + size_t maskS = 0xFF, + size_t maskL = 0x0F + ) { _db = new DBLite(database); _zstd = zstd; + + _minSize = minSize; + _normalSize = normalSize; + _maxSize = maxSize; + _maskS = maskS; + _maskL = maskL; + + _cdc = new CDC(_minSize, _normalSize, _maxSize, _maskS, _maskL); } - size_t newSnapshot(string filePath, string label, const(ubyte)[] data) + size_t newSnapshot(string filePath, const(ubyte)[] data, string label = string.init) { ubyte[32] hashSource = digest!SHA256(data); - // Сделать запрос в БД по filePath и сверить хеш файлов + // Если последний снимок файла соответствует текущему if (_db.isLast(filePath, hashSource)) return 0; - - // Параметры для CDC вынести в отдельные настройки (продумать) - auto cdc = new CDC(256, 512, 1024, 0xFF, 0x0F); // Разбить на фрагменты - auto chunks = cdc.split(data); + auto chunks = _cdc.split(data); Snapshot snapshot; @@ -67,6 +65,11 @@ public: snapshot.fileSha256 = hashSource; snapshot.label = label; snapshot.sourceLength = data.length; + snapshot.algoMin = _minSize; + snapshot.algoNormal = _normalSize; + snapshot.algoMax = _maxSize; + snapshot.maskS = _maskS; + snapshot.maskL = _maskL; _db.beginImmediate(); @@ -133,7 +136,22 @@ public: ubyte[] getSnapshotData(const ref Snapshot snapshot) { - ubyte[] content = buildContent(snapshot); + auto dataChunks = _db.getChunks(snapshot.id); + ubyte[] content; + + foreach (chunk; dataChunks) { + ubyte[] bytes; + if (chunk.zstd) { + enforce(chunk.zSize == chunk.content.length, "Размер сжатого фрагмента не соответствует ожидаемому"); + bytes = cast(ubyte[]) uncompress(chunk.content); + } else { + bytes = chunk.content; + } + enforce(chunk.size == bytes.length, "Оригинальный размер не соответствует ожидаемому"); + content ~= bytes; + } + enforce(snapshot.fileSha256 == digest!SHA256(content), "Хеш-сумма файла не совпадает"); + return content; } diff --git a/source/cdcdb/db/dblite.d b/source/cdcdb/db/dblite.d index d2bf2c2..9b1e3a1 100644 --- a/source/cdcdb/db/dblite.d +++ b/source/cdcdb/db/dblite.d @@ -111,7 +111,7 @@ public: }, snapshot.filePath, snapshot.fileSha256[], - snapshot.label, + snapshot.label.length ? snapshot.label : null, snapshot.sourceLength, snapshot.algoMin, snapshot.algoNormal, diff --git a/test/app.d b/test/app.d index 8c360ea..a2e1312 100644 --- a/test/app.d +++ b/test/app.d @@ -7,10 +7,10 @@ import std.file : read; void main() { auto cas = new CAS("/tmp/base.db", true); - // cas.newSnapshot("/tmp/text", "Файл для тестирования", cast(ubyte[]) read("/tmp/text")); - import std.stdio : writeln; + cas.newSnapshot("/tmp/text", cast(ubyte[]) read("/tmp/text")); + // import std.stdio : writeln; - writeln(cas.getSnapshotList("/tmp/text")); + // writeln(cas.getSnapshotList("/tmp/text")); - writeln(cas.getVersion); + // writeln(cas.getVersion); } From 9ba43b0e38f6144ea952fbedf220785324ae9292 Mon Sep 17 00:00:00 2001 From: Alexander Zhirov Date: Fri, 12 Sep 2025 15:08:32 +0300 Subject: [PATCH 2/2] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5?= =?UTF-8?q?=D0=BD=20=D1=82=D0=B0=D0=B9=D0=BC=D0=B0=D1=83=D1=82=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20=D0=B7=D0=B0=D0=BF=D1=80=D0=BE=D1=81=D0=BE=D0=B2?= =?UTF-8?q?=20=D0=B2=20=D0=91=D0=94.=20=D0=97=D0=B0=D0=BF=D1=80=D0=B5?= =?UTF-8?q?=D1=82=20=D0=BD=D0=B0=20=D0=BE=D0=B1=D0=BD=D0=BE=D0=B2=D0=BB?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=82=D0=B0=D0=B1=D0=BB=D0=B8=D1=86?= =?UTF-8?q?=20=D0=B2=20=D0=91=D0=94,=20=D0=BA=D1=80=D0=BE=D0=BC=D0=B5=20?= =?UTF-8?q?=D1=80=D0=B0=D0=B7=D1=80=D0=B5=D1=88=D0=B5=D0=BD=D0=BD=D1=8B?= =?UTF-8?q?=D1=85=20=D0=BF=D0=BE=D0=BB=D0=B5=D0=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- source/cdcdb/cdc/cas.d | 23 ++++++------ source/cdcdb/db/dblite.d | 76 ++++++++++++++++++++++++++++------------ source/cdcdb/db/scheme.d | 60 ++++++++++++++++++++++++++----- source/cdcdb/db/types.d | 4 +-- test/app.d | 2 +- 5 files changed, 121 insertions(+), 44 deletions(-) diff --git a/source/cdcdb/cdc/cas.d b/source/cdcdb/cdc/cas.d index 02c072c..c7f155b 100644 --- a/source/cdcdb/cdc/cas.d +++ b/source/cdcdb/cdc/cas.d @@ -32,13 +32,15 @@ public: this( string database, bool zstd = false, + int busyTimeout = 3000, + ubyte maxRetries = 3, size_t minSize = 256, size_t normalSize = 512, size_t maxSize = 1024, size_t maskS = 0xFF, size_t maskL = 0x0F ) { - _db = new DBLite(database); + _db = new DBLite(database, busyTimeout, maxRetries); _zstd = zstd; _minSize = minSize; @@ -50,20 +52,18 @@ public: _cdc = new CDC(_minSize, _normalSize, _maxSize, _maskS, _maskL); } - size_t newSnapshot(string filePath, const(ubyte)[] data, string label = string.init) + size_t newSnapshot(string label, const(ubyte)[] data, string description = string.init) { - ubyte[32] hashSource = digest!SHA256(data); + ubyte[32] sha256 = digest!SHA256(data); - // Если последний снимок файла соответствует текущему - if (_db.isLast(filePath, hashSource)) return 0; - // Разбить на фрагменты - auto chunks = _cdc.split(data); + // Если последний снимок файла соответствует текущему состоянию + if (_db.isLast(label, sha256)) return 0; Snapshot snapshot; - snapshot.filePath = filePath; - snapshot.fileSha256 = hashSource; snapshot.label = label; + snapshot.sha256 = sha256; + snapshot.description = description; snapshot.sourceLength = data.length; snapshot.algoMin = _minSize; snapshot.algoNormal = _normalSize; @@ -92,6 +92,9 @@ public: blob.zstd = _zstd; + // Разбить на фрагменты + auto chunks = _cdc.split(data); + // Запись фрагментов в БД foreach (chunk; chunks) { @@ -150,7 +153,7 @@ public: enforce(chunk.size == bytes.length, "Оригинальный размер не соответствует ожидаемому"); content ~= bytes; } - enforce(snapshot.fileSha256 == digest!SHA256(content), "Хеш-сумма файла не совпадает"); + enforce(snapshot.sha256 == digest!SHA256(content), "Хеш-сумма файла не совпадает"); return content; } diff --git a/source/cdcdb/db/dblite.d b/source/cdcdb/db/dblite.d index 9b1e3a1..1c59ff4 100644 --- a/source/cdcdb/db/dblite.d +++ b/source/cdcdb/db/dblite.d @@ -7,18 +7,45 @@ import arsd.sqlite; import std.file : exists; import std.exception : enforce; import std.conv : to; -import std.string : join, replace; +import std.string : join, replace, toLower; +import std.algorithm : canFind; +import std.format : format; final class DBLite : Sqlite { private: string _dbPath; + ubyte _maxRetries; // _scheme mixin(import("scheme.d")); SqliteResult sql(T...)(string queryText, T args) { - return cast(SqliteResult) query(queryText, args); + if (_maxRetries == 0) { + return cast(SqliteResult) query(queryText, args); + } + + string msg; + ubyte tryNo = _maxRetries; + + while (tryNo) { + try { + return cast(SqliteResult) query(queryText, args); + } catch (DatabaseException e) { + msg = e.msg; + if (msg.toLower.canFind("locked", "busy")) { + if (--tryNo == 0) { + throw new Exception( + "Не удалось выполнить подключение к базе данных после %d неудачных попыток: %s" + .format(_maxRetries, msg) + ); + } + continue; + } + break; + } + } + throw new Exception(msg); } // Проверка БД на наличие существующих в ней необходимых таблиц @@ -64,31 +91,34 @@ private: } public: - this(string database) + this(string database, int busyTimeout, ubyte maxRetries) { _dbPath = database; super(database); check(); + _maxRetries = maxRetries; + query("PRAGMA journal_mode=WAL"); query("PRAGMA synchronous=NORMAL"); query("PRAGMA foreign_keys=ON"); + query("PRAGMA busy_timeout=%d".format(busyTimeout)); } void beginImmediate() { - query("BEGIN IMMEDIATE"); + sql("BEGIN IMMEDIATE"); } void commit() { - query("COMMIT"); + sql("COMMIT"); } void rollback() { - query("ROLLBACK"); + sql("ROLLBACK"); } long addSnapshot(Snapshot snapshot) @@ -96,9 +126,9 @@ public: auto queryResult = sql( q{ INSERT INTO snapshots( - file_path, - file_sha256, label, + sha256, + description, source_length, algo_min, algo_normal, @@ -109,9 +139,9 @@ public: ) VALUES (?,?,?,?,?,?,?,?,?,?) RETURNING id }, - snapshot.filePath, - snapshot.fileSha256[], - snapshot.label.length ? snapshot.label : null, + snapshot.label, + snapshot.sha256[], + snapshot.description.length ? snapshot.description : null, snapshot.sourceLength, snapshot.algoMin, snapshot.algoNormal, @@ -157,17 +187,17 @@ public: ); } - bool isLast(string filePath, ubyte[] fileSha256) { + bool isLast(string label, ubyte[] sha256) { auto queryResult = sql( q{ SELECT COALESCE( - (SELECT (file_path = ? AND file_sha256 = ?) + (SELECT (label = ? AND sha256 = ?) FROM snapshots ORDER BY created_utc DESC LIMIT 1), 0 ) AS is_last; - }, filePath, fileSha256 + }, label, sha256 ); if (!queryResult.empty()) @@ -175,14 +205,14 @@ public: return false; } - Snapshot[] getSnapshots(string filePath) + Snapshot[] getSnapshots(string label) { auto queryResult = sql( q{ - SELECT id, file_path, file_sha256, label, created_utc, source_length, + SELECT id, label, sha256, description, created_utc, source_length, algo_min, algo_normal, algo_max, mask_s, mask_l, status - FROM snapshots WHERE file_path = ? - }, filePath + FROM snapshots WHERE label = ? + }, label ); Snapshot[] snapshots; @@ -192,9 +222,9 @@ public: Snapshot snapshot; snapshot.id = row["id"].to!long; - snapshot.filePath = row["file_path"].to!string; - snapshot.fileSha256 = cast(ubyte[]) row["file_sha256"].dup; snapshot.label = row["label"].to!string; + snapshot.sha256 = cast(ubyte[]) row["sha256"].dup; + snapshot.description = row["description"].to!string; snapshot.createdUtc = toDateTime(row["created_utc"].to!string); snapshot.sourceLength = row["source_length"].to!long; snapshot.algoMin = row["algo_min"].to!long; @@ -214,7 +244,7 @@ public: { auto queryResult = sql( q{ - SELECT id, file_path, file_sha256, label, created_utc, source_length, + SELECT id, label, sha256, description, created_utc, source_length, algo_min, algo_normal, algo_max, mask_s, mask_l, status FROM snapshots WHERE id = ? }, id @@ -227,9 +257,9 @@ public: auto data = queryResult.front(); snapshot.id = data["id"].to!long; - snapshot.filePath = data["file_path"].to!string; - snapshot.fileSha256 = cast(ubyte[]) data["file_sha256"].dup; snapshot.label = data["label"].to!string; + snapshot.sha256 = cast(ubyte[]) data["sha256"].dup; + snapshot.description = data["description"].to!string; snapshot.createdUtc = toDateTime(data["created_utc"].to!string); snapshot.sourceLength = data["source_length"].to!long; snapshot.algoMin = data["algo_min"].to!long; diff --git a/source/cdcdb/db/scheme.d b/source/cdcdb/db/scheme.d index c8f6176..b3b10c4 100644 --- a/source/cdcdb/db/scheme.d +++ b/source/cdcdb/db/scheme.d @@ -6,12 +6,12 @@ auto _scheme = [ CREATE TABLE IF NOT EXISTS snapshots ( -- идентификатор снимка id INTEGER PRIMARY KEY AUTOINCREMENT, - -- путь к исходному файлу - file_path TEXT, - -- SHA-256 всего файла (BLOB(32)) - file_sha256 BLOB NOT NULL CHECK (length(file_sha256) = 32), -- метка/название снимка - label TEXT, + label TEXT NOT NULL, + -- SHA-256 всего файла (BLOB(32)) + sha256 BLOB NOT NULL CHECK (length(sha256) = 32), + -- Комментарий/описание + description TEXT DEFAULT NULL, -- время создания (UTC) created_utc TEXT NOT NULL DEFAULT (CURRENT_TIMESTAMP), -- длина исходного файла в байтах @@ -88,9 +88,9 @@ auto _scheme = [ ) }, q{ - -- Индекс для запросов вида: WHERE file_path=? AND file_sha256=? + -- Индекс для запросов вида: WHERE label=? AND sha256=? CREATE INDEX IF NOT EXISTS idx_snapshots_path_sha - ON snapshots(file_path, file_sha256) + ON snapshots(label, sha256) }, q{ -- Индекс для обратного поиска использования blob по sha256 @@ -203,7 +203,51 @@ auto _scheme = [ CREATE TRIGGER IF NOT EXISTS trg_sc_block_update BEFORE UPDATE ON snapshot_chunks BEGIN - SELECT RAISE(ABORT, "snapshot_chunks: UPDATE запрещён; используйте DELETE + INSERT"); + SELECT RAISE(ABORT, "snapshot_chunks: UPDATE запрещён"); + END + }, + q{ + -- ------------------------------------------------------------ + -- snapshots: можно менять только status + -- ------------------------------------------------------------ + CREATE TRIGGER IF NOT EXISTS trg_snapshots_block_update + BEFORE UPDATE ON snapshots + FOR EACH ROW + WHEN + NEW.id IS NOT OLD.id OR + NEW.label IS NOT OLD.label OR + NEW.sha256 IS NOT OLD.sha256 OR + NEW.description IS NOT OLD.description OR + NEW.created_utc IS NOT OLD.created_utc OR + NEW.source_length IS NOT OLD.source_length OR + NEW.algo_min IS NOT OLD.algo_min OR + NEW.algo_normal IS NOT OLD.algo_normal OR + NEW.algo_max IS NOT OLD.algo_max OR + NEW.mask_s IS NOT OLD.mask_s OR + NEW.mask_l IS NOT OLD.mask_l + -- status менять разрешено, поэтому его не сравниваем + BEGIN + SELECT RAISE(ABORT, "snapshots: разрешён UPDATE только поля status"); + END + }, + q{ + -- ------------------------------------------------------------ + -- blobs: можно менять только last_seen_utc и refcount + -- ------------------------------------------------------------ + CREATE TRIGGER IF NOT EXISTS trg_blobs_block_update + BEFORE UPDATE ON blobs + FOR EACH ROW + WHEN + NEW.sha256 IS NOT OLD.sha256 OR + NEW.z_sha256 IS NOT OLD.z_sha256 OR + NEW.size IS NOT OLD.size OR + NEW.z_size IS NOT OLD.z_size OR + NEW.content IS NOT OLD.content OR + NEW.created_utc IS NOT OLD.created_utc OR + NEW.zstd IS NOT OLD.zstd + -- last_seen_utc и refcount менять разрешено + BEGIN + SELECT RAISE(ABORT, "blobs: разрешён UPDATE только полей last_seen_utc и refcount"); END } ]; diff --git a/source/cdcdb/db/types.d b/source/cdcdb/db/types.d index 7e64d02..f1c4e53 100644 --- a/source/cdcdb/db/types.d +++ b/source/cdcdb/db/types.d @@ -11,9 +11,9 @@ enum SnapshotStatus : int struct Snapshot { long id; - string filePath; - ubyte[32] fileSha256; string label; + ubyte[32] sha256; + string description; DateTime createdUtc; long sourceLength; long algoMin; diff --git a/test/app.d b/test/app.d index a2e1312..df57add 100644 --- a/test/app.d +++ b/test/app.d @@ -10,7 +10,7 @@ void main() cas.newSnapshot("/tmp/text", cast(ubyte[]) read("/tmp/text")); // import std.stdio : writeln; - // writeln(cas.getSnapshotList("/tmp/text")); + writeln(cas.getSnapshotList("/tmp/text")); // writeln(cas.getVersion); }