cas.c (12593B)
1 /* Public content-addressed store API: a thin composition over the internal 2 * dist content model (src/dist/{blob,tree,cas}.c). See <kit/cas.h>. */ 3 4 #include "dist/cas.h" 5 6 #include <kit/cas.h> 7 #include <stdarg.h> 8 #include <stdio.h> 9 #include <string.h> 10 11 #include "dist/blob.h" 12 #include "dist/dist.h" 13 #include "dist/tree.h" 14 15 /* Emit a human-readable operational error through the context diag sink (no 16 * source location), mirroring how other subsystems report. No-op when the 17 * caller supplied no sink. */ 18 static void cas_diagf(const KitContext* ctx, const char* fmt, ...) { 19 va_list ap; 20 KitSrcLoc loc; 21 if (!ctx || !ctx->diag || !ctx->diag->emit) return; 22 loc.file_id = 0; 23 loc.line = 0; 24 loc.col = 0; 25 va_start(ap, fmt); 26 ctx->diag->emit(ctx->diag, KIT_DIAG_ERROR, loc, fmt, ap); 27 va_end(ap); 28 } 29 30 struct KitCas { 31 /* Own a copy of the context so the handle outlives the caller's (possibly 32 * stack-local) KitContext; ctx points at the stored copy. The pointed-to 33 * heap/file_io/diag must still outlive the handle. */ 34 KitContext ctx_storage; 35 const KitContext* ctx; 36 KitCasHost host; 37 DistCas dist; 38 }; 39 40 struct KitCasTreeBuilder { 41 KitCas* cas; 42 DistTree tree; 43 DistTreeEntry* entries; 44 }; 45 46 void kit_blob_info(KitBlobInfo* out, const uint8_t* data, size_t len) { 47 DistBlobInfo bi; 48 if (!out) return; 49 memset(out, 0, sizeof *out); 50 if (dist_blob_info(&bi, data, len, DIST_BLOB_CHUNK_SIZE_DEFAULT) != DIST_OK) 51 return; 52 memcpy(out->id, bi.id, DIST_BLAKE2B_LEN); 53 memcpy(out->root, bi.root, DIST_BLAKE2B_LEN); 54 out->size = bi.size; 55 out->chunks = bi.chunks; 56 } 57 58 void kit_hex_encode(char* out, const uint8_t* in, size_t n) { 59 dist_hex_encode(out, in, n); 60 } 61 62 KitStatus kit_hex_decode(uint8_t* out, const char* in, size_t n) { 63 return dist_hex_decode(out, in, n) == DIST_OK ? KIT_OK : KIT_MALFORMED; 64 } 65 66 KitStatus kit_cas_open(const KitContext* ctx, const KitCasHost* host, 67 const char* root_path, KitCas** out) { 68 KitCas* cas; 69 if (!ctx || !ctx->heap || !host || !host->file_io || !root_path || !out) 70 return KIT_INVALID; 71 *out = NULL; 72 cas = (KitCas*)ctx->heap->alloc(ctx->heap, sizeof *cas, _Alignof(KitCas)); 73 if (!cas) return KIT_NOMEM; 74 cas->ctx_storage = *ctx; 75 cas->ctx = &cas->ctx_storage; 76 cas->host = *host; 77 cas->dist.host.file_io = host->file_io; 78 cas->dist.host.mkdir_p = host->mkdir_p; 79 cas->dist.host.mark_executable = host->mark_executable; 80 cas->dist.host.user = host->user; 81 cas->dist.root = root_path; 82 *out = cas; 83 return KIT_OK; 84 } 85 86 void kit_cas_close(KitCas* cas) { 87 if (!cas) return; 88 cas->ctx->heap->free(cas->ctx->heap, cas, sizeof *cas); 89 } 90 91 KitStatus kit_cas_add_blob(KitCas* cas, const uint8_t* data, size_t len, 92 KitBlobInfo* out) { 93 DistBlobInfo bi; 94 if (!cas || !out) return KIT_INVALID; 95 if (dist_blob_info(&bi, data, len, DIST_BLOB_CHUNK_SIZE_DEFAULT) != DIST_OK) { 96 cas_diagf(cas->ctx, "failed to hash blob"); 97 return KIT_ERR; 98 } 99 if (dist_cas_put_blob(&cas->dist, bi.id, data, len) != DIST_OK) { 100 cas_diagf(cas->ctx, "failed to store blob"); 101 return KIT_IO; 102 } 103 memcpy(out->id, bi.id, DIST_BLAKE2B_LEN); 104 memcpy(out->root, bi.root, DIST_BLAKE2B_LEN); 105 out->size = bi.size; 106 out->chunks = bi.chunks; 107 return KIT_OK; 108 } 109 110 KitStatus kit_cas_tree_builder_new(KitCas* cas, KitCasTreeBuilder** out) { 111 KitCasTreeBuilder* b; 112 KitHeap* h; 113 if (!cas || !out) return KIT_INVALID; 114 *out = NULL; 115 h = cas->ctx->heap; 116 b = (KitCasTreeBuilder*)h->alloc(h, sizeof *b, _Alignof(KitCasTreeBuilder)); 117 if (!b) return KIT_NOMEM; 118 b->cas = cas; 119 b->entries = (DistTreeEntry*)h->alloc(h, DIST_MAX_FILES * sizeof *b->entries, 120 _Alignof(DistTreeEntry)); 121 if (!b->entries) { 122 h->free(h, b, sizeof *b); 123 return KIT_NOMEM; 124 } 125 b->tree.entries = b->entries; 126 b->tree.n_entries = 0; 127 b->tree.cap_entries = DIST_MAX_FILES; 128 *out = b; 129 return KIT_OK; 130 } 131 132 KitStatus kit_cas_tree_builder_add(KitCasTreeBuilder* b, const char* tree_path, 133 KitTreeMode mode, const uint8_t* data, 134 size_t len) { 135 KitCas* cas; 136 DistBlobInfo bi; 137 DistTreeEntry* e; 138 if (!b || !tree_path) return KIT_INVALID; 139 cas = b->cas; 140 if (b->tree.n_entries >= b->tree.cap_entries) { 141 cas_diagf(cas->ctx, "too many tree entries"); 142 return KIT_ERR; 143 } 144 if (!dist_tree_path_valid(tree_path)) { 145 cas_diagf(cas->ctx, "unsafe tree path: %s", tree_path); 146 return KIT_INVALID; 147 } 148 if (!dist_tree_mode_name((uint8_t)mode)) { 149 cas_diagf(cas->ctx, "bad tree mode for: %s", tree_path); 150 return KIT_INVALID; 151 } 152 if (dist_blob_info(&bi, data, len, DIST_BLOB_CHUNK_SIZE_DEFAULT) != DIST_OK) { 153 cas_diagf(cas->ctx, "failed to hash blob: %s", tree_path); 154 return KIT_ERR; 155 } 156 if (dist_cas_put_blob(&cas->dist, bi.id, data, len) != DIST_OK) { 157 cas_diagf(cas->ctx, "failed to store blob: %s", tree_path); 158 return KIT_IO; 159 } 160 e = &b->tree.entries[b->tree.n_entries++]; 161 memset(e, 0, sizeof *e); 162 snprintf(e->path, sizeof e->path, "%s", tree_path); 163 e->mode = (uint8_t)mode; 164 e->size = bi.size; 165 memcpy(e->blob, bi.id, DIST_BLAKE2B_LEN); 166 memcpy(e->root, bi.root, DIST_BLAKE2B_LEN); 167 return KIT_OK; 168 } 169 170 KitStatus kit_cas_tree_builder_finish(KitCasTreeBuilder* b, 171 uint8_t out_tree_id[KIT_CAS_HASH_LEN]) { 172 KitCas* cas; 173 KitWriter* w = NULL; 174 const uint8_t* bytes; 175 size_t len; 176 char err[128]; 177 if (!b || !out_tree_id) return KIT_INVALID; 178 cas = b->cas; 179 if (dist_tree_sort_validate(&b->tree, err, sizeof err) != DIST_OK) { 180 cas_diagf(cas->ctx, "%s", err); 181 return KIT_MALFORMED; 182 } 183 if (kit_writer_mem(cas->ctx->heap, &w) != KIT_OK) { 184 cas_diagf(cas->ctx, "failed to allocate tree writer"); 185 return KIT_NOMEM; 186 } 187 if (dist_tree_emit(&b->tree, w) != DIST_OK || 188 kit_writer_status(w) != KIT_OK) { 189 kit_writer_close(w); 190 cas_diagf(cas->ctx, "failed to emit tree manifest"); 191 return KIT_ERR; 192 } 193 bytes = kit_writer_mem_bytes(w, &len); 194 dist_tree_id(out_tree_id, bytes, len); 195 if (dist_cas_put_tree(&cas->dist, out_tree_id, bytes, len) != DIST_OK) { 196 kit_writer_close(w); 197 cas_diagf(cas->ctx, "failed to store tree manifest"); 198 return KIT_IO; 199 } 200 kit_writer_close(w); 201 return KIT_OK; 202 } 203 204 void kit_cas_tree_builder_free(KitCasTreeBuilder* b) { 205 KitHeap* h; 206 if (!b) return; 207 h = b->cas->ctx->heap; 208 h->free(h, b->entries, DIST_MAX_FILES * sizeof *b->entries); 209 h->free(h, b, sizeof *b); 210 } 211 212 typedef struct CasDirWalk { 213 KitCasTreeBuilder* b; 214 KitStatus status; 215 } CasDirWalk; 216 217 static int cas_dir_walk_file(void* user, const char* source_path, 218 const char* tree_path, int executable) { 219 CasDirWalk* w = (CasDirWalk*)user; 220 KitCas* cas = w->b->cas; 221 const KitFileIO* io = cas->host.file_io; 222 KitFileData fd; 223 KitStatus st; 224 fd.data = NULL; 225 fd.size = 0; 226 fd.token = NULL; 227 if (io->read_all(io->user, source_path, &fd) != KIT_OK) { 228 cas_diagf(cas->ctx, "failed to read: %s", source_path); 229 w->status = KIT_IO; 230 return 1; 231 } 232 st = kit_cas_tree_builder_add( 233 w->b, tree_path, executable ? KIT_TREE_MODE_EXEC : KIT_TREE_MODE_FILE, 234 fd.data, fd.size); 235 if (io->release) io->release(io->user, &fd); 236 if (st != KIT_OK) { 237 w->status = st; 238 return 1; 239 } 240 return 0; 241 } 242 243 KitStatus kit_cas_add_tree_from_dir(KitCas* cas, const char* root, 244 uint8_t out_tree_id[KIT_CAS_HASH_LEN]) { 245 KitCasTreeBuilder* b; 246 CasDirWalk w; 247 KitStatus st; 248 if (!cas || !root || !out_tree_id) return KIT_INVALID; 249 if (!cas->host.walk_regular_files) return KIT_UNSUPPORTED; 250 st = kit_cas_tree_builder_new(cas, &b); 251 if (st != KIT_OK) return st; 252 w.b = b; 253 w.status = KIT_OK; 254 if (cas->host.walk_regular_files(cas->host.user, root, cas_dir_walk_file, 255 &w) != 0) { 256 if (w.status == KIT_OK) { 257 cas_diagf(cas->ctx, "failed to walk directory: %s", root); 258 w.status = KIT_IO; 259 } 260 kit_cas_tree_builder_free(b); 261 return w.status; 262 } 263 st = kit_cas_tree_builder_finish(b, out_tree_id); 264 kit_cas_tree_builder_free(b); 265 return st; 266 } 267 268 /* Load and parse a stored tree into a heap-allocated entries buffer. On 269 * success, *raw holds the borrowed manifest bytes (release via file_io) and 270 * *entries the allocation to free. */ 271 static KitStatus cas_load_tree(KitCas* cas, 272 const uint8_t tree_id[KIT_CAS_HASH_LEN], 273 DistTree* tree, DistTreeEntry** entries, 274 KitFileData* raw) { 275 KitHeap* h = cas->ctx->heap; 276 char err[128]; 277 *entries = (DistTreeEntry*)h->alloc(h, DIST_MAX_FILES * sizeof **entries, 278 _Alignof(DistTreeEntry)); 279 if (!*entries) return KIT_NOMEM; 280 tree->entries = *entries; 281 tree->n_entries = 0; 282 tree->cap_entries = DIST_MAX_FILES; 283 raw->data = NULL; 284 raw->size = 0; 285 raw->token = NULL; 286 if (dist_cas_get_tree(&cas->dist, tree_id, raw) != DIST_OK) { 287 cas_diagf(cas->ctx, "failed to load tree"); 288 h->free(h, *entries, DIST_MAX_FILES * sizeof **entries); 289 *entries = NULL; 290 return KIT_NOT_FOUND; 291 } 292 if (dist_tree_parse(raw->data, raw->size, tree, err, sizeof err) != DIST_OK) { 293 cas_diagf(cas->ctx, "%s", err); 294 if (cas->host.file_io->release) 295 cas->host.file_io->release(cas->host.file_io->user, raw); 296 h->free(h, *entries, DIST_MAX_FILES * sizeof **entries); 297 *entries = NULL; 298 return KIT_MALFORMED; 299 } 300 return KIT_OK; 301 } 302 303 static void cas_free_tree(KitCas* cas, DistTreeEntry* entries, 304 KitFileData* raw) { 305 KitHeap* h = cas->ctx->heap; 306 if (cas->host.file_io->release) 307 cas->host.file_io->release(cas->host.file_io->user, raw); 308 if (entries) h->free(h, entries, DIST_MAX_FILES * sizeof *entries); 309 } 310 311 KitStatus kit_cas_inspect_tree(KitCas* cas, 312 const uint8_t tree_id[KIT_CAS_HASH_LEN], 313 KitWriter* out) { 314 KitFileData raw; 315 if (!cas || !tree_id || !out) return KIT_INVALID; 316 raw.data = NULL; 317 raw.size = 0; 318 raw.token = NULL; 319 if (dist_cas_get_tree(&cas->dist, tree_id, &raw) != DIST_OK) { 320 cas_diagf(cas->ctx, "failed to load tree"); 321 return KIT_NOT_FOUND; 322 } 323 if (raw.size && kit_writer_write(out, raw.data, raw.size) != KIT_OK) { 324 if (cas->host.file_io->release) 325 cas->host.file_io->release(cas->host.file_io->user, &raw); 326 cas_diagf(cas->ctx, "failed to write tree manifest"); 327 return KIT_IO; 328 } 329 if (cas->host.file_io->release) 330 cas->host.file_io->release(cas->host.file_io->user, &raw); 331 return kit_writer_status(out) == KIT_OK ? KIT_OK : KIT_IO; 332 } 333 334 KitStatus kit_cas_verify_tree(KitCas* cas, 335 const uint8_t tree_id[KIT_CAS_HASH_LEN]) { 336 DistTree tree; 337 DistTreeEntry* entries; 338 KitFileData raw; 339 KitStatus st; 340 size_t i; 341 if (!cas || !tree_id) return KIT_INVALID; 342 st = cas_load_tree(cas, tree_id, &tree, &entries, &raw); 343 if (st != KIT_OK) return st; 344 st = KIT_OK; 345 for (i = 0; i < tree.n_entries; ++i) { 346 const DistTreeEntry* e = &tree.entries[i]; 347 KitFileData fd; 348 DistBlobInfo bi; 349 fd.data = NULL; 350 fd.size = 0; 351 fd.token = NULL; 352 if (dist_cas_get_blob(&cas->dist, e->blob, &fd) != DIST_OK) { 353 cas_diagf(cas->ctx, "missing or corrupt blob for: %s", e->path); 354 st = KIT_NOT_FOUND; 355 break; 356 } 357 if (dist_blob_info(&bi, fd.data, fd.size, DIST_BLOB_CHUNK_SIZE_DEFAULT) != 358 DIST_OK || 359 bi.size != e->size || memcmp(bi.root, e->root, DIST_BLAKE2B_LEN) != 0) { 360 if (cas->host.file_io->release) 361 cas->host.file_io->release(cas->host.file_io->user, &fd); 362 cas_diagf(cas->ctx, "blob root mismatch for: %s", e->path); 363 st = KIT_INVALID; 364 break; 365 } 366 if (cas->host.file_io->release) 367 cas->host.file_io->release(cas->host.file_io->user, &fd); 368 } 369 cas_free_tree(cas, entries, &raw); 370 return st; 371 } 372 373 KitStatus kit_cas_materialize_tree(KitCas* cas, 374 const uint8_t tree_id[KIT_CAS_HASH_LEN], 375 const char* dst) { 376 DistTree tree; 377 DistTreeEntry* entries; 378 KitFileData raw; 379 KitStatus st; 380 if (!cas || !tree_id || !dst) return KIT_INVALID; 381 st = cas_load_tree(cas, tree_id, &tree, &entries, &raw); 382 if (st != KIT_OK) return st; 383 if (dist_cas_materialize_tree(&cas->dist, &tree, dst) != DIST_OK) { 384 cas_diagf(cas->ctx, "failed to materialize tree"); 385 st = KIT_ERR; 386 } 387 cas_free_tree(cas, entries, &raw); 388 return st; 389 }