kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

cas.c (12593B)


      1 /* Public content-addressed store API: a thin composition over the internal
      2  * dist content model (src/dist/{blob,tree,cas}.c). See <kit/cas.h>. */
      3 
      4 #include "dist/cas.h"
      5 
      6 #include <kit/cas.h>
      7 #include <stdarg.h>
      8 #include <stdio.h>
      9 #include <string.h>
     10 
     11 #include "dist/blob.h"
     12 #include "dist/dist.h"
     13 #include "dist/tree.h"
     14 
     15 /* Emit a human-readable operational error through the context diag sink (no
     16  * source location), mirroring how other subsystems report. No-op when the
     17  * caller supplied no sink. */
     18 static void cas_diagf(const KitContext* ctx, const char* fmt, ...) {
     19   va_list ap;
     20   KitSrcLoc loc;
     21   if (!ctx || !ctx->diag || !ctx->diag->emit) return;
     22   loc.file_id = 0;
     23   loc.line = 0;
     24   loc.col = 0;
     25   va_start(ap, fmt);
     26   ctx->diag->emit(ctx->diag, KIT_DIAG_ERROR, loc, fmt, ap);
     27   va_end(ap);
     28 }
     29 
     30 struct KitCas {
     31   /* Own a copy of the context so the handle outlives the caller's (possibly
     32    * stack-local) KitContext; ctx points at the stored copy. The pointed-to
     33    * heap/file_io/diag must still outlive the handle. */
     34   KitContext ctx_storage;
     35   const KitContext* ctx;
     36   KitCasHost host;
     37   DistCas dist;
     38 };
     39 
     40 struct KitCasTreeBuilder {
     41   KitCas* cas;
     42   DistTree tree;
     43   DistTreeEntry* entries;
     44 };
     45 
     46 void kit_blob_info(KitBlobInfo* out, const uint8_t* data, size_t len) {
     47   DistBlobInfo bi;
     48   if (!out) return;
     49   memset(out, 0, sizeof *out);
     50   if (dist_blob_info(&bi, data, len, DIST_BLOB_CHUNK_SIZE_DEFAULT) != DIST_OK)
     51     return;
     52   memcpy(out->id, bi.id, DIST_BLAKE2B_LEN);
     53   memcpy(out->root, bi.root, DIST_BLAKE2B_LEN);
     54   out->size = bi.size;
     55   out->chunks = bi.chunks;
     56 }
     57 
     /* Public entry point for hex encoding: forwards `out`, `in` and `n`
      * unchanged to dist_hex_encode(). Buffer-size requirements are those of
      * dist_hex_encode() and are not checked here. */
     void kit_hex_encode(char* out, const uint8_t* in, size_t n) {
       dist_hex_encode(out, in, n);
     }
     61 
     62 KitStatus kit_hex_decode(uint8_t* out, const char* in, size_t n) {
     63   return dist_hex_decode(out, in, n) == DIST_OK ? KIT_OK : KIT_MALFORMED;
     64 }
     65 
     66 KitStatus kit_cas_open(const KitContext* ctx, const KitCasHost* host,
     67                        const char* root_path, KitCas** out) {
     68   KitCas* cas;
     69   if (!ctx || !ctx->heap || !host || !host->file_io || !root_path || !out)
     70     return KIT_INVALID;
     71   *out = NULL;
     72   cas = (KitCas*)ctx->heap->alloc(ctx->heap, sizeof *cas, _Alignof(KitCas));
     73   if (!cas) return KIT_NOMEM;
     74   cas->ctx_storage = *ctx;
     75   cas->ctx = &cas->ctx_storage;
     76   cas->host = *host;
     77   cas->dist.host.file_io = host->file_io;
     78   cas->dist.host.mkdir_p = host->mkdir_p;
     79   cas->dist.host.mark_executable = host->mark_executable;
     80   cas->dist.host.user = host->user;
     81   cas->dist.root = root_path;
     82   *out = cas;
     83   return KIT_OK;
     84 }
     85 
     86 void kit_cas_close(KitCas* cas) {
     87   if (!cas) return;
     88   cas->ctx->heap->free(cas->ctx->heap, cas, sizeof *cas);
     89 }
     90 
     91 KitStatus kit_cas_add_blob(KitCas* cas, const uint8_t* data, size_t len,
     92                            KitBlobInfo* out) {
     93   DistBlobInfo bi;
     94   if (!cas || !out) return KIT_INVALID;
     95   if (dist_blob_info(&bi, data, len, DIST_BLOB_CHUNK_SIZE_DEFAULT) != DIST_OK) {
     96     cas_diagf(cas->ctx, "failed to hash blob");
     97     return KIT_ERR;
     98   }
     99   if (dist_cas_put_blob(&cas->dist, bi.id, data, len) != DIST_OK) {
    100     cas_diagf(cas->ctx, "failed to store blob");
    101     return KIT_IO;
    102   }
    103   memcpy(out->id, bi.id, DIST_BLAKE2B_LEN);
    104   memcpy(out->root, bi.root, DIST_BLAKE2B_LEN);
    105   out->size = bi.size;
    106   out->chunks = bi.chunks;
    107   return KIT_OK;
    108 }
    109 
    110 KitStatus kit_cas_tree_builder_new(KitCas* cas, KitCasTreeBuilder** out) {
    111   KitCasTreeBuilder* b;
    112   KitHeap* h;
    113   if (!cas || !out) return KIT_INVALID;
    114   *out = NULL;
    115   h = cas->ctx->heap;
    116   b = (KitCasTreeBuilder*)h->alloc(h, sizeof *b, _Alignof(KitCasTreeBuilder));
    117   if (!b) return KIT_NOMEM;
    118   b->cas = cas;
    119   b->entries = (DistTreeEntry*)h->alloc(h, DIST_MAX_FILES * sizeof *b->entries,
    120                                         _Alignof(DistTreeEntry));
    121   if (!b->entries) {
    122     h->free(h, b, sizeof *b);
    123     return KIT_NOMEM;
    124   }
    125   b->tree.entries = b->entries;
    126   b->tree.n_entries = 0;
    127   b->tree.cap_entries = DIST_MAX_FILES;
    128   *out = b;
    129   return KIT_OK;
    130 }
    131 
    132 KitStatus kit_cas_tree_builder_add(KitCasTreeBuilder* b, const char* tree_path,
    133                                    KitTreeMode mode, const uint8_t* data,
    134                                    size_t len) {
    135   KitCas* cas;
    136   DistBlobInfo bi;
    137   DistTreeEntry* e;
    138   if (!b || !tree_path) return KIT_INVALID;
    139   cas = b->cas;
    140   if (b->tree.n_entries >= b->tree.cap_entries) {
    141     cas_diagf(cas->ctx, "too many tree entries");
    142     return KIT_ERR;
    143   }
    144   if (!dist_tree_path_valid(tree_path)) {
    145     cas_diagf(cas->ctx, "unsafe tree path: %s", tree_path);
    146     return KIT_INVALID;
    147   }
    148   if (!dist_tree_mode_name((uint8_t)mode)) {
    149     cas_diagf(cas->ctx, "bad tree mode for: %s", tree_path);
    150     return KIT_INVALID;
    151   }
    152   if (dist_blob_info(&bi, data, len, DIST_BLOB_CHUNK_SIZE_DEFAULT) != DIST_OK) {
    153     cas_diagf(cas->ctx, "failed to hash blob: %s", tree_path);
    154     return KIT_ERR;
    155   }
    156   if (dist_cas_put_blob(&cas->dist, bi.id, data, len) != DIST_OK) {
    157     cas_diagf(cas->ctx, "failed to store blob: %s", tree_path);
    158     return KIT_IO;
    159   }
    160   e = &b->tree.entries[b->tree.n_entries++];
    161   memset(e, 0, sizeof *e);
    162   snprintf(e->path, sizeof e->path, "%s", tree_path);
    163   e->mode = (uint8_t)mode;
    164   e->size = bi.size;
    165   memcpy(e->blob, bi.id, DIST_BLAKE2B_LEN);
    166   memcpy(e->root, bi.root, DIST_BLAKE2B_LEN);
    167   return KIT_OK;
    168 }
    169 
    170 KitStatus kit_cas_tree_builder_finish(KitCasTreeBuilder* b,
    171                                       uint8_t out_tree_id[KIT_CAS_HASH_LEN]) {
    172   KitCas* cas;
    173   KitWriter* w = NULL;
    174   const uint8_t* bytes;
    175   size_t len;
    176   char err[128];
    177   if (!b || !out_tree_id) return KIT_INVALID;
    178   cas = b->cas;
    179   if (dist_tree_sort_validate(&b->tree, err, sizeof err) != DIST_OK) {
    180     cas_diagf(cas->ctx, "%s", err);
    181     return KIT_MALFORMED;
    182   }
    183   if (kit_writer_mem(cas->ctx->heap, &w) != KIT_OK) {
    184     cas_diagf(cas->ctx, "failed to allocate tree writer");
    185     return KIT_NOMEM;
    186   }
    187   if (dist_tree_emit(&b->tree, w) != DIST_OK ||
    188       kit_writer_status(w) != KIT_OK) {
    189     kit_writer_close(w);
    190     cas_diagf(cas->ctx, "failed to emit tree manifest");
    191     return KIT_ERR;
    192   }
    193   bytes = kit_writer_mem_bytes(w, &len);
    194   dist_tree_id(out_tree_id, bytes, len);
    195   if (dist_cas_put_tree(&cas->dist, out_tree_id, bytes, len) != DIST_OK) {
    196     kit_writer_close(w);
    197     cas_diagf(cas->ctx, "failed to store tree manifest");
    198     return KIT_IO;
    199   }
    200   kit_writer_close(w);
    201   return KIT_OK;
    202 }
    203 
    204 void kit_cas_tree_builder_free(KitCasTreeBuilder* b) {
    205   KitHeap* h;
    206   if (!b) return;
    207   h = b->cas->ctx->heap;
    208   h->free(h, b->entries, DIST_MAX_FILES * sizeof *b->entries);
    209   h->free(h, b, sizeof *b);
    210 }
    211 
    212 typedef struct CasDirWalk {
    213   KitCasTreeBuilder* b;
    214   KitStatus status;
    215 } CasDirWalk;
    216 
    217 static int cas_dir_walk_file(void* user, const char* source_path,
    218                              const char* tree_path, int executable) {
    219   CasDirWalk* w = (CasDirWalk*)user;
    220   KitCas* cas = w->b->cas;
    221   const KitFileIO* io = cas->host.file_io;
    222   KitFileData fd;
    223   KitStatus st;
    224   fd.data = NULL;
    225   fd.size = 0;
    226   fd.token = NULL;
    227   if (io->read_all(io->user, source_path, &fd) != KIT_OK) {
    228     cas_diagf(cas->ctx, "failed to read: %s", source_path);
    229     w->status = KIT_IO;
    230     return 1;
    231   }
    232   st = kit_cas_tree_builder_add(
    233       w->b, tree_path, executable ? KIT_TREE_MODE_EXEC : KIT_TREE_MODE_FILE,
    234       fd.data, fd.size);
    235   if (io->release) io->release(io->user, &fd);
    236   if (st != KIT_OK) {
    237     w->status = st;
    238     return 1;
    239   }
    240   return 0;
    241 }
    242 
    243 KitStatus kit_cas_add_tree_from_dir(KitCas* cas, const char* root,
    244                                     uint8_t out_tree_id[KIT_CAS_HASH_LEN]) {
    245   KitCasTreeBuilder* b;
    246   CasDirWalk w;
    247   KitStatus st;
    248   if (!cas || !root || !out_tree_id) return KIT_INVALID;
    249   if (!cas->host.walk_regular_files) return KIT_UNSUPPORTED;
    250   st = kit_cas_tree_builder_new(cas, &b);
    251   if (st != KIT_OK) return st;
    252   w.b = b;
    253   w.status = KIT_OK;
    254   if (cas->host.walk_regular_files(cas->host.user, root, cas_dir_walk_file,
    255                                    &w) != 0) {
    256     if (w.status == KIT_OK) {
    257       cas_diagf(cas->ctx, "failed to walk directory: %s", root);
    258       w.status = KIT_IO;
    259     }
    260     kit_cas_tree_builder_free(b);
    261     return w.status;
    262   }
    263   st = kit_cas_tree_builder_finish(b, out_tree_id);
    264   kit_cas_tree_builder_free(b);
    265   return st;
    266 }
    267 
    268 /* Load and parse a stored tree into a heap-allocated entries buffer. On
    269  * success, *raw holds the borrowed manifest bytes (release via file_io) and
    270  * *entries the allocation to free. */
    271 static KitStatus cas_load_tree(KitCas* cas,
    272                                const uint8_t tree_id[KIT_CAS_HASH_LEN],
    273                                DistTree* tree, DistTreeEntry** entries,
    274                                KitFileData* raw) {
    275   KitHeap* h = cas->ctx->heap;
    276   char err[128];
    277   *entries = (DistTreeEntry*)h->alloc(h, DIST_MAX_FILES * sizeof **entries,
    278                                       _Alignof(DistTreeEntry));
    279   if (!*entries) return KIT_NOMEM;
    280   tree->entries = *entries;
    281   tree->n_entries = 0;
    282   tree->cap_entries = DIST_MAX_FILES;
    283   raw->data = NULL;
    284   raw->size = 0;
    285   raw->token = NULL;
    286   if (dist_cas_get_tree(&cas->dist, tree_id, raw) != DIST_OK) {
    287     cas_diagf(cas->ctx, "failed to load tree");
    288     h->free(h, *entries, DIST_MAX_FILES * sizeof **entries);
    289     *entries = NULL;
    290     return KIT_NOT_FOUND;
    291   }
    292   if (dist_tree_parse(raw->data, raw->size, tree, err, sizeof err) != DIST_OK) {
    293     cas_diagf(cas->ctx, "%s", err);
    294     if (cas->host.file_io->release)
    295       cas->host.file_io->release(cas->host.file_io->user, raw);
    296     h->free(h, *entries, DIST_MAX_FILES * sizeof **entries);
    297     *entries = NULL;
    298     return KIT_MALFORMED;
    299   }
    300   return KIT_OK;
    301 }
    302 
    303 static void cas_free_tree(KitCas* cas, DistTreeEntry* entries,
    304                           KitFileData* raw) {
    305   KitHeap* h = cas->ctx->heap;
    306   if (cas->host.file_io->release)
    307     cas->host.file_io->release(cas->host.file_io->user, raw);
    308   if (entries) h->free(h, entries, DIST_MAX_FILES * sizeof *entries);
    309 }
    310 
    311 KitStatus kit_cas_inspect_tree(KitCas* cas,
    312                                const uint8_t tree_id[KIT_CAS_HASH_LEN],
    313                                KitWriter* out) {
    314   KitFileData raw;
    315   if (!cas || !tree_id || !out) return KIT_INVALID;
    316   raw.data = NULL;
    317   raw.size = 0;
    318   raw.token = NULL;
    319   if (dist_cas_get_tree(&cas->dist, tree_id, &raw) != DIST_OK) {
    320     cas_diagf(cas->ctx, "failed to load tree");
    321     return KIT_NOT_FOUND;
    322   }
    323   if (raw.size && kit_writer_write(out, raw.data, raw.size) != KIT_OK) {
    324     if (cas->host.file_io->release)
    325       cas->host.file_io->release(cas->host.file_io->user, &raw);
    326     cas_diagf(cas->ctx, "failed to write tree manifest");
    327     return KIT_IO;
    328   }
    329   if (cas->host.file_io->release)
    330     cas->host.file_io->release(cas->host.file_io->user, &raw);
    331   return kit_writer_status(out) == KIT_OK ? KIT_OK : KIT_IO;
    332 }
    333 
    334 KitStatus kit_cas_verify_tree(KitCas* cas,
    335                               const uint8_t tree_id[KIT_CAS_HASH_LEN]) {
    336   DistTree tree;
    337   DistTreeEntry* entries;
    338   KitFileData raw;
    339   KitStatus st;
    340   size_t i;
    341   if (!cas || !tree_id) return KIT_INVALID;
    342   st = cas_load_tree(cas, tree_id, &tree, &entries, &raw);
    343   if (st != KIT_OK) return st;
    344   st = KIT_OK;
    345   for (i = 0; i < tree.n_entries; ++i) {
    346     const DistTreeEntry* e = &tree.entries[i];
    347     KitFileData fd;
    348     DistBlobInfo bi;
    349     fd.data = NULL;
    350     fd.size = 0;
    351     fd.token = NULL;
    352     if (dist_cas_get_blob(&cas->dist, e->blob, &fd) != DIST_OK) {
    353       cas_diagf(cas->ctx, "missing or corrupt blob for: %s", e->path);
    354       st = KIT_NOT_FOUND;
    355       break;
    356     }
    357     if (dist_blob_info(&bi, fd.data, fd.size, DIST_BLOB_CHUNK_SIZE_DEFAULT) !=
    358             DIST_OK ||
    359         bi.size != e->size || memcmp(bi.root, e->root, DIST_BLAKE2B_LEN) != 0) {
    360       if (cas->host.file_io->release)
    361         cas->host.file_io->release(cas->host.file_io->user, &fd);
    362       cas_diagf(cas->ctx, "blob root mismatch for: %s", e->path);
    363       st = KIT_INVALID;
    364       break;
    365     }
    366     if (cas->host.file_io->release)
    367       cas->host.file_io->release(cas->host.file_io->user, &fd);
    368   }
    369   cas_free_tree(cas, entries, &raw);
    370   return st;
    371 }
    372 
    373 KitStatus kit_cas_materialize_tree(KitCas* cas,
    374                                    const uint8_t tree_id[KIT_CAS_HASH_LEN],
    375                                    const char* dst) {
    376   DistTree tree;
    377   DistTreeEntry* entries;
    378   KitFileData raw;
    379   KitStatus st;
    380   if (!cas || !tree_id || !dst) return KIT_INVALID;
    381   st = cas_load_tree(cas, tree_id, &tree, &entries, &raw);
    382   if (st != KIT_OK) return st;
    383   if (dist_cas_materialize_tree(&cas->dist, &tree, dst) != DIST_OK) {
    384     cas_diagf(cas->ctx, "failed to materialize tree");
    385     st = KIT_ERR;
    386   }
    387   cas_free_tree(cas, entries, &raw);
    388   return st;
    389 }