kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 668e21ada80ea8280e3be9a229727590e54b1681
parent 55e6cae81c8e601c13504393d234c59e43135ffa
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue,  2 Jun 2026 11:06:58 -0700

hash: add public cfree/hash.h API + driver hash tool

New public surface in <cfree/hash.h>: one-shot cfree_hash() and streaming
cfree_hasher_{new,update,final,free} over SHA-256, BLAKE2b-256, and CRC-32.
Backed by src/api/hash.c composing src/core/sha256, the dist BLAKE2b wrapper,
and a CRC-32 lifted out of src/dist/deflate.c into src/core/crc32.{c,h}
(single source of truth; deflate's gzip trailer now calls cfree_crc32).

The driver 'hash' tool prints coreutils-style '<hex>  <name>' lines. It takes
-a sha256|blake2b|crc32 (default sha256), reads stdin when given '-' or no
FILE operand, and accepts multiple files. Verified against known vectors
(SHA-256 of empty input and "abc", the CRC-32 check value 0xcbf43926, and
BLAKE2b-256 of "abc") and against shasum.

Diffstat:
M Makefile | 3 +++
A driver/cmd/hash.c | 175 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M driver/driver.h | 2 ++
M driver/main.c | 4 ++++
M include/cfree/config.h | 1 +
A include/cfree/hash.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/api/hash.c | 131 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/core/crc32.c | 18 ++++++++++++++++++
A src/core/crc32.h | 15 +++++++++++++++
M src/dist/deflate.c | 26 +++++++-------------------
10 files changed, 408 insertions(+), 19 deletions(-)

diff --git a/Makefile b/Makefile @@ -397,6 +397,9 @@ endif ifeq ($(CFREE_TOOL_CMP_ENABLED),1) DRIVER_TOOL_SRCS += driver/cmd/cmp.c endif +ifeq ($(CFREE_TOOL_HASH_ENABLED),1) +DRIVER_TOOL_SRCS += driver/cmd/hash.c +endif DRIVER_SRCS += $(sort $(DRIVER_TOOL_SRCS)) ifneq ($(filter 1,$(CFREE_TOOL_CC_ENABLED) $(CFREE_TOOL_CHECK_ENABLED) $(CFREE_TOOL_CPP_ENABLED) $(CFREE_TOOL_AS_ENABLED) $(CFREE_TOOL_DBG_ENABLED) $(CFREE_TOOL_RUN_ENABLED)),) DRIVER_SRCS += driver/lib/cflags.c diff --git a/driver/cmd/hash.c b/driver/cmd/hash.c @@ -0,0 +1,175 @@ +#include <cfree/core.h> +#include <cfree/hash.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include "driver.h" +#include "env.h" + +/* `cfree hash` — print the SHA-256, BLAKE2b-256, or CRC-32 digest of each + * input as coreutils-style "<hex> <name>" lines: SHA-256 matches sha256sum, + * BLAKE2b-256 corresponds to `b2sum -l 256`, and CRC-32 prints as 8 hex digits. + * With no FILE, or with `-`, reads stdin. Drives the streaming + * cfree_hasher_* API (the one-shot cfree_hash stays for library callers). */ + +#define HASH_TOOL "hash" + +static const char HASH_HEX[] = "0123456789abcdef"; + +typedef struct HashOpts { + CfreeHashAlgo algo; +} HashOpts; + +static int hash_parse_algo(const char* s, CfreeHashAlgo* out) { + if (driver_streq(s, "sha256")) { + *out = CFREE_HASH_SHA256; + return 0; + } + if (driver_streq(s, "blake2b")) { + *out = CFREE_HASH_BLAKE2B; + return 0; + } + if (driver_streq(s, "crc32")) { + *out = CFREE_HASH_CRC32; + return 0; + } + return 1; +} + +void driver_help_hash(void) { + driver_printf( + "%.*s", + CFREE_SLICE_ARG(CFREE_SLICE_LIT( + "cfree hash — hash files with SHA-256, BLAKE2b, or CRC-32\n" + "\n" + "USAGE\n" + " cfree hash [-a ALGO] [FILE...]\n" + "\n" + "DESCRIPTION\n" + " Prints one line per input: the lowercase-hex digest, two spaces,\n" + " then the file name (`-` for stdin). 
With no FILE, reads stdin.\n" + "\n" + "OPTIONS\n" + " -a ALGO sha256 (default) | blake2b | crc32\n" + " -h, --help show this help\n" + "\n" + "EXIT CODES\n" + " 0 success 1 I/O error 2 bad usage\n"))); +} + +/* Hash data[0..len) with opts->algo and print "<hex> <name>". Returns 0 on + * success, 1 if the hasher cannot be created (error already reported). */ +static int hash_one(const CfreeContext* ctx, const HashOpts* opts, + const uint8_t* data, size_t len, const char* name) { + CfreeHasher* h = NULL; + uint8_t digest[CFREE_HASH_MAX_LEN]; + char hex[CFREE_HASH_MAX_LEN * 2 + 1]; + size_t dlen = 0, i; + + if (cfree_hasher_new(ctx, opts->algo, &h) != CFREE_OK) { + driver_errf(HASH_TOOL, "failed to start hasher"); + return 1; + } + cfree_hasher_update(h, data, len); + cfree_hasher_final(h, digest, &dlen); + cfree_hasher_free(h); + + for (i = 0; i < dlen; ++i) { + hex[i * 2] = HASH_HEX[digest[i] >> 4]; + hex[i * 2 + 1] = HASH_HEX[digest[i] & 0x0f]; + } + hex[dlen * 2] = '\0'; + driver_printf("%s %s\n", hex, name); + return 0; +} + +int driver_hash(int argc, char** argv) { + DriverEnv env; + CfreeContext ctx; + HashOpts opts; + int i, rc = 1, any_input = 0; + + if (driver_argv_wants_help(argc, argv, 1)) { + driver_help_hash(); + return 0; + } + + memset(&opts, 0, sizeof opts); + opts.algo = CFREE_HASH_SHA256; + driver_env_init(&env); + ctx = driver_env_to_context(&env); + + /* First pass: validate options and note whether any input operand exists. */ + for (i = 1; i < argc; ++i) { + const char* a = argv[i]; + if (driver_streq(a, "-a")) { + if (i + 1 >= argc || hash_parse_algo(argv[++i], &opts.algo) != 0) { + driver_errf(HASH_TOOL, "-a requires sha256, blake2b, or crc32"); + rc = 2; + goto done; + } + continue; + } + if (driver_streq(a, "-")) { + any_input = 1; + continue; + } + if (a[0] == '-' && a[1] != '\0') { + driver_errf(HASH_TOOL, "unknown option: %s", a); + rc = 2; + goto done; + } + any_input = 1; + } + + /* No file operands: hash stdin. 
*/ + if (!any_input) { + uint8_t* buf = NULL; + size_t n = 0; + if (!driver_read_stdin(&env, &buf, &n)) { + driver_errf(HASH_TOOL, "failed to read stdin"); + rc = 1; + goto done; + } + rc = hash_one(&ctx, &opts, buf, n, "-"); + driver_free(&env, buf, n); + goto done; + } + + /* Second pass: inputs, in argv order. */ + rc = 0; + for (i = 1; i < argc; ++i) { + const char* a = argv[i]; + if (driver_streq(a, "-a")) { + ++i; /* skip its value */ + continue; + } + if (driver_streq(a, "-")) { + uint8_t* buf = NULL; + size_t n = 0; + if (!driver_read_stdin(&env, &buf, &n)) { + driver_errf(HASH_TOOL, "failed to read stdin"); + rc = 1; + continue; + } + if (hash_one(&ctx, &opts, buf, n, "-") != 0) rc = 1; + driver_free(&env, buf, n); + continue; + } + { + DriverLoad ld = {0}; + CfreeSlice input; + if (driver_load_bytes(&env.file_io, HASH_TOOL, a, &ld, &input) != 0) { + rc = 1; + continue; + } + if (hash_one(&ctx, &opts, input.data, input.len, a) != 0) rc = 1; + driver_release_bytes(&env.file_io, &ld); + } + } + +done: + driver_env_fini(&env); + return rc; +} diff --git a/driver/driver.h b/driver/driver.h @@ -57,6 +57,7 @@ int driver_cas(int argc, char** argv); int driver_pkg(int argc, char** argv); int driver_xxd(int argc, char** argv); int driver_cmp(int argc, char** argv); +int driver_hash(int argc, char** argv); /* Per-tool help printers. Write a multi-section help text to stdout and * return. The tool entry-points call these when invoked with no args, -h, @@ -83,6 +84,7 @@ void driver_help_cas(void); void driver_help_pkg(void); void driver_help_xxd(void); void driver_help_cmp(void); +void driver_help_hash(void); /* Multi-call top-level help (`cfree`, `cfree -h`, `cfree --help`, * `cfree help`). 
Lists each tool with a one-line summary and explains diff --git a/driver/main.c b/driver/main.c @@ -104,6 +104,10 @@ static const DriverToolDesc driver_tools[] = { {"cmp", driver_cmp, driver_help_cmp, "Compare two files byte by byte"}, #endif +#if CFREE_TOOL_HASH_ENABLED + {"hash", driver_hash, driver_help_hash, + "Hash files with SHA-256, BLAKE2b, or CRC-32"}, +#endif {NULL, NULL, NULL, NULL}, }; diff --git a/include/cfree/config.h b/include/cfree/config.h @@ -116,5 +116,6 @@ #define CFREE_TOOL_PKG_ENABLED 1 #define CFREE_TOOL_XXD_ENABLED 1 #define CFREE_TOOL_CMP_ENABLED 1 +#define CFREE_TOOL_HASH_ENABLED 1 #endif /* CFREE_CONFIG_H */ diff --git a/include/cfree/hash.h b/include/cfree/hash.h @@ -0,0 +1,52 @@ +#ifndef CFREE_HASH_H +#define CFREE_HASH_H + +#include <cfree/core.h> +#include <stddef.h> +#include <stdint.h> + +/* + * General-purpose content hashing: SHA-256, BLAKE2b-256, and CRC-32. Pure + * computation — no I/O, no entropy. The one-shot form needs no context; the + * streaming form allocates a small opaque state from ctx->heap so callers can + * fold an input that does not fit in one buffer. + * + * (The CAS subsystem in <cfree/cas.h> hashes with BLAKE2b too, but couples it + * to a chunk merkle root; this surface is the lean digest-of-bytes path.) + */ + +#define CFREE_SHA256_LEN 32u +#define CFREE_BLAKE2B_LEN 32u +#define CFREE_CRC32_LEN 4u +/* Largest digest any algo here produces; size a buffer with this to hold all. */ +#define CFREE_HASH_MAX_LEN 32u + +typedef enum CfreeHashAlgo { + CFREE_HASH_SHA256 = 0, + CFREE_HASH_BLAKE2B = 1, + CFREE_HASH_CRC32 = 2, +} CfreeHashAlgo; + +/* Digest length in bytes for `algo` (CRC-32 is 4 big-endian bytes), or 0 for an + * unrecognized algo. */ +CFREE_API size_t cfree_hash_len(CfreeHashAlgo algo); + +/* One-shot hash of `data[0..len)`. `out` must hold cfree_hash_len(algo) bytes; + * *out_len (when non-NULL) receives that length. No context, no I/O. Returns + * CFREE_INVALID on a NULL out, a NULL data with nonzero len, or unknown algo. 
*/ +CFREE_API CfreeStatus cfree_hash(CfreeHashAlgo algo, const uint8_t* data, + size_t len, uint8_t* out, size_t* out_len); + +/* Streaming hash. The opaque state comes from ctx->heap; release it with + * cfree_hasher_free (final does not free it). ctx must outlive the hasher. */ +typedef struct CfreeHasher CfreeHasher; + +CFREE_API CfreeStatus cfree_hasher_new(const CfreeContext* ctx, + CfreeHashAlgo algo, CfreeHasher** out); +CFREE_API void cfree_hasher_update(CfreeHasher* h, const uint8_t* data, + size_t len); +CFREE_API CfreeStatus cfree_hasher_final(CfreeHasher* h, uint8_t* out, + size_t* out_len); +CFREE_API void cfree_hasher_free(CfreeHasher* h); + +#endif diff --git a/src/api/hash.c b/src/api/hash.c @@ -0,0 +1,131 @@ +/* Public hashing API: one-shot and streaming SHA-256, BLAKE2b-256, and CRC-32. + * A thin composition over src/core/{sha256,crc32} and src/dist/blake2b. See + * <cfree/hash.h>. */ + +#include <cfree/hash.h> + +#include <string.h> + +#include "core/crc32.h" +#include "core/sha256.h" +#include "dist/blake2b.h" + +/* sha256_update takes a 32-bit length; feed large inputs in bounded chunks. */ +#define HASH_SHA_CHUNK 0x40000000u /* 1 GiB */ + +struct CfreeHasher { + const CfreeContext* ctx; + CfreeHashAlgo algo; + union { + Sha256 sha; + DistBlake2b b2; + uint32_t crc; + } st; +}; + +size_t cfree_hash_len(CfreeHashAlgo algo) { + switch (algo) { + case CFREE_HASH_SHA256: + return CFREE_SHA256_LEN; + case CFREE_HASH_BLAKE2B: + return CFREE_BLAKE2B_LEN; + case CFREE_HASH_CRC32: + return CFREE_CRC32_LEN; + } + return 0; +} + +/* Initialize the per-algo running state (shared by new + the one-shot path). 
*/ +static void hasher_begin(CfreeHasher* h) { + switch (h->algo) { + case CFREE_HASH_SHA256: + sha256_init(&h->st.sha); + break; + case CFREE_HASH_BLAKE2B: + dist_blake2b_init(&h->st.b2, CFREE_BLAKE2B_LEN); + break; + case CFREE_HASH_CRC32: + h->st.crc = 0; + break; + } +} + +void cfree_hasher_update(CfreeHasher* h, const uint8_t* data, size_t len) { + if (!h || (!data && len)) return; /* NULL hasher, or NULL data with len > 0: no-op */ + switch (h->algo) { + case CFREE_HASH_SHA256: + while (len) { + uint32_t n = len > HASH_SHA_CHUNK ? HASH_SHA_CHUNK : (uint32_t)len; + sha256_update(&h->st.sha, data, n); + data += n; + len -= n; + } + break; + case CFREE_HASH_BLAKE2B: + dist_blake2b_update(&h->st.b2, data, len); + break; + case CFREE_HASH_CRC32: + h->st.crc = cfree_crc32(h->st.crc, data, len); + break; + } +} + +CfreeStatus cfree_hasher_final(CfreeHasher* h, uint8_t* out, size_t* out_len) { + if (!h || !out) return CFREE_INVALID; + switch (h->algo) { + case CFREE_HASH_SHA256: + sha256_final(&h->st.sha, out); + break; + case CFREE_HASH_BLAKE2B: + dist_blake2b_final(&h->st.b2, out); + break; + case CFREE_HASH_CRC32: /* written most-significant byte first */ + out[0] = (uint8_t)(h->st.crc >> 24); + out[1] = (uint8_t)(h->st.crc >> 16); + out[2] = (uint8_t)(h->st.crc >> 8); + out[3] = (uint8_t)(h->st.crc); + break; + } + if (out_len) *out_len = cfree_hash_len(h->algo); + return CFREE_OK; +} + +CfreeStatus cfree_hasher_new(const CfreeContext* ctx, CfreeHashAlgo algo, + CfreeHasher** out) { + CfreeHeap* heap; + CfreeHasher* h; + if (!out) return CFREE_INVALID; + *out = NULL; + if (!ctx || !ctx->heap) return CFREE_INVALID; + if (cfree_hash_len(algo) == 0) return CFREE_INVALID; + heap = ctx->heap; + h = (CfreeHasher*)heap->alloc(heap, sizeof(*h), _Alignof(CfreeHasher)); + if (!h) return CFREE_NOMEM; + memset(h, 0, sizeof(*h)); + h->ctx = ctx; + h->algo = algo; + hasher_begin(h); + *out = h; + return CFREE_OK; +} + +void cfree_hasher_free(CfreeHasher* h) { + CfreeHeap* heap; + 
if (!h) return; + heap = h->ctx->heap; /* reached through the ctx stored by cfree_hasher_new */ + heap->free(heap, h, sizeof(*h)); +} + 
+CfreeStatus cfree_hash(CfreeHashAlgo algo, const uint8_t* data, size_t len, + uint8_t* out, size_t* out_len) { + CfreeHasher tmp; + if (!out || (!data && len)) return CFREE_INVALID; + if (cfree_hash_len(algo) == 0) return CFREE_INVALID; + /* No heap needed: the running state lives on the stack and neither update + * nor final touches tmp.ctx. */ + memset(&tmp, 0, sizeof tmp); + tmp.algo = algo; + hasher_begin(&tmp); + cfree_hasher_update(&tmp, data, len); + return cfree_hasher_final(&tmp, out, out_len); +} diff --git a/src/core/crc32.c b/src/core/crc32.c @@ -0,0 +1,18 @@ +#include "core/crc32.h" + +/* Bit-at-a-time reflected CRC-32. Branch-free inner loop (mask trick) keeps it + * constant-time per bit and free of a 1 KiB lookup table — the throughput is + * ample for hashing and gzip header/trailer CRCs, the two callers. */ +uint32_t cfree_crc32(uint32_t crc, const uint8_t* data, size_t len) { + size_t i; + unsigned k; + crc = ~crc; + for (i = 0; i < len; ++i) { + crc ^= data[i]; + for (k = 0; k < 8u; ++k) { + uint32_t mask = (uint32_t)0 - (crc & 1u); + crc = (crc >> 1) ^ (0xedb88320u & mask); + } + } + return ~crc; +} diff --git a/src/core/crc32.h b/src/core/crc32.h @@ -0,0 +1,15 @@ +#ifndef CFREE_CORE_CRC32_H +#define CFREE_CORE_CRC32_H + +#include <stddef.h> +#include <stdint.h> + +/* Streaming CRC-32 (IEEE 802.3, reflected, polynomial 0xedb88320). + * + * The running value uses the finalized (post-complement) convention: pass + * seed 0 to begin, chain the return value across successive chunks, and the + * final return is the CRC-32 of the concatenated input. A single + * cfree_crc32(0, data, len) therefore yields the standard CRC-32 of `data`. */ +uint32_t cfree_crc32(uint32_t seed, const uint8_t* data, size_t len); + +#endif diff --git a/src/dist/deflate.c b/src/dist/deflate.c @@ -2,6 +2,8 @@ #include <string.h> +#include "core/crc32.h" + /* * Private raw DEFLATE/INFLATE codec. 
* @@ -1953,20 +1955,6 @@ size_t xinflate_decompress_mem_to_mem(void* out_buf, size_t out_buf_len, #define GZ_HEADER_LEN 10u #define GZ_TRAILER_LEN 8u -static uint32_t crc32_update(uint32_t crc, const uint8_t* data, size_t len) { - size_t i; - unsigned k; - crc = ~crc; - for (i = 0; i < len; ++i) { - crc ^= data[i]; - for (k = 0; k < 8u; ++k) { - uint32_t mask = (uint32_t)0 - (crc & 1u); - crc = (crc >> 1) ^ (0xedb88320u & mask); - } - } - return ~crc; -} - static int gz_write(CfreeWriter* out, const void* data, size_t n) { return cfree_writer_write(out, data, n) == CFREE_OK ? DIST_OK : DIST_ERR; } @@ -1995,7 +1983,7 @@ static int gz_put_deflate(const void* data, int len, void* user) { static int gz_skip_header_bytes(const uint8_t* data, size_t trailer_off, size_t* off, size_t n, uint32_t* hcrc) { if (n > trailer_off - *off) return DIST_ERR; - *hcrc = crc32_update(*hcrc, data + *off, n); + *hcrc = cfree_crc32(*hcrc, data + *off, n); *off += n; return DIST_OK; } @@ -2004,7 +1992,7 @@ static int gz_skip_header_zstr(const uint8_t* data, size_t trailer_off, size_t* off, uint32_t* hcrc) { while (*off < trailer_off) { uint8_t c = data[*off]; - *hcrc = crc32_update(*hcrc, data + *off, 1); + *hcrc = cfree_crc32(*hcrc, data + *off, 1); ++*off; if (c == 0) return DIST_OK; } @@ -2026,7 +2014,7 @@ static int gz_parse_header(const uint8_t* data, size_t len, size_t* body_off) { flg = data[3]; if (flg & GZ_FLG_RESERVED) return DIST_ERR; - hcrc = crc32_update(0, data, GZ_HEADER_LEN); + hcrc = cfree_crc32(0, data, GZ_HEADER_LEN); if (flg & GZ_FLG_FEXTRA) { uint16_t xlen; @@ -2075,7 +2063,7 @@ int dist_gz_compress(CfreeWriter* out, const uint8_t* data, size_t len) { st = xdeflate_compress(&def, data, &in_len, NULL, NULL, XDEFLATE_FINISH); if (st != XDEFLATE_STATUS_DONE || in_len != len) return DIST_ERR; - put_u32le(trailer, crc32_update(0, data, len)); + put_u32le(trailer, cfree_crc32(0, data, len)); put_u32le(trailer + 4, (uint32_t)len); return gz_write(out, trailer, sizeof 
trailer); } @@ -2105,7 +2093,7 @@ int dist_gz_decompress(CfreeWriter* out, const uint8_t* data, size_t len) { if (out_avail) { if (gz_write(out, ring + ring_ofs, out_avail) != DIST_OK) return DIST_ERR; - crc = crc32_update(crc, ring + ring_ofs, out_avail); + crc = cfree_crc32(crc, ring + ring_ofs, out_avail); if (total + out_avail < total) return DIST_ERR; total += out_avail; }