commit 668e21ada80ea8280e3be9a229727590e54b1681
parent 55e6cae81c8e601c13504393d234c59e43135ffa
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 2 Jun 2026 11:06:58 -0700
hash: add public cfree/hash.h API + driver hash tool
New public surface in <cfree/hash.h>: one-shot cfree_hash() and streaming
cfree_hasher_{new,update,final,free} over SHA-256, BLAKE2b-256, and CRC-32.
Backed by src/api/hash.c composing src/core/sha256, the dist BLAKE2b wrapper,
and a CRC-32 lifted out of src/dist/deflate.c into src/core/crc32.{c,h}
(single source of truth; deflate's gzip trailer now calls cfree_crc32).
The driver 'hash' tool prints coreutils-style '<hex> <name>' lines,
-a sha256|blake2b|crc32 (default sha256), '-'/no-arg = stdin, multiple files.
Verified against known vectors (SHA-256 empty/abc, CRC-32 0xcbf43926,
BLAKE2b-256 abc) and shasum.
Diffstat:
10 files changed, 408 insertions(+), 19 deletions(-)
diff --git a/Makefile b/Makefile
@@ -397,6 +397,9 @@ endif
ifeq ($(CFREE_TOOL_CMP_ENABLED),1)
DRIVER_TOOL_SRCS += driver/cmd/cmp.c
endif
+ifeq ($(CFREE_TOOL_HASH_ENABLED),1)
+DRIVER_TOOL_SRCS += driver/cmd/hash.c
+endif
DRIVER_SRCS += $(sort $(DRIVER_TOOL_SRCS))
ifneq ($(filter 1,$(CFREE_TOOL_CC_ENABLED) $(CFREE_TOOL_CHECK_ENABLED) $(CFREE_TOOL_CPP_ENABLED) $(CFREE_TOOL_AS_ENABLED) $(CFREE_TOOL_DBG_ENABLED) $(CFREE_TOOL_RUN_ENABLED)),)
DRIVER_SRCS += driver/lib/cflags.c
diff --git a/driver/cmd/hash.c b/driver/cmd/hash.c
@@ -0,0 +1,175 @@
+#include <cfree/core.h>
+#include <cfree/hash.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "driver.h"
+#include "env.h"
+
+/* `cfree hash` — print the SHA-256, BLAKE2b-256, or CRC-32 digest of each
+ * input. Output is coreutils-style ("<hex> <name>"), so it diffs cleanly
+ * against sha256sum / b2sum / cksum -a output. With no FILE, or with `-`,
+ * reads stdin. Drives the streaming cfree_hasher_* API (the one-shot
+ * cfree_hash stays for library callers). */
+
+#define HASH_TOOL "hash" /* tool name handed to driver_errf / driver_load_bytes */
+
+static const char HASH_HEX[] = "0123456789abcdef"; /* nibble value -> lowercase hex digit */
+
+typedef struct HashOpts { /* settings gathered from the command line */
+ CfreeHashAlgo algo; /* digest algorithm; driver_hash defaults it to SHA-256 */
+} HashOpts;
+
+/* Translate an -a argument into its CfreeHashAlgo. On a match, stores the
+ * algorithm in *out and returns 0. Returns 1, leaving *out untouched, when
+ * `s` is not one of the recognized names. */
+static int hash_parse_algo(const char* s, CfreeHashAlgo* out) {
+  static const struct {
+    const char* name;
+    CfreeHashAlgo algo;
+  } known[] = {
+      {"sha256", CFREE_HASH_SHA256},
+      {"blake2b", CFREE_HASH_BLAKE2B},
+      {"crc32", CFREE_HASH_CRC32},
+  };
+  size_t k;
+
+  for (k = 0; k < sizeof known / sizeof known[0]; ++k) {
+    if (driver_streq(s, known[k].name)) {
+      *out = known[k].algo;
+      return 0;
+    }
+  }
+  return 1;
+}
+
+void driver_help_hash(void) { /* Print the `cfree hash` usage text through driver_printf. */
+ driver_printf(
+ "%.*s",
+ CFREE_SLICE_ARG(CFREE_SLICE_LIT(
+ "cfree hash — hash files with SHA-256, BLAKE2b, or CRC-32\n"
+ "\n"
+ "USAGE\n"
+ " cfree hash [-a ALGO] [FILE...]\n"
+ "\n"
+ "DESCRIPTION\n"
+ " Prints one line per input: the lowercase-hex digest, two spaces,\n"
+ " then the file name (`-` for stdin). With no FILE, reads stdin.\n"
+ "\n"
+ "OPTIONS\n"
+ " -a ALGO sha256 (default) | blake2b | crc32\n"
+ " -h, --help show this help\n"
+ "\n"
+ "EXIT CODES\n"
+ " 0 success 1 I/O error 2 bad usage\n")));
+}
+
+/* Hash data[0..len) with opts->algo and print "<hex>  <name>" (digest, two
+ * spaces, name) followed by a newline.
+ *
+ * ctx   supplies the heap the streaming hasher allocates its state from.
+ * opts  selects the algorithm.
+ * data  input bytes; len is their count.
+ * name  label printed after the digest (the file name, or "-" for stdin).
+ *
+ * Returns 0 on success, 1 on failure (error already reported). A failed or
+ * implausible finalization is an error: nothing is printed for that input
+ * rather than an empty or truncated digest. */
+static int hash_one(const CfreeContext* ctx, const HashOpts* opts,
+                    const uint8_t* data, size_t len, const char* name) {
+  CfreeHasher* h = NULL;
+  CfreeStatus st;
+  uint8_t digest[CFREE_HASH_MAX_LEN];
+  char hex[CFREE_HASH_MAX_LEN * 2 + 1];
+  size_t dlen = 0, i;
+
+  if (cfree_hasher_new(ctx, opts->algo, &h) != CFREE_OK) {
+    driver_errf(HASH_TOOL, "failed to start hasher");
+    return 1;
+  }
+  cfree_hasher_update(h, data, len);
+  st = cfree_hasher_final(h, digest, &dlen);
+  cfree_hasher_free(h); /* final does not release the state */
+
+  /* dlen also bounds the writes into hex[], so reject anything that does not
+   * fit the digest buffer as well as an outright failure. */
+  if (st != CFREE_OK || dlen == 0 || dlen > sizeof digest) {
+    driver_errf(HASH_TOOL, "failed to finalize digest for %s", name);
+    return 1;
+  }
+
+  for (i = 0; i < dlen; ++i) {
+    hex[i * 2] = HASH_HEX[digest[i] >> 4];
+    hex[i * 2 + 1] = HASH_HEX[digest[i] & 0x0f];
+  }
+  hex[dlen * 2] = '\0';
+  /* Two spaces between digest and name, as the help text documents and as
+   * sha256sum / b2sum print, so the output diffs cleanly against them. */
+  driver_printf("%s  %s\n", hex, name);
+  return 0;
+}
+
+int driver_hash(int argc, char** argv) { /* Entry point for `cfree hash`.
+ * Returns 0 on success (or after printing help), 1 when any input could not
+ * be read or hashed, and 2 on a usage error. */
+ DriverEnv env;
+ CfreeContext ctx;
+ HashOpts opts;
+ int i, rc = 1, any_input = 0;
+
+ if (driver_argv_wants_help(argc, argv, 1)) {
+ driver_help_hash();
+ return 0;
+ }
+
+ memset(&opts, 0, sizeof opts);
+ opts.algo = CFREE_HASH_SHA256; /* default when no -a is given */
+ driver_env_init(&env);
+ ctx = driver_env_to_context(&env);
+
+ /* First pass: options. Validates every -a value (a later -a overrides an
+ * earlier one), rejects any other dash-prefixed argument, and records
+ * whether at least one operand (a file name or `-`) is present. Nothing is
+ * hashed here, so a usage error exits before any digest is printed. */
+ for (i = 1; i < argc; ++i) {
+ const char* a = argv[i];
+ if (driver_streq(a, "-a")) {
+ if (i + 1 >= argc || hash_parse_algo(argv[++i], &opts.algo) != 0) {
+ driver_errf(HASH_TOOL, "-a requires sha256, blake2b, or crc32");
+ rc = 2;
+ goto done;
+ }
+ continue;
+ }
+ if (driver_streq(a, "-")) {
+ any_input = 1;
+ continue;
+ }
+ if (a[0] == '-' && a[1] != '\0') {
+ driver_errf(HASH_TOOL, "unknown option: %s", a);
+ rc = 2;
+ goto done;
+ }
+ any_input = 1;
+ }
+
+ /* No file operands: hash stdin. The whole stream is read into one buffer
+ * and the digest is printed under the name `-`. */
+ if (!any_input) {
+ uint8_t* buf = NULL;
+ size_t n = 0;
+ if (!driver_read_stdin(&env, &buf, &n)) {
+ driver_errf(HASH_TOOL, "failed to read stdin");
+ rc = 1;
+ goto done;
+ }
+ rc = hash_one(&ctx, &opts, buf, n, "-");
+ driver_free(&env, buf, n);
+ goto done;
+ }
+
+ /* Second pass: inputs, in argv order. rc starts at 0 and is set to 1 by any
+ * failed input; the remaining inputs are still processed. */
+ rc = 0;
+ for (i = 1; i < argc; ++i) {
+ const char* a = argv[i];
+ if (driver_streq(a, "-a")) {
+ ++i; /* skip its value (the first pass already checked that one follows) */
+ continue;
+ }
+ if (driver_streq(a, "-")) {
+ uint8_t* buf = NULL;
+ size_t n = 0;
+ if (!driver_read_stdin(&env, &buf, &n)) {
+ driver_errf(HASH_TOOL, "failed to read stdin");
+ rc = 1;
+ continue;
+ }
+ if (hash_one(&ctx, &opts, buf, n, "-") != 0) rc = 1;
+ driver_free(&env, buf, n);
+ continue;
+ }
+ {
+ DriverLoad ld = {0};
+ CfreeSlice input;
+ if (driver_load_bytes(&env.file_io, HASH_TOOL, a, &ld, &input) != 0) {
+ rc = 1;
+ continue;
+ }
+ if (hash_one(&ctx, &opts, input.data, input.len, a) != 0) rc = 1;
+ driver_release_bytes(&env.file_io, &ld);
+ }
+ }
+
+done: /* shared exit for every path taken after driver_env_init */
+ driver_env_fini(&env);
+ return rc;
+}
diff --git a/driver/driver.h b/driver/driver.h
@@ -57,6 +57,7 @@ int driver_cas(int argc, char** argv);
int driver_pkg(int argc, char** argv);
int driver_xxd(int argc, char** argv);
int driver_cmp(int argc, char** argv);
+int driver_hash(int argc, char** argv);
/* Per-tool help printers. Write a multi-section help text to stdout and
* return. The tool entry-points call these when invoked with no args, -h,
@@ -83,6 +84,7 @@ void driver_help_cas(void);
void driver_help_pkg(void);
void driver_help_xxd(void);
void driver_help_cmp(void);
+void driver_help_hash(void);
/* Multi-call top-level help (`cfree`, `cfree -h`, `cfree --help`,
* `cfree help`). Lists each tool with a one-line summary and explains
diff --git a/driver/main.c b/driver/main.c
@@ -104,6 +104,10 @@ static const DriverToolDesc driver_tools[] = {
{"cmp", driver_cmp, driver_help_cmp,
"Compare two files byte by byte"},
#endif
+#if CFREE_TOOL_HASH_ENABLED
+ {"hash", driver_hash, driver_help_hash,
+ "Hash files with SHA-256, BLAKE2b, or CRC-32"},
+#endif
{NULL, NULL, NULL, NULL},
};
diff --git a/include/cfree/config.h b/include/cfree/config.h
@@ -116,5 +116,6 @@
#define CFREE_TOOL_PKG_ENABLED 1
#define CFREE_TOOL_XXD_ENABLED 1
#define CFREE_TOOL_CMP_ENABLED 1
+#define CFREE_TOOL_HASH_ENABLED 1
#endif /* CFREE_CONFIG_H */
diff --git a/include/cfree/hash.h b/include/cfree/hash.h
@@ -0,0 +1,52 @@
+#ifndef CFREE_HASH_H
+#define CFREE_HASH_H
+
+#include <cfree/core.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/*
+ * General-purpose content hashing: SHA-256, BLAKE2b-256, and CRC-32. Pure
+ * computation — no I/O, no entropy. The one-shot form needs no context; the
+ * streaming form allocates a small opaque state from ctx->heap so callers can
+ * fold an input that does not fit in one buffer.
+ *
+ * (The CAS subsystem in <cfree/cas.h> hashes with BLAKE2b too, but couples it
+ * to a chunk merkle root; this surface is the lean digest-of-bytes path.)
+ */
+
+#define CFREE_SHA256_LEN 32u
+#define CFREE_BLAKE2B_LEN 32u
+#define CFREE_CRC32_LEN 4u
+/* Largest digest any algo here produces; size a buffer with this to hold all. */
+#define CFREE_HASH_MAX_LEN 32u
+
+typedef enum CfreeHashAlgo {
+ CFREE_HASH_SHA256 = 0, /* SHA-256; digest is CFREE_SHA256_LEN bytes */
+ CFREE_HASH_BLAKE2B = 1, /* BLAKE2b; digest is CFREE_BLAKE2B_LEN bytes */
+ CFREE_HASH_CRC32 = 2, /* CRC-32; CFREE_CRC32_LEN bytes, most significant first */
+} CfreeHashAlgo;
+
+/* Digest length in bytes for `algo` (CRC-32 is 4 big-endian bytes), or 0 for an
+ * unrecognized algo. */
+CFREE_API size_t cfree_hash_len(CfreeHashAlgo algo);
+
+/* One-shot hash of `data[0..len)`. `out` must hold cfree_hash_len(algo) bytes;
+ * *out_len (when non-NULL) receives that length. `data` may be NULL only when
+ * `len` is 0. No context, no I/O. Returns CFREE_INVALID on a NULL out, a NULL
+ * data with a non-zero len, or an unknown algo; CFREE_OK otherwise. */
+CFREE_API CfreeStatus cfree_hash(CfreeHashAlgo algo, const uint8_t* data,
+ size_t len, uint8_t* out, size_t* out_len);
+
+/* Streaming hash. The opaque state is allocated from ctx->heap and must be
+ * released with cfree_hasher_free (final does not free it). The hasher keeps
+ * the `ctx` pointer and frees through ctx->heap, so `ctx` must remain valid
+ * until cfree_hasher_free has been called.
+ *
+ * cfree_hasher_new     sets *out to NULL on failure. Returns CFREE_INVALID for
+ *                      a NULL out, a NULL ctx or ctx->heap, or an unknown
+ *                      algo, and CFREE_NOMEM when the heap allocation fails.
+ * cfree_hasher_update  folds data[0..len) into the state. A NULL h, or a NULL
+ *                      data with a non-zero len, is silently ignored.
+ * cfree_hasher_final   writes cfree_hash_len(algo) bytes to out and, when
+ *                      out_len is non-NULL, stores that length. Returns
+ *                      CFREE_INVALID on a NULL h or out.
+ * cfree_hasher_free    releases the state; a NULL h is a no-op. */
+typedef struct CfreeHasher CfreeHasher;
+
+CFREE_API CfreeStatus cfree_hasher_new(const CfreeContext* ctx,
+ CfreeHashAlgo algo, CfreeHasher** out);
+CFREE_API void cfree_hasher_update(CfreeHasher* h, const uint8_t* data,
+ size_t len);
+CFREE_API CfreeStatus cfree_hasher_final(CfreeHasher* h, uint8_t* out,
+ size_t* out_len);
+CFREE_API void cfree_hasher_free(CfreeHasher* h);
+
+#endif
diff --git a/src/api/hash.c b/src/api/hash.c
@@ -0,0 +1,131 @@
+/* Public hashing API: one-shot and streaming SHA-256, BLAKE2b-256, and CRC-32.
+ * A thin composition over src/core/{sha256,crc32} and src/dist/blake2b. See
+ * <cfree/hash.h>. */
+
+#include <cfree/hash.h>
+
+#include <string.h>
+
+#include "core/crc32.h"
+#include "core/sha256.h"
+#include "dist/blake2b.h"
+
+/* sha256_update takes a 32-bit length; feed large inputs in bounded chunks. */
+#define HASH_SHA_CHUNK 0x40000000u /* 1 GiB */
+
+struct CfreeHasher {
+ const CfreeContext* ctx; /* borrowed; its heap frees this state. NULL in cfree_hash's stack copy */
+ CfreeHashAlgo algo; /* selects which member of `st` is live */
+ union {
+ Sha256 sha;
+ DistBlake2b b2;
+ uint32_t crc; /* running value chained through cfree_crc32 */
+ } st;
+};
+
+/* Number of digest bytes `algo` produces, or 0 when `algo` is not one of the
+ * CfreeHashAlgo values. */
+size_t cfree_hash_len(CfreeHashAlgo algo) {
+  if (algo == CFREE_HASH_SHA256) return CFREE_SHA256_LEN;
+  if (algo == CFREE_HASH_BLAKE2B) return CFREE_BLAKE2B_LEN;
+  if (algo == CFREE_HASH_CRC32) return CFREE_CRC32_LEN;
+  return 0;
+}
+
+/* Reset h->st to the starting state for h->algo. Used by both
+ * cfree_hasher_new and the one-shot cfree_hash. An unrecognized algo leaves
+ * the state untouched. */
+static void hasher_begin(CfreeHasher* h) {
+  const CfreeHashAlgo which = h->algo;
+
+  if (which == CFREE_HASH_SHA256) {
+    sha256_init(&h->st.sha);
+  } else if (which == CFREE_HASH_BLAKE2B) {
+    dist_blake2b_init(&h->st.b2, CFREE_BLAKE2B_LEN);
+  } else if (which == CFREE_HASH_CRC32) {
+    h->st.crc = 0; /* cfree_crc32 takes 0 as the initial seed */
+  }
+}
+
+/* Fold data[0..len) into the running state. Does nothing when h is NULL or
+ * when data is NULL while len is non-zero. */
+void cfree_hasher_update(CfreeHasher* h, const uint8_t* data, size_t len) {
+  if (h == NULL) return;
+  if (data == NULL && len != 0) return;
+
+  if (h->algo == CFREE_HASH_SHA256) {
+    /* sha256_update takes a 32-bit count, so feed it at most HASH_SHA_CHUNK
+     * bytes per call. */
+    const uint8_t* cursor = data;
+    size_t left = len;
+    while (left != 0) {
+      const uint32_t step =
+          (uint32_t)(left > HASH_SHA_CHUNK ? HASH_SHA_CHUNK : left);
+      sha256_update(&h->st.sha, cursor, step);
+      cursor += step;
+      left -= step;
+    }
+  } else if (h->algo == CFREE_HASH_BLAKE2B) {
+    dist_blake2b_update(&h->st.b2, data, len);
+  } else if (h->algo == CFREE_HASH_CRC32) {
+    h->st.crc = cfree_crc32(h->st.crc, data, len);
+  }
+}
+
+/* Write the digest for h's algorithm into `out` and, when out_len is non-NULL,
+ * store cfree_hash_len(h->algo) there. Returns CFREE_INVALID when h or out is
+ * NULL, CFREE_OK otherwise. The state is not freed. */
+CfreeStatus cfree_hasher_final(CfreeHasher* h, uint8_t* out, size_t* out_len) {
+  if (h == NULL || out == NULL) return CFREE_INVALID;
+
+  if (h->algo == CFREE_HASH_SHA256) {
+    sha256_final(&h->st.sha, out);
+  } else if (h->algo == CFREE_HASH_BLAKE2B) {
+    dist_blake2b_final(&h->st.b2, out);
+  } else if (h->algo == CFREE_HASH_CRC32) {
+    const uint32_t crc = h->st.crc;
+    unsigned b;
+    /* Emit the 32-bit value most significant byte first. */
+    for (b = 0; b < 4u; ++b) {
+      out[b] = (uint8_t)(crc >> (24u - 8u * b));
+    }
+  }
+
+  if (out_len != NULL) *out_len = cfree_hash_len(h->algo);
+  return CFREE_OK;
+}
+
+/* Allocate and initialize a streaming hasher for `algo` from ctx->heap.
+ * *out is NULL on every failure path. Returns CFREE_INVALID for a NULL out, a
+ * NULL ctx or ctx->heap, or an unknown algo; CFREE_NOMEM when the allocation
+ * fails; CFREE_OK with *out set otherwise. The hasher retains `ctx`. */
+CfreeStatus cfree_hasher_new(const CfreeContext* ctx, CfreeHashAlgo algo,
+                             CfreeHasher** out) {
+  CfreeHasher* state;
+
+  if (out == NULL) return CFREE_INVALID;
+  *out = NULL;
+  if (ctx == NULL || ctx->heap == NULL || cfree_hash_len(algo) == 0) {
+    return CFREE_INVALID;
+  }
+
+  state = (CfreeHasher*)ctx->heap->alloc(ctx->heap, sizeof *state,
+                                         _Alignof(CfreeHasher));
+  if (state == NULL) return CFREE_NOMEM;
+
+  memset(state, 0, sizeof *state);
+  state->ctx = ctx;
+  state->algo = algo;
+  hasher_begin(state);
+  *out = state;
+  return CFREE_OK;
+}
+
+/* Return the hasher's storage to the heap of the context it was created with.
+ * A NULL h is a no-op. */
+void cfree_hasher_free(CfreeHasher* h) {
+  if (h != NULL) {
+    CfreeHeap* owner = h->ctx->heap;
+    owner->free(owner, h, sizeof *h);
+  }
+}
+
+/* One-shot digest of data[0..len) into `out`. Returns CFREE_INVALID for a NULL
+ * out, a NULL data with non-zero len, or an unknown algo; otherwise the result
+ * of cfree_hasher_final. */
+CfreeStatus cfree_hash(CfreeHashAlgo algo, const uint8_t* data, size_t len,
+                       uint8_t* out, size_t* out_len) {
+  CfreeHasher scratch;
+
+  if (out == NULL) return CFREE_INVALID;
+  if (data == NULL && len != 0) return CFREE_INVALID;
+  if (cfree_hash_len(algo) == 0) return CFREE_INVALID;
+
+  /* The state lives on the stack: update and final never read scratch.ctx,
+   * so no heap or context is required. */
+  memset(&scratch, 0, sizeof scratch);
+  scratch.algo = algo;
+  hasher_begin(&scratch);
+  cfree_hasher_update(&scratch, data, len);
+  return cfree_hasher_final(&scratch, out, out_len);
+}
diff --git a/src/core/crc32.c b/src/core/crc32.c
@@ -0,0 +1,18 @@
+#include "core/crc32.h"
+
+/* Reflected CRC-32 (polynomial 0xedb88320) processed one bit at a time, with
+ * no lookup table. The conditional XOR is done with an all-ones / all-zeros
+ * mask derived from the low bit instead of a branch. `crc` is the value
+ * returned for the preceding bytes (0 to start); the return value is the
+ * CRC-32 of everything folded in so far. */
+uint32_t cfree_crc32(uint32_t crc, const uint8_t* data, size_t len) {
+  uint32_t reg = ~crc; /* undo the final complement of the previous call */
+  size_t pos = 0;
+
+  while (pos < len) {
+    int bit;
+    reg ^= data[pos++];
+    for (bit = 8; bit > 0; --bit) {
+      const uint32_t low_mask = (uint32_t)0 - (reg & 1u);
+      reg >>= 1;
+      reg ^= 0xedb88320u & low_mask;
+    }
+  }
+  return ~reg;
+}
diff --git a/src/core/crc32.h b/src/core/crc32.h
@@ -0,0 +1,15 @@
+#ifndef CFREE_CORE_CRC32_H
+#define CFREE_CORE_CRC32_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/* Streaming CRC-32 (IEEE 802.3, reflected, polynomial 0xedb88320).
+ *
+ * The running value uses the finalized (post-complement) convention: pass
+ * seed 0 to begin, chain the return value across successive chunks, and the
+ * final return is the CRC-32 of the concatenated input. A single
+ * cfree_crc32(0, data, len) therefore yields the standard CRC-32 of `data`.
+ * With len == 0 the seed is returned unchanged and `data` is not read. */
+uint32_t cfree_crc32(uint32_t seed, const uint8_t* data, size_t len);
+
+#endif
diff --git a/src/dist/deflate.c b/src/dist/deflate.c
@@ -2,6 +2,8 @@
#include <string.h>
+#include "core/crc32.h"
+
/*
* Private raw DEFLATE/INFLATE codec.
*
@@ -1953,20 +1955,6 @@ size_t xinflate_decompress_mem_to_mem(void* out_buf, size_t out_buf_len,
#define GZ_HEADER_LEN 10u
#define GZ_TRAILER_LEN 8u
-static uint32_t crc32_update(uint32_t crc, const uint8_t* data, size_t len) {
- size_t i;
- unsigned k;
- crc = ~crc;
- for (i = 0; i < len; ++i) {
- crc ^= data[i];
- for (k = 0; k < 8u; ++k) {
- uint32_t mask = (uint32_t)0 - (crc & 1u);
- crc = (crc >> 1) ^ (0xedb88320u & mask);
- }
- }
- return ~crc;
-}
-
static int gz_write(CfreeWriter* out, const void* data, size_t n) {
return cfree_writer_write(out, data, n) == CFREE_OK ? DIST_OK : DIST_ERR;
}
@@ -1995,7 +1983,7 @@ static int gz_put_deflate(const void* data, int len, void* user) {
static int gz_skip_header_bytes(const uint8_t* data, size_t trailer_off,
size_t* off, size_t n, uint32_t* hcrc) {
if (n > trailer_off - *off) return DIST_ERR;
- *hcrc = crc32_update(*hcrc, data + *off, n);
+ *hcrc = cfree_crc32(*hcrc, data + *off, n);
*off += n;
return DIST_OK;
}
@@ -2004,7 +1992,7 @@ static int gz_skip_header_zstr(const uint8_t* data, size_t trailer_off,
size_t* off, uint32_t* hcrc) {
while (*off < trailer_off) {
uint8_t c = data[*off];
- *hcrc = crc32_update(*hcrc, data + *off, 1);
+ *hcrc = cfree_crc32(*hcrc, data + *off, 1);
++*off;
if (c == 0) return DIST_OK;
}
@@ -2026,7 +2014,7 @@ static int gz_parse_header(const uint8_t* data, size_t len, size_t* body_off) {
flg = data[3];
if (flg & GZ_FLG_RESERVED) return DIST_ERR;
- hcrc = crc32_update(0, data, GZ_HEADER_LEN);
+ hcrc = cfree_crc32(0, data, GZ_HEADER_LEN);
if (flg & GZ_FLG_FEXTRA) {
uint16_t xlen;
@@ -2075,7 +2063,7 @@ int dist_gz_compress(CfreeWriter* out, const uint8_t* data, size_t len) {
st = xdeflate_compress(&def, data, &in_len, NULL, NULL, XDEFLATE_FINISH);
if (st != XDEFLATE_STATUS_DONE || in_len != len) return DIST_ERR;
- put_u32le(trailer, crc32_update(0, data, len));
+ put_u32le(trailer, cfree_crc32(0, data, len));
put_u32le(trailer + 4, (uint32_t)len);
return gz_write(out, trailer, sizeof trailer);
}
@@ -2105,7 +2093,7 @@ int dist_gz_decompress(CfreeWriter* out, const uint8_t* data, size_t len) {
if (out_avail) {
if (gz_write(out, ring + ring_ofs, out_avail) != DIST_OK)
return DIST_ERR;
- crc = crc32_update(crc, ring + ring_ofs, out_avail);
+ crc = cfree_crc32(crc, ring + ring_ofs, out_avail);
if (total + out_avail < total) return DIST_ERR;
total += out_avail;
}