kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit ffc9d1d619fb773720f51a7033a308752c9feb6f
parent 8add2caae50139cc197d41b88b205987dc711bfb
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri, 29 May 2026 09:53:18 -0700

cgpkg v3 CAS-based packages

Diffstat:
MMakefile | 7+++++++
Mdoc/DISTRIBUTE.md | 723++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
Adriver/cas.c | 491+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adriver/dist/blob.c | 103+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adriver/dist/blob.h | 32++++++++++++++++++++++++++++++++
Adriver/dist/cas.c | 264+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adriver/dist/cas.h | 54++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mdriver/dist/cfpkg.c | 481+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mdriver/dist/cfpkg.h | 80+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mdriver/dist/dist.h | 1+
Mdriver/dist/manifest.c | 456+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mdriver/dist/manifest.h | 47+++++++++++++++++++++++++++++++++++++++++++++++
Adriver/dist/tree.c | 343+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adriver/dist/tree.h | 44++++++++++++++++++++++++++++++++++++++++++++
Mdriver/driver.h | 2++
Mdriver/env.h | 8++++++++
Mdriver/env/posix.c | 64++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mdriver/env/windows.c | 105+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mdriver/main.c | 4++++
Mdriver/pkg.c | 1476++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
Minclude/cfree/config.h | 1+
Mmk/config.mk | 1+
Msrc/core/config_assert.c | 2++
Atest/cas/run.sh | 282+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtest/pkg/run.sh | 537+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
Mtest/test.mk | 6+++++-
26 files changed, 5020 insertions(+), 594 deletions(-)

diff --git a/Makefile b/Makefile @@ -347,6 +347,13 @@ endif ifeq ($(CFREE_TOOL_STRINGS_ENABLED),1) DRIVER_TOOL_SRCS += driver/strings.c endif +ifneq ($(filter 1,$(CFREE_TOOL_CAS_ENABLED) $(CFREE_TOOL_PKG_ENABLED)),) +DRIVER_TOOL_SRCS += driver/dist/dist.c driver/dist/blake2b.c \ + driver/dist/blob.c driver/dist/tree.c driver/dist/cas.c +endif +ifeq ($(CFREE_TOOL_CAS_ENABLED),1) +DRIVER_TOOL_SRCS += driver/cas.c +endif ifeq ($(CFREE_TOOL_PKG_ENABLED),1) DRIVER_TOOL_SRCS += driver/pkg.c DRIVER_TOOL_SRCS += driver/dist/dist.c driver/dist/b64.c \ diff --git a/doc/DISTRIBUTE.md b/doc/DISTRIBUTE.md @@ -1,26 +1,48 @@ -# Code distribution: packaging, signing, and verification +# Code distribution: CAS, packaging, signing, and verification -cfree distribution v2 produces signed, self-describing code packages with one -canonical logical format and two physical representations: +cfree distribution v3 has one shared content model and two package +representations. -- `.tar.gz` for portable archive tooling, -- `.cfpkg` for native chunked verification and future streaming. +The shared content model is a content-addressed store of: -There is no network transport or dependency resolution in cfree. Existing tools -move package bytes; cfree creates, verifies, inspects, and unpacks them. +- blobs: raw regular-file bytes, +- trees: deterministic manifests for output directories. + +The package model is a signed claim over one or more trees. Package files can +embed the tree manifests, chunk index, and file content, or they can reference +those objects externally so a client can fetch only the metadata and chunks it +needs. + +There is no dependency resolution or network transport in the package format. +cfree creates, verifies, inspects, unpacks, and materializes local files. Other +tools may move package bytes or serve CAS objects. ## Trust and identity -The signed object is always the logical manifest's literal byte stream. The -package id is: +All cryptographic hashes in distribution v3 are BLAKE2b-256 unless explicitly +stated otherwise. + +The signed package object is always the package manifest's literal byte stream: ``` -package-id = BLAKE2b-256(logical manifest literal bytes) +package-id = BLAKE2b-256(package manifest literal bytes) ``` -The detached minisign signature covers the manifest. Its trusted comment also -contains `pkgid=<hex package-id>`, and verification rejects the package if that -signed value does not match the recomputed manifest hash. +The package signature is a detached minisign signature over the package +manifest. Its trusted comment contains: + +``` +pkgid=<hex package-id> +``` + +Verification rejects a package when the trusted comment package id does not +match the recomputed manifest hash. + +Native `.cfpkg` files also carry a signed encoding descriptor. The descriptor +must be signed by the same trusted key as the package manifest. The package +manifest signs the logical release/install claim; the encoding descriptor signs +the physical layout, embedded object regions, chunk index, compression, and +external object locators. Trust anchors are public keys in the trusted-keys file: @@ -33,58 +55,213 @@ Each line is: <keyid-hex> <pubkey-base64> <label> ``` -A bundled `.pub` is never trust by itself. It is only a TOFU candidate. With -`--tofu`, cfree pins the bundled public key after confirming its key id matches -the signature's key id. Without TOFU or `-p PUBKEY`, unknown signers fail. +A bundled `.pub` file is never trust by itself. It is only a TOFU candidate. +With `--tofu`, cfree pins the bundled public key after confirming its key id +matches the signature's key id. Without TOFU or `-p PUBKEY`, unknown signers +fail. ## Vendored primitives -The driver-side distribution subsystem uses BLAKE2b throughout. SHA-256 is not -part of the v2 package format. +The driver-side distribution subsystem vendors the primitives needed for the +package pipeline. + +| Primitive | Purpose | +|---|---| +| BLAKE2b-256 | package ids, blob ids, tree ids, chunk hashes, region roots | +| BLAKE2b-512 | minisign prehash compatibility | +| Ed25519 | minisign signature scheme | +| base64 | minisign key/signature text | +| tar | portable archive container | +| gzip/DEFLATE | portable compression | +| LZ4 block | optional native chunk compression | + +Compression APIs are deterministic and do not require host compression +libraries. + +## Paths and modes + +All tree and package paths are slash-separated relative paths. + +Rejected path forms: + +- absolute paths, +- empty paths, +- empty path components, +- `.` or `..` components, +- backslashes, +- drive-style `:`, +- paths containing NUL or newline bytes. + +Tree entries currently describe regular files only. Directories are implicit. +The only file mode values are: + +| Mode | Meaning | +|---|---| +| `-` | regular non-executable file | +| `x` | regular executable file | + +## Blob format + +A blob is the raw byte content of one regular file. Blob identity is +path-independent: + +``` +blob-id = BLAKE2b-256(raw file bytes) +``` + +Blobs are also split into fixed-size chunks for streaming verification. The +default and canonical package chunk size is 64 KiB. + +All integers mixed into hash domains are unsigned little-endian. + +For non-empty blobs: + +``` +leaf = BLAKE2b-256("cfree blob leaf v1" || + u64le(chunk-index) || + u64le(raw-size) || + raw-bytes) + +node = BLAKE2b-256("cfree blob node v1" || left-hash || right-hash) + +blob-root = BLAKE2b-256("cfree blob root v1" || top-hash) +``` + +At each tree level, adjacent hashes are paired left-to-right. If a level has +an odd final hash, that hash is promoted unchanged to the next level. There is +no virtual padding and no duplicated final leaf. + +For an empty blob: + +``` +blob-root = BLAKE2b-256("cfree blob empty v1") +``` + +The blob id and blob root serve different purposes. The blob id is the simple +CAS key for complete file bytes. The blob root authenticates the chunk stream +and allows a package index to verify chunks as they are fetched. -| Primitive | Purpose | Status | +## Tree format + +A tree is a deterministic manifest for an output directory. It contains no +package name, version, signature, dependency, or build trace information. + +The logical tree manifest is strict, byte-stable INI-style text. Unknown keys, +unknown sections, unknown versions, duplicate paths, and non-canonical ordering +are errors. Emitters sort file sections by bytewise path order. + +Example: + +```ini +cfree-tree 1 +hash = blake2b-256 +blob = cfree-blob-v1 + +[file] +path = bin/hello +mode = x +size = 16384 +blob = <blob-id> +root = <blob-root> + +[file] +path = include/hello.h +mode = - +size = 512 +blob = <blob-id> +root = <blob-root> +``` + +Top-level fields: + +| Key | Required | Meaning | |---|---|---| -| tar | portable archive container | real | -| gzip/DEFLATE | portable compression | real; miniz-derived raw DEFLATE | -| BLAKE2b-256 | package id, whole-file hashes, Merkle hashing | real; Monocypher 4.0.2 | -| minisign prehash | detached signature compatibility | real; stock minisign-compatible | -| Ed25519 | minisign signature scheme | real; Monocypher 4.0.2 | -| base64 | minisign key/signature text | real | -| LZ4 block | native chunk compression | real; upstream liblz4 1.10.0 | +| `hash` | yes | currently `blake2b-256` | +| `blob` | yes | currently `cfree-blob-v1` | + +`[file]` fields: -The compression APIs are deterministic and vendored into the driver-side -distribution subsystem; package format callers do not depend on external -compression libraries. +| Key | Required | Meaning | +|---|---|---| +| `path` | yes | materialized file path | +| `mode` | yes | `-` or `x` | +| `size` | yes | uncompressed file byte length | +| `blob` | yes | BLAKE2b-256 of full file bytes | +| `root` | yes | cfree blob root for the file bytes | + +The tree id is: + +``` +tree-id = BLAKE2b-256(tree manifest literal bytes) +``` -## Logical manifest +Tree manifest bytes are the canonical bytes stored in CAS and embedded or +referenced by packages. -The logical manifest is INI-style, byte-stable, and strict. Comments and blank -lines are signed bytes. Unknown keys, unknown sections, and unknown versions are -errors. +## CAS layout + +The shared local CAS layout is: + +``` +<cas>/ + blob/<prefix>/<blob-id> + tree/<prefix>/<tree-id> + index/<prefix>/<index-root> + chunk/<blob-prefix>/<blob-id>/<chunk-index> +``` + +`<prefix>` is the first two lowercase hex characters of the id. For chunk +objects, `<blob-prefix>` is the first two lowercase hex characters of the +blob id. + +Blob objects are raw file bytes. Tree objects are canonical `cfree-tree 1` +manifest bytes. Index objects are native cfpkg chunk index byte streams keyed +by their signed `index-root`. Chunk objects are stored package chunk bytes, +keyed by the blob id and chunk index that the signed index authenticates. + +CAS objects are not signed. They are self-verifying by content identity. Trust +enters through signed package manifests, signed package encoding descriptors, +or build-system trace records outside this package format. + +## Package manifest + +The package manifest is the signed logical package object. It names the +package, identifies one or more output trees, and attaches package-level +metadata to files in those trees. + +The logical package manifest is strict, byte-stable INI-style text. Unknown +keys, unknown sections, unknown versions, duplicate output ids, duplicate +default outputs, and duplicate artifact paths within an output are errors. Example: ```ini -cfree-package 2 -name = hello -version = 0.3.1 +cfree-package 3 +name = hello +version = 0.3.1 description = minimal greeting program -hash = blake2b-merkle-v1 +hash = blake2b-256 +tree = cfree-tree-v1 +blob = cfree-blob-v1 + +[output] +id = 0 +name = runtime +tree = <tree-id> +target = x86_64-linux-cfree +default = true [artifact] -id = 0 -path = bin/hello -kind = exe -size = 16384 -blake2b = <whole-file-blake2b-256> -root = <artifact-merkle-root> -entry = true +output = 0 +path = bin/hello +kind = exe +entry = true [dependency] -name = libfoo +name = libfoo version = >=1.2.0 -blake2b = <dependency-package-id> -key = <expected-signer-keyid> +package = <dependency-package-id> +key = <expected-signer-keyid> ``` Top-level fields: @@ -92,193 +269,399 @@ Top-level fields: | Key | Required | Meaning | |---|---|---| | `name` | yes | package name | -| `version` | yes | version string | +| `version` | yes | package version string | | `description` | no | one-line free text | -| `hash` | yes | currently `blake2b-merkle-v1` | +| `hash` | yes | currently `blake2b-256` | +| `tree` | yes | currently `cfree-tree-v1` | +| `blob` | yes | currently `cfree-blob-v1` | + +`[output]` fields: + +| Key | Required | Meaning | +|---|---|---| +| `id` | yes | numeric output id, unique in the manifest | +| `name` | yes | output name, such as `runtime`, `dev`, or `debug` | +| `tree` | yes | tree id of this output | +| `target` | no | cfree target triple | +| `default` | no | `true` for the default unpack output | `[artifact]` fields: | Key | Required | Meaning | |---|---|---| -| `id` | yes | numeric artifact id, unique in the manifest | -| `path` | yes | unpacked artifact path | +| `output` | yes | output id containing the path | +| `path` | yes | file path inside the referenced output tree | | `kind` | yes | `exe`, `dso`, `obj`, `wasm`, `lib`, `data`, or `source` | -| `size` | yes | uncompressed byte length | -| `blake2b` | yes | BLAKE2b-256 of the whole artifact bytes | -| `root` | yes | artifact Merkle root | -| `target` | no | cfree target triple | | `entry` | no | `true` if runnable under jit/emu/wasm | +Artifact sections are semantic overlays. File size, file hash, blob root, and +mode live in the tree manifest. Package verification rejects an artifact whose +path is not present in the referenced output tree. + `[dependency]` fields are validated but not resolved: | Key | Required | Meaning | |---|---|---| | `name` | yes | dependency package name | | `version` | yes | version constraint | -| `blake2b` | no | dependency package id | +| `package` | no | expected dependency package id | | `key` | no | expected signer key id | -Artifact paths are always relative unpack paths. Absolute paths, empty path -components, `.`, `..`, backslashes, and drive-style `:` are rejected. +## Portable `.tar.gz` + +The portable representation is a gzip-compressed tar containing the signed +package manifest and a CAS object bundle: + +``` +hello-0.3.1.tar.gz + cfree/package.manifest + cfree/package.manifest.minisig + cfree/package.pub + cfree/cas/tree/<prefix>/<tree-id> + cfree/cas/blob/<prefix>/<blob-id> +``` + +Verification: + +1. Decompress and parse the tar container. +2. Read `cfree/package.manifest` and `cfree/package.manifest.minisig`. +3. Anchor and verify the package manifest signature. +4. Parse the package manifest and recompute `package-id`. +5. Load every output tree from `cfree/cas/tree`. +6. Verify every tree object by `tree-id`. +7. Verify every blob referenced by each output tree by `blob-id` and + `blob-root`. +8. Verify artifact overlays reference files that exist in their output trees. + +Portable archives are not optimized for seeking. They are intended for ordinary +archive tooling and offline transfer. + +## Native `.cfpkg` -## Merkle tree +The native representation is a signed package pack. It supports three +practical shapes with one format: -Artifacts are split into fixed 64 KiB raw chunks. The final chunk may be -shorter. The tree is deterministic and deliberately simple: +- thin: manifest and descriptor only; tree manifests, chunk index, and chunks + are fetched externally, +- metadata-rich: manifest, descriptor, tree manifests, and chunk index are + embedded; chunks are fetched externally, +- fat: manifest, descriptor, tree manifests, chunk index, and chunks are + embedded in one file. + +The fixed header is trust-neutral. It only locates the early signed metadata: ``` -leaf = BLAKE2b-256("cfpkg2 leaf v1" || artifact-id || chunk-index || - raw-size || raw-bytes) -node = BLAKE2b-256("cfpkg2 node v1" || left-hash || right-hash) -root = BLAKE2b-256("cfpkg2 root v1" || "artifact" || top-hash) +offset size field +0 8 magic = "cfpkg3\0\0" +8 4 version = 3, little-endian +12 4 header-size = 96, little-endian +16 8 manifest-offset, little-endian +24 8 manifest-size, little-endian +32 8 signature-offset, little-endian +40 8 signature-size, little-endian +48 8 descriptor-offset, little-endian +56 8 descriptor-size, little-endian +64 8 descriptor-signature-offset, little-endian +72 8 descriptor-signature-size, little-endian +80 8 pubkey-offset, little-endian +88 8 pubkey-size, little-endian ``` -At each level, adjacent hashes are paired left-to-right. If a level has an odd -final hash, that hash is promoted unchanged to the next level. Empty artifacts -use a separate domain: +Trust starts at the verified package manifest. Layout trust starts at the +verified encoding descriptor. + +### Encoding descriptor + +The encoding descriptor is strict INI-style text signed by the same trusted key +as the package manifest. + +Example: +```ini +cfree-encoding 3 +package-id = <package-id> +format = cfpkg +hash = blake2b-256 +tree = cfree-tree-v1 +blob = cfree-blob-v1 +chunk-size = 65536 +alignment = 16 +tree-offset = 4096 +tree-size = 2048 +tree-root = <region-root> +index-offset = 6144 +index-size = 1680 +index-bytes = 1680 +index-root = <region-root> +index-url = index/<prefix>/<index-root> +content-offset = 8192 +content-size = 65536 +content-root = <region-root> + +[tree-object] +tree = <tree-id> +offset = 0 +size = 512 +blake2b = <BLAKE2b-256 of tree manifest bytes> +url = tree/<prefix>/<tree-id> + +[chunk-source] +kind = embedded + +[chunk-source] +kind = url-template +template = chunk/{blob-prefix}/{blob}/{chunk} ``` -root = BLAKE2b-256("cfpkg2 root v1" || "artifact-empty" || artifact-id || 0) + +Top-level descriptor fields: + +| Key | Required | Meaning | +|---|---|---| +| `package-id` | yes | package manifest id | +| `format` | yes | `cfpkg` | +| `hash` | yes | `blake2b-256` | +| `tree` | yes | `cfree-tree-v1` | +| `blob` | yes | `cfree-blob-v1` | +| `chunk-size` | yes | raw chunk size, normally `65536` | +| `alignment` | yes | embedded region alignment | +| `tree-offset` | yes | embedded tree region file offset, or `0` when absent | +| `tree-size` | yes | embedded tree region byte size, or `0` when absent | +| `tree-root` | yes | authenticated tree region root | +| `index-offset` | yes | embedded index region file offset, or `0` when absent | +| `index-size` | yes | embedded index region byte size, or `0` when absent | +| `index-bytes` | yes | logical chunk index byte size | +| `index-root` | yes | authenticated logical chunk index root | +| `index-url` | no | untrusted fetch locator for external index bytes | +| `content-offset` | yes | embedded content region file offset, or `0` when absent | +| `content-size` | yes | embedded content region byte size, or `0` when absent | +| `content-root` | yes | authenticated content region root | + +Region roots are: + +``` +region-root = BLAKE2b-256("cfree region v1" || + kind-bytes || + BLAKE2b-256(region bytes)) ``` -This avoids virtual padding, duplicated leaves, and power-of-two tree rules. -Proof verification only needs the leaf index, leaf count, sibling hashes, and -the expected root. +The `kind-bytes` value is `tree`, `index`, or `content`. -## Portable `.tar.gz` +When `tree-size` or `content-size` is zero, the region bytes are empty and the +matching region root is the root of the empty byte string in the same domain. + +`index-root` authenticates the logical chunk index bytes, not merely the +embedded region. When `index-size` is non-zero, `index-size` must equal +`index-bytes`, and the embedded index bytes are used. When `index-size` is +zero, the index is fetched externally through `index-url` and accepted only +after its size and root match `index-bytes` and `index-root`. + +`[tree-object]` fields: + +| Key | Required | Meaning | +|---|---|---| +| `tree` | yes | tree id | +| `offset` | no | byte offset relative to embedded tree region | +| `size` | no | tree manifest byte size in embedded tree region | +| `blake2b` | yes | BLAKE2b-256 of the tree manifest bytes | +| `url` | no | untrusted fetch locator for external tree bytes | + +If `offset` and `size` are present, the tree manifest is embedded. If they are +absent, the tree manifest must be fetched externally. The `url` field is never +trusted; fetched bytes are accepted only when their BLAKE2b-256 equals both +`blake2b` and the package output's `tree` id. + +`[chunk-source]` fields: + +| Key | Required | Meaning | +|---|---|---| +| `kind` | yes | `embedded` or `url-template` | +| `template` | when `kind=url-template` | untrusted external chunk locator | + +The template may contain `{blob-prefix}` for the first two lowercase hex +characters of the blob id, `{blob}` for the lowercase blob id, and `{chunk}` +for the decimal chunk index. Locator strings are not trusted and do not affect +verification. + +### Chunk index + +The binary chunk index is sorted by `(blob-id, chunk-index)`. -The portable representation is a gzip-compressed tar: +Each index record is 168 bytes and little-endian: ``` -hello-0.3.1.tar.gz - cfree/package.manifest - cfree/package.manifest.minisig - cfree/package.pub - bin/hello - share/data.txt +blob-id BLAKE2b-256 +chunk-index u64 +content-offset u64 # relative to embedded content region when embedded +stored-size u64 +raw-size u64 +compression u32 # 0 = none, 1 = lz4-block-v1 +reserved u32 # must be zero +stored-hash BLAKE2b-256 +raw-hash BLAKE2b-256 +leaf-hash BLAKE2b-256 ``` -Verification: +Empty blobs have no index records. -1. Decompress and parse the tar container. -2. Read `cfree/package.manifest` and `cfree/package.manifest.minisig`. -3. Anchor and verify the manifest signature. -4. Parse the logical manifest. -5. Verify every artifact member's whole-file BLAKE2b and Merkle root. +`content-offset` is used only for embedded content. For external chunk sources, +clients locate chunks through a `[chunk-source]` template and verify the +returned bytes against `stored-hash` before decoding. -This format is not optimized for seeking. It is meant to interoperate with -ordinary archive tooling. +Chunk verification: -## Native `.cfpkg` +1. Fetch or range-read the stored chunk bytes. +2. Verify `BLAKE2b-256(stored bytes) == stored-hash`. +3. Decode according to `compression`. +4. Verify `BLAKE2b-256(raw bytes) == raw-hash`. +5. Recompute the blob leaf hash and compare `leaf-hash`. + +Full blob verification also recomputes `blob-id` and `blob-root` from the raw +chunks and compares the referenced tree entry. -The native representation is a chunked binary container: +### Native verification + +Native package verification: + +1. Read the fixed header. +2. Verify the package manifest signature. +3. Verify the encoding descriptor signature with the same trusted key. +4. Confirm descriptor `package-id` matches the package manifest id. +5. Confirm embedded region offsets, sizes, roots, chunk size, and alignment are + well-formed. +6. Load embedded or external tree manifests and verify every `tree-id`. +7. Verify package output tree references and artifact overlays. +8. Load embedded or external chunk index and verify `index-root`. +9. For complete package verification or unpack, read chunks from embedded + content or a caller-supplied external object directory, verify stored bytes, + decode, verify raw bytes, verify leaf hashes, and recompute each referenced + blob id and blob root. +10. Re-materialize the selected tree and apply file modes. + +### External object workflow + +The package tool does not perform network fetches. Descriptor `url`, +`index-url`, and `url-template` values are fetch hints for external tools such +as `curl`, package mirrors, or build-system cache clients. + +Consumers that want to verify a thin or metadata-rich native package fetch the +referenced bytes into the same local CAS layout used by `cfree cas`, then pass +that directory to `pkg verify` or `pkg unpack`: ``` -fixed header -logical manifest bytes -manifest minisign bytes -encoding descriptor bytes -encoding descriptor minisign bytes -bundled pubkey bytes -binary chunk index -aligned content region +cfree pkg inspect --encoding hello.cfpkg +curl -o objects/index/<prefix>/<index-root> <index-url> +curl -o objects/tree/<prefix>/<tree-id> <tree-url> +curl -o objects/chunk/<blob-prefix>/<blob-id>/<chunk-index> <chunk-url> +cfree pkg verify -p key.pub --external objects hello.cfpkg +cfree pkg unpack --verify -p key.pub --external objects hello.cfpkg -C out ``` -The fixed header is trust-neutral. It locates the early byte ranges only: +The verifier treats local paths derived from descriptor locators as untrusted. +It accepts only relative paths under `--external DIR`, rejects absolute paths +and `..`, and verifies every byte against signed descriptor fields, tree ids, +blob ids, blob roots, chunk hashes, and leaf hashes. + +The external object directory is a CAS root. Its layout is identical to the +shared CAS layout: ``` -magic = "cfpkg2\0" -version = 2 -header-size -manifest-offset / manifest-size -signature-offset / signature-size -descriptor-offset / descriptor-size -descriptor-signature-offset / descriptor-signature-size -pubkey-offset / pubkey-size -index-offset / index-size -content-offset / content-size -alignment -chunk-size +<external>/ + blob/<prefix>/<blob-id> + tree/<prefix>/<tree-id> + index/<prefix>/<index-root> + chunk/<blob-prefix>/<blob-id>/<chunk-index> ``` -Trust starts at the verified logical manifest. Layout trust starts at the -verified encoding descriptor, which is signed by the same trusted key as the -manifest: +When descriptor locators are present, the verifier uses them as relative paths +under `--external DIR`. When a tree locator is absent, it falls back to the +default `tree/<prefix>/<tree-id>` path. When an index locator is absent, it +falls back to `index/<prefix>/<index-root>`. External chunk content can be +located with a `[chunk-source] kind = url-template`; the template is rendered +with `{blob-prefix}`, `{blob}`, and `{chunk}` and then constrained to the +external directory. If a descriptor omits the chunk template, the verifier +falls back to the default CAS chunk path. -```ini -cfree-encoding 2 -package-id = <BLAKE2b-256 of logical manifest> -format = cfpkg -hash = blake2b-merkle-v1 -index-offset = 1024 -index-size = 144 -index-root = <authenticated index region root> -content-offset = 1168 -content-size = 16384 -content-root = <authenticated content region root> -chunk-size = 65536 -alignment = 16 +## CLI + +Shared CAS utilities: + +``` +cfree cas add-blob --cas DIR FILE +cfree cas add-tree --cas DIR --root DIR +cfree cas add-tree --cas DIR --map FILE +cfree cas inspect-tree --cas DIR TREE_ID +cfree cas verify-tree --cas DIR TREE_ID +cfree cas materialize --cas DIR TREE_ID -C DIR ``` -Each binary index record is 144 bytes and little-endian: +`cas add-tree --root DIR` walks a directory, stores every regular file under +`blob/`, writes the canonical tree manifest under `tree/`, and prints the +tree id. + +`cas add-blob --cas DIR FILE` stores the raw file bytes under `blob/` and +prints the blob id. + +`cas add-tree --map FILE` builds a tree from explicit path/mode/source triples. +The map file grammar is one entry per line: ``` -artifact-id u64 -chunk-index u64 -content-offset u64 # relative to the content region -stored-size u64 -raw-size u64 -compression u32 # 0 = none, 1 = lz4-block-v1 -reserved u32 -stored-hash BLAKE2b-256 -raw-hash BLAKE2b-256 -leaf-hash BLAKE2b-256 +<tree-path> <mode> <source-path> ``` -Native verification: +`cas materialize --cas DIR TREE_ID -C DIR` reads the tree manifest and blobs +from the CAS and writes the tree into the target directory named by `-C`. +Materialization creates missing parent directories, rejects unsafe tree paths, +verifies every blob before writing it, and applies each entry's `mode`. -1. Read the fixed header. -2. Verify the logical manifest signature. -3. Verify the encoding descriptor signature with the same key. -4. Confirm descriptor `package-id`, offsets, sizes, roots, chunk size, and - alignment match the container. -5. Verify stored chunk hashes before decoding. -6. Decode each chunk (`none` and `lz4-block-v1` are implemented). -7. Verify raw chunk hashes, leaf hashes, artifact roots, and whole-file - BLAKE2b-256 hashes. +`cas inspect-tree --cas DIR TREE_ID` prints the canonical tree manifest bytes. +`cas verify-tree --cas DIR TREE_ID` verifies the tree object id and every blob +referenced by the tree. -## CLI +Package utilities: ``` cfree pkg keygen -o BASE cfree pkg create --name N --version V [--desc D] -s SECKEY \ [--format cfpkg|tar.gz] [--compression none|lz4-block-v1] \ - -o OUT FILE... -cfree pkg verify [-p PUBKEY | --tofu] [--format cfpkg|tar.gz] FILE + [--native-shape fat|metadata|thin] [--external DIR] \ + --cas DIR --tree TREE_ID -o OUT +cfree pkg create --name N --version V [--desc D] -s SECKEY \ + [--format cfpkg|tar.gz] [--compression none|lz4-block-v1] \ + [--native-shape fat|metadata|thin] [--external DIR] \ + --root DIR -o OUT +cfree pkg verify [-p PUBKEY | --tofu] [--format cfpkg|tar.gz] \ + [--external DIR] FILE cfree pkg unpack [--verify] [-p PUBKEY | --tofu] [--format cfpkg|tar.gz] \ - FILE -C DIR -cfree pkg inspect FILE + [--external DIR] FILE -C DIR +cfree pkg inspect [--manifest | --encoding] FILE cfree pkg trust {path | list | add PUBKEY [label] | remove KEYID} ``` -`create` infers the physical representation from `-o`: `.cfpkg` is native, -`.tar.gz` is portable. `--format` overrides inference. +`pkg create --cas DIR --tree TREE_ID` packages an existing tree from a CAS. +`pkg create --root DIR` is a convenience form that first creates a temporary +tree from `DIR` and then packages it. + +Local `pkg create --format cfpkg` writes a fat native package by default: +manifest, descriptor, tree manifests, chunk index, and chunks are embedded. +`--native-shape metadata --external DIR` embeds tree manifests and the chunk +index but writes chunks under the external directory. `--native-shape thin +--external DIR` writes tree manifests, the chunk index, and chunks under the +external directory and leaves only signed metadata in the `.cfpkg` file. + +`pkg inspect --encoding FILE` prints the native encoding descriptor so a caller +can derive the fetch plan before running external fetch commands. -`pkg trust path` prints the trusted-keys file path after applying -`$CFREE_TRUSTED_KEYS` / `$HOME` resolution. `pkg unpack` verifies before writing -artifacts; `--verify` makes that verification explicit in the command and emits -the normal verification success line. +`pkg unpack` verifies before writing files. When `--verify` is supplied, it +also prints the normal verification success line before unpacking. -## Implementation status +## Determinism requirements -Implemented: +Emitters and verifiers must preserve these identities exactly: -- v2 logical manifest parser/emitter, -- portable `.tar.gz` create/verify/unpack/inspect, -- native `.cfpkg` create/verify/unpack/inspect, -- minisign-compatible key/signature file layout, -- trusted-keys store and opt-in TOFU, -- Monocypher-backed BLAKE2b streaming API and package Merkle helpers, -- Monocypher-backed Ed25519 minisign key/signature operations, -- raw LZ4 block compression/decompression, -- driver-side SHA-256 removal for distribution. +- package ids are manifest-byte hashes, +- tree ids are tree-manifest-byte hashes, +- blob ids are raw-byte hashes, +- blob roots are path-independent chunk Merkle roots, +- native package chunk indexes are blob-indexed, not path-indexed, +- portable and native packages verify the same logical package/tree/blob + content. diff --git a/driver/cas.c b/driver/cas.c @@ -0,0 +1,491 @@ +#include <cfree/core.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +#include "dist/blob.h" +#include "dist/cas.h" +#include "dist/dist.h" +#include "dist/tree.h" +#include "driver.h" +#include "env.h" + +#define CAS_TOOL "cas" + +void driver_help_cas(void) { + driver_printf( + "cfree cas - shared cfree blob/tree content-addressed store\n" + "\n" + "USAGE\n" + " cfree cas add-blob --cas DIR FILE\n" + " cfree cas add-tree --cas DIR --root DIR\n" + " cfree cas add-tree --cas DIR --map FILE\n" + " cfree cas inspect-tree --cas DIR TREE_ID\n" + " cfree cas verify-tree --cas DIR TREE_ID\n" + " cfree cas materialize --cas DIR TREE_ID -C DIR\n"); +} + +typedef struct CasAddTree { + DriverEnv* env; + DistCas* cas; + DistTree tree; +} CasAddTree; + +static int cas_mkdir_p(void* user, const char* path) { + return driver_mkdir_p((DriverEnv*)user, path); +} + +static int cas_mark_executable(void* user, const char* path) { + (void)user; + return driver_mark_executable_output(path); +} + +static void cas_init(DistCas* cas, DriverEnv* env, const char* root) { + cas->host.file_io = &env->file_io; + cas->host.mkdir_p = cas_mkdir_p; + cas->host.mark_executable = cas_mark_executable; + cas->host.user = env; + cas->root = root; +} + +static void cas_hex(char out[2 * DIST_BLAKE2B_LEN + 1], + const uint8_t id[DIST_BLAKE2B_LEN]) { + dist_hex_encode(out, id, DIST_BLAKE2B_LEN); +} + +static int cas_parse_id(const char* s, uint8_t out[DIST_BLAKE2B_LEN]) { + size_t n = driver_strlen(s); + if (n != 2u * DIST_BLAKE2B_LEN) return DIST_ERR; + return dist_hex_decode(out, s, DIST_BLAKE2B_LEN); +} + +static int cas_write_stdout(DriverEnv* env, const uint8_t* data, size_t len) { + CfreeWriter* w = driver_stdout_writer(env); + if (!w) return DIST_ERR; + if (len && cfree_writer_write(w, data, len) != CFREE_OK) { + cfree_writer_close(w); + return DIST_ERR; + } + if (cfree_writer_status(w) != CFREE_OK) { + cfree_writer_close(w); + return DIST_ERR; + } + cfree_writer_close(w); + return DIST_OK; +} + +static int cas_add_tree_entry(CasAddTree* a, const char* tree_path, + uint8_t mode, const char* source_path) { + CfreeFileData fd; + DistBlobInfo bi; + DistTreeEntry* e; + fd.data = NULL; + fd.size = 0; + fd.token = NULL; + if (a->tree.n_entries >= a->tree.cap_entries) { + driver_errf(CAS_TOOL, "too many tree entries"); + return 1; + } + if (!dist_tree_path_valid(tree_path)) { + driver_errf(CAS_TOOL, "unsafe tree path: %s", tree_path); + return 1; + } + if (!dist_tree_mode_name(mode)) { + driver_errf(CAS_TOOL, "bad tree mode for: %s", tree_path); + return 1; + } + if (a->env->file_io.read_all(a->env->file_io.user, source_path, &fd) != + CFREE_OK) { + driver_errf(CAS_TOOL, "failed to read: %s", source_path); + return 1; + } + if (dist_blob_info(&bi, fd.data, fd.size, DIST_BLOB_CHUNK_SIZE_DEFAULT) != + DIST_OK) { + driver_errf(CAS_TOOL, "failed to hash blob: %s", source_path); + a->env->file_io.release(a->env->file_io.user, &fd); + return 1; + } + if (dist_cas_put_blob(a->cas, bi.id, fd.data, fd.size) != DIST_OK) { + driver_errf(CAS_TOOL, "failed to store blob: %s", source_path); + a->env->file_io.release(a->env->file_io.user, &fd); + return 1; + } + e = &a->tree.entries[a->tree.n_entries++]; + memset(e, 0, sizeof *e); + snprintf(e->path, sizeof e->path, "%s", tree_path); + e->mode = mode; + e->size = bi.size; + memcpy(e->blob, bi.id, DIST_BLAKE2B_LEN); + memcpy(e->root, bi.root, DIST_BLAKE2B_LEN); + a->env->file_io.release(a->env->file_io.user, &fd); + return 0; +} + +static int cas_walk_add_file(void* user, const char* source_path, + const char* tree_path, int executable) { + CasAddTree* a = (CasAddTree*)user; + uint8_t mode = + executable ? DIST_TREE_MODE_EXEC : DIST_TREE_MODE_FILE; + return cas_add_tree_entry(a, tree_path, mode, source_path); +} + +static int cas_emit_store_tree(CasAddTree* a, + uint8_t out_id[DIST_BLAKE2B_LEN]) { + CfreeWriter* w = NULL; + const uint8_t* bytes; + size_t len; + char err[128]; + if (dist_tree_sort_validate(&a->tree, err, sizeof err) != DIST_OK) { + driver_errf(CAS_TOOL, "%s", err); + return DIST_ERR; + } + if (cfree_writer_mem(a->env->heap, &w) != CFREE_OK) { + driver_errf(CAS_TOOL, "failed to allocate tree writer"); + return DIST_ERR; + } + if (dist_tree_emit(&a->tree, w) != DIST_OK || + cfree_writer_status(w) != CFREE_OK) { + cfree_writer_close(w); + driver_errf(CAS_TOOL, "failed to emit tree manifest"); + return DIST_ERR; + } + bytes = cfree_writer_mem_bytes(w, &len); + dist_tree_id(out_id, bytes, len); + if (dist_cas_put_tree(a->cas, out_id, bytes, len) != DIST_OK) { + cfree_writer_close(w); + driver_errf(CAS_TOOL, "failed to store tree manifest"); + return DIST_ERR; + } + cfree_writer_close(w); + return DIST_OK; +} + +static int cas_read_token(const uint8_t* line, size_t len, size_t* pos, + const uint8_t** start, size_t* tok_len) { + size_t i = *pos; + while (i < len && (line[i] == ' ' || line[i] == '\t')) ++i; + if (i >= len) return 0; + *start = line + i; + while (i < len && line[i] != ' ' && line[i] != '\t') ++i; + *tok_len = (size_t)(line + i - *start); + *pos = i; + return 1; +} + +static int cas_parse_map_line(CasAddTree* a, const uint8_t* line, size_t len, + unsigned line_no) { + const uint8_t *path_b, *mode_b, *src_b; + size_t path_l, mode_l, src_l; + size_t pos = 0; + char path[DIST_PATH_MAX + 1]; + char mode_s[2]; + char* src; + uint8_t mode; + while (len && line[len - 1u] == '\r') --len; + while (pos < len && (line[pos] == ' ' || line[pos] == '\t')) ++pos; + if (pos == len || line[pos] == '#') return 0; + if (!cas_read_token(line, len, &pos, &path_b, &path_l) || + !cas_read_token(line, len, &pos, &mode_b, &mode_l) || + !cas_read_token(line, len, &pos, &src_b, &src_l)) { + driver_errf(CAS_TOOL, "bad map line %u", line_no); + return 1; + } + while (pos < len && (line[pos] == ' ' || line[pos] == '\t')) ++pos; + if (pos != len || path_l > DIST_PATH_MAX || mode_l != 1u || src_l == 0) { + driver_errf(CAS_TOOL, "bad map line %u", line_no); + return 1; + } + memcpy(path, path_b, path_l); + path[path_l] = '\0'; + mode_s[0] = (char)mode_b[0]; + mode_s[1] = '\0'; + if (dist_tree_mode_parse(mode_s, &mode) != DIST_OK) { + driver_errf(CAS_TOOL, "bad mode on map line %u", line_no); + return 1; + } + src = (char*)driver_alloc(a->env, src_l + 1u); + if (!src) { + driver_errf(CAS_TOOL, "out of memory"); + return 1; + } + memcpy(src, src_b, src_l); + src[src_l] = '\0'; + /* v1 map files intentionally split on ASCII whitespace, so paths with + * spaces are not representable yet. */ + if (cas_add_tree_entry(a, path, mode, src) != 0) { + driver_free(a->env, src, src_l + 1u); + return 1; + } + driver_free(a->env, src, src_l + 1u); + return 0; +} + +static int cas_add_tree_map(CasAddTree* a, const uint8_t* data, size_t len) { + size_t start = 0; + unsigned line_no = 1; + while (start <= len) { + size_t end = start; + while (end < len && data[end] != '\n') ++end; + if (cas_parse_map_line(a, data + start, end - start, line_no) != 0) + return DIST_ERR; + if (end == len) break; + start = end + 1u; + ++line_no; + } + return DIST_OK; +} + +static int cas_cmd_add_blob(DriverEnv* env, int argc, char** argv) { + const char* cas_dir = NULL; + const char* file = NULL; + DriverLoad load; + CfreeSlice in; + DistBlobInfo bi; + DistCas cas; + char hex[2 * DIST_BLAKE2B_LEN + 1]; + int i; + load.loaded = 0; + for (i = 2; i < argc; ++i) { + if (driver_streq(argv[i], "--cas") && i + 1 < argc) { + cas_dir = argv[++i]; + } else if (!file) { + file = argv[i]; + } else { + driver_errf(CAS_TOOL, "unexpected argument: %s", argv[i]); + return 2; + } + } + if (!cas_dir || !file) { + driver_errf(CAS_TOOL, "usage: cfree cas add-blob --cas DIR FILE"); + return 2; + } + cas_init(&cas, env, cas_dir); + if (driver_load_bytes(&env->file_io, CAS_TOOL, file, &load, &in) != 0) + return 1; + if (dist_blob_info(&bi, in.data, in.len, DIST_BLOB_CHUNK_SIZE_DEFAULT) != + DIST_OK || + dist_cas_put_blob(&cas, bi.id, in.data, in.len) != DIST_OK) { + driver_release_bytes(&env->file_io, &load); + driver_errf(CAS_TOOL, "failed to store blob: %s", file); + return 1; + } + cas_hex(hex, bi.id); + driver_printf("%s\n", hex); + driver_release_bytes(&env->file_io, &load); + return 0; +} + +static int cas_cmd_add_tree(DriverEnv* env, int argc, char** argv) { + const char* cas_dir = NULL; + const char* root = NULL; + const char* map = NULL; + DistTreeEntry* entries; + DistCas cas; + CasAddTree add; + uint8_t tree_id[DIST_BLAKE2B_LEN]; + char hex[2 * DIST_BLAKE2B_LEN + 1]; + int i; + int rc = 1; + for (i = 2; i < argc; ++i) { + if (driver_streq(argv[i], "--cas") && i + 1 < argc) { + cas_dir = argv[++i]; + } else if (driver_streq(argv[i], "--root") && i + 1 < argc) { + root = argv[++i]; + } else if (driver_streq(argv[i], "--map") && i + 1 < argc) { + map = argv[++i]; + } else { + driver_errf(CAS_TOOL, "unexpected argument: %s", argv[i]); + return 2; + } + } + if (!cas_dir || ((root != NULL) == (map != NULL))) { + driver_errf(CAS_TOOL, + "usage: cfree cas add-tree --cas DIR (--root DIR | --map FILE)"); + return 2; + } + entries = + (DistTreeEntry*)driver_alloc_zeroed(env, DIST_MAX_FILES * sizeof *entries); + if (!entries) { + driver_errf(CAS_TOOL, "out of memory"); + return 1; + } + cas_init(&cas, env, cas_dir); + add.env = env; + add.cas = &cas; + add.tree.entries = entries; + add.tree.n_entries = 0; + add.tree.cap_entries = DIST_MAX_FILES; + if (root) { + if (driver_walk_regular_files(env, root, cas_walk_add_file, &add) != 0) { + driver_errf(CAS_TOOL, "failed to walk directory: %s", root); + goto out; + } + } else { + CfreeFileData fd; + fd.data = NULL; + fd.size = 0; + fd.token = NULL; + if (env->file_io.read_all(env->file_io.user, map, &fd) != CFREE_OK) { + driver_errf(CAS_TOOL, "failed to read map: %s", map); + goto out; + } + if (cas_add_tree_map(&add, fd.data, fd.size) != DIST_OK) { + env->file_io.release(env->file_io.user, &fd); + goto out; + } + env->file_io.release(env->file_io.user, &fd); + } + if (cas_emit_store_tree(&add, tree_id) != DIST_OK) goto out; + cas_hex(hex, tree_id); + driver_printf("%s\n", hex); + rc = 0; + +out: + driver_free(env, entries, DIST_MAX_FILES * sizeof *entries); + return rc; +} + +static int cas_load_parse_tree(DriverEnv* env, DistCas* cas, + const uint8_t id[DIST_BLAKE2B_LEN], + DistTree* tree, DistTreeEntry* entries, + CfreeFileData* raw) { + char err[128]; + tree->entries = entries; + tree->n_entries = 0; + tree->cap_entries = DIST_MAX_FILES; + raw->data = NULL; + raw->size = 0; + raw->token = NULL; + if (dist_cas_get_tree(cas, id, raw) != DIST_OK) { + driver_errf(CAS_TOOL, "failed to load tree"); + return DIST_ERR; + } + if (dist_tree_parse(raw->data, raw->size, tree, err, sizeof err) != DIST_OK) { + driver_errf(CAS_TOOL, "%s", err); + if (env->file_io.release) env->file_io.release(env->file_io.user, raw); + return DIST_ERR; + } + return DIST_OK; +} + +static int cas_verify_tree_blobs(DriverEnv* env, DistCas* cas, + const DistTree* tree) { + size_t i; + for (i = 0; i < tree->n_entries; ++i) { + const DistTreeEntry* e = &tree->entries[i]; + CfreeFileData fd; + DistBlobInfo bi; + fd.data = NULL; + fd.size = 0; + fd.token = NULL; + if (dist_cas_get_blob(cas, e->blob, &fd) != DIST_OK) { + driver_errf(CAS_TOOL, "missing or corrupt blob for: %s", e->path); + return DIST_ERR; + } + if (dist_blob_info(&bi, fd.data, fd.size, DIST_BLOB_CHUNK_SIZE_DEFAULT) != + DIST_OK || + bi.size != e->size || + memcmp(bi.root, e->root, DIST_BLAKE2B_LEN) != 0) { + if (env->file_io.release) env->file_io.release(env->file_io.user, &fd); + driver_errf(CAS_TOOL, "blob root mismatch for: %s", e->path); + return DIST_ERR; + } + if (env->file_io.release) env->file_io.release(env->file_io.user, &fd); + } + return DIST_OK; +} + +static int cas_cmd_tree_common(DriverEnv* env, int argc, char** argv, + const char* cmd) { + const char* cas_dir = NULL; + const char* tree_s = NULL; + const char* out_dir = NULL; + uint8_t tree_id[DIST_BLAKE2B_LEN]; + DistTreeEntry* entries; + DistTree tree; + CfreeFileData raw; + DistCas cas; + int i; + int rc = 1; + for (i = 2; i < argc; ++i) { + if (driver_streq(argv[i], "--cas") && i + 1 < argc) { + cas_dir = argv[++i]; + } else if (driver_streq(argv[i], "-C") && i + 1 < argc) { + out_dir = argv[++i]; + } else if (!tree_s) { + tree_s = argv[i]; + } else { + driver_errf(CAS_TOOL, "unexpected argument: %s", argv[i]); + return 2; + } + } + if (!cas_dir || !tree_s || + (driver_streq(cmd, "materialize") && !out_dir) || + (!driver_streq(cmd, "materialize") && out_dir)) { + if (driver_streq(cmd, "materialize")) + driver_errf(CAS_TOOL, + "usage: cfree cas materialize --cas DIR TREE_ID -C DIR"); + else + driver_errf(CAS_TOOL, "usage: cfree cas %s --cas DIR TREE_ID", cmd); + return 2; + } + if (cas_parse_id(tree_s, tree_id) != DIST_OK) { + driver_errf(CAS_TOOL, "bad tree id: %s", tree_s); + return 2; + } + entries = + (DistTreeEntry*)driver_alloc_zeroed(env, DIST_MAX_FILES * sizeof *entries); + if (!entries) { + driver_errf(CAS_TOOL, "out of memory"); + return 1; + } + cas_init(&cas, env, cas_dir); + if (cas_load_parse_tree(env, &cas, tree_id, &tree, entries, &raw) != DIST_OK) + goto out_entries; + if (driver_streq(cmd, "inspect-tree")) { + if (cas_write_stdout(env, raw.data, raw.size) != DIST_OK) { + driver_errf(CAS_TOOL, "failed to write tree manifest"); + goto out_raw; + } + } else if (driver_streq(cmd, "verify-tree")) { + if (cas_verify_tree_blobs(env, &cas, &tree) != DIST_OK) goto out_raw; + driver_printf("ok\n"); + } else { + if (dist_cas_materialize_tree(&cas, &tree, out_dir) != DIST_OK) { + driver_errf(CAS_TOOL, "failed to materialize tree"); + goto out_raw; + } + } + rc = 0; + +out_raw: + if (env->file_io.release) env->file_io.release(env->file_io.user, &raw); +out_entries: + driver_free(env, entries, DIST_MAX_FILES * sizeof *entries); + return rc; +} + +int driver_cas(int argc, char** argv) { + DriverEnv env; + int rc; + if (argc < 2 || driver_argv_wants_help(argc, argv, 1)) { + driver_help_cas(); + return 0; + } + driver_env_init(&env); + if (driver_streq(argv[1], "add-blob")) { + rc = cas_cmd_add_blob(&env, argc, argv); + } else if (driver_streq(argv[1], "add-tree")) { + rc = cas_cmd_add_tree(&env, argc, argv); + } else if (driver_streq(argv[1], "inspect-tree") || + driver_streq(argv[1], "verify-tree") || + driver_streq(argv[1], "materialize")) { + rc = cas_cmd_tree_common(&env, argc, argv, argv[1]); + } else { + driver_errf(CAS_TOOL, "unknown command: %s", argv[1]); + rc = 2; + } + driver_env_fini(&env); + return rc; +} diff --git a/driver/dist/blob.c b/driver/dist/blob.c @@ -0,0 +1,103 @@ +#include "blob.h" + +#include <string.h> + +#include "blake2b.h" + +static void put_u64le(uint8_t* p, uint64_t v) { + unsigned i; + for (i = 0; i < 8u; ++i) p[i] = (uint8_t)(v >> (8u * i)); +} + +static void hash_u64(DistBlake2b* h, uint64_t v) { + uint8_t b[8]; + put_u64le(b, v); + dist_blake2b_update(h, b, sizeof b); +} + +void dist_blob_id(uint8_t out[DIST_BLAKE2B_LEN], const uint8_t* data, + size_t len) { + dist_blake2b(out, data, len); +} + +void dist_blob_leaf_hash(uint8_t out[DIST_BLAKE2B_LEN], uint64_t chunk_index, + const uint8_t* raw, size_t raw_len) { + static const uint8_t dom[] = "cfree blob leaf v1"; + DistBlake2b h; + dist_blake2b_init(&h, DIST_BLAKE2B_LEN); + dist_blake2b_update(&h, dom, sizeof dom - 1u); + hash_u64(&h, chunk_index); + hash_u64(&h, (uint64_t)raw_len); + dist_blake2b_update(&h, raw, raw_len); + dist_blake2b_final(&h, out); +} + +void dist_blob_node_hash(uint8_t out[DIST_BLAKE2B_LEN], + const uint8_t left[DIST_BLAKE2B_LEN], + const uint8_t right[DIST_BLAKE2B_LEN]) { + static const uint8_t dom[] = "cfree blob node v1"; + DistBlake2b h; + dist_blake2b_init(&h, DIST_BLAKE2B_LEN); + dist_blake2b_update(&h, dom, sizeof dom - 1u); + dist_blake2b_update(&h, left, DIST_BLAKE2B_LEN); + dist_blake2b_update(&h, right, DIST_BLAKE2B_LEN); + dist_blake2b_final(&h, out); +} + +void dist_blob_empty_root(uint8_t out[DIST_BLAKE2B_LEN]) { + static const uint8_t dom[] = "cfree blob empty v1"; + dist_blake2b(out, dom, sizeof dom - 1u); +} + +int dist_blob_root(uint8_t out[DIST_BLAKE2B_LEN], const uint8_t* data, + size_t len, size_t chunk_size) { + uint8_t level[DIST_MAX_FILES][DIST_BLAKE2B_LEN]; + size_t leaves, i; + if (chunk_size == 0) return DIST_ERR; + if (len && !data) return DIST_ERR; + if (len == 0) { + dist_blob_empty_root(out); + return DIST_OK; + } + leaves = (len + chunk_size - 1u) / chunk_size; + if (leaves > DIST_MAX_FILES) return DIST_ERR; + for (i = 0; i < leaves; ++i) { + size_t off = i * chunk_size; + size_t n = len - off; + if (n > chunk_size) n = chunk_size; + dist_blob_leaf_hash(level[i], (uint64_t)i, data + off, n); + } + while (leaves > 1u) { + size_t outn = 0; + for (i = 0; i < leaves; i += 2u) { + if (i + 1u < leaves) + dist_blob_node_hash(level[outn], level[i], level[i + 1u]); + else + memcpy(level[outn], level[i], DIST_BLAKE2B_LEN); + ++outn; + } + leaves = outn; + } + { + static const uint8_t dom[] = "cfree blob root v1"; + DistBlake2b h; + dist_blake2b_init(&h, DIST_BLAKE2B_LEN); + dist_blake2b_update(&h, dom, sizeof dom - 1u); + dist_blake2b_update(&h, level[0], DIST_BLAKE2B_LEN); + dist_blake2b_final(&h, out); + } + return DIST_OK; +} + +int dist_blob_info(DistBlobInfo* out, const uint8_t* data, size_t len, + size_t chunk_size) { + if (!out || chunk_size == 0) return DIST_ERR; + if (len && !data) return DIST_ERR; + memset(out, 0, sizeof *out); + dist_blob_id(out->id, data, len); + if (dist_blob_root(out->root, data, len, chunk_size) != DIST_OK) + return DIST_ERR; + out->size = (uint64_t)len; + out->chunks = len ? (uint64_t)((len + chunk_size - 1u) / chunk_size) : 0u; + return DIST_OK; +} diff --git a/driver/dist/blob.h b/driver/dist/blob.h @@ -0,0 +1,32 @@ +#ifndef CFREE_DIST_BLOB_H +#define CFREE_DIST_BLOB_H + +#include <stddef.h> +#include <stdint.h> + +#include "dist.h" + +#define DIST_BLOB_FORMAT "cfree-blob-v1" +#define DIST_BLOB_CHUNK_SIZE_DEFAULT 65536u + +typedef struct DistBlobInfo { + uint8_t id[DIST_BLAKE2B_LEN]; + uint8_t root[DIST_BLAKE2B_LEN]; + uint64_t size; + uint64_t chunks; +} DistBlobInfo; + +void dist_blob_id(uint8_t out[DIST_BLAKE2B_LEN], const uint8_t* data, + size_t len); +void dist_blob_leaf_hash(uint8_t out[DIST_BLAKE2B_LEN], uint64_t chunk_index, + const uint8_t* raw, size_t raw_len); +void dist_blob_node_hash(uint8_t out[DIST_BLAKE2B_LEN], + const uint8_t left[DIST_BLAKE2B_LEN], + const uint8_t right[DIST_BLAKE2B_LEN]); +void dist_blob_empty_root(uint8_t out[DIST_BLAKE2B_LEN]); +int dist_blob_root(uint8_t out[DIST_BLAKE2B_LEN], const uint8_t* data, + size_t len, size_t chunk_size); +int dist_blob_info(DistBlobInfo* out, const uint8_t* data, size_t len, + size_t chunk_size); + +#endif diff --git a/driver/dist/cas.c b/driver/dist/cas.c @@ -0,0 +1,264 @@ +#include "cas.h" + +#include <stdio.h> +#include <string.h> + +#include "blob.h" + +static int dist_cas_object_relpath(char* out, size_t cap, const char* kind, + const uint8_t id[DIST_BLAKE2B_LEN]) { + char hex[2 * DIST_BLAKE2B_LEN + 1]; + int n; + if (!out || !cap || !kind || !id) return DIST_ERR; + dist_hex_encode(hex, id, DIST_BLAKE2B_LEN); + n = snprintf(out, cap, "%s/%c%c/%s", kind, hex[0], hex[1], hex); + return n > 0 && (size_t)n < cap ? DIST_OK : DIST_ERR; +} + +static int dist_cas_join_path(char* out, size_t cap, const char* root, + const char* rel) { + size_t rl, pl; + int slash; + if (!out || !cap || !root || !rel) return DIST_ERR; + rl = strlen(root); + pl = strlen(rel); + slash = rl > 0 && root[rl - 1u] != '/'; + if (rl + (slash ? 1u : 0u) + pl + 1u > cap) return DIST_ERR; + memcpy(out, root, rl); + if (slash) out[rl++] = '/'; + memcpy(out + rl, rel, pl); + out[rl + pl] = '\0'; + return DIST_OK; +} + +static int dist_cas_object_path(char* out, size_t cap, const char* root, + const char* kind, + const uint8_t id[DIST_BLAKE2B_LEN]) { + char rel[DIST_CAS_PATH_MAX]; + if (dist_cas_object_relpath(rel, sizeof rel, kind, id) != DIST_OK) + return DIST_ERR; + return dist_cas_join_path(out, cap, root, rel); +} + +int dist_cas_blob_relpath(char* out, size_t cap, + const uint8_t blob[DIST_BLAKE2B_LEN]) { + return dist_cas_object_relpath(out, cap, "blob", blob); +} + +int dist_cas_tree_relpath(char* out, size_t cap, + const uint8_t tree[DIST_BLAKE2B_LEN]) { + return dist_cas_object_relpath(out, cap, "tree", tree); +} + +int dist_cas_index_relpath(char* out, size_t cap, + const uint8_t index[DIST_BLAKE2B_LEN]) { + return dist_cas_object_relpath(out, cap, "index", index); +} + +int dist_cas_chunk_relpath(char* out, size_t cap, + const uint8_t blob[DIST_BLAKE2B_LEN], + uint64_t chunk_index) { + char hex[2 * DIST_BLAKE2B_LEN + 1]; + int n; + if (!out || !cap || !blob) return DIST_ERR; + dist_hex_encode(hex, blob, DIST_BLAKE2B_LEN); + n = snprintf(out, cap, "chunk/%c%c/%s/%llu", hex[0], hex[1], hex, + (unsigned long long)chunk_index); + return n > 0 && (size_t)n < cap ? DIST_OK : DIST_ERR; +} + +int dist_cas_blob_path(char* out, size_t cap, const char* root, + const uint8_t blob[DIST_BLAKE2B_LEN]) { + return dist_cas_object_path(out, cap, root, "blob", blob); +} + +int dist_cas_tree_path(char* out, size_t cap, const char* root, + const uint8_t tree[DIST_BLAKE2B_LEN]) { + return dist_cas_object_path(out, cap, root, "tree", tree); +} + +int dist_cas_index_path(char* out, size_t cap, const char* root, + const uint8_t index[DIST_BLAKE2B_LEN]) { + return dist_cas_object_path(out, cap, root, "index", index); +} + +int dist_cas_chunk_path(char* out, size_t cap, const char* root, + const uint8_t blob[DIST_BLAKE2B_LEN], + uint64_t chunk_index) { + char rel[DIST_CAS_PATH_MAX]; + if (dist_cas_chunk_relpath(rel, sizeof rel, blob, chunk_index) != DIST_OK) + return DIST_ERR; + return dist_cas_join_path(out, cap, root, rel); +} + +static int parent_dir(char* out, size_t cap, const char* path) { + const char* slash = NULL; + const char* p; + size_t n; + for (p = path; *p; ++p) + if (*p == '/') slash = p; + if (!slash) return DIST_ERR; + n = (size_t)(slash - path); + if (n >= cap) return DIST_ERR; + memcpy(out, path, n); + out[n] = '\0'; + return DIST_OK; +} + +static int put_bytes(DistCas* cas, const char* path, const uint8_t* data, + size_t len) { + CfreeWriter* w = NULL; + char parent[DIST_CAS_PATH_MAX]; + if (!cas || !cas->root || !cas->host.file_io || + !cas->host.file_io->open_writer) + return DIST_ERR; + if (parent_dir(parent, sizeof parent, path) != DIST_OK) return DIST_ERR; + if (cas->host.mkdir_p && cas->host.mkdir_p(cas->host.user, parent) != 0) + return DIST_ERR; + if (cas->host.file_io->open_writer(cas->host.file_io->user, path, &w) != + CFREE_OK) + return DIST_ERR; + if (len && cfree_writer_write(w, data, len) != CFREE_OK) { + cfree_writer_close(w); + return DIST_ERR; + } + if (cfree_writer_status(w) != CFREE_OK) { + cfree_writer_close(w); + return DIST_ERR; + } + cfree_writer_close(w); + return DIST_OK; +} + +int dist_cas_put_blob(DistCas* cas, const uint8_t blob[DIST_BLAKE2B_LEN], + const uint8_t* data, size_t len) { + char path[DIST_CAS_PATH_MAX]; + uint8_t got[DIST_BLAKE2B_LEN]; + if (!cas || !blob || (len && !data)) return DIST_ERR; + dist_blob_id(got, data, len); + if (memcmp(got, blob, DIST_BLAKE2B_LEN) != 0) return DIST_ERR; + if (dist_cas_blob_path(path, sizeof path, cas->root, blob) != DIST_OK) + return DIST_ERR; + return put_bytes(cas, path, data, len); +} + +int dist_cas_get_blob(DistCas* cas, const uint8_t blob[DIST_BLAKE2B_LEN], + CfreeFileData* out) { + char path[DIST_CAS_PATH_MAX]; + uint8_t got[DIST_BLAKE2B_LEN]; + if (!cas || !cas->root || !blob || !out || !cas->host.file_io || + !cas->host.file_io->read_all) + return DIST_ERR; + if (dist_cas_blob_path(path, sizeof path, cas->root, blob) != DIST_OK) + return DIST_ERR; + if (cas->host.file_io->read_all(cas->host.file_io->user, path, out) != + CFREE_OK) + return DIST_ERR; + dist_blob_id(got, out->data, out->size); + if (memcmp(got, blob, DIST_BLAKE2B_LEN) == 0) return DIST_OK; + if (cas->host.file_io->release) + cas->host.file_io->release(cas->host.file_io->user, out); + return DIST_ERR; +} + +int dist_cas_put_tree(DistCas* cas, const uint8_t tree[DIST_BLAKE2B_LEN], + const uint8_t* data, size_t len) { + char path[DIST_CAS_PATH_MAX]; + uint8_t got[DIST_BLAKE2B_LEN]; + if (!cas || !tree || (len && !data)) return DIST_ERR; + dist_tree_id(got, data, len); + if (memcmp(got, tree, DIST_BLAKE2B_LEN) != 0) return DIST_ERR; + if (dist_cas_tree_path(path, sizeof path, cas->root, tree) != DIST_OK) + return DIST_ERR; + return put_bytes(cas, path, data, len); +} + +int dist_cas_get_tree(DistCas* cas, const uint8_t tree[DIST_BLAKE2B_LEN], + CfreeFileData* out) { + char path[DIST_CAS_PATH_MAX]; + uint8_t got[DIST_BLAKE2B_LEN]; + if (!cas || !cas->root || !tree || !out || !cas->host.file_io || + !cas->host.file_io->read_all) + return DIST_ERR; + if (dist_cas_tree_path(path, sizeof path, cas->root, tree) != DIST_OK) + return DIST_ERR; + if (cas->host.file_io->read_all(cas->host.file_io->user, path, out) != + CFREE_OK) + return DIST_ERR; + dist_tree_id(got, out->data, out->size); + if (memcmp(got, tree, DIST_BLAKE2B_LEN) == 0) return DIST_OK; + if (cas->host.file_io->release) + cas->host.file_io->release(cas->host.file_io->user, out); + return DIST_ERR; +} + +static int join_tree_path(char* out, size_t cap, const char* dst, + const char* rel) { + size_t dl, rl; + int needs_slash; + if (!out || !cap || !dst || !rel) return DIST_ERR; + dl = strlen(dst); + rl = strlen(rel); + needs_slash = dl > 0 && dst[dl - 1u] != '/'; + if (dl + (needs_slash ? 1u : 0u) + rl + 1u > cap) return DIST_ERR; + memcpy(out, dst, dl); + if (needs_slash) out[dl++] = '/'; + memcpy(out + dl, rel, rl); + out[dl + rl] = '\0'; + return DIST_OK; +} + +int dist_cas_materialize_tree(DistCas* cas, const DistTree* tree, + const char* dst) { + size_t i; + if (!cas || !tree || !dst || !cas->host.file_io || + !cas->host.file_io->open_writer) + return DIST_ERR; + for (i = 0; i < tree->n_entries; ++i) { + const DistTreeEntry* e = &tree->entries[i]; + CfreeFileData fd; + DistBlobInfo bi; + CfreeWriter* w = NULL; + char outpath[DIST_CAS_PATH_MAX]; + char parent[DIST_CAS_PATH_MAX]; + int rc = DIST_ERR; + + fd.data = NULL; + fd.size = 0; + fd.token = NULL; + if (!dist_tree_path_valid(e->path)) return DIST_ERR; + if (!dist_tree_mode_name(e->mode)) return DIST_ERR; + if (dist_cas_get_blob(cas, e->blob, &fd) != DIST_OK) return DIST_ERR; + if (dist_blob_info(&bi, fd.data, fd.size, DIST_BLOB_CHUNK_SIZE_DEFAULT) != + DIST_OK) + goto entry_out; + if (bi.size != e->size || + memcmp(bi.root, e->root, DIST_BLAKE2B_LEN) != 0) + goto entry_out; + if (join_tree_path(outpath, sizeof outpath, dst, e->path) != DIST_OK) + goto entry_out; + if (parent_dir(parent, sizeof parent, outpath) != DIST_OK) goto entry_out; + if (cas->host.mkdir_p && cas->host.mkdir_p(cas->host.user, parent) != 0) + goto entry_out; + if (cas->host.file_io->open_writer(cas->host.file_io->user, outpath, &w) != + CFREE_OK) + goto entry_out; + if (fd.size && + cfree_writer_write(w, fd.data, fd.size) != CFREE_OK) + goto entry_out; + if (cfree_writer_status(w) != CFREE_OK) goto entry_out; + cfree_writer_close(w); + w = NULL; + if (e->mode == DIST_TREE_MODE_EXEC && cas->host.mark_executable && + cas->host.mark_executable(cas->host.user, outpath) != 0) + goto entry_out; + rc = DIST_OK; + + entry_out: + if (w) cfree_writer_close(w); + if (cas->host.file_io->release) + cas->host.file_io->release(cas->host.file_io->user, &fd); + if (rc != DIST_OK) return DIST_ERR; + } + return DIST_OK; +} diff --git a/driver/dist/cas.h b/driver/dist/cas.h @@ -0,0 +1,54 @@ +#ifndef CFREE_DIST_CAS_H +#define CFREE_DIST_CAS_H + +#include <cfree/core.h> +#include <stddef.h> +#include <stdint.h> + +#include "dist.h" +#include "tree.h" + +#define DIST_CAS_PATH_MAX 1024u + +typedef struct DistCasHost { + const CfreeFileIO* file_io; + int (*mkdir_p)(void* user, const char* path); + int (*mark_executable)(void* user, const char* path); + void* user; +} DistCasHost; + +typedef struct DistCas { + DistCasHost host; + const char* root; +} DistCas; + +int dist_cas_blob_path(char* out, size_t cap, const char* root, + const uint8_t blob[DIST_BLAKE2B_LEN]); +int dist_cas_tree_path(char* out, size_t cap, const char* root, + const uint8_t tree[DIST_BLAKE2B_LEN]); +int dist_cas_index_path(char* out, size_t cap, const char* root, + const uint8_t index[DIST_BLAKE2B_LEN]); +int dist_cas_chunk_path(char* out, size_t cap, const char* root, + const uint8_t blob[DIST_BLAKE2B_LEN], + uint64_t chunk_index); +int dist_cas_blob_relpath(char* out, size_t cap, + const uint8_t blob[DIST_BLAKE2B_LEN]); +int dist_cas_tree_relpath(char* out, size_t cap, + const uint8_t tree[DIST_BLAKE2B_LEN]); +int dist_cas_index_relpath(char* out, size_t cap, + const uint8_t index[DIST_BLAKE2B_LEN]); +int dist_cas_chunk_relpath(char* out, size_t cap, + const uint8_t blob[DIST_BLAKE2B_LEN], + uint64_t chunk_index); +int dist_cas_put_blob(DistCas* cas, const uint8_t blob[DIST_BLAKE2B_LEN], + const uint8_t* data, size_t len); +int dist_cas_get_blob(DistCas* cas, const uint8_t blob[DIST_BLAKE2B_LEN], + CfreeFileData* out); +int dist_cas_put_tree(DistCas* cas, const uint8_t tree[DIST_BLAKE2B_LEN], + const uint8_t* data, size_t len); +int dist_cas_get_tree(DistCas* cas, const uint8_t tree[DIST_BLAKE2B_LEN], + CfreeFileData* out); +int dist_cas_materialize_tree(DistCas* cas, const DistTree* tree, + const char* dst); + +#endif diff --git a/driver/dist/cfpkg.c b/driver/dist/cfpkg.c @@ -314,6 +314,39 @@ static int parse_u64(const char* s, uint64_t* out) { return DIST_OK; } +static int parse_u64_strict(const char* s, uint64_t* out) { + uint64_t v = 0; + if (!*s) return DIST_ERR; + while (*s) { + uint64_t digit; + if (*s < '0' || *s > '9') return DIST_ERR; + digit = (uint64_t)(*s - '0'); + if (v > (UINT64_MAX - digit) / 10u) return DIST_ERR; + v = v * 10u + digit; + ++s; + } + *out = v; + return DIST_OK; +} + +static int parse_hex32(uint8_t out[DIST_BLAKE2B_LEN], const char* val) { + if (strlen(val) != 2 * DIST_BLAKE2B_LEN) return DIST_ERR; + return dist_hex_decode(out, val, DIST_BLAKE2B_LEN); +} + +static int copy_field(char* out, size_t cap, const char* val) { + size_t n = strlen(val); + if (n >= cap) return DIST_ERR; + memcpy(out, val, n + 1u); + return DIST_OK; +} + +static int seen_once(uint32_t* seen, uint32_t bit) { + if (*seen & bit) return DIST_ERR; + *seen |= bit; + return DIST_OK; +} + int dist_cfpkg_descriptor_parse(const uint8_t* data, size_t len, DistCfpkgDescriptor* d, char* err, size_t errcap) { @@ -400,6 +433,454 @@ int dist_cfpkg_descriptor_parse(const uint8_t* data, size_t len, "missing required encoding descriptor field"); } +void dist_cfpkg3_region_root(uint8_t out[DIST_BLAKE2B_LEN], const char* kind, + const uint8_t* data, size_t len) { + uint8_t region[DIST_BLAKE2B_LEN]; + DistBlake2b h; + static const uint8_t dom[] = "cfree region v1"; + dist_blake2b_init(&h, DIST_BLAKE2B_LEN); + if (len) dist_blake2b_update(&h, data, len); + dist_blake2b_final(&h, region); + + dist_blake2b_init(&h, DIST_BLAKE2B_LEN); + dist_blake2b_update(&h, dom, sizeof dom - 1u); + dist_blake2b_update(&h, (const uint8_t*)kind, strlen(kind)); + dist_blake2b_update(&h, region, DIST_BLAKE2B_LEN); + dist_blake2b_final(&h, out); +} + +int dist_cfpkg3_write_header(CfreeWriter* out, const DistCfpkg3Header* h) { + uint8_t b[DIST_CFPKG3_HEADER_SIZE]; + size_t off = 16u; + memset(b, 0, sizeof b); + memcpy(b, DIST_CFPKG3_MAGIC, 8u); + put_u32le(b + 8u, DIST_CFPKG3_VERSION); + put_u32le(b + 12u, DIST_CFPKG3_HEADER_SIZE); +#define PUT3(v) \ + do { \ + put_u64le(b + off, (v)); \ + off += 8u; \ + } while (0) + PUT3(h->manifest_offset); + PUT3(h->manifest_size); + PUT3(h->signature_offset); + PUT3(h->signature_size); + PUT3(h->descriptor_offset); + PUT3(h->descriptor_size); + PUT3(h->descriptor_signature_offset); + PUT3(h->descriptor_signature_size); + PUT3(h->pubkey_offset); + PUT3(h->pubkey_size); +#undef PUT3 + return cfree_writer_write(out, b, sizeof b) == CFREE_OK ? DIST_OK : DIST_ERR; +} + +int dist_cfpkg3_read_header(const uint8_t* data, size_t len, + DistCfpkg3Header* h) { + size_t off = 16u; + if (len < DIST_CFPKG3_HEADER_SIZE) return DIST_ERR; + if (memcmp(data, DIST_CFPKG3_MAGIC, 8u) != 0) return DIST_ERR; + if (get_u32le(data + 8u) != DIST_CFPKG3_VERSION || + get_u32le(data + 12u) != DIST_CFPKG3_HEADER_SIZE) + return DIST_ERR; +#define GET3(dst) \ + do { \ + (dst) = get_u64le(data + off); \ + off += 8u; \ + } while (0) + GET3(h->manifest_offset); + GET3(h->manifest_size); + GET3(h->signature_offset); + GET3(h->signature_size); + GET3(h->descriptor_offset); + GET3(h->descriptor_size); + GET3(h->descriptor_signature_offset); + GET3(h->descriptor_signature_size); + GET3(h->pubkey_offset); + GET3(h->pubkey_size); +#undef GET3 + return DIST_OK; +} + +void dist_cfpkg3_encode_index_record( + uint8_t out[DIST_CFPKG3_INDEX_RECORD_SIZE], + const DistCfpkg3IndexRecord* r) { + memset(out, 0, DIST_CFPKG3_INDEX_RECORD_SIZE); + memcpy(out + 0u, r->blob_id, DIST_BLAKE2B_LEN); + put_u64le(out + 32u, r->chunk_index); + put_u64le(out + 40u, r->content_offset); + put_u64le(out + 48u, r->stored_size); + put_u64le(out + 56u, r->raw_size); + put_u32le(out + 64u, r->compression); + put_u32le(out + 68u, 0u); + memcpy(out + 72u, r->stored_hash, DIST_BLAKE2B_LEN); + memcpy(out + 104u, r->raw_hash, DIST_BLAKE2B_LEN); + memcpy(out + 136u, r->leaf_hash, DIST_BLAKE2B_LEN); +} + +int dist_cfpkg3_decode_index_record(const uint8_t* data, size_t len, + DistCfpkg3IndexRecord* r) { + if (len < DIST_CFPKG3_INDEX_RECORD_SIZE) return DIST_ERR; + if (get_u32le(data + 68u) != 0u) return DIST_ERR; + memcpy(r->blob_id, data + 0u, DIST_BLAKE2B_LEN); + r->chunk_index = get_u64le(data + 32u); + r->content_offset = get_u64le(data + 40u); + r->stored_size = get_u64le(data + 48u); + r->raw_size = get_u64le(data + 56u); + r->compression = get_u32le(data + 64u); + memcpy(r->stored_hash, data + 72u, DIST_BLAKE2B_LEN); + memcpy(r->raw_hash, data + 104u, DIST_BLAKE2B_LEN); + memcpy(r->leaf_hash, data + 136u, DIST_BLAKE2B_LEN); + return DIST_OK; +} + +int dist_cfpkg3_descriptor_emit(CfreeWriter* out, + const DistCfpkg3Descriptor* d) { + size_t i; + if (emit(out, "cfree-encoding 3\n") != DIST_OK) return DIST_ERR; + if (emit_hex(out, "package-id", d->package_id) != DIST_OK) return DIST_ERR; + if (emit_kv(out, "format", "cfpkg") != DIST_OK) return DIST_ERR; + if (emit_kv(out, "hash", DIST_CFPKG3_HASH) != DIST_OK) return DIST_ERR; + if (emit_kv(out, "tree", DIST_CFPKG3_TREE_FORMAT) != DIST_OK) + return DIST_ERR; + if (emit_kv(out, "blob", DIST_CFPKG3_BLOB_FORMAT) != DIST_OK) + return DIST_ERR; + if (emit_u64(out, "chunk-size", d->chunk_size) != DIST_OK) return DIST_ERR; + if (emit_u64(out, "alignment", d->alignment) != DIST_OK) return DIST_ERR; + if (emit_u64(out, "tree-offset", d->tree_offset) != DIST_OK) + return DIST_ERR; + if (emit_u64(out, "tree-size", d->tree_size) != DIST_OK) return DIST_ERR; + if (emit_hex(out, "tree-root", d->tree_root) != DIST_OK) return DIST_ERR; + if (emit_u64(out, "index-offset", d->index_offset) != DIST_OK) + return DIST_ERR; + if (emit_u64(out, "index-size", d->index_size) != DIST_OK) return DIST_ERR; + if (emit_u64(out, "index-bytes", d->index_bytes) != DIST_OK) + return DIST_ERR; + if (emit_hex(out, "index-root", d->index_root) != DIST_OK) return DIST_ERR; + if (d->index_url[0] && + emit_kv(out, "index-url", d->index_url) != DIST_OK) + return DIST_ERR; + if (emit_u64(out, "content-offset", d->content_offset) != DIST_OK) + return DIST_ERR; + if (emit_u64(out, "content-size", d->content_size) != DIST_OK) + return DIST_ERR; + if (emit_hex(out, "content-root", d->content_root) != DIST_OK) + return DIST_ERR; + + for (i = 0; i < d->n_trees; ++i) { + if (emit(out, "\n[tree-object]\n") != DIST_OK) return DIST_ERR; + if (emit_hex(out, "tree", d->trees[i].tree) != DIST_OK) return DIST_ERR; + if (d->trees[i].embedded) { + if (emit_u64(out, "offset", d->trees[i].offset) != DIST_OK) + return DIST_ERR; + if (emit_u64(out, "size", d->trees[i].size) != DIST_OK) + return DIST_ERR; + } + if (emit_hex(out, "blake2b", d->trees[i].blake2b) != DIST_OK) + return DIST_ERR; + if (d->trees[i].url[0] && + emit_kv(out, "url", d->trees[i].url) != DIST_OK) + return DIST_ERR; + } + + for (i = 0; i < d->n_chunk_sources; ++i) { + if (emit(out, "\n[chunk-source]\n") != DIST_OK) return DIST_ERR; + if (d->chunk_sources[i].kind == DIST_CFPKG3_CHUNK_SOURCE_EMBEDDED) { + if (emit_kv(out, "kind", "embedded") != DIST_OK) return DIST_ERR; + } else if (d->chunk_sources[i].kind == + DIST_CFPKG3_CHUNK_SOURCE_URL_TEMPLATE) { + if (!d->chunk_sources[i].tmpl[0]) return DIST_ERR; + if (emit_kv(out, "kind", "url-template") != DIST_OK) return DIST_ERR; + if (emit_kv(out, "template", d->chunk_sources[i].tmpl) != DIST_OK) + return DIST_ERR; + } else { + return DIST_ERR; + } + } + return DIST_OK; +} + +typedef enum Cfpkg3DescriptorSection { + CFPKG3_DESC_TOP = 0, + CFPKG3_DESC_TREE_OBJECT = 1, + CFPKG3_DESC_CHUNK_SOURCE = 2, +} Cfpkg3DescriptorSection; + +#define CFPKG3_TOP_PACKAGE_ID (1u << 0) +#define CFPKG3_TOP_FORMAT (1u << 1) +#define CFPKG3_TOP_HASH (1u << 2) +#define CFPKG3_TOP_TREE (1u << 3) +#define CFPKG3_TOP_BLOB (1u << 4) +#define CFPKG3_TOP_CHUNK_SIZE (1u << 5) +#define CFPKG3_TOP_ALIGNMENT (1u << 6) +#define CFPKG3_TOP_TREE_OFFSET (1u << 7) +#define CFPKG3_TOP_TREE_SIZE (1u << 8) +#define CFPKG3_TOP_TREE_ROOT (1u << 9) +#define CFPKG3_TOP_INDEX_OFFSET (1u << 10) +#define CFPKG3_TOP_INDEX_SIZE (1u << 11) +#define CFPKG3_TOP_INDEX_BYTES (1u << 12) +#define CFPKG3_TOP_INDEX_ROOT (1u << 13) +#define CFPKG3_TOP_CONTENT_OFFSET (1u << 14) +#define CFPKG3_TOP_CONTENT_SIZE (1u << 15) +#define CFPKG3_TOP_CONTENT_ROOT (1u << 16) +#define CFPKG3_TOP_INDEX_URL (1u << 17) +#define CFPKG3_TOP_REQUIRED ((1u << 17) - 1u) + +#define CFPKG3_TREE_SEEN_TREE (1u << 0) +#define CFPKG3_TREE_SEEN_OFFSET (1u << 1) +#define CFPKG3_TREE_SEEN_SIZE (1u << 2) +#define CFPKG3_TREE_SEEN_BLAKE2B (1u << 3) +#define CFPKG3_TREE_SEEN_URL (1u << 4) + +#define CFPKG3_CHUNK_SEEN_KIND (1u << 0) +#define CFPKG3_CHUNK_SEEN_TEMPLATE (1u << 1) + +static int finish_cfpkg3_section(Cfpkg3DescriptorSection section, + uint32_t seen, DistCfpkg3Descriptor* d, + char* err, size_t errcap) { + if (section == CFPKG3_DESC_TREE_OBJECT) { + DistCfpkg3TreeObject* tree = &d->trees[d->n_trees - 1u]; + int has_offset = (seen & CFPKG3_TREE_SEEN_OFFSET) != 0; + int has_size = (seen & CFPKG3_TREE_SEEN_SIZE) != 0; + if ((seen & CFPKG3_TREE_SEEN_TREE) == 0 || + (seen & CFPKG3_TREE_SEEN_BLAKE2B) == 0) + return set_err(err, errcap, "missing tree-object field"); + if (has_offset != has_size) + return set_err(err, errcap, "partial tree-object embedded range"); + tree->embedded = has_offset; + } else if (section == CFPKG3_DESC_CHUNK_SOURCE) { + DistCfpkg3ChunkSource* source = &d->chunk_sources[d->n_chunk_sources - 1u]; + if ((seen & CFPKG3_CHUNK_SEEN_KIND) == 0) + return set_err(err, errcap, "missing chunk-source kind"); + if (source->kind == DIST_CFPKG3_CHUNK_SOURCE_URL_TEMPLATE) { + if ((seen & CFPKG3_CHUNK_SEEN_TEMPLATE) == 0) + return set_err(err, errcap, "missing chunk-source template"); + } else if (source->kind == DIST_CFPKG3_CHUNK_SOURCE_EMBEDDED) { + if ((seen & CFPKG3_CHUNK_SEEN_TEMPLATE) != 0) + return set_err(err, errcap, "embedded chunk-source has template"); + } else { + return set_err(err, errcap, "bad chunk-source kind"); + } + } + return DIST_OK; +} + +static int parse_cfpkg3_top_key(DistCfpkg3Descriptor* d, uint32_t* seen, + const char* key, const char* val, char* err, + size_t errcap) { + if (strcmp(key, "package-id") == 0) { + if (seen_once(seen, CFPKG3_TOP_PACKAGE_ID) != DIST_OK || + parse_hex32(d->package_id, val) != DIST_OK) + return set_err(err, errcap, "bad package-id"); + } else if (strcmp(key, "format") == 0) { + if (seen_once(seen, CFPKG3_TOP_FORMAT) != DIST_OK || + strcmp(val, "cfpkg") != 0) + return set_err(err, errcap, "bad format"); + } else if (strcmp(key, "hash") == 0) { + if (seen_once(seen, CFPKG3_TOP_HASH) != DIST_OK || + strcmp(val, DIST_CFPKG3_HASH) != 0) + return set_err(err, errcap, "bad hash algorithm"); + } else if (strcmp(key, "tree") == 0) { + if (seen_once(seen, CFPKG3_TOP_TREE) != DIST_OK || + strcmp(val, DIST_CFPKG3_TREE_FORMAT) != 0) + return set_err(err, errcap, "bad tree format"); + } else if (strcmp(key, "blob") == 0) { + if (seen_once(seen, CFPKG3_TOP_BLOB) != DIST_OK || + strcmp(val, DIST_CFPKG3_BLOB_FORMAT) != 0) + return set_err(err, errcap, "bad blob format"); + } else if (strcmp(key, "chunk-size") == 0) { + if (seen_once(seen, CFPKG3_TOP_CHUNK_SIZE) != DIST_OK || + parse_u64_strict(val, &d->chunk_size) != DIST_OK || + d->chunk_size == 0) + return set_err(err, errcap, "bad chunk-size"); + } else if (strcmp(key, "alignment") == 0) { + if (seen_once(seen, CFPKG3_TOP_ALIGNMENT) != DIST_OK || + parse_u64_strict(val, &d->alignment) != DIST_OK || + d->alignment == 0) + return set_err(err, errcap, "bad alignment"); + } else if (strcmp(key, "tree-offset") == 0) { + if (seen_once(seen, CFPKG3_TOP_TREE_OFFSET) != DIST_OK || + parse_u64_strict(val, &d->tree_offset) != DIST_OK) + return set_err(err, errcap, "bad tree-offset"); + } else if (strcmp(key, "tree-size") == 0) { + if (seen_once(seen, CFPKG3_TOP_TREE_SIZE) != DIST_OK || + parse_u64_strict(val, &d->tree_size) != DIST_OK) + return set_err(err, errcap, "bad tree-size"); + } else if (strcmp(key, "tree-root") == 0) { + if (seen_once(seen, CFPKG3_TOP_TREE_ROOT) != DIST_OK || + parse_hex32(d->tree_root, val) != DIST_OK) + return set_err(err, errcap, "bad tree-root"); + } else if (strcmp(key, "index-offset") == 0) { + if (seen_once(seen, CFPKG3_TOP_INDEX_OFFSET) != DIST_OK || + parse_u64_strict(val, &d->index_offset) != DIST_OK) + return set_err(err, errcap, "bad index-offset"); + } else if (strcmp(key, "index-size") == 0) { + if (seen_once(seen, CFPKG3_TOP_INDEX_SIZE) != DIST_OK || + parse_u64_strict(val, &d->index_size) != DIST_OK) + return set_err(err, errcap, "bad index-size"); + } else if (strcmp(key, "index-bytes") == 0) { + if (seen_once(seen, CFPKG3_TOP_INDEX_BYTES) != DIST_OK || + parse_u64_strict(val, &d->index_bytes) != DIST_OK) + return set_err(err, errcap, "bad index-bytes"); + } else if (strcmp(key, "index-root") == 0) { + if (seen_once(seen, CFPKG3_TOP_INDEX_ROOT) != DIST_OK || + parse_hex32(d->index_root, val) != DIST_OK) + return set_err(err, errcap, "bad index-root"); + } else if (strcmp(key, "index-url") == 0) { + if (seen_once(seen, CFPKG3_TOP_INDEX_URL) != DIST_OK || + copy_field(d->index_url, sizeof d->index_url, val) != DIST_OK) + return set_err(err, errcap, "bad index-url"); + } else if (strcmp(key, "content-offset") == 0) { + if (seen_once(seen, CFPKG3_TOP_CONTENT_OFFSET) != DIST_OK || + parse_u64_strict(val, &d->content_offset) != DIST_OK) + return set_err(err, errcap, "bad content-offset"); + } else if (strcmp(key, "content-size") == 0) { + if (seen_once(seen, CFPKG3_TOP_CONTENT_SIZE) != DIST_OK || + parse_u64_strict(val, &d->content_size) != DIST_OK) + return set_err(err, errcap, "bad content-size"); + } else if (strcmp(key, "content-root") == 0) { + if (seen_once(seen, CFPKG3_TOP_CONTENT_ROOT) != DIST_OK || + parse_hex32(d->content_root, val) != DIST_OK) + return set_err(err, errcap, "bad content-root"); + } else { + return set_err(err, errcap, "unknown encoding descriptor key"); + } + return DIST_OK; +} + +static int parse_cfpkg3_tree_key(DistCfpkg3TreeObject* tree, uint32_t* seen, + const char* key, const char* val, char* err, + size_t errcap) { + if (strcmp(key, "tree") == 0) { + if (seen_once(seen, CFPKG3_TREE_SEEN_TREE) != DIST_OK || + parse_hex32(tree->tree, val) != DIST_OK) + return set_err(err, errcap, "bad tree-object tree"); + } else if (strcmp(key, "offset") == 0) { + if (seen_once(seen, CFPKG3_TREE_SEEN_OFFSET) != DIST_OK || + parse_u64_strict(val, &tree->offset) != DIST_OK) + return set_err(err, errcap, "bad tree-object offset"); + } else if (strcmp(key, "size") == 0) { + if (seen_once(seen, CFPKG3_TREE_SEEN_SIZE) != DIST_OK || + parse_u64_strict(val, &tree->size) != DIST_OK) + return set_err(err, errcap, "bad tree-object size"); + } else if (strcmp(key, "blake2b") == 0) { + if (seen_once(seen, CFPKG3_TREE_SEEN_BLAKE2B) != DIST_OK || + parse_hex32(tree->blake2b, val) != DIST_OK) + return set_err(err, errcap, "bad tree-object blake2b"); + } else if (strcmp(key, "url") == 0) { + if (seen_once(seen, CFPKG3_TREE_SEEN_URL) != DIST_OK || + copy_field(tree->url, sizeof tree->url, val) != DIST_OK) + return set_err(err, errcap, "bad tree-object url"); + } else { + return set_err(err, errcap, "unknown tree-object key"); + } + return DIST_OK; +} + +static int parse_cfpkg3_chunk_key(DistCfpkg3ChunkSource* source, + uint32_t* seen, const char* key, + const char* val, char* err, + size_t errcap) { + if (strcmp(key, "kind") == 0) { + if (seen_once(seen, CFPKG3_CHUNK_SEEN_KIND) != DIST_OK) + return set_err(err, errcap, "bad chunk-source kind"); + if (strcmp(val, "embedded") == 0) { + source->kind = DIST_CFPKG3_CHUNK_SOURCE_EMBEDDED; + } else if (strcmp(val, "url-template") == 0) { + source->kind = DIST_CFPKG3_CHUNK_SOURCE_URL_TEMPLATE; + } else { + return set_err(err, errcap, "bad chunk-source kind"); + } + } else if (strcmp(key, "template") == 0) { + if (seen_once(seen, CFPKG3_CHUNK_SEEN_TEMPLATE) != DIST_OK || + copy_field(source->tmpl, sizeof source->tmpl, val) != DIST_OK) + return set_err(err, errcap, "bad chunk-source template"); + } else { + return set_err(err, errcap, "unknown chunk-source key"); + } + return DIST_OK; +} + +int dist_cfpkg3_descriptor_parse(const uint8_t* data, size_t len, + DistCfpkg3Descriptor* d, char* err, + size_t errcap) { + size_t pos = 0; + int first = 1; + uint32_t top_seen = 0, section_seen = 0; + Cfpkg3DescriptorSection section = CFPKG3_DESC_TOP; + memset(d, 0, sizeof *d); + while (pos < len) { + char buf[DESC_LINE_MAX], *t, *eq, *key, *val; + size_t end = pos, n; + while (end < len && data[end] != '\n') ++end; + n = end - pos; + if (n >= sizeof buf) return set_err(err, errcap, "line too long"); + memcpy(buf, data + pos, n); + buf[n] = '\0'; + pos = (end < len) ? end + 1u : end; + trim_trail(buf); + if (first) { + first = 0; + if (strcmp(buf, "cfree-encoding 3") != 0) + return set_err(err, errcap, "bad encoding descriptor magic/version"); + continue; + } + t = trim_lead(buf); + if (*t == '\0' || *t == '#') continue; + if (*t == '[') { + if (finish_cfpkg3_section(section, section_seen, d, err, errcap) != + DIST_OK) + return DIST_ERR; + section_seen = 0; + if (strcmp(t, "[tree-object]") == 0) { + if (d->n_trees == DIST_MAX_OUTPUTS) + return set_err(err, errcap, "too many tree-object sections"); + memset(&d->trees[d->n_trees], 0, sizeof d->trees[d->n_trees]); + ++d->n_trees; + section = CFPKG3_DESC_TREE_OBJECT; + } else if (strcmp(t, "[chunk-source]") == 0) { + if (d->n_chunk_sources == DIST_MAX_OUTPUTS) + return set_err(err, errcap, "too many chunk-source sections"); + memset(&d->chunk_sources[d->n_chunk_sources], 0, + sizeof d->chunk_sources[d->n_chunk_sources]); + ++d->n_chunk_sources; + section = CFPKG3_DESC_CHUNK_SOURCE; + } else { + return set_err(err, errcap, "unknown encoding descriptor section"); + } + continue; + } + eq = strchr(t, '='); + if (!eq) return set_err(err, errcap, "expected key = value"); + *eq = '\0'; + key = t; + trim_trail(key); + val = trim_lead(eq + 1); + if (section == CFPKG3_DESC_TOP) { + if (parse_cfpkg3_top_key(d, &top_seen, key, val, err, errcap) != + DIST_OK) + return DIST_ERR; + } else if (section == CFPKG3_DESC_TREE_OBJECT) { + if (parse_cfpkg3_tree_key(&d->trees[d->n_trees - 1u], &section_seen, + key, val, err, errcap) != DIST_OK) + return DIST_ERR; + } else { + if (parse_cfpkg3_chunk_key( + &d->chunk_sources[d->n_chunk_sources - 1u], &section_seen, key, + val, err, errcap) != DIST_OK) + return DIST_ERR; + } + } + if (first) return set_err(err, errcap, "empty encoding descriptor"); + if (finish_cfpkg3_section(section, section_seen, d, err, errcap) != DIST_OK) + return DIST_ERR; + if ((top_seen & CFPKG3_TOP_REQUIRED) != CFPKG3_TOP_REQUIRED) + return set_err(err, errcap, "missing required encoding descriptor field"); + if (d->index_size != 0 && d->index_size != d->index_bytes) + return set_err(err, errcap, "embedded index size mismatch"); + return DIST_OK; +} + const char* dist_cfpkg_compression_name(uint32_t c) { if (c == DIST_CFPKG_COMP_NONE) return "none"; if (c == DIST_CFPKG_COMP_LZ4_BLOCK_V1) return "lz4-block-v1"; diff --git a/driver/dist/cfpkg.h b/driver/dist/cfpkg.h @@ -14,6 +14,16 @@ #define DIST_CFPKG_CHUNK_SIZE_DEFAULT 65536u #define DIST_CFPKG_INDEX_RECORD_SIZE 144u +#define DIST_CFPKG3_MAGIC "cfpkg3\0" +#define DIST_CFPKG3_VERSION 3u +#define DIST_CFPKG3_HEADER_SIZE 96u +#define DIST_CFPKG3_ALIGNMENT 16u +#define DIST_CFPKG3_CHUNK_SIZE_DEFAULT 65536u +#define DIST_CFPKG3_INDEX_RECORD_SIZE 168u +#define DIST_CFPKG3_HASH "blake2b-256" +#define DIST_CFPKG3_TREE_FORMAT "cfree-tree-v1" +#define DIST_CFPKG3_BLOB_FORMAT "cfree-blob-v1" + typedef enum DistCfpkgCompression { DIST_CFPKG_COMP_NONE = 0, DIST_CFPKG_COMP_LZ4_BLOCK_V1 = 1, @@ -30,6 +40,14 @@ typedef struct DistCfpkgHeader { uint64_t alignment, chunk_size; } DistCfpkgHeader; +typedef struct DistCfpkg3Header { + uint64_t manifest_offset, manifest_size; + uint64_t signature_offset, signature_size; + uint64_t descriptor_offset, descriptor_size; + uint64_t descriptor_signature_offset, descriptor_signature_size; + uint64_t pubkey_offset, pubkey_size; +} DistCfpkg3Header; + typedef struct DistCfpkgIndexRecord { uint64_t artifact_id; uint64_t chunk_index; @@ -42,6 +60,18 @@ typedef struct DistCfpkgIndexRecord { uint8_t leaf_hash[DIST_BLAKE2B_LEN]; } DistCfpkgIndexRecord; +typedef struct DistCfpkg3IndexRecord { + uint8_t blob_id[DIST_BLAKE2B_LEN]; + uint64_t chunk_index; + uint64_t content_offset; /* relative to embedded content region */ + uint64_t stored_size; + uint64_t raw_size; + uint32_t compression; + uint8_t stored_hash[DIST_BLAKE2B_LEN]; + uint8_t raw_hash[DIST_BLAKE2B_LEN]; + uint8_t leaf_hash[DIST_BLAKE2B_LEN]; +} DistCfpkg3IndexRecord; + typedef struct DistCfpkgDescriptor { uint8_t package_id[DIST_BLAKE2B_LEN]; uint64_t index_offset, index_size; @@ -51,6 +81,40 @@ typedef struct DistCfpkgDescriptor { uint64_t chunk_size, alignment; } DistCfpkgDescriptor; +typedef struct DistCfpkg3TreeObject { + uint8_t tree[DIST_BLAKE2B_LEN]; + uint64_t offset, size; + int embedded; + uint8_t blake2b[DIST_BLAKE2B_LEN]; + char url[DIST_PATH_MAX + 1]; +} DistCfpkg3TreeObject; + +typedef enum DistCfpkg3ChunkSourceKind { + DIST_CFPKG3_CHUNK_SOURCE_EMBEDDED = 1, + DIST_CFPKG3_CHUNK_SOURCE_URL_TEMPLATE = 2, +} DistCfpkg3ChunkSourceKind; + +typedef struct DistCfpkg3ChunkSource { + uint32_t kind; + char tmpl[DIST_PATH_MAX + 1]; +} DistCfpkg3ChunkSource; + +typedef struct DistCfpkg3Descriptor { + uint8_t package_id[DIST_BLAKE2B_LEN]; + uint64_t chunk_size, alignment; + uint64_t tree_offset, tree_size; + uint8_t tree_root[DIST_BLAKE2B_LEN]; + uint64_t index_offset, index_size, index_bytes; + uint8_t index_root[DIST_BLAKE2B_LEN]; + char index_url[DIST_PATH_MAX + 1]; + uint64_t content_offset, content_size; + uint8_t content_root[DIST_BLAKE2B_LEN]; + DistCfpkg3TreeObject trees[DIST_MAX_OUTPUTS]; + size_t n_trees; + DistCfpkg3ChunkSource chunk_sources[DIST_MAX_OUTPUTS]; + size_t n_chunk_sources; +} DistCfpkg3Descriptor; + void dist_cfpkg2_leaf_hash(uint8_t out[DIST_BLAKE2B_LEN], uint64_t artifact_id, uint64_t chunk_index, const uint8_t* raw, size_t raw_len); @@ -85,6 +149,22 @@ int dist_cfpkg_descriptor_parse(const uint8_t* data, size_t len, DistCfpkgDescriptor* d, char* err, size_t errcap); +void dist_cfpkg3_region_root(uint8_t out[DIST_BLAKE2B_LEN], const char* kind, + const uint8_t* data, size_t len); +int dist_cfpkg3_write_header(CfreeWriter* out, const DistCfpkg3Header* h); +int dist_cfpkg3_read_header(const uint8_t* data, size_t len, + DistCfpkg3Header* h); +void dist_cfpkg3_encode_index_record( + uint8_t out[DIST_CFPKG3_INDEX_RECORD_SIZE], + const DistCfpkg3IndexRecord* r); +int dist_cfpkg3_decode_index_record(const uint8_t* data, size_t len, + DistCfpkg3IndexRecord* r); +int dist_cfpkg3_descriptor_emit(CfreeWriter* out, + const DistCfpkg3Descriptor* d); +int dist_cfpkg3_descriptor_parse(const uint8_t* data, size_t len, + DistCfpkg3Descriptor* d, char* err, + size_t errcap); + const char* dist_cfpkg_compression_name(uint32_t c); int dist_cfpkg_compression_parse(const char* s, uint32_t* out); diff --git a/driver/dist/dist.h b/driver/dist/dist.h @@ -26,6 +26,7 @@ #define DIST_MAX_ARTIFACTS 64u #define DIST_MAX_DEPS 64u #define DIST_MAX_FILES 256u +#define DIST_MAX_OUTPUTS 16u /* String field caps inside in-memory manifest structs. */ #define DIST_NAME_MAX 128u diff --git a/driver/dist/manifest.c b/driver/dist/manifest.c @@ -315,3 +315,459 @@ int dist_manifest_parse(const uint8_t* data, size_t len, DistManifest* m, } return DIST_OK; } + +#define P3_F_NAME 0x00000001u +#define P3_F_VERSION 0x00000002u +#define P3_F_DESCRIPTION 0x00000004u +#define P3_F_HASH 0x00000008u +#define P3_F_TREE_FORMAT 0x00000010u +#define P3_F_BLOB_FORMAT 0x00000020u +#define P3_F_ID 0x00000040u +#define P3_F_OUTPUT_NAME 0x00000080u +#define P3_F_TREE_ID 0x00000100u +#define P3_F_TARGET 0x00000200u +#define P3_F_DEFAULT 0x00000400u +#define P3_F_OUTPUT 0x00000800u +#define P3_F_PATH 0x00001000u +#define P3_F_KIND 0x00002000u +#define P3_F_ENTRY 0x00004000u +#define P3_F_PACKAGE 0x00008000u +#define P3_F_KEY 0x00010000u + +typedef enum { + P3_SEC_TOP, + P3_SEC_OUTPUT, + P3_SEC_ARTIFACT, + P3_SEC_DEPENDENCY +} PackageSection; + +static int field_text_valid(const char* s, int required) { + if (required && !s[0]) return 0; + for (; *s; ++s) { + if (*s == '\n' || *s == '\r') return 0; + } + return 1; +} + +static int parse_bool3(const char* s, int* out) { + if (strcmp(s, "true") == 0) { + *out = 1; + return DIST_OK; + } + if (strcmp(s, "false") == 0) { + *out = 0; + return DIST_OK; + } + return DIST_ERR; +} + +static int parse_u64_dec3(const char* s, uint64_t* out) { + uint64_t v = 0; + if (!*s) return DIST_ERR; + for (; *s; ++s) { + unsigned digit; + if (*s < '0' || *s > '9') return DIST_ERR; + digit = (unsigned)(*s - '0'); + if (v > (UINT64_MAX - (uint64_t)digit) / 10u) return DIST_ERR; + v = v * 10u + (uint64_t)digit; + } + *out = v; + return DIST_OK; +} + +static int decode_hash3(uint8_t out[DIST_BLAKE2B_LEN], const char* val, + const char* err_msg, char* err, size_t errcap) { + if (strlen(val) != 2u * DIST_BLAKE2B_LEN || + dist_hex_decode(out, val, DIST_BLAKE2B_LEN) != DIST_OK) + return set_err(err, errcap, err_msg); + return DIST_OK; +} + +static int decode_keyid3(uint8_t out[DIST_KEYID_LEN], const char* val, + char* err, size_t errcap) { + if (strlen(val) != 2u * DIST_KEYID_LEN || + dist_hex_decode(out, val, DIST_KEYID_LEN) != DIST_OK) + return set_err(err, errcap, "bad dependency key id"); + return DIST_OK; +} + +static int seen_once3(uint32_t* seen, uint32_t bit, const char* msg, char* err, + size_t errcap) { + if (*seen & bit) return set_err(err, errcap, msg); + *seen |= bit; + return DIST_OK; +} + +static int find_output3(const DistPackageManifest* m, uint64_t id) { + size_t i; + for (i = 0; i < m->n_outputs; ++i) { + if (m->outputs[i].id == id) return (int)i; + } + return -1; +} + +static int finalize_package_section3(PackageSection sec, uint32_t seen, + char* err, size_t errcap) { + if (sec == P3_SEC_TOP) { + if ((seen & (P3_F_NAME | P3_F_VERSION | P3_F_HASH | P3_F_TREE_FORMAT | + P3_F_BLOB_FORMAT)) != + (P3_F_NAME | P3_F_VERSION | P3_F_HASH | P3_F_TREE_FORMAT | + P3_F_BLOB_FORMAT)) + return set_err(err, errcap, "missing required top-level field"); + } else if (sec == P3_SEC_OUTPUT) { + if ((seen & (P3_F_ID | P3_F_OUTPUT_NAME | P3_F_TREE_ID)) != + (P3_F_ID | P3_F_OUTPUT_NAME | P3_F_TREE_ID)) + return set_err(err, errcap, "missing required [output] field"); + } else if (sec == P3_SEC_ARTIFACT) { + if ((seen & (P3_F_OUTPUT | P3_F_PATH | P3_F_KIND)) != + (P3_F_OUTPUT | P3_F_PATH | P3_F_KIND)) + return set_err(err, errcap, "missing required [artifact] field"); + } else { + if ((seen & (P3_F_NAME | P3_F_VERSION)) != (P3_F_NAME | P3_F_VERSION)) + return set_err(err, errcap, "missing required [dependency] field"); + } + return DIST_OK; +} + +int dist_package_manifest_validate(const DistPackageManifest* m, char* err, + size_t errcap) { + size_t i, j; + size_t default_outputs = 0; + + if (!field_text_valid(m->name, 1) || !field_text_valid(m->version, 1) || + !field_text_valid(m->description, 0)) + return set_err(err, errcap, "bad package string field"); + if (m->n_outputs == 0) return set_err(err, errcap, "missing [output]"); + if (m->n_outputs > DIST_MAX_OUTPUTS) + return set_err(err, errcap, "too many outputs"); + if (m->n_artifacts > DIST_MAX_ARTIFACTS) + return set_err(err, errcap, "too many artifacts"); + if (m->n_deps > DIST_MAX_DEPS) + return set_err(err, errcap, "too many dependencies"); + + for (i = 0; i < m->n_outputs; ++i) { + const DistPackageOutput* out = &m->outputs[i]; + if (!field_text_valid(out->name, 1) || !field_text_valid(out->target, 0)) + return set_err(err, errcap, "bad output string field"); + if (out->is_default) ++default_outputs; + for (j = i + 1u; j < m->n_outputs; ++j) { + if (out->id == m->outputs[j].id) + return set_err(err, errcap, "duplicate output id"); + } + } + if (default_outputs > 1u) + return set_err(err, errcap, "duplicate default output"); + + for (i = 0; i < m->n_artifacts; ++i) { + const DistPackageArtifact* art = &m->artifacts[i]; + if (find_output3(m, art->output_id) < 0) + return set_err(err, errcap, "artifact references unknown output"); + if (!field_text_valid(art->path, 1) || !dist_manifest_path_valid(art->path)) + return set_err(err, errcap, "unsafe artifact path"); + if (!kind_valid(art->kind)) return set_err(err, errcap, "unknown artifact kind"); + for (j = i + 1u; j < m->n_artifacts; ++j) { + const DistPackageArtifact* other = &m->artifacts[j]; + if (art->output_id == other->output_id && + strcmp(art->path, other->path) == 0) + return set_err(err, errcap, "duplicate artifact path"); + } + } + + for (i = 0; i < m->n_deps; ++i) { + const DistPackageDependency* dep = &m->deps[i]; + if (!field_text_valid(dep->name, 1) || !field_text_valid(dep->version, 1)) + return set_err(err, errcap, "bad dependency string field"); + } + + return DIST_OK; +} + +int dist_package_manifest_emit(const DistPackageManifest* m, CfreeWriter* out) { + size_t i; + char err[128]; + + if (dist_package_manifest_validate(m, err, sizeof err) != DIST_OK) + return DIST_ERR; + + if (emit(out, DIST_PACKAGE3_MAGIC "\n") != DIST_OK) return DIST_ERR; + if (emit_kv(out, "name", m->name) != DIST_OK) return DIST_ERR; + if (emit_kv(out, "version", m->version) != DIST_OK) return DIST_ERR; + if (m->description[0] && + emit_kv(out, "description", m->description) != DIST_OK) + return DIST_ERR; + if (emit_kv(out, "hash", DIST_PACKAGE3_HASH) != DIST_OK) return DIST_ERR; + if (emit_kv(out, "tree", DIST_PACKAGE3_TREE_FORMAT) != DIST_OK) + return DIST_ERR; + if (emit_kv(out, "blob", DIST_PACKAGE3_BLOB_FORMAT) != DIST_OK) + return DIST_ERR; + + for (i = 0; i < m->n_outputs; ++i) { + const DistPackageOutput* pkg_out = &m->outputs[i]; + if (emit(out, "\n[output]\n") != DIST_OK) return DIST_ERR; + if (emit_u64(out, "id", pkg_out->id) != DIST_OK) return DIST_ERR; + if (emit_kv(out, "name", pkg_out->name) != DIST_OK) return DIST_ERR; + if (emit_hex(out, "tree", pkg_out->tree, DIST_BLAKE2B_LEN) != DIST_OK) + return DIST_ERR; + if (pkg_out->target[0] && + emit_kv(out, "target", pkg_out->target) != DIST_OK) + return DIST_ERR; + if (pkg_out->is_default && + emit_kv(out, "default", "true") != DIST_OK) + return DIST_ERR; + } + + for (i = 0; i < m->n_artifacts; ++i) { + const DistPackageArtifact* art = &m->artifacts[i]; + if (emit(out, "\n[artifact]\n") != DIST_OK) return DIST_ERR; + if (emit_u64(out, "output", art->output_id) != DIST_OK) return DIST_ERR; + if (emit_kv(out, "path", art->path) != DIST_OK) return DIST_ERR; + if (emit_kv(out, "kind", art->kind) != DIST_OK) return DIST_ERR; + if (art->entry && emit_kv(out, "entry", "true") != DIST_OK) + return DIST_ERR; + } + + for (i = 0; i < m->n_deps; ++i) { + const DistPackageDependency* dep = &m->deps[i]; + if (emit(out, "\n[dependency]\n") != DIST_OK) return DIST_ERR; + if (emit_kv(out, "name", dep->name) != DIST_OK) return DIST_ERR; + if (emit_kv(out, "version", dep->version) != DIST_OK) return DIST_ERR; + if (dep->has_package && + emit_hex(out, "package", dep->package, DIST_BLAKE2B_LEN) != DIST_OK) + return DIST_ERR; + if (dep->has_keyid && + emit_hex(out, "key", dep->keyid, DIST_KEYID_LEN) != DIST_OK) + return DIST_ERR; + } + + return DIST_OK; +} + +int dist_package_manifest_parse(const uint8_t* data, size_t len, + DistPackageManifest* m, char* err, + size_t errcap) { + size_t pos = 0; + int first = 1; + PackageSection sec = P3_SEC_TOP; + uint32_t seen = 0; + DistPackageOutput* pkg_out = NULL; + DistPackageArtifact* art = NULL; + DistPackageDependency* dep = NULL; + + memset(m, 0, sizeof *m); + + while (pos < len) { + char buf[DIST_LINE_MAX]; + size_t end = pos; + size_t n; + char *t, *key, *val, *eq; + + while (end < len && data[end] != '\n') ++end; + n = end - pos; + if (n >= sizeof buf) return set_err(err, errcap, "line too long"); + memcpy(buf, data + pos, n); + buf[n] = '\0'; + pos = (end < len) ? end + 1 : end; + trim_trail(buf); + + if (first) { + first = 0; + if (strcmp(buf, DIST_PACKAGE3_MAGIC) != 0) + return set_err(err, errcap, "bad package manifest magic/version"); + continue; + } + + t = trim_lead(buf); + if (*t == '\0' || *t == '#') continue; + + if (*t == '[') { + if (finalize_package_section3(sec, seen, err, errcap) != DIST_OK) + return DIST_ERR; + seen = 0; + if (strcmp(t, "[output]") == 0) { + if (m->n_outputs >= DIST_MAX_OUTPUTS) + return set_err(err, errcap, "too many outputs"); + sec = P3_SEC_OUTPUT; + pkg_out = &m->outputs[m->n_outputs++]; + } else if (strcmp(t, "[artifact]") == 0) { + if (m->n_artifacts >= DIST_MAX_ARTIFACTS) + return set_err(err, errcap, "too many artifacts"); + sec = P3_SEC_ARTIFACT; + art = &m->artifacts[m->n_artifacts++]; + } else if (strcmp(t, "[dependency]") == 0) { + if (m->n_deps >= DIST_MAX_DEPS) + return set_err(err, errcap, "too many dependencies"); + sec = P3_SEC_DEPENDENCY; + dep = &m->deps[m->n_deps++]; + } else { + return set_err(err, errcap, "unknown section"); + } + continue; + } + + eq = strchr(t, '='); + if (!eq) return set_err(err, errcap, "expected key = value"); + *eq = '\0'; + key = t; + trim_trail(key); + val = trim_lead(eq + 1); + + if (sec == P3_SEC_TOP) { + if (strcmp(key, "name") == 0) { + if (seen_once3(&seen, P3_F_NAME, "duplicate top-level key", err, + errcap) != DIST_OK) + return DIST_ERR; + if (!field_text_valid(val, 1)) + return set_err(err, errcap, "bad package name"); + if (copy_field(m->name, sizeof m->name, val, err, errcap)) + return DIST_ERR; + } else if (strcmp(key, "version") == 0) { + if (seen_once3(&seen, P3_F_VERSION, "duplicate top-level key", err, + errcap) != DIST_OK) + return DIST_ERR; + if (!field_text_valid(val, 1)) + return set_err(err, errcap, "bad package version"); + if (copy_field(m->version, sizeof m->version, val, err, errcap)) + return DIST_ERR; + } else if (strcmp(key, "description") == 0) { + if (seen_once3(&seen, P3_F_DESCRIPTION, "duplicate top-level key", err, + errcap) != DIST_OK) + return DIST_ERR; + if (!field_text_valid(val, 0)) + return set_err(err, errcap, "bad package description"); + if (copy_field(m->description, sizeof m->description, val, err, errcap)) + return DIST_ERR; + } else if (strcmp(key, "hash") == 0) { + if (seen_once3(&seen, P3_F_HASH, "duplicate top-level key", err, + errcap) != DIST_OK) + return DIST_ERR; + if (strcmp(val, DIST_PACKAGE3_HASH) != 0) + return set_err(err, errcap, "unsupported hash algorithm"); + } else if (strcmp(key, "tree") == 0) { + if (seen_once3(&seen, P3_F_TREE_FORMAT, "duplicate top-level key", err, + errcap) != DIST_OK) + return DIST_ERR; + if (strcmp(val, DIST_PACKAGE3_TREE_FORMAT) != 0) + return set_err(err, errcap, "unsupported tree format"); + } else if (strcmp(key, "blob") == 0) { + if (seen_once3(&seen, P3_F_BLOB_FORMAT, "duplicate top-level key", err, + errcap) != DIST_OK) + return DIST_ERR; + if (strcmp(val, DIST_PACKAGE3_BLOB_FORMAT) != 0) + return set_err(err, errcap, "unsupported blob format"); + } else { + return set_err(err, errcap, "unknown top-level key"); + } + } else if (sec == P3_SEC_OUTPUT) { + if (strcmp(key, "id") == 0) { + if (seen_once3(&seen, P3_F_ID, "duplicate [output] key", err, + errcap) != DIST_OK) + return DIST_ERR; + if (parse_u64_dec3(val, &pkg_out->id) != DIST_OK) + return set_err(err, errcap, "bad output id"); + } else if (strcmp(key, "name") == 0) { + if (seen_once3(&seen, P3_F_OUTPUT_NAME, "duplicate [output] key", err, + errcap) != DIST_OK) + return DIST_ERR; + if (!field_text_valid(val, 1)) + return set_err(err, errcap, "bad output name"); + if (copy_field(pkg_out->name, sizeof pkg_out->name, val, err, errcap)) + return DIST_ERR; + } else if (strcmp(key, "tree") == 0) { + if (seen_once3(&seen, P3_F_TREE_ID, "duplicate [output] key", err, + errcap) != DIST_OK) + return DIST_ERR; + if (decode_hash3(pkg_out->tree, val, "bad output tree id", err, + errcap) != DIST_OK) + return DIST_ERR; + } else if (strcmp(key, "target") == 0) { + if (seen_once3(&seen, P3_F_TARGET, "duplicate [output] key", err, + errcap) != DIST_OK) + return DIST_ERR; + if (!field_text_valid(val, 0)) + return set_err(err, errcap, "bad output target"); + if (copy_field(pkg_out->target, sizeof pkg_out->target, val, err, + errcap)) + return DIST_ERR; + } else if (strcmp(key, "default") == 0) { + if (seen_once3(&seen, P3_F_DEFAULT, "duplicate [output] key", err, + errcap) != DIST_OK) + return DIST_ERR; + if (parse_bool3(val, &pkg_out->is_default) != DIST_OK) + return set_err(err, errcap, "bad default value"); + } else { + return set_err(err, errcap, "unknown [output] key"); + } + } else if (sec == P3_SEC_ARTIFACT) { + if (strcmp(key, "output") == 0) { + if (seen_once3(&seen, P3_F_OUTPUT, "duplicate [artifact] key", err, + errcap) != DIST_OK) + return DIST_ERR; + if (parse_u64_dec3(val, &art->output_id) != DIST_OK) + return set_err(err, errcap, "bad artifact output id"); + } else if (strcmp(key, "path") == 0) { + if (seen_once3(&seen, P3_F_PATH, "duplicate [artifact] key", err, + errcap) != DIST_OK) + return DIST_ERR; + if (!dist_manifest_path_valid(val)) + return set_err(err, errcap, "unsafe artifact path"); + if (copy_field(art->path, sizeof art->path, val, err, errcap)) + return DIST_ERR; + } else if (strcmp(key, "kind") == 0) { + if (seen_once3(&seen, P3_F_KIND, "duplicate [artifact] key", err, + errcap) != DIST_OK) + return DIST_ERR; + if (!kind_valid(val)) return set_err(err, errcap, "unknown artifact kind"); + if (copy_field(art->kind, sizeof art->kind, val, err, errcap)) + return DIST_ERR; + } else if (strcmp(key, "entry") == 0) { + if (seen_once3(&seen, P3_F_ENTRY, "duplicate [artifact] key", err, + errcap) != DIST_OK) + return DIST_ERR; + if (parse_bool3(val, &art->entry) != DIST_OK) + return set_err(err, errcap, "bad entry value"); + } else { + return set_err(err, errcap, "unknown [artifact] key"); + } + } else { + if (strcmp(key, "name") == 0) { + if (seen_once3(&seen, P3_F_NAME, "duplicate [dependency] key", err, + errcap) != DIST_OK) + return DIST_ERR; + if (!field_text_valid(val, 1)) + return set_err(err, errcap, "bad dependency name"); + if (copy_field(dep->name, sizeof dep->name, val, err, errcap)) + return DIST_ERR; + } else if (strcmp(key, "version") == 0) { + if (seen_once3(&seen, P3_F_VERSION, "duplicate [dependency] key", err, + errcap) != DIST_OK) + return DIST_ERR; + if (!field_text_valid(val, 1)) + return set_err(err, errcap, "bad dependency version"); + if (copy_field(dep->version, sizeof dep->version, val, err, errcap)) + return DIST_ERR; + } else if (strcmp(key, "package") == 0) { + if (seen_once3(&seen, P3_F_PACKAGE, "duplicate [dependency] key", err, + errcap) != DIST_OK) + return DIST_ERR; + if (decode_hash3(dep->package, val, "bad dependency package id", err, + errcap) != DIST_OK) + return DIST_ERR; + dep->has_package = 1; + } else if (strcmp(key, "key") == 0) { + if (seen_once3(&seen, P3_F_KEY, "duplicate [dependency] key", err, + errcap) != DIST_OK) + return DIST_ERR; + if (decode_keyid3(dep->keyid, val, err, errcap) != DIST_OK) + return DIST_ERR; + dep->has_keyid = 1; + } else { + return set_err(err, errcap, "unknown [dependency] key"); + } + } + } + + if (first) return set_err(err, errcap, "bad package manifest magic/version"); + if (finalize_package_section3(sec, seen, err, errcap) != DIST_OK) + return DIST_ERR; + return dist_package_manifest_validate(m, err, errcap); +} diff --git a/driver/dist/manifest.h b/driver/dist/manifest.h @@ -13,6 +13,10 @@ #define DIST_MANIFEST_MAGIC "cfree-package 2" #define DIST_MANIFEST_HASH "blake2b-merkle-v1" +#define DIST_PACKAGE3_MAGIC "cfree-package 3" +#define DIST_PACKAGE3_HASH "blake2b-256" +#define DIST_PACKAGE3_TREE_FORMAT "cfree-tree-v1" +#define DIST_PACKAGE3_BLOB_FORMAT "cfree-blob-v1" typedef struct DistArtifact { uint64_t id; @@ -44,6 +48,42 @@ typedef struct DistManifest { size_t n_deps; } DistManifest; +typedef struct DistPackageOutput { + uint64_t id; + char name[DIST_NAME_MAX]; + char target[DIST_TRIPLE_MAX]; /* "" = target-independent */ + uint8_t tree[DIST_BLAKE2B_LEN]; + int is_default; +} DistPackageOutput; + +typedef struct DistPackageArtifact { + uint64_t output_id; + char path[DIST_PATH_MAX + 1]; + char kind[DIST_KIND_MAX]; + int entry; +} DistPackageArtifact; + +typedef struct DistPackageDependency { + char name[DIST_NAME_MAX]; + char version[DIST_PCONSTRAINT_MAX]; + uint8_t package[DIST_BLAKE2B_LEN]; + int has_package; + uint8_t keyid[DIST_KEYID_LEN]; + int has_keyid; +} DistPackageDependency; + +typedef struct DistPackageManifest { + char name[DIST_NAME_MAX]; + char version[DIST_VERSION_MAX]; + char description[DIST_DESC_MAX]; /* "" = absent */ + DistPackageOutput outputs[DIST_MAX_OUTPUTS]; + size_t n_outputs; + DistPackageArtifact artifacts[DIST_MAX_ARTIFACTS]; + size_t n_artifacts; + DistPackageDependency deps[DIST_MAX_DEPS]; + size_t n_deps; +} DistPackageManifest; + int dist_manifest_emit(const DistManifest* m, CfreeWriter* out); int dist_manifest_path_valid(const char* path); @@ -51,4 +91,11 @@ int dist_manifest_path_valid(const char* path); int dist_manifest_parse(const uint8_t* data, size_t len, DistManifest* m, char* err, size_t errcap); +int dist_package_manifest_emit(const DistPackageManifest* m, CfreeWriter* out); +int dist_package_manifest_parse(const uint8_t* data, size_t len, + DistPackageManifest* m, char* err, + size_t errcap); +int dist_package_manifest_validate(const DistPackageManifest* m, char* err, + size_t errcap); + #endif diff --git a/driver/dist/tree.c b/driver/dist/tree.c @@ -0,0 +1,343 @@ +#include "tree.h" + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +#include "blake2b.h" + +#define TREE_LINE_MAX 1024u + +#define F_HASH 0x01u +#define F_TOP_BLOB 0x02u +#define F_PATH 0x04u +#define F_MODE 0x08u +#define F_SIZE 0x10u +#define F_FILE_BLOB 0x20u +#define F_ROOT 0x40u + +typedef enum TreeSection { TREE_SEC_TOP, TREE_SEC_FILE } TreeSection; + +static int set_err(char* err, size_t cap, const char* msg) { + if (err && cap) snprintf(err, cap, "%s", msg); + return DIST_ERR; +} + +int dist_tree_mode_parse(const char* s, uint8_t* out) { + if (!s || !out) return DIST_ERR; + if (strcmp(s, "-") == 0) { + *out = DIST_TREE_MODE_FILE; + return DIST_OK; + } + if (strcmp(s, "x") == 0) { + *out = DIST_TREE_MODE_EXEC; + return DIST_OK; + } + return DIST_ERR; +} + +const char* dist_tree_mode_name(uint8_t mode) { + if (mode == DIST_TREE_MODE_FILE) return "-"; + if (mode == DIST_TREE_MODE_EXEC) return "x"; + return NULL; +} + +int dist_tree_path_valid(const char* p) { + size_t start = 0, i; + if (!p || !p[0] || p[0] == '/') return 0; + for (i = 0;; ++i) { + char c = p[i]; + if (c == '\\' || c == ':' || c == '\n' || c == '\r') return 0; + if (c == '/' || c == '\0') { + size_t n = i - start; + if (n == 0) return 0; + if (n == 1 && p[start] == '.') return 0; + if (n == 2 && p[start] == '.' && p[start + 1] == '.') return 0; + if (c == '\0') return 1; + start = i + 1u; + } + } +} + +static int entry_cmp(const void* ap, const void* bp) { + const DistTreeEntry* a = (const DistTreeEntry*)ap; + const DistTreeEntry* b = (const DistTreeEntry*)bp; + return strcmp(a->path, b->path); +} + +int dist_tree_sort_validate(DistTree* tree, char* err, size_t errcap) { + size_t i; + if (!tree) return set_err(err, errcap, "missing tree"); + if (tree->n_entries && !tree->entries) + return set_err(err, errcap, "missing tree entries"); + qsort(tree->entries, tree->n_entries, sizeof tree->entries[0], entry_cmp); + for (i = 0; i < tree->n_entries; ++i) { + if (!dist_tree_path_valid(tree->entries[i].path)) + return set_err(err, errcap, "unsafe tree path"); + if (!dist_tree_mode_name(tree->entries[i].mode)) + return set_err(err, errcap, "bad tree mode"); + if (i > 0 && strcmp(tree->entries[i - 1u].path, tree->entries[i].path) == 0) + return set_err(err, errcap, "duplicate tree path"); + } + return DIST_OK; +} + +static int tree_validate_canonical(const DistTree* tree) { + size_t i; + if (!tree) return DIST_ERR; + if (tree->n_entries && !tree->entries) return DIST_ERR; + for (i = 0; i < tree->n_entries; ++i) { + if (!dist_tree_path_valid(tree->entries[i].path)) return DIST_ERR; + if (!dist_tree_mode_name(tree->entries[i].mode)) return DIST_ERR; + if (i > 0 && strcmp(tree->entries[i - 1u].path, tree->entries[i].path) >= 0) + return DIST_ERR; + } + return DIST_OK; +} + +static int emit(CfreeWriter* out, const char* s) { + return cfree_writer_write(out, s, strlen(s)) == CFREE_OK ? DIST_OK : DIST_ERR; +} + +static int emit_kv(CfreeWriter* out, const char* key, const char* val) { + char line[TREE_LINE_MAX]; + snprintf(line, sizeof line, "%s = %s\n", key, val); + return emit(out, line); +} + +static int emit_u64(CfreeWriter* out, const char* key, uint64_t v) { + char num[24]; + snprintf(num, sizeof num, "%llu", (unsigned long long)v); + return emit_kv(out, key, num); +} + +static int emit_hex(CfreeWriter* out, const char* key, const uint8_t* h) { + char hex[2 * DIST_BLAKE2B_LEN + 1]; + dist_hex_encode(hex, h, DIST_BLAKE2B_LEN); + return emit_kv(out, key, hex); +} + +int dist_tree_emit(const DistTree* tree, CfreeWriter* out) { + size_t i; + if (!out || tree_validate_canonical(tree) != DIST_OK) return DIST_ERR; + if (emit(out, DIST_TREE_MAGIC "\n") != DIST_OK) return DIST_ERR; + if (emit_kv(out, "hash", DIST_TREE_HASH) != DIST_OK) return DIST_ERR; + if (emit_kv(out, "blob", DIST_TREE_BLOB_FORMAT) != DIST_OK) return DIST_ERR; + for (i = 0; i < tree->n_entries; ++i) { + const DistTreeEntry* e = &tree->entries[i]; + const char* mode = dist_tree_mode_name(e->mode); + if (!mode) return DIST_ERR; + if (emit(out, "\n[file]\n") != DIST_OK) return DIST_ERR; + if (emit_kv(out, "path", e->path) != DIST_OK) return DIST_ERR; + if (emit_kv(out, "mode", mode) != DIST_OK) return DIST_ERR; + if (emit_u64(out, "size", e->size) != DIST_OK) return DIST_ERR; + if (emit_hex(out, "blob", e->blob) != DIST_OK) return DIST_ERR; + if (emit_hex(out, "root", e->root) != DIST_OK) return DIST_ERR; + } + return DIST_OK; +} + +static char* trim_lead(char* s) { + while (*s == ' ' || *s == '\t') ++s; + return s; +} + +static void trim_trail(char* s) { + size_t n = strlen(s); + while (n && (s[n - 1] == ' ' || s[n - 1] == '\t' || s[n - 1] == '\r' || + s[n - 1] == '\n')) + s[--n] = '\0'; +} + +static int copy_field(char* dst, size_t cap, const char* src, char* err, + size_t errcap) { + if (strlen(src) >= cap) return set_err(err, errcap, "field value too long"); + snprintf(dst, cap, "%s", src); + return DIST_OK; +} + +static int parse_u64(const char* s, uint64_t* out) { + uint64_t v = 0; + size_t i; + if (!s || !*s || !out) return DIST_ERR; + for (i = 0; s[i]; ++i) { + unsigned d; + if (s[i] < '0' || s[i] > '9') return DIST_ERR; + d = (unsigned)(s[i] - '0'); + if (v > (UINT64_MAX - d) / 10u) return DIST_ERR; + v = v * 10u + d; + } + *out = v; + return DIST_OK; +} + +static int hex_hash(const char* val, uint8_t out[DIST_BLAKE2B_LEN]) { + if (strlen(val) != 2u * DIST_BLAKE2B_LEN) return DIST_ERR; + return dist_hex_decode(out, val, DIST_BLAKE2B_LEN); +} + +static int check_dup(uint32_t seen, uint32_t flag, char* err, size_t errcap) { + if (seen & flag) return set_err(err, errcap, "duplicate tree field"); + return DIST_OK; +} + +static int finalize_file(const DistTree* tree, uint32_t seen, char* err, + size_t errcap) { + const DistTreeEntry* e; + if ((seen & (F_PATH | F_MODE | F_SIZE | F_FILE_BLOB | F_ROOT)) != + (F_PATH | F_MODE | F_SIZE | F_FILE_BLOB | F_ROOT)) + return set_err(err, errcap, "missing required [file] field"); + if (!tree || tree->n_entries == 0) return set_err(err, errcap, "missing file"); + e = &tree->entries[tree->n_entries - 1u]; + if (!dist_tree_path_valid(e->path)) + return set_err(err, errcap, "unsafe tree path"); + if (!dist_tree_mode_name(e->mode)) + return set_err(err, errcap, "bad tree mode"); + if (tree->n_entries > 1u) { + const char* prev = tree->entries[tree->n_entries - 2u].path; + int cmp = strcmp(prev, e->path); + if (cmp == 0) return set_err(err, errcap, "duplicate tree path"); + if (cmp > 0) return set_err(err, errcap, "non-canonical tree ordering"); + } + return DIST_OK; +} + +int dist_tree_parse(const uint8_t* data, size_t len, DistTree* out, char* err, + size_t errcap) { + size_t pos = 0; + int first = 1; + TreeSection sec = TREE_SEC_TOP; + uint32_t top_seen = 0; + uint32_t file_seen = 0; + + if (!data || !out) return set_err(err, errcap, "missing tree manifest"); + if (out->cap_entries && !out->entries) + return set_err(err, errcap, "missing tree entries"); + out->n_entries = 0; + + while (pos < len) { + char buf[TREE_LINE_MAX]; + size_t end = pos; + size_t n, i; + char *t, *key, *val, *eq; + + while (end < len && data[end] != '\n') ++end; + n = end - pos; + if (n >= sizeof buf) return set_err(err, errcap, "line too long"); + for (i = pos; i < end; ++i) + if (data[i] == 0) return set_err(err, errcap, "NUL byte in tree manifest"); + memcpy(buf, data + pos, n); + buf[n] = '\0'; + pos = (end < len) ? end + 1u : end; + trim_trail(buf); + + if (first) { + first = 0; + if (strcmp(buf, DIST_TREE_MAGIC) != 0) + return set_err(err, errcap, "bad tree magic/version"); + continue; + } + + t = trim_lead(buf); + if (*t == '\0' || *t == '#') continue; + + if (*t == '[') { + if (strcmp(t, "[file]") != 0) + return set_err(err, errcap, "unknown tree section"); + if ((top_seen & (F_HASH | F_TOP_BLOB)) != (F_HASH | F_TOP_BLOB)) + return set_err(err, errcap, "missing required top-level field"); + if (sec == TREE_SEC_FILE && + finalize_file(out, file_seen, err, errcap) != DIST_OK) + return DIST_ERR; + if (out->n_entries >= out->cap_entries) + return set_err(err, errcap, "too many tree files"); + memset(&out->entries[out->n_entries], 0, sizeof out->entries[0]); + ++out->n_entries; + sec = TREE_SEC_FILE; + file_seen = 0; + continue; + } + + eq = strchr(t, '='); + if (!eq) return set_err(err, errcap, "expected key = value"); + *eq = '\0'; + key = t; + trim_trail(key); + val = trim_lead(eq + 1); + + if (sec == TREE_SEC_TOP) { + if (strcmp(key, "hash") == 0) { + if (check_dup(top_seen, F_HASH, err, errcap) != DIST_OK) + return DIST_ERR; + if (strcmp(val, DIST_TREE_HASH) != 0) + return set_err(err, errcap, "unsupported tree hash"); + top_seen |= F_HASH; + } else if (strcmp(key, "blob") == 0) { + if (check_dup(top_seen, F_TOP_BLOB, err, errcap) != DIST_OK) + return DIST_ERR; + if (strcmp(val, DIST_TREE_BLOB_FORMAT) != 0) + return set_err(err, errcap, "unsupported tree blob format"); + top_seen |= F_TOP_BLOB; + } else { + return set_err(err, errcap, "unknown top-level tree key"); + } + } else { + DistTreeEntry* e = &out->entries[out->n_entries - 1u]; + if (strcmp(key, "path") == 0) { + if (check_dup(file_seen, F_PATH, err, errcap) != DIST_OK) + return DIST_ERR; + if (!dist_tree_path_valid(val)) return set_err(err, errcap, "unsafe tree path"); + if (copy_field(e->path, sizeof e->path, val, err, errcap) != DIST_OK) + return DIST_ERR; + file_seen |= F_PATH; + } else if (strcmp(key, "mode") == 0) { + if (check_dup(file_seen, F_MODE, err, errcap) != DIST_OK) + return DIST_ERR; + if (dist_tree_mode_parse(val, &e->mode) != DIST_OK) + return set_err(err, errcap, "bad tree mode"); + file_seen |= F_MODE; + } else if (strcmp(key, "size") == 0) { + if (check_dup(file_seen, F_SIZE, err, errcap) != DIST_OK) + return DIST_ERR; + if (parse_u64(val, &e->size) != DIST_OK) + return set_err(err, errcap, "bad tree size"); + file_seen |= F_SIZE; + } else if (strcmp(key, "blob") == 0) { + if (check_dup(file_seen, F_FILE_BLOB, err, errcap) != DIST_OK) + return DIST_ERR; + if (hex_hash(val, e->blob) != DIST_OK) + return set_err(err, errcap, "bad tree blob hash"); + file_seen |= F_FILE_BLOB; + } else if (strcmp(key, "root") == 0) { + if (check_dup(file_seen, F_ROOT, err, errcap) != DIST_OK) + return DIST_ERR; + if (hex_hash(val, e->root) != DIST_OK) + return set_err(err, errcap, "bad tree blob root"); + file_seen |= F_ROOT; + } else { + return set_err(err, errcap, "unknown [file] tree key"); + } + } + } + + if (first) return set_err(err, errcap, "bad tree magic/version"); + if ((top_seen & (F_HASH | F_TOP_BLOB)) != (F_HASH | F_TOP_BLOB)) + return set_err(err, errcap, "missing required top-level field"); + if (sec == TREE_SEC_FILE && + finalize_file(out, file_seen, err, errcap) != DIST_OK) + return DIST_ERR; + return DIST_OK; +} + +void dist_tree_id(uint8_t out[DIST_BLAKE2B_LEN], const uint8_t* manifest, + size_t len) { + dist_blake2b(out, manifest, len); +} + +const DistTreeEntry* dist_tree_find(const DistTree* tree, const char* path) { + size_t i; + if (!tree || !path) return NULL; + for (i = 0; i < tree->n_entries; ++i) + if (strcmp(tree->entries[i].path, path) == 0) return &tree->entries[i]; + return NULL; +} diff --git a/driver/dist/tree.h b/driver/dist/tree.h @@ -0,0 +1,44 @@ +#ifndef CFREE_DIST_TREE_H +#define CFREE_DIST_TREE_H + +#include <cfree/core.h> +#include <stddef.h> +#include <stdint.h> + +#include "dist.h" + +#define DIST_TREE_MAGIC "cfree-tree 1" +#define DIST_TREE_HASH "blake2b-256" +#define DIST_TREE_BLOB_FORMAT "cfree-blob-v1" + +typedef enum DistTreeMode { + DIST_TREE_MODE_FILE = 0, + DIST_TREE_MODE_EXEC = 1, +} DistTreeMode; + +typedef struct DistTreeEntry { + char path[DIST_PATH_MAX + 1]; + uint8_t mode; /* DistTreeMode */ + uint8_t blob[DIST_BLAKE2B_LEN]; + uint8_t root[DIST_BLAKE2B_LEN]; + uint64_t size; +} DistTreeEntry; + +typedef struct DistTree { + DistTreeEntry* entries; + size_t n_entries; + size_t cap_entries; +} DistTree; + +int dist_tree_mode_parse(const char* s, uint8_t* out); +const char* dist_tree_mode_name(uint8_t mode); +int dist_tree_path_valid(const char* path); +int dist_tree_sort_validate(DistTree* tree, char* err, size_t errcap); +int dist_tree_emit(const DistTree* tree, CfreeWriter* out); +int dist_tree_parse(const uint8_t* data, size_t len, DistTree* out, char* err, + size_t errcap); +void dist_tree_id(uint8_t out[DIST_BLAKE2B_LEN], const uint8_t* manifest, + size_t len); +const DistTreeEntry* dist_tree_find(const DistTree* tree, const char* path); + +#endif diff --git a/driver/driver.h b/driver/driver.h @@ -53,6 +53,7 @@ int driver_nm(int argc, char** argv); int driver_size(int argc, char** argv); int driver_addr2line(int argc, char** argv); int driver_strings(int argc, char** argv); +int driver_cas(int argc, char** argv); int driver_pkg(int argc, char** argv); /* Per-tool help printers. Write a multi-section help text to stdout and @@ -76,6 +77,7 @@ void driver_help_nm(void); void driver_help_size(void); void driver_help_addr2line(void); void driver_help_strings(void); +void driver_help_cas(void); void driver_help_pkg(void); /* Multi-call top-level help (`cfree`, `cfree -h`, `cfree --help`, diff --git a/driver/env.h b/driver/env.h @@ -112,6 +112,14 @@ int driver_mkdir_p(DriverEnv*, const char* path); * Returns 0 on success, nonzero on chmod failure. */ int driver_mark_executable_output(const char* path); +/* Walk regular files below `root`, reporting tree-relative paths with '/' + * separators. The callback returns nonzero to abort the walk. Unsupported + * filesystem entries cause a nonzero return from the walk helper. */ +typedef int (*DriverWalkFileFn)(void* user, const char* source_path, + const char* tree_path, int executable); +int driver_walk_regular_files(DriverEnv*, const char* root, + DriverWalkFileFn, void* user); + /* Diagnostic printing to host stderr. Format is `"<tool>: <fmt>\n"`. */ void driver_errf(const char* tool, const char* fmt, ...); diff --git a/driver/env/posix.c b/driver/env/posix.c @@ -5,6 +5,7 @@ * specifics are isolated in macos.c / linux.c / freebsd.c. */ #include <errno.h> +#include <dirent.h> #include <fcntl.h> #include <pthread.h> #include <signal.h> @@ -391,6 +392,69 @@ int driver_mark_executable_output(const char* path) { return chmod(path, mode) == 0 ? 0 : 1; } +static char* driver_join_path(DriverEnv* env, const char* a, const char* b) { + size_t al = cfree_slice_cstr(a).len; + size_t bl = cfree_slice_cstr(b).len; + int slash = al > 0 && a[al - 1u] != '/'; + char* out = (char*)driver_alloc(env, al + (slash ? 1u : 0u) + bl + 1u); + if (!out) return NULL; + memcpy(out, a, al); + if (slash) out[al++] = '/'; + memcpy(out + al, b, bl); + out[al + bl] = '\0'; + return out; +} + +static int driver_walk_regular_files_at(DriverEnv* env, const char* dir, + const char* rel, + DriverWalkFileFn cb, void* user) { + DIR* d; + struct dirent* ent; + int rc = 1; + d = opendir(dir); + if (!d) return 1; + while ((ent = readdir(d)) != NULL) { + const char* name = ent->d_name; + char* child; + char* child_rel; + struct stat sb; + int child_rc = 0; + if (driver_streq(name, ".") || driver_streq(name, "..")) continue; + child = driver_join_path(env, dir, name); + if (!child) goto out; + child_rel = rel && rel[0] ? driver_join_path(env, rel, name) + : driver_join_path(env, "", name); + if (!child_rel) { + driver_free(env, child, cfree_slice_cstr(child).len + 1u); + goto out; + } + if (lstat(child, &sb) != 0) { + child_rc = 1; + } else if (S_ISDIR(sb.st_mode)) { + child_rc = driver_walk_regular_files_at(env, child, child_rel, cb, user); + } else if (S_ISREG(sb.st_mode)) { + int x = (sb.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) != 0; + child_rc = cb(user, child, child_rel, x); + } else { + child_rc = 1; + } + driver_free(env, child_rel, cfree_slice_cstr(child_rel).len + 1u); + driver_free(env, child, cfree_slice_cstr(child).len + 1u); + if (child_rc) goto out; + } + rc = 0; + +out: + closedir(d); + return rc; +} + +int driver_walk_regular_files(DriverEnv* env, const char* root, + DriverWalkFileFn cb, void* user) { + if (!env || !root || !root[0] || !cb) return 1; + return driver_walk_regular_files_at(env, root, "", cb, user); +} + /* ---------------- time ---------------- */ uint64_t driver_now_ns(void) { diff --git a/driver/env/windows.c b/driver/env/windows.c @@ -81,6 +81,21 @@ static wchar_t* widen(const char* utf8) { return w; } +static char* narrow(const wchar_t* wide) { + int need; + char* out; + if (!wide) return NULL; + need = WideCharToMultiByte(CP_UTF8, 0, wide, -1, NULL, 0, NULL, NULL); + if (need <= 0) return NULL; + out = (char*)malloc((size_t)need); + if (!out) return NULL; + if (WideCharToMultiByte(CP_UTF8, 0, wide, -1, out, need, NULL, NULL) <= 0) { + free(out); + return NULL; + } + return out; +} + /* ============================================================ * exec_dual registry (write/runtime alias bookkeeping) * ============================================================ */ @@ -578,6 +593,96 @@ int driver_mark_executable_output(const char* path) { return 0; } +static char* driver_join_path(DriverEnv* env, const char* a, const char* b) { + size_t al = cfree_slice_cstr(a).len; + size_t bl = cfree_slice_cstr(b).len; + int slash = al > 0 && a[al - 1u] != '/' && a[al - 1u] != '\\'; + char* out = (char*)driver_alloc(env, al + (slash ? 1u : 0u) + bl + 1u); + if (!out) return NULL; + memcpy(out, a, al); + if (slash) out[al++] = '/'; + memcpy(out + al, b, bl); + out[al + bl] = '\0'; + return out; +} + +static int driver_walk_regular_files_at(DriverEnv* env, const char* dir, + const char* rel, + DriverWalkFileFn cb, void* user) { + char* pattern; + wchar_t* wpattern; + WIN32_FIND_DATAW fd; + HANDLE h; + DWORD last; + int rc = 1; + + pattern = driver_join_path(env, dir, "*"); + if (!pattern) return 1; + wpattern = widen(pattern); + driver_free(env, pattern, cfree_slice_cstr(pattern).len + 1u); + if (!wpattern) return 1; + + h = FindFirstFileW(wpattern, &fd); + free(wpattern); + if (h == INVALID_HANDLE_VALUE) { + last = GetLastError(); + return last == ERROR_FILE_NOT_FOUND ? 0 : 1; + } + + for (;;) { + char* name = narrow(fd.cFileName); + char* child = NULL; + char* child_rel = NULL; + int child_rc = 0; + if (!name) goto loop_fail; + if (driver_streq(name, ".") || driver_streq(name, "..")) { + free(name); + goto loop_next; + } + child = driver_join_path(env, dir, name); + child_rel = rel && rel[0] ? driver_join_path(env, rel, name) + : driver_join_path(env, "", name); + if (!child || !child_rel) goto loop_fail; + if ((fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0) { + if ((fd.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) != 0) { + child_rc = 1; + } else { + child_rc = + driver_walk_regular_files_at(env, child, child_rel, cb, user); + } + } else { + child_rc = cb(user, child, child_rel, 0); + } + if (child_rel) driver_free(env, child_rel, cfree_slice_cstr(child_rel).len + 1u); + if (child) driver_free(env, child, cfree_slice_cstr(child).len + 1u); + free(name); + if (child_rc) goto out; + + loop_next: + if (!FindNextFileW(h, &fd)) break; + continue; + + loop_fail: + if (child_rel) driver_free(env, child_rel, cfree_slice_cstr(child_rel).len + 1u); + if (child) driver_free(env, child, cfree_slice_cstr(child).len + 1u); + if (name) free(name); + goto out; + } + + last = GetLastError(); + rc = last == ERROR_NO_MORE_FILES ? 0 : 1; + +out: + FindClose(h); + return rc; +} + +int driver_walk_regular_files(DriverEnv* env, const char* root, + DriverWalkFileFn cb, void* user) { + if (!env || !root || !root[0] || !cb) return 1; + return driver_walk_regular_files_at(env, root, "", cb, user); +} + /* ============================================================ * Time * ============================================================ */ diff --git a/driver/main.c b/driver/main.c @@ -88,6 +88,10 @@ static const DriverToolDesc driver_tools[] = { {"strings", driver_strings, driver_help_strings, "Print printable character sequences found in a file"}, #endif +#if CFREE_TOOL_CAS_ENABLED + {"cas", driver_cas, driver_help_cas, + "Store, inspect, verify, and materialize cfree CAS blobs and trees"}, +#endif #if CFREE_TOOL_PKG_ENABLED {"pkg", driver_pkg, driver_help_pkg, "Bundle, sign, verify, and unpack distributable .cfpkg packages"}, diff --git a/driver/pkg.c b/driver/pkg.c @@ -6,6 +6,8 @@ #include <string.h> #include "dist/blake2b.h" +#include "dist/blob.h" +#include "dist/cas.h" #include "dist/cfpkg.h" #include "dist/deflate.h" #include "dist/dist.h" @@ -13,34 +15,65 @@ #include "dist/manifest.h" #include "dist/minisig.h" #include "dist/tar.h" +#include "dist/tree.h" #include "dist/trust.h" #include "driver.h" #include "env.h" #define PKG_TOOL "pkg" #define PKG_PATH_BUF 1024u -#define PKG_NAME_BUF 256u #define PKG_META_MANIFEST "cfree/package.manifest" #define PKG_META_SIG "cfree/package.manifest.minisig" #define PKG_META_PUB "cfree/package.pub" +#define PKG_DEFAULT_OUTPUT_ID 0u +#define PKG_MAX_TAR_ENTRIES (DIST_MAX_FILES + DIST_MAX_OUTPUTS + 8u) typedef enum PkgFormat { PKG_FMT_AUTO, PKG_FMT_CFPKG, PKG_FMT_TARGZ } PkgFormat; -typedef struct PkgInputFile { - const char* src; - const char* path; +typedef enum PkgNativeShape { + PKG_NATIVE_FAT, + PKG_NATIVE_METADATA, + PKG_NATIVE_THIN +} PkgNativeShape; + +typedef struct PkgBlob { CfreeFileData fd; int loaded; -} PkgInputFile; + uint8_t id[DIST_BLAKE2B_LEN]; + uint8_t root[DIST_BLAKE2B_LEN]; + uint64_t size; +} PkgBlob; + +typedef struct PkgSource { + DriverEnv* env; + DistTree tree; + DistTreeEntry entries[DIST_MAX_FILES]; + PkgBlob blobs[DIST_MAX_FILES]; + size_t n_blobs; + uint8_t tree_id[DIST_BLAKE2B_LEN]; + const uint8_t* tree_bytes; + size_t tree_size; + CfreeWriter* tree_mem; + CfreeFileData tree_fd; + int tree_loaded; +} PkgSource; typedef struct PkgVerified { - DistManifest manifest; + DistPackageManifest manifest; uint8_t package_id[DIST_BLAKE2B_LEN]; uint8_t keyid[DIST_KEYID_LEN]; uint8_t pk[DIST_ED25519_PK_LEN]; char trusted[DIST_TRUSTED_COMMENT_MAX]; } PkgVerified; +typedef struct PkgLoadedTree { + DistTree tree; + DistTreeEntry entries[DIST_MAX_FILES]; + uint8_t id[DIST_BLAKE2B_LEN]; + const uint8_t* bytes; + size_t size; +} PkgLoadedTree; + static int pkg_write_file(const CfreeContext* ctx, const char* path, const uint8_t* data, size_t len) { CfreeWriter* w = NULL; @@ -87,11 +120,27 @@ static void pkg_parent_dir(const char* path, char* buf, size_t cap) { return; } n = (size_t)(slash - path); - if (n >= cap) n = cap - 1; + if (n >= cap) n = cap - 1u; memcpy(buf, path, n); buf[n] = '\0'; } +static int pkg_join_path(char* out, size_t cap, const char* dir, + const char* rel) { + size_t dl, rl; + int slash; + if (!out || !cap || !dir || !rel) return DIST_ERR; + dl = strlen(dir); + rl = strlen(rel); + slash = dl > 0 && dir[dl - 1u] != '/'; + if (dl + (slash ? 1u : 0u) + rl + 1u > cap) return DIST_ERR; + memcpy(out, dir, dl); + if (slash) out[dl++] = '/'; + memcpy(out + dl, rel, rl); + out[dl + rl] = '\0'; + return DIST_OK; +} + static int pkg_read_file(const CfreeContext* ctx, const char* path, CfreeFileData* out) { return ctx->file_io->read_all(ctx->file_io->user, path, out) == CFREE_OK @@ -120,6 +169,22 @@ static PkgFormat pkg_infer_format(const char* path) { return PKG_FMT_AUTO; } +static int pkg_parse_native_shape(const char* s, PkgNativeShape* out) { + if (driver_streq(s, "fat")) { + *out = PKG_NATIVE_FAT; + return DIST_OK; + } + if (driver_streq(s, "metadata") || driver_streq(s, "metadata-rich")) { + *out = PKG_NATIVE_METADATA; + return DIST_OK; + } + if (driver_streq(s, "thin")) { + *out = PKG_NATIVE_THIN; + return DIST_OK; + } + return DIST_ERR; +} + static uint64_t pkg_align_up(uint64_t v, uint64_t a) { return a ? ((v + a - 1u) / a) * a : v; } @@ -131,7 +196,7 @@ static int pkg_write_pad(CfreeWriter* w, uint64_t target) { size_t n = left < sizeof z ? (size_t)left : sizeof z; if (cfree_writer_write(w, z, n) != CFREE_OK) return DIST_ERR; } - return DIST_OK; + return cfree_writer_tell(w) == target ? DIST_OK : DIST_ERR; } static void pkg_hash(uint8_t out[DIST_BLAKE2B_LEN], const uint8_t* data, @@ -139,72 +204,324 @@ static void pkg_hash(uint8_t out[DIST_BLAKE2B_LEN], const uint8_t* data, dist_blake2b(out, data, len); } -static void pkg_region_root(uint8_t out[DIST_BLAKE2B_LEN], const char* kind, - const uint8_t* data, size_t len) { - uint8_t h[DIST_BLAKE2B_LEN]; - pkg_hash(h, data, len); - dist_cfpkg2_root_hash(out, kind, 0, h); +static int pkg_parse_id(const char* s, uint8_t out[DIST_BLAKE2B_LEN]) { + if (!s || driver_strlen(s) != 2u * DIST_BLAKE2B_LEN) return DIST_ERR; + return dist_hex_decode(out, s, DIST_BLAKE2B_LEN); } -static int pkg_input_cmp(const void* ap, const void* bp) { - const PkgInputFile* a = (const PkgInputFile*)ap; - const PkgInputFile* b = (const PkgInputFile*)bp; - int c = strcmp(a->path, b->path); - if (c != 0) return c; - return strcmp(a->src, b->src); +static int pkg_cas_rel_path(char* out, size_t cap, const char* kind, + const uint8_t id[DIST_BLAKE2B_LEN]) { + char hex[2 * DIST_BLAKE2B_LEN + 1]; + int n; + dist_hex_encode(hex, id, DIST_BLAKE2B_LEN); + n = snprintf(out, cap, "cfree/cas/%s/%c%c/%s", kind, hex[0], hex[1], hex); + return n > 0 && (size_t)n < cap ? DIST_OK : DIST_ERR; } -static int pkg_load_inputs(const CfreeContext* ctx, const char** files, - size_t n_files, PkgInputFile* in, DistManifest* m) { - size_t i; - memset(m, 0, sizeof *m); - for (i = 0; i < n_files; ++i) { - in[i].src = files[i]; - in[i].path = driver_basename(files[i]); - if (!dist_manifest_path_valid(in[i].path)) { - driver_errf(PKG_TOOL, "create: unsafe artifact path: %s", in[i].path); - return DIST_ERR; - } - if (pkg_read_file(ctx, files[i], &in[i].fd) != DIST_OK) { - driver_errf(PKG_TOOL, "create: cannot read file: %s", files[i]); - return DIST_ERR; +static int pkg_external_id_path(char* out, size_t cap, const char* kind, + const uint8_t id[DIST_BLAKE2B_LEN]) { + if (driver_streq(kind, "tree")) return dist_cas_tree_relpath(out, cap, id); + if (driver_streq(kind, "index")) return dist_cas_index_relpath(out, cap, id); + if (driver_streq(kind, "blob")) return dist_cas_blob_relpath(out, cap, id); + return DIST_ERR; +} + +static int pkg_external_chunk_path(char* out, size_t cap, + const uint8_t blob[DIST_BLAKE2B_LEN], + uint64_t chunk_index) { + return dist_cas_chunk_relpath(out, cap, blob, chunk_index); +} + +static int pkg_locator_safe(const char* path) { + size_t start = 0, i; + if (!path || !path[0] || path[0] == '/') return 0; + for (i = 0;; ++i) { + char c = path[i]; + if (c == '\\' || c == ':' || c == '\n' || c == '\r') return 0; + if (c == '/' || c == '\0') { + size_t n = i - start; + if (n == 0) return 0; + if (n == 1 && path[start] == '.') return 0; + if (n == 2 && path[start] == '.' && path[start + 1] == '.') return 0; + if (c == '\0') return 1; + start = i + 1u; } - in[i].loaded = 1; - if (m->n_artifacts >= DIST_MAX_ARTIFACTS) { - driver_errf(PKG_TOOL, "create: too many artifacts"); + } +} + +static int pkg_external_path(char* out, size_t cap, const char* root, + const char* rel) { + if (!pkg_locator_safe(rel)) return DIST_ERR; + return pkg_join_path(out, cap, root, rel); +} + +static int pkg_write_external_file(DriverEnv* env, const CfreeContext* ctx, + const char* root, const char* rel, + const uint8_t* data, size_t len) { + char full[PKG_PATH_BUF], parent[PKG_PATH_BUF]; + if (!root || pkg_external_path(full, sizeof full, root, rel) != DIST_OK) + return DIST_ERR; + pkg_parent_dir(full, parent, sizeof parent); + if (parent[0] && driver_mkdir_p(env, parent) != 0) return DIST_ERR; + return pkg_write_file(ctx, full, data, len); +} + +static int pkg_read_external_file(const CfreeContext* ctx, const char* root, + const char* rel, CfreeFileData* out) { + char full[PKG_PATH_BUF]; + if (!root || pkg_external_path(full, sizeof full, root, rel) != DIST_OK) + return DIST_ERR; + return pkg_read_file(ctx, full, out); +} + +static int pkg_blob_cmp(const void* ap, const void* bp) { + const PkgBlob* a = (const PkgBlob*)ap; + const PkgBlob* b = (const PkgBlob*)bp; + return memcmp(a->id, b->id, DIST_BLAKE2B_LEN); +} + +static PkgBlob* pkg_source_find_blob(PkgSource* src, + const uint8_t id[DIST_BLAKE2B_LEN]) { + size_t i; + for (i = 0; i < src->n_blobs; ++i) + if (memcmp(src->blobs[i].id, id, DIST_BLAKE2B_LEN) == 0) + return &src->blobs[i]; + return NULL; +} + +static void pkg_source_init(PkgSource* src, DriverEnv* env) { + memset(src, 0, sizeof *src); + src->env = env; + src->tree.entries = src->entries; + src->tree.cap_entries = DIST_MAX_FILES; +} + +static void pkg_source_release(PkgSource* src) { + size_t i; + if (!src || !src->env) return; + for (i = 0; i < src->n_blobs; ++i) { + if (src->blobs[i].loaded && src->env->file_io.release) + src->env->file_io.release(src->env->file_io.user, &src->blobs[i].fd); + } + if (src->tree_loaded && src->env->file_io.release) + src->env->file_io.release(src->env->file_io.user, &src->tree_fd); + if (src->tree_mem) cfree_writer_close(src->tree_mem); + memset(src, 0, sizeof *src); +} + +static int pkg_source_store_blob(PkgSource* src, CfreeFileData* fd, + const DistBlobInfo* bi, int take_fd) { + PkgBlob* existing = pkg_source_find_blob(src, bi->id); + if (existing) { + if (existing->size != bi->size || + memcmp(existing->root, bi->root, DIST_BLAKE2B_LEN) != 0) return DIST_ERR; - } + return DIST_OK; + } + if (src->n_blobs >= DIST_MAX_FILES) return DIST_ERR; + existing = &src->blobs[src->n_blobs++]; + memset(existing, 0, sizeof *existing); + memcpy(existing->id, bi->id, DIST_BLAKE2B_LEN); + memcpy(existing->root, bi->root, DIST_BLAKE2B_LEN); + existing->size = bi->size; + if (take_fd) { + existing->fd = *fd; + existing->loaded = 1; + fd->data = NULL; + fd->size = 0; + fd->token = NULL; + } + return DIST_OK; +} + +static int pkg_source_add_entry(PkgSource* src, const char* tree_path, + uint8_t mode, CfreeFileData* fd, + int take_fd) { + DistBlobInfo bi; + DistTreeEntry* e; + if (src->tree.n_entries >= src->tree.cap_entries) { + driver_errf(PKG_TOOL, "create: too many tree entries"); + return DIST_ERR; + } + if (!dist_tree_path_valid(tree_path)) { + driver_errf(PKG_TOOL, "create: unsafe tree path: %s", tree_path); + return DIST_ERR; + } + if (!dist_tree_mode_name(mode)) { + driver_errf(PKG_TOOL, "create: bad tree mode: %s", tree_path); + return DIST_ERR; + } + if (dist_blob_info(&bi, fd->data, fd->size, DIST_BLOB_CHUNK_SIZE_DEFAULT) != + DIST_OK) { + driver_errf(PKG_TOOL, "create: failed to hash blob: %s", tree_path); + return DIST_ERR; + } + if (pkg_source_store_blob(src, fd, &bi, take_fd) != DIST_OK) { + driver_errf(PKG_TOOL, "create: failed to store blob metadata: %s", + tree_path); + return DIST_ERR; + } + e = &src->tree.entries[src->tree.n_entries++]; + memset(e, 0, sizeof *e); + snprintf(e->path, sizeof e->path, "%s", tree_path); + e->mode = mode; + e->size = bi.size; + memcpy(e->blob, bi.id, DIST_BLAKE2B_LEN); + memcpy(e->root, bi.root, DIST_BLAKE2B_LEN); + return DIST_OK; +} + +static int pkg_source_walk_file(void* user, const char* source_path, + const char* tree_path, int executable) { + PkgSource* src = (PkgSource*)user; + CfreeFileData fd; + int rc; + fd.data = NULL; + fd.size = 0; + fd.token = NULL; + if (src->env->file_io.read_all(src->env->file_io.user, source_path, &fd) != + CFREE_OK) { + driver_errf(PKG_TOOL, "create: cannot read file: %s", source_path); + return 1; + } + rc = pkg_source_add_entry(src, tree_path, + executable ? DIST_TREE_MODE_EXEC + : DIST_TREE_MODE_FILE, + &fd, 1); + if (fd.data && src->env->file_io.release) + src->env->file_io.release(src->env->file_io.user, &fd); + return rc == DIST_OK ? 0 : 1; +} + +static int pkg_source_finish_tree(PkgSource* src) { + char err[128]; + if (dist_tree_sort_validate(&src->tree, err, sizeof err) != DIST_OK) { + driver_errf(PKG_TOOL, "create: %s", err); + return DIST_ERR; + } + if (cfree_writer_mem(src->env->heap, &src->tree_mem) != CFREE_OK) + return DIST_ERR; + if (dist_tree_emit(&src->tree, src->tree_mem) != DIST_OK || + cfree_writer_status(src->tree_mem) != CFREE_OK) { + driver_errf(PKG_TOOL, "create: failed to emit tree manifest"); + return DIST_ERR; + } + src->tree_bytes = cfree_writer_mem_bytes(src->tree_mem, &src->tree_size); + dist_tree_id(src->tree_id, src->tree_bytes, src->tree_size); + qsort(src->blobs, src->n_blobs, sizeof src->blobs[0], pkg_blob_cmp); + return DIST_OK; +} + +static int pkg_source_from_root(PkgSource* src, const char* root) { + if (driver_walk_regular_files(src->env, root, pkg_source_walk_file, src) != + 0) { + driver_errf(PKG_TOOL, "create: failed to walk directory: %s", root); + return DIST_ERR; } + return pkg_source_finish_tree(src); +} - qsort(in, n_files, sizeof in[0], pkg_input_cmp); +static void pkg_cas_init_get(DistCas* cas, DriverEnv* env, const char* root) { + memset(cas, 0, sizeof *cas); + cas->host.file_io = &env->file_io; + cas->host.user = env; + cas->root = root; +} - for (i = 0; i < n_files; ++i) { - DistArtifact* a; - if (i > 0 && driver_streq(in[i - 1].path, in[i].path)) { - driver_errf(PKG_TOOL, "create: duplicate artifact path: %s", in[i].path); +static int pkg_source_load_blob_from_cas(PkgSource* src, DistCas* cas, + const DistTreeEntry* e) { + PkgBlob* existing = pkg_source_find_blob(src, e->blob); + CfreeFileData fd; + DistBlobInfo bi; + if (existing) { + if (existing->size != e->size || + memcmp(existing->root, e->root, DIST_BLAKE2B_LEN) != 0) { + driver_errf(PKG_TOOL, "create: duplicate blob metadata mismatch: %s", + e->path); return DIST_ERR; } - a = &m->artifacts[m->n_artifacts++]; - a->id = (uint64_t)i; - snprintf(a->path, sizeof a->path, "%s", in[i].path); - snprintf(a->kind, sizeof a->kind, "%s", "data"); - a->size = in[i].fd.size; - pkg_hash(a->blake2b, in[i].fd.data, in[i].fd.size); - if (dist_cfpkg2_artifact_root(a->root, a->id, in[i].fd.data, - in[i].fd.size, - DIST_CFPKG_CHUNK_SIZE_DEFAULT) != DIST_OK) { - driver_errf(PKG_TOOL, "create: artifact too large for current chunk cap"); + return DIST_OK; + } + fd.data = NULL; + fd.size = 0; + fd.token = NULL; + if (dist_cas_get_blob(cas, e->blob, &fd) != DIST_OK) { + driver_errf(PKG_TOOL, "create: missing or corrupt blob for: %s", e->path); + return DIST_ERR; + } + if (dist_blob_info(&bi, fd.data, fd.size, DIST_BLOB_CHUNK_SIZE_DEFAULT) != + DIST_OK || + bi.size != e->size || memcmp(bi.root, e->root, DIST_BLAKE2B_LEN) != 0) { + if (src->env->file_io.release) + src->env->file_io.release(src->env->file_io.user, &fd); + driver_errf(PKG_TOOL, "create: blob root mismatch for: %s", e->path); + return DIST_ERR; + } + if (pkg_source_store_blob(src, &fd, &bi, 1) != DIST_OK) { + if (fd.data && src->env->file_io.release) + src->env->file_io.release(src->env->file_io.user, &fd); + return DIST_ERR; + } + return DIST_OK; +} + +static int pkg_source_from_cas(PkgSource* src, const char* cas_dir, + const char* tree_s) { + DistCas cas; + char err[128]; + size_t i; + if (pkg_parse_id(tree_s, src->tree_id) != DIST_OK) { + driver_errf(PKG_TOOL, "create: bad tree id: %s", tree_s); + return DIST_ERR; + } + pkg_cas_init_get(&cas, src->env, cas_dir); + src->tree_fd.data = NULL; + src->tree_fd.size = 0; + src->tree_fd.token = NULL; + if (dist_cas_get_tree(&cas, src->tree_id, &src->tree_fd) != DIST_OK) { + driver_errf(PKG_TOOL, "create: missing or corrupt tree: %s", tree_s); + return DIST_ERR; + } + src->tree_loaded = 1; + src->tree_bytes = src->tree_fd.data; + src->tree_size = src->tree_fd.size; + if (dist_tree_parse(src->tree_bytes, src->tree_size, &src->tree, err, + sizeof err) != DIST_OK) { + driver_errf(PKG_TOOL, "create: tree: %s", err); + return DIST_ERR; + } + for (i = 0; i < src->tree.n_entries; ++i) { + if (pkg_source_load_blob_from_cas(src, &cas, &src->tree.entries[i]) != + DIST_OK) return DIST_ERR; - } } + qsort(src->blobs, src->n_blobs, sizeof src->blobs[0], pkg_blob_cmp); return DIST_OK; } -static void pkg_release_inputs(const CfreeContext* ctx, PkgInputFile* in, - size_t n) { +static int pkg_manifest_from_source(const char* name, const char* version, + const char* desc, const PkgSource* src, + DistPackageManifest* m) { size_t i; - for (i = 0; i < n; ++i) - if (in[i].loaded) ctx->file_io->release(ctx->file_io->user, &in[i].fd); + memset(m, 0, sizeof *m); + snprintf(m->name, sizeof m->name, "%s", name); + snprintf(m->version, sizeof m->version, "%s", version); + if (desc) snprintf(m->description, sizeof m->description, "%s", desc); + m->n_outputs = 1; + m->outputs[0].id = PKG_DEFAULT_OUTPUT_ID; + snprintf(m->outputs[0].name, sizeof m->outputs[0].name, "%s", "default"); + memcpy(m->outputs[0].tree, src->tree_id, DIST_BLAKE2B_LEN); + m->outputs[0].is_default = 1; + for (i = 0; i < src->tree.n_entries; ++i) { + DistPackageArtifact* a; + if (m->n_artifacts >= DIST_MAX_ARTIFACTS) return DIST_ERR; + a = &m->artifacts[m->n_artifacts++]; + a->output_id = PKG_DEFAULT_OUTPUT_ID; + snprintf(a->path, sizeof a->path, "%s", src->tree.entries[i].path); + snprintf(a->kind, sizeof a->kind, "%s", "data"); + a->entry = 1; + } + return dist_package_manifest_validate(m, NULL, 0); } static int pkg_sign(CfreeWriter* out, DriverEnv* env, const uint8_t* data, @@ -227,10 +544,13 @@ void driver_help_pkg(void) { " cfree pkg keygen -o BASE\n" " cfree pkg create --name N --version V [--desc D] -s SECKEY\n" " [--format cfpkg|tar.gz] [--compression none|lz4-block-v1]\n" - " -o OUT FILE...\n" - " cfree pkg verify [-p PUBKEY | --tofu] [--format cfpkg|tar.gz] FILE\n" - " cfree pkg unpack [--verify] [-p PUBKEY | --tofu] [--format cfpkg|tar.gz] FILE -C DIR\n" - " cfree pkg inspect FILE\n" + " [--native-shape fat|metadata|thin] [--external DIR]\n" + " (--cas DIR --tree TREE_ID | --root DIR) -o OUT\n" + " cfree pkg verify [-p PUBKEY | --tofu] [--format cfpkg|tar.gz]\n" + " [--external DIR] FILE\n" + " cfree pkg unpack [--verify] [-p PUBKEY | --tofu] [--format cfpkg|tar.gz]\n" + " [--external DIR] FILE -C DIR\n" + " cfree pkg inspect [--manifest | --encoding] FILE\n" " cfree pkg trust {path | list | add PUBKEY [label] | remove KEYID}\n"); } @@ -284,13 +604,14 @@ static int pkg_keygen(DriverEnv* env, const CfreeContext* ctx, int argc, } static int pkg_create_targz(const CfreeContext* ctx, const char* out, - PkgInputFile* in, size_t n_files, - const uint8_t* man, size_t man_len, - const uint8_t* sig, size_t sig_len, - const uint8_t* pub, size_t pub_len) { + const PkgSource* src, const uint8_t* man, + size_t man_len, const uint8_t* sig, + size_t sig_len, const uint8_t* pub, + size_t pub_len) { CfreeWriter *tar = NULL, *gz = NULL; const uint8_t *tb, *gb; size_t tl, gl, i; + char path[PKG_PATH_BUF]; int rc = DIST_ERR; tar = pkg_mem(ctx); gz = pkg_mem(ctx); @@ -299,10 +620,16 @@ static int pkg_create_targz(const CfreeContext* ctx, const char* out, dist_tar_append(tar, PKG_META_SIG, sig, sig_len) != DIST_OK || dist_tar_append(tar, PKG_META_PUB, pub, pub_len) != DIST_OK) goto done; - for (i = 0; i < n_files; ++i) - if (dist_tar_append(tar, in[i].path, in[i].fd.data, in[i].fd.size) != - DIST_OK) + if (pkg_cas_rel_path(path, sizeof path, "tree", src->tree_id) != DIST_OK || + dist_tar_append(tar, path, src->tree_bytes, src->tree_size) != DIST_OK) + goto done; + for (i = 0; i < src->n_blobs; ++i) { + if (pkg_cas_rel_path(path, sizeof path, "blob", src->blobs[i].id) != + DIST_OK || + dist_tar_append(tar, path, src->blobs[i].fd.data, + (size_t)src->blobs[i].size) != DIST_OK) goto done; + } if (dist_tar_finish(tar) != DIST_OK) goto done; tb = cfree_writer_mem_bytes(tar, &tl); if (dist_gz_compress(gz, tb, tl) != DIST_OK) goto done; @@ -314,40 +641,52 @@ done: return rc; } -static int pkg_build_native_regions(const CfreeContext* ctx, PkgInputFile* in, - size_t n_files, uint32_t compression, +static int pkg_build_native_regions(DriverEnv* env, const CfreeContext* ctx, + const PkgSource* src, + uint32_t compression, + const char* external_dir, + int embed_content, CfreeWriter** index_out, CfreeWriter** content_out) { CfreeWriter* index = pkg_mem(ctx); CfreeWriter* content = pkg_mem(ctx); - size_t i; + size_t bi; if (!index || !content) return DIST_ERR; - for (i = 0; i < n_files; ++i) { + for (bi = 0; bi < src->n_blobs; ++bi) { + const PkgBlob* blob = &src->blobs[bi]; size_t off = 0, ci = 0; - if (in[i].fd.size == 0) continue; - while (off < in[i].fd.size) { - uint8_t recbuf[DIST_CFPKG_INDEX_RECORD_SIZE]; - DistCfpkgIndexRecord r; - const uint8_t* raw = in[i].fd.data + off; - size_t raw_len = in[i].fd.size - off; - if (raw_len > DIST_CFPKG_CHUNK_SIZE_DEFAULT) - raw_len = DIST_CFPKG_CHUNK_SIZE_DEFAULT; + if (blob->size == 0) continue; + while (off < blob->size) { + uint8_t recbuf[DIST_CFPKG3_INDEX_RECORD_SIZE]; + DistCfpkg3IndexRecord r; + const uint8_t* raw = blob->fd.data + off; + size_t raw_len = (size_t)blob->size - off; + if (raw_len > DIST_CFPKG3_CHUNK_SIZE_DEFAULT) + raw_len = DIST_CFPKG3_CHUNK_SIZE_DEFAULT; memset(&r, 0, sizeof r); - r.artifact_id = (uint64_t)i; + memcpy(r.blob_id, blob->id, DIST_BLAKE2B_LEN); r.chunk_index = (uint64_t)ci; - r.content_offset = cfree_writer_tell(content); + r.content_offset = embed_content ? cfree_writer_tell(content) : 0; r.raw_size = raw_len; r.compression = compression; pkg_hash(r.raw_hash, raw, raw_len); - dist_cfpkg2_leaf_hash(r.leaf_hash, r.artifact_id, r.chunk_index, raw, - raw_len); + dist_blob_leaf_hash(r.leaf_hash, r.chunk_index, raw, raw_len); if (compression == DIST_CFPKG_COMP_NONE) { r.stored_size = raw_len; pkg_hash(r.stored_hash, raw, raw_len); - if (cfree_writer_write(content, raw, raw_len) != CFREE_OK) + if (embed_content && + cfree_writer_write(content, raw, raw_len) != CFREE_OK) return DIST_ERR; + if (!embed_content) { + char rel[PKG_PATH_BUF]; + if (pkg_external_chunk_path(rel, sizeof rel, blob->id, + r.chunk_index) != DIST_OK || + pkg_write_external_file(env, ctx, external_dir, rel, raw, + raw_len) != DIST_OK) + return DIST_ERR; + } } else { - uint8_t tmp[DIST_CFPKG_CHUNK_SIZE_DEFAULT + 512u]; + uint8_t tmp[DIST_CFPKG3_CHUNK_SIZE_DEFAULT + 1024u]; size_t stored_len = 0; if (dist_lz4_compress_block(tmp, sizeof tmp, &stored_len, raw, raw_len) != DIST_OK) { @@ -356,10 +695,19 @@ static int pkg_build_native_regions(const CfreeContext* ctx, PkgInputFile* in, } r.stored_size = stored_len; pkg_hash(r.stored_hash, tmp, stored_len); - if (cfree_writer_write(content, tmp, stored_len) != CFREE_OK) + if (embed_content && + cfree_writer_write(content, tmp, stored_len) != CFREE_OK) return DIST_ERR; + if (!embed_content) { + char rel[PKG_PATH_BUF]; + if (pkg_external_chunk_path(rel, sizeof rel, blob->id, + r.chunk_index) != DIST_OK || + pkg_write_external_file(env, ctx, external_dir, rel, tmp, + stored_len) != DIST_OK) + return DIST_ERR; + } } - dist_cfpkg_encode_index_record(recbuf, &r); + dist_cfpkg3_encode_index_record(recbuf, &r); if (cfree_writer_write(index, recbuf, sizeof recbuf) != CFREE_OK) return DIST_ERR; off += raw_len; @@ -373,38 +721,73 @@ static int pkg_build_native_regions(const CfreeContext* ctx, PkgInputFile* in, static int pkg_create_cfpkg(DriverEnv* env, const CfreeContext* ctx, const char* out, const DistKeypair* kp, - PkgInputFile* in, size_t n_files, - const uint8_t* man, size_t man_len, - const uint8_t* sig, size_t sig_len, - const uint8_t* pub, size_t pub_len, + const PkgSource* src, const uint8_t* man, + size_t man_len, const uint8_t* sig, + size_t sig_len, const uint8_t* pub, + size_t pub_len, const uint8_t pkgid[DIST_BLAKE2B_LEN], - uint32_t compression) { + uint32_t compression, PkgNativeShape shape, + const char* external_dir) { CfreeWriter *index = NULL, *content = NULL, *descw = NULL, *descsigw = NULL, *pkg = NULL; const uint8_t *index_b, *content_b, *desc_b = NULL, *descsig_b = NULL; size_t index_l, content_l, desc_l = 0, descsig_l = 0; - uint8_t index_root[DIST_BLAKE2B_LEN], content_root[DIST_BLAKE2B_LEN]; - DistCfpkgHeader h; + uint8_t tree_root[DIST_BLAKE2B_LEN], index_root[DIST_BLAKE2B_LEN], + content_root[DIST_BLAKE2B_LEN]; + DistCfpkg3Header h; + uint64_t tree_offset = 0, index_offset = 0, content_offset = 0; + int embed_tree = shape != PKG_NATIVE_THIN; + int embed_index = shape != PKG_NATIVE_THIN; + int embed_content = shape == PKG_NATIVE_FAT; int stable = 0, iter, rc = DIST_ERR; + char tree_url[PKG_PATH_BUF], index_url[PKG_PATH_BUF]; - if (pkg_build_native_regions(ctx, in, n_files, compression, &index, &content) != + tree_url[0] = '\0'; + index_url[0] = '\0'; + if (shape != PKG_NATIVE_FAT && !external_dir) { + driver_errf(PKG_TOOL, + "create: --external DIR is required for non-fat native packages"); + goto done; + } + if (pkg_external_id_path(tree_url, sizeof tree_url, "tree", src->tree_id) != + DIST_OK) + goto done; + + if (pkg_build_native_regions(env, ctx, src, compression, external_dir, + embed_content, &index, &content) != DIST_OK) goto done; index_b = cfree_writer_mem_bytes(index, &index_l); content_b = cfree_writer_mem_bytes(content, &content_l); - pkg_region_root(index_root, "index", index_b, index_l); - pkg_region_root(content_root, "content", content_b, content_l); + dist_cfpkg3_region_root(tree_root, "tree", + embed_tree ? src->tree_bytes : NULL, + embed_tree ? src->tree_size : 0); + dist_cfpkg3_region_root(index_root, "index", index_b, index_l); + dist_cfpkg3_region_root(content_root, "content", + embed_content ? content_b : NULL, + embed_content ? content_l : 0); + if (!embed_tree && + pkg_write_external_file(env, ctx, external_dir, tree_url, src->tree_bytes, + src->tree_size) != DIST_OK) + goto done; + if (!embed_index) { + if (pkg_external_id_path(index_url, sizeof index_url, "index", + index_root) != DIST_OK || + pkg_write_external_file(env, ctx, external_dir, index_url, index_b, + index_l) != DIST_OK) + goto done; + } memset(&h, 0, sizeof h); for (iter = 0; iter < 8; ++iter) { - DistCfpkgDescriptor d; + DistCfpkg3Descriptor d; uint64_t old_desc_l = desc_l, old_descsig_l = descsig_l; if (descw) cfree_writer_close(descw); if (descsigw) cfree_writer_close(descsigw); descw = pkg_mem(ctx); descsigw = pkg_mem(ctx); if (!descw || !descsigw) goto done; - h.manifest_offset = DIST_CFPKG_HEADER_SIZE; + h.manifest_offset = DIST_CFPKG3_HEADER_SIZE; h.manifest_size = man_len; h.signature_offset = h.manifest_offset + h.manifest_size; h.signature_size = sig_len; @@ -415,26 +798,59 @@ static int pkg_create_cfpkg(DriverEnv* env, const CfreeContext* ctx, h.pubkey_offset = h.descriptor_signature_offset + h.descriptor_signature_size; h.pubkey_size = pub_len; - h.index_offset = - pkg_align_up(h.pubkey_offset + h.pubkey_size, DIST_CFPKG_ALIGNMENT); - h.index_size = index_l; - h.content_offset = - pkg_align_up(h.index_offset + h.index_size, DIST_CFPKG_ALIGNMENT); - h.content_size = content_l; - h.alignment = DIST_CFPKG_ALIGNMENT; - h.chunk_size = DIST_CFPKG_CHUNK_SIZE_DEFAULT; + tree_offset = embed_tree ? pkg_align_up(h.pubkey_offset + h.pubkey_size, + DIST_CFPKG3_ALIGNMENT) + : 0; + index_offset = + embed_index + ? pkg_align_up((embed_tree ? tree_offset + src->tree_size + : h.pubkey_offset + h.pubkey_size), + DIST_CFPKG3_ALIGNMENT) + : 0; + content_offset = + embed_content + ? pkg_align_up((embed_index ? index_offset + index_l + : (embed_tree ? tree_offset + + src->tree_size + : h.pubkey_offset + + h.pubkey_size)), + DIST_CFPKG3_ALIGNMENT) + : 0; memset(&d, 0, sizeof d); memcpy(d.package_id, pkgid, DIST_BLAKE2B_LEN); - d.index_offset = h.index_offset; - d.index_size = h.index_size; + d.chunk_size = DIST_CFPKG3_CHUNK_SIZE_DEFAULT; + d.alignment = DIST_CFPKG3_ALIGNMENT; + d.tree_offset = tree_offset; + d.tree_size = embed_tree ? src->tree_size : 0; + memcpy(d.tree_root, tree_root, DIST_BLAKE2B_LEN); + d.index_offset = index_offset; + d.index_size = embed_index ? index_l : 0; + d.index_bytes = index_l; memcpy(d.index_root, index_root, DIST_BLAKE2B_LEN); - d.content_offset = h.content_offset; - d.content_size = h.content_size; + if (!embed_index) snprintf(d.index_url, sizeof d.index_url, "%s", index_url); + d.content_offset = content_offset; + d.content_size = embed_content ? content_l : 0; memcpy(d.content_root, content_root, DIST_BLAKE2B_LEN); - d.chunk_size = h.chunk_size; - d.alignment = h.alignment; - if (dist_cfpkg_descriptor_emit(descw, &d) != DIST_OK) goto done; + d.n_trees = 1; + memcpy(d.trees[0].tree, src->tree_id, DIST_BLAKE2B_LEN); + if (embed_tree) { + d.trees[0].offset = 0; + d.trees[0].size = src->tree_size; + d.trees[0].embedded = 1; + } else { + snprintf(d.trees[0].url, sizeof d.trees[0].url, "%s", tree_url); + } + memcpy(d.trees[0].blake2b, src->tree_id, DIST_BLAKE2B_LEN); + d.n_chunk_sources = 1; + if (embed_content) { + d.chunk_sources[0].kind = DIST_CFPKG3_CHUNK_SOURCE_EMBEDDED; + } else { + d.chunk_sources[0].kind = DIST_CFPKG3_CHUNK_SOURCE_URL_TEMPLATE; + snprintf(d.chunk_sources[0].tmpl, sizeof d.chunk_sources[0].tmpl, + "%s", "chunk/{blob-prefix}/{blob}/{chunk}"); + } + if (dist_cfpkg3_descriptor_emit(descw, &d) != DIST_OK) goto done; desc_b = cfree_writer_mem_bytes(descw, &desc_l); if (pkg_sign(descsigw, env, desc_b, desc_l, kp, pkgid, "cfree cfpkg encoding descriptor") != DIST_OK) @@ -449,16 +865,22 @@ static int pkg_create_cfpkg(DriverEnv* env, const CfreeContext* ctx, pkg = pkg_mem(ctx); if (!pkg) goto done; - if (dist_cfpkg_write_header(pkg, &h) != DIST_OK || + if (dist_cfpkg3_write_header(pkg, &h) != DIST_OK || cfree_writer_write(pkg, man, man_len) != CFREE_OK || cfree_writer_write(pkg, sig, sig_len) != CFREE_OK || cfree_writer_write(pkg, desc_b, desc_l) != CFREE_OK || cfree_writer_write(pkg, descsig_b, descsig_l) != CFREE_OK || cfree_writer_write(pkg, pub, pub_len) != CFREE_OK || - pkg_write_pad(pkg, h.index_offset) != DIST_OK || - cfree_writer_write(pkg, index_b, index_l) != CFREE_OK || - pkg_write_pad(pkg, h.content_offset) != DIST_OK || - cfree_writer_write(pkg, content_b, content_l) != CFREE_OK) + (embed_tree && + (pkg_write_pad(pkg, tree_offset) != DIST_OK || + cfree_writer_write(pkg, src->tree_bytes, src->tree_size) != + CFREE_OK)) || + (embed_index && + (pkg_write_pad(pkg, index_offset) != DIST_OK || + cfree_writer_write(pkg, index_b, index_l) != CFREE_OK)) || + (embed_content && + (pkg_write_pad(pkg, content_offset) != DIST_OK || + cfree_writer_write(pkg, content_b, content_l) != CFREE_OK))) goto done; { const uint8_t* bytes; @@ -479,23 +901,23 @@ done: static int pkg_create(DriverEnv* env, const CfreeContext* ctx, int argc, char** argv) { const char *name = NULL, *version = NULL, *desc = NULL, *out = NULL, - *seckey = NULL; - const char* files[DIST_MAX_FILES]; - PkgInputFile inputs[DIST_MAX_FILES]; - size_t n_files = 0; + *seckey = NULL, *cas_dir = NULL, *tree_id = NULL, *root = NULL, + *external_dir = NULL; int i, rc = 1, sk_loaded = 0; PkgFormat fmt = PKG_FMT_AUTO; + PkgNativeShape native_shape = PKG_NATIVE_FAT; uint32_t compression = DIST_CFPKG_COMP_NONE; DistKeypair kp; CfreeFileData skfd = {0}; - DistManifest m; + DistPackageManifest m; + PkgSource src; CfreeWriter *manw = NULL, *sigw = NULL, *pubw = NULL; const uint8_t *man_b, *sig_b, *pub_b; size_t man_l, sig_l, pub_l; uint8_t pkgid[DIST_BLAKE2B_LEN]; char pkgid_hex[2 * DIST_BLAKE2B_LEN + 1]; - memset(inputs, 0, sizeof inputs); + pkg_source_init(&src, env); for (i = 0; i < argc; ++i) { const char* a = argv[i]; if (driver_streq(a, "--name") && i + 1 < argc) @@ -508,14 +930,27 @@ static int pkg_create(DriverEnv* env, const CfreeContext* ctx, int argc, out = argv[++i]; else if (driver_streq(a, "-s") && i + 1 < argc) seckey = argv[++i]; + else if (driver_streq(a, "--cas") && i + 1 < argc) + cas_dir = argv[++i]; + else if (driver_streq(a, "--tree") && i + 1 < argc) + tree_id = argv[++i]; + else if (driver_streq(a, "--root") && i + 1 < argc) + root = argv[++i]; + else if (driver_streq(a, "--external") && i + 1 < argc) + external_dir = argv[++i]; + else if (driver_streq(a, "--native-shape") && i + 1 < argc) { + if (pkg_parse_native_shape(argv[++i], &native_shape) != DIST_OK) { + driver_errf(PKG_TOOL, "create: unknown native shape"); + return 2; + } + } else if (driver_streq(a, "--format") && i + 1 < argc) { fmt = pkg_parse_format(argv[++i]); if (fmt == PKG_FMT_AUTO) { driver_errf(PKG_TOOL, "create: unknown format"); return 2; } - } - else if (driver_streq(a, "--compression") && i + 1 < argc) { + } else if (driver_streq(a, "--compression") && i + 1 < argc) { if (dist_cfpkg_compression_parse(argv[++i], &compression) != DIST_OK) { driver_errf(PKG_TOOL, "create: unknown compression"); return 2; @@ -524,11 +959,8 @@ static int pkg_create(DriverEnv* env, const CfreeContext* ctx, int argc, driver_errf(PKG_TOOL, "create: unknown option: %s", a); return 2; } else { - if (n_files >= DIST_MAX_FILES) { - driver_errf(PKG_TOOL, "create: too many files"); - return 2; - } - files[n_files++] = a; + driver_errf(PKG_TOOL, "create: positional file inputs were removed; use --root DIR"); + return 2; } } if (!name || !version || !out || !seckey) { @@ -536,11 +968,25 @@ static int pkg_create(DriverEnv* env, const CfreeContext* ctx, int argc, "create: --name, --version, -s SECKEY and -o OUT are required"); return 2; } + if ((root != NULL) == (cas_dir != NULL || tree_id != NULL) || (cas_dir && !tree_id) || + (tree_id && !cas_dir)) { + driver_errf(PKG_TOOL, + "create: pass exactly one of --root DIR or --cas DIR --tree TREE_ID"); + return 2; + } if (fmt == PKG_FMT_AUTO) fmt = pkg_infer_format(out); if (fmt == PKG_FMT_AUTO) { driver_errf(PKG_TOOL, "create: cannot infer format; pass --format"); return 2; } + if (fmt != PKG_FMT_CFPKG && native_shape != PKG_NATIVE_FAT) { + driver_errf(PKG_TOOL, "create: --native-shape only applies to cfpkg"); + return 2; + } + if (fmt != PKG_FMT_CFPKG && external_dir) { + driver_errf(PKG_TOOL, "create: --external only applies to cfpkg"); + return 2; + } if (pkg_read_file(ctx, seckey, &skfd) != DIST_OK) { driver_errf(PKG_TOOL, "create: cannot read secret key: %s", seckey); @@ -555,16 +1001,21 @@ static int pkg_create(DriverEnv* env, const CfreeContext* ctx, int argc, } memcpy(kp.pk, kp.sk + DIST_ED25519_SEED_LEN, DIST_ED25519_PK_LEN); - if (pkg_load_inputs(ctx, files, n_files, inputs, &m) != DIST_OK) goto done; - snprintf(m.name, sizeof m.name, "%s", name); - snprintf(m.version, sizeof m.version, "%s", version); - if (desc) snprintf(m.description, sizeof m.description, "%s", desc); + if (root) { + if (pkg_source_from_root(&src, root) != DIST_OK) goto done; + } else { + if (pkg_source_from_cas(&src, cas_dir, tree_id) != DIST_OK) goto done; + } + if (pkg_manifest_from_source(name, version, desc, &src, &m) != DIST_OK) { + driver_errf(PKG_TOOL, "create: failed to build package manifest"); + goto done; + } manw = pkg_mem(ctx); sigw = pkg_mem(ctx); pubw = pkg_mem(ctx); if (!manw || !sigw || !pubw) goto done; - if (dist_manifest_emit(&m, manw) != DIST_OK) goto done; + if (dist_package_manifest_emit(&m, manw) != DIST_OK) goto done; man_b = cfree_writer_mem_bytes(manw, &man_l); pkg_hash(pkgid, man_b, man_l); dist_hex_encode(pkgid_hex, pkgid, DIST_BLAKE2B_LEN); @@ -576,25 +1027,25 @@ static int pkg_create(DriverEnv* env, const CfreeContext* ctx, int argc, pub_b = cfree_writer_mem_bytes(pubw, &pub_l); if (fmt == PKG_FMT_TARGZ) - rc = pkg_create_targz(ctx, out, inputs, n_files, man_b, man_l, sig_b, sig_l, - pub_b, pub_l) == DIST_OK + rc = pkg_create_targz(ctx, out, &src, man_b, man_l, sig_b, sig_l, pub_b, + pub_l) == DIST_OK ? 0 : 1; else - rc = pkg_create_cfpkg(env, ctx, out, &kp, inputs, n_files, man_b, man_l, - sig_b, sig_l, pub_b, pub_l, pkgid, - compression) == DIST_OK + rc = pkg_create_cfpkg(env, ctx, out, &kp, &src, man_b, man_l, sig_b, sig_l, + pub_b, pub_l, pkgid, compression, native_shape, + external_dir) == DIST_OK ? 0 : 1; if (rc == 0) - driver_printf("wrote %s (%llu artifact(s), id %s)\n", out, - (unsigned long long)n_files, pkgid_hex); + driver_printf("wrote %s (%llu file(s), id %s)\n", out, + (unsigned long long)src.tree.n_entries, pkgid_hex); done: if (pubw) cfree_writer_close(pubw); if (sigw) cfree_writer_close(sigw); if (manw) cfree_writer_close(manw); - pkg_release_inputs(ctx, inputs, n_files); + pkg_source_release(&src); if (sk_loaded) ctx->file_io->release(ctx->file_io->user, &skfd); return rc; } @@ -683,6 +1134,7 @@ static int pkg_verify_manifest(DriverEnv* env, const CfreeContext* ctx, PkgVerified* out) { char err[128], pkgid_hex[2 * DIST_BLAKE2B_LEN + 1]; const char* pidp; + memset(out, 0, sizeof *out); if (dist_minisig_sig_keyid(sig, sig_len, out->keyid) != DIST_OK) { driver_errf(PKG_TOOL, "malformed signature"); return DIST_ERR; @@ -702,32 +1154,134 @@ static int pkg_verify_manifest(DriverEnv* env, const CfreeContext* ctx, driver_errf(PKG_TOOL, "trusted comment does not match package id"); return DIST_ERR; } - if (dist_manifest_parse(man, man_len, &out->manifest, err, sizeof err) != - DIST_OK) { + if (dist_package_manifest_parse(man, man_len, &out->manifest, err, + sizeof err) != DIST_OK) { driver_errf(PKG_TOOL, "manifest: %s", err); return DIST_ERR; } return DIST_OK; } -static const DistArtifact* pkg_find_artifact(const DistManifest* m, - const char* path) { +static const DistPackageOutput* pkg_default_output( + const DistPackageManifest* m) { size_t i; - for (i = 0; i < m->n_artifacts; ++i) - if (driver_streq(m->artifacts[i].path, path)) return &m->artifacts[i]; - return NULL; + for (i = 0; i < m->n_outputs; ++i) + if (m->outputs[i].is_default) return &m->outputs[i]; + return m->n_outputs ? &m->outputs[0] : NULL; +} + +static int pkg_parse_tree_object(PkgLoadedTree* out, + const uint8_t id[DIST_BLAKE2B_LEN], + const uint8_t* data, size_t len, + const char* label) { + uint8_t got[DIST_BLAKE2B_LEN]; + char err[128]; + memset(out, 0, sizeof *out); + dist_tree_id(got, data, len); + if (memcmp(got, id, DIST_BLAKE2B_LEN) != 0) { + driver_errf(PKG_TOOL, "tree id mismatch: %s", label); + return DIST_ERR; + } + out->tree.entries = out->entries; + out->tree.cap_entries = DIST_MAX_FILES; + if (dist_tree_parse(data, len, &out->tree, err, sizeof err) != DIST_OK) { + driver_errf(PKG_TOOL, "tree: %s", err); + return DIST_ERR; + } + memcpy(out->id, id, DIST_BLAKE2B_LEN); + out->bytes = data; + out->size = len; + return DIST_OK; +} + +static int pkg_verify_artifact_overlays(const DistPackageManifest* m, + const DistPackageOutput* out, + const DistTree* tree) { + size_t i; + for (i = 0; i < m->n_artifacts; ++i) { + const DistPackageArtifact* a = &m->artifacts[i]; + if (a->output_id != out->id) continue; + if (!dist_tree_find(tree, a->path)) { + driver_errf(PKG_TOOL, "artifact path not in output tree: %s", a->path); + return DIST_ERR; + } + } + return DIST_OK; } -static int pkg_verify_artifact_bytes(const DistArtifact* a, const uint8_t* data, - size_t len) { - uint8_t h[DIST_BLAKE2B_LEN], root[DIST_BLAKE2B_LEN]; - if (len != a->size) return DIST_ERR; - pkg_hash(h, data, len); - if (memcmp(h, a->blake2b, DIST_BLAKE2B_LEN) != 0) return DIST_ERR; - if (dist_cfpkg2_artifact_root(root, a->id, data, len, - DIST_CFPKG_CHUNK_SIZE_DEFAULT) != DIST_OK) +static int pkg_write_output_file(DriverEnv* env, const CfreeContext* ctx, + const char* out_dir, + const DistTreeEntry* e, + const uint8_t* data, size_t len) { + char full[PKG_PATH_BUF], parent[PKG_PATH_BUF]; + if (pkg_join_path(full, sizeof full, out_dir, e->path) != DIST_OK) { + driver_errf(PKG_TOOL, "output path too long: %s", e->path); return DIST_ERR; - return memcmp(root, a->root, DIST_BLAKE2B_LEN) == 0 ? DIST_OK : DIST_ERR; + } + pkg_parent_dir(full, parent, sizeof parent); + if (parent[0] && driver_mkdir_p(env, parent) != 0) return DIST_ERR; + if (pkg_write_file(ctx, full, data, len) != DIST_OK) return DIST_ERR; + if (e->mode == DIST_TREE_MODE_EXEC && + driver_mark_executable_output(full) != 0) + return DIST_ERR; + driver_printf(" extracted %s\n", full); + return DIST_OK; +} + +static const DistTarEntry* pkg_portable_find_cas(const DistTarEntry* entries, + size_t ne, const char* kind, + const uint8_t id[DIST_BLAKE2B_LEN]) { + char path[PKG_PATH_BUF]; + if (pkg_cas_rel_path(path, sizeof path, kind, id) != DIST_OK) return NULL; + return pkg_find_name(entries, ne, path); +} + +static int pkg_verify_blob_bytes(const DistTreeEntry* e, const uint8_t* data, + size_t len) { + DistBlobInfo bi; + if (dist_blob_info(&bi, data, len, DIST_BLOB_CHUNK_SIZE_DEFAULT) != DIST_OK) + return DIST_ERR; + return bi.size == e->size && + memcmp(bi.id, e->blob, DIST_BLAKE2B_LEN) == 0 && + memcmp(bi.root, e->root, DIST_BLAKE2B_LEN) == 0 + ? DIST_OK + : DIST_ERR; +} + +static int pkg_verify_portable_tree(DriverEnv* env, const CfreeContext* ctx, + const PkgVerified* v, + const DistPackageOutput* out, + const DistTarEntry* entries, size_t ne, + const char* out_dir) { + const DistTarEntry* te = pkg_portable_find_cas(entries, ne, "tree", out->tree); + PkgLoadedTree tree; + size_t i; + if (!te) { + driver_errf(PKG_TOOL, "portable package missing tree object"); + return DIST_ERR; + } + if (pkg_parse_tree_object(&tree, out->tree, te->data, te->size, + out->name) != DIST_OK) + return DIST_ERR; + if (pkg_verify_artifact_overlays(&v->manifest, out, &tree.tree) != DIST_OK) + return DIST_ERR; + for (i = 0; i < tree.tree.n_entries; ++i) { + const DistTreeEntry* e = &tree.tree.entries[i]; + const DistTarEntry* be = pkg_portable_find_cas(entries, ne, "blob", e->blob); + if (!be) { + driver_errf(PKG_TOOL, "portable package missing blob: %s", e->path); + return DIST_ERR; + } + if (pkg_verify_blob_bytes(e, be->data, be->size) != DIST_OK) { + driver_errf(PKG_TOOL, "blob hash mismatch: %s", e->path); + return DIST_ERR; + } + if (out_dir && + pkg_write_output_file(env, ctx, out_dir, e, be->data, be->size) != + DIST_OK) + return DIST_ERR; + } + return DIST_OK; } static int pkg_load_portable(const CfreeContext* ctx, const char* file, @@ -746,7 +1300,7 @@ static int pkg_load_portable(const CfreeContext* ctx, const char* file, return DIST_ERR; } bytes = cfree_writer_mem_bytes(inflated, &len); - if (dist_tar_iter(bytes, len, entries, DIST_MAX_FILES, ne) != DIST_OK) { + if (dist_tar_iter(bytes, len, entries, PKG_MAX_TAR_ENTRIES, ne) != DIST_OK) { driver_errf(PKG_TOOL, "malformed portable tar"); return DIST_ERR; } @@ -759,9 +1313,10 @@ static int pkg_verify_portable(DriverEnv* env, const CfreeContext* ctx, const char* out_dir, int quiet) { CfreeFileData fd = {0}; CfreeWriter* inflated = NULL; - DistTarEntry entries[DIST_MAX_FILES]; - size_t ne = 0, i, seen = 0; + DistTarEntry entries[PKG_MAX_TAR_ENTRIES]; + size_t ne = 0, oi; const DistTarEntry *man, *sig, *pub; + const DistPackageOutput* def; PkgVerified v; int rc = DIST_ERR; if (pkg_load_portable(ctx, file, &fd, &inflated, entries, &ne) != DIST_OK) @@ -777,38 +1332,13 @@ static int pkg_verify_portable(DriverEnv* env, const CfreeContext* ctx, pub ? pub->data : NULL, pub ? pub->size : 0, pubkey, tofu, &v) != DIST_OK) goto done; - for (i = 0; i < ne; ++i) { - const DistArtifact* a; - if (driver_streq(entries[i].name, PKG_META_MANIFEST) || - driver_streq(entries[i].name, PKG_META_SIG) || - driver_streq(entries[i].name, PKG_META_PUB)) - continue; - a = pkg_find_artifact(&v.manifest, entries[i].name); - if (!a) { - driver_errf(PKG_TOOL, "portable member not in manifest: %s", - entries[i].name); - goto done; - } - if (pkg_verify_artifact_bytes(a, entries[i].data, entries[i].size) != - DIST_OK) { - driver_errf(PKG_TOOL, "artifact hash mismatch: %s", entries[i].name); + def = pkg_default_output(&v.manifest); + if (!def) goto done; + for (oi = 0; oi < v.manifest.n_outputs; ++oi) { + const DistPackageOutput* out = &v.manifest.outputs[oi]; + if (pkg_verify_portable_tree(env, ctx, &v, out, entries, ne, + out == def ? out_dir : NULL) != DIST_OK) goto done; - } - ++seen; - if (out_dir) { - char full[PKG_PATH_BUF], parent[PKG_PATH_BUF]; - snprintf(full, sizeof full, "%s/%s", out_dir, entries[i].name); - pkg_parent_dir(full, parent, sizeof parent); - if (parent[0]) driver_mkdir_p(env, parent); - if (pkg_write_file(ctx, full, entries[i].data, entries[i].size) != - DIST_OK) - goto done; - driver_printf(" extracted %s\n", full); - } - } - if (seen != v.manifest.n_artifacts) { - driver_errf(PKG_TOOL, "portable package is missing artifacts"); - goto done; } if (!quiet) { char idhex[2 * DIST_KEYID_LEN + 1]; @@ -826,15 +1356,13 @@ done: return rc; } -static int pkg_bounds(const DistCfpkgHeader* h, size_t len) { +static int pkg_bounds3(const DistCfpkg3Header* h, size_t len) { uint64_t ranges[][2] = {{h->manifest_offset, h->manifest_size}, {h->signature_offset, h->signature_size}, {h->descriptor_offset, h->descriptor_size}, {h->descriptor_signature_offset, h->descriptor_signature_size}, - {h->pubkey_offset, h->pubkey_size}, - {h->index_offset, h->index_size}, - {h->content_offset, h->content_size}}; + {h->pubkey_offset, h->pubkey_size}}; size_t i; for (i = 0; i < sizeof ranges / sizeof ranges[0]; ++i) if (ranges[i][0] > len || ranges[i][1] > len - ranges[i][0]) @@ -842,82 +1370,379 @@ static int pkg_bounds(const DistCfpkgHeader* h, size_t len) { return DIST_OK; } -static int pkg_verify_native_content(DriverEnv* env, const CfreeContext* ctx, - const uint8_t* data, - const DistCfpkgHeader* h, - const PkgVerified* v, - const char* out_dir) { - size_t ai; - for (ai = 0; ai < v->manifest.n_artifacts; ++ai) { - const DistArtifact* a = &v->manifest.artifacts[ai]; - CfreeWriter* raww = pkg_mem(ctx); - const uint8_t* rawb; - size_t rawl; - uint64_t want_chunk = 0; - size_t off; - if (!raww) return DIST_ERR; - for (off = 0; off < h->index_size; off += DIST_CFPKG_INDEX_RECORD_SIZE) { - DistCfpkgIndexRecord r; - const uint8_t* stored; - uint8_t sh[DIST_BLAKE2B_LEN], rh[DIST_BLAKE2B_LEN], - leaf[DIST_BLAKE2B_LEN]; - if (dist_cfpkg_decode_index_record(data + h->index_offset + off, - DIST_CFPKG_INDEX_RECORD_SIZE, - &r) != DIST_OK) +static int pkg_range_ok(uint64_t off, uint64_t size, size_t len) { + return off <= len && size <= len - off; +} + +static const DistCfpkg3TreeObject* pkg_descriptor_find_tree( + const DistCfpkg3Descriptor* d, const uint8_t id[DIST_BLAKE2B_LEN]) { + size_t i; + for (i = 0; i < d->n_trees; ++i) + if (memcmp(d->trees[i].tree, id, DIST_BLAKE2B_LEN) == 0) + return &d->trees[i]; + return NULL; +} + +static int pkg_descriptor_has_embedded_chunks(const DistCfpkg3Descriptor* d) { + size_t i; + for (i = 0; i < d->n_chunk_sources; ++i) + if (d->chunk_sources[i].kind == DIST_CFPKG3_CHUNK_SOURCE_EMBEDDED) + return 1; + return 0; +} + +static const char* pkg_descriptor_chunk_template( + const DistCfpkg3Descriptor* d) { + size_t i; + for (i = 0; i < d->n_chunk_sources; ++i) + if (d->chunk_sources[i].kind == DIST_CFPKG3_CHUNK_SOURCE_URL_TEMPLATE) + return d->chunk_sources[i].tmpl; + return NULL; +} + +static int pkg_render_chunk_template(char* out, size_t cap, const char* tmpl, + const uint8_t blob[DIST_BLAKE2B_LEN], + uint64_t chunk_index) { + char blob_hex[2 * DIST_BLAKE2B_LEN + 1]; + char blob_prefix[3]; + char chunk_dec[24]; + size_t oi = 0, i; + dist_hex_encode(blob_hex, blob, DIST_BLAKE2B_LEN); + blob_prefix[0] = blob_hex[0]; + blob_prefix[1] = blob_hex[1]; + blob_prefix[2] = '\0'; + snprintf(chunk_dec, sizeof chunk_dec, "%llu", + (unsigned long long)chunk_index); + for (i = 0; tmpl[i];) { + const char* repl = NULL; + size_t repl_len = 0; + if (strncmp(tmpl + i, "{blob}", 6) == 0) { + repl = blob_hex; + repl_len = strlen(blob_hex); + i += 6; + } else if (strncmp(tmpl + i, "{blob-prefix}", 13) == 0) { + repl = blob_prefix; + repl_len = strlen(blob_prefix); + i += 13; + } else if (strncmp(tmpl + i, "{chunk}", 7) == 0) { + repl = chunk_dec; + repl_len = strlen(chunk_dec); + i += 7; + } else { + if (oi + 1u >= cap) return DIST_ERR; + out[oi++] = tmpl[i++]; + continue; + } + if (oi + repl_len >= cap) return DIST_ERR; + memcpy(out + oi, repl, repl_len); + oi += repl_len; + } + if (oi >= cap) return DIST_ERR; + out[oi] = '\0'; + return pkg_locator_safe(out) ? DIST_OK : DIST_ERR; +} + +static int pkg_verify_native_index_sorted(const uint8_t* index_b, + size_t index_l, + const DistCfpkg3Descriptor* d) { + DistCfpkg3IndexRecord prev; + size_t off; + int have_prev = 0; + int embedded_chunks = pkg_descriptor_has_embedded_chunks(d); + if (index_l != d->index_bytes || + index_l % DIST_CFPKG3_INDEX_RECORD_SIZE != 0) + return DIST_ERR; + memset(&prev, 0, sizeof prev); + for (off = 0; off < index_l; off += DIST_CFPKG3_INDEX_RECORD_SIZE) { + DistCfpkg3IndexRecord r; + int cmp; + if (dist_cfpkg3_decode_index_record(index_b + off, + DIST_CFPKG3_INDEX_RECORD_SIZE, + &r) != DIST_OK) + return DIST_ERR; + if (r.raw_size == 0 || r.raw_size > d->chunk_size || + !dist_cfpkg_compression_name(r.compression)) + return DIST_ERR; + if (embedded_chunks) { + if (r.content_offset > d->content_size || + r.stored_size > d->content_size - r.content_offset) return DIST_ERR; - if (r.artifact_id != a->id) continue; - if (r.chunk_index != want_chunk++) { - driver_errf(PKG_TOOL, "native index chunks out of order"); + } else if (r.content_offset != 0) { + return DIST_ERR; + } + if (!have_prev) { + if (r.chunk_index != 0) return DIST_ERR; + } else { + cmp = memcmp(prev.blob_id, r.blob_id, DIST_BLAKE2B_LEN); + if (cmp > 0) return DIST_ERR; + if (cmp == 0) { + if (r.chunk_index <= prev.chunk_index) return DIST_ERR; + } else if (r.chunk_index != 0) { return DIST_ERR; } - if (r.content_offset > h->content_size || - r.stored_size > h->content_size - r.content_offset) - return DIST_ERR; - stored = data + h->content_offset + r.content_offset; - pkg_hash(sh, stored, (size_t)r.stored_size); - if (memcmp(sh, r.stored_hash, DIST_BLAKE2B_LEN) != 0) return DIST_ERR; - if (r.compression == DIST_CFPKG_COMP_NONE) { - pkg_hash(rh, stored, (size_t)r.stored_size); - dist_cfpkg2_leaf_hash(leaf, r.artifact_id, r.chunk_index, stored, - (size_t)r.stored_size); - if (r.raw_size != r.stored_size || - memcmp(rh, r.raw_hash, DIST_BLAKE2B_LEN) != 0 || - memcmp(leaf, r.leaf_hash, DIST_BLAKE2B_LEN) != 0 || - cfree_writer_write(raww, stored, (size_t)r.stored_size) != CFREE_OK) - return DIST_ERR; - } else if (r.compression == DIST_CFPKG_COMP_LZ4_BLOCK_V1) { - uint8_t tmp[DIST_CFPKG_CHUNK_SIZE_DEFAULT]; - if (r.raw_size > sizeof tmp || - dist_lz4_decompress_block(tmp, (size_t)r.raw_size, stored, - (size_t)r.stored_size) != DIST_OK) - return DIST_ERR; - pkg_hash(rh, tmp, (size_t)r.raw_size); - dist_cfpkg2_leaf_hash(leaf, r.artifact_id, r.chunk_index, tmp, - (size_t)r.raw_size); - if (memcmp(rh, r.raw_hash, DIST_BLAKE2B_LEN) != 0 || - memcmp(leaf, r.leaf_hash, DIST_BLAKE2B_LEN) != 0 || - cfree_writer_write(raww, tmp, (size_t)r.raw_size) != CFREE_OK) - return DIST_ERR; - } else { + } + prev = r; + have_prev = 1; + } + return DIST_OK; +} + +static int pkg_native_load_tree(const CfreeContext* ctx, const uint8_t* data, + size_t len, const DistCfpkg3Descriptor* d, + const DistPackageOutput* out, + const char* external_dir, + PkgLoadedTree* tree) { + const DistCfpkg3TreeObject* obj = pkg_descriptor_find_tree(d, out->tree); + const uint8_t* bytes; + uint8_t h[DIST_BLAKE2B_LEN]; + (void)len; + if (!obj || !obj->embedded) { + CfreeFileData fd; + char rel[PKG_PATH_BUF]; + int rc; + if (!obj || !external_dir) { + driver_errf(PKG_TOOL, "external tree object is missing"); + return DIST_ERR; + } + if (obj->url[0]) + snprintf(rel, sizeof rel, "%s", obj->url); + else if (pkg_external_id_path(rel, sizeof rel, "tree", out->tree) != + DIST_OK) + return DIST_ERR; + fd.data = NULL; + fd.size = 0; + fd.token = NULL; + if (pkg_read_external_file(ctx, external_dir, rel, &fd) != DIST_OK) { + driver_errf(PKG_TOOL, "missing external tree object: %s", rel); + return DIST_ERR; + } + pkg_hash(h, fd.data, fd.size); + if (memcmp(h, obj->blake2b, DIST_BLAKE2B_LEN) != 0 || + memcmp(h, out->tree, DIST_BLAKE2B_LEN) != 0) { + ctx->file_io->release(ctx->file_io->user, &fd); + driver_errf(PKG_TOOL, "tree object hash mismatch"); + return DIST_ERR; + } + rc = pkg_parse_tree_object(tree, out->tree, fd.data, fd.size, out->name); + ctx->file_io->release(ctx->file_io->user, &fd); + tree->bytes = NULL; + tree->size = 0; + return rc; + } + if (obj->offset > d->tree_size || obj->size > d->tree_size - obj->offset) + return DIST_ERR; + bytes = data + d->tree_offset + obj->offset; + pkg_hash(h, bytes, (size_t)obj->size); + if (memcmp(h, obj->blake2b, DIST_BLAKE2B_LEN) != 0 || + memcmp(h, out->tree, DIST_BLAKE2B_LEN) != 0) { + driver_errf(PKG_TOOL, "tree object hash mismatch"); + return DIST_ERR; + } + return pkg_parse_tree_object(tree, out->tree, bytes, (size_t)obj->size, + out->name); +} + +static int pkg_native_load_index(const CfreeContext* ctx, const uint8_t* data, + const DistCfpkg3Descriptor* d, + const char* external_dir, CfreeFileData* fd, + const uint8_t** index_b, + size_t* index_l) { + uint8_t root[DIST_BLAKE2B_LEN]; + fd->data = NULL; + fd->size = 0; + fd->token = NULL; + if (d->index_size != 0) { + if (d->index_size != d->index_bytes) return DIST_ERR; + *index_b = data + d->index_offset; + *index_l = (size_t)d->index_size; + } else { + char rel[PKG_PATH_BUF]; + if (!external_dir) { + driver_errf(PKG_TOOL, "external index is missing"); + return DIST_ERR; + } + if (d->index_url[0]) + snprintf(rel, sizeof rel, "%s", d->index_url); + else if (pkg_external_id_path(rel, sizeof rel, "index", d->index_root) != + DIST_OK) + return DIST_ERR; + if (pkg_read_external_file(ctx, external_dir, rel, fd) != DIST_OK) { + driver_errf(PKG_TOOL, "missing external index: %s", rel); + return DIST_ERR; + } + *index_b = fd->data; + *index_l = fd->size; + } + if (*index_l != d->index_bytes) return DIST_ERR; + dist_cfpkg3_region_root(root, "index", *index_b, *index_l); + return memcmp(root, d->index_root, DIST_BLAKE2B_LEN) == 0 ? DIST_OK + : DIST_ERR; +} + +static int pkg_decode_native_chunk(CfreeWriter* raww, const uint8_t* stored, + size_t stored_len, + const DistCfpkg3Descriptor* d, + const DistCfpkg3IndexRecord* r) { + uint8_t sh[DIST_BLAKE2B_LEN], rh[DIST_BLAKE2B_LEN], + leaf[DIST_BLAKE2B_LEN]; + if (r->raw_size == 0 || r->raw_size > d->chunk_size || + r->stored_size != stored_len) + return DIST_ERR; + pkg_hash(sh, stored, (size_t)r->stored_size); + if (memcmp(sh, r->stored_hash, DIST_BLAKE2B_LEN) != 0) return DIST_ERR; + if (r->compression == DIST_CFPKG_COMP_NONE) { + if (r->raw_size != r->stored_size) return DIST_ERR; + pkg_hash(rh, stored, (size_t)r->stored_size); + dist_blob_leaf_hash(leaf, r->chunk_index, stored, (size_t)r->stored_size); + if (memcmp(rh, r->raw_hash, DIST_BLAKE2B_LEN) != 0 || + memcmp(leaf, r->leaf_hash, DIST_BLAKE2B_LEN) != 0 || + cfree_writer_write(raww, stored, (size_t)r->stored_size) != CFREE_OK) + return DIST_ERR; + } else if (r->compression == DIST_CFPKG_COMP_LZ4_BLOCK_V1) { + uint8_t tmp[DIST_CFPKG3_CHUNK_SIZE_DEFAULT]; + if (r->raw_size > sizeof tmp || + dist_lz4_decompress_block(tmp, (size_t)r->raw_size, stored, + (size_t)r->stored_size) != DIST_OK) + return DIST_ERR; + pkg_hash(rh, tmp, (size_t)r->raw_size); + dist_blob_leaf_hash(leaf, r->chunk_index, tmp, (size_t)r->raw_size); + if (memcmp(rh, r->raw_hash, DIST_BLAKE2B_LEN) != 0 || + memcmp(leaf, r->leaf_hash, DIST_BLAKE2B_LEN) != 0 || + cfree_writer_write(raww, tmp, (size_t)r->raw_size) != CFREE_OK) + return DIST_ERR; + } else { + return DIST_ERR; + } + return DIST_OK; +} + +static int pkg_native_load_stored_chunk(const CfreeContext* ctx, + const uint8_t* data, + const DistCfpkg3Descriptor* d, + const DistCfpkg3IndexRecord* r, + const char* external_dir, + const char* chunk_template, + CfreeFileData* fd, + const uint8_t** stored, + size_t* stored_len) { + fd->data = NULL; + fd->size = 0; + fd->token = NULL; + if (pkg_descriptor_has_embedded_chunks(d)) { + if (r->content_offset > d->content_size || + r->stored_size > d->content_size - r->content_offset) + return DIST_ERR; + *stored = data + d->content_offset + r->content_offset; + *stored_len = (size_t)r->stored_size; + return DIST_OK; + } + { + char rel[PKG_PATH_BUF]; + if (!external_dir) return DIST_ERR; + if (chunk_template) { + if (pkg_render_chunk_template(rel, sizeof rel, chunk_template, r->blob_id, + r->chunk_index) != DIST_OK) return DIST_ERR; - } + } else if (dist_cas_chunk_relpath(rel, sizeof rel, r->blob_id, + r->chunk_index) != DIST_OK) { + return DIST_ERR; + } + if (pkg_read_external_file(ctx, external_dir, rel, fd) != DIST_OK) { + driver_errf(PKG_TOOL, "missing external chunk: %s", rel); + return DIST_ERR; + } + *stored = fd->data; + *stored_len = fd->size; + return DIST_OK; + } +} + +static int pkg_native_reconstruct_blob(const CfreeContext* ctx, + const uint8_t* data, + const uint8_t* index_b, size_t index_l, + const DistCfpkg3Descriptor* d, + const DistTreeEntry* e, + const char* external_dir, + const char* chunk_template, + CfreeWriter** raww_out) { + CfreeWriter* raww = pkg_mem(ctx); + uint64_t want_chunk = 0; + size_t off; + int saw = 0; + if (!raww) return DIST_ERR; + if (index_l % DIST_CFPKG3_INDEX_RECORD_SIZE != 0) goto fail; + for (off = 0; off < index_l; off += DIST_CFPKG3_INDEX_RECORD_SIZE) { + DistCfpkg3IndexRecord r; + CfreeFileData chunk_fd; + const uint8_t* stored; + size_t stored_len; + int cmp; + if (dist_cfpkg3_decode_index_record(index_b + off, + DIST_CFPKG3_INDEX_RECORD_SIZE, + &r) != DIST_OK) + goto fail; + cmp = memcmp(r.blob_id, e->blob, DIST_BLAKE2B_LEN); + if (cmp < 0) continue; + if (cmp > 0 && saw) break; + if (cmp > 0) continue; + saw = 1; + if (r.chunk_index != want_chunk++) goto fail; + if (pkg_native_load_stored_chunk(ctx, data, d, &r, external_dir, + chunk_template, &chunk_fd, &stored, + &stored_len) != DIST_OK) + goto fail; + if (pkg_decode_native_chunk(raww, stored, stored_len, d, &r) != DIST_OK) { + if (chunk_fd.data && ctx->file_io->release) + ctx->file_io->release(ctx->file_io->user, &chunk_fd); + goto fail; + } + if (chunk_fd.data && ctx->file_io->release) + ctx->file_io->release(ctx->file_io->user, &chunk_fd); + } + *raww_out = raww; + return DIST_OK; +fail: + cfree_writer_close(raww); + return DIST_ERR; +} + +static int pkg_verify_native_tree(DriverEnv* env, const CfreeContext* ctx, + const uint8_t* data, size_t len, + const uint8_t* index_b, size_t index_l, + const DistCfpkg3Descriptor* d, + const PkgVerified* v, + const DistPackageOutput* out, + const char* external_dir, + const char* chunk_template, + const char* out_dir) { + PkgLoadedTree tree; + size_t i; + if (pkg_native_load_tree(ctx, data, len, d, out, external_dir, &tree) != + DIST_OK) + return DIST_ERR; + if (pkg_verify_artifact_overlays(&v->manifest, out, &tree.tree) != DIST_OK) + return DIST_ERR; + for (i = 0; i < tree.tree.n_entries; ++i) { + const DistTreeEntry* e = &tree.tree.entries[i]; + CfreeWriter* raww = NULL; + const uint8_t* rawb; + size_t rawl; + if (pkg_native_reconstruct_blob(ctx, data, index_b, index_l, d, e, + external_dir, chunk_template, + &raww) != DIST_OK) { + driver_errf(PKG_TOOL, "native chunk verification failed: %s", e->path); + return DIST_ERR; } rawb = cfree_writer_mem_bytes(raww, &rawl); - if (pkg_verify_artifact_bytes(a, rawb, rawl) != DIST_OK) { + if (pkg_verify_blob_bytes(e, rawb, rawl) != DIST_OK) { cfree_writer_close(raww); - driver_errf(PKG_TOOL, "artifact hash mismatch: %s", a->path); + driver_errf(PKG_TOOL, "blob hash mismatch: %s", e->path); return DIST_ERR; } - if (out_dir) { - char full[PKG_PATH_BUF], parent[PKG_PATH_BUF]; - snprintf(full, sizeof full, "%s/%s", out_dir, a->path); - pkg_parent_dir(full, parent, sizeof parent); - if (parent[0]) driver_mkdir_p(env, parent); - if (pkg_write_file(ctx, full, rawb, rawl) != DIST_OK) { - cfree_writer_close(raww); - return DIST_ERR; - } - driver_printf(" extracted %s\n", full); + if (out_dir && + pkg_write_output_file(env, ctx, out_dir, e, rawb, rawl) != DIST_OK) { + cfree_writer_close(raww); + return DIST_ERR; } cfree_writer_close(raww); } @@ -926,22 +1751,28 @@ static int pkg_verify_native_content(DriverEnv* env, const CfreeContext* ctx, static int pkg_verify_native(DriverEnv* env, const CfreeContext* ctx, const char* file, const char* pubkey, int tofu, - const char* out_dir, int quiet) { - CfreeFileData fd = {0}; - DistCfpkgHeader h; - DistCfpkgDescriptor d; + const char* external_dir, const char* out_dir, + int quiet) { + CfreeFileData fd = {0}, index_fd = {0}; + DistCfpkg3Header h; + DistCfpkg3Descriptor d; PkgVerified v; char err[128]; - uint8_t desc_keyid[DIST_KEYID_LEN], index_root[DIST_BLAKE2B_LEN], - content_root[DIST_BLAKE2B_LEN]; + uint8_t desc_keyid[DIST_KEYID_LEN], tree_root[DIST_BLAKE2B_LEN], + index_root[DIST_BLAKE2B_LEN], content_root[DIST_BLAKE2B_LEN]; char desc_trusted[DIST_TRUSTED_COMMENT_MAX]; + const DistPackageOutput* def; + const uint8_t* index_b = NULL; + size_t index_l = 0; + const char* chunk_template = NULL; + size_t oi; int rc = DIST_ERR; if (pkg_read_file(ctx, file, &fd) != DIST_OK) { driver_errf(PKG_TOOL, "cannot read package: %s", file); return DIST_ERR; } - if (dist_cfpkg_read_header(fd.data, fd.size, &h) != DIST_OK || - pkg_bounds(&h, fd.size) != DIST_OK) { + if (dist_cfpkg3_read_header(fd.data, fd.size, &h) != DIST_OK || + pkg_bounds3(&h, fd.size) != DIST_OK) { driver_errf(PKG_TOOL, "malformed native package"); goto done; } @@ -966,34 +1797,57 @@ static int pkg_verify_native(DriverEnv* env, const CfreeContext* ctx, driver_errf(PKG_TOOL, "encoding descriptor signature FAILED"); goto done; } - if (dist_cfpkg_descriptor_parse(fd.data + h.descriptor_offset, - (size_t)h.descriptor_size, &d, err, - sizeof err) != DIST_OK) { + if (dist_cfpkg3_descriptor_parse(fd.data + h.descriptor_offset, + (size_t)h.descriptor_size, &d, err, + sizeof err) != DIST_OK) { driver_errf(PKG_TOOL, "encoding descriptor: %s", err); goto done; } if (memcmp(d.package_id, v.package_id, DIST_BLAKE2B_LEN) != 0 || - d.index_offset != h.index_offset || d.index_size != h.index_size || - d.content_offset != h.content_offset || d.content_size != h.content_size || - d.chunk_size != h.chunk_size || d.alignment != h.alignment || - d.chunk_size != DIST_CFPKG_CHUNK_SIZE_DEFAULT || - d.alignment != DIST_CFPKG_ALIGNMENT) { + d.chunk_size != DIST_CFPKG3_CHUNK_SIZE_DEFAULT || + d.alignment != DIST_CFPKG3_ALIGNMENT || + !pkg_range_ok(d.tree_offset, d.tree_size, fd.size) || + !pkg_range_ok(d.index_offset, d.index_size, fd.size) || + !pkg_range_ok(d.content_offset, d.content_size, fd.size)) { driver_errf(PKG_TOOL, "encoding descriptor does not match package layout"); goto done; } - pkg_region_root(index_root, "index", fd.data + h.index_offset, - (size_t)h.index_size); - pkg_region_root(content_root, "content", fd.data + h.content_offset, - (size_t)h.content_size); - if (memcmp(index_root, d.index_root, DIST_BLAKE2B_LEN) != 0 || + dist_cfpkg3_region_root(tree_root, "tree", fd.data + d.tree_offset, + (size_t)d.tree_size); + dist_cfpkg3_region_root(content_root, "content", fd.data + d.content_offset, + (size_t)d.content_size); + if (pkg_native_load_index(ctx, fd.data, &d, external_dir, &index_fd, + &index_b, &index_l) != DIST_OK) { + driver_errf(PKG_TOOL, "native package index verification failed"); + goto done; + } + dist_cfpkg3_region_root(index_root, "index", index_b, index_l); + if (!pkg_descriptor_has_embedded_chunks(&d)) + chunk_template = pkg_descriptor_chunk_template(&d); + if (d.index_bytes && !pkg_descriptor_has_embedded_chunks(&d) && + !external_dir) { + driver_errf(PKG_TOOL, "external native chunks are missing"); + goto done; + } + if (memcmp(tree_root, d.tree_root, DIST_BLAKE2B_LEN) != 0 || + memcmp(index_root, d.index_root, DIST_BLAKE2B_LEN) != 0 || memcmp(content_root, d.content_root, DIST_BLAKE2B_LEN) != 0) { driver_errf(PKG_TOOL, "native package region hash mismatch"); goto done; } - if (h.index_size % DIST_CFPKG_INDEX_RECORD_SIZE != 0) goto done; - if (pkg_verify_native_content(env, ctx, fd.data, &h, &v, out_dir) != - DIST_OK) + if (pkg_verify_native_index_sorted(index_b, index_l, &d) != DIST_OK) { + driver_errf(PKG_TOOL, "native chunk index is malformed"); goto done; + } + def = pkg_default_output(&v.manifest); + if (!def) goto done; + for (oi = 0; oi < v.manifest.n_outputs; ++oi) { + const DistPackageOutput* out = &v.manifest.outputs[oi]; + if (pkg_verify_native_tree(env, ctx, fd.data, fd.size, index_b, index_l, + &d, &v, out, external_dir, chunk_template, + out == def ? out_dir : NULL) != DIST_OK) + goto done; + } if (!quiet) { char idhex[2 * DIST_KEYID_LEN + 1]; dist_hex_encode(idhex, v.keyid, DIST_KEYID_LEN); @@ -1005,13 +1859,15 @@ static int pkg_verify_native(DriverEnv* env, const CfreeContext* ctx, v.manifest.version, out_dir); rc = DIST_OK; done: + if (index_fd.token || index_fd.data) + ctx->file_io->release(ctx->file_io->user, &index_fd); if (fd.token || fd.data) ctx->file_io->release(ctx->file_io->user, &fd); return rc; } static int pkg_verify_or_unpack(DriverEnv* env, const CfreeContext* ctx, int argc, char** argv, int unpack) { - const char *file = NULL, *pubkey = NULL, *dir = "."; + const char *file = NULL, *pubkey = NULL, *dir = ".", *external_dir = NULL; int tofu = 0, explicit_verify = 0, i; PkgFormat fmt = PKG_FMT_AUTO; for (i = 0; i < argc; ++i) { @@ -1021,14 +1877,15 @@ static int pkg_verify_or_unpack(DriverEnv* env, const CfreeContext* ctx, tofu = 1; else if (unpack && driver_streq(argv[i], "--verify")) explicit_verify = 1; + else if (driver_streq(argv[i], "--external") && i + 1 < argc) + external_dir = argv[++i]; else if (driver_streq(argv[i], "--format") && i + 1 < argc) { fmt = pkg_parse_format(argv[++i]); if (fmt == PKG_FMT_AUTO) { driver_errf(PKG_TOOL, "%s: unknown format", unpack ? "unpack" : "verify"); return 2; } - } - else if (unpack && driver_streq(argv[i], "-C") && i + 1 < argc) + } else if (unpack && driver_streq(argv[i], "-C") && i + 1 < argc) dir = argv[++i]; else if (argv[i][0] != '-') file = argv[i]; @@ -1049,7 +1906,8 @@ static int pkg_verify_or_unpack(DriverEnv* env, const CfreeContext* ctx, unpack && !explicit_verify) == DIST_OK ? 0 : 1; - return pkg_verify_native(env, ctx, file, pubkey, tofu, unpack ? dir : NULL, + return pkg_verify_native(env, ctx, file, pubkey, tofu, external_dir, + unpack ? dir : NULL, unpack && !explicit_verify) == DIST_OK ? 0 : 1; @@ -1058,6 +1916,7 @@ static int pkg_verify_or_unpack(DriverEnv* env, const CfreeContext* ctx, static int pkg_inspect(const CfreeContext* ctx, int argc, char** argv) { const char* file = NULL; PkgFormat fmt = PKG_FMT_AUTO; + int show_encoding = 0; int i, rc = 1; for (i = 0; i < argc; ++i) { if (driver_streq(argv[i], "--format") && i + 1 < argc) { @@ -1066,8 +1925,11 @@ static int pkg_inspect(const CfreeContext* ctx, int argc, char** argv) { driver_errf(PKG_TOOL, "inspect: unknown format"); return 2; } - } - else if (argv[i][0] != '-') + } else if (driver_streq(argv[i], "--manifest")) { + show_encoding = 0; + } else if (driver_streq(argv[i], "--encoding")) { + show_encoding = 1; + } else if (argv[i][0] != '-') file = argv[i]; else { driver_errf(PKG_TOOL, "inspect: unknown option: %s", argv[i]); @@ -1080,9 +1942,13 @@ static int pkg_inspect(const CfreeContext* ctx, int argc, char** argv) { } if (fmt == PKG_FMT_AUTO) fmt = pkg_infer_format(file); if (fmt == PKG_FMT_TARGZ) { + if (show_encoding) { + driver_errf(PKG_TOOL, "inspect: portable packages have no encoding descriptor"); + return 2; + } CfreeFileData fd = {0}; CfreeWriter* inflated = NULL; - DistTarEntry entries[DIST_MAX_FILES]; + DistTarEntry entries[PKG_MAX_TAR_ENTRIES]; size_t ne = 0; const DistTarEntry* man; if (pkg_load_portable(ctx, file, &fd, &inflated, entries, &ne) == DIST_OK && @@ -1095,12 +1961,16 @@ static int pkg_inspect(const CfreeContext* ctx, int argc, char** argv) { return rc; } else { CfreeFileData fd = {0}; - DistCfpkgHeader h; + DistCfpkg3Header h; if (pkg_read_file(ctx, file, &fd) == DIST_OK && - dist_cfpkg_read_header(fd.data, fd.size, &h) == DIST_OK && - pkg_bounds(&h, fd.size) == DIST_OK) { - driver_printf("%.*s", (int)h.manifest_size, - (const char*)(fd.data + h.manifest_offset)); + dist_cfpkg3_read_header(fd.data, fd.size, &h) == DIST_OK && + pkg_bounds3(&h, fd.size) == DIST_OK) { + if (show_encoding) + driver_printf("%.*s", (int)h.descriptor_size, + (const char*)(fd.data + h.descriptor_offset)); + else + driver_printf("%.*s", (int)h.manifest_size, + (const char*)(fd.data + h.manifest_offset)); rc = 0; } else { driver_errf(PKG_TOOL, "malformed native package"); diff --git a/include/cfree/config.h b/include/cfree/config.h @@ -91,6 +91,7 @@ #define CFREE_TOOL_SIZE_ENABLED 1 #define CFREE_TOOL_ADDR2LINE_ENABLED 1 #define CFREE_TOOL_STRINGS_ENABLED 1 +#define CFREE_TOOL_CAS_ENABLED 1 #define CFREE_TOOL_PKG_ENABLED 1 #endif /* CFREE_CONFIG_H */ diff --git a/mk/config.mk b/mk/config.mk @@ -49,4 +49,5 @@ CFREE_TOOL_NM_ENABLED := $(call cfg_flag,CFREE_TOOL_NM_ENABLED) CFREE_TOOL_SIZE_ENABLED := $(call cfg_flag,CFREE_TOOL_SIZE_ENABLED) CFREE_TOOL_ADDR2LINE_ENABLED := $(call cfg_flag,CFREE_TOOL_ADDR2LINE_ENABLED) CFREE_TOOL_STRINGS_ENABLED := $(call cfg_flag,CFREE_TOOL_STRINGS_ENABLED) +CFREE_TOOL_CAS_ENABLED := $(call cfg_flag,CFREE_TOOL_CAS_ENABLED) CFREE_TOOL_PKG_ENABLED := $(call cfg_flag,CFREE_TOOL_PKG_ENABLED) diff --git a/src/core/config_assert.c b/src/core/config_assert.c @@ -58,6 +58,8 @@ CFREE_ASSERT_BOOL(CFREE_TOOL_NM_ENABLED); CFREE_ASSERT_BOOL(CFREE_TOOL_SIZE_ENABLED); CFREE_ASSERT_BOOL(CFREE_TOOL_ADDR2LINE_ENABLED); CFREE_ASSERT_BOOL(CFREE_TOOL_STRINGS_ENABLED); +CFREE_ASSERT_BOOL(CFREE_TOOL_CAS_ENABLED); +CFREE_ASSERT_BOOL(CFREE_TOOL_PKG_ENABLED); #undef CFREE_ASSERT_BOOL diff --git a/test/cas/run.sh b/test/cas/run.sh @@ -0,0 +1,282 @@ +#!/bin/sh +# Driver-level checks for the shared cfree CAS/tree utilities. + +set -u + +script_dir=$(cd "$(dirname "$0")" && pwd) +repo_root=$(cd "$script_dir/../.." && pwd) + +CFREE="${CFREE:-$repo_root/build/cfree}" + +if [ ! -x "$CFREE" ]; then + echo "cas: cfree binary not found at $CFREE" >&2 + exit 2 +fi + +work=$(mktemp -d "${TMPDIR:-/tmp}/cfree-cas-test.XXXXXX") +trap 'rm -rf "$work"' EXIT + +mkdir -p "$work/src/bin" "$work/src/share" "$work/cas" "$work/out" + +pass=0 +fail=0 +skip=0 +failures= + +ok() { + printf 'PASS %s\n' "$1" + pass=$((pass + 1)) +} + +not_ok() { + printf 'FAIL %s\n' "$1" + if [ "$#" -gt 1 ] && [ -s "$2" ]; then + sed 's/^/ | /' "$2" + fi + fail=$((fail + 1)) + failures="$failures $1" +} + +skip_test() { + printf 'SKIP %s\n' "$1" + skip=$((skip + 1)) +} + +run_ok() { + name=$1 + shift + if "$@" > "$work/$name.out" 2> "$work/$name.err"; then + ok "$name" + else + not_ok "$name" "$work/$name.err" + fi +} + +run_fail() { + name=$1 + shift + if "$@" > "$work/$name.out" 2> "$work/$name.err"; then + { + echo "command unexpectedly succeeded" + sed 's/^/stdout: /' "$work/$name.out" + } > "$work/$name.diag" + not_ok "$name" "$work/$name.diag" + else + ok "$name" + fi +} + +contains() { + name=$1 + file=$2 + needle=$3 + if grep -F "$needle" "$file" >/dev/null 2>&1; then + ok "$name" + else + { + printf 'missing text: %s\n' "$needle" + sed 's/^/file: /' "$file" + } > "$work/$name.diag" + not_ok "$name" "$work/$name.diag" + fi +} + +same_file() { + name=$1 + want=$2 + got=$3 + if cmp -s "$want" "$got"; then + ok "$name" + else + { + printf 'files differ:\n' + printf ' want: %s\n' "$want" + printf ' got: %s\n' "$got" + } > "$work/$name.diag" + not_ok "$name" "$work/$name.diag" + fi +} + +is_executable() { + name=$1 + file=$2 + if [ -x "$file" ]; then + ok "$name" + else + echo "not executable: $file" > "$work/$name.diag" + not_ok "$name" "$work/$name.diag" + fi +} + +first_hex_id() { + sed -n 's/.*\([0-9a-fA-F]\{64\}\).*/\1/p' "$1" | sed -n '1p' +} + +id_prefix() { + printf '%.2s' "$1" +} + +cas_object_path() { + root=$1 + kind=$2 + id=$3 + prefix=$(id_prefix "$id") + printf '%s/%s/%s/%s\n' "$root" "$kind" "$prefix" "$id" +} + +tree_blob_for_path() { + tree_file=$1 + want=$2 + awk -v want="$want" ' + $0 == "[file]" { in_file = 1; path = ""; blob = ""; next } + in_file && /^path = / { path = substr($0, 8); next } + in_file && /^blob = / { + blob = substr($0, 8); + if (path == want) { + print blob; + exit; + } + } + ' "$tree_file" +} + +assert_file_exists() { + name=$1 + file=$2 + if [ -f "$file" ]; then + ok "$name" + else + echo "missing file: $file" > "$work/$name.diag" + not_ok "$name" "$work/$name.diag" + fi +} + +make_fixtures() { + printf alpha > "$work/src/share/a.txt" + cp "$work/src/share/a.txt" "$work/src/share/a-copy.txt" + : > "$work/src/share/empty.dat" + { + printf '#!/bin/sh\n' + printf 'printf "hello from cas\\n"\n' + } > "$work/src/bin/tool.sh" + chmod +x "$work/src/bin/tool.sh" +} + +make_fixtures + +run_ok "cas-add-blob" "$CFREE" cas add-blob --cas "$work/cas" "$work/src/share/a.txt" +blob_id=$(first_hex_id "$work/cas-add-blob.out") +if [ -n "$blob_id" ]; then + ok "cas-add-blob-id" +else + echo "could not parse blob id from add-blob output" > "$work/cas-add-blob-id.diag" + not_ok "cas-add-blob-id" "$work/cas-add-blob-id.diag" +fi +blob_file=$(cas_object_path "$work/cas" blob "$blob_id") +assert_file_exists "cas-add-blob-object" "$blob_file" +same_file "cas-add-blob-content" "$work/src/share/a.txt" "$blob_file" + +run_ok "cas-add-tree-root" "$CFREE" cas add-tree --cas "$work/cas" --root "$work/src" +tree_id=$(first_hex_id "$work/cas-add-tree-root.out") +if [ -n "$tree_id" ]; then + ok "cas-add-tree-id" +else + echo "could not parse tree id from add-tree output" > "$work/cas-add-tree-id.diag" + not_ok "cas-add-tree-id" "$work/cas-add-tree-id.diag" +fi +tree_file=$(cas_object_path "$work/cas" tree "$tree_id") +assert_file_exists "cas-add-tree-object" "$tree_file" +contains "cas-tree-magic" "$tree_file" "cfree-tree 1" +contains "cas-tree-hash" "$tree_file" "hash = blake2b-256" +contains "cas-tree-blob-format" "$tree_file" "blob = cfree-blob-v1" +contains "cas-tree-regular-path" "$tree_file" "path = share/a.txt" +contains "cas-tree-exec-path" "$tree_file" "path = bin/tool.sh" +contains "cas-tree-file-mode" "$tree_file" "mode = -" +contains "cas-tree-exec-mode" "$tree_file" "mode = x" + +run_ok "cas-inspect-tree" "$CFREE" cas inspect-tree --cas "$work/cas" "$tree_id" +contains "cas-inspect-tree-magic" "$work/cas-inspect-tree.out" "cfree-tree 1" +contains "cas-inspect-tree-path" "$work/cas-inspect-tree.out" "path = share/a.txt" + +run_ok "cas-verify-tree" "$CFREE" cas verify-tree --cas "$work/cas" "$tree_id" + +run_ok "cas-materialize-tree" "$CFREE" cas materialize --cas "$work/cas" "$tree_id" -C "$work/out/root" +same_file "cas-materialize-a" "$work/src/share/a.txt" "$work/out/root/share/a.txt" +same_file "cas-materialize-a-copy" "$work/src/share/a-copy.txt" "$work/out/root/share/a-copy.txt" +same_file "cas-materialize-empty" "$work/src/share/empty.dat" "$work/out/root/share/empty.dat" +same_file "cas-materialize-tool" "$work/src/bin/tool.sh" "$work/out/root/bin/tool.sh" +is_executable "cas-materialize-exec-mode" "$work/out/root/bin/tool.sh" + +cat > "$work/map.txt" <<EOF +mapped/tool.sh x $work/src/bin/tool.sh +mapped/a.txt - $work/src/share/a.txt +EOF +run_ok "cas-add-tree-map" "$CFREE" cas add-tree --cas "$work/cas" --map "$work/map.txt" +map_tree_id=$(first_hex_id "$work/cas-add-tree-map.out") +if [ -n "$map_tree_id" ]; then + ok "cas-add-tree-map-id" +else + echo "could not parse map tree id from add-tree output" > "$work/cas-add-tree-map-id.diag" + not_ok "cas-add-tree-map-id" "$work/cas-add-tree-map-id.diag" +fi +run_ok "cas-verify-map-tree" "$CFREE" cas verify-tree --cas "$work/cas" "$map_tree_id" +run_ok "cas-materialize-map-tree" "$CFREE" cas materialize --cas "$work/cas" "$map_tree_id" -C "$work/out/map" +same_file "cas-map-materialize-a" "$work/src/share/a.txt" "$work/out/map/mapped/a.txt" +same_file "cas-map-materialize-tool" "$work/src/bin/tool.sh" "$work/out/map/mapped/tool.sh" +is_executable "cas-map-materialize-exec-mode" "$work/out/map/mapped/tool.sh" + +cat > "$work/dup.map" <<EOF +same.txt - $work/src/share/a.txt +same.txt - $work/src/share/a-copy.txt +EOF +run_fail "cas-add-tree-duplicate-path-fails" "$CFREE" cas add-tree --cas "$work/cas" --map "$work/dup.map" + +cat > "$work/unsafe.map" <<EOF +../escape.txt - $work/src/share/a.txt +EOF +run_fail "cas-add-tree-unsafe-path-fails" "$CFREE" cas add-tree --cas "$work/cas" --map "$work/unsafe.map" + +cat > "$work/absolute.map" <<EOF +/absolute.txt - $work/src/share/a.txt +EOF +run_fail "cas-add-tree-absolute-path-fails" "$CFREE" cas add-tree --cas "$work/cas" --map "$work/absolute.map" + +cat > "$work/empty-component.map" <<EOF +bad//component.txt - $work/src/share/a.txt +EOF +run_fail "cas-add-tree-empty-component-fails" "$CFREE" cas add-tree --cas "$work/cas" --map "$work/empty-component.map" + +cat > "$work/dot-component.map" <<EOF +bad/./component.txt - $work/src/share/a.txt +EOF +run_fail "cas-add-tree-dot-component-fails" "$CFREE" cas add-tree --cas "$work/cas" --map "$work/dot-component.map" + +cat > "$work/colon.map" <<EOF +bad:name.txt - $work/src/share/a.txt +EOF +run_fail "cas-add-tree-colon-path-fails" "$CFREE" cas add-tree --cas "$work/cas" --map "$work/colon.map" + +if [ -n "$tree_id" ]; then + cp -R "$work/cas" "$work/bad-cas-mutated" + a_blob=$(tree_blob_for_path "$tree_file" "share/a.txt") + printf tamper >> "$(cas_object_path "$work/bad-cas-mutated" blob "$a_blob")" + run_fail "cas-verify-mutated-blob-fails" "$CFREE" cas verify-tree --cas "$work/bad-cas-mutated" "$tree_id" + run_fail "cas-materialize-mutated-blob-fails" "$CFREE" cas materialize --cas "$work/bad-cas-mutated" "$tree_id" -C "$work/out/bad-mutated" + + cp -R "$work/cas" "$work/bad-cas-missing" + rm -f "$(cas_object_path "$work/bad-cas-missing" blob "$a_blob")" + run_fail "cas-verify-missing-blob-fails" "$CFREE" cas verify-tree --cas "$work/bad-cas-missing" "$tree_id" + + mkdir -p "$work/empty-cas" + run_fail "cas-verify-missing-tree-fails" "$CFREE" cas verify-tree --cas "$work/empty-cas" "$tree_id" +else + skip_test "cas-corruption-tests" +fi + +if [ "$fail" -ne 0 ]; then + printf 'cas: failures:%s\n' "$failures" + printf 'cas: %d passed, %d failed, %d skipped\n' "$pass" "$fail" "$skip" + exit 1 +fi + +printf 'cas: %d passed, %d skipped\n' "$pass" "$skip" diff --git a/test/pkg/run.sh b/test/pkg/run.sh @@ -20,16 +20,15 @@ HOME="$work/home" CFREE_TRUSTED_KEYS="$work/trusted_keys" SOURCE_DATE_EPOCH=1 export HOME CFREE_TRUSTED_KEYS SOURCE_DATE_EPOCH -mkdir -p "$HOME" "$work/in" "$work/pkg" "$work/unpack" +mkdir -p "$HOME" "$work/in" "$work/pkg" "$work/unpack" "$work/cas" pass=0 fail=0 skip=0 failures= -artifacts="empty.dat one.bin payload.txt chunk64.bin chunk64p1.bin chunk3.bin" -artifact_paths= -reversed_artifact_paths= +artifacts="empty.dat one.bin dup-one.bin payload.txt chunk64.bin chunk64-copy.bin chunk64p1.bin chunk3.bin bin/tool.sh" +tree_id= ok() { printf 'PASS %s\n' "$1" @@ -139,35 +138,97 @@ same_artifacts() { fi } +not_same_file() { + name=$1 + left=$2 + right=$3 + if cmp -s "$left" "$right"; then + { + printf 'files unexpectedly match:\n' + printf ' left: %s\n' "$left" + printf ' right: %s\n' "$right" + } > "$work/$name.diag" + not_ok "$name" "$work/$name.diag" + else + ok "$name" + fi +} + +is_executable() { + name=$1 + file=$2 + if [ -x "$file" ]; then + ok "$name" + else + echo "not executable: $file" > "$work/$name.diag" + not_ok "$name" "$work/$name.diag" + fi +} + have_cmd() { command -v "$1" >/dev/null 2>&1 } +first_hex_id() { + sed -n 's/.*\([0-9a-fA-F]\{64\}\).*/\1/p' "$1" | sed -n '1p' +} + +id_prefix() { + printf '%.2s' "$1" +} + +cas_object_path() { + root=$1 + kind=$2 + id=$3 + prefix=$(id_prefix "$id") + printf '%s/%s/%s/%s\n' "$root" "$kind" "$prefix" "$id" +} + +tree_blob_for_path() { + tree_file=$1 + want=$2 + awk -v want="$want" ' + $0 == "[file]" { in_file = 1; path = ""; blob = ""; next } + in_file && /^path = / { path = substr($0, 8); next } + in_file && /^blob = / { + blob = substr($0, 8); + if (path == want) { + print blob; + exit; + } + } + ' "$tree_file" +} + +assert_file_exists() { + name=$1 + file=$2 + if [ -f "$file" ]; then + ok "$name" + else + echo "missing file: $file" > "$work/$name.diag" + not_ok "$name" "$work/$name.diag" + fi +} + make_fixtures() { : > "$work/in/empty.dat" printf x > "$work/in/one.bin" - { - i=0 - while [ "$i" -lt 4096 ]; do - printf 'portable package deflate regression line %04d\n' "$i" - i=$((i + 1)) - done - } > "$work/in/payload.txt" + cp "$work/in/one.bin" "$work/in/dup-one.bin" + printf 'portable package deflate regression payload\n' > "$work/in/payload.txt" dd if=/dev/zero of="$work/in/chunk64.bin" bs=65536 count=1 >/dev/null 2>&1 + cp "$work/in/chunk64.bin" "$work/in/chunk64-copy.bin" dd if=/dev/zero of="$work/in/chunk64p1.bin" bs=65536 count=1 >/dev/null 2>&1 printf y >> "$work/in/chunk64p1.bin" dd if=/dev/zero of="$work/in/chunk3.bin" bs=65536 count=3 >/dev/null 2>&1 printf tail >> "$work/in/chunk3.bin" - - artifact_paths= - for f in $artifacts; do - artifact_paths="$artifact_paths $work/in/$f" - done - - reversed_artifact_paths= - for f in chunk3.bin chunk64p1.bin chunk64.bin payload.txt one.bin empty.dat; do - reversed_artifact_paths="$reversed_artifact_paths $work/in/$f" - done + mkdir -p "$work/in/bin" + { + printf '#!/bin/sh\n' + printf 'printf "tool executed\\n"\n' + } > "$work/in/bin/tool.sh" + chmod +x "$work/in/bin/tool.sh" } keyid_from_keygen() { @@ -177,14 +238,19 @@ keyid_from_keygen() { inspect_checks() { label=$1 out=$2 - contains "$label-inspect-magic" "$out" "cfree-package 2" + contains "$label-inspect-magic" "$out" "cfree-package 3" contains "$label-inspect-name" "$out" "name = matrix-test" contains "$label-inspect-version" "$out" "version = 1.0.0" contains "$label-inspect-desc" "$out" "description = package test matrix" - contains "$label-inspect-hash" "$out" "hash = blake2b-merkle-v1" + contains "$label-inspect-hash" "$out" "hash = blake2b-256" + contains "$label-inspect-tree-format" "$out" "tree = cfree-tree-v1" + contains "$label-inspect-blob-format" "$out" "blob = cfree-blob-v1" + contains "$label-inspect-output" "$out" "[output]" + contains "$label-inspect-output-tree" "$out" "tree = $tree_id" contains "$label-inspect-empty" "$out" "path = empty.dat" contains "$label-inspect-boundary" "$out" "path = chunk64p1.bin" - contains "$label-inspect-blake2b" "$out" "blake2b = " + contains "$label-inspect-exec" "$out" "path = bin/tool.sh" + not_contains "$label-inspect-no-v2-hash" "$out" "blake2b-merkle-v1" not_contains "$label-inspect-no-sha256" "$out" "sha256" } @@ -208,7 +274,7 @@ flip_byte() { printf '\377' | dd of="$file" bs=1 seek="$off" count=1 conv=notrunc >/dev/null 2>&1 } -cfpkg_field_offset() { +cfpkg3_field_offset() { file=$1 idx=$2 perl -e ' @@ -216,6 +282,8 @@ cfpkg_field_offset() { use warnings; my ($file, $idx) = @ARGV; open my $fh, "<:raw", $file or die "$file: $!"; + read $fh, my $magic, 8; + die "bad magic" unless $magic eq "cfpkg3\0\0"; seek $fh, 16 + 8 * $idx, 0 or die "seek: $!"; read $fh, my $b, 8; die "short read" unless length($b) == 8; @@ -223,6 +291,119 @@ cfpkg_field_offset() { ' "$file" "$idx" } +extract_cfpkg3_descriptor() { + pkg=$1 + out=$2 + descriptor_off=$(cfpkg3_field_offset "$pkg" 4) + descriptor_size=$(cfpkg3_field_offset "$pkg" 5) + dd if="$pkg" of="$out" bs=1 skip="$descriptor_off" count="$descriptor_size" >/dev/null 2>&1 +} + +extract_cfpkg3_manifest() { + pkg=$1 + out=$2 + manifest_off=$(cfpkg3_field_offset "$pkg" 0) + manifest_size=$(cfpkg3_field_offset "$pkg" 1) + dd if="$pkg" of="$out" bs=1 skip="$manifest_off" count="$manifest_size" >/dev/null 2>&1 +} + +descriptor_value() { + file=$1 + key=$2 + sed -n "s/^$key = //p" "$file" | sed -n '1p' +} + +check_cfpkg3_index() { + label=$1 + pkg=$2 + descriptor=$3 + expected_compression=$4 + + index_off=$(descriptor_value "$descriptor" "index-offset") + index_size=$(descriptor_value "$descriptor" "index-size") + content_size=$(descriptor_value "$descriptor" "content-size") + empty_blob=$(tree_blob_for_path "$tree_file" "empty.dat") + one_blob=$(tree_blob_for_path "$tree_file" "one.bin") + dup_one_blob=$(tree_blob_for_path "$tree_file" "dup-one.bin") + chunk64_blob=$(tree_blob_for_path "$tree_file" "chunk64.bin") + chunk64_copy_blob=$(tree_blob_for_path "$tree_file" "chunk64-copy.bin") + chunk64p1_blob=$(tree_blob_for_path "$tree_file" "chunk64p1.bin") + chunk3_blob=$(tree_blob_for_path "$tree_file" "chunk3.bin") + if perl -e ' + use strict; + use warnings; + my ($pkg, $index_off, $index_size, $content_size, $expected_comp, + $empty, $one, $dup_one, $chunk64, $chunk64_copy, $chunk64p1, + $chunk3) = @ARGV; + die "duplicate one.bin blob mismatch" unless $one eq $dup_one; + die "duplicate chunk64 blob mismatch" unless $chunk64 eq $chunk64_copy; + die "bad index size" unless $index_size % 168 == 0; + open my $fh, "<:raw", $pkg or die "$pkg: $!"; + seek $fh, $index_off, 0 or die "seek: $!"; + read $fh, my $index, $index_size; + die "short index" unless length($index) == $index_size; + my %count; + my ($prev_blob, $prev_chunk) = ("", -1); + for (my $off = 0; $off < $index_size; $off += 168) { + my $rec = substr($index, $off, 168); + my $blob = unpack("H64", substr($rec, 0, 32)); + my $chunk = unpack("Q<", substr($rec, 32, 8)); + my $content_off = unpack("Q<", substr($rec, 40, 8)); + my $stored_size = unpack("Q<", substr($rec, 48, 8)); + my $raw_size = unpack("Q<", substr($rec, 56, 8)); + my $compression = unpack("V", substr($rec, 64, 4)); + my $reserved = unpack("V", substr($rec, 68, 4)); + die "reserved field is nonzero" if $reserved != 0; + die "wrong compression" if $compression != $expected_comp; + die "raw chunk size out of range" if $raw_size == 0 || $raw_size > 65536; + die "content range out of bounds" + if $content_off > $content_size || $stored_size > $content_size - $content_off; + if ($prev_blob ne "") { + my $cmp = $prev_blob cmp $blob; + die "index not sorted" if $cmp > 0; + die "chunk index not increasing" if $cmp == 0 && $chunk <= $prev_chunk; + die "new blob does not start at chunk 0" if $cmp < 0 && $chunk != 0; + } elsif ($chunk != 0) { + die "first blob does not start at chunk 0"; + } + $count{$blob}++; + ($prev_blob, $prev_chunk) = ($blob, $chunk); + } + die "unexpected record count" unless $index_size / 168 == 10; + die "empty blob should have no records" if exists $count{$empty}; + die "one-byte duplicate blob count" unless ($count{$one} // 0) == 1; + die "chunk64 duplicate blob count" unless ($count{$chunk64} // 0) == 1; + die "chunk64p1 blob count" unless ($count{$chunk64p1} // 0) == 2; + die "chunk3 blob count" unless ($count{$chunk3} // 0) == 4; + ' "$pkg" "$index_off" "$index_size" "$content_size" "$expected_compression" \ + "$empty_blob" "$one_blob" "$dup_one_blob" "$chunk64_blob" \ + "$chunk64_copy_blob" "$chunk64p1_blob" "$chunk3_blob" \ + > "$work/$label.out" 2> "$work/$label.err"; then + ok "$label" + else + not_ok "$label" "$work/$label.err" + fi +} + +maybe_corrupt_native_region() { + label=$1 + pkg=$2 + descriptor=$3 + key=$4 + off=$(descriptor_value "$descriptor" "$key") + case "$off" in + ''|*[!0-9]*|0) + skip_test "$label" + ;; + *) + cp "$pkg" "$work/pkg/bad-$label.cfpkg" + flip_byte "$work/pkg/bad-$label.cfpkg" "$off" + run_fail "$label" "$CFREE" pkg verify -p "$work/key.pub" \ + "$work/pkg/bad-$label.cfpkg" + ;; + esac +} + run_matrix_for_package() { label=$1 pkg=$2 @@ -239,6 +420,7 @@ run_matrix_for_package() { run_ok "$label-unpack-pubkey" "$CFREE" pkg unpack --verify -p "$work/key.pub" "$pkg" -C "$outdir" contains "$label-unpack-verify-output-name" "$work/$label-unpack-pubkey.out" "ok: matrix-test 1.0.0" same_artifacts "$label-unpack-content" "$outdir" + is_executable "$label-unpack-exec-mode" "$outdir/bin/tool.sh" run_ok "$label-verify-format-override" "$CFREE" pkg verify -p "$work/key.pub" --format "$format" "$pkg" } @@ -254,57 +436,146 @@ else not_ok "pkg-keygen-keyid" "$work/pkg-keygen-keyid.diag" fi +run_ok "pkg-cas-add-tree" "$CFREE" cas add-tree --cas "$work/cas" --root "$work/in" +tree_id=$(first_hex_id "$work/pkg-cas-add-tree.out") +if [ -n "$tree_id" ]; then + ok "pkg-cas-tree-id" +else + echo "could not parse tree id from cas add-tree output" > "$work/pkg-cas-tree-id.diag" + not_ok "pkg-cas-tree-id" "$work/pkg-cas-tree-id.diag" +fi +tree_file=$(cas_object_path "$work/cas" tree "$tree_id") +assert_file_exists "pkg-cas-tree-object" "$tree_file" +contains "pkg-cas-tree-magic" "$tree_file" "cfree-tree 1" +contains "pkg-cas-tree-exec-mode" "$tree_file" "mode = x" +contains "pkg-cas-tree-file-mode" "$tree_file" "mode = -" + run_ok "pkg-create-targz-deflate" "$CFREE" pkg create \ --name matrix-test --version 1.0.0 --desc "package test matrix" \ - --format tar.gz -s "$work/key.key" -o "$work/pkg/matrix.tar.gz" \ - $artifact_paths + --format tar.gz --cas "$work/cas" --tree "$tree_id" \ + -s "$work/key.key" -o "$work/pkg/matrix.tar.gz" run_ok "pkg-create-cfpkg-none" "$CFREE" pkg create \ --name matrix-test --version 1.0.0 --desc "package test matrix" \ - --format cfpkg --compression none -s "$work/key.key" \ - -o "$work/pkg/matrix-none.cfpkg" $artifact_paths + --format cfpkg --compression none --cas "$work/cas" --tree "$tree_id" \ + -s "$work/key.key" -o "$work/pkg/matrix-none.cfpkg" run_ok "pkg-create-cfpkg-lz4" "$CFREE" pkg create \ --name matrix-test --version 1.0.0 --desc "package test matrix" \ - --format cfpkg --compression lz4-block-v1 -s "$work/key.key" \ - -o "$work/pkg/matrix-lz4.cfpkg" $artifact_paths + --format cfpkg --compression lz4-block-v1 --cas "$work/cas" --tree "$tree_id" \ + -s "$work/key.key" -o "$work/pkg/matrix-lz4.cfpkg" -run_ok "pkg-create-order-targz-forward" "$CFREE" pkg create \ - --name order-test --version 1.0.0 --desc "order independence" \ - --format tar.gz -s "$work/key.key" -o "$work/pkg/order-forward.tar.gz" \ - $artifact_paths -run_ok "pkg-create-order-targz-reversed" "$CFREE" pkg create \ - --name order-test --version 1.0.0 --desc "order independence" \ - --format tar.gz -s "$work/key.key" -o "$work/pkg/order-reversed.tar.gz" \ - $reversed_artifact_paths -run_ok "pkg-inspect-order-targz-forward" "$CFREE" pkg inspect "$work/pkg/order-forward.tar.gz" -run_ok "pkg-inspect-order-targz-reversed" "$CFREE" pkg inspect "$work/pkg/order-reversed.tar.gz" -same_file "pkg-order-targz-manifest-independent" \ - "$work/pkg-inspect-order-targz-forward.out" \ - "$work/pkg-inspect-order-targz-reversed.out" -same_file "pkg-order-targz-bytes-independent" \ - "$work/pkg/order-forward.tar.gz" "$work/pkg/order-reversed.tar.gz" - -run_ok "pkg-create-order-cfpkg-forward" "$CFREE" pkg create \ +run_ok "pkg-create-root-targz" "$CFREE" pkg create \ --name order-test --version 1.0.0 --desc "order independence" \ - --format cfpkg --compression lz4-block-v1 -s "$work/key.key" \ - -o "$work/pkg/order-forward.cfpkg" $artifact_paths -run_ok "pkg-create-order-cfpkg-reversed" "$CFREE" pkg create \ + --format tar.gz --root "$work/in" -s "$work/key.key" \ + -o "$work/pkg/order-root.tar.gz" +run_ok "pkg-create-cas-targz" "$CFREE" pkg create \ --name order-test --version 1.0.0 --desc "order independence" \ - --format cfpkg --compression lz4-block-v1 -s "$work/key.key" \ - -o "$work/pkg/order-reversed.cfpkg" $reversed_artifact_paths -run_ok "pkg-inspect-order-cfpkg-forward" "$CFREE" pkg inspect "$work/pkg/order-forward.cfpkg" -run_ok "pkg-inspect-order-cfpkg-reversed" "$CFREE" pkg inspect "$work/pkg/order-reversed.cfpkg" -same_file "pkg-order-cfpkg-manifest-independent" \ - "$work/pkg-inspect-order-cfpkg-forward.out" \ - "$work/pkg-inspect-order-cfpkg-reversed.out" -same_file "pkg-order-cfpkg-bytes-independent" \ - "$work/pkg/order-forward.cfpkg" "$work/pkg/order-reversed.cfpkg" + --format tar.gz --cas "$work/cas" --tree "$tree_id" \ + -s "$work/key.key" -o "$work/pkg/order-cas.tar.gz" +run_ok "pkg-inspect-order-targz-root" "$CFREE" pkg inspect "$work/pkg/order-root.tar.gz" +run_ok "pkg-inspect-order-targz-cas" "$CFREE" pkg inspect "$work/pkg/order-cas.tar.gz" +same_file "pkg-root-and-cas-manifest-match" \ + "$work/pkg-inspect-order-targz-root.out" \ + "$work/pkg-inspect-order-targz-cas.out" + +run_ok "pkg-create-version-alt" "$CFREE" pkg create \ + --name matrix-test --version 1.0.1 --desc "package test matrix" \ + --format tar.gz --cas "$work/cas" --tree "$tree_id" \ + -s "$work/key.key" -o "$work/pkg/matrix-alt-version.tar.gz" +run_ok "pkg-inspect-version-alt" "$CFREE" pkg inspect "$work/pkg/matrix-alt-version.tar.gz" +contains "pkg-version-alt-tree-same" "$work/pkg-inspect-version-alt.out" "tree = $tree_id" +contains "pkg-version-alt-version" "$work/pkg-inspect-version-alt.out" "version = 1.0.1" run_matrix_for_package "targz" "$work/pkg/matrix.tar.gz" "tar.gz" run_matrix_for_package "cfpkg-none" "$work/pkg/matrix-none.cfpkg" "cfpkg" run_matrix_for_package "cfpkg-lz4" "$work/pkg/matrix-lz4.cfpkg" "cfpkg" +if have_cmd perl; then + extract_cfpkg3_descriptor "$work/pkg/matrix-none.cfpkg" "$work/native-none.descriptor" + contains "native-descriptor-magic" "$work/native-none.descriptor" "cfree-encoding 3" + contains "native-descriptor-tree-region" "$work/native-none.descriptor" "tree-offset = " + contains "native-descriptor-index-bytes" "$work/native-none.descriptor" "index-bytes = 1680" + check_cfpkg3_index "native-none-index-shape" \ + "$work/pkg/matrix-none.cfpkg" "$work/native-none.descriptor" 0 + extract_cfpkg3_descriptor "$work/pkg/matrix-lz4.cfpkg" "$work/native-lz4.descriptor" + check_cfpkg3_index "native-lz4-index-shape" \ + "$work/pkg/matrix-lz4.cfpkg" "$work/native-lz4.descriptor" 1 + extract_cfpkg3_manifest "$work/pkg/matrix-none.cfpkg" "$work/native-none.manifest" + contains "native-manifest-v3" "$work/native-none.manifest" "cfree-package 3" +else + skip_test "native-descriptor-inspection" +fi + +run_ok "pkg-create-cfpkg-thin" "$CFREE" pkg create \ + --name matrix-test --version 1.0.0 --desc "package test matrix" \ + --format cfpkg --native-shape thin --external "$work/ext-thin" \ + --compression none --cas "$work/cas" --tree "$tree_id" \ + -s "$work/key.key" -o "$work/pkg/matrix-thin.cfpkg" +run_ok "native-thin-inspect-encoding" "$CFREE" pkg inspect --encoding \ + "$work/pkg/matrix-thin.cfpkg" +contains "native-thin-tree-external" "$work/native-thin-inspect-encoding.out" \ + "tree-size = 0" +contains "native-thin-index-external" "$work/native-thin-inspect-encoding.out" \ + "index-size = 0" +contains "native-thin-content-external" "$work/native-thin-inspect-encoding.out" \ + "content-size = 0" +contains "native-thin-index-url" "$work/native-thin-inspect-encoding.out" \ + "index-url = index/" +contains "native-thin-tree-url" "$work/native-thin-inspect-encoding.out" \ + "url = tree/" +contains "native-thin-chunk-template" "$work/native-thin-inspect-encoding.out" \ + "template = chunk/{blob-prefix}/{blob}/{chunk}" +run_fail "native-thin-verify-without-external-fails" "$CFREE" pkg verify \ + -p "$work/key.pub" "$work/pkg/matrix-thin.cfpkg" +run_ok "native-thin-verify-external" "$CFREE" pkg verify \ + -p "$work/key.pub" --external "$work/ext-thin" \ + "$work/pkg/matrix-thin.cfpkg" +mkdir -p "$work/unpack/cfpkg-thin" +run_ok "native-thin-unpack-external" "$CFREE" pkg unpack --verify \ + -p "$work/key.pub" --external "$work/ext-thin" \ + "$work/pkg/matrix-thin.cfpkg" -C "$work/unpack/cfpkg-thin" +same_artifacts "native-thin-unpack-content" "$work/unpack/cfpkg-thin" +is_executable "native-thin-unpack-exec-mode" \ + "$work/unpack/cfpkg-thin/bin/tool.sh" + +run_ok "pkg-create-cfpkg-metadata" "$CFREE" pkg create \ + --name matrix-test --version 1.0.0 --desc "package test matrix" \ + --format cfpkg --native-shape metadata --external "$work/ext-metadata" \ + --compression none --cas "$work/cas" --tree "$tree_id" \ + -s "$work/key.key" -o "$work/pkg/matrix-metadata.cfpkg" +run_fail "native-metadata-verify-without-external-fails" "$CFREE" pkg verify \ + -p "$work/key.pub" "$work/pkg/matrix-metadata.cfpkg" +run_ok "native-metadata-verify-external" "$CFREE" pkg verify \ + -p "$work/key.pub" --external "$work/ext-metadata" \ + "$work/pkg/matrix-metadata.cfpkg" + +payload_blob_for_external=$(tree_blob_for_path "$tree_file" "payload.txt") +payload_blob_prefix=$(id_prefix "$payload_blob_for_external") +cp -R "$work/ext-thin" "$work/ext-thin-bad-chunk" +printf tamper >> "$work/ext-thin-bad-chunk/chunk/$payload_blob_prefix/$payload_blob_for_external/0" +run_fail "native-external-mutated-chunk-fails" "$CFREE" pkg verify \ + -p "$work/key.pub" --external "$work/ext-thin-bad-chunk" \ + "$work/pkg/matrix-thin.cfpkg" + +cp -R "$work/ext-thin" "$work/ext-thin-bad-tree" +printf tamper >> "$(cas_object_path "$work/ext-thin-bad-tree" tree "$tree_id")" +run_fail "native-external-mutated-tree-fails" "$CFREE" pkg verify \ + -p "$work/key.pub" --external "$work/ext-thin-bad-tree" \ + "$work/pkg/matrix-thin.cfpkg" + +if have_cmd perl; then + extract_cfpkg3_descriptor "$work/pkg/matrix-thin.cfpkg" "$work/native-thin.descriptor" + thin_index_rel=$(descriptor_value "$work/native-thin.descriptor" "index-url") + cp -R "$work/ext-thin" "$work/ext-thin-bad-index" + printf tamper >> "$work/ext-thin-bad-index/$thin_index_rel" + run_fail "native-external-mutated-index-fails" "$CFREE" pkg verify \ + -p "$work/key.pub" --external "$work/ext-thin-bad-index" \ + "$work/pkg/matrix-thin.cfpkg" +else + skip_test "native-external-mutated-index-fails" +fi + run_ok "host-gzip-accepts-cfree-output" gzip -t "$work/pkg/matrix.tar.gz" if gunzip -c "$work/pkg/matrix.tar.gz" > "$work/pkg/matrix.tar" 2> "$work/gunzip.err" && gzip -c "$work/pkg/matrix.tar" > "$work/pkg/host.tar.gz" 2> "$work/regzip.err"; then @@ -316,17 +587,17 @@ else fi run_ok "pkg-create-infer-targz" "$CFREE" pkg create \ - --name matrix-test --version 1.0.0 -s "$work/key.key" \ - -o "$work/pkg/infer.tar.gz" "$work/in/payload.txt" + --name matrix-test --version 1.0.0 --root "$work/in" -s "$work/key.key" \ + -o "$work/pkg/infer.tar.gz" run_ok "pkg-create-infer-cfpkg" "$CFREE" pkg create \ - --name matrix-test --version 1.0.0 -s "$work/key.key" \ - -o "$work/pkg/infer.cfpkg" "$work/in/payload.txt" + --name matrix-test --version 1.0.0 --root "$work/in" -s "$work/key.key" \ + -o "$work/pkg/infer.cfpkg" run_fail "pkg-create-unknown-extension-needs-format" "$CFREE" pkg create \ --name matrix-test --version 1.0.0 -s "$work/key.key" \ - -o "$work/pkg/unknown.pkg" "$work/in/payload.txt" + --root "$work/in" -o "$work/pkg/unknown.pkg" run_ok "pkg-create-override-targz-extension" "$CFREE" pkg create \ --name matrix-test --version 1.0.0 --format tar.gz -s "$work/key.key" \ - -o "$work/pkg/override.pkg" "$work/in/payload.txt" + --root "$work/in" -o "$work/pkg/override.pkg" run_ok "pkg-verify-override-targz-extension" "$CFREE" pkg verify -p "$work/key.pub" \ --format tar.gz "$work/pkg/override.pkg" run_fail "pkg-verify-wrong-format-native" "$CFREE" pkg verify -p "$work/key.pub" \ @@ -361,7 +632,8 @@ run_fail "pkg-verify-wrong-pubkey" "$CFREE" pkg verify -p "$work/wrong.pub" "$wo if have_cmd tar; then extract_targz "$work/pkg/matrix.tar.gz" "$work/tar-ok" - for f in cfree/package.manifest cfree/package.manifest.minisig cfree/package.pub $artifacts; do + tar_tree_file=$(cas_object_path "$work/tar-ok/cfree/cas" tree "$tree_id") + for f in cfree/package.manifest cfree/package.manifest.minisig cfree/package.pub; do safe_f=$(printf '%s\n' "$f" | sed 's#[^A-Za-z0-9_.-]#-#g') if [ -f "$work/tar-ok/$f" ]; then ok "portable-member-$f" @@ -370,6 +642,30 @@ if have_cmd tar; then not_ok "portable-member-$f" "$work/portable-member-$safe_f.diag" fi done + assert_file_exists "portable-member-tree-object" "$tar_tree_file" + same_file "portable-tree-matches-cas" "$tree_file" "$tar_tree_file" + for f in $artifacts; do + blob=$(tree_blob_for_path "$tar_tree_file" "$f") + blob_file=$(cas_object_path "$work/tar-ok/cfree/cas" blob "$blob") + safe_f=$(printf '%s\n' "$f" | sed 's#[^A-Za-z0-9_.-]#-#g') + assert_file_exists "portable-member-blob-$safe_f" "$blob_file" + done + if [ -f "$work/tar-ok/payload.txt" ] || [ -f "$work/tar-ok/bin/tool.sh" ]; then + echo "portable archive stored output files outside cfree/cas" > "$work/portable-no-root-files.diag" + not_ok "portable-no-root-files" "$work/portable-no-root-files.diag" + else + ok "portable-no-root-files" + fi + + extract_targz "$work/pkg/matrix-alt-version.tar.gz" "$work/tar-alt-version" + contains "portable-alt-manifest-tree-same" \ + "$work/tar-alt-version/cfree/package.manifest" "tree = $tree_id" + not_same_file "portable-metadata-changes-manifest" \ + "$work/tar-ok/cfree/package.manifest" \ + "$work/tar-alt-version/cfree/package.manifest" + same_file "portable-metadata-keeps-tree-object" \ + "$tar_tree_file" \ + "$(cas_object_path "$work/tar-alt-version/cfree/cas" tree "$tree_id")" cp -R "$work/tar-ok" "$work/tar-missing-manifest" rm -f "$work/tar-missing-manifest/cfree/package.manifest" @@ -383,23 +679,30 @@ if have_cmd tar; then run_fail "portable-missing-signature-fails" "$CFREE" pkg verify -p "$work/key.pub" \ "$work/pkg/bad-missing-signature.tar.gz" - cp -R "$work/tar-ok" "$work/tar-missing-artifact" - rm -f "$work/tar-missing-artifact/payload.txt" - repack_targz "$work/tar-missing-artifact" "$work/pkg/bad-missing-artifact.tar.gz" - run_fail "portable-missing-artifact-fails" "$CFREE" pkg verify -p "$work/key.pub" \ - "$work/pkg/bad-missing-artifact.tar.gz" - - cp -R "$work/tar-ok" "$work/tar-extra-member" - printf extra > "$work/tar-extra-member/extra.dat" - repack_targz "$work/tar-extra-member" "$work/pkg/bad-extra-member.tar.gz" - run_fail "portable-extra-member-fails" "$CFREE" pkg verify -p "$work/key.pub" \ - "$work/pkg/bad-extra-member.tar.gz" - - cp -R "$work/tar-ok" "$work/tar-mutated-artifact" - printf tamper >> "$work/tar-mutated-artifact/payload.txt" - repack_targz "$work/tar-mutated-artifact" "$work/pkg/bad-mutated-artifact.tar.gz" - run_fail "portable-mutated-artifact-fails" "$CFREE" pkg verify -p "$work/key.pub" \ - "$work/pkg/bad-mutated-artifact.tar.gz" + cp -R "$work/tar-ok" "$work/tar-missing-tree" + rm -f "$(cas_object_path "$work/tar-missing-tree/cfree/cas" tree "$tree_id")" + repack_targz "$work/tar-missing-tree" "$work/pkg/bad-missing-tree.tar.gz" + run_fail "portable-missing-tree-fails" "$CFREE" pkg verify -p "$work/key.pub" \ + "$work/pkg/bad-missing-tree.tar.gz" + + payload_blob=$(tree_blob_for_path "$tar_tree_file" "payload.txt") + cp -R "$work/tar-ok" "$work/tar-missing-blob" + rm -f "$(cas_object_path "$work/tar-missing-blob/cfree/cas" blob "$payload_blob")" + repack_targz "$work/tar-missing-blob" "$work/pkg/bad-missing-blob.tar.gz" + run_fail "portable-missing-blob-fails" "$CFREE" pkg verify -p "$work/key.pub" \ + "$work/pkg/bad-missing-blob.tar.gz" + + cp -R "$work/tar-ok" "$work/tar-mutated-blob" + printf tamper >> "$(cas_object_path "$work/tar-mutated-blob/cfree/cas" blob "$payload_blob")" + repack_targz "$work/tar-mutated-blob" "$work/pkg/bad-mutated-blob.tar.gz" + run_fail "portable-mutated-blob-fails" "$CFREE" pkg verify -p "$work/key.pub" \ + "$work/pkg/bad-mutated-blob.tar.gz" + + cp -R "$work/tar-ok" "$work/tar-mutated-tree" + printf '\n# tamper\n' >> "$(cas_object_path "$work/tar-mutated-tree/cfree/cas" tree "$tree_id")" + repack_targz "$work/tar-mutated-tree" "$work/pkg/bad-mutated-tree.tar.gz" + run_fail "portable-mutated-tree-fails" "$CFREE" pkg verify -p "$work/key.pub" \ + "$work/pkg/bad-mutated-tree.tar.gz" cp -R "$work/tar-ok" "$work/tar-mutated-manifest" printf '\n# tamper\n' >> "$work/tar-mutated-manifest/cfree/package.manifest" @@ -426,63 +729,57 @@ run_fail "native-truncated-fails" "$CFREE" pkg verify -p "$work/key.pub" \ "$work/pkg/bad-native-truncated.cfpkg" cp "$work/pkg/matrix-none.cfpkg" "$work/pkg/bad-native-manifest.cfpkg" -flip_byte "$work/pkg/bad-native-manifest.cfpkg" 160 -run_fail "native-mutated-manifest-fails" "$CFREE" pkg verify -p "$work/key.pub" \ - "$work/pkg/bad-native-manifest.cfpkg" +if have_cmd perl; then + manifest_off=$(cfpkg3_field_offset "$work/pkg/matrix-none.cfpkg" 0) + flip_byte "$work/pkg/bad-native-manifest.cfpkg" "$manifest_off" + run_fail "native-mutated-manifest-fails" "$CFREE" pkg verify -p "$work/key.pub" \ + "$work/pkg/bad-native-manifest.cfpkg" +else + skip_test "native-mutated-manifest-fails" +fi if have_cmd perl; then - descriptor_off=$(cfpkg_field_offset "$work/pkg/matrix-none.cfpkg" 4) - index_off=$(cfpkg_field_offset "$work/pkg/matrix-none.cfpkg" 10) - content_off=$(cfpkg_field_offset "$work/pkg/matrix-none.cfpkg" 12) - alignment_off=$((16 + 8 * 14)) - chunk_size_off=$((16 + 8 * 15)) + descriptor_off=$(cfpkg3_field_offset "$work/pkg/matrix-none.cfpkg" 4) + extract_cfpkg3_descriptor "$work/pkg/matrix-none.cfpkg" "$work/native-corrupt.descriptor" cp "$work/pkg/matrix-none.cfpkg" "$work/pkg/bad-native-descriptor.cfpkg" flip_byte "$work/pkg/bad-native-descriptor.cfpkg" "$descriptor_off" run_fail "native-mutated-descriptor-fails" "$CFREE" pkg verify -p "$work/key.pub" \ "$work/pkg/bad-native-descriptor.cfpkg" - cp "$work/pkg/matrix-none.cfpkg" "$work/pkg/bad-native-index.cfpkg" - flip_byte "$work/pkg/bad-native-index.cfpkg" "$index_off" - run_fail "native-mutated-index-fails" "$CFREE" pkg verify -p "$work/key.pub" \ - "$work/pkg/bad-native-index.cfpkg" - - cp "$work/pkg/matrix-none.cfpkg" "$work/pkg/bad-native-content.cfpkg" - flip_byte "$work/pkg/bad-native-content.cfpkg" "$content_off" - run_fail "native-mutated-content-fails" "$CFREE" pkg verify -p "$work/key.pub" \ - "$work/pkg/bad-native-content.cfpkg" - - cp "$work/pkg/matrix-none.cfpkg" "$work/pkg/bad-native-alignment.cfpkg" - flip_byte "$work/pkg/bad-native-alignment.cfpkg" "$alignment_off" - run_fail "native-mutated-header-alignment-fails" "$CFREE" pkg verify -p "$work/key.pub" \ - "$work/pkg/bad-native-alignment.cfpkg" - - cp "$work/pkg/matrix-none.cfpkg" "$work/pkg/bad-native-chunk-size.cfpkg" - flip_byte "$work/pkg/bad-native-chunk-size.cfpkg" "$chunk_size_off" - run_fail "native-mutated-header-chunk-size-fails" "$CFREE" pkg verify -p "$work/key.pub" \ - "$work/pkg/bad-native-chunk-size.cfpkg" + maybe_corrupt_native_region "native-mutated-tree-region-fails" \ + "$work/pkg/matrix-none.cfpkg" "$work/native-corrupt.descriptor" "tree-offset" + maybe_corrupt_native_region "native-mutated-index-fails" \ + "$work/pkg/matrix-none.cfpkg" "$work/native-corrupt.descriptor" "index-offset" + maybe_corrupt_native_region "native-mutated-content-fails" \ + "$work/pkg/matrix-none.cfpkg" "$work/native-corrupt.descriptor" "content-offset" else skip_test "native-region-offset-corruption-tests" fi run_fail "pkg-create-missing-required-fails" "$CFREE" pkg create \ - --name missing-version -s "$work/key.key" -o "$work/pkg/missing.tar.gz" "$work/in/payload.txt" + --name missing-version -s "$work/key.key" --root "$work/in" -o "$work/pkg/missing.tar.gz" run_fail "pkg-create-unknown-format-fails" "$CFREE" pkg create \ --name matrix-test --version 1.0.0 --format nope -s "$work/key.key" \ - -o "$work/pkg/badformat.tar.gz" "$work/in/payload.txt" + --root "$work/in" -o "$work/pkg/badformat.tar.gz" run_fail "pkg-create-unknown-compression-fails" "$CFREE" pkg create \ --name matrix-test --version 1.0.0 --format cfpkg --compression nope \ - -s "$work/key.key" -o "$work/pkg/badcomp.cfpkg" "$work/in/payload.txt" + -s "$work/key.key" --root "$work/in" -o "$work/pkg/badcomp.cfpkg" mkdir -p "$work/dup/a" "$work/dup/b" printf one > "$work/dup/a/same.dat" printf two > "$work/dup/b/same.dat" -run_fail "pkg-create-duplicate-artifact-path-fails" "$CFREE" pkg create \ - --name matrix-test --version 1.0.0 --format tar.gz -s "$work/key.key" \ - -o "$work/pkg/duplicate.tar.gz" "$work/dup/a/same.dat" "$work/dup/b/same.dat" +cat > "$work/dup.map" <<EOF +same.dat - $work/dup/a/same.dat +same.dat - $work/dup/b/same.dat +EOF +run_fail "pkg-cas-duplicate-tree-path-fails" "$CFREE" cas add-tree \ + --cas "$work/cas" --map "$work/dup.map" printf unsafe > "$work/in/bad\\path.dat" -run_fail "pkg-create-unsafe-artifact-path-fails" "$CFREE" pkg create \ - --name matrix-test --version 1.0.0 --format tar.gz -s "$work/key.key" \ - -o "$work/pkg/unsafe-path.tar.gz" "$work/in/bad\\path.dat" +cat > "$work/unsafe.map" <<EOF +bad\\path.dat - $work/in/bad\\path.dat +EOF +run_fail "pkg-cas-unsafe-tree-path-fails" "$CFREE" cas add-tree \ + --cas "$work/cas" --map "$work/unsafe.map" run_fail "pkg-verify-missing-file-fails" "$CFREE" pkg verify -p "$work/key.pub" \ "$work/pkg/does-not-exist.tar.gz" printf not-a-package > "$work/not-a-package.cfpkg" diff --git a/test/test.mk b/test/test.mk @@ -45,6 +45,7 @@ TEST_TARGETS = \ test-dbg \ test-driver \ test-driver-ar \ + test-driver-cas \ test-driver-cc \ test-driver-objcopy \ test-driver-objdump \ @@ -113,7 +114,7 @@ DEFAULT_TEST_TARGETS = \ test: $(DEFAULT_TEST_TARGETS) -test-driver: test-driver-cc test-driver-ar test-driver-strip test-driver-objcopy test-driver-objdump test-driver-pkg test-driver-strings +test-driver: test-driver-cc test-driver-ar test-driver-cas test-driver-strip test-driver-objcopy test-driver-objdump test-driver-pkg test-driver-strings test-driver-cc: bin @CFREE=$(abspath $(BIN)) sh test/driver/run.sh @@ -178,6 +179,9 @@ $(AR_TEST_BIN): test/ar_test.c $(LIB_AR) test-driver-ar: bin @CFREE=$(abspath $(BIN)) test/ar/run.sh +test-driver-cas: bin + @CFREE=$(abspath $(BIN)) sh test/cas/run.sh + test-driver-strip: bin @CFREE=$(abspath $(BIN)) test/strip/run.sh