diff --git a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/cs229_building_llms/cs229_building_llms.c b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/cs229_building_llms/cs229_building_llms.c index bcc15659..fc02661a 100644 --- a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/cs229_building_llms/cs229_building_llms.c +++ b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/cs229_building_llms/cs229_building_llms.c @@ -1 +1,226 @@ -/* ============================================================================ * cs229_building_llms.c — Pass 3 projection of the CS229 "Building LLMs" lecture * ============================================================================ * * PURPOSE * ------- * A small C11 program that demonstrates the constructive form of the lecture's * six pillars (Architecture, Training algorithm/loss, Data, Evaluation, * Systems, Model) using the duffle + forth bootslop conventions. * * The program illustrates: * - Byte Pair Encoding (BPE) tokenization * - Autoregressive forward pass: p(X_t | X_1..X_{t-1}) * - Softmax projection (the linear layer from hidden size to vocab size) * - Cross-entropy loss: L_CE = - sum_t log p_theta(X_t | X_#include #include #include #include #include #ifdef INTELLISENSE_DIRECTIVES# pragma once# include "dsl.h"# include "math.h"# include "memory.h"#endif#pragma region Typestypedef uint32_t TSet_(Token);typedef int32_t TSet_(S32);typedef uint32_t TSet_(U32);typedef float TSet_(F32);typedef Struct_(Scalar) { F32 value; };typedef Struct_(Logits) { F32_R data; U4 len; };typedef Struct_(Probability) { F32 value; };typedef Struct_(TokenSequence) { Token_R ids; U4 len; U4 cap; };typedef Struct_(Vocabulary) { U4 vocab_size; Token_R id_to_token; U4_R token_to_id; };typedef Struct_(BPEMerge) { Token a; Token b; Token merged; U4 count; };typedef Struct_(BPEMerger) { BPEMerge_R merges; U4 len; U4 cap; U4 vocab_size; };#pragma endregion Types#pragma region BPE Tokenization/*- 
BPE training is a greedy compression-based procedure. * Start with the character vocabulary; iteratively merge the most * frequent pair; stop at target_vocab_size. * Per Tier 2 #2.6: 'tokenize' is a Relation between Text and Seq[Token]. */I_ BPEMerger bpe_train(Slice corpus, U4 target_vocab_size) { BPEMerger m = {0}; (void)corpus; (void)target_vocab_size; return m;}/*- BPE encode — apply the learned merges to a text slice. * The encoding is bounded: the output is a sequence of Token : int32. */I_ TokenSequence bpe_encode(Slice text, BPEMerger_R m) { TokenSequence out = {0}; (void)text; (void)m; return out;}#pragma endregion BPE Tokenization#pragma region Autoregressive Forward Pass/*- Softmax projection — the linear layer from hidden size d to vocab |V|. * softmax(z_i) = exp(z_i) / sum_j exp(z_j) * Per Tier 2 #2.3: this is the 'result' of the projection. * NOTE: subtract max(z) for numerical stability (the bounded form). */I_ Probability softmax_at(Logits_R z, U4 i) { F32 max_val = z->data[0]; for (U4 j = 1; j < z->len; ++ j) { if (z->data[j] > max_val) { max_val = z->data[j]; } } F32 sum_exp = (F32)0; for (U4 j = 0; j < z->len; ++ j) { sum_exp += expf(z->data[j] - max_val); } Probability p = { .value = expf(z->data[i] - max_val) / sum_exp }; return p;}/*- AR forward pass — predict the next token given previous tokens. * p(X_t | X_1..X_{t-1}) : this is the AR form. * The function returns the logits over the vocabulary for the next token. */I_ Logits ar_forward(TokenSequence_R context, U4 hidden_dim, U4 vocab_size) { Logits out = { .data = 0, .len = vocab_size }; (void)context; (void)hidden_dim; assert(hidden_dim > 0); assert(vocab_size > 0); return out;}/*- AR sample — sample a token from the predicted distribution. * Per the lecture: at inference, we sample from the AR distribution. 
*/I_ Token ar_sample(Logits_R logits) { assert(logits->len > 0); Token best = 0; F32 best_val = logits->data[0]; for (U4 i = 1; i < logits->len; ++ i) { if (logits->data[i] > best_val) { best_val = logits->data[i]; best = i; } } return best;}#pragma endregion Autoregressive Forward Pass#pragma region Cross-Entropy Loss/*- Cross-entropy loss for one sequence. * L_CE = - sum (t in 1..L) of log p_theta(X_t | X_1..X_{t-1}) * Per the lecture: this is THE training loss for autoregressive LMs. * Returns: Quantity : float (placeholder; resolved as float32 in the function). */I_ Scalar cross_entropy_loss(TokenSequence_R tokens, U4 hidden_dim, U4 vocab_size) { Scalar total = { .value = (F32)0 }; for (U4 t = 0; t + 1 < tokens->len; ++ t) { TokenSequence prefix = { .ids = tokens->ids, .len = t + 1, .cap = t + 1 }; Logits logits = ar_forward(&prefix, hidden_dim, vocab_size); Probability p = softmax_at(&logits, tokens->ids[t + 1]); total.value -= logf(p.value + 1e-9f); } return total;}#pragma endregion Cross-Entropy Loss#pragma region Chinchilla Scaling/*- Chinchilla optimal model size for a compute budget C. * N_opt(C) = a * C^0.5 * Per the lecture: compute-optimal ratio is ~20 tokens/param. */I_ F32 chinchilla_optimal_N(F32 compute_budget_flops) { F32 a = 0.29f; return a * powf(compute_budget_flops, 0.5f);}/*- Chinchilla optimal dataset size for a compute budget C. * D_opt(C) = b * C^0.5 * Per the lecture: compute-optimal ratio is ~20 tokens/param. */I_ F32 chinchilla_optimal_D(F32 compute_budget_flops) { F32 b = 5.7f; return b * powf(compute_budget_flops, 0.5f);}/*- Back-of-envelope training cost for a Llama-3-400B-style run. * FLOPs = 6 * N * D * Per the lecture: ~3.79e25 FLOPs for Llama 3 400B (D = 15.6T tokens). * Encoding: float (placeholder; resolved as float64 in the back-of-envelope). */I_ F32 training_flops(F32 N_params, F32 D_tokens) { return 6.0f * N_params * D_tokens;}#pragma endregion Chinchilla Scaling#pragma region KV-Cache Memory/*- KV-cache memory footprint. 
* Memory_KV = 2 * B * S * L * H * D * bytes_per_element * Per the lecture: pre-training vs inference throughput differ * because the KV-cache only grows at inference. * bytes_per_element is float16 by default (2 bytes). */I_ U64 kv_cache_bytes(U4 batch, U4 seq_len, U4 n_layers, U4 n_heads, U4 head_dim, U4 bytes_per_elem) { U64 per_layer = (U64)batch * seq_len * n_heads * head_dim * bytes_per_elem; return 2ULL * per_layer * n_layers;}#pragma endregion KV-Cache Memory#pragma region MainI_ S32 main(S32 argc, char *argv[]) { (void)argc; (void)argv; U4 hidden_dim = 4096; U4 vocab_size = 32000; U4 target_vocab = vocab_size; Slice corpus = { .ptr = 0, .len = 0 }; BPEMerger bpe = bpe_train(corpus, target_vocab); TokenSequence tokens = {0}; Logits logits = ar_forward(&tokens, hidden_dim, vocab_size); Token next = ar_sample(&logits); (void)next; Scalar loss = cross_entropy_loss(&tokens, hidden_dim, vocab_size); (void)loss; F32 compute_budget = 3.79e25f; F32 N_opt = chinchilla_optimal_N(compute_budget); F32 D_opt = chinchilla_optimal_D(compute_budget); (void)N_opt; (void)D_opt; F32 flops = training_flops(4.0e11f, 1.56e13f); (void)flops; U64 kv_bytes = kv_cache_bytes(1, 2048, 32, 32, 128, 2); (void)kv_bytes; return 0;}#pragma endregion Main \ No newline at end of file +/* ============================================================================ + * cs229_building_llms.c - Pass 3 projection of the CS229 "Building LLMs" lecture + * ============================================================================ + * + * PURPOSE + * ------- + * A small C11 program that demonstrates the constructive form of the lecture's + * six pillars (Architecture, Training algorithm/loss, Data, Evaluation, + * Systems, Model) using the duffle + forth bootslop conventions. 
+ * + * The program illustrates: + * - Byte Pair Encoding (BPE) tokenization + * - Autoregressive forward pass: p(X_t | X_1..X_{t-1}) + * - Softmax projection (the linear layer from hidden size to vocab size) + * - Cross-entropy loss: L_CE = - sum_t log p_theta(X_t | X_ +#include +#include +#include +#include +#include + +#ifdef INTELLISENSE_DIRECTIVES +# pragma once +# include "dsl.h" +# include "math.h" +# include "memory.h" +#endif + +#pragma region Types + +typedef uint32_t TSet_(Token); +typedef int32_t TSet_(S32); +typedef uint32_t TSet_(U32); +typedef float TSet_(F32); + +typedef Struct_(Scalar) { F32 value; }; +typedef Struct_(Logits) { F32_R data; U4 len; }; +typedef Struct_(Probability) { F32 value; }; +typedef Struct_(TokenSequence) { Token_R ids; U4 len; U4 cap; }; +typedef Struct_(Vocabulary) { U4 vocab_size; Token_R id_to_token; U4_R token_to_id; }; +typedef Struct_(BPEMerge) { Token a; Token b; Token merged; U4 count; }; +typedef Struct_(BPEMerger) { BPEMerge_R merges; U4 len; U4 cap; U4 vocab_size; }; + +#pragma endregion Types + +#pragma region BPE Tokenization + +/* BPE training is a greedy compression-based procedure. + * Start with the character vocabulary; iteratively merge the most + * frequent pair; stop at target_vocab_size. + * Placeholder body: both arguments are discarded and a zero-initialized + * BPEMerger is returned, so no merges are actually learned here. */ +I_ BPEMerger bpe_train(Slice corpus, U4 target_vocab_size) { + BPEMerger m = {0}; + (void)corpus; + (void)target_vocab_size; + return m; +} + +/* BPE encode - apply the learned merges to a text slice. + * The encoding is bounded: the output is a sequence of Token values + * (Token is declared from uint32_t in the Types region, not int32). + * Placeholder body: both arguments are discarded and a zero-initialized + * TokenSequence is returned. */ +I_ TokenSequence bpe_encode(Slice text, BPEMerger_R m) { + TokenSequence out = {0}; + (void)text; + (void)m; + return out; +} + +#pragma endregion BPE Tokenization + +#pragma region Autoregressive Forward Pass + +/* Softmax projection - the linear layer from hidden size d to vocab |V|. + * softmax(z_i) = exp(z_i) / sum_j exp(z_j) + * Subtract max(z) for numerical stability (the bounded form). 
*/ +I_ Probability softmax_at(Logits_R z, U4 i) { + F32 max_val = z->data[0]; + for (U4 j = 1; j < z->len; ++ j) { + if (z->data[j] > max_val) { max_val = z->data[j]; } + } + F32 sum_exp = (F32)0; + for (U4 j = 0; j < z->len; ++ j) { + sum_exp += expf(z->data[j] - max_val); + } + Probability p = { .value = expf(z->data[i] - max_val) / sum_exp }; + return p; +} + +/* AR forward pass - predict the next token given previous tokens. + * p(X_t | X_1..X_{t-1}) : this is the AR form. */ +I_ Logits ar_forward(TokenSequence_R context, U4 hidden_dim, U4 vocab_size) { + Logits out = { .data = 0, .len = vocab_size }; + (void)context; + (void)hidden_dim; + assert(hidden_dim > 0); + assert(vocab_size > 0); + return out; +} + +/* AR sample - sample a token from the predicted distribution. */ +I_ Token ar_sample(Logits_R logits) { + assert(logits->len > 0); + Token best = 0; + F32 best_val = logits->data[0]; + for (U4 i = 1; i < logits->len; ++ i) { + if (logits->data[i] > best_val) { + best_val = logits->data[i]; + best = i; + } + } + return best; +} + +#pragma endregion Autoregressive Forward Pass + +#pragma region Cross-Entropy Loss + +/* Cross-entropy loss for one sequence. + * L_CE = - sum (t in 1..L) of log p_theta(X_t | X_1..X_{t-1}) */ +I_ Scalar cross_entropy_loss(TokenSequence_R tokens, U4 hidden_dim, U4 vocab_size) { + Scalar total = { .value = (F32)0 }; + for (U4 t = 0; t + 1 < tokens->len; ++ t) { + TokenSequence prefix = { .ids = tokens->ids, .len = t + 1, .cap = t + 1 }; + Logits logits = ar_forward(&prefix, hidden_dim, vocab_size); + Probability p = softmax_at(&logits, tokens->ids[t + 1]); + total.value -= logf(p.value + 1e-9f); + } + return total; +} + +#pragma endregion Cross-Entropy Loss + +#pragma region Chinchilla Scaling + +/* Chinchilla optimal model size for a compute budget C. 
+ * N_opt(C) = a * C^0.5 */ +I_ F32 chinchilla_optimal_N(F32 compute_budget_flops) { + F32 a = 0.29f; + return a * powf(compute_budget_flops, 0.5f); +} + +/* Chinchilla optimal dataset size for a compute budget C. + * D_opt(C) = b * C^0.5 */ +I_ F32 chinchilla_optimal_D(F32 compute_budget_flops) { + F32 b = 5.7f; + return b * powf(compute_budget_flops, 0.5f); +} + +/* Back-of-envelope training cost for a Llama-3-400B-style run. + * FLOPs = 6 * N * D */ +I_ F32 training_flops(F32 N_params, F32 D_tokens) { + return 6.0f * N_params * D_tokens; +} + +#pragma endregion Chinchilla Scaling + +#pragma region KV-Cache Memory + +/* KV-cache memory footprint. + * Memory_KV = 2 * B * S * L * H * D * bytes_per_element */ +I_ U64 kv_cache_bytes(U4 batch, U4 seq_len, U4 n_layers, U4 n_heads, U4 head_dim, U4 bytes_per_elem) { + U64 per_layer = (U64)batch * seq_len * n_heads * head_dim * bytes_per_elem; + return 2ULL * per_layer * n_layers; +} + +#pragma endregion KV-Cache Memory + +#pragma region Main + +I_ S32 main(S32 argc, char *argv[]) { + (void)argc; + (void)argv; + + U4 hidden_dim = 4096; + U4 vocab_size = 32000; + U4 target_vocab = vocab_size; + + Slice corpus = { .ptr = 0, .len = 0 }; + BPEMerger bpe = bpe_train(corpus, target_vocab); + + TokenSequence tokens = {0}; + Logits logits = ar_forward(&tokens, hidden_dim, vocab_size); + Token next = ar_sample(&logits); + (void)next; + + Scalar loss = cross_entropy_loss(&tokens, hidden_dim, vocab_size); + (void)loss; + + F32 compute_budget = 3.79e25f; + F32 N_opt = chinchilla_optimal_N(compute_budget); + F32 D_opt = chinchilla_optimal_D(compute_budget); + (void)N_opt; + (void)D_opt; + + F32 flops = training_flops(4.0e11f, 1.56e13f); + (void)flops; + + U64 kv_bytes = kv_cache_bytes(1, 2048, 32, 32, 128, 2); + (void)kv_bytes; + + return 0; +} + +#pragma endregion Main \ No newline at end of file diff --git a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/cs229_building_llms/cs229_building_llms_decoder.md 
b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/cs229_building_llms/cs229_building_llms_decoder.md index 6be9b7f0..c4416c6e 100644 --- a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/cs229_building_llms/cs229_building_llms_decoder.md +++ b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/cs229_building_llms/cs229_building_llms_decoder.md @@ -1,17 +1,17 @@ -# cs229_building_llms — Per-term Decoder (tier-categorized) +# cs229_building_llms - Per-term Decoder (tier-categorized) **Source:** `cs229_building_llms_deobfuscated.md` (Pass 2 deobfuscation) **Target:** `cs229_building_llms.c` (the C11 program) -**Method:** Per v2 lexicon §1.3 (etymology) + §2 (the 4 tiers) +**Method:** Per v2 lexicon section 1.3 (etymology) + section 2 (the 4 tiers) ## Tier 1: Core concepts | Term | C11 form | Etymology | Tier | Source | |---|---|---|---|---| | `Token` | `typedef uint32_t Token` | Old English *tacen* ("sign"); in LMs, an atomic unit of text | Tier 1 | Cluster 0, 2 | -| `Logits` | `typedef struct { F32_R data; U4 len; } Logits` | Greek *λογιστικός* ("of reasoning"); raw pre-softmax scores | Tier 1 | Cluster 1 | -| `Probability` | `typedef struct { F32 value; } Probability` | Latin *probabilitas* ("likelihood"); bounded to [0, 1] | Tier 1 | Tier 1 #1.13, Cluster 0 | -| `Scalar` | `typedef struct { F32 value; } Scalar` | Latin *scalaris* ("of a ladder"); per-user placeholder for linear/geo/tensor alg | Tier 1 | Tier 4 #4.22 | +| `Logits` | `typedef struct { F32_R data; U4 len; } Logits` | Greek *logistike* ("of reasoning"); raw pre-softmax scores | Tier 1 | Cluster 1 | +| `Probability` | `typedef struct { F32 value; } Probability` | Latin *probabilitas* ("likelihood"); bounded to [0, 1] | Tier 1 | Cluster 0 | +| `Scalar` | `typedef struct { F32 value; } Scalar` | Latin *scalaris* ("of a ladder"); per-user placeholder for linear/geo/tensor alg | Tier 1 | Cluster 0 | ## Tier 2: Data-oriented pipeline terms @@ -21,7 +21,7 @@ | 
`bpe_encode` | procedure | abbreviation: BPE + encode | Tier 2 | Cluster 9 | | `ar_forward` | procedure | abbreviation: autoregressive + forward pass | Tier 2 | Cluster 2 | | `ar_sample` | procedure | abbreviation: autoregressive + sample | Tier 2 | Cluster 2 | -| `softmax_at` | procedure | softmax + index access | Tier 2 | Tier 4 #4.10 (user-specific `'scalar product'`) | +| `softmax_at` | procedure | softmax + index access | Tier 2 | Cluster 2 | | `cross_entropy_loss` | procedure | cross-entropy + loss | Tier 2 | Cluster 2 | | `chinchilla_optimal_N` | procedure | Chinchilla scaling law (Hoffmann et al. 2022) + optimal model size | Tier 2 | Cluster 2 | | `chinchilla_optimal_D` | procedure | Chinchilla + optimal dataset size | Tier 2 | Cluster 2 | @@ -32,27 +32,25 @@ | Term | C11 form | Etymology | Tier | Source | |---|---|---|---|---| -| `TokenSequence` | `typedef struct { Token_R ids; U4 len; U4 cap; } TokenSequence` | sequence of tokens; pair of (ptr, len, cap) | Tier 3 | Cluster 3 (Pair) | -| `Vocabulary` | `typedef struct { U4 vocab_size; Token_R id_to_token; U4_R token_to_id; } Vocabulary` | the lookup table; bidirectional id<->token | Tier 3 | Cluster 3 (Map) | +| `TokenSequence` | `typedef struct { Token_R ids; U4 len; U4 cap; } TokenSequence` | sequence of tokens; pair of (ptr, len, cap) | Tier 3 | Cluster 3 (Pair) | +| `Vocabulary` | `typedef struct { U4 vocab_size; Token_R id_to_token; U4_R token_to_id; } Vocabulary` | the lookup table; bidirectional id-token | Tier 3 | Cluster 3 | | `BPEMerge` | `typedef struct { Token a; Token b; Token merged; U4 count; } BPEMerge` | one BPE merge rule | Tier 3 | Cluster 3 | -| `BPEMerger` | `typedef struct { BPEMerge_R merges; U4 len; U4 cap; U4 vocab_size; } BPEMerger` | the learned BPE merger (a sequence of merges) | Tier 3 | Cluster 3 (Sequence) | -| `Slice` | `typedef struct { U4 ptr, len; } Slice` | (ptr, len) pair; per `c11_convention.md` §8 | Tier 3 | Cluster 3 (Pair) | +| `BPEMerger` | `typedef struct { 
BPEMerge_R merges; U4 len; U4 cap; U4 vocab_size; } BPEMerger` | the learned BPE merger | Tier 3 | Cluster 3 | +| `Slice` | `typedef struct { U4 ptr, len; } Slice` | (ptr, len) pair; per c11_convention section 8 | Tier 3 | Cluster 3 | ## Tier 4: AI-fuzzing tolerance terms | Term | C11 form | Etymology | Tier | Source | |---|---|---|---|---| -| `hidden_dim` | `U4` parameter | the embedding dimension d; bounded integer | Tier 4 | cs229 §2.6 | -| `vocab_size` | `U4` parameter | the vocabulary size |V|; bounded integer | Tier 4 | cs229 §2.6 | -| `compute_budget` | `F32` parameter | the FLOPs budget C; placeholder, resolved as float32 | Tier 4 | cs229 §2.4 (R1) | -| `correlation` | (not used in this video) | `correlation : float64` (per v2 lexicon Tier 4 #4.25, R1 NEW v2) | Tier 4 | cs229 §2.6 | -| `weakly_coupled` | (not used in this video) | `weakly_coupled(a, b, tolerance)` (per v2 lexicon §9.1) | Tier 4 | v2 lexicon §9.1 | +| `hidden_dim` | `U4` parameter | the embedding dimension d; bounded integer | Tier 4 | cs229 section 2.6 | +| `vocab_size` | `U4` parameter | the vocabulary size; bounded integer | Tier 4 | cs229 section 2.6 | +| `compute_budget` | `F32` parameter | the FLOPs budget C; placeholder, resolved as float32 | Tier 4 | cs229 section 2.4 | ## Etymology notes (per Cluster 7, Pattern 3) -- `Token` — Old English *tacen* ("sign"); modern usage: atomic unit of text after tokenization. -- `Logits` — Greek *λογιστικός* via Latin *logisticus*; modern ML usage: raw pre-softmax scores. -- `Probability` — Latin *probabilitas* from *probare* ("to test"); modern usage: a value in [0, 1]. -- `Scalar` — Latin *scalaris* from *scala* ("ladder"); modern usage: a single quantity (vs. vector/matrix). -- `Chinchilla` — Hoffmann et al. 2022, "Training Compute-Optimal Large Language Models"; the paper name. -- `BPE` — Sennrich et al. 2016, "Neural Machine Translation of Rare Words with Subword Units"; the algorithm name. 
+- `Token` - Old English *tacen* ("sign"); modern usage: atomic unit of text after tokenization. +- `Logits` - Greek *logistike* via Latin *logisticus*; modern ML usage: raw pre-softmax scores. +- `Probability` - Latin *probabilitas* from *probare* ("to test"); modern usage: a value in [0, 1]. +- `Scalar` - Latin *scalaris* from *scala* ("ladder"); modern usage: a single quantity (vs. vector/matrix). +- `Chinchilla` - Hoffmann et al. 2022, "Training Compute-Optimal Large Language Models"; the paper name. +- `BPE` - Sennrich et al. 2016, "Neural Machine Translation of Rare Words with Subword Units"; the algorithm name. \ No newline at end of file diff --git a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/cs229_building_llms/cs229_building_llms_notes.md b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/cs229_building_llms/cs229_building_llms_notes.md index abcd006e..8f4fe331 100644 --- a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/cs229_building_llms/cs229_building_llms_notes.md +++ b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/cs229_building_llms/cs229_building_llms_notes.md @@ -1,16 +1,16 @@ -# cs229_building_llms — Pass 3 Notes +# cs229_building_llms - Pass 3 Notes **Track:** `video_analysis_deob_pass3_20260623` **Date:** 2026-06-23 -**Language:** C11 (per the per-language default in `TIER2_STARTER.md` §3) +**Language:** C11 (per the per-language default in `TIER2_STARTER.md` section 3) ## Decisions made -1. **Language:** C11 (default; per `TIER2_STARTER.md` §3 cluster A row 1). +1. **Language:** C11 (default; per `TIER2_STARTER.md` section 3 cluster A row 1). 2. **Conventions:** duffle + forth bootslop + raddbg fallback (per `c11_convention.md`). -3. **Header style:** design-doc header (per `c11_convention.md` §9). +3. **Header style:** design-doc header (per `c11_convention.md` section 9). 4. **Type system:** byte-width types (`U4`, `F32`) + `TSet_` / `Struct_` / `Opt_` / `Ret_` macros. -5. 
**Encoding placeholders:** `float` / `integer` / `Scalar` (per v2 lexicon §7). Resolved as concrete C11 types at the function signature. +5. **Encoding placeholders:** `float` / `integer` / `Scalar` (per v2 lexicon section 7). Resolved as concrete C11 types at the function signature. ## Alternatives considered @@ -20,29 +20,29 @@ ## Language override (none) -Per `TIER2_STARTER.md` §3, the default for this video is C11. No override applied. +Per `TIER2_STARTER.md` section 3, the default for this video is C11. No override applied. -## 4 + 3 verification criteria (per v2 lexicon §7 of `TIER2_STARTER.md`) +## 4 + 3 verification criteria (per v2 lexicon section 7 of `TIER2_STARTER.md`) | # | Criterion | Status | Notes | |---|---|---|---| | 1 | **Lossless** | met | All 10 concepts from the translation table are represented in the C11 code. | -| 2 | **Bounded** | met | No `∞_val`; all values are finite (Token, F32, U4, U64). | +| 2 | **Bounded** | met | No `infinity_val`; all values are finite (Token, F32, U4, U64). | | 3 | **Constructively typed** | met | Every expression has a type (`Token`, `Logits`, `Probability`, `Scalar`, etc.). | | 4 | **Etymology-cited** | met | Every new term has 1-line origin + 1-line history in the decoder. | | 5 | **Encoding-explicit** | met | Every value-bearing term has an encoding (`Token : int32`, `F32` etc.). | | 6 | **Form-anchored** | met | Every re-encoding has a form anchor in the translation table. | | 7 | **User-specific opt-in** | met | The principled form is produced; the user-specific form (e.g., Sectored Language `'scalar product'`) is opt-in. | -## Hardware target (per v2 lexicon §7 of `TIER2_STARTER.md`) +## Hardware target (per v2 lexicon section 7 of `TIER2_STARTER.md`) Per user 2026-06-23, "target up to 10k." Default workstation: Ryzen 9 / i9, RTX 4090, 128GB DDR5, 4TB NVMe. This video's concepts map to: -- **Pretraining** (cs229 §2.3-2.4): requires RTX 4090-class GPU + 128GB DDR5 for a 7B-param model in float16. 
The back-of-envelope Llama 3 400B numbers (3.79e25 FLOPs) imply a much larger cluster (16k+ H100s). -- **Inference** (cs229 §2.6): KV-cache for a 7B model is `2 * 1 * 2048 * 32 * 32 * 128 * 2 = 1.07 GB` per request (F32 here for clarity; F16 halves this to 0.54 GB). RTX 4090 has 24GB VRAM, so ~20 concurrent requests fit. +- **Pretraining** (cs229 section 2.3-2.4): requires RTX 4090-class GPU + 128GB DDR5 for a 7B-param model in float16. The back-of-envelope Llama 3 400B numbers (3.79e25 FLOPs) imply a much larger cluster (16k+ H100s). +- **Inference** (cs229 section 2.6): KV-cache for a 7B model is `2 * 1 * 2048 * 32 * 32 * 128 * 2 = 1.07 GB` per request (the final factor is 2 bytes per element, i.e. F16; F32 would double this to 2.15 GB). RTX 4090 has 24GB VRAM, so ~20 concurrent requests fit. -## Refinements discovered (Pass 3 → lexicon v3 candidates) +## Refinements discovered (Pass 3 to lexicon v3 candidates) 1. **Per-pillar encoding tags:** the lecture's six pillars (Architecture, Training, Data, Evaluation, Systems, Model) could each have a tier tag in the lexicon (currently they are Tier 2 terms mixed). The C11 code shows that the pillars map to distinct type categories; a v3 lexicon could formalize this. 
@@ -54,10 +54,10 @@ This video's concepts map to: ## See also -- `cs229_building_llms.c` — the C11 program -- `cs229_building_llms_translation.md` — the math → C11 translation table -- `cs229_building_llms_decoder.md` — the per-term decoder (tier-categorized) -- `conductor/tracks/video_analysis_deob_pilot_20260621/artifacts/cs229_building_llms/` — the Pass 2 input -- `conductor/tracks/video_analysis_cs229_building_llms_20260621/report.md` — the Pass 1 source -- `conductor/tracks/video_analysis_deob_c11_reference_20260623/c11_convention.md` — the C11 style guide -- `conductor/tracks/video_analysis_deob_lexicon_20260621/lexicon.md` — the v2 lexicon +- `cs229_building_llms.c` - the C11 program +- `cs229_building_llms_translation.md` - the math to C11 translation table +- `cs229_building_llms_decoder.md` - the per-term decoder (tier-categorized) +- `conductor/tracks/video_analysis_deob_pilot_20260621/artifacts/cs229_building_llms/` - the Pass 2 input +- `conductor/tracks/video_analysis_cs229_building_llms_20260621/report.md` - the Pass 1 source +- `conductor/tracks/video_analysis_deob_c11_reference_20260623/c11_convention.md` - the C11 style guide +- `conductor/tracks/video_analysis_deob_lexicon_20260621/lexicon.md` - the v2 lexicon \ No newline at end of file diff --git a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/cs229_building_llms/cs229_building_llms_translation.md b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/cs229_building_llms/cs229_building_llms_translation.md index 3a3976b4..bcc531ba 100644 --- a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/cs229_building_llms/cs229_building_llms_translation.md +++ b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/cs229_building_llms/cs229_building_llms_translation.md @@ -1,4 +1,4 @@ -# cs229_building_llms — Translation Table (math → C11) +# cs229_building_llms - Translation Table (math to C11) **Source:** 
`conductor/tracks/video_analysis_deob_pilot_20260621/artifacts/cs229_building_llms/cs229_building_llms_deobfuscated.md` (464 lines) **Target:** `cs229_building_llms.c` (the C11 program) @@ -17,7 +17,8 @@ | 9 | `FLOPs = 6 * N * D` | `training_flops(F32 N_params, F32 D_tokens)` | bounded: N, D as finite inputs | `F32` | | 10 | `Memory_KV = 2 * B * S * L * H * D * bytes_per_element` | `kv_cache_bytes(U4 batch, U4 seq_len, U4 n_layers, U4 n_heads, U4 head_dim, U4 bytes_per_elem)` | bounded: B, S, L, H, D, bytes all finite | `U64` | -**Notes:** +## Notes + - The C11 program does NOT implement a real transformer; it expresses the SHAPE of the lecture's six pillars. - All `float` / `integer` / `Scalar` placeholders resolve to concrete C11 types (`F32`, `U4`, etc.) at the function signature. -- Per the v2 lexicon §9.1, `<<` / `>>` operators are rendered as `much_less` / `much_greater` / `weakly_coupled` if needed; this file does not use them. +- Per the v2 lexicon section 9.1, `<<` / `>>` operators are rendered as `much_less` / `much_greater` / `weakly_coupled` if needed; this file does not use them. \ No newline at end of file diff --git a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/entropy_epiplexity/entropy_epiplexity.py b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/entropy_epiplexity/entropy_epiplexity.py index 6e5a2694..4e6201bc 100644 --- a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/entropy_epiplexity/entropy_epiplexity.py +++ b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/entropy_epiplexity/entropy_epiplexity.py @@ -1,4 +1,4 @@ -"""entropy_epiplexity.py - Pass 3 projection of the "From Entropy to Epiplexity" lecture. +"""entropy_epiplexity.py - Pass 3 projection of the From Entropy to Epiplexity lecture. 
PURPOSE ------- @@ -21,16 +21,9 @@ ENCODING (per lexicon v2 Rule 5) Bits : float (placeholder), resolved as float64 (units of entropy) Markov : type-class predicate where X -> Y -> Z (R4 NEW v2) PolyTimeAdversary : type where runtime(A) : Polynomial(security_parameter) : int64 (R6 NEW v2) - -SEE ALSO --------- - entropy_epiplexity_translation.md - entropy_epiplexity_decoder.md - entropy_epiplexity_notes.md - lexicon.md (the v2 lexicon) + product-guidelines.md (manual_slop) """ -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import Callable, TypeAlias import math @@ -162,4 +155,4 @@ def main() -> int: if __name__ == "__main__": - raise SystemExit(main()) + raise SystemExit(main()) \ No newline at end of file diff --git a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/entropy_epiplexity/entropy_epiplexity_decoder.md b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/entropy_epiplexity/entropy_epiplexity_decoder.md index fc4d70b5..02b16d26 100644 --- a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/entropy_epiplexity/entropy_epiplexity_decoder.md +++ b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/entropy_epiplexity/entropy_epiplexity_decoder.md @@ -1,11 +1,11 @@ -# entropy_epiplexity — Per-term Decoder (tier-categorized) +# entropy_epiplexity - Per-term Decoder (tier-categorized) ## Tier 1: Core concepts | Term | Python form | Etymology | Tier | Source | |---|---|---|---|---| | `Probability` | `TypeAlias = float` | Latin *probabilitas* | Tier 1 | Cluster 0 | -| `Entropy` | `TypeAlias = float` | Greek *ἐντροπή* ("a turning toward"); Rudolf Clausius, 1865 | Tier 1 | Cluster 0 | +| `Entropy` | `TypeAlias = float` | Greek *entropia* ("a turning toward"); Rudolf Clausius, 1865 | Tier 1 | Cluster 0 | ## Tier 2: Data-oriented pipeline terms @@ -14,7 +14,7 @@ | `shannon_entropy` | function | Claude Shannon, 1948; "A Mathematical Theory of Communication" | Tier 2 | Cluster 0 | 
| `cross_entropy` | function | the cross-entropy loss | Tier 2 | Cluster 2 | | `kl_divergence` | function | Solomon Kullback, Richard Leibler, 1951 | Tier 2 | Cluster 2 | -| `epiplexity_estimate` | function | "epi-" (Greek "upon") + "plexity" (Latin "fold"); the "folding upon" measure of memorization | Tier 2 | entropy_epiplexity §2 (NEW v2) | +| `epiplexity_estimate` | function | "epi-" (Greek "upon") + "plexity" (Latin "fold"); the "folding upon" measure of memorization | Tier 2 | entropy_epiplexity section 2 (NEW v2) | | `bits_to_nats`, `nats_to_bits` | function | units conversion; nat = natural logarithm unit | Tier 2 | Cluster 2 | ## Tier 3: Type-theoretic primitives @@ -22,23 +22,23 @@ | Term | Python form | Etymology | Tier | Source | |---|---|---|---|---| | `MarkovState` | `@dataclass(frozen=True) class MarkovState` | Andrey Markov, 1856-1922; the eponym | Tier 3 | Cluster 3 | -| `MarkovChain` | `@dataclass(frozen=True) class MarkovChain` | the Markov chain as a type-class predicate | Tier 3 | entropy_epiplexity §5.2 (R4 NEW v2) | -| `PolyTimeAdversary` | `@dataclass(frozen=True) class PolyTimeAdversary` | the polynomial-time adversary security model | Tier 3 | entropy_epiplexity §5.8 (R6 NEW v2) | +| `MarkovChain` | `@dataclass(frozen=True) class MarkovChain` | the Markov chain as a type-class predicate | Tier 3 | entropy_epiplexity section 5.2 (R4 NEW v2) | +| `PolyTimeAdversary` | `@dataclass(frozen=True) class PolyTimeAdversary` | the polynomial-time adversary security model | Tier 3 | entropy_epiplexity section 5.8 (R6 NEW v2) | ## Tier 4: AI-fuzzing tolerance terms | Term | Python form | Etymology | Tier | Source | |---|---|---|---|---| -| `memorization` | local variable | the bounded form of "memorization" in NN training | Tier 4 | entropy_epiplexity §2 | -| `decay` | local variable | the bounded form of "decay" (1 / (1 + t * eps)) | Tier 4 | entropy_epiplexity §2 | -| `security_parameter` | `int` field | the security parameter k | Tier 4 | 
entropy_epiplexity §5.8 | +| `memorization` | local variable | the bounded form of "memorization" in NN training | Tier 4 | entropy_epiplexity section 2 | +| `decay` | local variable | the bounded form of "decay" (1 / (1 + t * eps)) | Tier 4 | entropy_epiplexity section 2 | +| `security_parameter` | `int` field | the security parameter k | Tier 4 | entropy_epiplexity section 5.8 | ## Etymology notes (per Cluster 7, Pattern 3) -- `Entropy` — Greek *ἐντροπή* via German *Entropie* (Clausius, 1865); modern usage: a measure of uncertainty. -- `Shannon` — Claude Shannon (1916-2001); the founder of information theory. -- `Kullback-Leibler` — Solomon Kullback (1907-1994) + Richard Leibler (1914-2003); the eponym. -- `Epiplexity` — coined by Wilson & Finzi (2020); the "folding upon" measure of memorization in NN training. -- `Markov` — Andrey Markov (1856-1922); the eponym; the chain was introduced in 1906. -- `Adversary` — Latin *adversarius* ("opponent"); the cryptographic security model. -- `Polynomial` — Greek *πολύ* ("many") + *νόμος* ("rule"); the complexity class. +- `Entropy` - Greek *entropia* via German *Entropie* (Clausius, 1865); modern usage: a measure of uncertainty. +- `Shannon` - Claude Shannon (1916-2001); the founder of information theory. +- `Kullback-Leibler` - Solomon Kullback (1907-1994) + Richard Leibler (1914-2003); the eponym. +- `Epiplexity` - coined by Wilson and Finzi (2020); the "folding upon" measure of memorization in NN training. +- `Markov` - Andrey Markov (1856-1922); the eponym; the chain was introduced in 1906. +- `Adversary` - Latin *adversarius* ("opponent"); the cryptographic security model. +- `Polynomial` - Greek *poly* ("many") + *nomos* ("rule"); the complexity class. 
\ No newline at end of file diff --git a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/entropy_epiplexity/entropy_epiplexity_notes.md b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/entropy_epiplexity/entropy_epiplexity_notes.md index 53aad7a0..4b139165 100644 --- a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/entropy_epiplexity/entropy_epiplexity_notes.md +++ b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/entropy_epiplexity/entropy_epiplexity_notes.md @@ -1,16 +1,16 @@ -# entropy_epiplexity — Pass 3 Notes +# entropy_epiplexity - Pass 3 Notes **Track:** `video_analysis_deob_pass3_20260623` **Date:** 2026-06-23 -**Language:** Python (per the per-language default in `TIER2_STARTER.md` §3) +**Language:** Python (per the per-language default in `TIER2_STARTER.md` section 3) ## Decisions made -1. **Language:** Python (default; per `TIER2_STARTER.md` §3 cluster A row 3). +1. **Language:** Python (default; per `TIER2_STARTER.md` section 3 cluster A row 3). 2. **Conventions:** manual_slop (1-space indent, type hints, no comments, Result[T] for errors). 3. **Type system:** `dataclass(frozen=True)` for value semantics; `TypeAlias` for primitives. 4. **Markov chain:** encoded as a `MarkovChain` dataclass with `is_markov()` validation (rows sum to 1). -5. **Epiplexity:** simplified model — `max(0, H(p_data, q_estimate) - H(p_data))` with decay factor. +5. **Epiplexity:** simplified model - `max(0, H(p_data, q_estimate) - H(p_data))` with decay factor. ## Alternatives considered @@ -19,21 +19,21 @@ ## Language override (none) -Per `TIER2_STARTER.md` §3, the default for this video is Python. No override applied. +Per `TIER2_STARTER.md` section 3, the default for this video is Python. No override applied. 
-## 4 + 3 verification criteria (per v2 lexicon §7 of `TIER2_STARTER.md`) +## 4 + 3 verification criteria (per v2 lexicon section 7 of `TIER2_STARTER.md`) | # | Criterion | Status | Notes | |---|---|---|---| | 1 | **Lossless** | met | All 7 concepts from the translation table are represented. | -| 2 | **Bounded** | met | No `∞_val`; all values are finite. | +| 2 | **Bounded** | met | No `infinity_val`; all values are finite. | | 3 | **Constructively typed** | met | Every expression has a type hint. | | 4 | **Etymology-cited** | met | Every new term has 1-line origin + 1-line history. | | 5 | **Encoding-explicit** | met | Every value-bearing term has an encoding. | | 6 | **Form-anchored** | met | Every re-encoding has a form anchor in the translation table. | | 7 | **User-specific opt-in** | met | The principled form is produced. | -## Hardware target (per v2 lexicon §7 of `TIER2_STARTER.md`) +## Hardware target (per v2 lexicon section 7 of `TIER2_STARTER.md`) Per user 2026-06-23, "target up to 10k." Default workstation: Ryzen 9 / i9, RTX 4090, 128GB DDR5, 4TB NVMe. @@ -42,7 +42,7 @@ This video's concepts map to: - **Epiplexity in NN training:** requires a GPU for the actual training loop; the epiplexity_estimate function is a simplified post-hoc model. - **Markov chain stationary distribution:** converges in O(n) iterations for an n-state chain; no special hardware needed. -## Refinements discovered (Pass 3 → lexicon v3 candidates) +## Refinements discovered (Pass 3 to lexicon v3 candidates) 1. **Epiplexity as a Tier 4 term:** the epiplexity function is a NEW v2 term; v3 should formalize it. 2. **Markov as a type-class predicate:** the R4 NEW v2 entry could be extended to a generic type-class pattern for stochastic processes. 
@@ -55,10 +55,10 @@ This video's concepts map to: ## See also -- `entropy_epiplexity.py` — the Python program -- `entropy_epiplexity_translation.md` — the math → Python translation table -- `entropy_epiplexity_decoder.md` — the per-term decoder (tier-categorized) -- `conductor/tracks/video_analysis_deob_pilot_20260621/artifacts/entropy_epiplexity/` — the Pass 2 input -- `conductor/tracks/video_analysis_entropy_epiplexity_20260621/report.md` — the Pass 1 source -- `conductor/tracks/video_analysis_deob_lexicon_20260621/lexicon.md` — the v2 lexicon -- `conductor/product-guidelines.md` — the manual_slop convention +- `entropy_epiplexity.py` - the Python program +- `entropy_epiplexity_translation.md` - the math to Python translation table +- `entropy_epiplexity_decoder.md` - the per-term decoder (tier-categorized) +- `conductor/tracks/video_analysis_deob_pilot_20260621/artifacts/entropy_epiplexity/` - the Pass 2 input +- `conductor/tracks/video_analysis_entropy_epiplexity_20260621/report.md` - the Pass 1 source +- `conductor/tracks/video_analysis_deob_lexicon_20260621/lexicon.md` - the v2 lexicon +- `conductor/product-guidelines.md` - the manual_slop convention \ No newline at end of file diff --git a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/entropy_epiplexity/entropy_epiplexity_translation.md b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/entropy_epiplexity/entropy_epiplexity_translation.md index e9034026..8b58bd33 100644 --- a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/entropy_epiplexity/entropy_epiplexity_translation.md +++ b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/entropy_epiplexity/entropy_epiplexity_translation.md @@ -1,4 +1,4 @@ -# entropy_epiplexity — Translation Table (math → Python) +# entropy_epiplexity - Translation Table (math to Python) **Source:** `conductor/tracks/video_analysis_deob_pilot_20260621/artifacts/entropy_epiplexity/entropy_epiplexity_deobfuscated.md` **Target:** 
`entropy_epiplexity.py` @@ -14,7 +14,8 @@ | 6 | `PolyTimeAdversary : Type where runtime(A) : Polynomial(security_parameter) : int64` (R6 NEW v2) | `PolyTimeAdversary` dataclass with `is_poly_time` | bounded: `runtime(n) <= n^k` | `PolyTimeAdversary : type` | | 7 | `nats -> bits` (unit conversion) | `bits_to_nats`, `nats_to_bits` | bounded: linear conversion | `Entropy : float` | -**Notes:** -- Per v2 lexicon §9.2, the per-language rendering is the same as C11. +## Notes + +- Per v2 lexicon section 9.2, the per-language rendering is the same as C11. - The `epiplexity_estimate` function is a SIMPLIFIED model; the real epiplexity function is more complex. -- The Markov chain is checked for stochasticity (rows sum to 1) via `is_markov()`. +- The Markov chain is checked for stochasticity (rows sum to 1) via `is_markov()`. \ No newline at end of file diff --git a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/probability_logic/probability_logic.py b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/probability_logic/probability_logic.py index 3fb3f82d..8268fa7c 100644 --- a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/probability_logic/probability_logic.py +++ b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/probability_logic/probability_logic.py @@ -1,4 +1,4 @@ -"""probability_logic.py - Pass 3 projection of the "Probability Theory is an Extension of Logic" lecture. +"""probability_logic.py - Pass 3 projection of the Probability Theory is an Extension of Logic lecture. 
PURPOSE ------- @@ -24,16 +24,13 @@ ENCODING (per lexicon v2 Rule 5) SEE ALSO -------- - probability_logic_translation.md : math-to-Python translation table - probability_logic_decoder.md : per-term decoder (tier-categorized) - probability_logic_notes.md : decisions, alternatives, overrides - lexicon.md (the v2 lexicon) + product-guidelines.md (manual_slop) + probability_logic_translation.md + probability_logic_decoder.md + probability_logic_notes.md """ -from dataclasses import dataclass, field -from typing import Callable, Iterable, TypeAlias -from itertools import product as cartesian_product -import math +from dataclasses import dataclass +from typing import Callable, TypeAlias Plausibility: TypeAlias = float Probability: TypeAlias = float @@ -129,7 +126,6 @@ def jaynes_policeman_burglar(p_burglary: Plausibility = 0.001, + p_earthquake * p_alarm_given_earthquake + (1.0 - p_burglary - p_earthquake) * p_alarm_given_neither ) - p_burglary_and_alarm: float = p_burglary * p_alarm_given_burglary return bayes_rule(p_burglary, p_alarm_given_burglary, p_alarm) if p_alarm > 0 else 0.0 @@ -178,4 +174,4 @@ def main() -> int: if __name__ == "__main__": - raise SystemExit(main()) + raise SystemExit(main()) \ No newline at end of file diff --git a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/probability_logic/probability_logic_decoder.md b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/probability_logic/probability_logic_decoder.md index a0297f01..9645d51f 100644 --- a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/probability_logic/probability_logic_decoder.md +++ b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/probability_logic/probability_logic_decoder.md @@ -1,25 +1,25 @@ -# probability_logic — Per-term Decoder (tier-categorized) +# probability_logic - Per-term Decoder (tier-categorized) **Source:** `probability_logic_deobfuscated.md` (Pass 2 deobfuscation) **Target:** `probability_logic.py` (the Python 
program) -**Method:** Per v2 lexicon §1.3 (etymology) + §2 (the 4 tiers) +**Method:** Per v2 lexicon section 1.3 (etymology) + section 2 (the 4 tiers) ## Tier 1: Core concepts | Term | Python form | Etymology | Tier | Source | |---|---|---|---|---| -| `Proposition` | `@dataclass(frozen=True) class Proposition` | Latin *propositio* ("a setting forth"); the propositional logic primitive | Tier 1 | Cluster 7, Tier 1 #1.4-1.7 | -| `Context` | `@dataclass(frozen=True) class Context` | Latin *contextus* ("a joining together"); the conditioning context | Tier 1 | Tier 1 #1.4-1.7 | -| `implies` | `Callable[[Proposition, Proposition], bool]` | Latin *implicare* ("to involve"); Tier 1 #1.7 | Tier 1 | Cluster 7 | -| `for all` | (implicit in `assert` + exhaustive loops) | Latin *pro omnibus*; Tier 1 #1.2 | Tier 1 | Cluster 2, 4 | -| `exists` | (implicit in `for ... if not found: raise`) | Latin *existere* ("to stand out, to be"); Tier 1 #1.3 | Tier 1 | Cluster 4 | +| `Proposition` | `@dataclass(frozen=True) class Proposition` | Latin *propositio* ("a setting forth"); the propositional logic primitive | Tier 1 | Cluster 7 | +| `Context` | `@dataclass(frozen=True) class Context` | Latin *contextus* ("a joining together"); the conditioning context | Tier 1 | Cluster 7 | +| `implies` | `Callable[[Proposition, Proposition], bool]` | Latin *implicare* ("to involve"); Tier 1 entry 1.7 | Tier 1 | Cluster 7 | +| `for all` | (implicit in `assert` + exhaustive loops) | Latin *pro omnibus*; Tier 1 entry 1.2 | Tier 1 | Cluster 2, 4 | +| `exists` | (implicit in `for ... 
if not found: raise`) | Latin *existere* ("to stand out, to be"); Tier 1 entry 1.3 | Tier 1 | Cluster 4 | ## Tier 2: Data-oriented pipeline terms | Term | Python form | Etymology | Tier | Source | |---|---|---|---|---| -| `bivaluation` | function | from Cox's bivaluation; generalized indicator function | Tier 2 | Tier 4 (R3 NEW v2) | -| `frequentist_relative_frequency` | function | frequentist definition; the ratio `count/total` | Tier 2 | Cluster 2 | +| `bivaluation` | function | from Cox's bivaluation; generalized indicator function | Tier 2 | Cluster 0 | +| `frequentist_relative_frequency` | function | frequentist definition; the ratio count/total | Tier 2 | Cluster 2 | | `frequentist_stream` | function | re-encoding of `lim_{N -> infinity}` as `Stream A = nat -> A` | Tier 2 | Rule 1 + Cluster 2 | | `sum_rule` | function | the sum rule of probability | Tier 2 | Cluster 2 | | `product_rule` | function | the product rule of probability | Tier 2 | Cluster 2 | @@ -31,27 +31,27 @@ | Term | Python form | Etymology | Tier | Source | |---|---|---|---|---| -| `LatticePoset` | `@dataclass(frozen=True) class LatticePoset` | Latin *lattice* + *poset*; a partially ordered set with join/meet | Tier 3 | Cluster 3 (Pair) | +| `LatticePoset` | `@dataclass(frozen=True) class LatticePoset` | Latin *lattice* + *poset*; a partially ordered set with join/meet | Tier 3 | Cluster 3 | | `Plausibility` | `TypeAlias = float` | Latin *plausibilis* ("deserving applause"); the bounded form of "plausibility" | Tier 3 | Cluster 0 | | `Probability` | `TypeAlias = float` | Latin *probabilitas* ("likelihood"); bounded to [0, 1] | Tier 3 | Cluster 0 | -| `Stream` | `TypeAlias = "Callable[[int], float]"` | Old English *stream*; `Stream A = nat -> A` per Rule 1 | Tier 3 | Cluster 3 (Pi type) | +| `Stream` | `TypeAlias = "Callable[[int], float]"` | Old English *stream*; `Stream A = nat -> A` per Rule 1 | Tier 3 | Cluster 3 | ## Tier 4: AI-fuzzing tolerance terms | Term | Python form | Etymology | 
Tier | Source | |---|---|---|---|---| | `world` | `dict[Proposition, bool]` | the world assignment; bounded to finite propositions | Tier 4 | Cluster 0 (P2) | -| `bivaluation` | function (R3 NEW v2) | the generalized indicator; bivalent (0/1) for propositions | Tier 4 | probability_logic §2.1 (R3 NEW v2) | -| `Plausibility` (was `Likelihood`) | TypeAlias | the bounded form of "plausibility"; was `Likelihood` in v1 lexicon | Tier 4 | probability_logic §2.1 (R3 NEW v2) | +| `bivaluation` (R3 NEW v2) | function | the generalized indicator; bivalent (0/1) for propositions | Tier 4 | probability_logic section 2.1 | +| `Plausibility` (was `Likelihood`) | TypeAlias | the bounded form of "plausibility"; was `Likelihood` in v1 lexicon | Tier 4 | probability_logic section 2.1 | ## Etymology notes (per Cluster 7, Pattern 3) -- `Proposition` — Latin *propositio* from *proponere* ("to set forth"); modern usage: an assertion that is true or false. -- `Context` — Latin *contextus* from *contexere* ("to weave together"); modern usage: the conditioning context for a probability. -- `Lattice` — Old French *latiz* ("lattice"); modern usage: a partially ordered set with join and meet. -- `Poset` — partially ordered set; abbreviation; modern usage: a set with a reflexive, antisymmetric, transitive relation. -- `Bivaluation` — Latin *bi-* ("two") + Old French *valour* ("value"); the assignment of two values (true/false) to propositions; generalized in Cox's theorem to continuous plausibility. -- `Marginalize` — Latin *marginalis* ("of the margin"); modern usage: summing out a variable from a joint distribution. -- `Bayes` — Thomas Bayes (1701-1761); the eponym; the rule was published posthumously in 1763. -- `Jaynes` — Edwin T. Jaynes (1922-1998); the probability-as-logic school; the canonical "policeman+burglar" example. -- `Cox` — Richard T. Cox (1898-1991); the Cox theorem deriving the sum and product rules from Boolean algebra. 
+- `Proposition` - Latin *propositio* from *proponere* ("to set forth"); modern usage: an assertion that is true or false. +- `Context` - Latin *contextus* from *contexere* ("to weave together"); modern usage: the conditioning context for a probability. +- `Lattice` - Old French *latiz* ("lattice"); modern usage: a partially ordered set with join and meet. +- `Poset` - partially ordered set; abbreviation; modern usage: a set with a reflexive, antisymmetric, transitive relation. +- `Bivaluation` - Latin *bi-* ("two") + Old French *valour* ("value"); the assignment of two values (true/false) to propositions; generalized in Cox's theorem to continuous plausibility. +- `Marginalize` - Latin *marginalis* ("of the margin"); modern usage: summing out a variable from a joint distribution. +- `Bayes` - Thomas Bayes (1701-1761); the eponym; the rule was published posthumously in 1763. +- `Jaynes` - Edwin T. Jaynes (1922-1998); the probability-as-logic school; the canonical "policeman+burglar" example. +- `Cox` - Richard T. Cox (1898-1991); the Cox theorem deriving the sum and product rules from Boolean algebra. \ No newline at end of file diff --git a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/probability_logic/probability_logic_notes.md b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/probability_logic/probability_logic_notes.md index 27c586d3..caa8f82d 100644 --- a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/probability_logic/probability_logic_notes.md +++ b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/probability_logic/probability_logic_notes.md @@ -1,12 +1,12 @@ -# probability_logic — Pass 3 Notes +# probability_logic - Pass 3 Notes **Track:** `video_analysis_deob_pass3_20260623` **Date:** 2026-06-23 -**Language:** Python (per the per-language default in `TIER2_STARTER.md` §3) +**Language:** Python (per the per-language default in `TIER2_STARTER.md` section 3) ## Decisions made -1. 
**Language:** Python (default; per `TIER2_STARTER.md` §3 cluster A row 2). +1. **Language:** Python (default; per `TIER2_STARTER.md` section 3 cluster A row 2). 2. **Conventions:** manual_slop (1-space indent, type hints, no comments, Result[T] for errors). 3. **Type system:** `dataclass(frozen=True)` for value semantics; `TypeAlias` for primitives. 4. **Stream encoding:** `Stream A = nat -> A` per v2 lexicon Rule 1; rendered as `Callable[[int], float]`. @@ -19,21 +19,21 @@ ## Language override (none) -Per `TIER2_STARTER.md` §3, the default for this video is Python. No override applied. +Per `TIER2_STARTER.md` section 3, the default for this video is Python. No override applied. -## 4 + 3 verification criteria (per v2 lexicon §7 of `TIER2_STARTER.md`) +## 4 + 3 verification criteria (per v2 lexicon section 7 of `TIER2_STARTER.md`) | # | Criterion | Status | Notes | |---|---|---|---| | 1 | **Lossless** | met | All 10 concepts from the translation table are represented in the Python code. | -| 2 | **Bounded** | met | No `∞_val`; the frequentist definition is re-encoded as a `Stream`. | +| 2 | **Bounded** | met | No `infinity_val`; the frequentist definition is re-encoded as a `Stream`. | | 3 | **Constructively typed** | met | Every expression has a type hint. | | 4 | **Etymology-cited** | met | Every new term has 1-line origin + 1-line history in the decoder. | | 5 | **Encoding-explicit** | met | Every value-bearing term has an encoding (`Plausibility : float`, `Stream : Callable[[int], float]`). | | 6 | **Form-anchored** | met | Every re-encoding has a form anchor in the translation table. | | 7 | **User-specific opt-in** | met | The principled form is produced; the user-specific form (e.g., Cox's bivaluation) is opt-in. | -## Hardware target (per v2 lexicon §7 of `TIER2_STARTER.md`) +## Hardware target (per v2 lexicon section 7 of `TIER2_STARTER.md`) Per user 2026-06-23, "target up to 10k." Default workstation: Ryzen 9 / i9, RTX 4090, 128GB DDR5, 4TB NVMe. 
@@ -42,7 +42,7 @@ This video's concepts map to: - **Bayesian inference:** marginalization scales with the size of the joint distribution; for 1000 propositions, the lattice is computable in <1s on any modern CPU. - **Stream re-encoding:** the `Stream A = nat -> A` is computable up to a given index; the lecture's "infinity" is a process, not a value. -## Refinements discovered (Pass 3 → lexicon v3 candidates) +## Refinements discovered (Pass 3 to lexicon v3 candidates) 1. **Bivaluation as Tier 4 term:** the bivaluation `Z(x, t)` is a Tier 4 (AI-fuzzing tolerance) term that doesn't have an existing entry in the v2 lexicon. v3 should add it. 2. **Cox's theorem formalization:** the Python program implements the bivaluation but not the full Cox theorem. v3 could formalize the sum/product rule derivation. @@ -55,10 +55,10 @@ This video's concepts map to: ## See also -- `probability_logic.py` — the Python program -- `probability_logic_translation.md` — the math → Python translation table -- `probability_logic_decoder.md` — the per-term decoder (tier-categorized) -- `conductor/tracks/video_analysis_deob_apply_20260621/artifacts/probability_logic/` — the Pass 2 input -- `conductor/tracks/video_analysis_probability_logic_20260621/report.md` — the Pass 1 source -- `conductor/tracks/video_analysis_deob_lexicon_20260621/lexicon.md` — the v2 lexicon -- `conductor/product-guidelines.md` — the manual_slop convention +- `probability_logic.py` - the Python program +- `probability_logic_translation.md` - the math to Python translation table +- `probability_logic_decoder.md` - the per-term decoder (tier-categorized) +- `conductor/tracks/video_analysis_deob_apply_20260621/artifacts/probability_logic/` - the Pass 2 input +- `conductor/tracks/video_analysis_probability_logic_20260621/report.md` - the Pass 1 source +- `conductor/tracks/video_analysis_deob_lexicon_20260621/lexicon.md` - the v2 lexicon +- `conductor/product-guidelines.md` - the manual_slop convention \ No newline at end of 
file diff --git a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/probability_logic/probability_logic_translation.md b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/probability_logic/probability_logic_translation.md index 8c689745..adf94a88 100644 --- a/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/probability_logic/probability_logic_translation.md +++ b/conductor/tracks/video_analysis_deob_pass3_20260623/artifacts/probability_logic/probability_logic_translation.md @@ -1,4 +1,4 @@ -# probability_logic — Translation Table (math → Python) +# probability_logic - Translation Table (math to Python) **Source:** `conductor/tracks/video_analysis_deob_apply_20260621/artifacts/probability_logic/probability_logic_deobfuscated.md` (538 lines) **Target:** `probability_logic.py` (the Python program) @@ -17,8 +17,9 @@ | 9 | `sum_B P(H, B)` (marginalization) | `marginalize(joint, hypothesis, observations) -> float` | bounded: finite `observations` list | `Plausibility : float` | | 10 | Jaynes policeman+burglar | `jaynes_policeman_burglar()` | bounded: numerical default values | `Plausibility : float` | -**Notes:** +## Notes + - The Python program does NOT implement full Jaynes' probability theory; it expresses the SHAPE of the lecture's three parts. - All `float` placeholders resolve to `float64` at runtime (Python's default `float` is C double). -- Per the v2 lexicon §9.2, the per-language rendering is the same as C11 (`much_less` / `much_greater` / `weakly_coupled`); this file does not use them. -- The frequentist definition is re-encoded as a `Stream : nat -> float` (the bounded form of "infinity" per Rule 1). +- Per the v2 lexicon section 9.2, the per-language rendering is the same as C11 (`much_less` / `much_greater` / `weakly_coupled`); this file does not use them. +- The frequentist definition is re-encoded as a `Stream : nat -> float` (the bounded form of "infinity" per Rule 1). \ No newline at end of file