Spaces:
Sleeping
Sleeping
Create pipeline/stages/s3_decompose.py
Browse files- pipeline/stages/s3_decompose.py +354 -0
pipeline/stages/s3_decompose.py
ADDED
|
@@ -0,0 +1,354 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
S3: DECOMPOSE β Prose to (Ch.A, Ch.B, Ch.C) in 15-operator vocabulary.
|
| 3 |
+
|
| 4 |
+
Per SPEC-PIPELINE-001 Part B.2:
|
| 5 |
+
(a) Identify channel_a content β Ch.A using WHENCE, WHEN
|
| 6 |
+
(b) Identify channel_b content β Ch.B using WHAT, WHERE, WHICH
|
| 7 |
+
(c) Identify channel_c content β Ch.C using FOR-WHAT, HOW
|
| 8 |
+
|
| 9 |
+
T1 bypass: CecCert sources call extract_from_cert() directly.
|
| 10 |
+
This module handles T2 (RTL docs) and T3 (formal properties).
|
| 11 |
+
|
| 12 |
+
THIS IS THE CORE IP. The decomposer patterns are protected.
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
from __future__ import annotations
|
| 16 |
+
|
| 17 |
+
from pipeline.stages.s1_segment import Segment
|
| 18 |
+
from pipeline.stages.s2_classify import ClassifiedSegment, Classification
|
| 19 |
+
from pipeline.types import (
|
| 20 |
+
ModalityGrounding, Op, OperatorExpression, OperatorSequence,
|
| 21 |
+
SourceProvenance, Tier, FrameExample, Witness, WitnessAttestation,
|
| 22 |
+
WitnessBundle,
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def decompose(classified: ClassifiedSegment) -> FrameExample | None:
    """Decompose a TECHNICAL segment into a FrameExample.

    Non-TECHNICAL segments are rejected outright; otherwise the segment is
    routed by source tier, with T3 chat transcripts getting content-aware
    handling. Returns None when no decomposition applies (should not happen
    for a properly classified TECHNICAL segment -- log as Untranslatable).
    """
    if classified.classification != Classification.TECHNICAL:
        return None

    seg = classified.segment
    tier = seg.source.tier
    if tier == Tier.T1:
        return _decompose_t1(seg)
    if tier == Tier.T2:
        return _decompose_t2(seg)
    if tier == Tier.T3:
        # Chat archives travel under T3 but need conversation-aware routing.
        is_chat = seg.segment_type.startswith("chat_")
        return _decompose_chat(seg) if is_chat else _decompose_t3(seg)
    return None
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def _decompose_t1(seg: Segment) -> FrameExample:
    """T1 bypass -- CecCert extraction handled by kernel.

    At pipeline level, T1 records are already structured. The kernel's
    extract_from_cert() does the actual projection. Here we wrap the
    segment text as a minimal FrameExample for T1 non-cert records
    (e.g., CSV rows, Yosys JSON fragments, Kernel source).

    Fixes: dropped the needless ``f`` prefix on constant strings (F541)
    and the unused intermediate ``text`` local.
    """
    # Lexical markers that flag a T1 segment as kernel source code.
    code_markers = ("module ", "pub fn ", "pub mod ", "record ", "trait ", "impl ")
    text_lower = seg.text.lower()

    # Kernel source -> extract operator structure from code.
    if any(marker in text_lower for marker in code_markers):
        return _build_example(
            seg,
            g_ops=[
                (Op.THIS, "this(source_module) β T1 code unit"),
                (Op.NO, "no(floating_point) β deterministic arithmetic"),
            ],
            s_ops=[
                (Op.GOES_WITH, "goes_with(module, dependencies) β channel_b relation"),
                (Op.TOGETHER_ALONE, "together(declarations) β module composition"),
            ],
            f_ops=[
                (Op.IF_THEN, "if_then(precondition, postcondition) β code contract"),
                (Op.MUST_LET, "must(type_safety) β effect annotation"),
            ],
        )

    # Any other T1 record (CSV row, JSON fragment): minimal structured wrap.
    return _build_example(
        seg,
        g_ops=[(Op.THIS, "this(record) β T1 structured data")],
        s_ops=[
            (Op.GOES_WITH, "goes_with(record, format) β channel_b relation"),
            (Op.INSIDE_OUTSIDE, "inside(record, T1_corpus)"),
        ],
        f_ops=[(Op.SAME_NOT_SAME, "same(reference, transformed) β equivalence check")],
    )
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def _decompose_t2(seg: Segment) -> FrameExample:
    """T2: RTL design documents (Verilog modules, design specs).

    Module segments get a dedicated operator frame keyed on the extracted
    module name and port list; free-text sections fall back to a generic
    frame that still lands home operators in every modality.
    """
    if seg.segment_type == "module":
        name = _extract_module_name(seg.text)
        port_preview = ", ".join(_extract_ports(seg.text)[:4])
        return _build_example(
            seg,
            g_ops=[
                (Op.THIS, f"this(module={name}) β design unit"),
                (Op.BECAUSE, f"because(RTL_specification, {name})"),
            ],
            s_ops=[
                (Op.INSIDE_OUTSIDE, f"inside({name}, design_hierarchy)"),
                (Op.TOGETHER_ALONE, f"together({port_preview}) β port interface"),
            ],
            f_ops=[
                (Op.CAN_CANNOT, f"can({name}, synthesize)"),
                (Op.MUST_LET, "must(timing_constraints) β channel_c requirement"),
            ],
        )

    # Section/paragraph decomposition -- home operators in each modality.
    return _build_example(
        seg,
        g_ops=[(Op.THIS, "this(section) β design document content")],
        s_ops=[
            (Op.GOES_WITH, "goes_with(section, T2_document) β channel_b relation"),
            (Op.INSIDE_OUTSIDE, "inside(content, document_scope)"),
        ],
        f_ops=[
            (Op.IF_THEN, "if_then(requirement, behavior)"),
            (Op.NEAR_FAR, "near(section, related_context) β document proximity"),
        ],
    )
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def _decompose_chat(seg: Segment) -> FrameExample:
    """T3 chat messages -- extract operator reasoning from conversation.

    Scans the lowercased message for lexical cues of each operator family
    and emits one operator per detected family, quoting the first 80 chars
    as evidence. Guarantees at least one Tier 2 (channel_b) and one Tier 3
    (channel_c) operator so home-condition checks always have material.

    Fixes: removed the unused ``import re``; the repetitive cue scans are
    now table-driven (same cue lists, same append order, same evidence
    strings as before).
    """
    text = seg.text
    text_lower = text.lower()
    snippet = text[:80]
    role = seg.segment_type.replace("chat_", "")

    def _hit(cues: tuple[str, ...]) -> bool:
        # True when any lexical cue occurs in the lowercased message.
        return any(cue in text_lower for cue in cues)

    # -- Ch.A: Channel A (Tier 1: THIS, SAME/NOT-SAME, NO) --
    g_ops: list[tuple[Op, str]] = [
        (Op.THIS, f"this({role}_message) β deictic anchor to speaker turn"),
    ]
    if _hit(("same", "identical", "equal", "different", "not the same", "distinguish")):
        g_ops.append((Op.SAME_NOT_SAME, f"same/not-same detected in: {snippet}"))
    if _hit(("not ", "no ", "never ", "cannot ", "don't", "doesn't", "isn't")):
        g_ops.append((Op.NO, f"no(negation) in: {snippet}"))

    # -- Ch.B: Channel B (Tier 2 operators) --
    s_ops: list[tuple[Op, str]] = []
    # GOES-WITH: explicit association language, else default to context.
    if _hit(("coupled", "associated", "related", "connected", "linked", "maps to", "corresponds")):
        s_ops.append((Op.GOES_WITH, f"goes_with(relation) in: {snippet}"))
    else:
        s_ops.append((Op.GOES_WITH, f"goes_with({role}, conversation_context)"))

    # Remaining Tier 2 families: (operator, evidence label, lexical cues).
    tier2_cues: list[tuple[Op, str, tuple[str, ...]]] = [
        (Op.TOGETHER_ALONE, "together/alone",
         ("together", "combined", "joint", "both", "separate", "alone", "independent", "isolated")),
        (Op.MANY_ONE, "many/one",
         ("multiple", "several", "many", "each", "single", "one ", "unique")),
        (Op.EVERY_SOME, "every/some",
         ("every ", "all ", "any ", "some ", "each ")),
        (Op.MORE_LESS, "more/less",
         ("more ", "less ", "greater", "smaller", "better", "worse", "stronger", "weaker")),
        (Op.CAN_CANNOT, "can/cannot",
         ("can ", "cannot ", "capable", "able to", "unable")),
    ]
    for op, label, cues in tier2_cues:
        if _hit(cues):
            s_ops.append((op, f"{label} in: {snippet}"))

    # Defensive: GOES_WITH above already guarantees a Tier 2 operator.
    if not any(op in _TIER_2_OPS for op, _ in s_ops):
        s_ops.insert(0, (Op.GOES_WITH, f"goes_with({role}, topic) β default channel_b relation"))

    # -- Ch.C: Channel C (Tier 3 operators) --
    f_ops: list[tuple[Op, str]] = []
    tier3_cues: list[tuple[Op, str, tuple[str, ...]]] = [
        (Op.IF_THEN, "if_then",
         ("if ", "then ", "when ", "implies", "conditional", "given that")),
        (Op.BECAUSE, "because",
         ("because", "since ", "therefore", "thus ", "hence", "reason", "caused by")),
        (Op.MUST_LET, "must/let",
         ("must ", "shall ", "required", "permitted", "allowed", "forbidden")),
        (Op.MAYBE, "maybe",
         ("maybe", "perhaps", "possibly", "might ", "uncertain", "unclear")),
        (Op.INSIDE_OUTSIDE, "inside/outside",
         ("inside", "outside", "within", "boundary", "scope", "contained", "enclosed")),
        (Op.NEAR_FAR, "near/far",
         ("near ", "far ", "close to", "distant", "proxim", "remote", "adjacent", "approach")),
    ]
    for op, label, cues in tier3_cues:
        if _hit(cues):
            f_ops.append((op, f"{label} in: {snippet}"))

    # Default: guarantee at least one Tier 3 operator.
    if not any(op in _CAUSAL_OPS for op, _ in f_ops):
        f_ops.append((Op.IF_THEN, f"if_then({role}_states, content_follows) β default channel_c"))

    return _build_example(seg, g_ops=g_ops, s_ops=s_ops, f_ops=f_ops)
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
# Operator sets for home-condition checking: each chat decomposition must
# land at least one operator from each family (see _decompose_chat).
# Tier 2 ("channel_b") relational operators.
_TIER_2_OPS = {Op.GOES_WITH, Op.TOGETHER_ALONE, Op.MANY_ONE, Op.EVERY_SOME, Op.MORE_LESS, Op.CAN_CANNOT}
# Tier 3 ("channel_c") causal/modal operators.
_CAUSAL_OPS = {Op.INSIDE_OUTSIDE, Op.NEAR_FAR, Op.IF_THEN, Op.BECAUSE, Op.MAYBE, Op.MUST_LET}
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
def _decompose_t3(seg: Segment) -> FrameExample:
    """T3: formal property specifications (SVA assertions, constraints).

    Assertion segments get a dedicated temporal-implication frame; any
    other formal content falls back to a generic specification frame.
    """
    if seg.segment_type != "assertion":
        # Generic formal content: constraints, binds, coverage specs.
        return _build_example(
            seg,
            g_ops=[(Op.THIS, "this(formal_content) β T3 specification")],
            s_ops=[
                (Op.GOES_WITH, "goes_with(property, verification_scope) β channel_b relation"),
                (Op.INSIDE_OUTSIDE, "inside(property, formal_context)"),
            ],
            f_ops=[(Op.MUST_LET, "must(constraint) β formal requirement")],
        )

    # Assertion: temporal implication grounded in the design requirement.
    return _build_example(
        seg,
        g_ops=[
            (Op.THIS, "this(assertion) β formal property"),
            (Op.BECAUSE, "because(design_requirement, property)"),
        ],
        s_ops=[
            (Op.EVERY_SOME, "every(cycle, property_holds)"),
            (Op.MUST_LET, "must(property) β invariant constraint"),
        ],
        f_ops=[
            (Op.IF_THEN, "if_then(antecedent, consequent) β temporal implication"),
            (Op.SAME_NOT_SAME, "same(design_intent, formal_property)"),
        ],
    )
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 251 |
+
# HELPERS
|
| 252 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 253 |
+
|
| 254 |
+
def _build_example(
    seg: Segment,
    g_ops: list[tuple[Op, str]],
    s_ops: list[tuple[Op, str]],
    f_ops: list[tuple[Op, str]],
) -> FrameExample:
    """Construct a FrameExample from operator lists and auto-witness.

    Sorts each modality's operators by DAG index (non-decreasing) before
    building the sequence, ensuring channel ordering integrity. The three
    near-identical ModalityGrounding constructions are DRY'd into one
    local helper (same sort key, same construction, same order).

    Args:
        seg: source segment supplying provenance and witness metadata.
        g_ops/s_ops/f_ops: (operator, evidence) pairs for the G/S/F
            modalities respectively.
    """

    def _grounding(modality: str, ops: list[tuple[Op, str]]) -> ModalityGrounding:
        # Sort by operator index to maintain DAG ordering.
        ordered = sorted(ops, key=lambda pair: pair[0].value)
        return ModalityGrounding(
            modality=modality,
            operators=OperatorSequence(
                expressions=[OperatorExpression(operator=op, evidence=ev) for op, ev in ordered]
            ),
        )

    provenance = SourceProvenance(
        source_id=seg.source.source_id,
        tier=seg.source.tier,
        url=seg.source.url,
        commit_or_version=seg.source.commit_or_version,
        license=seg.source.license,
        acquired_at=seg.source.acquired_at,
        artifact_sha256=seg.source.artifact_sha256,
    )

    example = FrameExample(
        provenance=provenance,
        channel_a=_grounding("G", g_ops),
        channel_b=_grounding("S", s_ops),
        channel_c=_grounding("F", f_ops),
        witnesses=_auto_witness(seg),
    )
    # Content-addressed hash: compute only after every field is in place.
    example.content_hash = example.compute_hash()
    return example
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
def _auto_witness(seg: Segment) -> WitnessBundle:
    """Generate witness attestations from segment metadata.

    Every witness is attested True; evidence strings are derived from the
    segment and its source provenance.
    """
    src = seg.source
    # (witness, evidence) pairs, in the canonical attestation order.
    evidence_table = [
        (Witness.WHAT, f"{seg.segment_type} segment, {len(seg.text)} chars"),
        (Witness.WHERE, f"source={src.source_id}, bytes={seg.byte_range}"),
        (Witness.WHICH, f"tier={src.tier.value}, type={seg.segment_type}"),
        (Witness.WHEN, f"acquired={src.acquired_at}"),
        (Witness.FOR_WHAT, "governed training data for governed generation pipeline"),
        (Witness.HOW, f"pipeline S3 decompose, tier={src.tier.value}"),
        (Witness.WHENCE, f"url={src.url}, commit={src.commit_or_version}"),
    ]
    bundle = WitnessBundle()
    for witness, evidence in evidence_table:
        bundle.attestations[witness] = WitnessAttestation(
            witness=witness, attested=True, evidence=evidence,
        )
    return bundle
|
| 344 |
+
|
| 345 |
+
|
| 346 |
+
def _extract_module_name(text: str) -> str:
|
| 347 |
+
import re
|
| 348 |
+
m = re.search(r"module\s+(\w+)", text)
|
| 349 |
+
return m.group(1) if m else "unknown"
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
def _extract_ports(text: str) -> list[str]:
|
| 353 |
+
import re
|
| 354 |
+
return re.findall(r"\b(?:input|output|inout)\s+(?:\[\d+:\d+\]\s*)?(\w+)", text)
|