ashu1069 committed on
Commit
0b33900
·
1 Parent(s): 027b791

Matter - initial gradio space

Browse files
Files changed (43) hide show
  1. .gitignore +9 -0
  2. PUSH.md +79 -0
  3. app.py +300 -0
  4. examples/cd_brick.jpg +3 -0
  5. examples/domestic_pet_bottle.jpg +3 -0
  6. examples/ev_pouch_cell.jpg +3 -0
  7. examples/ewaste_dead_laptop.jpg +3 -0
  8. examples/medical_glucose_strip.jpg +3 -0
  9. examples/textile_cotton_tshirt.jpg +3 -0
  10. matter/__init__.py +2 -0
  11. matter/calibration.py +152 -0
  12. matter/engine.py +206 -0
  13. matter/guardrail.py +70 -0
  14. matter/hazard_flagger.py +51 -0
  15. matter/heads.py +114 -0
  16. matter/impact.py +82 -0
  17. matter/passport.py +150 -0
  18. matter/runtimes/__init__.py +18 -0
  19. matter/runtimes/ollama.py +121 -0
  20. requirements.txt +11 -0
  21. spec/calibration_v1.json +115 -0
  22. spec/co2e_factors_v1.json +171 -0
  23. spec/examples/README.md +34 -0
  24. spec/examples/passport-01-domestic-pet-bottle.json +43 -0
  25. spec/examples/passport-02-medical-sharps-hazards-auto-flagged.json +49 -0
  26. spec/examples/passport-03-medical-glucose-strip-guardrail-fires.json +47 -0
  27. spec/examples/passport-04-ev-18650-cell.json +49 -0
  28. spec/examples/passport-05-ewaste-laptop.json +46 -0
  29. spec/examples/passport-06-cd-concrete-with-recovery-value.json +45 -0
  30. spec/examples/passport-07-textile-denim.json +46 -0
  31. spec/governance.md +54 -0
  32. spec/hazard_flags_v1.json +16 -0
  33. spec/jurisdictions/README.md +30 -0
  34. spec/jurisdictions/eu-dpp.json +86 -0
  35. spec/jurisdictions/nyc.json +84 -0
  36. spec/jurisdictions/pune.json +88 -0
  37. spec/matter-passport-v0.1.md +135 -0
  38. spec/matter-passport-v0.1.schema.json +148 -0
  39. spec/medical-disclaimer.md +47 -0
  40. spec/passport-prior-art.md +83 -0
  41. spec/safety_rules_v1.json +82 -0
  42. sync.sh +49 -0
  43. transformers_runtime.py +115 -0
.gitignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ .pytest_cache/
5
+ .ruff_cache/
6
+ .venv/
7
+ .env
8
+ *.egg-info/
9
+ .DS_Store
PUSH.md ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Pushing to `ashu-1069/matter` on Hugging Face Spaces
2
+
3
+ Space URL: https://huggingface.co/spaces/ashu-1069/matter
4
+
5
+ ## One-time setup
6
+
7
+ ```bash
8
+ cd space
9
+ bash sync.sh # pulls matter/ + spec/ + examples/ from upstream
10
+
11
+ git init
12
+ git remote add origin https://huggingface.co/spaces/ashu-1069/matter
13
+ git fetch origin
14
+
15
+ # pull the existing Space contents (HF auto-creates a README on Space creation)
16
+ # then overlay our files
17
+ git checkout -b main origin/main 2>/dev/null || git checkout -b main
18
+
19
+ # auth — pick one:
20
+ # 1) huggingface-cli login (token with write to ashu-1069)
21
+ # 2) embed token in remote URL:
22
+ # git remote set-url origin https://ashu-1069:HF_TOKEN@huggingface.co/spaces/ashu-1069/matter
23
+ ```
24
+
25
+ If `git fetch origin` fails with auth errors:
26
+ - Get a write token: https://huggingface.co/settings/tokens (role: `write`)
27
+ - `huggingface-cli login` and paste it
28
+
29
+ If the Space already has a `README.md` from creation, our `README.md` will overwrite it — that's expected.
30
+
31
+ ## Every push
32
+
33
+ ```bash
34
+ cd space
35
+ bash sync.sh
36
+ git add -A
37
+ git commit -m "matter: <what changed>"
38
+ git push origin main
39
+ ```
40
+
41
+ The Space rebuilds automatically. First build takes ~5 min (torch + transformers).
42
+ Cold-start GPU load adds ~30 s on the first **Live** click after that.
43
+
44
+ ## Space settings to set in the HF UI
45
+
46
+ - **Hardware**: ZeroGPU (free; spins up an H200 on demand)
47
+ - **Visibility**: Public (for the Impact Challenge submission)
48
+ - **Variables and secrets** (all optional):
49
+ - `MATTER_MODEL_ID` — override default `google/gemma-4-E2B-it`
50
+ - `MATTER_LORA_ID` — A/B the E1 QLoRA adapter (off by default; it regressed
51
+ on real NYC images)
52
+ - `MATTER_MAX_NEW_TOKENS` — default `512`
53
+
54
+ ## Sanity-check locally before pushing
55
+
56
+ ```bash
57
+ cd space
58
+ bash sync.sh
59
+ uv venv
60
+ uv pip install -r requirements.txt
61
+ uv run python app.py
62
+ # → http://127.0.0.1:7860
63
+ ```
64
+
65
+ Demo mode works without a GPU. Live mode needs a CUDA GPU locally — easier to
66
+ just push and test on the Space.
67
+
68
+ ## Authorization troubleshooting
69
+
70
+ If `git push` fails with `403 Forbidden`:
71
+ 1. Verify you own the Space owner namespace `ashu-1069` (the username with the
72
+ hyphen — not `ashu1069`).
73
+ 2. Token must have **write** scope, not just read.
74
+ 3. If using `huggingface-cli login`, run `huggingface-cli whoami` to confirm the
75
+ token resolves to `ashu-1069`.
76
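+ 4. As a fallback, embed the token in the remote URL (note: this stores the token in plain text in `.git/config`, so prefer `huggingface-cli login` and revoke the token if it leaks):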
77
+ ```
78
+ git remote set-url origin https://ashu-1069:hf_xxx@huggingface.co/spaces/ashu-1069/matter
79
+ ```
app.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Matter — Material Intelligence Platform · Gradio Space app.
2
+
3
+ Two run modes:
4
+ - Live: upload an image → Gemma 4 → MIE pipeline → Passport
5
+ - Demo: instant, returns one of the seven canonical example Passports
6
+
7
+ Live mode loads Gemma 4 E2B lazily on first inference. The demo path never
8
+ touches the model so cold Spaces still serve a fast first impression.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import traceback
15
+ from pathlib import Path
16
+
17
+ import gradio as gr
18
+
19
+ from matter.engine import MIE, CaptureInput, MIEError
20
+ from matter.heads import HEADS
21
+ from transformers_runtime import TransformersRuntime
22
+
23
+ ROOT = Path(__file__).parent
24
+ EXAMPLES_DIR = ROOT / "examples"
25
+ SPEC_EXAMPLES = ROOT / "spec" / "examples"
26
+
27
+ HEAD_NAMES = list(HEADS.keys()) # domestic, ewaste, ev, medical, cd, textile
28
+
29
+ SAMPLE_IMAGES: dict[str, str] = {
30
+ "domestic": "domestic_pet_bottle.jpg",
31
+ "ewaste": "ewaste_dead_laptop.jpg",
32
+ "ev": "ev_pouch_cell.jpg",
33
+ "medical": "medical_glucose_strip.jpg",
34
+ "cd": "cd_brick.jpg",
35
+ "textile": "textile_cotton_tshirt.jpg",
36
+ }
37
+
38
+ DEMO_PASSPORTS: dict[str, str] = {
39
+ "domestic": "passport-01-domestic-pet-bottle.json",
40
+ "medical": "passport-03-medical-glucose-strip-guardrail-fires.json",
41
+ "ev": "passport-04-ev-18650-cell.json",
42
+ "ewaste": "passport-05-ewaste-laptop.json",
43
+ "cd": "passport-06-cd-concrete-with-recovery-value.json",
44
+ "textile": "passport-07-textile-denim.json",
45
+ }
46
+
47
+
48
+ _runtime: TransformersRuntime | None = None
49
+
50
+
51
def get_engine() -> MIE:
    """Return an MIE wired to the shared Transformers runtime.

    The runtime object is created on the first call and kept in the
    module-level ``_runtime``; a fresh ``MIE`` wrapper is built around it on
    every call.
    """
    global _runtime
    runtime = _runtime
    if runtime is None:
        runtime = TransformersRuntime()
        _runtime = runtime
    return MIE(runtime=runtime, on_device=True)
56
+
57
+
58
def render_summary(p: dict) -> str:
    """Render a Passport dict as a Markdown summary card.

    Args:
        p: Passport as a plain dict. Any section may be missing or ``None``;
            both are treated as an empty section.

    Returns:
        Markdown text: a heading with class (and subclass when present), a
        status badge, and a two-column table of the key Passport fields.
    """
    # `or {}` rather than a .get default: a key that is present but null must
    # not crash the renderer (same pattern already used for `value` below).
    ident = p.get("identity") or {}
    state = p.get("state") or {}
    nba = p.get("next_best_action") or {}
    routing = p.get("routing") or {}
    prov = p.get("provenance") or {}
    val = (p.get("value") or {}).get("environmental") or {}

    hazards = state.get("hazard_flags") or []
    do_not = nba.get("do_not") or []

    # Later checks win: a severe hazard outranks "guardrail fired".
    badge = "🟢 clear"
    if nba.get("fallback_used"):
        badge = "🟡 guardrail fired — safe default applied"
    if any(h in {"biohazard", "sharps_injury_risk", "thermal_runaway_risk"} for h in hazards):
        badge = "🔴 hazard"

    # A null / non-numeric confidence is shown as 0.000 instead of raising
    # inside the format spec.
    try:
        confidence = float(ident.get("confidence") or 0)
    except (TypeError, ValueError):
        confidence = 0.0

    # Only show the subclass segment when there is one (avoids "_None_").
    subclass = ident.get("subclass")
    heading = f"### {ident.get('class', '?')}"
    if subclass:
        heading += f" · _{subclass}_"

    lines = [
        heading,
        "",
        f"**Status** · {badge}",
        "",
        "| | |",
        "|---|---|",
        f"| **Action** | `{nba.get('primary', '?')}`"
        + (f" → `{nba.get('secondary')}`" if nba.get("secondary") else "") + " |",
        "| **Do not** | "
        + (", ".join(f"`{x}`" for x in do_not) if do_not else "_none_") + " |",
        f"| **Confidence** | `{confidence:.3f}` "
        + ("(calibrated)" if prov.get("confidence_calibrated") else "(raw)") + " |",
        "| **Hazards** | "
        + (", ".join(f"`{h}`" for h in hazards) if hazards else "_none_") + " |",
        f"| **Condition** | `{state.get('condition', '?')}` |",
        f"| **Jurisdiction** | {routing.get('jurisdiction', '?')} |",
    ]
    if val.get("co2e_avoided_kg") is not None:
        lines.append(f"| **CO₂e avoided** | `{val['co2e_avoided_kg']} kg` |")
    lines += [
        f"| **Model** | `{prov.get('model', '?')}` ({prov.get('runtime', '?')}) |",
        f"| **On-device** | {'✅' if prov.get('on_device') else '—'} |",
    ]
    return "\n".join(lines)
100
+
101
+
102
def render_pipeline(p: dict) -> str:
    """Render the four post-model MIE steps as a Markdown status table.

    Args:
        p: Passport as a plain dict. Missing or ``None`` sections are treated
            as empty.

    Returns:
        Markdown text with one row per pipeline step.
    """
    nba = p.get("next_best_action") or {}
    state = p.get("state") or {}
    prov = p.get("provenance") or {}
    fallback = nba.get("fallback_used", False)
    hazards = state.get("hazard_flags") or []

    # Keep step 02 in agreement with the "(calibrated)" / "(raw)" label the
    # summary card derives from the same provenance flag.
    if prov.get("confidence_calibrated"):
        calibration_cell = "✅ histogram-calibrated"
    else:
        calibration_cell = "➖ raw confidence (not calibrated)"

    return "\n".join([
        "**MIE pipeline**",
        "",
        "| Step | Status |",
        "|---|---|",
        "| 01 · Validator | ✅ JSON shape + taxonomy enum |",
        "| 02 · Calibration | " + calibration_cell + " |",
        "| 03 · Hazard auto-flagger | "
        + (f"⚠️ flagged: {', '.join(hazards)}" if hazards else "✅ no class-implied hazard") + " |",
        "| 04 · Guardrail | "
        + ("⚠️ fired — unsafe action overridden" if fallback else "✅ action passed `do_not` rules") + " |",
    ])
119
+
120
+
121
def run_demo(head: str) -> tuple[str, str, str]:
    """Return the canned example Passport for *head* without touching the model.

    Args:
        head: Material head name; names without a demo Passport fall back to
            the ``"domestic"`` example.

    Returns:
        ``(summary_markdown, pipeline_markdown, passport_json)``.
    """
    fname = DEMO_PASSPORTS.get(head, DEMO_PASSPORTS["domestic"])
    # Decode explicitly as UTF-8 so the result does not depend on the host locale.
    p = json.loads((SPEC_EXAMPLES / fname).read_text(encoding="utf-8"))
    return render_summary(p), render_pipeline(p), json.dumps(p, indent=2)
125
+
126
+
127
def run_live(image_path: str | None, head: str, jurisdiction: str) -> tuple[str, str, str]:
    """Run the full MIE pipeline on an uploaded image.

    Args:
        image_path: Filesystem path of the uploaded image, or ``None`` when
            nothing was uploaded.
        head: Material head name passed to the engine.
        jurisdiction: Optional override; blank, whitespace-only, or ``None``
            means "use the head default".

    Returns:
        ``(summary_markdown, pipeline_markdown, passport_json)``. Failures are
        reported as Markdown in the first (and, for unexpected errors, second)
        element instead of being raised, so the UI always has something to show.
    """
    if image_path is None:
        return (
            "⚠️ Upload an image first, or switch to **Demo** mode for the canonical example.",
            "", "",
        )
    try:
        engine = get_engine()
        capture = CaptureInput(
            image_path=Path(image_path),
            # `or ""` guards against a None value arriving from the textbox.
            jurisdiction=(jurisdiction or "").strip() or None,
        )
        passport = engine.infer(capture, head)
        p = passport.to_dict()
        return render_summary(p), render_pipeline(p), json.dumps(p, indent=2)
    except MIEError as e:
        return (
            f"### ❌ MIE pipeline rejected the model output\n\n```\n{e}\n```\n\n"
            "_The model returned malformed or out-of-taxonomy JSON. Try a clearer image or switch to Demo mode._",
            "", "",
        )
    except Exception as e:  # UI boundary: surface the error instead of crashing the app.
        return (
            f"### ❌ Runtime error\n\n```\n{e.__class__.__name__}: {e}\n```\n\n"
            "_If this is the first call after a cold start, the GPU worker is still loading Gemma 4 (≈30s). Try again in a moment, or use Demo mode._",
            f"<details><summary>traceback</summary>\n\n```\n{traceback.format_exc()}\n```\n</details>",
            "",
        )
155
+
156
+
157
def dispatch(mode: str, image_path: str | None, head: str, jurisdiction: str):
    """Route a button click to the demo or the live handler.

    The exact label ``"Demo (instant)"`` selects the demo path; every other
    mode value runs the live pipeline.
    """
    wants_demo = mode == "Demo (instant)"
    return run_demo(head) if wants_demo else run_live(image_path, head, jurisdiction)
161
+
162
+
163
+ CSS = """
164
+ :root {
165
+ --bg-0: #04080a;
166
+ --emerald: #00d97e;
167
+ --emerald-glow: #00ff8c;
168
+ --cyan: #00e5ff;
169
+ --leaf: #7dd3a8;
170
+ --ink: #e6f3ec;
171
+ --ink-dim: #8aa39a;
172
+ --line: rgba(125, 211, 168, 0.12);
173
+ }
174
+ .gradio-container {
175
+ background: radial-gradient(ellipse 80% 60% at 20% 0%, rgba(0, 217, 126, 0.18), transparent 60%),
176
+ radial-gradient(ellipse 70% 50% at 85% 20%, rgba(0, 229, 255, 0.10), transparent 60%),
177
+ linear-gradient(180deg, #04080a 0%, #061410 50%, #04080a 100%) !important;
178
+ color: var(--ink) !important;
179
+ font-family: "Inter", ui-sans-serif, system-ui, sans-serif !important;
180
+ }
181
+ #hero { padding: 28px 4px 8px; }
182
+ #hero h1 {
183
+ font-family: "Fraunces", Georgia, serif;
184
+ font-weight: 400;
185
+ font-size: clamp(2rem, 5vw, 3.4rem);
186
+ letter-spacing: -0.025em;
187
+ line-height: 1; margin: 0; color: var(--ink);
188
+ }
189
+ #hero h1 em {
190
+ font-style: italic;
191
+ background: linear-gradient(135deg, var(--emerald-glow), var(--cyan));
192
+ -webkit-background-clip: text; background-clip: text;
193
+ -webkit-text-fill-color: transparent;
194
+ font-weight: 300;
195
+ }
196
+ #hero p { color: var(--ink-dim); margin-top: 12px; max-width: 680px; }
197
+ #hero .chip {
198
+ display: inline-flex; gap: 8px; align-items: center;
199
+ padding: 6px 14px; border-radius: 999px;
200
+ border: 1px solid rgba(125, 211, 168, 0.28);
201
+ background: linear-gradient(135deg, rgba(0, 217, 126, 0.08), rgba(0, 229, 255, 0.04));
202
+ color: var(--leaf); font-size: 0.82rem; font-weight: 500;
203
+ }
204
+ .dot { width: 8px; height: 8px; border-radius: 50%; background: var(--emerald-glow);
205
+ box-shadow: 0 0 12px var(--emerald-glow); display: inline-block; }
206
+ .gr-button-primary {
207
+ background: linear-gradient(135deg, var(--emerald), var(--cyan)) !important;
208
+ color: #04130c !important;
209
+ border: 0 !important;
210
+ font-weight: 600 !important;
211
+ }
212
+ .gr-panel, .gr-block, .gr-form {
213
+ background: rgba(7, 18, 15, 0.6) !important;
214
+ border-color: var(--line) !important;
215
+ }
216
+ """
217
+
218
+ HERO_HTML = """
219
+ <div id="hero">
220
+ <span class="chip"><span class="dot"></span> Powered by Gemma 4 · On-device · CC0 Passport</span>
221
+ <h1 style="margin-top:18px;">Material in. <em>Passport out.</em></h1>
222
+ <p>Point a camera at a thing — bottle, battery, syringe, denim, concrete, e-waste — and Matter
223
+ returns a calibrated, hazard-aware <strong style="color:var(--ink)">Passport</strong> that routes
224
+ it to its right next life. One vocabulary, six material heads, four post-model layers, validated.</p>
225
+ </div>
226
+ """
227
+
228
+
229
def build_examples() -> list[list]:
    """Build the ``[image_path, head, jurisdiction]`` rows for the examples gallery.

    Sample images that are not present on disk are skipped.
    """
    candidates = ((head, EXAMPLES_DIR / fname) for head, fname in SAMPLE_IMAGES.items())
    return [
        [str(image), head, HEADS[head].default_jurisdiction]
        for head, image in candidates
        if image.exists()
    ]
236
+
237
+
238
+ with gr.Blocks(css=CSS, theme=gr.themes.Base(), title="Matter — Material Intelligence") as demo:
239
+ gr.HTML(HERO_HTML)
240
+
241
+ with gr.Row():
242
+ with gr.Column(scale=5):
243
+ gr.Markdown("### Capture")
244
+ image_in = gr.Image(
245
+ label="Material image",
246
+ type="filepath",
247
+ height=320,
248
+ sources=["upload", "webcam", "clipboard"],
249
+ )
250
+ head_in = gr.Dropdown(
251
+ label="Material head",
252
+ choices=HEAD_NAMES,
253
+ value="domestic",
254
+ info="Which taxonomy and prompt to use.",
255
+ )
256
+ juris_in = gr.Textbox(
257
+ label="Jurisdiction (optional override)",
258
+ placeholder="leave blank to use the head default",
259
+ value="",
260
+ )
261
+ mode_in = gr.Radio(
262
+ label="Mode",
263
+ choices=["Live (Gemma 4)", "Demo (instant)"],
264
+ value="Demo (instant)",
265
+ info="Live runs Gemma 4 E2B on ZeroGPU (~8–12s once warm). Demo returns the canonical example passport for this head.",
266
+ )
267
+ run_btn = gr.Button("Generate Passport", variant="primary", size="lg")
268
+
269
+ ex = build_examples()
270
+ if ex:
271
+ gr.Examples(
272
+ examples=ex,
273
+ inputs=[image_in, head_in, juris_in],
274
+ label="Sample materials",
275
+ examples_per_page=6,
276
+ )
277
+
278
+ with gr.Column(scale=7):
279
+ gr.Markdown("### Passport")
280
+ summary_out = gr.Markdown(value="_Pick a mode and press_ **Generate Passport**.")
281
+ pipeline_out = gr.Markdown()
282
+ with gr.Accordion("Passport JSON", open=True):
283
+ json_out = gr.Code(language="json", label=None, lines=22)
284
+
285
+ gr.Markdown(
286
+ "<div style='color:var(--ink-dim);font-size:0.85rem;margin-top:24px;text-align:center;'>"
287
+ "Matter · open Material Intelligence platform · "
288
+ "Built for the <strong>Gemma 4 Impact Challenge</strong>"
289
+ "</div>"
290
+ )
291
+
292
+ run_btn.click(
293
+ dispatch,
294
+ inputs=[mode_in, image_in, head_in, juris_in],
295
+ outputs=[summary_out, pipeline_out, json_out],
296
+ )
297
+
298
+
299
+ if __name__ == "__main__":
300
+ demo.queue(max_size=8).launch(server_name="0.0.0.0", show_error=True)
examples/cd_brick.jpg ADDED

Git LFS Details

  • SHA256: 0482da3f3f7597157ea9aad7c6d4296b68adce6621935699afc0fb958e47c01d
  • Pointer size: 131 Bytes
  • Size of remote file: 376 kB
examples/domestic_pet_bottle.jpg ADDED

Git LFS Details

  • SHA256: 0ce550866164a9b2bb86d0c662688e8bdd0ff46d679a22af73f338dabd95a4ae
  • Pointer size: 131 Bytes
  • Size of remote file: 294 kB
examples/ev_pouch_cell.jpg ADDED

Git LFS Details

  • SHA256: c925afebd5f78ee76f45a943845cca4b397aeff4006b0ddae9a37aa3297234e4
  • Pointer size: 130 Bytes
  • Size of remote file: 76 kB
examples/ewaste_dead_laptop.jpg ADDED

Git LFS Details

  • SHA256: 96d386e775900fd1280398127c7f80246b85ed9006c07a197caa7a731978bbee
  • Pointer size: 131 Bytes
  • Size of remote file: 157 kB
examples/medical_glucose_strip.jpg ADDED

Git LFS Details

  • SHA256: 561af5d393ad3428ba3e513f4b0612ff6661fe49a0d09f6846d7f2a0c85d0f74
  • Pointer size: 131 Bytes
  • Size of remote file: 125 kB
examples/textile_cotton_tshirt.jpg ADDED

Git LFS Details

  • SHA256: b6cb224a0176cd9362c837d3c94778306b54a9971cee716dcecffa4036640e10
  • Pointer size: 131 Bytes
  • Size of remote file: 415 kB
matter/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Matter — a Gemma 4-powered Material Intelligence Platform."""
2
+ __version__ = "0.0.1"
matter/calibration.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Post-hoc confidence calibration for the Material Intelligence Engine.
2
+
3
+ Gemma 4 E4B is badly over-confident at zero-shot (A1: every prediction at 0.95 or 0.98
4
+ regardless of correctness — ECE ~0.16 on 43 samples). This module fits a small, honest
5
+ calibration function on a held-out set and applies it at Passport construction time.
6
+
7
+ See DECISIONS.md D015 for the rationale and the `confidence_calibrated` flag in the
8
+ Matter Passport schema.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ from dataclasses import dataclass, field
15
+ from pathlib import Path
16
+ from typing import Literal
17
+
18
+ import numpy as np
19
+ from sklearn.isotonic import IsotonicRegression
20
+ from sklearn.linear_model import LogisticRegression
21
+
22
+ Method = Literal["platt", "isotonic", "histogram"]
23
+
24
+
25
@dataclass
class Calibration:
    """A fitted calibration function together with its fit statistics."""

    method: Method
    n_fit: int  # number of samples the calibration was fitted on
    ece_before: float  # ECE of the raw confidences
    ece_after: float  # ECE after applying this calibration to the fit set
    params: dict = field(default_factory=dict)  # method-specific parameters

    def to_dict(self) -> dict:
        """Return the fields as a plain dict (``params`` is not copied)."""
        names = ("method", "n_fit", "ece_before", "ece_after", "params")
        return {name: getattr(self, name) for name in names}

    @classmethod
    def from_dict(cls, d: dict) -> "Calibration":
        """Build an instance from a dict shaped like ``to_dict()`` output."""
        return cls(**d)
45
+
46
+
47
def ece(confidences: np.ndarray, correctness: np.ndarray, n_bins: int = 10) -> float:
    """Compute the Expected Calibration Error over equal-width bins on [0, 1].

    Each non-empty bin contributes ``|mean confidence - mean correctness|``
    weighted by its share of the samples. Every bin is half-open
    ``[lo, hi)`` except the last, which also includes its upper edge.
    """
    conf = np.asarray(confidences, dtype=float)
    hit = np.asarray(correctness, dtype=float)
    edges = np.linspace(0.0, 1.0, n_bins + 1)
    total = len(conf)
    last = n_bins - 1
    gap_sum = 0.0
    for b, (lo, hi) in enumerate(zip(edges[:-1], edges[1:])):
        under_hi = conf <= hi if b == last else conf < hi
        members = (conf >= lo) & under_hi
        count = int(members.sum())
        if count:
            gap_sum += (count / total) * abs(conf[members].mean() - hit[members].mean())
    return float(gap_sum)
62
+
63
+
64
def fit_platt(confidences: np.ndarray, correctness: np.ndarray) -> Calibration:
    """Fit Platt scaling: P(correct | conf) = sigmoid(a*conf + b).

    Returns a ``Calibration`` whose params hold the slope ``a`` and intercept
    ``b``; ``ece_after`` is measured on the same samples used for fitting.
    """
    features = np.asarray(confidences, dtype=float).reshape(-1, 1)
    labels = np.asarray(correctness, dtype=int)
    model = LogisticRegression(C=1e6)  # very large C: almost no regularization
    model.fit(features, labels)
    prob_correct = model.predict_proba(features)[:, 1]
    slope = float(model.coef_[0, 0])
    intercept = float(model.intercept_[0])
    return Calibration(
        method="platt",
        n_fit=len(labels),
        ece_before=ece(confidences, correctness),
        ece_after=ece(prob_correct, correctness),
        params={"a": slope, "b": intercept},
    )
77
+
78
+
79
def fit_isotonic(confidences: np.ndarray, correctness: np.ndarray) -> Calibration:
    """Fit an isotonic (monotone, non-parametric) calibration map.

    The fitted knots are stored as ``params["x"]`` / ``params["y"]``;
    ``ece_after`` is measured on the same samples used for fitting.
    """
    scores = np.asarray(confidences, dtype=float)
    labels = np.asarray(correctness, dtype=int)
    regressor = IsotonicRegression(out_of_bounds="clip")
    regressor.fit(scores, labels)
    mapped = regressor.predict(scores)
    knots = {
        "x": regressor.X_thresholds_.tolist(),
        "y": regressor.y_thresholds_.tolist(),
    }
    return Calibration(
        method="isotonic",
        n_fit=len(labels),
        ece_before=ece(confidences, correctness),
        ece_after=ece(mapped, correctness),
        params=knots,
    )
95
+
96
+
97
def fit_histogram(confidences: np.ndarray, correctness: np.ndarray, n_bins: int = 10) -> Calibration:
    """Fit histogram binning: each bin's empirical accuracy is its calibrated score.

    Bins are equal-width on [0, 1]. Bins with no fit samples get no entry, so a
    confidence falling into one keeps its raw value.
    """
    scores = np.asarray(confidences, dtype=float)
    labels = np.asarray(correctness, dtype=int)
    edges = np.linspace(0.0, 1.0, n_bins + 1)
    top = n_bins - 1

    accuracy_by_bin: dict[int, float] = {}
    for b in range(n_bins):
        lo, hi = edges[b], edges[b + 1]
        # Only the last bin includes its upper edge.
        under_hi = scores <= hi if b == top else scores < hi
        members = (scores >= lo) & under_hi
        if members.any():
            accuracy_by_bin[b] = float(labels[members].mean())

    # Map the fit set through the freshly built table to measure ECE afterwards.
    slots = np.clip(np.digitize(scores, edges) - 1, 0, top)
    calibrated = np.array(
        [accuracy_by_bin.get(int(slot), float(scores[pos])) for pos, slot in enumerate(slots)]
    )
    return Calibration(
        method="histogram",
        n_fit=len(labels),
        ece_before=ece(confidences, correctness),
        ece_after=ece(calibrated, correctness),
        params={"bins": edges.tolist(), "bin_accuracy": accuracy_by_bin},
    )
121
+
122
+
123
def apply(cal: Calibration, confidences: np.ndarray | list[float]) -> np.ndarray:
    """Apply a fitted calibration to new confidences.

    Args:
        cal: Fitted calibration; only ``cal.method`` and ``cal.params`` are read.
        confidences: Raw confidences (a sequence or array).

    Returns:
        Array of calibrated confidences, one per input value.

    Raises:
        ValueError: If ``cal.method`` is not a known method.
    """
    c = np.asarray(confidences, dtype=float)
    if cal.method == "platt":
        a = cal.params["a"]
        b = cal.params["b"]
        return 1.0 / (1.0 + np.exp(-(a * c + b)))
    if cal.method == "isotonic":
        xs = np.array(cal.params["x"])
        ys = np.array(cal.params["y"])
        return np.interp(c, xs, ys)
    if cal.method == "histogram":
        bins = np.array(cal.params["bins"])
        # JSON round-trips turn the integer bin indices into strings. Normalize
        # here so a table built without `load()` does not silently miss every
        # lookup and return the raw confidence as if it were calibrated.
        bin_acc = {int(k): v for k, v in cal.params["bin_accuracy"].items()}
        n_bins = len(bins) - 1
        idx = np.clip(np.digitize(c, bins) - 1, 0, n_bins - 1)
        # Bins that had no fit samples have no entry: keep the raw value there.
        return np.array([bin_acc.get(int(i), float(c[j])) for j, i in enumerate(idx)])
    raise ValueError(f"unknown method: {cal.method}")
141
+
142
+
143
def save(cal: Calibration, path: Path) -> None:
    """Write *cal* to *path* as 2-space-indented JSON."""
    serialized = json.dumps(cal.to_dict(), indent=2)
    path.write_text(serialized)
145
+
146
+
147
def load(path: Path) -> Calibration:
    """Read a calibration previously written by ``save``.

    For histogram calibrations the ``bin_accuracy`` keys are converted back to
    ``int``, because JSON serialization stringifies them.
    """
    cal = Calibration.from_dict(json.loads(path.read_text()))
    if cal.method != "histogram":
        return cal
    table = cal.params.get("bin_accuracy")
    if isinstance(table, dict):
        cal.params["bin_accuracy"] = {int(key): value for key, value in table.items()}
    return cal
matter/engine.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Material Intelligence Engine (MIE) — wired four-layer pipeline.
2
+
3
+ Composes the platform primitives the project shipped piecemeal:
4
+
5
+ 1. JSON + enum validator (D012)
6
+ 2. Confidence calibration (D015 — calibration_v1.json)
7
+ 3. Class-implied hazard auto-flagger (D019 — hazard_flags_v1.json)
8
+ 4. do_not guardrail (D018 — safety_rules_v1.json)
9
+ -> validated Passport (Pydantic)
10
+
11
+ Runtime is injected via the `Runtime` protocol — the engine doesn't know whether
12
+ it's calling Ollama, llama.cpp, Cactus, or a fake. That keeps the engine itself
13
+ unit-testable without a model.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import hashlib
19
+ import json
20
+ import re
21
+ from dataclasses import dataclass
22
+ from pathlib import Path
23
+ from typing import Protocol, runtime_checkable
24
+
25
+ from matter import calibration as cal
26
+ from matter.guardrail import apply_guardrail, load_rules as load_safety_rules
27
+ from matter.hazard_flagger import apply_hazard_flagger, load_hazard_rules
28
+ from matter.heads import HEADS, build_prompt
29
+ from matter.passport import (
30
+ Capture,
31
+ Identity,
32
+ NextBestAction,
33
+ Passport,
34
+ Provenance,
35
+ Routing,
36
+ State,
37
+ make_passport_id,
38
+ now_utc,
39
+ )
40
+
41
+
42
+ _SPEC_DIR = Path(__file__).resolve().parent.parent / "spec"
43
+ _CALIB_PATH = _SPEC_DIR / "calibration_v1.json"
44
+ _SAFETY_PATH = _SPEC_DIR / "safety_rules_v1.json"
45
+ _HAZARD_PATH = _SPEC_DIR / "hazard_flags_v1.json"
46
+ _JSON_RE = re.compile(r"\{.*\}", re.DOTALL)
47
+
48
+
49
@dataclass
class CaptureInput:
    """What the caller hands to the engine; becomes the Passport `capture` block.

    Supply one of ``image_path``, ``image_bytes`` or ``text``; the remaining
    fields are optional metadata.
    """

    # --- content (one of these) ---
    image_path: Path | None = None
    image_bytes: bytes | None = None
    text: str | None = None
    # --- optional metadata ---
    jurisdiction: str | None = None
    geohash_coarse: str | None = None
57
+ geohash_coarse: str | None = None
58
+
59
+
60
@runtime_checkable
class Runtime(Protocol):
    """Structural interface for a model backend: (prompt, image_path) -> raw output text."""

    name: str
    model_id: str

    def infer(self, prompt: str, image: Path | None) -> str:
        """Run the model on *prompt* (and optionally *image*) and return its raw text."""
68
+
69
+
70
class MIEError(Exception):
    """Error raised by the MIE pipeline itself (bad capture, bad model output, unknown head)."""
72
+
73
+
74
+ def _content_hash(c: CaptureInput) -> tuple[str, str]:
75
+ """Return (modality, content_hash). Modality inferred from inputs."""
76
+ if c.image_path is not None:
77
+ b = Path(c.image_path).read_bytes()
78
+ return "image", "sha256:" + hashlib.sha256(b).hexdigest()
79
+ if c.image_bytes is not None:
80
+ return "image", "sha256:" + hashlib.sha256(c.image_bytes).hexdigest()
81
+ if c.text is not None:
82
+ return "text", "sha256:" + hashlib.sha256(c.text.encode()).hexdigest()
83
+ raise MIEError("CaptureInput needs one of image_path / image_bytes / text")
84
+
85
+
86
+ def _parse_json_block(text: str) -> dict:
87
+ m = _JSON_RE.search(text)
88
+ if m is None:
89
+ raise MIEError(f"no JSON object in model output: {text[:200]!r}")
90
+ try:
91
+ return json.loads(m.group(0))
92
+ except json.JSONDecodeError as e:
93
+ raise MIEError(f"invalid JSON in model output: {e} | {text[:200]!r}") from e
94
+
95
+
96
def _validate_enum(parsed: dict, head_name: str) -> None:
    """D012 — check that the predicted class and action belong to the head's taxonomies.

    Raises MIEError when ``identity.class`` or ``next_best_action.primary`` is
    missing or not a member of the corresponding set for *head_name*.
    """
    head = HEADS[head_name]

    identity_block = parsed.get("identity") or {}
    identity_cls = identity_block.get("class")
    if identity_cls not in head.identity_classes:
        raise MIEError(f"identity.class {identity_cls!r} not in {head_name} taxonomy")

    action_block = parsed.get("next_best_action") or {}
    primary = action_block.get("primary")
    if primary not in head.nba_classes:
        raise MIEError(f"next_best_action.primary {primary!r} not in {head_name} NBA taxonomy")
105
+
106
+
107
def _calibrate(c: float, table: cal.Calibration) -> float:
    """Calibrate a single confidence value with *table* and return it as a plain float."""
    calibrated = cal.apply(table, [c])
    return float(calibrated[0])
109
+
110
+
111
class MIE:
    """Compose the four primitives + the runtime into one entry point.

    Pipeline per call to ``infer``: runtime call -> JSON parse + enum
    validation -> confidence calibration -> hazard auto-flagger -> do_not
    guardrail -> Pydantic validation of the finished Passport.
    """

    def __init__(
        self,
        runtime: Runtime,
        on_device: bool = False,
        calibration_path: Path = _CALIB_PATH,
        safety_path: Path = _SAFETY_PATH,
        hazard_path: Path = _HAZARD_PATH,
    ):
        """Load the calibration table and rule files and bind the runtime.

        Args:
            runtime: Backend that turns (prompt, image path) into raw text.
            on_device: Recorded verbatim in each Passport's provenance block.
            calibration_path: JSON file readable by ``calibration.load``.
            safety_path: do_not rules file for the guardrail.
            hazard_path: Class-implied hazard rules file.
        """
        self.runtime = runtime
        self.on_device = on_device
        self.calib = cal.load(calibration_path)
        self.safety_rules = load_safety_rules(safety_path)
        self.hazard_rules = load_hazard_rules(hazard_path)
        # "<method>@<first 16 hex chars of the file's sha256>" identifies the
        # exact calibration table used for a Passport.
        self._calibration_ref = f"{self.calib.method}@" + hashlib.sha256(
            calibration_path.read_bytes()
        ).hexdigest()[:16]

    @staticmethod
    def _raw_confidence(block: dict, block_name: str) -> float:
        """Read ``block["confidence"]`` as a float; missing or null counts as 0.0.

        Raises MIEError (not TypeError/ValueError) for non-numeric values so
        callers can treat it like any other malformed model output.
        """
        value = block.get("confidence")
        if value is None:
            return 0.0
        try:
            return float(value)
        except (TypeError, ValueError) as e:
            raise MIEError(f"{block_name}.confidence {value!r} is not a number") from e

    def infer(self, capture: CaptureInput, head_name: str) -> Passport:
        """Run the full pipeline for one capture and return a validated Passport.

        Args:
            capture: The captured content plus optional jurisdiction / geohash.
            head_name: Key into ``HEADS`` selecting taxonomy and prompt.

        Returns:
            The Passport after calibration, hazard flagging and guardrail.

        Raises:
            MIEError: Unknown head, no/invalid JSON in the model output,
                out-of-taxonomy class or action, malformed ``state`` block or
                confidence value, or a capture with no content.
        """
        if head_name not in HEADS:
            raise MIEError(f"unknown head: {head_name}. Heads: {list(HEADS)}")
        head = HEADS[head_name]
        jurisdiction = capture.jurisdiction or head.default_jurisdiction
        prompt = build_prompt(head_name, jurisdiction)

        # 1. Runtime call
        # NOTE: only `image_path` is forwarded to the runtime; `image_bytes`
        # and `text` captures are hashed below but not shown to the model here.
        raw = self.runtime.infer(prompt, capture.image_path)

        # 2. Layer A — JSON parse + enum validation (D012)
        parsed = _parse_json_block(raw)
        _validate_enum(parsed, head_name)
        ident = parsed["identity"]
        st = parsed.get("state") or {}  # tolerate "state": null
        if not isinstance(st, dict):
            raise MIEError(f"state must be a JSON object, got {type(st).__name__}")
        nba = parsed["next_best_action"]

        # 3. Layer B — calibration on per-block confidences (D015)
        ident_conf_calibrated = _calibrate(self._raw_confidence(ident, "identity"), self.calib)
        state_conf_calibrated = _calibrate(self._raw_confidence(st, "state"), self.calib)
        nba_conf_calibrated = _calibrate(
            self._raw_confidence(nba, "next_best_action"), self.calib
        )

        # 4. Construct draft passport (dict — guardrail + flagger mutate dicts)
        modality, content_hash = _content_hash(capture)
        ts = now_utc()
        passport_id = make_passport_id(content_hash, ident["class"], ts)
        draft: dict = {
            "schema": "matter-passport/v0.1",
            "passport_id": passport_id,
            "prev": None,
            "timestamp": ts,
            "capture": {
                "modality": modality,
                "content_hash": content_hash,
                **({"geohash_coarse": capture.geohash_coarse} if capture.geohash_coarse else {}),
            },
            "identity": {
                "class": ident["class"],
                "subclass": ident.get("subclass"),
                "taxonomy": head.taxonomy_uri,
                "confidence": ident_conf_calibrated,
            },
            "state": {
                "condition": st.get("condition", "unknown"),
                "hazard_flags": list(st.get("hazard_flags") or []),
                "confidence": state_conf_calibrated,
            },
            "next_best_action": {
                "primary": nba["primary"],
                "secondary": nba.get("secondary"),
                "do_not": list(nba.get("do_not") or []),
                "confidence": nba_conf_calibrated,
                "fallback_used": False,
            },
            "routing": {"jurisdiction": jurisdiction, "regulation_refs": []},
            "provenance": {
                "model": self.runtime.model_id,
                "runtime": self.runtime.name,
                "on_device": self.on_device,
                "confidence_calibrated": True,
                "calibration_ref": self._calibration_ref,
            },
        }

        # 5. Layer C — class-implied hazard auto-flagger (D019)
        apply_hazard_flagger(draft, self.hazard_rules)

        # 6. Layer D — do_not guardrail (D018)
        apply_guardrail(draft, self.safety_rules)

        # 7. Final Pydantic validation against the v0.1 schema
        return Passport.model_validate(draft)
203
+
204
+
205
+ __all__ = ["MIE", "MIEError", "Runtime", "CaptureInput", "Capture", "Identity",
206
+ "State", "NextBestAction", "Provenance", "Routing", "Passport"]
matter/guardrail.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """MIE rule-based do_not guardrail (D018).
2
+
3
+ Enforces per-identity-class safety constraints on a Passport's `next_best_action`
4
+ before the MIE returns. If the model's proposed primary action is in the item's
5
+ `do_not` set, the guardrail overrides it with the safe default and sets
6
+ `next_best_action.fallback_used = true` on the Passport.
7
+
8
+ This is a platform primitive — it runs regardless of which model produced the draft.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ from dataclasses import dataclass
15
+ from pathlib import Path
16
+
17
+
18
@dataclass(frozen=True)
class Rule:
    """Immutable do_not rule attached to one identity class.

    Instances are built by `load_rules`. `apply_guardrail` reads `do_not` and
    `safe_default`; `critical_violation` reads `severity`.
    """

    # Actions that must never be the primary next_best_action for this class.
    do_not: frozenset[str]
    # Action written into next_best_action.primary when the guardrail fires.
    safe_default: str
    severity: str  # "critical" | "high" | "environmental" | "economic" | "informational"
23
+
24
+
25
@dataclass
class GuardrailResult:
    """Outcome of a single `apply_guardrail` call."""

    # The dict that was passed in (mutated in place), not a copy.
    passport: dict
    # True when the proposed primary action was overridden.
    fallback_used: bool
    # Rule found for the identity class. Set whenever a rule exists for the
    # class, even if it did not fire; None when there is no rule.
    triggered_rule: Rule | None
    # The primary action before the override; None when nothing was overridden.
    original_primary: str | None
31
+
32
+
33
def load_rules(path: str | Path) -> dict[str, Rule]:
    """Load do_not rules from a JSON file, keyed by identity class.

    The file may either wrap the rules in a top-level ``"rules"`` object or be
    the rules object itself. JSON files retain `reason` and other documentation
    fields the Python type doesn't carry — they're for downstream consumers.

    Args:
        path: Location of the rules JSON file.

    Returns:
        Mapping of rule key to `Rule`. A missing ``do_not`` becomes an empty
        set and a missing ``severity`` becomes ``"informational"``.

    Raises:
        KeyError: If an entry has no ``safe_default``.
    """
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    data = json.loads(Path(path).read_text(encoding="utf-8"))
    raw = data.get("rules", data)
    return {
        key: Rule(
            do_not=frozenset(v.get("do_not", [])),
            safe_default=v["safe_default"],
            severity=v.get("severity", "informational"),
        )
        for key, v in raw.items()
    }
46
+
47
+
48
def apply_guardrail(passport: dict, rules: dict[str, Rule]) -> GuardrailResult:
    """Validate and potentially override next_best_action.primary. Mutates passport.

    Looks up the rule for ``passport["identity"]["class"]``. If the proposed
    primary action is in that rule's ``do_not`` set, the primary is replaced by
    the rule's ``safe_default``, ``fallback_used`` is set to True, and the
    rejected action is appended to the passport's own ``do_not`` list.

    Args:
        passport: Draft passport dict. ``identity``, ``next_best_action`` and
            ``next_best_action.do_not`` may be missing or None (a model can
            emit JSON ``null`` for them).
        rules: Mapping of identity class to `Rule`.

    Returns:
        A `GuardrailResult` wrapping the same (mutated) passport dict.
    """
    identity_cls = (passport.get("identity") or {}).get("class")
    # Treat an explicit None the same as a missing block.
    nba = passport.get("next_best_action")
    if nba is None:
        nba = passport["next_best_action"] = {}
    primary = nba.get("primary")

    rule = rules.get(identity_cls) if identity_cls else None
    if rule is None or primary is None or primary not in rule.do_not:
        nba.setdefault("fallback_used", False)
        return GuardrailResult(passport, False, rule, None)

    # Guardrail fires: override primary, append to do_not list, flag fallback.
    nba["primary"] = rule.safe_default
    nba["fallback_used"] = True
    existing_do_not = nba.get("do_not")
    if existing_do_not is None:
        # Key absent or null: start a fresh list instead of failing on `in`.
        existing_do_not = nba["do_not"] = []
    if primary not in existing_do_not:
        existing_do_not.append(primary)
    return GuardrailResult(passport, True, rule, primary)
66
+
67
+
68
def critical_violation(result: GuardrailResult) -> bool:
    """Report whether the guardrail fired on a rule of severity "critical"."""
    fired = result.fallback_used
    if not fired:
        return fired
    rule = result.triggered_rule
    return rule is not None and rule.severity == "critical"
matter/hazard_flagger.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Class-implied hazard auto-flagger (D019).
2
+
3
+ A1/A6 showed that Gemma 4's self-reported hazard_flags miss real biohazards
4
+ (37.5% sensitivity on the medical eval). This module guarantees that canonical
5
+ hazards implied by the identity class are present in the Passport, regardless
6
+ of whether the model produced them.
7
+
8
+ Runs BEFORE the do_not guardrail in the MIE pipeline.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ from dataclasses import dataclass
15
+ from pathlib import Path
16
+
17
+
18
@dataclass(frozen=True)
class HazardRule:
    """Immutable list of hazard flags implied by one identity class."""

    # Flags that `apply_hazard_flagger` appends when they are not already present.
    required: tuple[str, ...]
21
+
22
+
23
def load_hazard_rules(path: str | Path) -> dict[str, HazardRule]:
    """Load class → required-hazards rules from a JSON file.

    The file may either wrap the rules in a top-level ``"rules"`` object or be
    the rules object itself. JSON retains `context_dependent` and other
    documentation the Python type doesn't carry.

    Args:
        path: Location of the hazard rules JSON file.

    Returns:
        Mapping of identity class to `HazardRule`; a missing ``required`` list
        becomes an empty tuple.
    """
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    data = json.loads(Path(path).read_text(encoding="utf-8"))
    raw = data.get("rules", data)
    return {cls: HazardRule(required=tuple(v.get("required", []))) for cls, v in raw.items()}
29
+
30
+
31
def apply_hazard_flagger(passport: dict, rules: dict[str, HazardRule]) -> tuple[dict, list[str]]:
    """Make sure the hazards implied by the identity class are flagged.

    Rewrites ``passport["state"]["hazard_flags"]`` in place: empty entries and
    the ``"none"`` sentinel are removed, then every flag required by the rule
    for the identity class is appended if it is not already listed.

    Returns (mutated_passport, added_flags).
    """
    identity = passport.get("identity") or {}
    class_name = identity.get("class")
    state = passport.setdefault("state", {})

    # Keep the model's real flags; discard blanks and the "none" sentinel.
    flags = [flag for flag in (state.get("hazard_flags") or []) if flag and flag != "none"]

    rule = rules.get(class_name) if class_name else None
    required = rule.required if rule is not None else ()

    added: list[str] = []
    for flag in required:
        if flag in flags:
            continue
        flags.append(flag)
        added.append(flag)

    state["hazard_flags"] = flags
    return passport, added
matter/heads.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Head registry: identity taxonomy + NBA taxonomy + prompt template per domain.
2
+
3
+ Extracted from the A1, A2, and A6 notebooks so the MIE engine has a single source
4
+ of truth for prompts and allowed-class enums. Each head bundles the same shape:
5
+
6
+ - identity_classes: allowed `identity.class` values
7
+ - nba_classes: allowed `next_best_action.primary` values
8
+ - taxonomy_uri: URI used in `identity.taxonomy`
9
+ - jurisdiction: default jurisdiction for prompts (overridable per call)
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from dataclasses import dataclass
15
+
16
+
17
@dataclass(frozen=True)
class Head:
    """Immutable per-domain bundle of taxonomy and prompt settings."""

    # Domain name; `build_prompt` passes it as the prompt's `domain` value.
    name: str
    # Allowed `identity.class` values, listed in the prompt.
    identity_classes: tuple[str, ...]
    # Allowed `next_best_action.primary` values, listed in the prompt.
    nba_classes: tuple[str, ...]
    # URI used in `identity.taxonomy`.
    taxonomy_uri: str
    # Jurisdiction used in the prompt when the caller does not supply one.
    default_jurisdiction: str
24
+
25
+
26
+ HEADS: dict[str, Head] = {
27
+ "domestic": Head(
28
+ name="domestic",
29
+ identity_classes=("plastic_bottle", "multilayer_plastic", "carton", "metal_can",
30
+ "organic", "glass", "paper", "other"),
31
+ nba_classes=("blue_bin_recycle", "compost_bin", "special_collection", "general_waste"),
32
+ taxonomy_uri="https://matter.spec/taxonomy/domestic/v0.1",
33
+ default_jurisdiction="NYC (DSNY + NY EPR)",
34
+ ),
35
+ "ewaste": Head(
36
+ name="ewaste",
37
+ identity_classes=("laptop", "smartphone", "cable", "power_adapter", "audio",
38
+ "battery", "pcb", "lighting", "other"),
39
+ nba_classes=("retailer_takeback", "ewaste_collection_event", "battery_drop_off",
40
+ "general_waste"),
41
+ taxonomy_uri="https://matter.spec/taxonomy/ewaste/v0.1",
42
+ default_jurisdiction="NYC (DSNY + NY EPR)",
43
+ ),
44
+ "ev": Head(
45
+ name="ev",
46
+ identity_classes=("lithium_ion_cell", "lead_acid_battery", "battery_pack",
47
+ "connector", "pcb", "other"),
48
+ nba_classes=("second_life_stationary_storage", "certified_ev_recycler",
49
+ "battery_drop_off", "general_waste"),
50
+ taxonomy_uri="https://matter.spec/taxonomy/ev/v0.1",
51
+ default_jurisdiction="NYC (DSNY + NY EPR)",
52
+ ),
53
+ "medical": Head(
54
+ name="medical",
55
+ identity_classes=("blister_pack", "sharps", "medicine_bottle", "diagnostic",
56
+ "wound_care", "packaging", "device", "other"),
57
+ nba_classes=("pharmacy_takeback", "biomedical_waste_collector",
58
+ "general_waste", "recycle_paper"),
59
+ taxonomy_uri="https://matter.spec/taxonomy/medical/v0.1",
60
+ default_jurisdiction="NYC (CPCB + NY DOH biomedical waste rules)",
61
+ ),
62
+ "cd": Head(
63
+ name="cd",
64
+ identity_classes=("concrete", "brick", "gypsum", "steel", "plastic", "ceramic",
65
+ "insulation", "other"),
66
+ nba_classes=("aggregate_recycler", "metal_recycler", "gypsum_recycler",
67
+ "specialty_recycler", "landfill"),
68
+ taxonomy_uri="https://matter.spec/taxonomy/cd/v0.1",
69
+ default_jurisdiction="NYC (Local Law 97 + DEP construction debris rules)",
70
+ ),
71
+ "textile": Head(
72
+ name="textile",
73
+ identity_classes=("cotton_garment", "denim", "blended_garment", "synthetic_garment",
74
+ "wool_garment", "leather", "mixed_fabric_scrap", "other"),
75
+ nba_classes=("resale_reuse", "fiber_recycler", "downcycle_rags",
76
+ "leather_reclaim", "landfill"),
77
+ taxonomy_uri="https://matter.spec/taxonomy/textile/v0.1",
78
+ default_jurisdiction="NYC (NY EPR Packaging + Higg MSI alignment)",
79
+ ),
80
+ }
81
+
82
+
83
+ PROMPT_TEMPLATE = """You are a Material Intelligence model identifying a single object in an image and recommending its disposal action.
84
+
85
+ Domain: {domain}
86
+ Jurisdiction: {jurisdiction}
87
+ Allowed identity classes: {identity_classes}
88
+ Allowed next_best_action values: {nba_classes}
89
+
90
+ CONFIDENCE RUBRIC — use these anchors strictly:
91
+ - 0.95-1.00 unambiguous; multiple distinctive features match exactly one class
92
+ - 0.80-0.94 likely correct; one or two details could plausibly fit another class
93
+ - 0.60-0.79 best guess; visual evidence is weak or partially occluded
94
+ - 0.40-0.59 coin-flip between two classes; pick the more likely one
95
+ - below 0.40 uncertain; prefer 'other' and say so in the reason
96
+
97
+ SAFETY: For sharps, contaminated diagnostics, batteries, or pharmaceuticals, route to the safest action. The Matter rule layer will override unsafe choices but you should not propose them.
98
+
99
+ Respond ONLY with valid JSON in this exact shape:
100
+ {{"identity": {{"class": "<one of the identity classes>", "subclass": "<short>", "confidence": <0-1>, "reason": "<one sentence>"}},
101
+ "state": {{"condition": "<good|degraded|contaminated|unknown>", "hazard_flags": ["<flags or []>"], "confidence": <0-1>}},
102
+ "next_best_action": {{"primary": "<one of the NBA values>", "secondary": "<NBA or null>", "do_not": ["<NBA values>"], "confidence": <0-1>, "reason": "<one sentence>"}}
103
+ }}
104
+ """
105
+
106
+
107
def build_prompt(head_name: str, jurisdiction: str | None = None) -> str:
    """Render the prompt template for one head.

    Args:
        head_name: Key into `HEADS`.
        jurisdiction: Jurisdiction text for the prompt; when empty or None the
            head's `default_jurisdiction` is used.

    Returns:
        The filled-in prompt string.

    Raises:
        KeyError: If `head_name` is not a registered head.
    """
    head = HEADS[head_name]
    effective_jurisdiction = jurisdiction if jurisdiction else head.default_jurisdiction
    fields = {
        "domain": head.name,
        "jurisdiction": effective_jurisdiction,
        "identity_classes": ", ".join(head.identity_classes),
        "nba_classes": ", ".join(head.nba_classes),
    }
    return PROMPT_TEMPLATE.format(**fields)
matter/impact.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Carbon footprint enrichment — fills Passport.value.environmental.co2e_avoided_kg
2
+ from the CC0 factors table at spec/co2e_factors_v1.json.
3
+
4
+ This is an OPTIONAL fifth layer that runs after the safety pipeline. Scope is
5
+ strictly carbon footprint; water / land-use / biodiversity are out of scope.
6
+
7
+ Per-kg factors with class-typical unit weights are looked up by (head, identity_class,
8
+ next_best_action). If the caller supplies a measured weight, that is used; otherwise
9
+ the typical_unit_weight_kg from the table is used as the best-available estimate.
10
+
11
+ Footprint sign convention (matches spec/co2e_factors_v1.json):
12
+ positive value = kg CO2e *avoided* by routing to this action vs. landfill BAU
13
+ negative value = the action *adds* to the carbon footprint relative to BAU
14
+ (e.g. autoclave incineration of sharps — a safety-driven cost)
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import json
20
+ from dataclasses import dataclass
21
+ from pathlib import Path
22
+
23
+ from matter.passport import Environmental, Passport, Value
24
+
25
+
26
@dataclass(frozen=True)
class Co2eEntry:
    """One row of the CO2e factor table for a (head, class, action) key."""

    # kg CO2e avoided per kg of material; may be negative (see module docstring).
    co2e_avoided_kg_per_kg: float
    # Weight used by `estimate_footprint` when no measured weight is supplied.
    typical_unit_weight_kg: float
    # Citation text copied from the table's `source` field.
    source: str
31
+
32
+
33
def load_factors(path: str | Path) -> dict[tuple[str, str, str], Co2eEntry]:
    """Load the CO2e factor table.

    Flattens the nested ``factors`` object (head → identity class → action)
    into a single dictionary.

    Args:
        path: Location of the factors JSON file.

    Returns:
        A flat lookup keyed by (head, identity_class, next_best_action).

    Raises:
        KeyError: If the file has no ``factors`` object or an entry lacks one
            of ``co2e_avoided_kg_per_kg``, ``typical_unit_weight_kg``, ``source``.
    """
    # The table contains non-ASCII text (dashes, "±"); read it as UTF-8
    # explicitly rather than with the platform's locale encoding.
    raw = json.loads(Path(path).read_text(encoding="utf-8"))
    table: dict[tuple[str, str, str], Co2eEntry] = {}
    for head, classes in raw["factors"].items():
        for cls, actions in classes.items():
            for action, entry in actions.items():
                table[(head, cls, action)] = Co2eEntry(
                    co2e_avoided_kg_per_kg=float(entry["co2e_avoided_kg_per_kg"]),
                    typical_unit_weight_kg=float(entry["typical_unit_weight_kg"]),
                    source=entry["source"],
                )
    return table
49
+
50
+
51
def estimate_footprint(
    passport: Passport,
    head_name: str,
    factors: dict[tuple[str, str, str], Co2eEntry],
    measured_weight_kg: float | None = None,
) -> Passport:
    """Populate `passport.value.environmental` from the CO2e factor table.

    The factor is looked up by (head_name, identity class, primary action).
    When no factor exists for that key the passport is returned untouched.

    Args:
        passport: Passport to enrich; it is modified in place.
        head_name: Head (domain) part of the lookup key.
        factors: Table produced by `load_factors`.
        measured_weight_kg: Item weight in kg; when None the entry's
            `typical_unit_weight_kg` is used.

    Returns:
        The same Passport object, possibly with an updated value block.
    """
    identity_class = passport.identity.class_
    primary_action = passport.next_best_action.primary
    factor = factors.get((head_name, identity_class, primary_action))
    if factor is None:
        return passport

    # Choose the weight and remember which kind it was for the basis string.
    if measured_weight_kg is None:
        mass_kg = factor.typical_unit_weight_kg
        weight_note = " [typical unit weight]"
    else:
        mass_kg = measured_weight_kg
        weight_note = " [measured weight]"

    avoided_kg = factor.co2e_avoided_kg_per_kg * mass_kg
    basis = (
        f"co2e_factors_v1[{head_name}/{identity_class}/{primary_action}] "
        f"({factor.co2e_avoided_kg_per_kg:+.2f} kg CO2e/kg × {mass_kg:.3f} kg)"
        f"{weight_note}"
    )

    footprint = Environmental(co2e_avoided_kg=round(avoided_kg, 4), basis=basis)
    if passport.value is not None:
        # Keep any existing economic value; replace only the environmental part.
        passport.value.environmental = footprint
    else:
        passport.value = Value(environmental=footprint)
    return passport
matter/passport.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Pydantic models for the Matter Passport v0.1.
2
+
3
+ Mirrors `spec/matter-passport-v0.1.schema.json`. Every field that the JSON Schema
4
+ declares is represented here with Python types + Pydantic validation.
5
+
6
+ Used by:
7
+ - the MIE engine to construct + validate Passports end-to-end
8
+ - the CLI to emit valid Passport JSON
9
+ - integration tests to assert pipeline correctness
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import hashlib
15
+ import json
16
+ import re
17
+ from datetime import datetime, timezone
18
+ from pathlib import Path
19
+ from typing import Literal
20
+
21
+ from pydantic import BaseModel, ConfigDict, Field, field_validator
22
+
23
+
24
+ SCHEMA_VERSION = "matter-passport/v0.1"
25
+ PASSPORT_ID_RE = re.compile(r"^mp_sha256:[0-9a-f]{6,64}$")
26
+
27
+ Modality = Literal["image", "video", "text", "audio", "multimodal"]
28
+ Runtime = Literal["unsloth", "cactus", "litert", "llama.cpp", "mlx", "ollama", "other"]
29
+
30
+
31
def make_passport_id(content_hash: str, identity_class: str, ts: str) -> str:
    """Build a passport_id from the capture hash, identity class and timestamp.

    The three values are joined with ``|``, hashed with SHA-256, and the first
    32 hex characters of the digest are prefixed with ``mp_sha256:``.
    """
    preimage = f"{content_hash}|{identity_class}|{ts}"
    hasher = hashlib.sha256()
    hasher.update(preimage.encode())
    short_digest = hasher.hexdigest()[:32]
    return "mp_sha256:" + short_digest
35
+
36
+
37
def now_utc() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
    current = datetime.now(tz=timezone.utc).replace(microsecond=0)
    # isoformat() renders the UTC offset as "+00:00"; emit "Z" instead.
    return current.isoformat().replace("+00:00", "Z")
39
+
40
+
41
class Capture(BaseModel):
    # Describes the captured input that the passport was derived from.
    modality: Modality  # one of the `Modality` literals
    content_hash: str
    # Optional coarse location; validation rejects values longer than 5 characters.
    geohash_coarse: str | None = Field(default=None, max_length=5)
45
+
46
+
47
class Identity(BaseModel):
    # What the item is. The attribute is named `class_` because `class` is a
    # Python keyword; the alias maps it to the JSON key `class`.
    class_: str = Field(alias="class")
    subclass: str | None = None
    taxonomy: str
    gs1_alias: str | None = None
    confidence: float = Field(ge=0.0, le=1.0)  # validated to lie in [0, 1]
    # Accept either the alias (`class`) or the field name (`class_`) on input.
    model_config = ConfigDict(populate_by_name=True)
54
+
55
+
56
class State(BaseModel):
    # Observed condition of the item and its hazards.
    condition: str
    estimated_soh: float | None = Field(default=None, ge=0.0, le=1.0)  # optional, in [0, 1]
    contamination: str | None = None
    # Defaults to a fresh empty list per instance.
    hazard_flags: list[str] = Field(default_factory=list)
    confidence: float = Field(ge=0.0, le=1.0)  # validated to lie in [0, 1]
62
+
63
+
64
class Economic(BaseModel):
    # Economic part of a passport's `value` block; all three fields are required.
    amount: float
    currency: str
    basis: str  # free-text explanation of how `amount` was derived
68
+
69
+
70
class Environmental(BaseModel):
    # Environmental part of a passport's `value` block; both fields are required.
    co2e_avoided_kg: float
    basis: str  # free-text explanation of how the figure was derived
73
+
74
+
75
class Value(BaseModel):
    # Optional value block; either part may be absent.
    economic: Economic | None = None
    environmental: Environmental | None = None
78
+
79
+
80
class NextBestAction(BaseModel):
    # Recommended handling of the item.
    primary: str
    secondary: str | None = None
    # Actions that must not be taken; defaults to a fresh empty list per instance.
    do_not: list[str] = Field(default_factory=list)
    confidence: float = Field(ge=0.0, le=1.0)  # validated to lie in [0, 1]
    # Defaults to False when the input does not provide it.
    fallback_used: bool = False
86
+
87
+
88
class Routing(BaseModel):
    # Optional routing metadata; every field has a default.
    jurisdiction: str | None = None
    regulation_refs: list[str] = Field(default_factory=list)  # fresh empty list per instance
    function_call: str | None = None
92
+
93
+
94
class Provenance(BaseModel):
    # Records which model and runtime produced the passport.
    model: str
    runtime: Runtime  # must be one of the `Runtime` literals
    on_device: bool
    lora: str | None = None
    # Defaults to False when the input does not provide it.
    confidence_calibrated: bool = False
    calibration_ref: str | None = None
101
+
102
+
103
class Signature(BaseModel):
    # Optional signature block of a passport. Field names are camelCase and are
    # used as-is for the JSON keys (no aliases are declared).
    type: str
    created: str
    verificationMethod: str
    proofPurpose: str | None = None
    proofValue: str
109
+
110
+
111
class Passport(BaseModel):
    """Matter Passport v0.1 — typed, validated representation of one inference."""

    # `schema` and `@context` are not usable as Python attribute names here, so
    # both fields are declared under an alias.
    schema_: Literal["matter-passport/v0.1"] = Field(default=SCHEMA_VERSION, alias="schema")
    context: str | list[str] | None = Field(default=None, alias="@context")
    passport_id: str
    prev: str | None = None
    timestamp: str
    capture: Capture
    identity: Identity
    state: State
    value: Value | None = None
    next_best_action: NextBestAction
    routing: Routing | None = None
    provenance: Provenance
    signature: Signature | None = None

    # Accept field names as well as aliases; reject unknown top-level keys.
    model_config = ConfigDict(populate_by_name=True, extra="forbid")

    @field_validator("passport_id")
    @classmethod
    def _check_id(cls, v: str) -> str:
        """Reject any passport_id that does not match `PASSPORT_ID_RE` exactly."""
        # fullmatch, not match: with match() the pattern's trailing `$` also
        # accepts an id that is followed by a newline.
        if not PASSPORT_ID_RE.fullmatch(v):
            raise ValueError(f"passport_id must match {PASSPORT_ID_RE.pattern}: got {v!r}")
        return v

    def to_json(self, **kwargs) -> str:
        """Emit canonical JSON with the field aliases (`schema`, `@context`, `class`).

        Fields whose value is None are omitted. Extra keyword arguments are
        forwarded to `model_dump_json`.
        """
        return self.model_dump_json(by_alias=True, exclude_none=True, **kwargs)

    def to_dict(self) -> dict:
        """Return the passport as a dict keyed by alias, omitting None fields."""
        return self.model_dump(by_alias=True, exclude_none=True)

    @classmethod
    def from_json(cls, raw: str | dict | Path) -> "Passport":
        """Build and validate a Passport from JSON text, a parsed dict, or a file.

        Args:
            raw: A `Path` to a JSON file, a JSON string, or an already-parsed
                dict. A `str` is always treated as JSON text, never as a path.

        Returns:
            The validated Passport.
        """
        if isinstance(raw, Path):
            # JSON is UTF-8 by specification; do not depend on the locale.
            raw = raw.read_text(encoding="utf-8")
        if isinstance(raw, str):
            raw = json.loads(raw)
        return cls.model_validate(raw)
matter/runtimes/__init__.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Runtime adapters that implement the `matter.engine.Runtime` protocol.
2
+
3
+ Each adapter is responsible for: (a) accepting a prompt + optional image,
4
+ (b) calling its underlying inference backend, (c) returning the raw model
5
+ output text. The MIE engine handles parsing, calibration, hazard flagging,
6
+ and guardrail enforcement on top.
7
+
8
+ Available adapters:
9
+ - OllamaRuntime: HTTP client for a local Ollama daemon (Mac, Linux, Pi)
10
+
11
+ Planned (Stage 1):
12
+ - HFInferenceRuntime: Hugging Face Inference Endpoints (cloud demo)
13
+ - CactusRuntime: Native Android via the Cactus SDK
14
+ """
15
+
16
+ from matter.runtimes.ollama import OllamaRuntime
17
+
18
+ __all__ = ["OllamaRuntime"]
matter/runtimes/ollama.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Ollama runtime adapter — `Runtime` protocol over a local Ollama HTTP daemon.
2
+
3
+ Works with text-only and multimodal Gemma 4 variants. Multimodal calls send the
4
+ image as base64-encoded bytes in the `images` array per Ollama's /api/chat
5
+ contract.
6
+
7
+ Tested against:
8
+ - Mac M3 Pro / Ollama 0.20.x with `gemma4:e2b` (text — A5 Tier 1 measured 2.3s p50)
9
+ - Multimodal models via the same registry path
10
+
11
+ If Ollama isn't running, calls raise httpx.ConnectError; the engine surfaces it.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import base64
17
+ from pathlib import Path
18
+ from typing import Literal
19
+
20
+ import httpx
21
+
22
+
23
+ DEFAULT_HOST = "http://localhost:11434"
24
+ DEFAULT_NUM_PREDICT = 512
25
+ DEFAULT_TIMEOUT_S = 300.0
26
+
27
+
28
class OllamaRuntime:
    """Implements the engine `Runtime` protocol via Ollama's REST API.

    Usable as a context manager; on exit the HTTP client is closed only if this
    object created it.
    """

    name: Literal["ollama"] = "ollama"

    def __init__(
        self,
        model: str = "gemma4:e2b",
        host: str = DEFAULT_HOST,
        num_predict: int = DEFAULT_NUM_PREDICT,
        timeout: float = DEFAULT_TIMEOUT_S,
        client: httpx.Client | None = None,
    ):
        """Configure the adapter.

        Args:
            model: Ollama model name, exposed as `model_id`.
            host: Base URL of the Ollama daemon; trailing slashes are removed.
            num_predict: Value sent as the `num_predict` generation option.
            timeout: Timeout for the internally created client. It is not
                applied to a client passed in via `client`.
            client: Optional pre-built `httpx.Client`; the caller keeps
                ownership and must close it.
        """
        self.model_id = model
        self.host = host.rstrip("/")
        self.num_predict = num_predict
        self.timeout = timeout
        self._client = client or httpx.Client(timeout=timeout)
        # Only close clients we created ourselves.
        self._owns_client = client is None

    def __enter__(self) -> "OllamaRuntime":
        return self

    def __exit__(self, *_exc) -> None:
        self.close()

    def close(self) -> None:
        """Close the HTTP client if it was created by this object."""
        if self._owns_client:
            self._client.close()

    # --- Health ---

    def is_alive(self) -> bool:
        """Return True iff GET /api/tags answers with status 200 within 2 seconds."""
        try:
            r = self._client.get(f"{self.host}/api/tags", timeout=2.0)
            return r.status_code == 200
        except httpx.HTTPError:
            return False

    def model_available(self) -> bool:
        """True iff `model_id` is in the local Ollama tag list.

        Names without an explicit tag are compared as ``<name>:latest``, which
        is how Ollama resolves an untagged model name. Returns False when the
        daemon is unreachable, answers with an error status, or sends a body
        that is not valid JSON.
        """
        try:
            r = self._client.get(f"{self.host}/api/tags", timeout=5.0)
            r.raise_for_status()
            models = r.json().get("models") or []
        except (httpx.HTTPError, ValueError):
            # ValueError covers a body that fails JSON decoding.
            return False
        names = {self._with_default_tag(m.get("name", "")) for m in models}
        return self._with_default_tag(self.model_id) in names

    @staticmethod
    def _with_default_tag(name: str) -> str:
        """Append ``:latest`` to a non-empty model name that carries no tag."""
        if not name:
            return name
        # Look for the tag only after the last "/", so a "host:port/" prefix
        # is not mistaken for one.
        last_segment = name.rsplit("/", 1)[-1]
        return name if ":" in last_segment else f"{name}:latest"

    # --- Inference ---

    def infer(self, prompt: str, image: Path | None) -> str:
        """Send one user message to /api/chat and return the reply text.

        Args:
            prompt: Text of the user message.
            image: Optional image file, attached base64-encoded in the
                message's `images` list.

        Returns:
            The non-empty `message.content` string of the response.

        Raises:
            httpx.HTTPStatusError: If the response has an error status.
            RuntimeError: If the response contains no text; the message
                includes the reported `done_reason` and token counts.
        """
        # Use /api/chat — applies the model's chat template (e.g. Gemma 4's
        # <start_of_turn>...<end_of_turn> markers). /api/generate feeds the
        # prompt raw and instruct-tuned models produce special-token garbage.
        message: dict = {"role": "user", "content": prompt}
        if image is not None:
            message["images"] = [_b64(Path(image))]
        body = {
            "model": self.model_id,
            "messages": [message],
            "stream": False,
            "options": {"temperature": 0, "num_predict": self.num_predict},
        }
        r = self._client.post(f"{self.host}/api/chat", json=body)
        r.raise_for_status()
        payload = r.json()
        text = (payload.get("message") or {}).get("content", "")
        if not text:
            # Build a diagnostic from the counters Ollama reports.
            done_reason = payload.get("done_reason", "unknown")
            eval_count = payload.get("eval_count", 0)
            prompt_eval = payload.get("prompt_eval_count", 0)
            hint = ""
            if done_reason == "length" and eval_count == 0:
                hint = (
                    " — context likely full from prompt+image before generation could start. "
                    "Try a multimodal model (e.g. gemma4:e4b on a GPU host), increase num_predict, "
                    "or run text-only via --no-image."
                )
            elif done_reason == "length":
                hint = (
                    f" — model generated {eval_count} tokens but produced no decoded text. "
                    "Likely a chat-template / special-token issue. Try a different model or check "
                    "Ollama version compatibility."
                )
            raise RuntimeError(
                f"Ollama returned empty response (done_reason={done_reason!r}, "
                f"eval_count={eval_count}, prompt_eval_count={prompt_eval}).{hint}"
            )
        return text
118
+
119
+
120
+ def _b64(path: Path) -> str:
121
+ return base64.b64encode(path.read_bytes()).decode("ascii")
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio==4.44.1
2
+ spaces>=0.30.0
3
+ torch==2.4.0
4
+ transformers>=4.46.0
5
+ accelerate>=0.34.0
6
+ pillow>=10.3
7
+ pydantic>=2.7
8
+ jsonschema>=4.22
9
+ numpy>=1.26
10
+ scikit-learn>=1.5
11
+ httpx>=0.27
spec/calibration_v1.json ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "method": "histogram",
3
+ "n_fit": 43,
4
+ "ece_before": 0.16046511627906979,
5
+ "ece_after": 0.0,
6
+ "params": {
7
+ "bins": [
8
+ 0.0,
9
+ 0.01,
10
+ 0.02,
11
+ 0.03,
12
+ 0.04,
13
+ 0.05,
14
+ 0.06,
15
+ 0.07,
16
+ 0.08,
17
+ 0.09,
18
+ 0.1,
19
+ 0.11,
20
+ 0.12,
21
+ 0.13,
22
+ 0.14,
23
+ 0.15,
24
+ 0.16,
25
+ 0.17,
26
+ 0.18,
27
+ 0.19,
28
+ 0.2,
29
+ 0.21,
30
+ 0.22,
31
+ 0.23,
32
+ 0.24,
33
+ 0.25,
34
+ 0.26,
35
+ 0.27,
36
+ 0.28,
37
+ 0.29,
38
+ 0.3,
39
+ 0.31,
40
+ 0.32,
41
+ 0.33,
42
+ 0.34,
43
+ 0.35000000000000003,
44
+ 0.36,
45
+ 0.37,
46
+ 0.38,
47
+ 0.39,
48
+ 0.4,
49
+ 0.41000000000000003,
50
+ 0.42,
51
+ 0.43,
52
+ 0.44,
53
+ 0.45,
54
+ 0.46,
55
+ 0.47000000000000003,
56
+ 0.48,
57
+ 0.49,
58
+ 0.5,
59
+ 0.51,
60
+ 0.52,
61
+ 0.53,
62
+ 0.54,
63
+ 0.55,
64
+ 0.56,
65
+ 0.5700000000000001,
66
+ 0.58,
67
+ 0.59,
68
+ 0.6,
69
+ 0.61,
70
+ 0.62,
71
+ 0.63,
72
+ 0.64,
73
+ 0.65,
74
+ 0.66,
75
+ 0.67,
76
+ 0.68,
77
+ 0.6900000000000001,
78
+ 0.7000000000000001,
79
+ 0.71,
80
+ 0.72,
81
+ 0.73,
82
+ 0.74,
83
+ 0.75,
84
+ 0.76,
85
+ 0.77,
86
+ 0.78,
87
+ 0.79,
88
+ 0.8,
89
+ 0.81,
90
+ 0.8200000000000001,
91
+ 0.8300000000000001,
92
+ 0.84,
93
+ 0.85,
94
+ 0.86,
95
+ 0.87,
96
+ 0.88,
97
+ 0.89,
98
+ 0.9,
99
+ 0.91,
100
+ 0.92,
101
+ 0.93,
102
+ 0.9400000000000001,
103
+ 0.9500000000000001,
104
+ 0.96,
105
+ 0.97,
106
+ 0.98,
107
+ 0.99,
108
+ 1.0
109
+ ],
110
+ "bin_accuracy": {
111
+ "94": 0.375,
112
+ "98": 0.9142857142857143
113
+ }
114
+ }
115
+ }
spec/co2e_factors_v1.json ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_meta": {
3
+ "version": "1.0",
4
+ "license": "CC0-1.0",
5
+ "scope": "carbon footprint only — kg CO2-equivalent avoided per kg of material when routed to the listed action vs. business-as-usual landfill",
6
+ "description": "Per-(identity_class, action) CO2e factors used by matter.impact to fill Passport.value.environmental.co2e_avoided_kg. Values are central estimates from peer-reviewed lifecycle assessments and government inventories. Out of scope: water footprint, land-use change, biodiversity. Stage 1 may add jurisdiction-conditional factors.",
7
+ "primary_sources": [
8
+ "ICE — Inventory of Carbon and Energy v3.0 (Hammond & Jones, University of Bath; updated 2019)",
9
+ "EPA WARM v15 — US EPA Waste Reduction Model (2020)",
10
+ "IPCC AR6 — Working Group III, Chapter 11: Industry (2022)",
11
+ "Geyer, R., Jambeck, J. R., & Law, K. L. (2017). Production, use, and fate of all plastics ever made. Science Advances 3:e1700782",
12
+ "Hischier, R. et al. (2020). Lifecycle assessment of plastic recycling. International Journal of LCA",
13
+ "Ellingsen, L. A.-W. et al. (2017). The size and range effect: lifecycle GHG of EV batteries. ERL 12"
14
+ ],
15
+ "limitations_disclosed": [
16
+ "Factors are point estimates; real LCA distributions are wide. Stage 1 will add ±sigma bands.",
17
+ "Baseline (counterfactual) is jurisdiction-specific landfill mix; ICE/EPA values assume OECD-typical landfill.",
18
+ "EV/battery factors carry the highest uncertainty (5-50x range across studies).",
19
+ "Pharmaceutical takeback factor is dominated by avoided water contamination, not direct CO2; included for completeness but reported low and noted."
20
+ ]
21
+ },
22
+ "factors": {
23
+ "domestic": {
24
+ "plastic_bottle": {
25
+ "blue_bin_recycle": {"co2e_avoided_kg_per_kg": 1.5, "typical_unit_weight_kg": 0.030, "source": "Geyer 2017; ICE v3.0 (PET, recycled vs. virgin)"},
26
+ "general_waste": {"co2e_avoided_kg_per_kg": 0.0, "typical_unit_weight_kg": 0.030, "source": "baseline (BAU)"}
27
+ },
28
+ "multilayer_plastic": {
29
+ "special_collection": {"co2e_avoided_kg_per_kg": 0.4, "typical_unit_weight_kg": 0.005, "source": "Hischier 2020 (multilayer recovery efficiency low)"},
30
+ "general_waste": {"co2e_avoided_kg_per_kg": 0.0, "typical_unit_weight_kg": 0.005, "source": "baseline"}
31
+ },
32
+ "carton": {
33
+ "blue_bin_recycle": {"co2e_avoided_kg_per_kg": 0.9, "typical_unit_weight_kg": 0.030, "source": "EPA WARM v15 (mixed paper/aseptic)"}
34
+ },
35
+ "metal_can": {
36
+ "blue_bin_recycle": {"co2e_avoided_kg_per_kg": 9.0, "typical_unit_weight_kg": 0.015, "source": "ICE v3.0 (aluminum, recycled vs. primary)"}
37
+ },
38
+ "organic": {
39
+ "compost_bin": {"co2e_avoided_kg_per_kg": 0.5, "typical_unit_weight_kg": 0.150, "source": "EPA WARM v15 (food scraps composted vs. landfilled, includes avoided methane)"},
40
+ "general_waste": {"co2e_avoided_kg_per_kg": -0.7, "typical_unit_weight_kg": 0.150, "source": "EPA WARM (organics in landfill emit methane — *positive* footprint)"}
41
+ },
42
+ "glass": {
43
+ "blue_bin_recycle": {"co2e_avoided_kg_per_kg": 0.3, "typical_unit_weight_kg": 0.250, "source": "ICE v3.0 (glass cullet vs. virgin)"}
44
+ },
45
+ "paper": {
46
+ "blue_bin_recycle": {"co2e_avoided_kg_per_kg": 0.7, "typical_unit_weight_kg": 0.010, "source": "EPA WARM v15 (mixed paper recycled)"},
47
+ "compost_bin": {"co2e_avoided_kg_per_kg": 0.4, "typical_unit_weight_kg": 0.010, "source": "EPA WARM v15"}
48
+ }
49
+ },
50
+ "ewaste": {
51
+ "laptop": {
52
+ "ewaste_collection_event": {"co2e_avoided_kg_per_kg": 35.0, "typical_unit_weight_kg": 2.0, "source": "Andrae & Edler 2015 (laptop recovery, rare earths + precious metals)"}
53
+ },
54
+ "smartphone": {
55
+ "retailer_takeback": {"co2e_avoided_kg_per_kg": 50.0, "typical_unit_weight_kg": 0.180, "source": "Apple Environmental Reports + Andrae 2015"}
56
+ },
57
+ "cable": {
58
+ "retailer_takeback": {"co2e_avoided_kg_per_kg": 5.0, "typical_unit_weight_kg": 0.080, "source": "ICE (copper, recycled vs. mined)"}
59
+ },
60
+ "power_adapter": {
61
+ "ewaste_collection_event": {"co2e_avoided_kg_per_kg": 8.0, "typical_unit_weight_kg": 0.150, "source": "ICE (mixed metals + PCB)"}
62
+ },
63
+ "audio": {
64
+ "retailer_takeback": {"co2e_avoided_kg_per_kg": 6.0, "typical_unit_weight_kg": 0.050, "source": "Andrae 2015 (consumer electronics)"}
65
+ },
66
+ "battery": {
67
+ "battery_drop_off": {"co2e_avoided_kg_per_kg": 1.5, "typical_unit_weight_kg": 0.024, "source": "EPA WARM (alkaline batteries)"}
68
+ },
69
+ "pcb": {
70
+ "ewaste_collection_event": {"co2e_avoided_kg_per_kg": 12.0, "typical_unit_weight_kg": 0.200, "source": "Cucchiella 2015 (PCB recycling, copper + gold + silver recovery)"}
71
+ },
72
+ "lighting": {
73
+ "retailer_takeback": {"co2e_avoided_kg_per_kg": 2.0, "typical_unit_weight_kg": 0.025, "source": "EPA WARM (mercury-containing lamps, LED average)"}
74
+ }
75
+ },
76
+ "ev": {
77
+ "lithium_ion_cell": {
78
+ "second_life_stationary_storage": {"co2e_avoided_kg_per_kg": 8.0, "typical_unit_weight_kg": 0.050, "source": "Ellingsen 2017 + Bobba 2018 (extended-life ratio applied to manufacturing-phase emissions)"},
79
+ "battery_drop_off": {"co2e_avoided_kg_per_kg": 4.0, "typical_unit_weight_kg": 0.050, "source": "Dunn 2015 (Li-ion hydrometallurgical recovery)"},
80
+ "certified_ev_recycler": {"co2e_avoided_kg_per_kg": 4.0, "typical_unit_weight_kg": 0.050, "source": "Dunn 2015"}
81
+ },
82
+ "lead_acid_battery": {
83
+ "certified_ev_recycler": {"co2e_avoided_kg_per_kg": 0.5, "typical_unit_weight_kg": 15.0, "source": "ILA 2019 (lead recycling, 99% recovery rate baseline)"}
84
+ },
85
+ "battery_pack": {
86
+ "certified_ev_recycler": {"co2e_avoided_kg_per_kg": 5.0, "typical_unit_weight_kg": 25.0, "source": "Ellingsen 2017 (full pack disassembly + recovery)"},
87
+ "second_life_stationary_storage": {"co2e_avoided_kg_per_kg": 10.0, "typical_unit_weight_kg": 25.0, "source": "Bobba 2018"}
88
+ },
89
+ "connector": {
90
+ "retailer_takeback": {"co2e_avoided_kg_per_kg": 4.0, "typical_unit_weight_kg": 0.500, "source": "ICE (mixed-metal recovery)"}
91
+ },
92
+ "pcb": {
93
+ "ewaste_collection_event": {"co2e_avoided_kg_per_kg": 12.0, "typical_unit_weight_kg": 0.200, "source": "Cucchiella 2015 (BMS PCB)"}
94
+ }
95
+ },
96
+ "medical": {
97
+ "blister_pack": {
98
+ "general_waste": {"co2e_avoided_kg_per_kg": 0.0, "typical_unit_weight_kg": 0.005, "source": "baseline; multilayer non-recoverable"}
99
+ },
100
+ "sharps": {
101
+ "biomedical_waste_collector": {"co2e_avoided_kg_per_kg": -0.3, "typical_unit_weight_kg": 0.010, "source": "WHO 2014 (autoclave + incineration emits ~0.3 kg CO2e/kg). Footprint reported as *negative avoidance* — disposal is necessary safety-spend, not climate gain."}
102
+ },
103
+ "medicine_bottle": {
104
+ "pharmacy_takeback": {"co2e_avoided_kg_per_kg": 0.05, "typical_unit_weight_kg": 0.015, "source": "Daughton & Ruhoy 2013 — primary value is avoided water-system contamination; CO2e contribution minor"}
105
+ },
106
+ "diagnostic": {
107
+ "biomedical_waste_collector": {"co2e_avoided_kg_per_kg": -0.3, "typical_unit_weight_kg": 0.001, "source": "WHO 2014 (small mass; safety-driven)"}
108
+ },
109
+ "wound_care": {
110
+ "general_waste": {"co2e_avoided_kg_per_kg": 0.0, "typical_unit_weight_kg": 0.005, "source": "baseline"},
111
+ "biomedical_waste_collector": {"co2e_avoided_kg_per_kg": -0.3, "typical_unit_weight_kg": 0.005, "source": "WHO 2014"}
112
+ },
113
+ "packaging": {
114
+ "recycle_paper": {"co2e_avoided_kg_per_kg": 0.7, "typical_unit_weight_kg": 0.010, "source": "EPA WARM (paperboard)"}
115
+ },
116
+ "device": {
117
+ "pharmacy_takeback": {"co2e_avoided_kg_per_kg": 1.0, "typical_unit_weight_kg": 0.050, "source": "ICE + Andrae (medical-device average)"}
118
+ }
119
+ },
120
+ "cd": {
121
+ "concrete": {
122
+ "aggregate_recycler": {"co2e_avoided_kg_per_kg": 0.05, "typical_unit_weight_kg": 5.0, "source": "ICE v3.0 (recycled aggregate vs. quarried; small per-kg, large per-tonne aggregated)"}
123
+ },
124
+ "brick": {
125
+ "aggregate_recycler": {"co2e_avoided_kg_per_kg": 0.2, "typical_unit_weight_kg": 2.5, "source": "ICE v3.0 (clay brick reused)"}
126
+ },
127
+ "gypsum": {
128
+ "gypsum_recycler": {"co2e_avoided_kg_per_kg": 0.04, "typical_unit_weight_kg": 10.0, "source": "USA Gypsum 2017 (drywall closed-loop)"}
129
+ },
130
+ "steel": {
131
+ "metal_recycler": {"co2e_avoided_kg_per_kg": 1.5, "typical_unit_weight_kg": 5.0, "source": "ICE v3.0 (recycled steel vs. BOF virgin)"}
132
+ },
133
+ "plastic": {
134
+ "specialty_recycler": {"co2e_avoided_kg_per_kg": 1.4, "typical_unit_weight_kg": 1.0, "source": "Hischier 2020 (PVC pipe recycling)"}
135
+ },
136
+ "ceramic": {
137
+ "aggregate_recycler": {"co2e_avoided_kg_per_kg": 0.04, "typical_unit_weight_kg": 1.5, "source": "ICE (limited; aggregate downcycle)"}
138
+ },
139
+ "insulation": {
140
+ "specialty_recycler": {"co2e_avoided_kg_per_kg": 0.3, "typical_unit_weight_kg": 0.5, "source": "Mineral wool LCA (Pargana 2014)"}
141
+ }
142
+ },
143
+ "textile": {
144
+ "cotton_garment": {
145
+ "resale_reuse": {"co2e_avoided_kg_per_kg": 8.0, "typical_unit_weight_kg": 0.300, "source": "Sandin & Peters 2018 (reuse displaces new garment manufacture, cotton)"},
146
+ "fiber_recycler": {"co2e_avoided_kg_per_kg": 3.0, "typical_unit_weight_kg": 0.300, "source": "Hawley 2014 (cotton fiber reclamation)"}
147
+ },
148
+ "denim": {
149
+ "fiber_recycler": {"co2e_avoided_kg_per_kg": 3.5, "typical_unit_weight_kg": 0.700, "source": "Cotton Inc Blue Jeans Go Green LCA 2018"},
150
+ "resale_reuse": {"co2e_avoided_kg_per_kg": 8.0, "typical_unit_weight_kg": 0.700, "source": "Sandin & Peters 2018"}
151
+ },
152
+ "blended_garment": {
153
+ "downcycle_rags": {"co2e_avoided_kg_per_kg": 1.5, "typical_unit_weight_kg": 0.300, "source": "Sandin 2018 (industrial rag stream displaces new wipes)"}
154
+ },
155
+ "synthetic_garment": {
156
+ "downcycle_rags": {"co2e_avoided_kg_per_kg": 2.1, "typical_unit_weight_kg": 0.300, "source": "Hischier 2020 (polyester downcycle)"},
157
+ "resale_reuse": {"co2e_avoided_kg_per_kg": 6.0, "typical_unit_weight_kg": 0.300, "source": "Sandin 2018 (synthetics reuse)"}
158
+ },
159
+ "wool_garment": {
160
+ "fiber_recycler": {"co2e_avoided_kg_per_kg": 5.0, "typical_unit_weight_kg": 0.500, "source": "Wiedemann 2020 (wool LCA, Australian flock)"},
161
+ "resale_reuse": {"co2e_avoided_kg_per_kg": 12.0, "typical_unit_weight_kg": 0.500, "source": "Wiedemann 2020"}
162
+ },
163
+ "leather": {
164
+ "leather_reclaim": {"co2e_avoided_kg_per_kg": 4.0, "typical_unit_weight_kg": 0.200, "source": "FAO 2019 (leather recovery, conservative)"}
165
+ },
166
+ "mixed_fabric_scrap": {
167
+ "downcycle_rags": {"co2e_avoided_kg_per_kg": 1.5, "typical_unit_weight_kg": 0.300, "source": "Sandin 2018"}
168
+ }
169
+ }
170
+ }
171
+ }
spec/examples/README.md ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Matter Passport — Example Library
2
+
3
+ Reference Passports produced by running the **real MIE pipeline** (validator → calibration → hazard auto-flagger → do_not guardrail) on synthetic model outputs designed to exercise each platform primitive. Every file in this directory is schema-valid against `spec/matter-passport-v0.1.schema.json`. Released CC0-1.0 alongside the schema.
4
+
5
+ Volatile fields (timestamp, content_hash, passport_id) are pinned to deterministic placeholder values, so regenerating the examples does not change the files and they stay diff-stable under git.
6
+
7
+ | File | Demonstrates |
8
+ |---|---|
9
+ | [`passport-01-domestic-pet-bottle.json`](passport-01-domestic-pet-bottle.json) | Happy path — clean PET bottle correctly routed to NYC blue bin. No guardrail; no fallback. |
10
+ | [`passport-02-medical-sharps-hazards-auto-flagged.json`](passport-02-medical-sharps-hazards-auto-flagged.json) | A6 regression — model emitted only sharps_injury_risk; the auto-flagger appends biohazard. |
11
+ | [`passport-03-medical-glucose-strip-guardrail-fires.json`](passport-03-medical-glucose-strip-guardrail-fires.json) | A6 save — model proposed general_waste for a contaminated diagnostic; guardrail overrode it. |
12
+ | [`passport-04-ev-18650-cell.json`](passport-04-ev-18650-cell.json) | EV cell with state-of-health estimate and class-implied thermal_runaway_risk auto-flag. |
13
+ | [`passport-05-ewaste-laptop.json`](passport-05-ewaste-laptop.json) | Laptop routed to the SAFE Disposal e-waste collection event. |
14
+ | [`passport-06-cd-concrete-with-recovery-value.json`](passport-06-cd-concrete-with-recovery-value.json) | Construction & Demolition — concrete chunk with environmental recovery value (CO2e avoided); the example's `value` block carries only the `environmental` entry. |
15
+ | [`passport-07-textile-denim.json`](passport-07-textile-denim.json) | Higg-MSI-aligned taxonomy — denim routed to fiber recycler (Cotton Inc Blue Jeans Go Green). |
16
+
17
+ ## How these were generated
18
+
19
+ ```
20
+ uv run python scripts/generate_example_passports.py
21
+ ```
22
+
23
+ The script runs each scenario's synthetic model output through the actual
24
+ `matter.engine.MIE` pipeline. Anything that lands in these files is exactly
25
+ what consumers would see from a real runtime — so you can use them as fixtures
26
+ for SDK tests, UI mockups, or downstream validators.
27
+
28
+ ## Decision references
29
+
30
+ - D012 — JSON + enum validator
31
+ - D015 — calibration (`spec/calibration_v1.json`)
32
+ - D018 + D020 — do_not guardrail (`spec/safety_rules_v1.json`)
33
+ - D019 — class-implied hazard auto-flagger (`spec/hazard_flags_v1.json`)
34
+ - D023 — schema v0.1 frozen for CC0
spec/examples/passport-01-domestic-pet-bottle.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "schema": "matter-passport/v0.1",
3
+ "passport_id": "mp_sha256:dd6b5b42f266910a",
4
+ "timestamp": "2026-04-26T00:00:00Z",
5
+ "capture": {
6
+ "modality": "image",
7
+ "content_hash": "sha256:dd6b5b42f266910a...(truncated)"
8
+ },
9
+ "identity": {
10
+ "class": "plastic_bottle",
11
+ "subclass": "PET water bottle",
12
+ "taxonomy": "https://matter.spec/taxonomy/domestic/v0.1",
13
+ "confidence": 0.9142857142857143
14
+ },
15
+ "state": {
16
+ "condition": "good",
17
+ "hazard_flags": [],
18
+ "confidence": 0.375
19
+ },
20
+ "value": {
21
+ "environmental": {
22
+ "co2e_avoided_kg": 0.045,
23
+ "basis": "co2e_factors_v1[domestic/plastic_bottle/blue_bin_recycle] (+1.50 kg CO2e/kg \u00d7 0.030 kg) [typical unit weight]"
24
+ }
25
+ },
26
+ "next_best_action": {
27
+ "primary": "blue_bin_recycle",
28
+ "do_not": [],
29
+ "confidence": 0.375,
30
+ "fallback_used": false
31
+ },
32
+ "routing": {
33
+ "jurisdiction": "NYC (DSNY + NY EPR)",
34
+ "regulation_refs": []
35
+ },
36
+ "provenance": {
37
+ "model": "matter-examples/static@v0.1",
38
+ "runtime": "other",
39
+ "on_device": false,
40
+ "confidence_calibrated": true,
41
+ "calibration_ref": "histogram@<sha256-of-spec/calibration_v1.json>"
42
+ }
43
+ }
spec/examples/passport-02-medical-sharps-hazards-auto-flagged.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "schema": "matter-passport/v0.1",
3
+ "passport_id": "mp_sha256:1a6d39db2c0e7e0f",
4
+ "timestamp": "2026-04-26T00:00:00Z",
5
+ "capture": {
6
+ "modality": "image",
7
+ "content_hash": "sha256:1a6d39db2c0e7e0f...(truncated)"
8
+ },
9
+ "identity": {
10
+ "class": "sharps",
11
+ "subclass": "syringe with needle",
12
+ "taxonomy": "https://matter.spec/taxonomy/medical/v0.1",
13
+ "confidence": 0.9142857142857143
14
+ },
15
+ "state": {
16
+ "condition": "contaminated",
17
+ "hazard_flags": [
18
+ "sharps_injury_risk",
19
+ "biohazard"
20
+ ],
21
+ "confidence": 0.375
22
+ },
23
+ "value": {
24
+ "environmental": {
25
+ "co2e_avoided_kg": -0.003,
26
+ "basis": "co2e_factors_v1[medical/sharps/biomedical_waste_collector] (-0.30 kg CO2e/kg \u00d7 0.010 kg) [typical unit weight]"
27
+ }
28
+ },
29
+ "next_best_action": {
30
+ "primary": "biomedical_waste_collector",
31
+ "do_not": [
32
+ "general_waste",
33
+ "blue_bin_recycle"
34
+ ],
35
+ "confidence": 0.375,
36
+ "fallback_used": false
37
+ },
38
+ "routing": {
39
+ "jurisdiction": "NYC (NY DOH biomedical waste rules)",
40
+ "regulation_refs": []
41
+ },
42
+ "provenance": {
43
+ "model": "matter-examples/static@v0.1",
44
+ "runtime": "other",
45
+ "on_device": false,
46
+ "confidence_calibrated": true,
47
+ "calibration_ref": "histogram@<sha256-of-spec/calibration_v1.json>"
48
+ }
49
+ }
spec/examples/passport-03-medical-glucose-strip-guardrail-fires.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "schema": "matter-passport/v0.1",
3
+ "passport_id": "mp_sha256:71cdbc8416d761bc",
4
+ "timestamp": "2026-04-26T00:00:00Z",
5
+ "capture": {
6
+ "modality": "image",
7
+ "content_hash": "sha256:71cdbc8416d761bc...(truncated)"
8
+ },
9
+ "identity": {
10
+ "class": "diagnostic",
11
+ "subclass": "blood glucose test strip",
12
+ "taxonomy": "https://matter.spec/taxonomy/medical/v0.1",
13
+ "confidence": 0.9142857142857143
14
+ },
15
+ "state": {
16
+ "condition": "contaminated",
17
+ "hazard_flags": [
18
+ "biohazard"
19
+ ],
20
+ "confidence": 0.375
21
+ },
22
+ "value": {
23
+ "environmental": {
24
+ "co2e_avoided_kg": -0.0003,
25
+ "basis": "co2e_factors_v1[medical/diagnostic/biomedical_waste_collector] (-0.30 kg CO2e/kg \u00d7 0.001 kg) [typical unit weight]"
26
+ }
27
+ },
28
+ "next_best_action": {
29
+ "primary": "biomedical_waste_collector",
30
+ "do_not": [
31
+ "general_waste"
32
+ ],
33
+ "confidence": 0.9,
34
+ "fallback_used": true
35
+ },
36
+ "routing": {
37
+ "jurisdiction": "NYC (NY DOH biomedical waste rules)",
38
+ "regulation_refs": []
39
+ },
40
+ "provenance": {
41
+ "model": "matter-examples/static@v0.1",
42
+ "runtime": "other",
43
+ "on_device": false,
44
+ "confidence_calibrated": true,
45
+ "calibration_ref": "histogram@<sha256-of-spec/calibration_v1.json>"
46
+ }
47
+ }
spec/examples/passport-04-ev-18650-cell.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "schema": "matter-passport/v0.1",
3
+ "passport_id": "mp_sha256:8bc13c3e392f5f71",
4
+ "timestamp": "2026-04-26T00:00:00Z",
5
+ "capture": {
6
+ "modality": "image",
7
+ "content_hash": "sha256:8bc13c3e392f5f71...(truncated)"
8
+ },
9
+ "identity": {
10
+ "class": "lithium_ion_cell",
11
+ "subclass": "NMC 18650",
12
+ "taxonomy": "https://matter.spec/taxonomy/ev/v0.1",
13
+ "confidence": 0.9142857142857143
14
+ },
15
+ "state": {
16
+ "condition": "degraded",
17
+ "hazard_flags": [
18
+ "thermal_runaway_risk"
19
+ ],
20
+ "confidence": 0.85
21
+ },
22
+ "value": {
23
+ "environmental": {
24
+ "co2e_avoided_kg": 0.4,
25
+ "basis": "co2e_factors_v1[ev/lithium_ion_cell/second_life_stationary_storage] (+8.00 kg CO2e/kg \u00d7 0.050 kg) [typical unit weight]"
26
+ }
27
+ },
28
+ "next_best_action": {
29
+ "primary": "second_life_stationary_storage",
30
+ "secondary": "battery_drop_off",
31
+ "do_not": [
32
+ "general_waste",
33
+ "landfill"
34
+ ],
35
+ "confidence": 0.85,
36
+ "fallback_used": false
37
+ },
38
+ "routing": {
39
+ "jurisdiction": "NYC (DSNY + NY EPR)",
40
+ "regulation_refs": []
41
+ },
42
+ "provenance": {
43
+ "model": "matter-examples/static@v0.1",
44
+ "runtime": "other",
45
+ "on_device": false,
46
+ "confidence_calibrated": true,
47
+ "calibration_ref": "histogram@<sha256-of-spec/calibration_v1.json>"
48
+ }
49
+ }
spec/examples/passport-05-ewaste-laptop.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "schema": "matter-passport/v0.1",
3
+ "passport_id": "mp_sha256:73e6c748e167e518",
4
+ "timestamp": "2026-04-26T00:00:00Z",
5
+ "capture": {
6
+ "modality": "image",
7
+ "content_hash": "sha256:73e6c748e167e518...(truncated)"
8
+ },
9
+ "identity": {
10
+ "class": "laptop",
11
+ "subclass": "Lenovo ThinkPad-class notebook",
12
+ "taxonomy": "https://matter.spec/taxonomy/ewaste/v0.1",
13
+ "confidence": 0.9142857142857143
14
+ },
15
+ "state": {
16
+ "condition": "degraded",
17
+ "hazard_flags": [],
18
+ "confidence": 0.9
19
+ },
20
+ "value": {
21
+ "environmental": {
22
+ "co2e_avoided_kg": 70.0,
23
+ "basis": "co2e_factors_v1[ewaste/laptop/ewaste_collection_event] (+35.00 kg CO2e/kg \u00d7 2.000 kg) [typical unit weight]"
24
+ }
25
+ },
26
+ "next_best_action": {
27
+ "primary": "ewaste_collection_event",
28
+ "secondary": "retailer_takeback",
29
+ "do_not": [
30
+ "general_waste"
31
+ ],
32
+ "confidence": 0.375,
33
+ "fallback_used": false
34
+ },
35
+ "routing": {
36
+ "jurisdiction": "NYC (DSNY + NY EPR)",
37
+ "regulation_refs": []
38
+ },
39
+ "provenance": {
40
+ "model": "matter-examples/static@v0.1",
41
+ "runtime": "other",
42
+ "on_device": false,
43
+ "confidence_calibrated": true,
44
+ "calibration_ref": "histogram@<sha256-of-spec/calibration_v1.json>"
45
+ }
46
+ }
spec/examples/passport-06-cd-concrete-with-recovery-value.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "schema": "matter-passport/v0.1",
3
+ "passport_id": "mp_sha256:7b05677dedf22215",
4
+ "timestamp": "2026-04-26T00:00:00Z",
5
+ "capture": {
6
+ "modality": "image",
7
+ "content_hash": "sha256:7b05677dedf22215...(truncated)"
8
+ },
9
+ "identity": {
10
+ "class": "concrete",
11
+ "subclass": "structural rubble",
12
+ "taxonomy": "https://matter.spec/taxonomy/cd/v0.1",
13
+ "confidence": 0.9142857142857143
14
+ },
15
+ "state": {
16
+ "condition": "good",
17
+ "hazard_flags": [],
18
+ "confidence": 0.375
19
+ },
20
+ "value": {
21
+ "environmental": {
22
+ "co2e_avoided_kg": 0.25,
23
+ "basis": "co2e_factors_v1[cd/concrete/aggregate_recycler] (+0.05 kg CO2e/kg \u00d7 5.000 kg) [typical unit weight]"
24
+ }
25
+ },
26
+ "next_best_action": {
27
+ "primary": "aggregate_recycler",
28
+ "do_not": [
29
+ "landfill"
30
+ ],
31
+ "confidence": 0.375,
32
+ "fallback_used": false
33
+ },
34
+ "routing": {
35
+ "jurisdiction": "NYC (Local Law 97 + DEP construction debris rules)",
36
+ "regulation_refs": []
37
+ },
38
+ "provenance": {
39
+ "model": "matter-examples/static@v0.1",
40
+ "runtime": "other",
41
+ "on_device": false,
42
+ "confidence_calibrated": true,
43
+ "calibration_ref": "histogram@<sha256-of-spec/calibration_v1.json>"
44
+ }
45
+ }
spec/examples/passport-07-textile-denim.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "schema": "matter-passport/v0.1",
3
+ "passport_id": "mp_sha256:a0d738e5d571843a",
4
+ "timestamp": "2026-04-26T00:00:00Z",
5
+ "capture": {
6
+ "modality": "image",
7
+ "content_hash": "sha256:a0d738e5d571843a...(truncated)"
8
+ },
9
+ "identity": {
10
+ "class": "denim",
11
+ "subclass": "blue jeans scrap",
12
+ "taxonomy": "https://matter.spec/taxonomy/textile/v0.1",
13
+ "confidence": 0.9142857142857143
14
+ },
15
+ "state": {
16
+ "condition": "degraded",
17
+ "hazard_flags": [],
18
+ "confidence": 0.9
19
+ },
20
+ "value": {
21
+ "environmental": {
22
+ "co2e_avoided_kg": 2.45,
23
+ "basis": "co2e_factors_v1[textile/denim/fiber_recycler] (+3.50 kg CO2e/kg \u00d7 0.700 kg) [typical unit weight]"
24
+ }
25
+ },
26
+ "next_best_action": {
27
+ "primary": "fiber_recycler",
28
+ "secondary": "resale_reuse",
29
+ "do_not": [
30
+ "landfill"
31
+ ],
32
+ "confidence": 0.9,
33
+ "fallback_used": false
34
+ },
35
+ "routing": {
36
+ "jurisdiction": "NYC (NY EPR Packaging + Higg MSI alignment)",
37
+ "regulation_refs": []
38
+ },
39
+ "provenance": {
40
+ "model": "matter-examples/static@v0.1",
41
+ "runtime": "other",
42
+ "on_device": false,
43
+ "confidence_calibrated": true,
44
+ "calibration_ref": "histogram@<sha256-of-spec/calibration_v1.json>"
45
+ }
46
+ }
spec/governance.md ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Matter Passport — Governance
2
+
3
+ **Status:** v0.1 governance proposal · **License:** CC0-1.0
4
+
5
+ ## Stewardship today
6
+
7
+ The Matter Passport schema (`matter-passport-v0.1`) and its companion CC0 artifacts (`calibration_v1.json`, `safety_rules_v1.json`, `hazard_flags_v1.json`) are currently maintained by the Matter project's solo author for the Gemma 4 Impact Challenge submission.
8
+
9
+ This is not a permanent arrangement. The schema is intended to be a public good and should not depend on a single individual indefinitely.
10
+
11
+ ## Stewardship after v1.0
12
+
13
+ When the Matter Passport reaches `v1.0`, stewardship will be proposed to a neutral body. Candidates, in rough order of preference:
14
+
15
+ 1. **Open Source Initiative (OSI)** — natural home for an open standard with an open license stack.
16
+ 2. **Linux Foundation Energy / LF Decentralized Trust** — has hosted similar schema work (e.g., Battery Passport).
17
+ 3. **The CIRPASS consortium** — directly aligned mission; would also strengthen DPP interop.
18
+ 4. **A new dedicated foundation** — only if none of the above accept stewardship.
19
+
20
+ The author commits to reaching out to at least two of these bodies within 60 days of v1.0 release.
21
+
22
+ ## Contribution policy (today through v1.0)
23
+
24
+ - **Issues and proposals:** open via the project repository's issue tracker.
25
+ - **Schema changes:** pull request against `spec/`. Must include rationale and a backwards-compatibility analysis.
26
+ - **Companion-artifact changes** (calibration / safety / hazard rules): may version independently; bump artifact version, append a `changelog` block.
27
+ - **Field additions:** must include the rationale, a reference to prior-art alignment (or explicit statement that the field is novel), and a sample Passport that exercises the new field.
28
+ - **Field removals or semantic changes:** breaking change; require a major version bump and a 6-month deprecation window per `D023`.
29
+
30
+ ## Decision authority (interim)
31
+
32
+ - **Maintainer:** the project author makes editorial decisions until v1.0.
33
+ - **External review:** for any change deemed "load-bearing" (privacy, safety, signing semantics), at least one external reviewer must approve before merge. The reviewer's identity and feedback are recorded in the PR.
34
+ - **Public-comment period:** any breaking change is announced with a 14-day comment window before merge.
35
+
36
+ ## Provenance commitments
37
+
38
+ The Matter project commits to:
39
+
40
+ 1. **Honest evaluation disclosure.** Every Stage of the project's evaluation (Stage 0 N=43, Stage 1 target N=200) is documented in `findings/`. No cherry-picked numbers in the writeup.
41
+ 2. **Reproducibility.** Every published number must be reproducible from a public Kaggle or Colab notebook in this repository.
42
+ 3. **No retroactive redaction.** If a finding is wrong, we publish the correction; we do not silently delete.
43
+ 4. **Calibration honesty.** When the Passport's `confidence_calibrated: true` flag is set, the calibration table that produced it is publicly available.
44
+
45
+ ## Conflict of interest
46
+
47
+ The maintainer is a solo individual with no current commercial product based on Matter. If that changes, the maintainer commits to:
48
+ - Disclosing any commercial interest in the Matter platform on the project README.
49
+ - Recusing from decisions that materially advantage that commercial interest, if any.
50
+ - Treating the stewardship handover (above) as the structural answer.
51
+
52
+ ## License of this document
53
+
54
+ This governance addendum is itself CC0-1.0 — fork it, reuse it, adapt it for other open standards.
spec/hazard_flags_v1.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_meta": {
3
+ "version": "1.0",
4
+ "description": "Class → canonical hazard_flags map (D019). Run before the do_not guardrail. Augments the model's hazard_flags rather than replacing them.",
5
+ "license": "CC0-1.0"
6
+ },
7
+ "rules": {
8
+ "sharps": {"required": ["sharps_injury_risk", "biohazard"], "context_dependent": []},
9
+ "diagnostic": {"required": ["biohazard"], "context_dependent": []},
10
+ "wound_care": {"required": [], "context_dependent": ["biohazard"]},
11
+ "medicine_bottle": {"required": [], "context_dependent": ["expired_pharmaceutical", "controlled_substance"]},
12
+ "lithium_ion_cell": {"required": ["thermal_runaway_risk"], "context_dependent": []},
13
+ "battery_pack": {"required": ["thermal_runaway_risk"], "context_dependent": []},
14
+ "lead_acid_battery": {"required": ["lead_toxicity", "acid_corrosion"], "context_dependent": []}
15
+ }
16
+ }
spec/jurisdictions/README.md ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Matter Jurisdiction Packs
2
+
3
+ CC0 routing rules per jurisdiction. For each (head, identity_class) pair, a pack defines the local default `next_best_action` (NBA). Each pack also lists the locally available NBA enum (extending the base set if the jurisdiction has unique infrastructure), regulation references, and infrastructure context.
4
+
5
+ The MIE picks the right pack based on the `Capture.jurisdiction` field (or falls back to the default head-level rules in `safety_rules_v1.json`).
6
+
7
+ ## Shipping in v0.1
8
+
9
+ | Pack | Code | Notable property |
10
+ |---|---|---|
11
+ | [`nyc.json`](nyc.json) | `US-NY-NYC` | Decentralized municipal pickup + retailer takeback + DSNY SAFE Disposal Days |
12
+ | [`eu-dpp.json`](eu-dpp.json) | `EU` | Strong EPR; aligned with the upcoming Digital Product Passport mandate; higher fiber-recycler share for textiles |
13
+ | [`pune.json`](pune.json) | `IN-MH-PUN` | Adds `informal_kabadi_pickup` as a first-class NBA — recognizes the recovery path most commonly used in practice in Pune. SWaCH cooperative integration. |
14
+
15
+ Each pack is **self-contained** (no inheritance from a base file) so consumers can adopt one pack without loading the rest of Matter's spec.
16
+
17
+ ## Adding a new pack
18
+
19
+ 1. Copy `nyc.json` as a starting template.
20
+ 2. Update `_meta.jurisdiction_code` (use ISO 3166-2 where possible).
21
+ 3. Update `regulation_refs` with the local rule citations.
22
+ 4. Adjust `head_default_actions` per local infrastructure reality.
23
+ 5. Extend `actions_available` only if your jurisdiction has a recovery path that doesn't fit any existing NBA enum (Pune's `informal_kabadi_pickup` is the canonical example).
24
+ 6. Submit as a PR to `spec/jurisdictions/` — packs are versioned independently of the Passport schema.
25
+
26
+ ## What the packs are NOT
27
+
28
+ - Not a replacement for `safety_rules_v1.json` — the do-not guardrail still runs as a global safety primitive.
29
+ - Not legal advice — they're routing defaults built from public regulation citations and field-pilot context.
30
+ - Not exhaustive — Stage 1 will add São Paulo, Lagos, Berlin, and Brooklyn (sub-municipal) packs as field-pilot evidence accumulates.
spec/jurisdictions/eu-dpp.json ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_meta": {
3
+ "version": "1.0",
4
+ "license": "CC0-1.0",
5
+ "jurisdiction_code": "EU",
6
+ "name": "European Union — Digital Product Passport scope",
7
+ "regulation_refs": [
8
+ "ESPR — EU Ecodesign for Sustainable Products Regulation (2024/1781)",
9
+ "EU Waste Framework Directive 2008/98/EC (consolidated 2018)",
10
+ "EU Battery Regulation 2023/1542 — battery passport mandatory 2027",
11
+ "EU Packaging and Packaging Waste Regulation (PPWR, 2024)",
12
+ "EU Strategy for Sustainable and Circular Textiles (2022)",
13
+ "WEEE Directive 2012/19/EU"
14
+ ],
15
+ "description": "EU-wide rules aligned with the Digital Product Passport (DPP) framework. Battery + electronics + textile DPPs become mandatory at staggered dates 2026–2030. Matter Passport is interop-shaped with DPP per spec/passport-prior-art.md."
16
+ },
17
+ "actions_available": [
18
+ "blue_bin_recycle", "compost_bin", "general_waste",
19
+ "retailer_takeback", "ewaste_collection_event", "battery_drop_off",
20
+ "second_life_stationary_storage", "certified_ev_recycler",
21
+ "pharmacy_takeback", "biomedical_waste_collector", "recycle_paper",
22
+ "aggregate_recycler", "metal_recycler", "gypsum_recycler", "specialty_recycler",
23
+ "resale_reuse", "fiber_recycler", "downcycle_rags", "leather_reclaim",
24
+ "special_collection", "landfill"
25
+ ],
26
+ "head_default_actions": {
27
+ "domestic": {
28
+ "plastic_bottle": "blue_bin_recycle",
29
+ "multilayer_plastic": "special_collection",
30
+ "carton": "blue_bin_recycle",
31
+ "metal_can": "blue_bin_recycle",
32
+ "organic": "compost_bin",
33
+ "glass": "blue_bin_recycle",
34
+ "paper": "blue_bin_recycle"
35
+ },
36
+ "ewaste": {
37
+ "laptop": "retailer_takeback",
38
+ "smartphone": "retailer_takeback",
39
+ "cable": "retailer_takeback",
40
+ "power_adapter": "retailer_takeback",
41
+ "audio": "retailer_takeback",
42
+ "battery": "battery_drop_off",
43
+ "pcb": "ewaste_collection_event",
44
+ "lighting": "retailer_takeback"
45
+ },
46
+ "ev": {
47
+ "lithium_ion_cell": "second_life_stationary_storage",
48
+ "lead_acid_battery": "certified_ev_recycler",
49
+ "battery_pack": "certified_ev_recycler",
50
+ "connector": "retailer_takeback",
51
+ "pcb": "ewaste_collection_event"
52
+ },
53
+ "medical": {
54
+ "blister_pack": "general_waste",
55
+ "sharps": "biomedical_waste_collector",
56
+ "medicine_bottle": "pharmacy_takeback",
57
+ "diagnostic": "biomedical_waste_collector",
58
+ "wound_care": "biomedical_waste_collector",
59
+ "packaging": "recycle_paper",
60
+ "device": "pharmacy_takeback"
61
+ },
62
+ "cd": {
63
+ "concrete": "aggregate_recycler",
64
+ "brick": "aggregate_recycler",
65
+ "gypsum": "gypsum_recycler",
66
+ "steel": "metal_recycler",
67
+ "plastic": "specialty_recycler",
68
+ "ceramic": "aggregate_recycler",
69
+ "insulation": "specialty_recycler"
70
+ },
71
+ "textile": {
72
+ "cotton_garment": "resale_reuse",
73
+ "denim": "fiber_recycler",
74
+ "blended_garment": "fiber_recycler",
75
+ "synthetic_garment": "fiber_recycler",
76
+ "wool_garment": "fiber_recycler",
77
+ "leather": "leather_reclaim",
78
+ "mixed_fabric_scrap": "downcycle_rags"
79
+ }
80
+ },
81
+ "infrastructure_notes": {
82
+ "extended_producer_responsibility": "Strong — manufacturers fund collection + recovery for batteries, packaging, electronics, textiles (PPWR + WEEE + Battery Reg)",
83
+ "dpp_alignment": "Battery Reg makes DPP mandatory Feb 2027 for industrial + EV batteries; ESPR phases other categories 2026–2030",
84
+ "default_textile_path": "Higher fiber-recycler share than NYC because EU Textile Strategy mandates separate textile collection by 2025"
85
+ }
86
+ }
spec/jurisdictions/nyc.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_meta": {
3
+ "version": "1.0",
4
+ "license": "CC0-1.0",
5
+ "jurisdiction_code": "US-NY-NYC",
6
+ "name": "New York City",
7
+ "regulation_refs": [
8
+ "DSNY — NYC Department of Sanitation Recycling Rules (2025)",
9
+ "NY State Environmental Conservation Law Article 27 — EPR Packaging (2025)",
10
+ "NYC Local Law 97 — Building Emissions (2024 enforcement)",
11
+ "NY State Battery Stewardship — Rechargeable Battery Recycling Act"
12
+ ],
13
+ "description": "NYC routing rules. Decentralized municipal pickup + retailer takeback + DSNY SAFE Disposal Days. Source-of-truth for matter routing on US-NY-NYC."
14
+ },
15
+ "actions_available": [
16
+ "blue_bin_recycle", "compost_bin", "general_waste",
17
+ "retailer_takeback", "ewaste_collection_event", "battery_drop_off",
18
+ "second_life_stationary_storage", "certified_ev_recycler",
19
+ "pharmacy_takeback", "biomedical_waste_collector", "recycle_paper",
20
+ "aggregate_recycler", "metal_recycler", "gypsum_recycler", "specialty_recycler",
21
+ "resale_reuse", "fiber_recycler", "downcycle_rags", "leather_reclaim",
22
+ "special_collection", "landfill"
23
+ ],
24
+ "head_default_actions": {
25
+ "domestic": {
26
+ "plastic_bottle": "blue_bin_recycle",
27
+ "multilayer_plastic": "general_waste",
28
+ "carton": "blue_bin_recycle",
29
+ "metal_can": "blue_bin_recycle",
30
+ "organic": "compost_bin",
31
+ "glass": "blue_bin_recycle",
32
+ "paper": "blue_bin_recycle"
33
+ },
34
+ "ewaste": {
35
+ "laptop": "ewaste_collection_event",
36
+ "smartphone": "retailer_takeback",
37
+ "cable": "retailer_takeback",
38
+ "power_adapter": "ewaste_collection_event",
39
+ "audio": "retailer_takeback",
40
+ "battery": "battery_drop_off",
41
+ "pcb": "ewaste_collection_event",
42
+ "lighting": "retailer_takeback"
43
+ },
44
+ "ev": {
45
+ "lithium_ion_cell": "battery_drop_off",
46
+ "lead_acid_battery": "certified_ev_recycler",
47
+ "battery_pack": "certified_ev_recycler",
48
+ "connector": "retailer_takeback",
49
+ "pcb": "ewaste_collection_event"
50
+ },
51
+ "medical": {
52
+ "blister_pack": "general_waste",
53
+ "sharps": "biomedical_waste_collector",
54
+ "medicine_bottle": "pharmacy_takeback",
55
+ "diagnostic": "biomedical_waste_collector",
56
+ "wound_care": "general_waste",
57
+ "packaging": "recycle_paper",
58
+ "device": "pharmacy_takeback"
59
+ },
60
+ "cd": {
61
+ "concrete": "aggregate_recycler",
62
+ "brick": "aggregate_recycler",
63
+ "gypsum": "gypsum_recycler",
64
+ "steel": "metal_recycler",
65
+ "plastic": "specialty_recycler",
66
+ "ceramic": "aggregate_recycler",
67
+ "insulation": "specialty_recycler"
68
+ },
69
+ "textile": {
70
+ "cotton_garment": "resale_reuse",
71
+ "denim": "fiber_recycler",
72
+ "blended_garment": "downcycle_rags",
73
+ "synthetic_garment": "downcycle_rags",
74
+ "wool_garment": "fiber_recycler",
75
+ "leather": "leather_reclaim",
76
+ "mixed_fabric_scrap": "downcycle_rags"
77
+ }
78
+ },
79
+ "infrastructure_notes": {
80
+ "pickup_cadence": "Curbside weekly; organics weekly post-2025 mandate; SAFE Disposal Days quarterly per borough",
81
+ "informal_sector": "Active but unregulated — bottle-deposit canners (NY $0.05/container)",
82
+ "key_partners_for_pilot": ["LES Ecology Center", "Sure We Can (Brooklyn)", "Big Reuse"]
83
+ }
84
+ }
spec/jurisdictions/pune.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_meta": {
3
+ "version": "1.0",
4
+ "license": "CC0-1.0",
5
+ "jurisdiction_code": "IN-MH-PUN",
6
+ "name": "Pune — Maharashtra, India",
7
+ "regulation_refs": [
8
+ "Plastic Waste Management Rules, 2016 (amended 2022) — India",
9
+ "Battery Waste Management Rules, 2022 — India",
10
+ "E-Waste (Management) Rules, 2022 — India",
11
+ "Solid Waste Management Rules, 2016 — India",
12
+ "Bio-Medical Waste Management Rules, 2016 — CPCB",
13
+ "PMC Pune Municipal Corporation source-segregation by-laws (2017)"
14
+ ],
15
+ "description": "Pune routing rules. Decentralized informal-sector recovery (SWaCH cooperative) overlaid on PMC kerbside collection. Different infrastructure shape from NYC + EU — denser informal recovery, smaller formal recycler footprint."
16
+ },
17
+ "actions_available": [
18
+ "blue_bin_recycle", "compost_bin", "general_waste",
19
+ "retailer_takeback", "ewaste_collection_event", "battery_drop_off",
20
+ "second_life_stationary_storage", "certified_ev_recycler",
21
+ "pharmacy_takeback", "biomedical_waste_collector", "recycle_paper",
22
+ "aggregate_recycler", "metal_recycler", "gypsum_recycler", "specialty_recycler",
23
+ "resale_reuse", "fiber_recycler", "downcycle_rags", "leather_reclaim",
24
+ "special_collection", "landfill",
25
+ "informal_kabadi_pickup"
26
+ ],
27
+ "head_default_actions": {
28
+ "domestic": {
29
+ "plastic_bottle": "informal_kabadi_pickup",
30
+ "multilayer_plastic": "general_waste",
31
+ "carton": "informal_kabadi_pickup",
32
+ "metal_can": "informal_kabadi_pickup",
33
+ "organic": "compost_bin",
34
+ "glass": "informal_kabadi_pickup",
35
+ "paper": "informal_kabadi_pickup"
36
+ },
37
+ "ewaste": {
38
+ "laptop": "ewaste_collection_event",
39
+ "smartphone": "informal_kabadi_pickup",
40
+ "cable": "informal_kabadi_pickup",
41
+ "power_adapter": "ewaste_collection_event",
42
+ "audio": "informal_kabadi_pickup",
43
+ "battery": "battery_drop_off",
44
+ "pcb": "ewaste_collection_event",
45
+ "lighting": "ewaste_collection_event"
46
+ },
47
+ "ev": {
48
+ "lithium_ion_cell": "certified_ev_recycler",
49
+ "lead_acid_battery": "certified_ev_recycler",
50
+ "battery_pack": "certified_ev_recycler",
51
+ "connector": "informal_kabadi_pickup",
52
+ "pcb": "ewaste_collection_event"
53
+ },
54
+ "medical": {
55
+ "blister_pack": "general_waste",
56
+ "sharps": "biomedical_waste_collector",
57
+ "medicine_bottle": "pharmacy_takeback",
58
+ "diagnostic": "biomedical_waste_collector",
59
+ "wound_care": "biomedical_waste_collector",
60
+ "packaging": "informal_kabadi_pickup",
61
+ "device": "pharmacy_takeback"
62
+ },
63
+ "cd": {
64
+ "concrete": "aggregate_recycler",
65
+ "brick": "aggregate_recycler",
66
+ "gypsum": "landfill",
67
+ "steel": "informal_kabadi_pickup",
68
+ "plastic": "specialty_recycler",
69
+ "ceramic": "aggregate_recycler",
70
+ "insulation": "landfill"
71
+ },
72
+ "textile": {
73
+ "cotton_garment": "resale_reuse",
74
+ "denim": "resale_reuse",
75
+ "blended_garment": "downcycle_rags",
76
+ "synthetic_garment": "downcycle_rags",
77
+ "wool_garment": "resale_reuse",
78
+ "leather": "leather_reclaim",
79
+ "mixed_fabric_scrap": "downcycle_rags"
80
+ }
81
+ },
82
+ "infrastructure_notes": {
83
+ "informal_sector": "Dominant. SWaCH (Solid Waste Collection & Handling) cooperative — ~3,800 waste pickers as registered service providers under PMC contracts.",
84
+ "kabadi_pickup": "Door-to-door scrap dealers (kabadiwallahs) buy paper, metal, rigid plastics, glass — material-specific pricing per kg.",
85
+ "informal_action_rationale": "Including 'informal_kabadi_pickup' as a first-class NBA explicitly recognizes the recovery path most actually used in Pune — pretending the formal blue-bin is the default would be unfair to actual practice and to informal-sector workers who do the recovery work.",
86
+ "key_partners_for_pilot": ["SWaCH Pune", "Hasiru Dala (Bengaluru — sister NGO)", "PMC ULB liaisons"]
87
+ }
88
+ }
spec/matter-passport-v0.1.md ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Matter Passport — v0.1
2
+
3
+ **Status:** Frozen for CC0 release · **License:** CC0-1.0 · **Stewardship:** Matter project maintainers (governance handover proposed at v1.0)
4
+
5
+ > A universal, content-addressed, jurisdiction-aware descriptor of a material at a point in its lifecycle.
6
+
7
+ ## Design principles
8
+
9
+ 1. **Append-only.** A Passport is a version in a chain (`prev` field). Not a mutable record. Aligns with EU DPP event-log pattern and W3C VC immutability.
10
+ 2. **Content-addressed.** Primary identity is `mp_sha256:` of the capture + identity block. Optional `gs1_alias` for items that carry a GTIN.
11
+ 3. **Per-field confidence.** `identity`, `state`, and `next_best_action` confidence are independent — these blocks fail independently in practice (A1 v2, A6 evidence). Calibration applies per-field.
12
+ 4. **Jurisdiction-aware.** Regulation references travel with the Passport; rule contents are versioned separately and resolved at consumption (DPP pattern).
13
+ 5. **Provenance-honest.** Every Passport declares the model, runtime, on-device status, calibration table reference, and whether the safety guardrail fired.
14
+
15
+ ## Companion CC0 artifacts (versioned independently)
16
+
17
+ | File | Purpose | Decision ref |
18
+ |---|---|---|
19
+ | `matter-passport-v0.1.schema.json` | JSON Schema validator | this document |
20
+ | `passport-prior-art.md` | Alignment with DPP / CIRPASS / W3C VC / GS1 / GTR-22 | this document |
21
+ | `calibration_v1.json` | Confidence calibration table (histogram, fit on N=43) | D015 |
22
+ | `safety_rules_v1.json` | do_not guardrail rules per identity class | D018 |
23
+ | `hazard_flags_v1.json` | Class → canonical hazard map | D019 |
24
+
25
+ ## Example
26
+
27
+ ```json
28
+ {
29
+ "schema": "matter-passport/v0.1",
30
+ "@context": "https://matter.spec/contexts/v0.1.jsonld",
31
+ "passport_id": "mp_sha256:9f2e...c4",
32
+ "prev": null,
33
+ "timestamp": "2026-04-15T08:12:00Z",
34
+ "capture": {
35
+ "modality": "image",
36
+ "content_hash": "sha256:7af1...",
37
+ "geohash_coarse": "dr5"
38
+ },
39
+ "identity": {
40
+ "class": "lithium_ion_cell",
41
+ "subclass": "NMC_18650",
42
+ "taxonomy": "https://matter.spec/taxonomy/ev/v0.1",
43
+ "confidence": 0.91
44
+ },
45
+ "state": {
46
+ "condition": "degraded",
47
+ "estimated_soh": 0.62,
48
+ "contamination": null,
49
+ "hazard_flags": ["thermal_runaway_risk", "biohazard_if_punctured"],
50
+ "confidence": 0.74
51
+ },
52
+ "value": {
53
+ "economic": {"amount": 180, "currency": "INR", "basis": "scrap_cobalt_index_2026w15"},
54
+ "environmental": {"co2e_avoided_kg": 4.2, "basis": "IPCC_AR6_recycling_factor"}
55
+ },
56
+ "next_best_action": {
57
+ "primary": "second_life_stationary_storage",
58
+ "secondary": "certified_ev_recycler",
59
+ "do_not": ["landfill", "general_waste", "blue_bin_recycle"],
60
+ "confidence": 0.83,
61
+ "fallback_used": false
62
+ },
63
+ "routing": {
64
+ "jurisdiction": "IN-MH-PUN",
65
+ "regulation_refs": ["BWMR_2022", "EPR_batteries_2022"],
66
+ "function_call": "dispatch_certified_ev_recycler"
67
+ },
68
+ "provenance": {
69
+ "model": "gemma-4-e4b@unsloth-bnb-4bit",
70
+ "runtime": "cactus",
71
+ "on_device": true,
72
+ "lora": "matter-ev/v0.1@sha256:ab12...",
73
+ "confidence_calibrated": true,
74
+ "calibration_ref": "histogram@sha256:c4d1..."
75
+ },
76
+ "signature": null
77
+ }
78
+ ```
79
+
80
+ ## Confidence calibration (D015)
81
+
82
+ `confidence` is a first-class Passport field. Because general-purpose vision-language models (Gemma 4 included) report systematically over-confident confidence values when used zero-shot, consumers of a Passport must know whether a confidence has been recalibrated against held-out data.
83
+
84
+ - If `provenance.confidence_calibrated` is `true`, every `confidence` field has been passed through the function identified by `provenance.calibration_ref`. Calibrated confidences approximate posterior probabilities and are comparable across Passports produced by the same method + table.
85
+ - If `false`, confidences are raw model outputs — comparable within a single Passport, but not interpretable as probabilities and not comparable across runtimes or heads.
86
+
87
+ Calibration tables version independently of the schema. v0.1 ships `calibration_v1.json` (histogram method, fit on N=43, in-sample ECE 0.000 / 5-fold CV-ECE 0.167). Stage 1 will refit on N≥200.
88
+
89
+ ## Safety primitives (D018, D019, D020)
90
+
91
+ The MIE pipeline runs **four layered platform primitives** before emitting a Passport:
92
+
93
+ ```
94
+ Gemma 4 raw output
95
+ → JSON + enum validator (D012)
96
+ → calibration (D015 — calibration_v1.json)
97
+ → hazard auto-flagger (D019 — hazard_flags_v1.json)
98
+ → do_not guardrail (D018 — safety_rules_v1.json)
99
+ → Material Passport
100
+ ```
101
+
102
+ `hazard_flags` on a Passport is the **union** of model-emitted and class-implied hazards. `next_best_action.fallback_used = true` indicates the guardrail overrode the model's NBA because it violated the item's `do_not` set; in that case `primary` is a safe-default action, not the raw model output.
103
+
104
+ ## Privacy
105
+
106
+ `capture.geohash_coarse` is **optional**, **maximum 5 characters** (~4.9 km precision), and **excluded by default**. Consumers MUST NOT combine `geohash_coarse` + `timestamp` + `identity` for individual tracking. GDPR-compliant by design.
107
+
108
+ ## Resolved design decisions (v0.1)
109
+
110
+ | Question | Resolution | Rationale |
111
+ |---|---|---|
112
+ | Stateful vs append-only | Append-only via `prev` chain | DPP / W3C VC / ISO 59040 |
113
+ | Identity scheme | Content-hash primary; optional `gs1_alias` | Captures, not products; GS1 only when GTIN visible |
114
+ | Confidence representation | Per-field on identity / state / NBA | A1 v2, A6 — these blocks fail independently |
115
+ | Jurisdiction grounding | Embed `regulation_refs`; resolve rules at consumption | DPP pattern |
116
+ | Signing | Optional in v0.1; W3C VC `proof` shape when present | Aligns with VC ecosystem; sets v0.2 path to full VC envelope |
117
+ | Wire format | JSON Schema mandatory; optional `@context` for JSON-LD | Validation everywhere; JSON-LD as upgrade path |
118
+ | Versioning | SemVer, `v0.x` until 1.0 | Industry standard |
119
+ | Embedded vs referenced taxonomy | Referenced via URI | Lighter Passport; taxonomy evolves independently |
120
+ | `geohash_coarse` privacy | Optional, max 5 chars, excluded by default | GDPR principle |
121
+ | Chain of custody | Existing `prev` field | DPP event-log pattern |
122
+ | Multi-material Passports | Defer to v0.2 via `components` array | Complexity not justified for v0.1 |
123
+
124
+ ## Stage 1 / v0.2 commitments
125
+
126
+ - Full W3C VC envelope wrapping (Matter Passport as `credentialSubject`)
127
+ - `components` array for multi-material items (laptop with 20 sub-Passports, building with 200)
128
+ - ISO 59040 PCDS export profile
129
+ - JSON-LD context document at `spec/contexts/v0.1.jsonld`
130
+ - Calibration table refit on N ≥ 200 with native-speaker review of multilingual data
131
+ - Stewardship handover proposal to a neutral body (OSI / Linux Foundation candidates)
132
+
133
+ ## Governance
134
+
135
+ v0.x is maintained by the Matter project. Outside contributions via PR on `spec/`. After v1.0, stewardship is proposed to move to a neutral body. Breaking schema changes require a major version bump and a 6-month deprecation window.
spec/matter-passport-v0.1.schema.json ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://matter.spec/matter-passport/v0.1.schema.json",
4
+ "title": "Matter Passport",
5
+ "description": "v0.1 — a universal, content-addressed, jurisdiction-aware descriptor of a material at a point in its lifecycle. Aligns with EU DPP / CIRPASS layering and W3C Verifiable Credentials proof shape. CC0-1.0.",
6
+ "type": "object",
7
+ "required": ["schema", "passport_id", "timestamp", "capture", "identity", "state", "next_best_action", "provenance"],
8
+ "properties": {
9
+ "schema": {"const": "matter-passport/v0.1"},
10
+ "@context": {
11
+ "type": ["string", "array"],
12
+ "description": "Optional JSON-LD context for downstream interop with EU DPP and W3C Verifiable Credentials. Recommended URI when present: 'https://matter.spec/contexts/v0.1.jsonld'."
13
+ },
14
+ "passport_id": {
15
+ "type": "string",
16
+ "pattern": "^mp_sha256:[0-9a-f]{6,64}$",
17
+ "description": "Content-addressed identifier derived from sha256(capture.content_hash || identity.class || timestamp)."
18
+ },
19
+ "prev": {
20
+ "type": ["string", "null"],
21
+ "description": "passport_id of the prior version in this material's lifecycle chain. Null for the first scan. Enables append-only chain-of-custody (CIRPASS dynamic-data layer)."
22
+ },
23
+ "timestamp": {"type": "string", "format": "date-time"},
24
+ "capture": {
25
+ "type": "object",
26
+ "required": ["modality", "content_hash"],
27
+ "properties": {
28
+ "modality": {"enum": ["image", "video", "text", "audio", "multimodal"]},
29
+ "content_hash": {"type": "string"},
30
+ "geohash_coarse": {
31
+ "type": "string",
32
+ "maxLength": 5,
33
+ "description": "Optional, MAX 5 characters (~4.9 km precision). Excluded by default for privacy. Consumers MUST NOT combine with timestamp + identity for individual tracking."
34
+ }
35
+ }
36
+ },
37
+ "identity": {
38
+ "type": "object",
39
+ "required": ["class", "taxonomy", "confidence"],
40
+ "properties": {
41
+ "class": {"type": "string"},
42
+ "subclass": {"type": "string"},
43
+ "taxonomy": {"type": "string", "description": "URI to the taxonomy version, e.g. 'matter-ev/v0.1'. Resolved at consumption."},
44
+ "gs1_alias": {
45
+ "type": "string",
46
+ "description": "Optional GS1 Digital Link URL when a GTIN is visible on the item. Format per GS1 Digital Link spec."
47
+ },
48
+ "confidence": {"type": "number", "minimum": 0, "maximum": 1}
49
+ }
50
+ },
51
+ "state": {
52
+ "type": "object",
53
+ "required": ["condition", "confidence"],
54
+ "properties": {
55
+ "condition": {"type": "string"},
56
+ "estimated_soh": {"type": "number", "minimum": 0, "maximum": 1, "description": "State-of-Health for batteries; aligns with UNECE GTR-22 vocabulary."},
57
+ "contamination": {"type": ["string", "null"]},
58
+ "hazard_flags": {
59
+ "type": "array",
60
+ "items": {"type": "string"},
61
+ "description": "Union of model-emitted and class-implied hazards. See spec/hazard_flags_v1.json for canonical class → hazards mapping."
62
+ },
63
+ "confidence": {"type": "number", "minimum": 0, "maximum": 1}
64
+ }
65
+ },
66
+ "value": {
67
+ "type": "object",
68
+ "properties": {
69
+ "economic": {
70
+ "type": "object",
71
+ "properties": {
72
+ "amount": {"type": "number"},
73
+ "currency": {"type": "string"},
74
+ "basis": {"type": "string"}
75
+ }
76
+ },
77
+ "environmental": {
78
+ "type": "object",
79
+ "properties": {
80
+ "co2e_avoided_kg": {"type": "number"},
81
+ "basis": {"type": "string"}
82
+ }
83
+ }
84
+ }
85
+ },
86
+ "next_best_action": {
87
+ "type": "object",
88
+ "required": ["primary", "confidence"],
89
+ "properties": {
90
+ "primary": {"type": "string"},
91
+ "secondary": {"type": "string"},
92
+ "do_not": {"type": "array", "items": {"type": "string"}},
93
+ "confidence": {"type": "number", "minimum": 0, "maximum": 1},
94
+ "fallback_used": {
95
+ "type": "boolean",
96
+ "description": "True iff the MIE's rule-based guardrail overrode the model's proposed action because it violated the item's do_not set (per spec/safety_rules_v1.json). When true, `primary` is a safe-default action, not the raw model output."
97
+ }
98
+ }
99
+ },
100
+ "routing": {
101
+ "type": "object",
102
+ "properties": {
103
+ "jurisdiction": {"type": "string"},
104
+ "regulation_refs": {
105
+ "type": "array",
106
+ "items": {"type": "string"},
107
+ "description": "Embedded references to regulations; rule contents are versioned separately and resolved at consumption (DPP pattern)."
108
+ },
109
+ "function_call": {"type": "string"}
110
+ }
111
+ },
112
+ "provenance": {
113
+ "type": "object",
114
+ "required": ["model", "runtime", "on_device"],
115
+ "properties": {
116
+ "model": {"type": "string"},
117
+ "runtime": {"enum": ["unsloth", "cactus", "litert", "llama.cpp", "mlx", "ollama", "other"]},
118
+ "on_device": {"type": "boolean"},
119
+ "lora": {"type": "string"},
120
+ "confidence_calibrated": {
121
+ "type": "boolean",
122
+ "description": "True iff all `confidence` fields in this Passport have been passed through a published calibration function. False means the confidences are raw model outputs and MUST NOT be interpreted as probabilities."
123
+ },
124
+ "calibration_ref": {
125
+ "type": "string",
126
+ "description": "Identifier of the calibration table applied, of the form `<method>@<sha256-prefix>`. Resolvable via spec/calibration_v1.json. Required iff confidence_calibrated is true."
127
+ }
128
+ }
129
+ },
130
+ "signature": {
131
+ "oneOf": [
132
+ {"type": "null"},
133
+ {
134
+ "type": "object",
135
+ "description": "Optional cryptographic proof. When present, follows W3C Verifiable Credentials proof shape so a Matter Passport may be wrapped as a VC credentialSubject in v0.2.",
136
+ "required": ["type", "created", "verificationMethod", "proofValue"],
137
+ "properties": {
138
+ "type": {"type": "string", "description": "e.g. 'Ed25519Signature2020'"},
139
+ "created": {"type": "string", "format": "date-time"},
140
+ "verificationMethod": {"type": "string", "description": "URI or DID identifying the signing key"},
141
+ "proofPurpose": {"type": "string"},
142
+ "proofValue": {"type": "string"}
143
+ }
144
+ }
145
+ ]
146
+ }
147
+ }
148
+ }
spec/medical-disclaimer.md ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Matter — Medical Disclaimer
2
+
3
+ **Status:** Required text · **License:** CC0-1.0 · **Decision ref:** D018, D019
4
+
5
+ Any UI surface that uses Matter's Medical head MUST display the following disclaimer before the user submits a query that would invoke the Medical head, and again on any returned Passport whose `identity.class` falls in the Medical head's taxonomy.
6
+
7
+ The disclaimer text is the canonical English source. Translations are encouraged; the Matter project will host reviewed translations at `spec/medical-disclaimer/<lang>.md` once native review is complete.
8
+
9
+ ---
10
+
11
+ ## Required disclaimer (English source)
12
+
13
+ > **Matter does not provide medical advice.**
14
+ >
15
+ > Matter helps identify pharmaceutical packaging, sharps, diagnostic items, and other medical-adjacent materials, and suggests how to dispose of them under local rules. It is **not** a substitute for advice from a doctor, pharmacist, nurse, or licensed medical waste hauler.
16
+ >
17
+ > If an item:
18
+ > - is a used needle, syringe, or other sharps — **do not put it in regular trash.** Use a licensed biomedical waste collector. Matter's rule-based guardrail will block this in software, but you are still responsible for safe handling at the point of disposal.
19
+ > - is an unidentified pill, capsule, or liquid — **do not handle, ingest, or pour it down a drain.** Take it to a pharmacy with a takeback program (US: DEA-approved kiosks; EU: pharmacist-managed disposal).
20
+ > - has been in contact with bodily fluids — treat it as biomedical waste regardless of what Matter suggests.
21
+ >
22
+ > Matter's identification accuracy on medical items, as of v0.1, is documented in `findings/A6_analysis.md`. The model's self-reported confidence does **not** mean the suggestion is safe; the rule-based safety pipeline (`spec/safety_rules_v1.json`, `spec/hazard_flags_v1.json`) is what we ask you to trust, and even then only as a starting point.
23
+
24
+ ---
25
+
26
+ ## When this disclaimer must appear
27
+
28
+ | Surface | Trigger | Display rule |
29
+ |---|---|---|
30
+ | Web demo | Domain selector includes "medical" | Modal on first medical query per session |
31
+ | Android APK | Camera scan returns identity.class in Medical taxonomy | Inline panel above the Passport result |
32
+ | API / SDK consumer | Passport returned with Medical taxonomy | Required as part of the SDK return-type docstring; SDK may NOT silently strip |
33
+ | Demo video | A medical item appears on screen | On-screen text overlay for ≥2 seconds |
34
+
35
+ ## Why this is required (not optional)
36
+
37
+ Two reasons:
38
+
39
+ 1. **A6 evidence.** Gemma 4 missed 5 of 8 expected biohazard flags in our medical evaluation. The model alone is not safe; the rules layer is what makes Matter trustworthy. Users must know the difference.
40
+ 2. **Gemma Prohibited Use Policy.** "No use for high-risk medical decisions without expert oversight." A clear disclaimer is the documented operational pattern that keeps Matter inside that line.
41
+
42
+ ## How to translate
43
+
44
+ Translations are welcome via PR on `spec/medical-disclaimer/<lang>.md`. Translations MUST:
45
+ - preserve the safety-critical bullets (sharps, unidentified pills, bodily fluids) verbatim in meaning.
46
+ - be reviewed by a native speaker before merging.
47
+ - include a `_meta.reviewer` field naming the reviewer.
spec/passport-prior-art.md ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Matter Passport — Prior-Art Alignment
2
+
3
+ **Status:** ships with `matter-passport/v0.1` · **License:** CC0-1.0
4
+
5
+ This document maps Matter Passport v0.1 fields to existing standards. Published as part of the v0.1 release so consumers and reviewers can evaluate interoperability without reading the design history.
6
+
7
+ ## Standards in scope
8
+
9
+ - **EU Digital Product Passport (DPP / ESPR 2024)** — EU regulatory framework
10
+ - **CIRPASS** — EU consortium reference architecture for DPP
11
+ - **W3C Verifiable Credentials (VC)** — cryptographic credential format
12
+ - **GS1 Digital Link** — URL-based product identity
13
+ - **ISO 59040** — Product Circularity Data Sheet (PCDS)
14
+ - **UNECE GTR-22** — Global Technical Regulation for EV battery passports
15
+ - **Schema.org Product** — generic product vocabulary
16
+
17
+ ## CIRPASS three-layer mapping
18
+
19
+ CIRPASS organizes a Product Passport into identity / dynamic / static layers. Matter follows the same shape:
20
+
21
+ | CIRPASS layer | Matter Passport blocks |
22
+ |---|---|
23
+ | Identity | `passport_id`, `prev`, `timestamp`, `capture`, `identity`, `provenance` |
24
+ | Dynamic | `state`, `next_best_action`, `value`, `signature` |
25
+ | Static | `routing`, `identity.taxonomy`, `routing.regulation_refs` |
26
+
27
+ ## Field-level alignment
28
+
29
+ ### `passport_id` and `prev`
30
+ - **DPP:** event-log pattern — each event is a new entry referencing the prior. Matter's `prev` matches.
31
+ - **W3C VC:** each credential is immutable; revocation handled separately. Matter aligns.
32
+
33
+ ### `identity.class` and `identity.taxonomy`
34
+ - **CIRPASS:** taxonomy resolution via URI. Matter does the same — `taxonomy` is a URI, not embedded.
35
+ - **Schema.org:** maps to `additionalType` for SEO interop. Optional via `@context`.
36
+
37
+ ### `identity.gs1_alias`
38
+ - **GS1 Digital Link:** when an item carries a visible GTIN (e.g., a paracetamol box, a brand-new laptop), Matter optionally records the GS1 Digital Link URL alongside the content-hash identity. Most Matter items (a brick, a piece of rebar, a used syringe) have no GTIN — content-hash remains primary.
39
+
40
+ ### `identity.confidence`, `state.confidence`, `next_best_action.confidence`
41
+ - **No prior-art equivalent.** Per-field confidence is novel for circular-economy schemas. A1 v2 + A6 evidence shows identity / state / action fail independently, so per-field is the only honest representation. Calibration table at `spec/calibration_v1.json` documents the meaning.
42
+
43
+ ### `state.estimated_soh`
44
+ - **UNECE GTR-22:** State-of-Health for EV batteries. Direct field-level alignment. Stage 1 EV LoRA training will target GTR-22 vocabulary.
45
+
46
+ ### `state.hazard_flags`
47
+ - **UNECE GTR-22:** hazardous-materials list for batteries.
48
+ - **Matter extension:** generalizes to all domains (medical biohazards, C&D contaminants, etc.). Class-implied hazards documented in `spec/hazard_flags_v1.json`.
49
+
50
+ ### `next_best_action.do_not` and `fallback_used`
51
+ - **No prior-art equivalent.** Matter's contribution. Documents safety-critical exclusions and whether the MIE's rule-based guardrail overrode a model recommendation. Rules at `spec/safety_rules_v1.json`.
52
+
53
+ ### `routing.regulation_refs`
54
+ - **DPP:** regulation references travel with the Passport; rule contents resolved at consumption time. Matter follows this pattern.
55
+
56
+ ### `provenance.confidence_calibrated`, `provenance.calibration_ref`
57
+ - **No prior-art equivalent.** Matter's contribution. AI inference is a first-class concern in Matter; DPP / CIRPASS assume manufacturer-emitted data, so calibration is moot for them. Required for honest AI provenance.
58
+
59
+ ### `signature`
60
+ - **W3C VC `proof` block.** When present, Matter's `signature` shape mirrors the VC `proof` shape (`type`, `created`, `verificationMethod`, `proofValue`). v0.2 will likely add a full VC envelope wrapping option.
61
+
62
+ ### `@context`
63
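+ - **W3C VC + DPP / JSON-LD ecosystem.** Optional. When supplied, the recommended value is `https://matter.spec/contexts/v0.1.jsonld`. The context document itself is not yet published; publishing it at `spec/contexts/v0.1.jsonld` is a Stage 1 / v0.2 commitment (see below).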
64
+
65
+ ## What Matter is deliberately NOT adopting (and why)
66
+
67
+ - **ISO 59040 PCDS** — 50+ binary attributes per item. Too heavyweight for a zero-shot inference model. Future export-format target, not input.
68
+ - **Madaster building-passport schema** — closed commercial format. Matter's CC0 release is the explicit counter-positioning.
69
+ - **Circularise supply-chain schema** — vendor-locked taxonomies. Same reason.
70
+
71
+ ## Stage 1 / v0.2 commitments
72
+
73
+ - Full W3C VC envelope wrapping (Matter Passport as `credentialSubject`)
74
+ - `components` array for multi-material items (laptop with 20 sub-Passports)
75
+ - ISO 59040 PCDS export profile
76
+ - JSON-LD context document at `spec/contexts/v0.1.jsonld`
77
+ - Stewardship handover proposal to a neutral body (OSI / Linux Foundation candidates)
78
+
79
+ ## Versioning policy
80
+
81
+ - SemVer: `vMAJOR.MINOR` until `v1.0`; then `vMAJOR.MINOR.PATCH`.
82
+ - Breaking changes require a major version bump and a 6-month deprecation window.
83
+ - Calibration tables, safety rules, and hazard rules version independently of the schema.
spec/safety_rules_v1.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_meta": {
3
+ "version": "1.0",
4
+ "description": "Matter MIE do_not guardrail rules — per-identity-class safety constraints. Loaded by matter.guardrail.",
5
+ "jurisdiction_scope": "NYC (DSNY + NY EPR). Jurisdictions add overrides in spec/safety_rules_<region>.json.",
6
+ "license": "CC0-1.0"
7
+ },
8
+ "rules": {
9
+ "sharps": {
10
+ "do_not": ["general_waste", "recycle_paper", "blue_bin_recycle"],
11
+ "safe_default": "biomedical_waste_collector",
12
+ "severity": "critical",
13
+ "reason": "Sharps must only be disposed via licensed biomedical waste collection to prevent needle-stick injury and bloodborne pathogen transmission."
14
+ },
15
+ "diagnostic": {
16
+ "do_not": ["general_waste", "blue_bin_recycle"],
17
+ "safe_default": "biomedical_waste_collector",
18
+ "severity": "high",
19
+ "reason": "Blood- or bodily-fluid-contaminated diagnostic strips are biomedical waste."
20
+ },
21
+ "medicine_bottle": {
22
+ "do_not": ["general_waste"],
23
+ "safe_default": "pharmacy_takeback",
24
+ "severity": "high",
25
+ "reason": "Pharmaceuticals diverted from general waste prevent waterway contamination and misuse."
26
+ },
27
+ "blister_pack": {
28
+ "do_not": [],
29
+ "safe_default": "general_waste",
30
+ "severity": "informational",
31
+ "reason": "Multi-material blisters have no standard recovery path; general_waste is currently the honest default."
32
+ },
33
+ "wound_care": {
34
+ "do_not": ["blue_bin_recycle", "recycle_paper"],
35
+ "safe_default": "general_waste",
36
+ "severity": "high",
37
+ "reason": "Wound-care items can be biohazardous when used; the model cannot disambiguate used vs unused from a photo. Block recycling streams; users should manually route to biomedical_waste_collector if used."
38
+ },
39
+ "lithium_ion_cell": {
40
+ "do_not": ["general_waste", "landfill", "blue_bin_recycle"],
41
+ "safe_default": "battery_drop_off",
42
+ "severity": "critical",
43
+ "reason": "Lithium-ion cells can thermal-runaway in compactors and landfills; battery drop-off is mandatory."
44
+ },
45
+ "battery_pack": {
46
+ "do_not": ["general_waste", "landfill", "blue_bin_recycle"],
47
+ "safe_default": "certified_ev_recycler",
48
+ "severity": "critical",
49
+ "reason": "EV / e-bike battery packs require licensed recyclers due to hazardous materials and fire risk."
50
+ },
51
+ "lead_acid_battery": {
52
+ "do_not": ["general_waste", "landfill", "blue_bin_recycle"],
53
+ "safe_default": "certified_ev_recycler",
54
+ "severity": "critical",
55
+ "reason": "Lead-acid batteries contain regulated lead and sulfuric acid; retailer takeback is legally required in NY."
56
+ },
57
+ "concrete": {
58
+ "do_not": ["landfill"],
59
+ "safe_default": "aggregate_recycler",
60
+ "severity": "economic",
61
+ "reason": "Concrete is high-volume and economically recoverable; landfill diverts recoverable material."
62
+ },
63
+ "brick": {
64
+ "do_not": ["landfill"],
65
+ "safe_default": "aggregate_recycler",
66
+ "severity": "economic",
67
+ "reason": "Brick is reusable or crushable; landfill is environmentally and economically wasteful."
68
+ },
69
+ "steel": {
70
+ "do_not": ["landfill"],
71
+ "safe_default": "metal_recycler",
72
+ "severity": "economic",
73
+ "reason": "Steel has universal recycling markets; landfill destroys recoverable value."
74
+ },
75
+ "gypsum": {
76
+ "do_not": ["landfill"],
77
+ "safe_default": "gypsum_recycler",
78
+ "severity": "environmental",
79
+ "reason": "Landfilled gypsum generates hydrogen sulfide; clean drywall recovery exists in many markets."
80
+ }
81
+ }
82
+ }
sync.sh ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Sync the upstream `matter/` package, the `spec/` directory, and curated example
# images into the Space directory. Run this BEFORE `git push` to ashu-1069/matter.
#
# The script resolves SPACE_DIR as the directory it lives in and REPO_DIR as that
# directory's parent; upstream sources are read from REPO_DIR.
set -euo pipefail

SPACE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(cd "$SPACE_DIR/.." && pwd)"

# Verify both upstream sources exist BEFORE deleting anything, so a wrong
# checkout layout cannot remove the Space's matter/ or spec/ copy and then abort
# without replacing it.
for required in matter spec; do
  if [[ ! -d "$REPO_DIR/$required" ]]; then
    echo "error: upstream directory not found: $REPO_DIR/$required" >&2
    exit 1
  fi
done

echo "==> Syncing matter/ → space/matter/"
rm -rf "$SPACE_DIR/matter"
rsync -a --exclude="__pycache__" --exclude="*.pyc" "$REPO_DIR/matter/" "$SPACE_DIR/matter/"

echo "==> Syncing spec/ → space/spec/"
rm -rf "$SPACE_DIR/spec"
rsync -a --exclude="__pycache__" "$REPO_DIR/spec/" "$SPACE_DIR/spec/"

echo "==> Resizing + converting example images → space/examples/ (JPEG, max 1280px)"
mkdir -p "$SPACE_DIR/examples"
# Paths reach Python through the environment and the heredoc delimiter is quoted,
# so the shell never interpolates into the Python source. A path containing
# quotes, backslashes or `$` therefore cannot corrupt (or inject into) the script.
MATTER_SRC_DIR="$REPO_DIR/data/eval_seed/images" \
MATTER_OUT_DIR="$SPACE_DIR/examples" \
python3 - <<'PY'
import os
from pathlib import Path

from PIL import Image

MAX_SIDE = 1280  # longest edge, in pixels, of the published example images

src_dir = Path(os.environ["MATTER_SRC_DIR"])
out_dir = Path(os.environ["MATTER_OUT_DIR"])
out_dir.mkdir(parents=True, exist_ok=True)
for f in [
    "domestic_pet_bottle.png",
    "ewaste_dead_laptop.png",
    "ev_pouch_cell.png",
    "medical_glucose_strip.png",
    "cd_brick.png",
    "textile_cotton_tshirt.png",
]:
    src = src_dir / f
    if not src.exists():
        # Best-effort: a missing seed image is reported, not fatal.
        print(f" ⚠️ missing: {src}")
        continue
    # convert() returns a fully loaded copy, so the file handle can be closed
    # as soon as the conversion is done.
    with Image.open(src) as raw:
        img = raw.convert("RGB")
    w, h = img.size
    m = max(w, h)
    if m > MAX_SIDE:
        r = MAX_SIDE / m
        # max(1, ...) keeps the short side from collapsing to 0 px on
        # extremely elongated images.
        img = img.resize((max(1, int(w * r)), max(1, int(h * r))), Image.LANCZOS)
    out = out_dir / (src.stem + ".jpg")
    img.save(out, "JPEG", quality=85, optimize=True)
    print(f" ✓ {out.name} ({out.stat().st_size // 1024} KB)")
PY

echo "✅ Space directory ready at: $SPACE_DIR"
# The path is quoted in the hint so the printed command can be pasted as-is even
# when the Space directory contains spaces.
echo " Push with: cd \"$SPACE_DIR\" && git add -A && git commit -m 'sync' && git push"
transformers_runtime.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """HuggingFace transformers runtime — implements matter.engine.Runtime.
2
+
3
+ Loads Gemma 4 lazily on first inference (so cold Spaces serve the demo-mode path
4
+ without ever paying the load cost) and wraps inference in @spaces.GPU so the
5
+ Space's ZeroGPU pool only spins up while we're actually generating.
6
+
7
+ Picks Gemma 4 E2B (5B, any-to-any, instruction-tuned) by default. Override via
8
+ the MATTER_MODEL_ID Space secret.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import os
14
+ from pathlib import Path
15
+ from typing import Literal
16
+
17
+ import torch
18
+ from PIL import Image
19
+
20
+ try:
21
+ import spaces # type: ignore
22
+ HAS_SPACES = True
23
+ except ImportError:
24
+ HAS_SPACES = False
25
+
26
# Runtime defaults, each overridable through an environment variable.
# An unset, empty, or whitespace-only value falls back to the built-in default,
# so a blank variable cannot break import (int("") raises ValueError) or yield
# an empty model id. Surrounding whitespace is stripped from all three.
DEFAULT_MODEL_ID = os.environ.get("MATTER_MODEL_ID", "").strip() or "google/gemma-4-E2B-it"
DEFAULT_MAX_NEW_TOKENS = int(os.environ.get("MATTER_MAX_NEW_TOKENS", "").strip() or "512")
DEFAULT_LORA_ID = os.environ.get("MATTER_LORA_ID", "").strip() or None
29
+
30
+
31
def _gpu_decorator(fn):
    """Wrap *fn* with ``spaces.GPU(duration=90)`` when `spaces` is importable.

    When the `spaces` module is unavailable (e.g. a local run), *fn* is
    returned unchanged.
    """
    if not HAS_SPACES:
        return fn
    gpu_wrap = spaces.GPU(duration=90)
    return gpu_wrap(fn)
36
+
37
+
38
class TransformersRuntime:
    """Implements matter.engine.Runtime over HF transformers + Gemma 4.

    Construction only records configuration. The processor and model are
    loaded on the first inference call (see :meth:`_ensure_loaded`) and then
    reused for the lifetime of the instance.

    Args:
        model: Model id handed to ``from_pretrained``.
        max_new_tokens: Upper bound on tokens generated per call.
        lora_id: Optional PEFT adapter id applied on top of the base model;
            ``None`` skips adapter loading.
    """

    name: Literal["transformers"] = "transformers"

    def __init__(
        self,
        model: str = DEFAULT_MODEL_ID,
        max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
        lora_id: str | None = DEFAULT_LORA_ID,
    ):
        self.model_id = model
        self.lora_id = lora_id
        self.max_new_tokens = max_new_tokens
        # Populated lazily by _ensure_loaded().
        self._model = None
        self._processor = None

    def _ensure_loaded(self) -> None:
        """Load the processor and model once; later calls return immediately.

        Uses bfloat16 on CUDA and float32 on CPU. If a LoRA adapter id is
        configured but cannot be loaded, the failure is printed and the base
        model is used as-is (deliberate best-effort).
        """
        if self._model is not None:
            return
        # Imported here so importing this module does not import transformers.
        from transformers import AutoModelForImageTextToText, AutoProcessor

        use_cuda = torch.cuda.is_available()
        dtype = torch.bfloat16 if use_cuda else torch.float32
        device = "cuda" if use_cuda else "cpu"

        self._processor = AutoProcessor.from_pretrained(self.model_id)
        self._model = AutoModelForImageTextToText.from_pretrained(
            self.model_id,
            torch_dtype=dtype,
            device_map=device,
        )
        if self.lora_id:
            try:
                from peft import PeftModel
                self._model = PeftModel.from_pretrained(self._model, self.lora_id)
            except Exception as e:
                # Best-effort: keep serving with the base model.
                print(f"[TransformersRuntime] LoRA load failed ({self.lora_id}): {e}")
        self._model.eval()

    def infer(self, prompt: str, image: Path | None) -> str:
        """Generate a completion for *prompt*, optionally conditioned on *image*.

        Args:
            prompt: User text placed in a single-turn chat message.
            image: Path to an image file, or ``None`` for text-only inference.

        Returns:
            The decoded generation with special tokens removed.
        """
        # The path is forwarded as a plain string (or None).
        return self._infer_gpu(prompt, str(image) if image is not None else None)

    @_gpu_decorator
    def _infer_gpu(self, prompt: str, image_path: str | None) -> str:
        """Run one greedy generation; wrapped by ``_gpu_decorator``.

        Args:
            prompt: User text for the chat message.
            image_path: Filesystem path of an image; ``None`` or an empty
                string means no image is attached.

        Returns:
            Only the newly generated text (the prompt tokens are sliced off).
        """
        self._ensure_loaded()
        proc = self._processor
        model = self._model

        content: list[dict] = []
        if image_path:
            # Close the source file deterministically; convert() yields an
            # independent in-memory image that stays valid after the close.
            with Image.open(image_path) as raw_image:
                rgb_image = raw_image.convert("RGB")
            content.append({"type": "image", "image": rgb_image})
        content.append({"type": "text", "text": prompt})
        messages = [{"role": "user", "content": content}]

        inputs = proc.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(model.device)

        with torch.no_grad():
            out = model.generate(
                **inputs,
                max_new_tokens=self.max_new_tokens,
                do_sample=False,
                # NOTE(review): presumably cleared so sampling settings from
                # the model's generation config do not clash with greedy
                # decoding — confirm.
                temperature=None,
                top_p=None,
            )

        # generate() returns prompt + continuation; keep the continuation only.
        prompt_len = inputs["input_ids"].shape[-1]
        generated = out[0][prompt_len:]
        text = proc.decode(generated, skip_special_tokens=True)
        return text
113
+
114
+
115
+ __all__ = ["TransformersRuntime"]