# Hugging Face Space: "Paper to Code" demo app.
| """ | |
| Paper to Code | |
| Turn a method description into an implementation plan, PyTorch scaffold, and reproducibility checklist. | |
| """ | |
import json
import os
import re
import sys
from collections import Counter
from textwrap import dedent
from typing import Dict

import gradio as gr

sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from shared.components import create_footer, create_method_panel, create_premium_hero
| try: | |
| from huggingface_hub import InferenceClient | |
| except Exception: # pragma: no cover - optional dependency | |
| InferenceClient = None | |
| SAMPLE_METHOD = """We introduce a retrieval-augmented classifier for support tickets. | |
| Each ticket is embedded with a sentence-transformer, nearest historical cases are retrieved, | |
| and a lightweight cross-encoder reranks them before the final label is produced. | |
| The model reports confidence, top evidence snippets, and an abstain decision when evidence is weak.""" | |
| def _extract_keywords(text: str): | |
| words = re.findall(r"[A-Za-z][A-Za-z\-]{3,}", text.lower()) | |
| stop = {"with", "that", "this", "from", "before", "after", "when", "model", "method", "using"} | |
| counts = {} | |
| for word in words: | |
| if word not in stop: | |
| counts[word] = counts.get(word, 0) + 1 | |
| return [word for word, _ in sorted(counts.items(), key=lambda item: (-item[1], item[0]))[:8]] | |
| def _hf_plan(method_text: str, target: str) -> Dict[str, str]: | |
| token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN") | |
| if not token or InferenceClient is None: | |
| return {} | |
| prompt = f""" | |
| You are an ML engineer converting papers into clean implementation plans. | |
| Return JSON with keys: summary, modules, code, checklist. | |
| Target artifact: {target} | |
| Method text: | |
| {method_text} | |
| """ | |
| client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3", token=token) | |
| response = client.text_generation(prompt, max_new_tokens=900, temperature=0.25) | |
| match = re.search(r"\{[\s\S]*\}", response) | |
| if not match: | |
| return {} | |
| try: | |
| parsed = json.loads(match.group(0)) | |
| except json.JSONDecodeError: | |
| return {} | |
| return {key: str(parsed.get(key, "")).strip() for key in ["summary", "modules", "code", "checklist"]} | |
| def _fallback_plan(method_text: str, target: str) -> Dict[str, str]: | |
| keywords = _extract_keywords(method_text) | |
| technique = ", ".join(keywords[:5]) or "model pipeline" | |
| summary = ( | |
| f"This looks like a {technique} workflow. The safest implementation path is to " | |
| "separate data preparation, model logic, inference, and evaluation so each claim can be tested." | |
| ) | |
| modules = dedent(f""" | |
| 1. `data.py` - parse examples, labels, and evidence fields. | |
| 2. `model.py` - implement the core method as a small, testable module. | |
| 3. `retrieve.py` - isolate nearest-neighbor or feature lookup logic if retrieval is involved. | |
| 4. `train.py` - keep hyperparameters explicit and serializable. | |
| 5. `evaluate.py` - report task metric, calibration, and failure cases. | |
| Detected method signals: {", ".join(keywords) if keywords else "none"} | |
| """).strip() | |
| code = dedent(''' | |
| import torch | |
| from torch import nn | |
| class PaperModule(nn.Module): | |
| """Minimal scaffold generated from the method description.""" | |
| def __init__(self, encoder: nn.Module, hidden_size: int, num_labels: int): | |
| super().__init__() | |
| self.encoder = encoder | |
| self.classifier = nn.Sequential( | |
| nn.Linear(hidden_size, hidden_size), | |
| nn.GELU(), | |
| nn.Dropout(0.1), | |
| nn.Linear(hidden_size, num_labels), | |
| ) | |
| def forward(self, inputs, evidence=None): | |
| encoded = self.encoder(**inputs) | |
| pooled = encoded.last_hidden_state[:, 0] | |
| logits = self.classifier(pooled) | |
| confidence = torch.softmax(logits, dim=-1).max(dim=-1).values | |
| return {"logits": logits, "confidence": confidence, "evidence": evidence} | |
| ''').strip() | |
| if target == "Evaluation Harness": | |
| code = dedent(''' | |
| from sklearn.metrics import accuracy_score, f1_score | |
| def evaluate(predictions, labels, confidences, abstain_threshold=0.55): | |
| keep = [score >= abstain_threshold for score in confidences] | |
| covered_preds = [pred for pred, ok in zip(predictions, keep) if ok] | |
| covered_labels = [label for label, ok in zip(labels, keep) if ok] | |
| return { | |
| "coverage": sum(keep) / max(1, len(keep)), | |
| "accuracy": accuracy_score(covered_labels, covered_preds) if covered_preds else 0.0, | |
| "macro_f1": f1_score(covered_labels, covered_preds, average="macro") if covered_preds else 0.0, | |
| } | |
| ''').strip() | |
| checklist = dedent(""" | |
| - Define the exact dataset split and leakage checks. | |
| - Log every hyperparameter needed to reproduce the run. | |
| - Add one baseline that is simpler than the proposed method. | |
| - Report both aggregate metrics and 5-10 qualitative failures. | |
| - Save model card notes: intended use, limitations, and ethical risks. | |
| """).strip() | |
| return {"summary": summary, "modules": modules, "code": code, "checklist": checklist} | |
| def generate_scaffold(method_text: str, target: str): | |
| if not method_text or len(method_text.strip()) < 40: | |
| return "Paste at least a paragraph of method text.", "", "", "" | |
| plan = _hf_plan(method_text, target) or _fallback_plan(method_text, target) | |
| return plan["summary"], plan["modules"], plan["code"], plan["checklist"] | |
| with gr.Blocks(title="Paper to Code", theme=gr.themes.Soft()) as app: | |
| create_premium_hero( | |
| "Paper to Code", | |
| "Convert a paper method into a reproducible engineering scaffold: modules, PyTorch code, evaluation harness, and checklist.", | |
| "🧪", | |
| badge="Research Engineering", | |
| highlights=["Method parsing", "PyTorch scaffold", "Reproducibility checklist"], | |
| ) | |
| create_method_panel({ | |
| "Technique": "Paper implementation planning with optional Hugging Face inference.", | |
| "What it proves": "You can translate research claims into testable software boundaries.", | |
| "HF capability": "Use HF-hosted LLMs when `HF_TOKEN` is available; fall back locally otherwise.", | |
| }) | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| method_input = gr.Textbox( | |
| label="Paper method or abstract", | |
| value=SAMPLE_METHOD, | |
| lines=12, | |
| placeholder="Paste the method section, abstract, or algorithm summary...", | |
| ) | |
| target = gr.Radio( | |
| ["PyTorch Module", "Training Loop", "Evaluation Harness", "Experiment Checklist"], | |
| value="PyTorch Module", | |
| label="Target artifact", | |
| ) | |
| generate_btn = gr.Button("Generate Research Scaffold", variant="primary") | |
| with gr.Column(scale=1): | |
| summary = gr.Markdown(label="Technique extraction") | |
| modules = gr.Markdown(label="Implementation plan") | |
| code = gr.Code(label="Code scaffold", language="python", lines=18) | |
| checklist = gr.Markdown(label="Reproducibility checklist") | |
| generate_btn.click(generate_scaffold, inputs=[method_input, target], outputs=[summary, modules, code, checklist]) | |
| gr.Markdown(""" | |
| ## Why This Is Useful | |
| Many AI demos stop at summarizing papers. This Space shows the engineering step after reading: identify components, define test boundaries, scaffold code, and make reproducibility explicit. | |
| """) | |
| create_footer("Paper to Code") | |
| if __name__ == "__main__": | |
| app.launch(server_name="0.0.0.0", server_port=7860) | |