Upload 4 files
- Dockerfile +11 -0
- README.md +29 -13
- app.py +40 -0
- requirements.txt +10 -0
Dockerfile
ADDED
# Optional: containerize for non-HF hosting (Railway/Fly/Render/VPS)
FROM python:3.11-slim

WORKDIR /app
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

COPY app.py ./
EXPOSE 7860
CMD ["python", "app.py"]
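To build and run the image locally, something like the following should work (the image tag `axionx-qa` is illustrative):

```bash
docker build -t axionx-qa .
docker run -p 7860:7860 axionx-qa
```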
README.md
CHANGED
# AxionX Digital — QA Demo (Hugging Face Space)

This repo hosts a question-answering demo designed to stay stable for a year or more, built on a pinned DistilBERT checkpoint.

## Why this stays stable
- **Pinned Python deps** in `requirements.txt` prevent surprise breaking changes.
- **Pinned model** (`distilbert-base-cased-distilled-squad`) avoids checkpoint drift.
- **Simple Gradio app** with a fixed JSON output schema (example below).
- **CPU-only**: no reliance on GPUs or CUDA driver versions.
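For reference, a successful response from the demo looks like this (the values shown are illustrative):

```json
{"answer": "model-training tools for AI developers", "score": 0.987}
```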

## Deploying to Hugging Face Spaces
1. Create a new Space (SDK: **Gradio**, Hardware: **CPU Basic**); Space metadata lives in the YAML front matter sketched below.
2. Upload these files: `app.py`, `requirements.txt`, `README.md`.
3. The app auto-builds and serves at your permanent Space URL.
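Spaces read their configuration from YAML front matter at the top of `README.md`. A minimal sketch (the title is illustrative; `sdk_version` should match the pinned `gradio` release):

```yaml
---
title: AxionX QA Demo
sdk: gradio
sdk_version: 4.44.0
app_file: app.py
pinned: false
---
```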

### Optional (Custom domain / always-on)
- Upgrade the Space to PRO hardware with **“Always On”** if you need zero cold starts.
- The free CPU Basic URL remains permanent; the app may sleep when idle but wakes on the first request.

## Local run (for testing)
```bash
pip install -r requirements.txt
python app.py
# then open http://localhost:7860
```

## API-style use
You can wrap `predict` behind a small FastAPI app if you prefer a plain JSON API; for a pure Space, use the UI or the Gradio client.
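A minimal sketch of such a wrapper, assuming `fastapi` and `uvicorn` are added to the pinned requirements; the module name `api.py` and the `/qa` route are illustrative:

```python
# api.py (hypothetical): a thin JSON API over the same pinned pipeline.
from fastapi import FastAPI
from pydantic import BaseModel

from app import predict  # importing app.py loads the pinned QA pipeline once

api = FastAPI()

class QARequest(BaseModel):
    context: str
    question: str

@api.post("/qa")
def answer(req: QARequest):
    # Returns the same stable schema as the UI: {"answer": ..., "score": ...}
    return predict(req.context, req.question)
```

Run it with `uvicorn api:api --port 8000` and POST JSON to `/qa`.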
app.py
ADDED
import gradio as gr
from transformers import pipeline

# Pin the exact model to avoid unexpected changes.
MODEL_ID = "distilbert-base-cased-distilled-squad"

# Create the QA pipeline once at startup; HF Spaces will cache the weights.
qa = pipeline("question-answering", model=MODEL_ID)

EXAMPLE_CONTEXT = (
    "AxionX Digital builds model-training tools for AI developers. "
    "We fine-tune open-source LLMs for customer support, finance, and legal use cases. "
    "We also provide evaluation dashboards and fast private deployments."
)
EXAMPLE_QUESTION = "What does AxionX Digital build?"

def predict(context, question):
    context = (context or "").strip()
    question = (question or "").strip()
    if not context or not question:
        # Keep the schema stable even on bad input: score is null, not a string.
        return {"answer": "Please provide both context and a question.", "score": None}
    res = qa(question=question, context=context)
    # Return a stable JSON schema.
    return {"answer": res.get("answer", ""), "score": round(float(res.get("score", 0.0)), 3)}

with gr.Blocks(title="AxionX — Question Answering Demo") as demo:
    gr.Markdown(
        "# AxionX — Question Answering Demo\n"
        "Type a paragraph in **Context**, then ask a **Question** about it.\n\n"
        "**Model:** distilbert-base-cased-distilled-squad (pinned)"
    )
    with gr.Row():
        ctx = gr.Textbox(label="Context", lines=10, value=EXAMPLE_CONTEXT)
        q = gr.Textbox(label="Question", value=EXAMPLE_QUESTION)
    btn = gr.Button("Get Answer")
    out = gr.JSON(label="Result (answer, score)")
    btn.click(predict, inputs=[ctx, q], outputs=[out])

if __name__ == "__main__":
    # share=False (the default) is correct on Spaces; set share=True only for local/Colab tunnels.
    demo.launch(server_name="0.0.0.0", server_port=7860)
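Once deployed, the app can also be called programmatically. A sketch using the `gradio_client` package; the Space id `your-org/axionx-qa` is a placeholder, and the endpoint name is assumed to follow the handler function's name:

```python
# Hypothetical client-side call to the deployed Space.
from gradio_client import Client

client = Client("your-org/axionx-qa")  # placeholder Space id
result = client.predict(
    "AxionX Digital builds model-training tools for AI developers.",  # context
    "What does AxionX Digital build?",                                # question
    api_name="/predict",
)
print(result)  # expected shape: {"answer": "...", "score": ...}
```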
requirements.txt
ADDED
# Pin versions for 12+ months of stability
# (these were current/stable at creation time; pinning prevents breaking changes)
transformers==4.42.4
torch==2.3.1
gradio==4.44.0
# Fast tokenizers backend used by transformers
tokenizers==0.19.1
# Pin pydantic explicitly to avoid breaking gradio's dependency chain
pydantic==2.7.4