PiotrPasztor committed on
Commit 73c62ee · 1 Parent(s): b735d5b
Files changed (5)
  1. .gitignore +2 -0
  2. Dockerfile +16 -0
  3. app.py +122 -0
  4. dataset.jsonl +0 -0
  5. requirements.txt +4 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
+ .DS_Store
+ .idea
Dockerfile ADDED
@@ -0,0 +1,16 @@
+ # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+ # you will also find guides on how best to write your Dockerfile
+
+ FROM python:3.9
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /app
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
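The container serves the FastAPI app on port 7860, the standard port for Docker-based Spaces. A minimal client sketch against the endpoints defined in app.py below, assuming the container is running locally; the example message and printed outputs are illustrative only:

```python
import json
import urllib.request

BASE = "http://localhost:7860"  # assumes the container is running locally

# Poll /health until the background training in app.py has finished
with urllib.request.urlopen(f"{BASE}/health") as resp:
    print(json.load(resp))  # e.g. {'status': 'ok', 'model_ready': True}

# Ask for an action (the server returns 503 while the model is still loading)
payload = json.dumps({"message": "open an issue on the repo"}).encode()
req = urllib.request.Request(
    f"{BASE}/action",
    data=payload,
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.load(resp))  # e.g. {'action': 'GITHUB', 'score': 0.87}
```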
app.py ADDED
@@ -0,0 +1,122 @@
+ import torch
+ import torch.nn as nn
+ import json
+ import os
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from transformers import AutoTokenizer, AutoModel
+
+ # Simple RL-style classifier built on a Transformer encoder
+ ACTIONS = ["TRIP", "GITHUB", "MESSAGE"]
+ DATASET_PATH = os.path.join(os.path.dirname(__file__), "dataset.jsonl")
+
+ app = FastAPI()
+
+ # Global model state - loaded lazily
+ model_state = {"ready": False, "tokenizer": None, "encoder": None, "policy_head": None}
+
+
+ class MessageRequest(BaseModel):
+     message: str
+
+
+ class ActionResponse(BaseModel):
+     action: str
+     score: float
+
+
+ @app.get("/health")
+ def health():
+     return {"status": "ok", "model_ready": model_state["ready"]}
+
+
+ def load_model():
+     tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
+     encoder = AutoModel.from_pretrained("distilbert-base-uncased")
+
+     # Simple policy head: maps the [CLS] embedding to action logits
+     policy_head = nn.Linear(768, len(ACTIONS))
+
+     # Load dataset for training
+     data = []
+     with open(DATASET_PATH, "r") as f:
+         for line in f:
+             item = json.loads(line)
+             user_msg = item["messages"][1]["content"]
+             label = item["messages"][2]["content"]
+             data.append((user_msg, ACTIONS.index(label)))
+
+     # Quick RL-style training (simplified policy gradient); only the head is trained
+     optimizer = torch.optim.Adam(policy_head.parameters(), lr=1e-3)
+     encoder.eval()
+
+     for epoch in range(3):
+         total_reward = 0
+         for text, label in data[:100]:  # use a subset for speed
+             inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=64)
+             with torch.no_grad():
+                 hidden = encoder(**inputs).last_hidden_state[:, 0, :]
+
+             logits = policy_head(hidden)
+             probs = torch.softmax(logits, dim=-1)
+
+             # Sample an action (RL style)
+             action = torch.multinomial(probs, 1).item()
+
+             # Reward: +1 if correct, -1 if wrong
+             reward = 1.0 if action == label else -1.0
+             total_reward += reward
+
+             # REINFORCE update: loss = -log pi(a|s) * reward
+             log_prob = torch.log(probs[0, action])
+             loss = -log_prob * reward
+
+             optimizer.zero_grad()
+             loss.backward()
+             optimizer.step()
+
+     return tokenizer, encoder, policy_head
+
+
+ def predict(text, tokenizer, encoder, policy_head):
+     inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=64)
+     with torch.no_grad():
+         hidden = encoder(**inputs).last_hidden_state[:, 0, :]
+         logits = policy_head(hidden)
+         probs = torch.softmax(logits, dim=-1)
+         action_idx = torch.argmax(probs, dim=-1).item()
+         score = probs[0, action_idx].item()
+
+     return ACTIONS[action_idx], score
+
+
+ @app.on_event("startup")
+ async def startup_event():
+     import threading
+
+     def load_in_background():
+         tokenizer, encoder, policy_head = load_model()
+         model_state["tokenizer"] = tokenizer
+         model_state["encoder"] = encoder
+         model_state["policy_head"] = policy_head
+         model_state["ready"] = True
+         print("Model loaded and ready!")
+
+     # Load the model in a background thread so the server can respond immediately
+     thread = threading.Thread(target=load_in_background)
+     thread.start()
+
+
+ @app.post("/action", response_model=ActionResponse)
+ def action(request: MessageRequest):
+     if not model_state["ready"]:
+         raise HTTPException(status_code=503, detail="Model is still loading, please wait")
+
+     action_name, score = predict(
+         request.message,
+         model_state["tokenizer"],
+         model_state["encoder"],
+         model_state["policy_head"],
+     )
+     return ActionResponse(action=action_name, score=round(score, 4))
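The loop in load_model is a single-sample REINFORCE update: the DistilBERT forward pass runs under torch.no_grad() and its parameters are not in the optimizer, so only the linear head learns from loss = -log pi(a|s) * R with R in {+1, -1}. Since the reward here is just label agreement, a supervised cross-entropy fit of the same head would be a lower-variance way to train the same classifier; a minimal sketch of that alternative, not part of this commit:

```python
import torch
import torch.nn as nn

# Hypothetical alternative to the sampling loop in load_model: fit the same
# 768 -> 3 policy head with cross-entropy on frozen [CLS] embeddings.
policy_head = nn.Linear(768, 3)        # 768 = DistilBERT hidden size
optimizer = torch.optim.Adam(policy_head.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

hidden = torch.randn(8, 768)           # stand-in for a batch of frozen embeddings
labels = torch.randint(0, 3, (8,))     # stand-in for ACTIONS indices

logits = policy_head(hidden)           # batched, unlike the per-sample RL loop
loss = criterion(logits, labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()
```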
dataset.jsonl ADDED
The diff for this file is too large to render. See raw diff
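The dataset itself is not rendered, but load_model above fixes the record layout: each JSONL line carries a messages array whose index 1 holds the user text and whose index 2 holds one of the ACTIONS labels. An illustrative record follows; the roles and content strings are hypothetical, only the indexing is implied by the code:

```python
import json

# Illustrative dataset.jsonl record matching load_model's indexing:
# messages[1]["content"] -> user text, messages[2]["content"] -> label.
record = {
    "messages": [
        {"role": "system", "content": "Pick an action for the user's message."},
        {"role": "user", "content": "Plan me a weekend trip to Lisbon"},
        {"role": "assistant", "content": "TRIP"},
    ]
}

line = json.dumps(record)              # one line of dataset.jsonl
item = json.loads(line)
assert item["messages"][2]["content"] in ["TRIP", "GITHUB", "MESSAGE"]
```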
 
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ torch
+ transformers
+ fastapi
+ uvicorn[standard]