Update app.py
Browse files
app.py
CHANGED
|
@@ -20,7 +20,7 @@ logger = logging.getLogger(__name__)
|
|
| 20 |
logger.info("Loading Whisper-tiny...")
|
| 21 |
whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
|
| 22 |
|
| 23 |
-
logger.info("Loading SmolLM2-360M-Instruct
|
| 24 |
model_name = "HuggingFaceTB/SmolLM2-360M-Instruct"
|
| 25 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 26 |
model = AutoModelForCausalLM.from_pretrained(
|
|
@@ -32,7 +32,6 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 32 |
|
| 33 |
logger.info("All models loaded!")
|
| 34 |
|
| 35 |
-
# API keys
|
| 36 |
TAVILY_API_KEY = os.getenv('TAVILY_API_KEY', '')
|
| 37 |
BRAVE_API_KEY = os.getenv('BRAVE_API_KEY', '')
|
| 38 |
|
|
@@ -170,10 +169,17 @@ def transcribe_audio_base64(audio_base64):
|
|
| 170 |
return {"error": str(e)}
|
| 171 |
|
| 172 |
def generate_answer(text_input):
|
| 173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
try:
|
| 175 |
-
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
| 177 |
|
| 178 |
current_date = datetime.now().strftime("%B %d, %Y")
|
| 179 |
|
|
@@ -183,7 +189,7 @@ def generate_answer(text_input):
|
|
| 183 |
search_time = time.time() - search_start
|
| 184 |
logger.info(f"[AI] Search completed in {search_time:.2f}s")
|
| 185 |
|
| 186 |
-
# Generate
|
| 187 |
messages = [
|
| 188 |
{
|
| 189 |
"role": "system",
|
|
@@ -195,7 +201,6 @@ def generate_answer(text_input):
|
|
| 195 |
}
|
| 196 |
]
|
| 197 |
|
| 198 |
-
# SmolLM2 uses simple chat template
|
| 199 |
prompt = f"<|im_start|>system\n{messages[0]['content']}<|im_end|>\n<|im_start|>user\n{messages[1]['content']}<|im_end|>\n<|im_start|>assistant\n"
|
| 200 |
|
| 201 |
gen_start = time.time()
|
|
@@ -205,7 +210,7 @@ def generate_answer(text_input):
|
|
| 205 |
with torch.no_grad():
|
| 206 |
outputs = model.generate(
|
| 207 |
**inputs,
|
| 208 |
-
max_new_tokens=80,
|
| 209 |
temperature=0.7,
|
| 210 |
do_sample=True,
|
| 211 |
top_p=0.9,
|
|
@@ -219,11 +224,10 @@ def generate_answer(text_input):
|
|
| 219 |
logger.info(f"[AI] Generation completed in {gen_time:.2f}s")
|
| 220 |
|
| 221 |
answer = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True).strip()
|
| 222 |
-
|
| 223 |
-
# Add source attribution
|
| 224 |
full_answer = f"{answer}\n\n**Source:** {search_engine}"
|
| 225 |
|
| 226 |
logger.info("[AI] ✓ Complete")
|
|
|
|
| 227 |
return full_answer
|
| 228 |
|
| 229 |
except Exception as e:
|
|
@@ -235,7 +239,6 @@ def process_audio(audio_path, question_text):
|
|
| 235 |
logger.info("="*50)
|
| 236 |
logger.info("[MAIN] New request received")
|
| 237 |
|
| 238 |
-
# Transcribe audio if provided
|
| 239 |
if audio_path:
|
| 240 |
logger.info(f"[MAIN] Processing audio: {audio_path}")
|
| 241 |
try:
|
|
@@ -255,14 +258,11 @@ def process_audio(audio_path, question_text):
|
|
| 255 |
|
| 256 |
transcription_time = time.time() - start_time
|
| 257 |
|
| 258 |
-
# Generate answer (includes search)
|
| 259 |
gen_start = time.time()
|
| 260 |
answer = generate_answer(question)
|
| 261 |
gen_time = time.time() - gen_start
|
| 262 |
|
| 263 |
total_time = time.time() - start_time
|
| 264 |
-
|
| 265 |
-
# Time indicator
|
| 266 |
time_emoji = "🟢" if total_time < 2.0 else "🟡" if total_time < 3.0 else "🔴"
|
| 267 |
|
| 268 |
timing = f"\n\n{time_emoji} **Performance:** Trans={transcription_time:.2f}s | Search+Gen={gen_time:.2f}s | **Total={total_time:.2f}s**"
|
|
@@ -282,140 +282,71 @@ def text_handler(text_input):
|
|
| 282 |
with gr.Blocks(title="Ultra-Fast Q&A - SmolLM2-360M", theme=gr.themes.Soft()) as demo:
|
| 283 |
gr.Markdown("""
|
| 284 |
# ⚡ Ultra-Fast Political Q&A System
|
| 285 |
-
**SmolLM2-360M** (250-400 tok/s) + **Parallel Search**
|
| 286 |
-
|
| 287 |
-
**Features:**
|
| 288 |
-
- Whisper-tiny for speech-to-text
|
| 289 |
-
- SmolLM2-360M-Instruct (20x faster than Qwen 0.5B)
|
| 290 |
-
- Multi-engine parallel search (Tavily → Brave → Searx → DDG)
|
| 291 |
-
- Search-grounded answers only
|
| 292 |
""")
|
| 293 |
|
| 294 |
with gr.Tab("🎙️ Audio Input"):
|
| 295 |
with gr.Row():
|
| 296 |
with gr.Column():
|
| 297 |
-
audio_input = gr.Audio(
|
| 298 |
-
|
| 299 |
-
type="filepath",
|
| 300 |
-
label="Record or Upload Audio"
|
| 301 |
-
)
|
| 302 |
-
audio_submit = gr.Button("🚀 Submit Audio", variant="primary", size="lg")
|
| 303 |
-
|
| 304 |
with gr.Column():
|
| 305 |
audio_output = gr.Textbox(label="Answer", lines=10, show_copy_button=True)
|
| 306 |
-
audio_time = gr.Number(label="
|
| 307 |
|
| 308 |
-
audio_submit.click(
|
| 309 |
-
fn=audio_handler,
|
| 310 |
-
inputs=[audio_input],
|
| 311 |
-
outputs=[audio_output, audio_time],
|
| 312 |
-
api_name="audio_query"
|
| 313 |
-
)
|
| 314 |
|
| 315 |
with gr.Tab("✍️ Text Input"):
|
| 316 |
with gr.Row():
|
| 317 |
with gr.Column():
|
| 318 |
-
text_input = gr.Textbox(
|
| 319 |
-
|
| 320 |
-
placeholder="Is internet shut down in Bareilly today?",
|
| 321 |
-
lines=3
|
| 322 |
-
)
|
| 323 |
-
text_submit = gr.Button("🚀 Submit Question", variant="primary", size="lg")
|
| 324 |
-
|
| 325 |
with gr.Column():
|
| 326 |
text_output = gr.Textbox(label="Answer", lines=10, show_copy_button=True)
|
| 327 |
-
text_time = gr.Number(label="
|
| 328 |
|
| 329 |
-
text_submit.click(
|
| 330 |
-
fn=text_handler,
|
| 331 |
-
inputs=[text_input],
|
| 332 |
-
outputs=[text_output, text_time],
|
| 333 |
-
api_name="text_query"
|
| 334 |
-
)
|
| 335 |
|
| 336 |
-
gr.Examples(
|
| 337 |
-
examples=[
|
| 338 |
-
["Is internet shut down in Bareilly today?"],
|
| 339 |
-
["Who won the 2024 US presidential election?"],
|
| 340 |
-
["What is current India inflation rate?"],
|
| 341 |
-
["What are the top 3 news stories today?"]
|
| 342 |
-
],
|
| 343 |
-
inputs=text_input
|
| 344 |
-
)
|
| 345 |
|
| 346 |
with gr.Tab("🔌 Pluely API"):
|
| 347 |
gr.Markdown("""
|
| 348 |
-
##
|
| 349 |
|
| 350 |
-
###
|
|
|
|
|
|
|
| 351 |
```
|
| 352 |
-
curl -X POST https://archcoder-basic-app.hf.space/call/
|
| 353 |
-
-H "Content-Type: application/json" \\
|
| 354 |
-
-d '{"data": ["BASE64_AUDIO_DATA"]}'
|
| 355 |
```
|
| 356 |
-
|
| 357 |
|
| 358 |
-
|
| 359 |
```
|
| 360 |
-
curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai \\
|
| 361 |
-
-H "Content-Type: application/json" \\
|
| 362 |
-
-d '{"data": ["Your question here"]}'
|
| 363 |
```
|
| 364 |
-
**Response Format:** `{"data": ["Answer with source attribution"]}`
|
| 365 |
|
| 366 |
-
|
| 367 |
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
### Custom STT Provider:
|
| 371 |
-
**Curl Command:**
|
| 372 |
-
```
|
| 373 |
-
curl https://archcoder-basic-app.hf.space/call/transcribe_stt -H "Content-Type: application/json" -d '{"data": ["{{AUDIO_BASE64}}"]}'
|
| 374 |
-
```
|
| 375 |
-
**Response Content Path:** `data[0].text`
|
| 376 |
-
**Streaming:** OFF
|
| 377 |
|
| 378 |
-
###
|
| 379 |
-
**Curl Command:**
|
| 380 |
```
|
| 381 |
-
curl https://archcoder-basic-app.hf.space/call/
|
| 382 |
```
|
| 383 |
-
**Response
|
| 384 |
-
**Streaming:** OFF
|
| 385 |
""")
|
| 386 |
|
| 387 |
-
# Hidden API endpoint components
|
| 388 |
with gr.Row(visible=False):
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
stt_btn = gr.Button("STT", visible=False)
|
| 395 |
-
stt_btn.click(
|
| 396 |
-
fn=transcribe_audio_base64,
|
| 397 |
-
inputs=[stt_input],
|
| 398 |
-
outputs=[stt_output],
|
| 399 |
-
api_name="transcribe_stt"
|
| 400 |
-
)
|
| 401 |
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
fn=generate_answer,
|
| 405 |
-
inputs=[ai_input],
|
| 406 |
-
outputs=[ai_output],
|
| 407 |
-
api_name="answer_ai"
|
| 408 |
-
)
|
| 409 |
|
| 410 |
-
gr.Markdown(""
|
| 411 |
-
---
|
| 412 |
-
**Model:** SmolLM2-360M-Instruct (250-400 tokens/second on CPU)
|
| 413 |
-
**Search:** Parallel multi-engine (Tavily → Brave → Searx → DDG)
|
| 414 |
-
**Expected Speed:** 1.5-2.5 seconds total
|
| 415 |
-
**All requests logged** - Check Logs tab in HF Space
|
| 416 |
-
|
| 417 |
-
🟢 < 2s | 🟡 2-3s | 🔴 > 3s
|
| 418 |
-
""")
|
| 419 |
|
| 420 |
if __name__ == "__main__":
|
| 421 |
demo.queue(max_size=5)
|
|
|
|
| 20 |
logger.info("Loading Whisper-tiny...")
|
| 21 |
whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
|
| 22 |
|
| 23 |
+
logger.info("Loading SmolLM2-360M-Instruct...")
|
| 24 |
model_name = "HuggingFaceTB/SmolLM2-360M-Instruct"
|
| 25 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 26 |
model = AutoModelForCausalLM.from_pretrained(
|
|
|
|
| 32 |
|
| 33 |
logger.info("All models loaded!")
|
| 34 |
|
|
|
|
| 35 |
TAVILY_API_KEY = os.getenv('TAVILY_API_KEY', '')
|
| 36 |
BRAVE_API_KEY = os.getenv('BRAVE_API_KEY', '')
|
| 37 |
|
|
|
|
| 169 |
return {"error": str(e)}
|
| 170 |
|
| 171 |
def generate_answer(text_input):
|
| 172 |
+
"""Main answer generation - with debug logging"""
|
| 173 |
+
logger.info("="*60)
|
| 174 |
+
logger.info(f"[AI] Raw input: '{text_input}'")
|
| 175 |
+
logger.info(f"[AI] Input type: {type(text_input)}, Length: {len(text_input) if text_input else 0}")
|
| 176 |
+
|
| 177 |
try:
|
| 178 |
+
# Handle literal {{TEXT}} from Pluely
|
| 179 |
+
if not text_input or text_input.strip() in ["", "{{TEXT}}", "{{text}}", "$TEXT"]:
|
| 180 |
+
error_msg = "❌ ERROR: No question received. Pluely sent empty/template variable.\n\nPluely Config Issue:\n- Check your curl command uses correct format\n- Make sure variable substitution is enabled"
|
| 181 |
+
logger.error(f"[AI] {error_msg}")
|
| 182 |
+
return error_msg
|
| 183 |
|
| 184 |
current_date = datetime.now().strftime("%B %d, %Y")
|
| 185 |
|
|
|
|
| 189 |
search_time = time.time() - search_start
|
| 190 |
logger.info(f"[AI] Search completed in {search_time:.2f}s")
|
| 191 |
|
| 192 |
+
# Generate
|
| 193 |
messages = [
|
| 194 |
{
|
| 195 |
"role": "system",
|
|
|
|
| 201 |
}
|
| 202 |
]
|
| 203 |
|
|
|
|
| 204 |
prompt = f"<|im_start|>system\n{messages[0]['content']}<|im_end|>\n<|im_start|>user\n{messages[1]['content']}<|im_end|>\n<|im_start|>assistant\n"
|
| 205 |
|
| 206 |
gen_start = time.time()
|
|
|
|
| 210 |
with torch.no_grad():
|
| 211 |
outputs = model.generate(
|
| 212 |
**inputs,
|
| 213 |
+
max_new_tokens=80,
|
| 214 |
temperature=0.7,
|
| 215 |
do_sample=True,
|
| 216 |
top_p=0.9,
|
|
|
|
| 224 |
logger.info(f"[AI] Generation completed in {gen_time:.2f}s")
|
| 225 |
|
| 226 |
answer = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True).strip()
|
|
|
|
|
|
|
| 227 |
full_answer = f"{answer}\n\n**Source:** {search_engine}"
|
| 228 |
|
| 229 |
logger.info("[AI] ✓ Complete")
|
| 230 |
+
logger.info("="*60)
|
| 231 |
return full_answer
|
| 232 |
|
| 233 |
except Exception as e:
|
|
|
|
| 239 |
logger.info("="*50)
|
| 240 |
logger.info("[MAIN] New request received")
|
| 241 |
|
|
|
|
| 242 |
if audio_path:
|
| 243 |
logger.info(f"[MAIN] Processing audio: {audio_path}")
|
| 244 |
try:
|
|
|
|
| 258 |
|
| 259 |
transcription_time = time.time() - start_time
|
| 260 |
|
|
|
|
| 261 |
gen_start = time.time()
|
| 262 |
answer = generate_answer(question)
|
| 263 |
gen_time = time.time() - gen_start
|
| 264 |
|
| 265 |
total_time = time.time() - start_time
|
|
|
|
|
|
|
| 266 |
time_emoji = "🟢" if total_time < 2.0 else "🟡" if total_time < 3.0 else "🔴"
|
| 267 |
|
| 268 |
timing = f"\n\n{time_emoji} **Performance:** Trans={transcription_time:.2f}s | Search+Gen={gen_time:.2f}s | **Total={total_time:.2f}s**"
|
|
|
|
| 282 |
with gr.Blocks(title="Ultra-Fast Q&A - SmolLM2-360M", theme=gr.themes.Soft()) as demo:
|
| 283 |
gr.Markdown("""
|
| 284 |
# ⚡ Ultra-Fast Political Q&A System
|
| 285 |
+
**SmolLM2-360M** (250-400 tok/s) + **Parallel Search**
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
""")
|
| 287 |
|
| 288 |
with gr.Tab("🎙️ Audio Input"):
|
| 289 |
with gr.Row():
|
| 290 |
with gr.Column():
|
| 291 |
+
audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio")
|
| 292 |
+
audio_submit = gr.Button("🚀 Submit", variant="primary")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
with gr.Column():
|
| 294 |
audio_output = gr.Textbox(label="Answer", lines=10, show_copy_button=True)
|
| 295 |
+
audio_time = gr.Number(label="Time (s)", precision=2)
|
| 296 |
|
| 297 |
+
audio_submit.click(fn=audio_handler, inputs=[audio_input], outputs=[audio_output, audio_time], api_name="audio_query")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
|
| 299 |
with gr.Tab("✍️ Text Input"):
|
| 300 |
with gr.Row():
|
| 301 |
with gr.Column():
|
| 302 |
+
text_input = gr.Textbox(label="Question", placeholder="Ask anything...", lines=3)
|
| 303 |
+
text_submit = gr.Button("🚀 Submit", variant="primary")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
with gr.Column():
|
| 305 |
text_output = gr.Textbox(label="Answer", lines=10, show_copy_button=True)
|
| 306 |
+
text_time = gr.Number(label="Time (s)", precision=2)
|
| 307 |
|
| 308 |
+
text_submit.click(fn=text_handler, inputs=[text_input], outputs=[text_output, text_time], api_name="text_query")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
|
| 310 |
+
gr.Examples(examples=[["Who is the US president?"]], inputs=text_input)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
|
| 312 |
with gr.Tab("🔌 Pluely API"):
|
| 313 |
gr.Markdown("""
|
| 314 |
+
## ⚠️ IMPORTANT: Pluely Configuration
|
| 315 |
|
| 316 |
+
### If you see "{{TEXT}}" in logs, try these formats:
|
| 317 |
+
|
| 318 |
+
**Format 1 (Windows CMD - Use This First):**
|
| 319 |
```
|
| 320 |
+
curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai -H "Content-Type: application/json" -d "{\\"data\\": [\\"TEXT_PLACEHOLDER\\"]}"
|
|
|
|
|
|
|
| 321 |
```
|
| 322 |
+
Then in Pluely, replace `TEXT_PLACEHOLDER` with `{{TEXT}}`
|
| 323 |
|
| 324 |
+
**Format 2 (Alternative):**
|
| 325 |
```
|
| 326 |
+
curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai -H "Content-Type: application/json" --data-binary "{\\"data\\": [\\"{{TEXT}}\\"]}"
|
|
|
|
|
|
|
| 327 |
```
|
|
|
|
| 328 |
|
| 329 |
+
**Response Path:** `data[0]`
|
| 330 |
|
| 331 |
+
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
|
| 333 |
+
### STT Endpoint:
|
|
|
|
| 334 |
```
|
| 335 |
+
curl -X POST https://archcoder-basic-app.hf.space/call/transcribe_stt -H "Content-Type: application/json" -d "{\\"data\\": [\\"{{AUDIO_BASE64}}\\"]}"
|
| 336 |
```
|
| 337 |
+
**Response Path:** `data[0].text`
|
|
|
|
| 338 |
""")
|
| 339 |
|
|
|
|
| 340 |
with gr.Row(visible=False):
|
| 341 |
+
stt_in = gr.Textbox()
|
| 342 |
+
stt_out = gr.JSON()
|
| 343 |
+
ai_in = gr.Textbox()
|
| 344 |
+
ai_out = gr.Textbox()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
|
| 346 |
+
gr.Button("STT", visible=False).click(fn=transcribe_audio_base64, inputs=[stt_in], outputs=[stt_out], api_name="transcribe_stt")
|
| 347 |
+
gr.Button("AI", visible=False).click(fn=generate_answer, inputs=[ai_in], outputs=[ai_out], api_name="answer_ai")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
|
| 349 |
+
gr.Markdown("🟢 < 2s | 🟡 2-3s | 🔴 > 3s")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
|
| 351 |
if __name__ == "__main__":
|
| 352 |
demo.queue(max_size=5)
|