khurrameycon committed
Commit cb0bf83 · verified · 1 Parent(s): 579223b

Create app.py

Files changed (1)
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
+ # app.py
+ import os
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from huggingface_hub import InferenceClient
+
+ # Initialize FastAPI app
+ app = FastAPI(
+     title="LLM Chat API",
+     description="API for getting chat responses from a Llama model",
+     version="1.0.0"
+ )
+
+ class ChatRequest(BaseModel):
+     text: str
+
+ class ChatResponse(BaseModel):
+     response: str
+
+ def llm_chat_response(text: str) -> str:
+     try:
+         # Read the Hugging Face access token from the environment
+         HF_TOKEN = os.getenv("HF_TOKEN")
+         if not HF_TOKEN:
+             raise HTTPException(status_code=500, detail="HF_TOKEN not configured")
+
+         client = InferenceClient(api_key=HF_TOKEN)
+         # Ask the model to keep its answer to a single line
+         messages = [
+             {
+                 "role": "user",
+                 "content": [
+                     {
+                         "type": "text",
+                         "text": text + " describe in one line only"
+                     }
+                 ]
+             }
+         ]
+
+         response_from_llama = client.chat.completions.create(
+             model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+             messages=messages,
+             max_tokens=500
+         )
+         return response_from_llama.choices[0].message.content
+     except HTTPException:
+         # Re-raise HTTP errors (e.g. the missing-token case) without wrapping them
+         raise
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ # Chat endpoint: forwards the user's text to the model and returns its reply
+ @app.post("/chat", response_model=ChatResponse)
+ async def chat(request: ChatRequest):
+     try:
+         response = llm_chat_response(request.text)
+         return ChatResponse(response=response)
+     except HTTPException as he:
+         raise he
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ # Landing route with a short usage hint
+ @app.get("/")
+ async def root():
+     return {"message": "Welcome to the LLM Chat API. Use POST /chat endpoint to get responses."}
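For a quick local check, the service can be started with uvicorn and exercised with a short client script. The sketch below is illustrative only: the host, port, filename, and example prompt are assumptions not taken from the commit, and it presumes HF_TOKEN is exported before the server starts.

# client_example.py -- illustrative sketch; URL, port, and prompt are assumptions
# Assumes the API was launched locally first, e.g.:
#   HF_TOKEN=<your token> uvicorn app:app --host 0.0.0.0 --port 8000
import requests

BASE_URL = "http://localhost:8000"

resp = requests.post(f"{BASE_URL}/chat", json={"text": "What is FastAPI?"})
resp.raise_for_status()
print(resp.json()["response"])  # the model's one-line answer

The request body mirrors the ChatRequest model (a single text field) and the reply mirrors ChatResponse (a single response field), so any HTTP client can be substituted for requests.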