Jonny Tran committed
Commit 1171a09 · 1 Parent(s): d0fd76c

first commit

Files changed (4)
  1. .gitignore +12 -0
  2. README.md +55 -1
  3. app.py +115 -0
  4. requirements.txt +6 -0
.gitignore ADDED
@@ -0,0 +1,12 @@
+ # .gitignore
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ .env
+ .env.local
+ venv/
+ .idea/
+ .vscode/
+ *.log
+ data/*
+ !data/.gitkeep
README.md CHANGED
@@ -5,6 +5,60 @@ colorFrom: gray
  colorTo: yellow
  sdk: docker
  pinned: false
+ port: 7860
+ app_port: 7860
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Pet Assistant LlamaIndex API
+
+ This API provides a chat endpoint powered by LlamaIndex, designed to assist with pet-related queries. The service is compatible with Hugging Face's Chat UI and exposes a REST API on port 7860.
+
+ ## API Endpoints
+
+ ### Chat Endpoint
+
+ ```http
+ POST /chat
+ Content-Type: application/json
+ ```
+
+ Request body:
+ ```json
+ {
+   "messages": [
+     {
+       "role": "user",
+       "content": "How often should I feed my cat?"
+     }
+   ],
+   "temperature": 0.7,
+   "max_tokens": 1024,
+   "parameters": {}
+ }
+ ```
+
+ Response:
+ ```json
+ {
+   "content": "The response from LlamaIndex...",
+   "stop": null,
+   "usage": {
+     "prompt_tokens": 0,
+     "completion_tokens": 0,
+     "total_tokens": 0
+   }
+ }
+ ```
+
+ ### Health Check
+
+ ```http
+ GET /health
+ ```
+
+ Response:
+ ```json
+ {
+   "status": "healthy"
+ }
+ ```
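The request and response shapes documented above map directly onto a plain HTTP call. As a quick sanity check against a locally running Space, here is a minimal client sketch; it assumes the server is up on port 7860 and that the `requests` package is installed (it is not pinned in requirements.txt):

```python
# client_example.py (hypothetical helper, not part of this commit)
import requests

payload = {
    "messages": [
        {"role": "user", "content": "How often should I feed my cat?"}
    ],
    "temperature": 0.7,
    "max_tokens": 1024,
    "parameters": {},
}

# POST the chat request; the body matches the schema documented in the README
resp = requests.post("http://localhost:7860/chat", json=payload, timeout=60)
resp.raise_for_status()

# The response follows the ChatResponse model defined in app.py
body = resp.json()
print(body["content"])
print(body["usage"])
```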
app.py CHANGED
@@ -0,0 +1,115 @@
+ # app.py
+ from fastapi import FastAPI, HTTPException
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel
+ from typing import List, Dict, Optional
+ import os
+ from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
+ from llama_index.llms import OpenAI
+ import logging
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ app = FastAPI()
+
+ # Configure CORS
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],  # Adjust this in production
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ class ChatMessage(BaseModel):
+     role: str
+     content: str
+
+ class ChatRequest(BaseModel):
+     messages: List[ChatMessage]
+     temperature: Optional[float] = 0.7
+     max_tokens: Optional[int] = 1024
+     parameters: Optional[Dict] = {}
+
+ class ChatResponse(BaseModel):
+     content: str
+     stop: Optional[List[str]] = None
+     usage: Dict[str, int]
+
+ # Initialize LlamaIndex components
+ @app.on_event("startup")
+ async def startup_event():
+     try:
+         # Initialize OpenAI client (can be replaced with other LLM providers)
+         llm = OpenAI(
+             model="gpt-3.5-turbo",
+             temperature=0.7,
+             api_key=os.getenv("OPENAI_API_KEY")
+         )
+
+         # Create service context
+         service_context = ServiceContext.from_defaults(llm=llm)
+
+         # Load documents (adjust path as needed)
+         if os.path.exists("data"):
+             documents = SimpleDirectoryReader("data").load_data()
+             app.state.index = VectorStoreIndex.from_documents(
+                 documents,
+                 service_context=service_context
+             )
+         else:
+             # Create an empty index if no documents are present
+             app.state.index = VectorStoreIndex([], service_context=service_context)
+
+         # similarity_top_k configures the retriever, so set it here rather than in query()
+         app.state.query_engine = app.state.index.as_query_engine(similarity_top_k=3)
+         logger.info("LlamaIndex initialization completed successfully")
+     except Exception as e:
+         logger.error(f"Error during startup: {str(e)}")
+         raise
+
+ @app.get("/health")
+ async def health_check():
+     return {"status": "healthy"}
+
+ @app.post("/chat", response_model=ChatResponse)
+ async def chat_endpoint(request: ChatRequest):
+     try:
+         # Extract the last user message
+         last_message = next(
+             (msg.content for msg in reversed(request.messages) if msg.role == "user"),
+             None
+         )
+
+         if not last_message:
+             raise HTTPException(
+                 status_code=400,
+                 detail="No user message found in the conversation"
+             )
+
+         # Get response from LlamaIndex
+         response = app.state.query_engine.query(last_message)
+
+         # Format response
+         return ChatResponse(
+             content=str(response),
+             stop=None,
+             usage={
+                 "prompt_tokens": 0,  # Add actual token counting if needed
+                 "completion_tokens": 0,
+                 "total_tokens": 0
+             }
+         )
+
+     except HTTPException:
+         # Let deliberate HTTP errors (like the 400 above) pass through unchanged
+         raise
+     except Exception as e:
+         logger.error(f"Error processing chat request: {str(e)}")
+         raise HTTPException(status_code=500, detail=str(e))
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=7860)
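To exercise both endpoints without deploying, a minimal smoke-test sketch using FastAPI's `TestClient` may help (an assumption on my part, not part of this commit; at these pinned versions Starlette's test client also needs the `requests` package, and the startup hook builds the index, so OPENAI_API_KEY should be set in the environment):

```python
# test_app.py (hypothetical test sketch, not part of this commit)
from fastapi.testclient import TestClient

from app import app


def test_health():
    # Using TestClient as a context manager runs the startup event,
    # which builds the index and query engine
    with TestClient(app) as client:
        resp = client.get("/health")
        assert resp.status_code == 200
        assert resp.json() == {"status": "healthy"}


def test_chat_rejects_conversations_without_a_user_message():
    with TestClient(app) as client:
        # No "user" role present, so the endpoint should return 400
        resp = client.post(
            "/chat",
            json={"messages": [{"role": "system", "content": "hi"}]},
        )
        assert resp.status_code == 400
```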
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ fastapi==0.68.0
+ uvicorn==0.15.0
+ llama-index==0.9.3
+ python-dotenv==0.19.0
+ pydantic==1.8.2
+ openai==1.3.0
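One loose end worth noting: python-dotenv is pinned above but never imported in app.py, while .gitignore excludes `.env`, so the intent is presumably to load OPENAI_API_KEY from a local `.env` file during development. A minimal sketch of that wiring (hypothetical, not part of this commit):

```python
# run_local.py (hypothetical launcher, not part of this commit)
from dotenv import load_dotenv

# Copy KEY=value pairs from a local .env file into os.environ
# (including OPENAI_API_KEY) before the app's startup event reads them
load_dotenv()

import uvicorn

from app import app

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
```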