Commit 2636575 by Praneeth Yerrapragada
feat: repo setup
Files changed:
- .env.example +45 -0
- .gitignore +4 -0
- Dockerfile +26 -0
- README.md +101 -0
- app/__init__.py +0 -0
- app/api/__init__.py +0 -0
- app/api/routers/__init__.py +0 -0
- app/api/routers/chat.py +148 -0
- app/api/routers/messaging.py +141 -0
- app/api/routers/vercel_response.py +29 -0
- app/engine/__init__.py +23 -0
- app/engine/generate.py +80 -0
- app/engine/index.py +17 -0
- app/engine/loaders/__init__.py +39 -0
- app/engine/loaders/db.py +26 -0
- app/engine/loaders/file.py +57 -0
- app/engine/loaders/web.py +36 -0
- app/engine/vectordb.py +19 -0
- app/observability.py +5 -0
- app/settings.py +96 -0
- config/loaders.yaml +3 -0
- main.py +0 -0
- pyproject.toml +39 -0
- tests/__init__.py +0 -0
.env.example
ADDED
@@ -0,0 +1,45 @@
+# The Llama Cloud API key.
+LLAMA_CLOUD_API_KEY=
+
+# The provider for the AI models to use.
+MODEL_PROVIDER=openai
+
+# The name of the LLM model to use.
+MODEL=gpt-3.5-turbo
+
+# Name of the embedding model to use.
+EMBEDDING_MODEL=text-embedding-3-large
+
+# Dimension of the embedding model to use.
+EMBEDDING_DIM=1024
+
+# The OpenAI API key to use.
+OPENAI_API_KEY=
+
+# Temperature for sampling from the model.
+# LLM_TEMPERATURE=
+
+# Maximum number of tokens to generate.
+# LLM_MAX_TOKENS=
+
+# The number of similar embeddings to return when retrieving documents.
+TOP_K=3
+
+# Custom system prompt.
+# Example:
+# SYSTEM_PROMPT="You are a helpful assistant who helps users with their questions."
+# SYSTEM_PROMPT=
+
+# Configuration for Pinecone vector store
+# The Pinecone API key.
+# PINECONE_API_KEY=
+
+# PINECONE_ENVIRONMENT=
+
+# PINECONE_INDEX_NAME=
+
+# The address to start the backend app.
+APP_HOST=0.0.0.0
+
+# The port to start the backend app.
+APP_PORT=8000
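For reference, the app consumes these variables at startup via python-dotenv, as `main.py` and `app/settings.py` below do. A minimal sketch:

```python
# Minimal sketch of how the app reads this file (mirrors main.py and
# app/settings.py): python-dotenv copies .env into the process environment,
# then individual settings are read with plain os.getenv.
import os
from dotenv import load_dotenv

load_dotenv()  # loads .env from the current working directory

provider = os.getenv("MODEL_PROVIDER")      # e.g. "openai"
port = int(os.getenv("APP_PORT", "8000"))   # falls back to 8000 if unset
print(provider, port)
```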
.gitignore
ADDED
@@ -0,0 +1,4 @@
+__pycache__
+storage
+.env
+data/*
Dockerfile
ADDED
@@ -0,0 +1,26 @@
+FROM python:3.11 as build
+
+WORKDIR /app
+
+ENV PYTHONPATH=/app
+
+# Install Poetry
+RUN curl -sSL https://install.python-poetry.org | POETRY_HOME=/opt/poetry python && \
+    cd /usr/local/bin && \
+    ln -s /opt/poetry/bin/poetry && \
+    poetry config virtualenvs.create false
+
+# Install Chromium for the web loader
+# You can disable this if you don't use the web loader, to reduce the image size
+RUN apt update && apt install -y chromium chromium-driver
+
+# Install dependencies
+COPY ./pyproject.toml ./poetry.lock* /app/
+RUN poetry install --no-root --no-cache --only main
+
+# ====================================
+FROM build as release
+
+COPY . .
+
+CMD ["python", "main.py"]
README.md
ADDED
@@ -0,0 +1,101 @@
+This is a [LlamaIndex](https://www.llamaindex.ai/) project using [FastAPI](https://fastapi.tiangolo.com/) bootstrapped with [`create-llama`](https://github.com/run-llama/LlamaIndexTS/tree/main/packages/create-llama).
+
+## Getting Started
+
+First, set up the environment with Poetry:
+
+> **_Note:_** This step is not needed if you are using the dev-container.
+
+```
+poetry install
+poetry shell
+```
+
+Then check the parameters that have been pre-configured in the `.env` file in this directory (e.g., you might need to configure an `OPENAI_API_KEY` if you're using OpenAI as the model provider).
+
+If you are using any tools or data sources, you can update their config files in the `config` folder.
+
+Second, generate the embeddings of the documents in the `./data` directory (if this folder exists - otherwise, skip this step):
+
+```
+poetry run generate
+```
+
+Third, run the development server:
+
+```
+python main.py
+```
+
+The example provides two different API endpoints:
+
+1. `/api/chat` - a streaming chat endpoint
+2. `/api/chat/request` - a non-streaming chat endpoint
+
+You can test the streaming endpoint with the following curl request:
+
+```
+curl --location 'localhost:8000/api/chat' \
+--header 'Content-Type: application/json' \
+--data '{ "messages": [{ "role": "user", "content": "Hello" }] }'
+```
+
+And for the non-streaming endpoint run:
+
+```
+curl --location 'localhost:8000/api/chat/request' \
+--header 'Content-Type: application/json' \
+--data '{ "messages": [{ "role": "user", "content": "Hello" }] }'
+```
+
+You can start editing the API endpoints by modifying `app/api/routers/chat.py`. The endpoints auto-update as you save the file. You can delete the endpoint you're not using.
+
+Open [http://localhost:8000/docs](http://localhost:8000/docs) with your browser to see the Swagger UI of the API.
+
+The API allows CORS for all origins to simplify development. You can change this behavior by setting the `ENVIRONMENT` environment variable to `prod`:
+
+```
+ENVIRONMENT=prod python main.py
+```
+
+## Using Docker
+
+1. Build an image for the FastAPI app:
+
+```
+docker build -t <your_backend_image_name> .
+```
+
+2. Generate embeddings:
+
+Parse the data and generate the vector embeddings if the `./data` folder exists - otherwise, skip this step:
+
+```
+docker run \
+  --rm \
+  -v $(pwd)/.env:/app/.env \ # Use ENV variables and configuration from your file-system
+  -v $(pwd)/config:/app/config \
+  -v $(pwd)/data:/app/data \ # Use your local folder to read the data
+  -v $(pwd)/storage:/app/storage \ # Use your file system to store the vector database
+  <your_backend_image_name> \
+  poetry run generate
+```
+
+3. Start the API:
+
+```
+docker run \
+  -v $(pwd)/.env:/app/.env \ # Use ENV variables and configuration from your file-system
+  -v $(pwd)/config:/app/config \
+  -v $(pwd)/storage:/app/storage \ # Use your file system to store the vector database
+  -p 8000:8000 \
+  <your_backend_image_name>
+```
+
+## Learn More
+
+To learn more about LlamaIndex, take a look at the following resources:
+
+- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex.
+
+You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
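Beyond the curl examples in the README, the streaming endpoint can also be consumed programmatically. A hedged sketch using `httpx` (not a dependency of this project; install it separately): text tokens arrive on lines prefixed `0:` and data events on lines prefixed `8:`, matching `app/api/routers/vercel_response.py` below.

```python
import json
import httpx

payload = {"messages": [{"role": "user", "content": "Hello"}]}
with httpx.stream(
    "POST", "http://localhost:8000/api/chat", json=payload, timeout=60
) as response:
    for line in response.iter_lines():
        if line.startswith("0:"):     # a streamed text token (a JSON string)
            print(json.loads(line[2:]), end="", flush=True)
        elif line.startswith("8:"):   # a data event (sources, tool calls, events)
            print("\n[data]", json.loads(line[2:]))
```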
app/__init__.py
ADDED
File without changes
app/api/__init__.py
ADDED
File without changes
app/api/routers/__init__.py
ADDED
File without changes
app/api/routers/chat.py
ADDED
@@ -0,0 +1,148 @@
+from pydantic import BaseModel
+from typing import List, Any, Optional, Dict, Tuple
+from fastapi import APIRouter, Depends, HTTPException, Request, status
+from llama_index.core.chat_engine.types import BaseChatEngine
+from llama_index.core.schema import NodeWithScore
+from llama_index.core.llms import ChatMessage, MessageRole
+from app.engine import get_chat_engine
+from app.api.routers.vercel_response import VercelStreamResponse
+from app.api.routers.messaging import EventCallbackHandler
+from aiostream import stream
+
+chat_router = r = APIRouter()
+
+
+class _Message(BaseModel):
+    role: MessageRole
+    content: str
+
+
+class _ChatData(BaseModel):
+    messages: List[_Message]
+
+    class Config:
+        json_schema_extra = {
+            "example": {
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": "What standards for letters exist?",
+                    }
+                ]
+            }
+        }
+
+
+class _SourceNodes(BaseModel):
+    id: str
+    metadata: Dict[str, Any]
+    score: Optional[float]
+    text: str
+
+    @classmethod
+    def from_source_node(cls, source_node: NodeWithScore):
+        return cls(
+            id=source_node.node.node_id,
+            metadata=source_node.node.metadata,
+            score=source_node.score,
+            text=source_node.node.text,  # type: ignore
+        )
+
+    @classmethod
+    def from_source_nodes(cls, source_nodes: List[NodeWithScore]):
+        return [cls.from_source_node(node) for node in source_nodes]
+
+
+class _Result(BaseModel):
+    result: _Message
+    nodes: List[_SourceNodes]
+
+
+async def parse_chat_data(data: _ChatData) -> Tuple[str, List[ChatMessage]]:
+    # check preconditions and get last message
+    if len(data.messages) == 0:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="No messages provided",
+        )
+    last_message = data.messages.pop()
+    if last_message.role != MessageRole.USER:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Last message must be from user",
+        )
+    # convert messages coming from the request to type ChatMessage
+    messages = [
+        ChatMessage(
+            role=m.role,
+            content=m.content,
+        )
+        for m in data.messages
+    ]
+    return last_message.content, messages
+
+
+# streaming endpoint - delete if not needed
+@r.post("")
+async def chat(
+    request: Request,
+    data: _ChatData,
+    chat_engine: BaseChatEngine = Depends(get_chat_engine),
+):
+    last_message_content, messages = await parse_chat_data(data)
+
+    event_handler = EventCallbackHandler()
+    chat_engine.callback_manager.handlers.append(event_handler)  # type: ignore
+    response = await chat_engine.astream_chat(last_message_content, messages)
+
+    async def content_generator():
+        # Yield the text response
+        async def _text_generator():
+            async for token in response.async_response_gen():
+                yield VercelStreamResponse.convert_text(token)
+            # the text_generator is the leading stream, once it's finished, also finish the event stream
+            event_handler.is_done = True
+
+        # Yield the events from the event handler
+        async def _event_generator():
+            async for event in event_handler.async_event_gen():
+                event_response = event.to_response()
+                if event_response is not None:
+                    yield VercelStreamResponse.convert_data(event_response)
+
+        combine = stream.merge(_text_generator(), _event_generator())
+        async with combine.stream() as streamer:
+            async for item in streamer:
+                if await request.is_disconnected():
+                    break
+                yield item
+
+        # Yield the source nodes
+        yield VercelStreamResponse.convert_data(
+            {
+                "type": "sources",
+                "data": {
+                    "nodes": [
+                        _SourceNodes.from_source_node(node).dict()
+                        for node in response.source_nodes
+                    ]
+                },
+            }
+        )
+
+    return VercelStreamResponse(content=content_generator())
+
+
+# non-streaming endpoint - delete if not needed
+@r.post("/request")
+async def chat_request(
+    data: _ChatData,
+    chat_engine: BaseChatEngine = Depends(get_chat_engine),
+) -> _Result:
+    last_message_content, messages = await parse_chat_data(data)
+
+    response = await chat_engine.achat(last_message_content, messages)
+    return _Result(
+        result=_Message(role=MessageRole.ASSISTANT, content=response.response),
+        nodes=_SourceNodes.from_source_nodes(response.source_nodes),
    )
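The interesting part of the streaming endpoint is `stream.merge`, which interleaves the token stream with the callback-event stream. A self-contained sketch of that pattern, using the same aiostream calls as the handler above (generator names here are illustrative only):

```python
# Two async generators are interleaved into a single stream; items are
# yielded in whichever order they become available.
import asyncio
from aiostream import stream

async def letters():
    for c in "ab":
        await asyncio.sleep(0.01)
        yield c

async def numbers():
    for n in range(2):
        await asyncio.sleep(0.015)
        yield n

async def main():
    combined = stream.merge(letters(), numbers())
    async with combined.stream() as streamer:
        async for item in streamer:
            print(item)

asyncio.run(main())
```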
app/api/routers/messaging.py
ADDED
@@ -0,0 +1,141 @@
+import json
+import asyncio
+from typing import AsyncGenerator, Dict, Any, List, Optional
+from llama_index.core.callbacks.base import BaseCallbackHandler
+from llama_index.core.callbacks.schema import CBEventType
+from llama_index.core.tools.types import ToolOutput
+from pydantic import BaseModel
+
+
+class CallbackEvent(BaseModel):
+    event_type: CBEventType
+    payload: Optional[Dict[str, Any]] = None
+    event_id: str = ""
+
+    def get_retrieval_message(self) -> dict | None:
+        if self.payload:
+            nodes = self.payload.get("nodes")
+            if nodes:
+                msg = f"Retrieved {len(nodes)} sources to use as context for the query"
+            else:
+                msg = f"Retrieving context for query: '{self.payload.get('query_str')}'"
+            return {
+                "type": "events",
+                "data": {"title": msg},
+            }
+        else:
+            return None
+
+    def get_tool_message(self) -> dict | None:
+        func_call_args = self.payload.get("function_call") if self.payload else None
+        if func_call_args is not None and "tool" in self.payload:
+            tool = self.payload.get("tool")
+            return {
+                "type": "events",
+                "data": {
+                    "title": f"Calling tool: {tool.name} with inputs: {func_call_args}",
+                },
+            }
+
+    def _is_output_serializable(self, output: Any) -> bool:
+        try:
+            json.dumps(output)
+            return True
+        except TypeError:
+            return False
+
+    def get_agent_tool_response(self) -> dict | None:
+        response = self.payload.get("response") if self.payload else None
+        if response is not None:
+            sources = response.sources
+            for source in sources:
+                # Return the tool response here to include the toolCall information
+                if isinstance(source, ToolOutput):
+                    if self._is_output_serializable(source.raw_output):
+                        output = source.raw_output
+                    else:
+                        output = source.content
+
+                    return {
+                        "type": "tools",
+                        "data": {
+                            "toolOutput": {
+                                "output": output,
+                                "isError": source.is_error,
+                            },
+                            "toolCall": {
+                                "id": None,  # There is no tool id in the ToolOutput
+                                "name": source.tool_name,
+                                "input": source.raw_input,
+                            },
+                        },
+                    }
+
+    def to_response(self):
+        match self.event_type:
+            case "retrieve":
+                return self.get_retrieval_message()
+            case "function_call":
+                return self.get_tool_message()
+            case "agent_step":
+                return self.get_agent_tool_response()
+            case _:
+                return None
+
+
+class EventCallbackHandler(BaseCallbackHandler):
+    _aqueue: asyncio.Queue
+    is_done: bool = False
+
+    def __init__(
+        self,
+    ):
+        """Initialize the base callback handler."""
+        ignored_events = [
+            CBEventType.CHUNKING,
+            CBEventType.NODE_PARSING,
+            CBEventType.EMBEDDING,
+            CBEventType.LLM,
+            CBEventType.TEMPLATING,
+        ]
+        super().__init__(ignored_events, ignored_events)
+        self._aqueue = asyncio.Queue()
+
+    def on_event_start(
+        self,
+        event_type: CBEventType,
+        payload: Optional[Dict[str, Any]] = None,
+        event_id: str = "",
+        **kwargs: Any,
+    ) -> str:
+        event = CallbackEvent(event_id=event_id, event_type=event_type, payload=payload)
+        if event.to_response() is not None:
+            self._aqueue.put_nowait(event)
+
+    def on_event_end(
+        self,
+        event_type: CBEventType,
+        payload: Optional[Dict[str, Any]] = None,
+        event_id: str = "",
+        **kwargs: Any,
+    ) -> None:
+        event = CallbackEvent(event_id=event_id, event_type=event_type, payload=payload)
+        if event.to_response() is not None:
+            self._aqueue.put_nowait(event)
+
+    def start_trace(self, trace_id: Optional[str] = None) -> None:
+        """No-op."""
+
+    def end_trace(
+        self,
+        trace_id: Optional[str] = None,
+        trace_map: Optional[Dict[str, List[str]]] = None,
+    ) -> None:
+        """No-op."""
+
+    async def async_event_gen(self) -> AsyncGenerator[CallbackEvent, None]:
+        while not self._aqueue.empty() or not self.is_done:
+            try:
+                yield await asyncio.wait_for(self._aqueue.get(), timeout=0.1)
+            except asyncio.TimeoutError:
+                pass
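`async_event_gen` is a small producer/consumer handoff: callbacks enqueue events synchronously, while the generator drains the queue until `is_done` is flipped by the text stream in `chat.py`. A standalone sketch of the same loop shape (all names here are illustrative, not part of the app):

```python
import asyncio

async def demo():
    queue: asyncio.Queue = asyncio.Queue()
    done = False

    async def producer():
        nonlocal done
        for i in range(3):
            await asyncio.sleep(0.05)
            queue.put_nowait(f"event-{i}")
        done = True  # mirrors event_handler.is_done = True in chat.py

    async def consumer():
        # Same loop as async_event_gen: poll with a short timeout so the
        # loop notices `done` even while the queue is empty.
        while not queue.empty() or not done:
            try:
                print(await asyncio.wait_for(queue.get(), timeout=0.1))
            except asyncio.TimeoutError:
                pass

    await asyncio.gather(producer(), consumer())

asyncio.run(demo())
```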
app/api/routers/vercel_response.py
ADDED
@@ -0,0 +1,29 @@
+import json
+from typing import Any
+from fastapi.responses import StreamingResponse
+
+
+class VercelStreamResponse(StreamingResponse):
+    """
+    Class to convert the response from the chat engine to the streaming format expected by Vercel
+    """
+
+    TEXT_PREFIX = "0:"
+    DATA_PREFIX = "8:"
+
+    @classmethod
+    def convert_text(cls, token: str):
+        # Escape newlines and double quotes to avoid breaking the stream
+        token = json.dumps(token)
+        return f"{cls.TEXT_PREFIX}{token}\n"
+
+    @classmethod
+    def convert_data(cls, data: dict):
+        data_str = json.dumps(data)
+        return f"{cls.DATA_PREFIX}[{data_str}]\n"
+
+    def __init__(self, content: Any, **kwargs):
+        super().__init__(
+            content=content,
+            **kwargs,
+        )
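Concretely, the two helpers produce newline-delimited protocol lines; the expected outputs are shown as comments (derived from `json.dumps` behavior, assuming the class above is importable):

```python
from app.api.routers.vercel_response import VercelStreamResponse

print(VercelStreamResponse.convert_text('Hello "world"\n'), end="")
# 0:"Hello \"world\"\n"

print(
    VercelStreamResponse.convert_data(
        {"type": "events", "data": {"title": "Retrieved 3 sources"}}
    ),
    end="",
)
# 8:[{"type": "events", "data": {"title": "Retrieved 3 sources"}}]
```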
app/engine/__init__.py
ADDED
@@ -0,0 +1,23 @@
+import os
+from app.engine.index import get_index
+from fastapi import HTTPException
+
+
+def get_chat_engine():
+    system_prompt = os.getenv("SYSTEM_PROMPT")
+    top_k = os.getenv("TOP_K", 3)
+
+    index = get_index()
+    if index is None:
+        raise HTTPException(
+            status_code=500,
+            detail=str(
+                "StorageContext is empty - call 'poetry run generate' to generate the storage first"
+            ),
+        )
+
+    return index.as_chat_engine(
+        similarity_top_k=int(top_k),
+        system_prompt=system_prompt,
+        chat_mode="condense_plus_context",
+    )
app/engine/generate.py
ADDED
@@ -0,0 +1,80 @@
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import os
+import logging
+from llama_index.core.settings import Settings
+from llama_index.core.ingestion import IngestionPipeline
+from llama_index.core.node_parser import SentenceSplitter
+from llama_index.core.storage.docstore import SimpleDocumentStore
+from llama_index.core.storage import StorageContext
+from app.settings import init_settings
+from app.engine.loaders import get_documents
+from app.engine.vectordb import get_vector_store
+
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger()
+
+STORAGE_DIR = os.getenv("STORAGE_DIR", "storage")
+
+
+def get_doc_store():
+    # If the storage directory is there, load the document store from it.
+    # If not, set up an in-memory document store since we can't load from a directory that doesn't exist.
+    if os.path.exists(STORAGE_DIR):
+        return SimpleDocumentStore.from_persist_dir(STORAGE_DIR)
+    else:
+        return SimpleDocumentStore()
+
+
+def run_pipeline(docstore, vector_store, documents):
+    pipeline = IngestionPipeline(
+        transformations=[
+            SentenceSplitter(
+                chunk_size=Settings.chunk_size,
+                chunk_overlap=Settings.chunk_overlap,
+            ),
+            Settings.embed_model,
+        ],
+        docstore=docstore,
+        docstore_strategy="upserts_and_delete",
+        vector_store=vector_store,
+    )
+
+    # Run the ingestion pipeline and store the results
+    nodes = pipeline.run(show_progress=True, documents=documents)
+
+    return nodes
+
+
+def persist_storage(docstore, vector_store):
+    storage_context = StorageContext.from_defaults(
+        docstore=docstore,
+        vector_store=vector_store,
+    )
+    storage_context.persist(STORAGE_DIR)
+
+
+def generate_datasource():
+    init_settings()
+    logger.info("Generate index for the provided data")
+
+    # Get the stores and documents or create new ones
+    documents = get_documents()
+    docstore = get_doc_store()
+    vector_store = get_vector_store()
+
+    # Run the ingestion pipeline
+    _ = run_pipeline(docstore, vector_store, documents)
+
+    # Build the index and persist storage
+    persist_storage(docstore, vector_store)
+
+    logger.info("Finished generating the index")
+
+
+if __name__ == "__main__":
+    generate_datasource()
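One design note: because the pipeline is given a docstore with `docstore_strategy="upserts_and_delete"`, and the file loader sets `filename_as_id=True`, re-running ingestion should only re-embed documents whose content changed and drop vectors for documents removed from `./data`. Invoking it programmatically is equivalent to `poetry run generate` (wired up in `[tool.poetry.scripts]` in `pyproject.toml` below):

```python
# Programmatic equivalent of `poetry run generate`.
from app.engine.generate import generate_datasource

generate_datasource()
```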
app/engine/index.py
ADDED
@@ -0,0 +1,17 @@
+import logging
+from llama_index.core.indices import VectorStoreIndex
+from app.engine.vectordb import get_vector_store
+
+
+logger = logging.getLogger("uvicorn")
+
+
+def get_index():
+    logger.info("Connecting vector store...")
+    store = get_vector_store()
+    # Load the index from the vector store
+    # If you are using a vector store that doesn't store text,
+    # you must load the index from both the vector store and the document store
+    index = VectorStoreIndex.from_vector_store(store)
+    logger.info("Finished loading index from vector store.")
+    return index
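For vector stores that don't store the node text (the comment above), a hedged sketch of the variant that also loads the docstore persisted by `poetry run generate`. `load_index_from_storage` and the `persist_dir` argument are standard llama-index APIs, but this path is an assumption here, not exercised by the app:

```python
# Hypothetical variant for text-less vector stores: combine the persisted
# docstore (under STORAGE_DIR, "storage" by default) with the vector store.
from llama_index.core import StorageContext, load_index_from_storage
from app.engine.vectordb import get_vector_store

storage_context = StorageContext.from_defaults(
    persist_dir="storage", vector_store=get_vector_store()
)
index = load_index_from_storage(storage_context)
```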
app/engine/loaders/__init__.py
ADDED
@@ -0,0 +1,39 @@
+import os
+import yaml
+import importlib
+import logging
+from typing import Dict
+from app.engine.loaders.file import FileLoaderConfig, get_file_documents
+from app.engine.loaders.web import WebLoaderConfig, get_web_documents
+from app.engine.loaders.db import DBLoaderConfig, get_db_documents
+
+logger = logging.getLogger(__name__)
+
+
+def load_configs():
+    with open("config/loaders.yaml") as f:
+        configs = yaml.safe_load(f)
+    return configs
+
+
+def get_documents():
+    documents = []
+    config = load_configs()
+    for loader_type, loader_config in config.items():
+        logger.info(
+            f"Loading documents from loader: {loader_type}, config: {loader_config}"
+        )
+        match loader_type:
+            case "file":
+                document = get_file_documents(FileLoaderConfig(**loader_config))
+            case "web":
+                document = get_web_documents(WebLoaderConfig(**loader_config))
+            case "db":
+                document = get_db_documents(
+                    configs=[DBLoaderConfig(**cfg) for cfg in loader_config]
+                )
+            case _:
+                raise ValueError(f"Invalid loader type: {loader_type}")
+        documents.extend(document)
+
+    return documents
app/engine/loaders/db.py
ADDED
@@ -0,0 +1,26 @@
+import os
+import logging
+from typing import List
+from pydantic import BaseModel, validator
+from llama_index.core.indices.vector_store import VectorStoreIndex
+
+logger = logging.getLogger(__name__)
+
+
+class DBLoaderConfig(BaseModel):
+    uri: str
+    queries: List[str]
+
+
+def get_db_documents(configs: list[DBLoaderConfig]):
+    from llama_index.readers.database import DatabaseReader
+
+    docs = []
+    for entry in configs:
+        loader = DatabaseReader(uri=entry.uri)
+        for query in entry.queries:
+            logger.info(f"Loading data from database with query: {query}")
+            documents = loader.load_data(query=query)
+            docs.extend(documents)
+
+    return docs
app/engine/loaders/file.py
ADDED
@@ -0,0 +1,57 @@
+import os
+import logging
+from llama_parse import LlamaParse
+from pydantic import BaseModel, validator
+
+logger = logging.getLogger(__name__)
+
+
+class FileLoaderConfig(BaseModel):
+    data_dir: str = "data"
+    use_llama_parse: bool = False
+
+    @validator("data_dir")
+    def data_dir_must_exist(cls, v):
+        if not os.path.isdir(v):
+            raise ValueError(f"Directory '{v}' does not exist")
+        return v
+
+
+def llama_parse_parser():
+    if os.getenv("LLAMA_CLOUD_API_KEY") is None:
+        raise ValueError(
+            "LLAMA_CLOUD_API_KEY environment variable is not set. "
+            "Please set it in the .env file or in your shell environment, then run again!"
+        )
+    parser = LlamaParse(result_type="markdown", verbose=True, language="en")
+    return parser
+
+
+def get_file_documents(config: FileLoaderConfig):
+    from llama_index.core.readers import SimpleDirectoryReader
+
+    try:
+        reader = SimpleDirectoryReader(
+            config.data_dir,
+            recursive=True,
+            filename_as_id=True,
+        )
+        if config.use_llama_parse:
+            parser = llama_parse_parser()
+            reader.file_extractor = {".pdf": parser}
+        return reader.load_data()
+    except ValueError as e:
+        import sys, traceback
+
+        # Catch the error if the data dir is empty
+        # and return as empty document list
+        _, _, exc_traceback = sys.exc_info()
+        function_name = traceback.extract_tb(exc_traceback)[-1].name
+        if function_name == "_add_files":
+            logger.warning(
+                f"Failed to load file documents, error message: {e}. Returning an empty document list."
+            )
+            return []
+        else:
+            # Raise the error if it is not the case of empty data dir
+            raise e
app/engine/loaders/web.py
ADDED
@@ -0,0 +1,36 @@
+import os
+import json
+from pydantic import BaseModel, Field
+
+
+class CrawlUrl(BaseModel):
+    base_url: str
+    prefix: str
+    max_depth: int = Field(default=1, ge=0)
+
+
+class WebLoaderConfig(BaseModel):
+    driver_arguments: list[str] = Field(default=None)
+    urls: list[CrawlUrl]
+
+
+def get_web_documents(config: WebLoaderConfig):
+    from llama_index.readers.web import WholeSiteReader
+    from selenium import webdriver
+    from selenium.webdriver.chrome.options import Options
+
+    options = Options()
+    driver_arguments = config.driver_arguments or []
+    for arg in driver_arguments:
+        options.add_argument(arg)
+
+    docs = []
+    for url in config.urls:
+        scraper = WholeSiteReader(
+            prefix=url.prefix,
+            max_depth=url.max_depth,
+            driver=webdriver.Chrome(options=options),
+        )
+        docs.extend(scraper.load_data(url.base_url))
+
+    return docs
app/engine/vectordb.py
ADDED
@@ -0,0 +1,19 @@
+import os
+from llama_index.vector_stores.pinecone import PineconeVectorStore
+
+
+def get_vector_store():
+    api_key = os.getenv("PINECONE_API_KEY")
+    index_name = os.getenv("PINECONE_INDEX_NAME")
+    environment = os.getenv("PINECONE_ENVIRONMENT")
+    if not api_key or not index_name or not environment:
+        raise ValueError(
+            "Please set PINECONE_API_KEY, PINECONE_INDEX_NAME, and PINECONE_ENVIRONMENT"
+            " in your environment variables or configure them in the .env file"
+        )
+    store = PineconeVectorStore(
+        api_key=api_key,
+        index_name=index_name,
+        environment=environment,
+    )
+    return store
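The store expects an existing Pinecone index whose dimension matches `EMBEDDING_DIM` (1024 in `.env.example`). A hedged sketch of creating one, assuming the classic v2 `pinecone-client` API implied by the `environment` parameter above (newer clients use a `Pinecone` class instead):

```python
import os
import pinecone  # v2-style client; the API differs in pinecone-client >= 3

pinecone.init(
    api_key=os.environ["PINECONE_API_KEY"],
    environment=os.environ["PINECONE_ENVIRONMENT"],
)
# The dimension must match the embedding model (EMBEDDING_DIM).
pinecone.create_index(
    os.environ["PINECONE_INDEX_NAME"], dimension=1024, metric="cosine"
)
```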
app/observability.py
ADDED
@@ -0,0 +1,5 @@
+from traceloop.sdk import Traceloop
+
+
+def init_observability():
+    Traceloop.init()
app/settings.py
ADDED
@@ -0,0 +1,96 @@
+import os
+from typing import Dict
+from llama_index.core.settings import Settings
+
+
+def init_settings():
+    model_provider = os.getenv("MODEL_PROVIDER")
+    if model_provider == "openai":
+        init_openai()
+    elif model_provider == "ollama":
+        init_ollama()
+    elif model_provider == "anthropic":
+        init_anthropic()
+    elif model_provider == "gemini":
+        init_gemini()
+    else:
+        raise ValueError(f"Invalid model provider: {model_provider}")
+    Settings.chunk_size = int(os.getenv("CHUNK_SIZE", "1024"))
+    Settings.chunk_overlap = int(os.getenv("CHUNK_OVERLAP", "20"))
+
+
+def init_ollama():
+    from llama_index.llms.ollama import Ollama
+    from llama_index.embeddings.ollama import OllamaEmbedding
+
+    base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434"
+    Settings.embed_model = OllamaEmbedding(
+        base_url=base_url,
+        model_name=os.getenv("EMBEDDING_MODEL"),
+    )
+    Settings.llm = Ollama(base_url=base_url, model=os.getenv("MODEL"))
+
+
+def init_openai():
+    from llama_index.llms.openai import OpenAI
+    from llama_index.embeddings.openai import OpenAIEmbedding
+    from llama_index.core.constants import DEFAULT_TEMPERATURE
+
+    max_tokens = os.getenv("LLM_MAX_TOKENS")
+    config = {
+        "model": os.getenv("MODEL"),
+        "temperature": float(os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)),
+        "max_tokens": int(max_tokens) if max_tokens is not None else None,
+    }
+    Settings.llm = OpenAI(**config)
+
+    dimensions = os.getenv("EMBEDDING_DIM")
+    config = {
+        "model": os.getenv("EMBEDDING_MODEL"),
+        "dimensions": int(dimensions) if dimensions is not None else None,
+    }
+    Settings.embed_model = OpenAIEmbedding(**config)
+
+
+def init_anthropic():
+    from llama_index.llms.anthropic import Anthropic
+    from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+
+    model_map: Dict[str, str] = {
+        "claude-3-opus": "claude-3-opus-20240229",
+        "claude-3-sonnet": "claude-3-sonnet-20240229",
+        "claude-3-haiku": "claude-3-haiku-20240307",
+        "claude-2.1": "claude-2.1",
+        "claude-instant-1.2": "claude-instant-1.2",
+    }
+
+    embed_model_map: Dict[str, str] = {
+        "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
+        "all-mpnet-base-v2": "sentence-transformers/all-mpnet-base-v2",
+    }
+
+    Settings.llm = Anthropic(model=model_map[os.getenv("MODEL")])
+    Settings.embed_model = HuggingFaceEmbedding(
+        model_name=embed_model_map[os.getenv("EMBEDDING_MODEL")]
+    )
+
+
+def init_gemini():
+    from llama_index.llms.gemini import Gemini
+    from llama_index.embeddings.gemini import GeminiEmbedding
+
+    model_map: Dict[str, str] = {
+        "gemini-1.5-pro-latest": "models/gemini-1.5-pro-latest",
+        "gemini-pro": "models/gemini-pro",
+        "gemini-pro-vision": "models/gemini-pro-vision",
+    }
+
+    embed_model_map: Dict[str, str] = {
+        "embedding-001": "models/embedding-001",
+        "text-embedding-004": "models/text-embedding-004",
+    }
+
+    Settings.llm = Gemini(model=model_map[os.getenv("MODEL")])
+    Settings.embed_model = GeminiEmbedding(
+        model_name=embed_model_map[os.getenv("EMBEDDING_MODEL")]
+    )
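For providers other than OpenAI, `MODEL` and `EMBEDDING_MODEL` must be keys of the maps above. A hypothetical `.env` combination for Anthropic (the Anthropic SDK additionally reads `ANTHROPIC_API_KEY` from the environment, which is an assumption beyond this file):

```
MODEL_PROVIDER=anthropic
MODEL=claude-3-sonnet
EMBEDDING_MODEL=all-MiniLM-L6-v2
ANTHROPIC_API_KEY=
```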
config/loaders.yaml
ADDED
@@ -0,0 +1,3 @@
+file:
+  # use_llama_parse: Use LlamaParse if `true`. Needs a `LLAMA_CLOUD_API_KEY` from https://cloud.llamaindex.ai set as an environment variable
+  use_llama_parse: true
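The dispatcher in `app/engine/loaders/__init__.py` also understands `web` and `db` sections. A hypothetical extended config, with field names taken from `WebLoaderConfig`/`CrawlUrl` and `DBLoaderConfig` above (the URLs and query are placeholders):

```yaml
file:
  use_llama_parse: false
web:
  driver_arguments:
    - "--headless"   # passed to Chromium via Selenium
  urls:
    - base_url: "https://docs.llamaindex.ai/en/stable"
      prefix: "https://docs.llamaindex.ai/en/stable"
      max_depth: 1
db:  # note: a list, one entry per database
  - uri: "sqlite:///example.db"
    queries:
      - "SELECT id, text FROM documents"
```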
main.py
ADDED
@@ -0,0 +1,51 @@
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import logging
+import os
+import uvicorn
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import RedirectResponse
+from app.api.routers.chat import chat_router
+from app.settings import init_settings
+from app.observability import init_observability
+from fastapi.staticfiles import StaticFiles
+
+
+app = FastAPI()
+
+init_settings()
+init_observability()
+
+environment = os.getenv("ENVIRONMENT", "dev")  # Default to 'dev' if not set
+
+if environment == "dev":
+    logger = logging.getLogger("uvicorn")
+    logger.warning("Running in development mode - allowing CORS for all origins")
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+
+    # Redirect to documentation page when accessing base URL
+    @app.get("/")
+    async def redirect_to_docs():
+        return RedirectResponse(url="/docs")
+
+
+if os.path.exists("data"):
+    app.mount("/api/data", StaticFiles(directory="data"), name="static")
+app.include_router(chat_router, prefix="/api/chat")
+
+
+if __name__ == "__main__":
+    app_host = os.getenv("APP_HOST", "0.0.0.0")
+    app_port = int(os.getenv("APP_PORT", "8000"))
+    reload = environment == "dev"
+
+    uvicorn.run(app="main:app", host=app_host, port=app_port, reload=reload)
poetry.lock
ADDED
(diff too large to render)
pyproject.toml
ADDED
@@ -0,0 +1,39 @@
+[tool]
+[tool.poetry]
+name = "app"
+version = "0.1.0"
+description = ""
+authors = [ "Marcus Schiesser <mail@marcusschiesser.de>" ]
+readme = "README.md"
+
+[tool.poetry.scripts]
+generate = "app.engine.generate:generate_datasource"
+
+[tool.poetry.dependencies]
+python = "^3.11,<3.12"
+fastapi = "^0.109.1"
+python-dotenv = "^1.0.0"
+aiostream = "^0.5.2"
+llama-index = "0.10.28"
+llama-index-core = "0.10.28"
+cachetools = "^5.3.3"
+
+[tool.poetry.dependencies.uvicorn]
+extras = [ "standard" ]
+version = "^0.23.2"
+
+[tool.poetry.dependencies.llama-index-vector-stores-pinecone]
+version = "^0.1.3"
+
+[tool.poetry.dependencies.docx2txt]
+version = "^0.8"
+
+[tool.poetry.dependencies.llama-index-agent-openai]
+version = "0.2.2"
+
+[tool.poetry.dependencies.traceloop-sdk]
+version = "^0.15.11"
+
+[build-system]
+requires = [ "poetry-core" ]
+build-backend = "poetry.core.masonry.api"
tests/__init__.py
ADDED
File without changes