Spaces: Runtime error

Commit 2402611
Parent(s): ac028e9

Upload 4 files

- Dockerfile +17 -0
- app/llama2.py +22 -0
- app/server.py +40 -0
- requirements.txt +9 -0
Dockerfile
ADDED
@@ -0,0 +1,17 @@
+FROM python:3.11-slim
+
+WORKDIR /usr/src/app
+
+COPY requirements.txt ./
+RUN pip install --upgrade pip && \
+    pip install -r requirements.txt && \
+    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+
+COPY ./app ./app
+
+# Copy the model files into the Docker image
+COPY flan-t5-small /models/flan-t5-small
+
+EXPOSE 5005
+
+CMD ["uvicorn", "app.server:app", "--host", "0.0.0.0", "--port", "5005"]
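Since the image COPYs a local flan-t5-small directory into /models, a quick pre-build check that the directory is actually a loadable Transformers checkpoint can save a rebuild cycle. A minimal Python sketch (my addition, not part of the commit), assuming the directory sits next to the Dockerfile:

# Pre-build sanity check (illustrative, not part of the commit): confirm the
# local model directory is a readable Transformers checkpoint before COPYing it.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("flan-t5-small")  # directory copied to /models/flan-t5-small
print(config.model_type)  # a Flan-T5 checkpoint reports "t5"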
app/llama2.py
ADDED
@@ -0,0 +1,22 @@
+from transformers import pipeline, LlamaForCausalLM, LlamaTokenizer
+from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
+
+# Set the model path to the location where your model files are stored
+model_path = "/models/flan-t5-small"  # Adjust this path as needed
+
+# Load the model and tokenizer
+local_model = LlamaForCausalLM.from_pretrained(model_path, return_dict=True)
+local_tokenizer = LlamaTokenizer.from_pretrained(model_path)
+
+# Create a text generation pipeline
+pipe = pipeline(
+    task="text-generation",
+    model=local_model,
+    tokenizer=local_tokenizer,
+    max_new_tokens=100,
+    repetition_penalty=1.1,
+    model_kwargs={"max_length": 1200, "temperature": 0.01}
+)
+
+# Pipeline to be consumed by Langserve API
+llm_pipeline = HuggingFacePipeline(pipeline=pipe)
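Worth flagging: flan-t5-small is a T5-family encoder-decoder checkpoint, but the committed code loads it with LlamaForCausalLM/LlamaTokenizer and a causal "text-generation" pipeline. That architecture mismatch is a plausible cause of the Space's "Runtime error" status. A minimal sketch of a load path that matches the checkpoint, keeping the same pipeline wiring (my sketch, not the committed code):

# Alternative load path (illustrative): let the Auto* classes resolve the
# checkpoint's own architecture (T5) instead of forcing Llama classes.
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

model_path = "/models/flan-t5-small"

model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# T5-family models are seq2seq, so the matching pipeline task is
# "text2text-generation" rather than the causal "text-generation".
pipe = pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=100,
    repetition_penalty=1.1,
)

llm_pipeline = HuggingFacePipeline(pipeline=pipe)

HuggingFacePipeline accepts "text2text-generation" pipelines, so the rest of server.py could stay unchanged under this variant.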
app/server.py
ADDED
@@ -0,0 +1,40 @@
+from fastapi import FastAPI
+from langchain.prompts import PromptTemplate
+from fastapi.responses import RedirectResponse
+from fastapi.middleware.cors import CORSMiddleware
+from langserve import add_routes
+from app.llama2 import llm_pipeline
+
+app = FastAPI()
+
+# Set up CORS middleware to allow requests from any origin
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Set this to the specific origin of your frontend in production
+    allow_credentials=False,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+template = """You are a very smart and educated assistant who guides the user to understand concepts. Please explain the answer.
+If you don't know the answer, just say that you don't know; don't try to make up an answer.
+
+Question: {question}
+
+Only return the helpful answer below and nothing else. Keep the answer to 1000 characters at most.
+Helpful answer:
+"""
+
+prompt = PromptTemplate.from_template(template)
+
+@app.get("/")
+async def redirect_root_to_docs():
+    return RedirectResponse("/docs")
+
+add_routes(app,
+           prompt | llm_pipeline,
+           path='/chain_llama_non')
+
+if __name__ == "__main__":
+    import uvicorn
+
+    uvicorn.run(app, host="localhost", port=5005)
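Once the container is running, the route registered by add_routes can be exercised from Python with langserve's RemoteRunnable client. A usage sketch, assuming the service is reachable on localhost:5005:

# Client-side usage sketch: call the /chain_llama_non route exposed by add_routes.
from langserve import RemoteRunnable

chain = RemoteRunnable("http://localhost:5005/chain_llama_non/")
answer = chain.invoke({"question": "What is a Docker image?"})  # keys match the prompt's {question}
print(answer)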
requirements.txt
ADDED
@@ -0,0 +1,9 @@
+transformers
+langchain_community
+sentencepiece
+langserve
+langchain
+fastapi
+pydantic==1.10.13
+uvicorn
+sse_starlette
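The only pinned dependency is pydantic 1.10.13, presumably because early langserve releases worked best against pydantic v1. A quick illustrative check (not part of the commit) that the pin resolved as expected inside the image:

# Verify the pinned pydantic version is what actually got installed.
from importlib.metadata import version

assert version("pydantic") == "1.10.13", version("pydantic")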