sudhir1234 committed on
Commit
2402611
1 Parent(s): ac028e9

Upload 4 files

Files changed (4)
  1. Dockerfile +17 -0
  2. app/llama2.py +22 -0
  3. app/server.py +40 -0
  4. requirements.txt +9 -0
Dockerfile ADDED
@@ -0,0 +1,17 @@
+ FROM python:3.11-slim
+
+ WORKDIR /usr/src/app
+
+ COPY requirements.txt ./
+ RUN pip install --upgrade pip && \
+     pip install -r requirements.txt && \
+     pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+
+ COPY ./app ./app
+
+ # Copy the model files into the Docker image
+ COPY flan-t5-small /models/flan-t5-small
+
+ EXPOSE 5005
+
+ CMD ["uvicorn", "app.server:app", "--host", "0.0.0.0", "--port", "5005"]
app/llama2.py ADDED
@@ -0,0 +1,22 @@
+ from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
+ from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
+
+ # Set the model path to the location where your model files are stored
+ model_path = "/models/flan-t5-small"  # Adjust this path as needed
+
+ # Load the model and tokenizer (flan-t5-small is a seq2seq checkpoint, so use the Auto classes rather than the Llama-specific ones)
+ local_model = AutoModelForSeq2SeqLM.from_pretrained(model_path, return_dict=True)
+ local_tokenizer = AutoTokenizer.from_pretrained(model_path)
+
+ # Create a text-to-text generation pipeline
+ pipe = pipeline(
+     task="text2text-generation",
+     model=local_model,
+     tokenizer=local_tokenizer,
+     max_new_tokens=100,
+     repetition_penalty=1.1,
+     model_kwargs={"max_length": 1200, "temperature": 0.01}
+ )
+
+ # Pipeline to be consumed by the LangServe API
+ llm_pipeline = HuggingFacePipeline(pipeline=pipe)
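
The pipeline above can be exercised without the API layer. A minimal smoke-test sketch, assuming the requirements are installed and the model files actually exist at the path in model_path (the prompt string is only an example):

    from app.llama2 import llm_pipeline

    # HuggingFacePipeline is a LangChain Runnable, so it can be invoked directly with a string prompt
    print(llm_pipeline.invoke("Explain what a Docker image is in one sentence."))
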
app/server.py ADDED
@@ -0,0 +1,40 @@
+ from fastapi import FastAPI
+ from langchain.prompts import PromptTemplate
+ from fastapi.responses import RedirectResponse
+ from fastapi.middleware.cors import CORSMiddleware
+ from langserve import add_routes
+ from app.llama2 import llm_pipeline
+
+ app = FastAPI()
+
+ # Set up CORS middleware to allow requests from any origin
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],  # Set this to the specific origin of your frontend in production
+     allow_credentials=False,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+ template = """You are a very smart and educated assistant whose goal is to help the user understand the concepts. Please explain the answer.
+ If you don't know the answer, just say that you don't know; don't try to make up an answer.
+
+ Question: {question}
+
+ Only return the helpful answer below and nothing else. Please give an answer of at most 1000 characters.
+ Helpful answer:
+ """
+
+ prompt = PromptTemplate.from_template(template)
+
+ @app.get("/")
+ async def redirect_root_to_docs():
+     return RedirectResponse("/docs")
+
+ add_routes(app,
+            prompt | llm_pipeline,
+            path='/chain_llama_non')
+
+ if __name__ == "__main__":
+     import uvicorn
+
+     uvicorn.run(app, host="localhost", port=5005)
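
Once the container (or a local uvicorn process) is serving on port 5005, the LangServe route can be called from Python. A hedged client sketch, assuming the service is reachable at http://localhost:5005 (the question text is only an example):

    from langserve import RemoteRunnable

    # RemoteRunnable proxies the chain mounted at /chain_llama_non
    chain = RemoteRunnable("http://localhost:5005/chain_llama_non")

    # The chain starts with a PromptTemplate that expects a "question" variable
    print(chain.invoke({"question": "What is FastAPI used for?"}))
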
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ transformers
+ langchain_community
+ sentencepiece
+ langserve
+ langchain
+ fastapi
+ pydantic==1.10.13
+ uvicorn
+ sse_starlette