tensorgirl committed
Commit f28768b • Parent(s): 9bd5b6d

Upload 3 files
Browse files:
- Dockerfile.txt +23 -0
- main.py +132 -0
- requirements.txt +13 -0
Dockerfile.txt
ADDED
@@ -0,0 +1,23 @@
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.9

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Added from here: run as a non-root user, as recommended for Docker Spaces
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

COPY --chown=user . $HOME/app
#COPY . .

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
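For a quick smoke test of the built image, the root endpoint defined in main.py below can be polled. A minimal sketch; the local URL assumes the container was started with the port mapped to 7860 and is an illustration, not part of the commit:

import requests

# Hypothetical local address; on Hugging Face Spaces the app is served at the Space URL.
resp = requests.get("http://localhost:7860/")
resp.raise_for_status()
print(resp.json())  # expected: {"Code Review Automation": "Version 1.0 'First Draft'"}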
main.py
ADDED
@@ -0,0 +1,132 @@
from fastapi import FastAPI, UploadFile, File
from pydantic import BaseModel
import os
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain_chroma import Chroma
from sentence_transformers import SentenceTransformer
from langchain_core.messages import AIMessage, HumanMessage

# Cache locations must be writable by the non-root user created in the Dockerfile
os.environ['HF_HOME'] = '/hug/cache/'
os.environ['TRANSFORMERS_CACHE'] = '/blabla/cache/'

app = FastAPI()
app.recursion_limit = 10**4

def predict(message, db):
    llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
    template = """You are a general purpose chatbot. Be friendly and kind. Help people answer their questions. Use the context below to answer the questions.
{context}
Question: {question}
Helpful Answer:"""
    QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)
    memory = ConversationBufferMemory(  # defined but not wired into the chain below
        memory_key="chat_history",
        return_messages=True,
    )

    retriever = db.as_retriever(search_kwargs={"k": 3})

    contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{question}"),
        ]
    )
    contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()

    def contextualized_question(input: dict):
        # Rephrase the question only when there is history to contextualize against
        if input.get("chat_history"):
            return contextualize_q_chain
        return input["question"]

    rag_chain = (
        RunnablePassthrough.assign(
            context=contextualized_question | retriever
        )
        | QA_CHAIN_PROMPT
        | llm
    )
    history = []
    ai_msg = rag_chain.invoke({"question": message, "chat_history": history})
    print(ai_msg)
    bot_response = ai_msg.content.strip()

    # Keep history as a list of (user_message, bot_response) message pairs
    history.append((HumanMessage(content=message), AIMessage(content=bot_response)))

    # Append the retrieved sources as citations below the answer
    docs = db.similarity_search(message, k=3)
    extra = "\n" + "*" * 100 + "\n"
    for d in docs:
        citations = d.metadata["source"] + " pg." + str(d.metadata["page"])
        additional_info = d.page_content
        extra += citations + "\n" + additional_info + "\n" + "*" * 100 + "\n"
    # Return the bot's response together with the citations
    return bot_response + extra

def upload_file(file_path):
    # Load the uploaded PDF and split it into overlapping chunks
    loaders = []
    print(file_path)
    loaders.append(PyPDFLoader(file_path))

    documents = []
    for loader in loaders:
        documents.extend(loader.load())

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=16)
    docs = text_splitter.split_documents(documents)

    model = "thenlper/gte-large"
    embedding_function = SentenceTransformerEmbeddings(model_name=model)
    print(f"Model's maximum sequence length: {SentenceTransformer(model).max_seq_length}")
    collection_name = "Autism"      # not passed to Chroma.from_documents below
    persist_directory = "./chroma"  # not passed to Chroma.from_documents below
    print(len(docs))
    db = Chroma.from_documents(docs, embedding_function)
    print("Done Processing, you can query")

    return db


class Item(BaseModel):
    code: str

@app.get("/")
async def root():
    return {"Code Review Automation": "Version 1.0 'First Draft'"}

@app.post("/UploadFile/")
def upload_and_predict(question: str, file: UploadFile = File(...)):
    # Save the uploaded PDF to disk so PyPDFLoader can read it from a path
    contents = file.file.read()
    with open(file.filename, 'wb') as f:
        f.write(contents)

    db = upload_file(file.filename)
    result = predict(question, db)
    return {"answer": result}
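Once the container is up, the /UploadFile/ endpoint can be exercised end-to-end with a short client script. A minimal sketch using the requests package already listed in requirements.txt; the local URL and the sample.pdf filename are assumptions for illustration:

import requests

# Hypothetical local address; a deployed Space exposes this at its public URL.
URL = "http://localhost:7860/UploadFile/"

# FastAPI reads `question` from the query string and `file` from multipart form data.
with open("sample.pdf", "rb") as pdf_file:
    response = requests.post(
        URL,
        params={"question": "What is this document about?"},
        files={"file": ("sample.pdf", pdf_file, "application/pdf")},
    )

response.raise_for_status()
print(response.json()["answer"])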
requirements.txt
ADDED
@@ -0,0 +1,13 @@
PyPDF2
langchain
langchain_community
langchain_chroma
chromadb
openai
pypdf
requests
sentence-transformers
fastapi
pydantic
uvicorn
openpyxl