tensorgirl committed on
Commit
f28768b
1 Parent(s): 9bd5b6d

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile.txt +23 -0
  2. main.py +132 -0
  3. requirements.txt +13 -0
Dockerfile.txt ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Read the docs: https://huggingface.co/docs/hub/spaces-sdks-docker
# They also contain guides on how best to write your Dockerfile.

FROM python:3.9

WORKDIR /code

# Copy the dependency list on its own and install from it before the
# application sources are added further down.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Switch to an unprivileged user (UID 1000) with its own home directory.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# Copy the application sources, owned by the unprivileged user.
COPY --chown=user . $HOME/app

# Serve the FastAPI object `app` from main.py on port 7860.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from app import predict
3
+ import os
4
+ from huggingface_hub import login
5
+ from pydantic import BaseModel
6
+ import sys
7
+ from langchain.chat_models import ChatOpenAI
8
+ from langchain.prompts import PromptTemplate
9
+ from langchain.memory import ConversationBufferMemory
10
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
11
+ from langchain_core.output_parsers import StrOutputParser
12
+ from langchain_core.runnables import RunnablePassthrough
13
+ import os
14
+ import PyPDF2 as pdf
15
+ import gradio as gr
16
+ from langchain_community.document_loaders import PyPDFLoader
17
+ import os
18
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
19
+ from langchain_community.embeddings.sentence_transformer import (
20
+ SentenceTransformerEmbeddings,
21
+ )
22
+ from langchain_chroma import Chroma
23
+ from sentence_transformers import SentenceTransformer
24
+ from langchain_core.messages import AIMessage, HumanMessage
25
+ from fastapi import FastAPI, Request, UploadFile, File
26
+
27
# Cache directories for the Hugging Face libraries.
# NOTE(review): these are assigned after the Hugging Face related imports in
# this file have already executed, and both paths sit at the filesystem root.
# Confirm that they take effect and are writable by the container user.
os.environ.update(
    {
        'HF_HOME': '/hug/cache/',
        'TRANSFORMERS_CACHE': '/blabla/cache/',
    }
)

# ASGI application object that the route decorators below attach to.
app = FastAPI()
# NOTE(review): this is a plain attribute assignment on the app instance;
# presumably meant to raise a recursion ceiling -- confirm anything reads it.
app.recursion_limit = 10 ** 4
32
+
33
def predict(message, db, history=None):
    """Answer ``message`` using retrieval-augmented generation over ``db``.

    Args:
        message: The user's question as plain text.
        db: Vector store exposing ``as_retriever`` and ``similarity_search``;
            each stored document is expected to carry ``source`` and ``page``
            metadata.
        history: Optional flat list of earlier chat messages. When it is
            non-empty the question is first rewritten into a standalone
            question before retrieval. If a list is supplied, the new
            human/AI message pair is appended to it in place.

    Returns:
        The model's answer followed by a block of citations: the source,
        page number and text of the three most similar chunks, separated by
        lines of asterisks.
    """
    llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

    template = """You are a general purpose chatbot. Be friendly and kind. Help people answer their questions. Use the context below to answer the questions
{context}
Question: {question}
Helpful Answer:"""
    qa_prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=template,
    )

    # The result count has to go through ``search_kwargs``; a bare ``k=3``
    # keyword is not what the retriever reads its search settings from.
    retriever = db.as_retriever(search_kwargs={"k": 3})

    contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{question}"),
        ]
    )
    contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()

    def contextualized_question(inputs: dict):
        # Only spend an extra LLM call on rewriting when there is history
        # that the question could be referring to.
        if inputs.get("chat_history"):
            return contextualize_q_chain
        return inputs["question"]

    def format_docs(documents):
        # Feed the prompt the chunk text rather than the repr of a list of
        # Document objects.
        return "\n\n".join(doc.page_content for doc in documents)

    rag_chain = (
        RunnablePassthrough.assign(
            context=contextualized_question | retriever | format_docs
        )
        | qa_prompt
        | llm
    )

    chat_history = history if history is not None else []
    ai_msg = rag_chain.invoke({"question": message, "chat_history": chat_history})
    print(ai_msg)
    bot_response = ai_msg.content.strip()

    # Record the turn as two separate messages so the list can be passed
    # straight back in as ``history`` on the next call.
    if history is not None:
        history.extend(
            [HumanMessage(content=message), AIMessage(content=bot_response)]
        )

    # Append the supporting passages so the caller can see where the answer
    # may have come from.
    separator = "\n" + "*" * 100 + "\n"
    docs = db.similarity_search(message, k=3)
    citation_blocks = [
        d.metadata["source"] + " pg." + str(d.metadata["page"])
        + "\n" + d.page_content + separator
        for d in docs
    ]
    return bot_response + separator + "".join(citation_blocks)
90
+
91
def upload_file(file_path):
    """Load the PDF at ``file_path`` and index it in a new Chroma store.

    The PDF is loaded with ``PyPDFLoader``, split into 512-character chunks
    with a 16-character overlap, embedded with the ``thenlper/gte-large``
    sentence-transformer model and stored in a Chroma collection.

    Args:
        file_path: Path of the PDF file on disk.

    Returns:
        The Chroma vector store holding the embedded chunks.

    Raises:
        ValueError: If no text chunks could be produced from the file.
    """
    import uuid

    print(file_path)
    documents = PyPDFLoader(file_path).load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=16)
    docs = text_splitter.split_documents(documents)
    if not docs:
        raise ValueError(f"No extractable text found in {file_path!r}")

    model = "thenlper/gte-large"
    embedding_function = SentenceTransformerEmbeddings(model_name=model)

    # Report the sequence limit from the model the embedding wrapper already
    # loaded instead of instantiating the model a second time just to print it.
    # NOTE(review): relies on the wrapper exposing the underlying model as
    # ``client``; the diagnostic is skipped when it does not.
    loaded_model = getattr(embedding_function, "client", None)
    if loaded_model is not None:
        print(f"Model's maximum sequence length: {loaded_model.max_seq_length}")
    print(len(docs))

    # Use a unique collection per upload so chunks from an earlier upload in
    # the same process cannot be returned for a later one.
    # NOTE(review): assumes the default collection is shared between
    # in-process Chroma instances -- confirm against the Chroma docs.
    db = Chroma.from_documents(
        docs,
        embedding_function,
        collection_name=f"upload-{uuid.uuid4().hex}",
    )
    print("Done Processing, you can query")

    return db
114
+
115
+
116
class Item(BaseModel):
    """Request payload carrying a single ``code`` string.

    NOTE(review): no route in the visible source references this model.
    """

    code: str
118
+
119
@app.get("/")
async def root():
    """Return the static banner served at the API root."""
    banner = {"Code Review Automation": "Version 1.0 'First Draft'"}
    return banner
122
+
123
# Keep a handle on the question-answering helper before the route handler
# below rebinds the module-level name ``predict``; without it the handler
# would call itself instead of the helper.
_answer_question = predict


@app.post("/UploadFile/")
def predict(question: str, file: UploadFile = File(...)):
    """Index an uploaded PDF and answer ``question`` against it.

    Args:
        question: The question to answer.
        file: The uploaded PDF document.

    Returns:
        A dict with a single ``"answer"`` key holding the answer text
        together with the cited passages.
    """
    import shutil
    import tempfile

    # The filename comes from the client: strip any directory components and
    # write into a throwaway directory so an upload can neither escape the
    # working directory nor overwrite application files.
    safe_name = os.path.basename(file.filename or "") or "upload.pdf"

    with tempfile.TemporaryDirectory() as workdir:
        saved_path = os.path.join(workdir, safe_name)
        with open(saved_path, 'wb') as f:
            shutil.copyfileobj(file.file, f)

        db = upload_file(saved_path)
        result = _answer_question(question, db)

    # Citations embed the on-disk path; show the uploaded file's name instead
    # of the temporary location.
    result = result.replace(saved_path, safe_name)
    return {"answer": result}
132
+
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
PyPDF2
langchain
langchain_community
langchain_chroma
chromadb
openai
pypdf
requests
sentence-transformers
fastapi
# FastAPI needs python-multipart to parse the form data behind UploadFile/File
python-multipart
pydantic
uvicorn
openpyxl
# main.py imports gradio at module level
gradio