Ronr committed on
Commit
3a0e6a8
·
1 Parent(s): 1cf82b8

Upload 2 files

Browse files
Files changed (2) hide show
  1. main.py +174 -0
  2. requirements.txt +7 -0
main.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, Body, File, Form, UploadFile
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from fastapi.responses import FileResponse
4
+ import os
5
+ import openai
6
+ from langchain.chains import RetrievalQA
7
+ from langchain.llms import OpenAI
8
+ from langchain.document_loaders import TextLoader
9
+ from langchain.document_loaders import PyPDFLoader
10
+ from langchain.document_loaders import DirectoryLoader
11
+ from langchain.text_splitter import CharacterTextSplitter
12
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
13
+ from langchain.embeddings import OpenAIEmbeddings
14
+ from langchain.vectorstores import Chroma
15
+ from langchain.callbacks import get_openai_callback
16
+ from langchain.chat_models import AzureChatOpenAI
17
+ from langchain.llms import AzureOpenAI
18
+
19
+
20
+ # Import Azure OpenAI
21
+ from langchain.llms import AzureOpenAI
22
+ import uvicorn
23
+ import datetime
24
+ import shutil
25
+
26
+
27
# Application object; the route handlers below register themselves on it.
app = FastAPI()

# CORS policy: any origin, method and header is accepted, credentials included.
# NOTE(review): wildcard origins together with credentials is very permissive
# -- confirm this is intended for the deployed service.
origins = ['*']
_cors_options = {
    "allow_origins": origins,
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)

# Azure OpenAI settings exported through the process environment; qa() reads
# OPENAI_API_BASE and DEPLOYMENT_NAME back from here.
os.environ.update({
    "OPENAI_API_TYPE": "Azure",
    "OPENAI_API_VERSION": "2023-05-15",
    "OPENAI_API_BASE": "https://rgenopenaitest.openai.azure.com/",
    "DEPLOYMENT_NAME": "rgen_pdf_analyzer",
})
43
+
44
@app.get("/")
async def read_item():
    """Return a fixed JSON payload showing that the service is reachable."""
    status = {"message": "App Working"}
    return status
47
+
48
@app.post("/search")
async def upload_pdf(file: UploadFile = None, api_key: str = Form(...), text: str = Form(...)):
    """Answer ``text``, optionally using an uploaded document as context.

    When ``file`` is provided it is stored under ``./pdf_temp_storage``,
    queried through ``qa_result`` with the ``map_reduce`` chain type, and
    removed again before the request finishes. Without a file the prompt is
    forwarded to ``chat_openai``.

    Args:
        file: Optional uploaded document.
        api_key: API key forwarded to the OpenAI helpers.
        text: The user's question / prompt.

    Returns:
        The dictionary produced by ``qa_result`` or ``chat_openai``.

    Raises:
        HTTPException: 400 when the upload has no usable filename, 500 when
            processing fails for any other reason.
    """
    timestamp = datetime.datetime.now()
    print(f'---------------------------------------------------{timestamp}------------------------------------------------------')
    print(f'query: {text}')

    # Stays None until a storage path has been chosen, so the cleanup in
    # ``finally`` never touches an unbound name.
    file_name = None
    try:
        if file is None:
            return chat_openai(api_key, text)

        folder_path = "./pdf_temp_storage"
        # exist_ok avoids the check-then-create race between requests.
        os.makedirs(folder_path, exist_ok=True)

        # The client controls ``file.filename``; keep only its last path
        # component so the upload cannot be written outside folder_path.
        safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
        if not safe_name or safe_name in (".", ".."):
            raise HTTPException(status_code=400, detail="Uploaded file has no usable filename")

        file_name = os.path.join(folder_path, safe_name)
        await save_pdf(file_name, file)

        query = text + "\n" + "Response should not be longer than 300 words"
        response = qa_result(api_key, query, file_name, "map_reduce")
        print(response)
        return response
    except HTTPException:
        # Already a well-formed HTTP error; let FastAPI render it unchanged.
        raise
    except Exception as e:
        print("Error: ", e)
        # Report the failure as an HTTP error instead of returning the
        # exception object as if it were a successful response body.
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Remove the temporary upload regardless of success or failure.
        if file_name is not None:
            rem_documents(file_name)
        print('-----------------------------------------------------------------------------------------------------------------------------------')
80
+
81
+
82
+
83
def chat_openai(api_key,prompt_text):
    """Send ``prompt_text`` to the Azure chat deployment and return its reply.

    The module-level ``openai`` settings (key, API type, base URL, API
    version) are assigned on every call, before the prompt is checked.

    Args:
        api_key: Key assigned to ``openai.api_key``.
        prompt_text: User message; when falsy no request is made.

    Returns:
        ``{"response": <reply text>}``, or ``None`` when ``prompt_text`` is
        falsy.
    """
    print('Inside chat')

    # Configure the shared openai module for the Azure resource.
    openai.api_key = api_key
    openai.api_type = "azure"
    openai.api_base = "https://rgenopenaitest.openai.azure.com/"
    openai.api_version = "2023-03-15-preview"

    if not prompt_text:
        return None

    conversation = [
        {"role":"system","content":"You are an AI assistant that helps people find information."},
        {"role": "user", "content": prompt_text},
    ]
    completion = openai.ChatCompletion.create(
        engine="rgen_pdf_analyzer",
        messages=conversation,
        temperature=0.7,
        max_tokens=800,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
    )

    print("Answers to the prommpt :------->", completion["choices"][0]["message"]["content"])
    return {"response": completion["choices"][0]["message"]["content"]}
107
+
108
+
109
def qa(file, query, chain_type, k):
    """Run a retrieval-augmented question over a single document.

    The document is split into chunks, embedded into a temporary Chroma
    collection, and queried through a ``RetrievalQA`` chain backed by the
    Azure chat deployment named in the ``DEPLOYMENT_NAME`` environment
    variable.

    Args:
        file: Path of the document. A ``.pdf`` extension (case-insensitive)
            selects the PDF loader; anything else is read as UTF-8 text.
        query: Question passed to the chain.
        chain_type: Chain type handed to ``RetrievalQA.from_chain_type``.
        k: Number of chunks the retriever returns per query.

    Returns:
        The chain's result mapping; it contains ``"result"`` and, because
        ``return_source_documents=True``, ``"source_documents"``.
    """
    import uuid  # local import: only needed for the per-call collection name

    if os.path.splitext(file)[1].lower() == ".pdf":
        documents = PyPDFLoader(file).load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        texts = text_splitter.split_documents(documents)
    else:
        # Explicit encoding so decoding does not depend on the host locale.
        with open(file, encoding="utf-8") as f:
            raw_text = f.read()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=100,
            length_function=len,
        )
        texts = text_splitter.create_documents([raw_text])

    azure_llm = AzureChatOpenAI(
        openai_api_base=os.environ["OPENAI_API_BASE"],
        openai_api_version=os.environ["OPENAI_API_VERSION"],
        deployment_name=os.environ["DEPLOYMENT_NAME"],
        openai_api_type="azure",
    )

    embeddings = OpenAIEmbeddings(
        model='text-embedding-ada-002',
        deployment='text_embeddings',
        openai_api_base=os.environ["OPENAI_API_BASE"],
        openai_api_type='azure',
        chunk_size=1,
    )

    # Use a collection unique to this call and drop it afterwards. With the
    # default collection name, chunks from earlier requests can remain in the
    # in-process store (depending on the chromadb version) and be retrieved
    # for unrelated documents.
    db = Chroma.from_documents(
        texts,
        embeddings,
        collection_name=f"qa-{uuid.uuid4().hex}",
    )
    try:
        retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})
        # Named ``chain`` so it does not shadow this function's own name.
        chain = RetrievalQA.from_chain_type(
            llm=azure_llm,
            chain_type=chain_type,
            retriever=retriever,
            return_source_documents=True,
        )
        result = chain({"query": query})
    finally:
        db.delete_collection()

    print(result['result'])
    return result
146
+
147
+
148
def qa_result(api_key, prompt_text, file, Chain_type):
    """Query a document via ``qa`` and report the answer with token usage.

    Args:
        api_key: Stored in the ``OPENAI_API_KEY`` environment variable
            before anything else happens.
        prompt_text: Question to ask; when falsy no query is run.
        file: Path of the document passed on to ``qa``.
        Chain_type: Chain type passed on to ``qa``.

    Returns:
        ``None`` when ``prompt_text`` is falsy. Otherwise a dict with
        ``"response"``, ``"reference"`` (the source documents) and the
        ``"Total Tokens"``, ``"Prompt Tokens"`` and ``"Completion Tokens"``
        counts as plain numbers (previously each count was wrapped in a
        one-element set by an accidental ``{...}`` literal).
    """
    # NOTE(review): this is process-wide state; concurrent requests using
    # different keys can overwrite each other's value.
    os.environ["OPENAI_API_KEY"] = api_key

    if not prompt_text:
        return None

    with get_openai_callback() as cb:
        result = qa(file=file, query=prompt_text, chain_type=Chain_type, k=2)

    return {
        "response": result["result"],
        "reference": result["source_documents"],
        "Total Tokens": cb.total_tokens,
        "Prompt Tokens": cb.prompt_tokens,
        "Completion Tokens": cb.completion_tokens,
    }
160
+
161
+
162
async def save_pdf(file_name, file, chunk_size=1024 * 1024):
    """Write an uploaded file to ``file_name`` on disk.

    The upload is copied in pieces of at most ``chunk_size`` bytes so that a
    large file is never held in memory all at once.

    Args:
        file_name: Destination path; opened in binary write mode.
        file: Object exposing an awaitable ``read(size)`` that returns bytes
            and an empty value at end of stream.
        chunk_size: Maximum number of bytes requested per read.
    """
    with open(file_name, "wb") as f:
        while True:
            chunk = await file.read(chunk_size)
            if not chunk:
                break
            f.write(chunk)
165
+
166
def rem_documents(file_path):
    """Delete ``file_path`` on a best-effort basis, printing the outcome.

    A missing file or any other failure is reported on stdout and never
    raised to the caller.
    """
    try:
        os.remove(file_path)
    except FileNotFoundError:
        print("File not found:", file_path)
    except Exception as e:
        print("An error occurred while deleting the file:", str(e))
    else:
        print("File deleted successfully:", file_path)
174
+
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ langchain
2
+ openai
3
+ chromadb
4
+ tiktoken
5
+ pypdf
6
+ fastapi
7
+ python-multipart