shivakerur99 committed • Commit 2af1af9
Parent(s): 6774487
Update main.py

main.py CHANGED

@@ -1,19 +1,24 @@
 import io
 import os
+import time
+from datetime import datetime
 from pydantic import BaseModel
 from fastapi import FastAPI, HTTPException, File, UploadFile
-from pdfminer.high_level import extract_text
-from datetime import datetime
 from fastapi.middleware.cors import CORSMiddleware
+from pdfminer.high_level import extract_text
 from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String
 from databases import Database
-
 from langchain.chains.question_answering import load_qa_chain
 from langchain_community.llms import HuggingFaceEndpoint
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain.text_splitter import CharacterTextSplitter
 from langchain.docstore.document import Document as LangchainDocument
+from requests.exceptions import HTTPError
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
 
 app = FastAPI()
 
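
The first hunk reorders the datetime and pdfminer imports, adds time, requests.HTTPError, and python-dotenv, and calls load_dotenv() at import time so the HuggingFace token read further down can live in a local .env file. A minimal sketch of checking that wiring outside the app, assuming a .env file next to main.py (the file and the message are illustrative; only the variable name comes from main.py):

    import os
    from dotenv import load_dotenv

    load_dotenv()  # loads variables from a nearby .env file, if one is present
    if os.getenv("HUGGINGFACEHUB_API_TOKEN") is None:
        print("HUGGINGFACEHUB_API_TOKEN is missing; add it to .env or the shell environment")
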
@@ -23,7 +28,7 @@ app.add_middleware(
     CORSMiddleware,
     allow_origins=origins,
     allow_credentials=True,
-    allow_methods=["
+    allow_methods=["*"],
     allow_headers=["*"],
 )
 
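
The second hunk sets allow_methods=["*"] in the CORS middleware. For context, a sketch of the full add_middleware call this hunk belongs to, assuming origins is a list of allowed frontends defined earlier in main.py (the placeholder value below is illustrative):

    from fastapi import FastAPI
    from fastapi.middleware.cors import CORSMiddleware

    app = FastAPI()
    origins = ["http://localhost:3000"]  # placeholder; the real list lives earlier in main.py

    app.add_middleware(
        CORSMiddleware,
        allow_origins=origins,
        allow_credentials=True,
        allow_methods=["*"],  # the setting added by this commit
        allow_headers=["*"],
    )
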
@@ -63,8 +68,11 @@ async def upload_pdf(file: UploadFile = File(...)):
     content = await file.read()
 
     # Extract text from the PDF
-
-
+    try:
+        with io.BytesIO(content) as pdf_file:
+            text_content = extract_text(pdf_file)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Failed to extract text from PDF: {str(e)}")
 
     # Create a document object
     doc = Document(filename=file.filename, upload_date=str(datetime.now()), content=text_content)
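
The extraction path wrapped in try/except above can be exercised outside FastAPI. A small sketch using pdfminer.six directly, assuming a local sample.pdf (the filename is a placeholder):

    import io
    from pdfminer.high_level import extract_text

    with open("sample.pdf", "rb") as f:  # stands in for `await file.read()` in the endpoint
        content = f.read()
    with io.BytesIO(content) as pdf_file:
        text_content = extract_text(pdf_file)  # the same call the endpoint now guards
    print(text_content[:200])
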
@@ -78,7 +86,7 @@ async def upload_pdf(file: UploadFile = File(...)):
     last_record_id = await database.execute(query)
 
     # Return the document object
-    return doc
+    return {"id": last_record_id, "filename": doc.filename, "upload_date": doc.upload_date}
 
 # Pydantic model for input data
 class DataInput(BaseModel):
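
With this hunk the upload handler returns a plain JSON object instead of the Document instance itself. A hedged client-side sketch, assuming the route is mounted at /upload/ on a local server at port 8000 (neither the path nor the host appears in this diff):

    import requests

    with open("sample.pdf", "rb") as f:  # placeholder file
        resp = requests.post(
            "http://localhost:8000/upload/",  # assumed route and host
            files={"file": ("sample.pdf", f, "application/pdf")},
        )
    print(resp.json())  # expected shape: {"id": ..., "filename": ..., "upload_date": ...}
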
@@ -94,6 +102,9 @@ async def process_data(data: DataInput):
 
     # Load required models and components from Langchain library
     HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+    if not HUGGINGFACEHUB_API_TOKEN:
+        raise HTTPException(status_code=500, detail="HuggingFace API token not found.")
+
     os.environ['HUGGINGFACEHUB_API_TOKEN'] = HUGGINGFACEHUB_API_TOKEN
 
     dom = [LangchainDocument(page_content=response_data, metadata={"source": "local"})]
@@ -104,16 +115,28 @@ async def process_data(data: DataInput):
     embeddings = HuggingFaceEmbeddings()
     db = FAISS.from_documents(docs, embeddings)
 
+    # Configure the HuggingFaceEndpoint with an increased timeout
     llm = HuggingFaceEndpoint(
         repo_id="google/flan-t5-xxl",
-        temperature=0.5
+        temperature=0.5,
+        timeout=300  # Increase the timeout to 300 seconds
     )
 
     chain = load_qa_chain(llm, chain_type="stuff")
 
-    #
-
-
+    # Implement a retry mechanism
+    max_retries = 5
+    for attempt in range(max_retries):
+        try:
+            # Perform similarity search and question answering
+            dm = db.similarity_search(user_input)
+            result = chain.run(input_documents=dm, question=user_input)
+            break  # Break out of the loop if successful
+        except HTTPError as e:
+            if attempt < max_retries - 1:
+                time.sleep(10)  # Wait for 10 seconds before retrying
+            else:
+                raise HTTPException(status_code=503, detail="Service Unavailable. Please try again later.") from e
 
     return {"result": result}
 
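
The new retry loop waits a fixed 10 seconds between attempts. A common alternative is exponential backoff; a hedged sketch of that variant, kept separate from the committed code (the helper name and base delay are illustrative, not part of main.py):

    import time
    from requests.exceptions import HTTPError

    def run_with_backoff(call, max_retries=5, base_delay=2.0):
        """Retry `call` on HTTPError, doubling the wait after each failed attempt."""
        for attempt in range(max_retries):
            try:
                return call()
            except HTTPError:
                if attempt == max_retries - 1:
                    raise
                time.sleep(base_delay * (2 ** attempt))

    # Usage against the names in main.py:
    # result = run_with_backoff(lambda: chain.run(input_documents=dm, question=user_input))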