shivakerur99 committed on
Commit
2af1af9
1 Parent(s): 6774487

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +34 -11
main.py CHANGED
@@ -1,19 +1,24 @@
1
  import io
2
  import os
 
 
3
  from pydantic import BaseModel
4
  from fastapi import FastAPI, HTTPException, File, UploadFile
5
- from pdfminer.high_level import extract_text
6
- from datetime import datetime
7
  from fastapi.middleware.cors import CORSMiddleware
 
8
  from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String
9
  from databases import Database
10
-
11
  from langchain.chains.question_answering import load_qa_chain
12
  from langchain_community.llms import HuggingFaceEndpoint
13
  from langchain_community.embeddings import HuggingFaceEmbeddings
14
  from langchain_community.vectorstores import FAISS
15
  from langchain.text_splitter import CharacterTextSplitter
16
  from langchain.docstore.document import Document as LangchainDocument
 
 
 
 
 
17
 
18
  app = FastAPI()
19
 
@@ -23,7 +28,7 @@ app.add_middleware(
23
  CORSMiddleware,
24
  allow_origins=origins,
25
  allow_credentials=True,
26
- allow_methods=["GET", "POST", "PUT", "DELETE"],
27
  allow_headers=["*"],
28
  )
29
 
@@ -63,8 +68,11 @@ async def upload_pdf(file: UploadFile = File(...)):
63
  content = await file.read()
64
 
65
  # Extract text from the PDF
66
- with io.BytesIO(content) as pdf_file:
67
- text_content = extract_text(pdf_file)
 
 
 
68
 
69
  # Create a document object
70
  doc = Document(filename=file.filename, upload_date=str(datetime.now()), content=text_content)
@@ -78,7 +86,7 @@ async def upload_pdf(file: UploadFile = File(...)):
78
  last_record_id = await database.execute(query)
79
 
80
  # Return the document object
81
- return doc
82
 
83
  # Pydantic model for input data
84
  class DataInput(BaseModel):
@@ -94,6 +102,9 @@ async def process_data(data: DataInput):
94
 
95
  # Load required models and components from Langchain library
96
  HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 
 
 
97
  os.environ['HUGGINGFACEHUB_API_TOKEN'] = HUGGINGFACEHUB_API_TOKEN
98
 
99
  dom = [LangchainDocument(page_content=response_data, metadata={"source": "local"})]
@@ -104,16 +115,28 @@ async def process_data(data: DataInput):
104
  embeddings = HuggingFaceEmbeddings()
105
  db = FAISS.from_documents(docs, embeddings)
106
 
 
107
  llm = HuggingFaceEndpoint(
108
  repo_id="google/flan-t5-xxl",
109
- temperature=0.5
 
110
  )
111
 
112
  chain = load_qa_chain(llm, chain_type="stuff")
113
 
114
- # Perform similarity search and question answering
115
- dm = db.similarity_search(user_input)
116
- result = chain.run(input_documents=dm, question=user_input)
 
 
 
 
 
 
 
 
 
 
117
 
118
  return {"result": result}
119
 
 
1
  import io
2
  import os
3
+ import time
4
+ from datetime import datetime
5
  from pydantic import BaseModel
6
  from fastapi import FastAPI, HTTPException, File, UploadFile
 
 
7
  from fastapi.middleware.cors import CORSMiddleware
8
+ from pdfminer.high_level import extract_text
9
  from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String
10
  from databases import Database
 
11
  from langchain.chains.question_answering import load_qa_chain
12
  from langchain_community.llms import HuggingFaceEndpoint
13
  from langchain_community.embeddings import HuggingFaceEmbeddings
14
  from langchain_community.vectorstores import FAISS
15
  from langchain.text_splitter import CharacterTextSplitter
16
  from langchain.docstore.document import Document as LangchainDocument
17
+ from requests.exceptions import HTTPError
18
+ from dotenv import load_dotenv
19
+
20
+ # Load environment variables from .env file
21
+ load_dotenv()
22
 
23
  app = FastAPI()
24
 
 
28
  CORSMiddleware,
29
  allow_origins=origins,
30
  allow_credentials=True,
31
+ allow_methods=["*"],
32
  allow_headers=["*"],
33
  )
34
 
 
68
  content = await file.read()
69
 
70
  # Extract text from the PDF
71
+ try:
72
+ with io.BytesIO(content) as pdf_file:
73
+ text_content = extract_text(pdf_file)
74
+ except Exception as e:
75
+ raise HTTPException(status_code=500, detail=f"Failed to extract text from PDF: {str(e)}")
76
 
77
  # Create a document object
78
  doc = Document(filename=file.filename, upload_date=str(datetime.now()), content=text_content)
 
86
  last_record_id = await database.execute(query)
87
 
88
  # Return the document object
89
+ return {"id": last_record_id, "filename": doc.filename, "upload_date": doc.upload_date}
90
 
91
  # Pydantic model for input data
92
  class DataInput(BaseModel):
 
102
 
103
  # Load required models and components from Langchain library
104
  HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
105
+ if not HUGGINGFACEHUB_API_TOKEN:
106
+ raise HTTPException(status_code=500, detail="HuggingFace API token not found.")
107
+
108
  os.environ['HUGGINGFACEHUB_API_TOKEN'] = HUGGINGFACEHUB_API_TOKEN
109
 
110
  dom = [LangchainDocument(page_content=response_data, metadata={"source": "local"})]
 
115
  embeddings = HuggingFaceEmbeddings()
116
  db = FAISS.from_documents(docs, embeddings)
117
 
118
+ # Configure the HuggingFaceEndpoint with an increased timeout
119
  llm = HuggingFaceEndpoint(
120
  repo_id="google/flan-t5-xxl",
121
+ temperature=0.5,
122
+ timeout=300 # Increase the timeout to 300 seconds
123
  )
124
 
125
  chain = load_qa_chain(llm, chain_type="stuff")
126
 
127
+ # Implement a retry mechanism
128
+ max_retries = 5
129
+ for attempt in range(max_retries):
130
+ try:
131
+ # Perform similarity search and question answering
132
+ dm = db.similarity_search(user_input)
133
+ result = chain.run(input_documents=dm, question=user_input)
134
+ break # Break out of the loop if successful
135
+ except HTTPError as e:
136
+ if attempt < max_retries - 1:
137
+ time.sleep(10) # Wait for 10 seconds before retrying
138
+ else:
139
+ raise HTTPException(status_code=503, detail="Service Unavailable. Please try again later.") from e
140
 
141
  return {"result": result}
142