shivakerur99 committed
Commit 8eb0be8
1 Parent(s): 82c30f7

Update main.py

Files changed (1)
    main.py +24 -28
main.py CHANGED
@@ -1,4 +1,5 @@
 import io
+import os
 from pydantic import BaseModel
 from fastapi import FastAPI, HTTPException, File, UploadFile
 from pdfminer.high_level import extract_text
@@ -14,13 +15,10 @@ from langchain_community.vectorstores import FAISS
 from langchain.text_splitter import CharacterTextSplitter
 from langchain.docstore.document import Document as LangchainDocument
 
-
-import os
-
 app = FastAPI()
 
 # Set up CORS (Cross-Origin Resource Sharing) for allowing requests from all origins
-origins=["*"]
+origins = ["*"]
 app.add_middleware(
     CORSMiddleware,
     allow_origins=origins,
@@ -46,6 +44,7 @@ documents = Table(
 
 # Create the document table in the database
 metadata.create_all(engine)
+database = Database(DATABASE_URL)
 
 # Define Pydantic model for the document
 class Document(BaseModel):
@@ -53,16 +52,9 @@ class Document(BaseModel):
     upload_date: str
     content: str
 
-# Function to save uploaded files
-# async def save_uploaded_file(file: UploadFile, destination: str):
-#     with open(destination, "wb") as buffer:
-#         while chunk := await file.read(1024):
-#             buffer.write(chunk)
-
 # Endpoint for uploading PDF files
 @app.post("/upload/")
 async def upload_pdf(file: UploadFile = File(...)):
-
     # Check if the uploaded file is a PDF
     if not file.filename.lower().endswith('.pdf'):
         raise HTTPException(status_code=400, detail="Only PDF files are allowed.")
@@ -78,17 +70,12 @@ async def upload_pdf(file: UploadFile = File(...)):
     doc = Document(filename=file.filename, upload_date=str(datetime.now()), content=text_content)
 
     # Insert the document data into the database
-    async with Database(DATABASE_URL) as database:
-        query = documents.insert().values(
-            filename=doc.filename,
-            upload_date=doc.upload_date,
-            content=doc.content
-        )
-        last_record_id = await database.execute(query)
-
-    # Save the uploaded PDF file
-    # destination = f"files/{file.filename}"
-    # await save_uploaded_file(file, destination)
+    query = documents.insert().values(
+        filename=doc.filename,
+        upload_date=doc.upload_date,
+        content=doc.content
+    )
+    last_record_id = await database.execute(query)
 
     # Return the document object
     return doc
@@ -105,18 +92,18 @@ async def process_data(data: DataInput):
     response_data = data.responseData
     user_input = data.userInput
 
-    # Load required models and components from Langchain librar
-    os.environ['HUGGINGFACEHUB_API_TOKEN'] =HUGGINGFACEHUB_API_TOKEN
+    # Load required models and components from Langchain library
     HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
-
+    os.environ['HUGGINGFACEHUB_API_TOKEN'] = HUGGINGFACEHUB_API_TOKEN
 
     dom = [LangchainDocument(page_content=response_data, metadata={"source": "local"})]
 
-    text_spliter = CharacterTextSplitter(chunk_size=3000, chunk_overlap=0)
-    docs = text_spliter.split_documents(dom)
+    text_splitter = CharacterTextSplitter(chunk_size=3000, chunk_overlap=0)
+    docs = text_splitter.split_documents(dom)
 
     embeddings = HuggingFaceEmbeddings()
     db = FAISS.from_documents(docs, embeddings)
+
     llm = HuggingFaceEndpoint(
         repo_id="google/flan-t5-large",
         temperature=0.8,
@@ -128,4 +115,13 @@ async def process_data(data: DataInput):
     dm = db.similarity_search(user_input)
     result = chain.run(input_documents=dm, question=user_input)
 
-    return result
+    return {"result": result}
+
+# To ensure the database connection is managed properly
+@app.on_event("startup")
+async def startup():
+    await database.connect()
+
+@app.on_event("shutdown")
+async def shutdown():
+    await database.disconnect()
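
The main change in this commit is the database connection lifecycle: instead of opening a fresh Database(DATABASE_URL) connection inside the upload handler with async with, one module-level database object is created next to metadata.create_all(engine) and tied to the application's startup and shutdown events. A minimal sketch of that pattern, using the same databases + SQLAlchemy Core combination as main.py; the SQLite URL, table columns, and the /documents/ endpoint here are illustrative assumptions, not copied from the file:

import sqlalchemy
from databases import Database
from fastapi import FastAPI

DATABASE_URL = "sqlite:///./documents.db"  # assumed URL; main.py defines its own

metadata = sqlalchemy.MetaData()
documents = sqlalchemy.Table(
    "documents",
    metadata,
    sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True),
    sqlalchemy.Column("filename", sqlalchemy.String),
    sqlalchemy.Column("upload_date", sqlalchemy.String),
    sqlalchemy.Column("content", sqlalchemy.Text),
)

engine = sqlalchemy.create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
metadata.create_all(engine)

database = Database(DATABASE_URL)
app = FastAPI()

@app.on_event("startup")
async def startup():
    # One shared connection pool for the whole application
    await database.connect()

@app.on_event("shutdown")
async def shutdown():
    await database.disconnect()

@app.post("/documents/")
async def create_document(filename: str, content: str):
    # SQLAlchemy Core insert executed through the shared database object,
    # the same pattern upload_pdf now uses
    query = documents.insert().values(filename=filename, content=content)
    record_id = await database.execute(query)
    return {"id": record_id}

Newer FastAPI versions prefer a lifespan context manager over on_event, but the on_event hooks shown here match what the commit adds.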
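A smaller fix in process_data is the ordering of the Hugging Face token lines: the old code assigned os.environ['HUGGINGFACEHUB_API_TOKEN'] from a name that had not been read yet, while the new code reads the token with os.getenv first and exports it afterwards. A hedged sketch of that read-then-export step with an explicit guard (the RuntimeError is added for illustration and is not part of the commit):

import os

# Read the token before any LangChain component needs it, and fail fast
# with a clear message if the environment variable is not set.
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if not HUGGINGFACEHUB_API_TOKEN:
    raise RuntimeError("HUGGINGFACEHUB_API_TOKEN is not set")
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN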
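For reference, the retrieval flow that process_data ends up with after this commit: split the supplied text into chunks, embed them, index them in FAISS, pull the chunks most similar to the question, and run them through a question-answering chain. The sketch below condenses those steps into one function; the load_qa_chain call and its chain_type are assumptions, since the chain construction sits in lines the diff does not show.

from langchain.chains.question_answering import load_qa_chain
from langchain.docstore.document import Document as LangchainDocument
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.vectorstores import FAISS

def answer(response_data: str, user_input: str) -> str:
    # Chunk the raw text so each piece fits comfortably in the prompt
    splitter = CharacterTextSplitter(chunk_size=3000, chunk_overlap=0)
    docs = splitter.split_documents(
        [LangchainDocument(page_content=response_data, metadata={"source": "local"})]
    )
    # Embed the chunks and build an in-memory FAISS index
    db = FAISS.from_documents(docs, HuggingFaceEmbeddings())
    # Same endpoint and temperature as main.py
    llm = HuggingFaceEndpoint(repo_id="google/flan-t5-large", temperature=0.8)
    chain = load_qa_chain(llm, chain_type="stuff")  # assumed chain construction
    # Retrieve the most similar chunks and answer the question against them
    matches = db.similarity_search(user_input)
    return chain.run(input_documents=matches, question=user_input)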