shivakerur99 committed
Commit 8eb0be8
1 Parent(s): 82c30f7

Update main.py

Files changed (1)
    main.py +24 -28
main.py CHANGED
@@ -1,4 +1,5 @@
 import io
+import os
 from pydantic import BaseModel
 from fastapi import FastAPI, HTTPException, File, UploadFile
 from pdfminer.high_level import extract_text
@@ -14,13 +15,10 @@ from langchain_community.vectorstores import FAISS
 from langchain.text_splitter import CharacterTextSplitter
 from langchain.docstore.document import Document as LangchainDocument
 
-
-import os
-
 app = FastAPI()
 
 # Set up CORS (Cross-Origin Resource Sharing) for allowing requests from all origins
-origins=["*"]
+origins = ["*"]
 app.add_middleware(
     CORSMiddleware,
     allow_origins=origins,
@@ -46,6 +44,7 @@ documents = Table(
 
 # Create the document table in the database
 metadata.create_all(engine)
+database = Database(DATABASE_URL)
 
 # Define Pydantic model for the document
 class Document(BaseModel):
@@ -53,16 +52,9 @@ class Document(BaseModel):
     upload_date: str
     content: str
 
-# Function to save uploaded files
-# async def save_uploaded_file(file: UploadFile, destination: str):
-#     with open(destination, "wb") as buffer:
-#         while chunk := await file.read(1024):
-#             buffer.write(chunk)
-
 # Endpoint for uploading PDF files
 @app.post("/upload/")
 async def upload_pdf(file: UploadFile = File(...)):
-
     # Check if the uploaded file is a PDF
     if not file.filename.lower().endswith('.pdf'):
         raise HTTPException(status_code=400, detail="Only PDF files are allowed.")
@@ -78,17 +70,12 @@ async def upload_pdf(file: UploadFile = File(...)):
     doc = Document(filename=file.filename, upload_date=str(datetime.now()), content=text_content)
 
     # Insert the document data into the database
-    async with Database(DATABASE_URL) as database:
-        query = documents.insert().values(
-            filename=doc.filename,
-            upload_date=doc.upload_date,
-            content=doc.content
-        )
-        last_record_id = await database.execute(query)
-
-    # Save the uploaded PDF file
-    # destination = f"files/{file.filename}"
-    # await save_uploaded_file(file, destination)
+    query = documents.insert().values(
+        filename=doc.filename,
+        upload_date=doc.upload_date,
+        content=doc.content
+    )
+    last_record_id = await database.execute(query)
 
     # Return the document object
     return doc
@@ -105,18 +92,18 @@ async def process_data(data: DataInput):
     response_data = data.responseData
     user_input = data.userInput
 
-    # Load required models and components from Langchain librar
-    os.environ['HUGGINGFACEHUB_API_TOKEN'] =HUGGINGFACEHUB_API_TOKEN
+    # Load required models and components from Langchain library
     HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
-
+    os.environ['HUGGINGFACEHUB_API_TOKEN'] = HUGGINGFACEHUB_API_TOKEN
 
     dom = [LangchainDocument(page_content=response_data, metadata={"source": "local"})]
 
-    text_spliter = CharacterTextSplitter(chunk_size=3000, chunk_overlap=0)
-    docs = text_spliter.split_documents(dom)
+    text_splitter = CharacterTextSplitter(chunk_size=3000, chunk_overlap=0)
+    docs = text_splitter.split_documents(dom)
 
     embeddings = HuggingFaceEmbeddings()
     db = FAISS.from_documents(docs, embeddings)
+
     llm = HuggingFaceEndpoint(
         repo_id="google/flan-t5-large",
         temperature=0.8,
@@ -128,4 +115,13 @@ async def process_data(data: DataInput):
     dm = db.similarity_search(user_input)
     result = chain.run(input_documents=dm, question=user_input)
 
-    return result
+    return {"result": result}
+
+# To ensure the database connection is managed properly
+@app.on_event("startup")
+async def startup():
+    await database.connect()
+
+@app.on_event("shutdown")
+async def shutdown():
+    await database.disconnect()
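
The main change in this commit is the database connection lifecycle: instead of opening a fresh Database(DATABASE_URL) connection inside the upload handler with async with, one module-level database object is created next to metadata.create_all(engine) and tied to the application's startup and shutdown events. A minimal sketch of that pattern, using the same databases + SQLAlchemy Core combination as main.py; the SQLite URL, table columns, and the /documents/ endpoint here are illustrative assumptions, not copied from the file:

import sqlalchemy
from databases import Database
from fastapi import FastAPI

DATABASE_URL = "sqlite:///./documents.db"  # assumed URL; main.py defines its own

metadata = sqlalchemy.MetaData()
documents = sqlalchemy.Table(
    "documents",
    metadata,
    sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True),
    sqlalchemy.Column("filename", sqlalchemy.String),
    sqlalchemy.Column("upload_date", sqlalchemy.String),
    sqlalchemy.Column("content", sqlalchemy.Text),
)

engine = sqlalchemy.create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
metadata.create_all(engine)

database = Database(DATABASE_URL)
app = FastAPI()

@app.on_event("startup")
async def startup():
    # One shared connection pool for the whole application
    await database.connect()

@app.on_event("shutdown")
async def shutdown():
    await database.disconnect()

@app.post("/documents/")
async def create_document(filename: str, content: str):
    # SQLAlchemy Core insert executed through the shared database object,
    # the same pattern upload_pdf now uses
    query = documents.insert().values(filename=filename, content=content)
    record_id = await database.execute(query)
    return {"id": record_id}

Newer FastAPI versions prefer a lifespan context manager over on_event, but the on_event hooks shown here match what the commit adds.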
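A smaller fix in process_data is the ordering of the Hugging Face token lines: the old code assigned os.environ['HUGGINGFACEHUB_API_TOKEN'] from a name that had not been read yet, while the new code reads the token with os.getenv first and exports it afterwards. A hedged sketch of that read-then-export step with an explicit guard (the RuntimeError is added for illustration and is not part of the commit):

import os

# Read the token before any LangChain component needs it, and fail fast
# with a clear message if the environment variable is not set.
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if not HUGGINGFACEHUB_API_TOKEN:
    raise RuntimeError("HUGGINGFACEHUB_API_TOKEN is not set")
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN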
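For reference, the retrieval flow that process_data ends up with after this commit: split the supplied text into chunks, embed them, index them in FAISS, pull the chunks most similar to the question, and run them through a question-answering chain. The sketch below condenses those steps into one function; the load_qa_chain call and its chain_type are assumptions, since the chain construction sits in lines the diff does not show.

from langchain.chains.question_answering import load_qa_chain
from langchain.docstore.document import Document as LangchainDocument
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.vectorstores import FAISS

def answer(response_data: str, user_input: str) -> str:
    # Chunk the raw text so each piece fits comfortably in the prompt
    splitter = CharacterTextSplitter(chunk_size=3000, chunk_overlap=0)
    docs = splitter.split_documents(
        [LangchainDocument(page_content=response_data, metadata={"source": "local"})]
    )
    # Embed the chunks and build an in-memory FAISS index
    db = FAISS.from_documents(docs, HuggingFaceEmbeddings())
    # Same endpoint and temperature as main.py
    llm = HuggingFaceEndpoint(repo_id="google/flan-t5-large", temperature=0.8)
    chain = load_qa_chain(llm, chain_type="stuff")  # assumed chain construction
    # Retrieve the most similar chunks and answer the question against them
    matches = db.similarity_search(user_input)
    return chain.run(input_documents=matches, question=user_input)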