shivakerur99 committed
Commit 1251efa
1 Parent(s): ceb7f7f

Create main.py

Files changed (1)
  1. main.py +127 -0
main.py ADDED
@@ -0,0 +1,127 @@
+ import io
+ import os
+ from datetime import datetime
+
+ from pydantic import BaseModel
+ from fastapi import FastAPI, HTTPException, File, UploadFile
+ from fastapi.middleware.cors import CORSMiddleware
+ from pdfminer.high_level import extract_text
+ from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String
+ from databases import Database
+
+ from langchain.chains.question_answering import load_qa_chain
+ from langchain_community.llms import HuggingFaceEndpoint
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import FAISS
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.docstore.document import Document as LangchainDocument
+
+ app = FastAPI()
+
+ # Set up CORS (Cross-Origin Resource Sharing) to allow requests from all origins
+ origins = ["*"]
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=origins,
+     allow_credentials=True,
+     allow_methods=["GET", "POST", "PUT", "DELETE"],
+     allow_headers=["*"],
+ )
+
+ # Define the SQLAlchemy engine and metadata
+ DATABASE_URL = "sqlite:///./test.db"
+ engine = create_engine(DATABASE_URL)
+ metadata = MetaData()
+
+ # Define the document table schema
+ documents = Table(
+     "documents",
+     metadata,
+     Column("id", Integer, primary_key=True),
+     Column("filename", String),
+     Column("upload_date", String),
+     Column("content", String),
+ )
+
+ # Create the document table in the database
+ metadata.create_all(engine)
+
+ # Pydantic model for the document
+ class Document(BaseModel):
+     filename: str
+     upload_date: str
+     content: str
+
+ # Helper to stream an uploaded file to disk in 1 KiB chunks
+ async def save_uploaded_file(file: UploadFile, destination: str):
+     with open(destination, "wb") as buffer:
+         while chunk := await file.read(1024):
+             buffer.write(chunk)
+
+ # Endpoint for uploading PDF files
+ @app.post("/upload/")
+ async def upload_pdf(file: UploadFile = File(...)):
+     # Check that the uploaded file is a PDF
+     if not file.filename.lower().endswith(".pdf"):
+         raise HTTPException(status_code=400, detail="Only PDF files are allowed.")
+
+     # Read the content of the uploaded PDF file
+     content = await file.read()
+
+     # Extract text from the PDF
+     with io.BytesIO(content) as pdf_file:
+         text_content = extract_text(pdf_file)
+
+     # Create a document object
+     doc = Document(filename=file.filename, upload_date=str(datetime.now()), content=text_content)
+
+     # Insert the document data into the database
+     async with Database(DATABASE_URL) as database:
+         query = documents.insert().values(
+             filename=doc.filename,
+             upload_date=doc.upload_date,
+             content=doc.content,
+         )
+         await database.execute(query)
+
+     # Save the uploaded PDF file. file.read() above consumed the
+     # stream, so rewind it first, and make sure the target directory
+     # exists before writing.
+     os.makedirs("files", exist_ok=True)
+     await file.seek(0)
+     destination = f"files/{file.filename}"
+     await save_uploaded_file(file, destination)
+
+     # Return the document object
+     return doc
+
+ # Pydantic model for input data
+ class DataInput(BaseModel):
+     responseData: str
+     userInput: str
+
+ # Endpoint for processing user data
+ @app.post("/doc/")
+ async def process_data(data: DataInput):
+     # Access responseData and userInput
+     response_data = data.responseData
+     user_input = data.userInput
+
+     # Wrap the raw text in a LangChain document and split it into chunks
+     dom = [LangchainDocument(page_content=response_data, metadata={"source": "local"})]
+
+     text_splitter = CharacterTextSplitter(chunk_size=3000, chunk_overlap=0)
+     docs = text_splitter.split_documents(dom)
+
+     # Embed the chunks and index them in an in-memory FAISS store
+     embeddings = HuggingFaceEmbeddings()
+     db = FAISS.from_documents(docs, embeddings)
+
+     # Remote LLM on the Hugging Face Inference API; the token is
+     # typically supplied via the HUGGINGFACEHUB_API_TOKEN environment variable
+     llm = HuggingFaceEndpoint(
+         repo_id="google/flan-t5-xxl",
+         temperature=0.8,
+     )
+
+     chain = load_qa_chain(llm, chain_type="stuff")
+
+     # Perform similarity search and question answering
+     dm = db.similarity_search(user_input)
+     result = chain.run(input_documents=dm, question=user_input)
+
+     return result
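
For reference, a minimal client sketch for exercising the two endpoints end to end. It assumes the app is served locally with "uvicorn main:app --reload", that a sample.pdf exists in the working directory, and that HUGGINGFACEHUB_API_TOKEN is set in the server's environment; the base URL, file name, and question below are placeholders, not part of the commit.

import requests

BASE_URL = "http://localhost:8000"  # assumes `uvicorn main:app --reload`

# Upload a PDF; /upload/ responds with the stored Document as JSON
with open("sample.pdf", "rb") as f:  # placeholder file name
    resp = requests.post(
        f"{BASE_URL}/upload/",
        files={"file": ("sample.pdf", f, "application/pdf")},
    )
resp.raise_for_status()
doc = resp.json()

# Ask a question about the extracted text via /doc/
resp = requests.post(
    f"{BASE_URL}/doc/",
    json={"responseData": doc["content"], "userInput": "What is this document about?"},
)
resp.raise_for_status()
print(resp.json())  # the chain's answer string

Note that /doc/ takes the extracted text back from the client rather than re-reading it from the database, so the two calls are chained through the upload response.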