Spaces:
Sleeping
Sleeping
| from langchain.llms import OpenAI | |
| from langchain.chains import AnalyzeDocumentChain | |
| from langchain.document_loaders import PyPDFLoader | |
| from langchain.document_loaders import UnstructuredEmailLoader | |
| from langchain.document_loaders import Docx2txtLoader | |
| from langchain.chains.question_answering import load_qa_chain | |
| import os | |
| from pandas_ai import csv_file,excel_file | |
def pdf_file(file_upload, message):
    """Answer a question about the contents of a PDF file.

    The PDF is loaded with ``PyPDFLoader.load_and_split()``, the text of all
    returned chunks is concatenated (no separator), and the combined text is
    passed to an ``AnalyzeDocumentChain`` wrapping a ``map_reduce``
    question-answering chain backed by ``OpenAI(temperature=0)``.

    Args:
        file_upload: Value handed straight to ``PyPDFLoader`` -- presumably a
            path to the PDF on disk; confirm against the caller.
        message: The question to ask about the document.

    Returns:
        Whatever ``AnalyzeDocumentChain.run`` returns for the question.
    """
    chunks = PyPDFLoader(file_upload).load_and_split()
    # Join once instead of growing a string with ``+`` inside a loop.
    page_text = ''.join(chunk.page_content for chunk in chunks)

    # NOTE: the previous bare ``os.environ.get('OPENAI_API_KEY')`` statement
    # discarded its result and had no effect, so it was removed. The OpenAI
    # wrapper is expected to find the key itself -- confirm with LangChain docs.
    llm = OpenAI(temperature=0)
    qa_chain = load_qa_chain(llm, chain_type="map_reduce")
    qa_pdf_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)
    return qa_pdf_chain.run(input_document=page_text, question=message)
def email_file(file_upload, message):
    """Answer a question about the contents of an e-mail file.

    The file is loaded with ``UnstructuredEmailLoader`` in ``"elements"`` mode
    with ``process_attachments=True``; the text of every loaded element is
    concatenated (no separator) and passed to an ``AnalyzeDocumentChain``
    wrapping a ``map_reduce`` question-answering chain backed by
    ``OpenAI(temperature=0)``.

    Args:
        file_upload: Value handed straight to ``UnstructuredEmailLoader`` --
            presumably a path to the e-mail file; confirm against the caller.
        message: The question to ask about the e-mail.

    Returns:
        Whatever ``AnalyzeDocumentChain.run`` returns for the question.
    """
    loader = UnstructuredEmailLoader(
        file_upload,
        mode="elements",
        process_attachments=True,
    )
    # Join once instead of growing a string with ``+`` inside a loop.
    email_text = ''.join(element.page_content for element in loader.load())

    # NOTE: the previous bare ``os.environ.get('OPENAI_API_KEY')`` statement
    # discarded its result and had no effect, so it was removed. The OpenAI
    # wrapper is expected to find the key itself -- confirm with LangChain docs.
    llm = OpenAI(temperature=0)
    qa_chain = load_qa_chain(llm, chain_type="map_reduce")
    qa_email_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)
    return qa_email_chain.run(input_document=email_text, question=message)
def docx_file(file_upload, message):
    """Answer a question about the contents of a .docx document.

    The file is loaded with ``Docx2txtLoader``; the text of every loaded
    document is concatenated (no separator) and passed to an
    ``AnalyzeDocumentChain`` wrapping a ``map_reduce`` question-answering
    chain backed by ``OpenAI(temperature=0)``.

    Args:
        file_upload: Value handed straight to ``Docx2txtLoader`` -- presumably
            a path to the .docx file; confirm against the caller.
        message: The question to ask about the document.

    Returns:
        Whatever ``AnalyzeDocumentChain.run`` returns for the question.
    """
    loaded_docs = Docx2txtLoader(file_upload).load()
    # Join once instead of growing a string with ``+`` inside a loop.
    doc_text = ''.join(part.page_content for part in loaded_docs)

    # NOTE: the previous bare ``os.environ.get('OPENAI_API_KEY')`` statement
    # discarded its result and had no effect, so it was removed. The OpenAI
    # wrapper is expected to find the key itself -- confirm with LangChain docs.
    llm = OpenAI(temperature=0)
    qa_chain = load_qa_chain(llm, chain_type="map_reduce")
    qa_document_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)
    return qa_document_chain.run(input_document=doc_text, question=message)
def initiate_process(file_upload, extention, message):
    """Dispatch an uploaded file to the handler matching its extension.

    Supported extensions are ``pdf``, ``eml``, ``docx``, ``csv`` and ``xlsx``.
    The extension is matched case-insensitively, ignoring surrounding
    whitespace and any leading dot, so ``"PDF"``, ``".pdf"`` and ``"pdf"`` all
    select the PDF handler.

    Args:
        file_upload: The uploaded file, forwarded unchanged to the handler.
        extention: File extension used to pick the handler. (The misspelled
            parameter name is kept so keyword callers continue to work.)
        message: The question to ask about the file, forwarded unchanged.

    Returns:
        The selected handler's result, or the string
        ``"Please upload correct file format"`` when the extension is not
        supported (including when it is not a string).

    Exceptions raised by a handler are not caught here and propagate to the
    caller.
    """
    # Normalise before comparing; non-string values (e.g. None) fall through
    # to the unsupported-format message just as they did before.
    if isinstance(extention, str):
        kind = extention.strip().lstrip('.').lower()
    else:
        kind = ''

    if kind == 'pdf':
        return pdf_file(file_upload, message)
    if kind == 'eml':
        return email_file(file_upload, message)
    if kind == 'docx':
        return docx_file(file_upload, message)
    if kind == 'csv':
        return csv_file(file_upload, message)
    if kind == 'xlsx':
        return excel_file(file_upload, message)
    return "Please upload correct file format"