import os

import streamlit as st
from PyPDF2 import PdfReader
from langchain import OpenAI, LLMChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
from langchain.chat_models import AzureChatOpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.chains import AnalyzeDocumentChain
from langchain.document_loaders import TextLoader, PyPDFLoader
from langchain.indexes import VectorstoreIndexCreator

# Azure OpenAI configuration
os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_VERSION"] = "2023-03-15-preview"

# Chat model used for summarization
llm = AzureChatOpenAI(
    deployment_name="esujnand",
    model_name="gpt-35-turbo",
)

st.title("Wipro CSRD AI 1")

# Description text
st.write("Step 1: Summarize your selected section of the CSRD. Sections include environmental topic 1 and environmental topic 2.")
st.write("Step 2: Ask your specific questions regarding a CSRD disclosure requirement.")

# PDF file upload
pdf_file = st.file_uploader("Upload file", type=["pdf"])

numberofpages = 100

if st.button("How many pages?"):
    reader = PdfReader(pdf_file)
    numberofpages = len(reader.pages)
    st.write("length is ", numberofpages)

if st.button("Table of contents?"):
    reader = PdfReader(pdf_file)
    page = reader.pages[2].extract_text()
    st.write(page)

startpage = st.slider("Which section to look at", 0, numberofpages, 1)
st.write("starting section page", startpage)

pagecount = st.slider("How many pages", 1, 5, 1)
st.write("pages to read", pagecount)


def extract_text_from_pdf():
    """Extract text from the selected page range of the uploaded PDF."""
    reader = PdfReader(pdf_file)
    # Get the text of the selected pages
    text = [reader.pages[i].extract_text() for i in range(startpage, startpage + pagecount)]
    # Join all pages into a single string
    return " ".join(text)


def extract_text_from_pdf2():
    """Extract text from every page of the uploaded PDF."""
    reader = PdfReader(pdf_file)
    # Get the text of all pages
    text = [reader.pages[i].extract_text() for i in range(len(reader.pages))]
    # Join all pages into a single string
    return " ".join(text)


if st.button("Summarize"):
    with st.spinner("Extracting text..."):
        # Map-reduce summarization chain wrapped in an AnalyzeDocumentChain,
        # which splits the raw text into documents before summarizing.
        summary_chain = load_summarize_chain(llm, chain_type="map_reduce")
        summarize_document_chain = AnalyzeDocumentChain(combine_docs_chain=summary_chain, verbose=True)
        text = extract_text_from_pdf()
    with st.spinner("Summarizing..."):
        result = summarize_document_chain.run(text)
    st.write(result)