import os import openai import streamlit as st import trafilatura from langchain import PromptTemplate, OpenAI, LLMChain # Definition of the style custom_css = """ """ st.sidebar.write("This program uses the OpenAI API to function. Please enter your API key below to use the program. ") OPENAI_API_KEY = st.sidebar.text_input("API KEY:") class WebContentSummarizerApp: def __init__(self): st.title("WebContent Summarizer") # Insert the custom CSS into the application st.markdown(custom_css, unsafe_allow_html=True) # Use a CSS class for the subtitle st.markdown('
With just a simple URL, simplify your online research and content consumption. ' 'This AI tool extracts and condenses the essential information from web pages into a concise, easy-to-read paragraph.
', unsafe_allow_html=True) self.url = st.text_input("Paste or type the URL you want to summarize:") self.run_button = st.button("Summarize") @staticmethod def scrape_website(url): downloaded = trafilatura.fetch_url(url) main_text = trafilatura.extract(downloaded) return main_text def run(self): if self.run_button: if self.url: url = self.url with st.spinner("Getting information from the page..."): self.text = self.scrape_website(url) with st.spinner("Analyzing and organizing..."): result = self.llm_text_insight() st.write("Summarized content:") st.write(result) else: st.warning("Please enter a valid URL and your OpenAI API Key.") def llm_text_insight(self): openai.api_key = OPENAI_API_KEY text = self.text # Split the text into smaller fragments max_length = 4096 chunks = [text[i:i+max_length] for i in range(0, len(text), max_length)] results = [] template = """ Please take the following text interprete and transform it into an organized paragraph for a study. Ensure that the paragraph is clear and coherent. If necessary, add or rearrange ideas to achieve a logical structure. The text is as follows: text: {input} Make sure the text is only one paragraph """ llm = OpenAI(temperature=0, openai_api_key=OPENAI_API_KEY) prompt_template = PromptTemplate.from_template(template=template) chain = LLMChain(llm=llm, prompt=prompt_template) for chunk in chunks: result = chain.predict(input=chunk) results.append(result) # Check if results contain more than one paragraph and final result if len(results) > 1: alternate_input = " ".join(results) llm = OpenAI(temperature=0, openai_api_key=OPENAI_API_KEY) prompt_template = PromptTemplate.from_template(template=template) chain = LLMChain(llm=llm, prompt=prompt_template) result = chain.predict(input=alternate_input) return result else: # Concatenation of the results return " ".join(results) if __name__ == "__main__": app = WebContentSummarizerApp() app.run()