saketh092 committed on
Commit
af827e0
1 Parent(s): 8663f50

Upload 13 files

summarizer/README.md ADDED
@@ -0,0 +1 @@
+ Summariser
summarizer/__pycache__/document_data.cpython-311.pyc ADDED
Binary file (1.43 kB)
 
summarizer/__pycache__/load.cpython-311.pyc ADDED
Binary file (811 Bytes)
 
summarizer/__pycache__/website_data.cpython-311.pyc ADDED
Binary file (1.59 kB)
 
summarizer/app.py ADDED
@@ -0,0 +1,7 @@
+ import streamlit as st
+ st.set_page_config(
+     page_title="Summarize EAZY",
+     page_icon="fav2.jpg",
+     layout="wide",
+ )
+ st.header("Welcome to **SUMMARIZE EAZY**")
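Run with `streamlit run summarizer/app.py`; Streamlit automatically picks up the scripts under `summarizer/pages/` (document.py, text.py, url.py, youtube_video.py) as sidebar pages, which is how the app exposes its different input modes.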
summarizer/document_data.py ADDED
@@ -0,0 +1,22 @@
+ import streamlit as st
+ from PyPDF2 import PdfReader
+ import docx2txt
+
+
+ def document_extract():
+     """Read uploaded .docx/.pdf/.txt files and return their combined text."""
+     uploaded_files = st.file_uploader("Choose a file", accept_multiple_files=True)
+     content = ""
+     if uploaded_files:
+         for x in uploaded_files:
+             if x.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
+                 content += docx2txt.process(x)
+             elif x.type == "application/pdf":
+                 pdf_reader = PdfReader(x)
+                 for page in pdf_reader.pages:
+                     content += page.extract_text()
+             elif x.type == "text/plain":
+                 content += x.read().decode("utf-8")
+             else:
+                 st.error("Please provide files of type **.docx**, **.pdf**, **.txt**")
+     return content
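Outside Streamlit, the PyPDF2 half of this extractor can be sanity-checked against a local file; the path below is illustrative:

```python
# Minimal check of the PDF branch of document_extract, bypassing the
# Streamlit uploader. "sample.pdf" is a placeholder for any local PDF.
from PyPDF2 import PdfReader

reader = PdfReader("sample.pdf")
text = "".join((page.extract_text() or "") for page in reader.pages)
print(text[:200])
```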
summarizer/load.py ADDED
@@ -0,0 +1,16 @@
+ import os
+
+ import google.generativeai as genai
+
+ # Read the API key from the environment rather than hardcoding it in source.
+ genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
+ model = genai.GenerativeModel("gemini-pro")
+
+
+ def output(text):
+     # Stream the summary so callers can render chunks as they arrive.
+     response = model.generate_content(
+         "Summarize this and also don't make this too short " + text,
+         stream=True,
+     )
+     return response
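A minimal sketch of exercising `load.output` from a plain Python session, assuming `GOOGLE_API_KEY` is exported first and the interpreter is started in `summarizer/` so `import load` resolves; the page scripts consume the same streaming response with `st.write` instead of `print`:

```python
# Hypothetical smoke test for load.py. Assumes GOOGLE_API_KEY is set in
# the environment; with stream=True the response yields partial chunks.
import load

response = load.output("Streamlit is an open-source Python framework for data apps.")
for chunk in response:
    print(chunk.text, end="")
```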
summarizer/pages/document.py ADDED
@@ -0,0 +1,10 @@
+ import streamlit as st
+
+ import document_data
+ import load
+
+ data = document_data.document_extract()
+ if data:
+     response = load.output(data)
+     for chunk in response:
+         st.write(chunk.text)
summarizer/pages/text.py ADDED
@@ -0,0 +1,9 @@
+ import streamlit as st
+
+ import load
+
+ text = st.text_input("Enter the text you want to summarize...")
+ if text:
+     response = load.output(text)
+     for chunk in response:
+         st.write(chunk.text)
summarizer/pages/url.py ADDED
@@ -0,0 +1,10 @@
+ import streamlit as st
+
+ import load
+ import website_data
+
+ url = st.text_input("Enter the URL", placeholder="URL...")
+ if url:
+     response = load.output(website_data.scrape(url))
+     for chunk in response:
+         st.write(chunk.text)
summarizer/pages/video.py ADDED
File without changes
summarizer/pages/youtube_video.py ADDED
@@ -0,0 +1,12 @@
+ import streamlit as st
+
+ import load
+ from langchain_community.document_loaders import YoutubeLoader
+
+ url = st.text_input("Enter the URL", placeholder="URL...")
+ if url:
+     loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
+     transcript = loader.load()
+     response = load.output(str(transcript))
+     for chunk in response:
+         st.write(chunk.text)
summarizer/website_data.py ADDED
@@ -0,0 +1,26 @@
+ import requests
+ from bs4 import BeautifulSoup
+
+
+ def scrape(url):
+     """Fetch a page and return the text of its headings, paragraphs, and lists."""
+     # A desktop-browser User-Agent; some sites block the default requests agent.
+     headers = {
+         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246"
+     }
+     r = requests.get(url=url, headers=headers)
+     if r.status_code != 200:
+         return ""
+     soup = BeautifulSoup(r.content, "html.parser")
+     text = ""
+     saw_paragraph = False
+     for tag in soup.body.find_all():
+         if tag.name in ("p", "h1", "h2", "h3"):
+             text += tag.get_text() + "\n"
+             if tag.name == "p":
+                 saw_paragraph = True
+         # Keep lists only once a paragraph has been seen, to skip nav menus.
+         elif tag.name in ("ul", "ol") and saw_paragraph:
+             for li in tag.find_all("li"):
+                 text += li.get_text() + "\n"
+     return text
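`scrape` can also be sanity-checked on its own against any article-style page; the URL below is illustrative only:

```python
# Standalone check of website_data.scrape: print the first 500 characters
# of the extracted headings/paragraphs, or note a failed request.
import website_data

text = website_data.scrape("https://en.wikipedia.org/wiki/Web_scraping")
if text:
    print(text[:500])
else:
    print("request failed or returned a non-200 status")
```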