Spaces:
Configuration error
Configuration error
Upload 13 files
Browse files- summarizer/README.md +1 -0
- summarizer/__pycache__/document_data.cpython-311.pyc +0 -0
- summarizer/__pycache__/load.cpython-311.pyc +0 -0
- summarizer/__pycache__/website_data.cpython-311.pyc +0 -0
- summarizer/app.py +7 -0
- summarizer/document_data.py +19 -0
- summarizer/load.py +9 -0
- summarizer/pages/document.py +8 -0
- summarizer/pages/text.py +7 -0
- summarizer/pages/url.py +8 -0
- summarizer/pages/video.py +0 -0
- summarizer/pages/youtube_video.py +12 -0
- summarizer/website_data.py +40 -0
summarizer/README.md
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Summariser
|
summarizer/__pycache__/document_data.cpython-311.pyc
ADDED
Binary file (1.43 kB). View file
|
|
summarizer/__pycache__/load.cpython-311.pyc
ADDED
Binary file (811 Bytes). View file
|
|
summarizer/__pycache__/website_data.cpython-311.pyc
ADDED
Binary file (1.59 kB). View file
|
|
summarizer/app.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
# Settings handed to st.set_page_config for the landing page.
PAGE_SETTINGS = {
    "page_title": "Summarize EAZY",
    "page_icon": "fav2.jpg",
    "layout": "wide",
}

st.set_page_config(**PAGE_SETTINGS)

# Landing-page heading (Markdown bold around the app name).
st.header("Welcome to the **SUMMARIZE EAZY**")
|
summarizer/document_data.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from PyPDF2 import PdfReader
|
3 |
+
import docx2txt
|
4 |
+
def document_extract():
    """Collect the text of every document the user uploads.

    Renders a multi-file Streamlit uploader and concatenates the text
    extracted from each supported file (.docx, .pdf, .txt) in upload order.
    An error is shown in the UI for every file of an unsupported type.

    Returns:
        str: The combined text, or an empty string when nothing was
        uploaded or no text could be extracted.
    """
    uploaded_files = st.file_uploader("Choose a file", accept_multiple_files=True)
    parts = []
    # `or []` covers both "nothing uploaded yet" (empty list) and None.
    for uploaded in uploaded_files or []:
        if uploaded.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            parts.append(docx2txt.process(uploaded) or "")
        elif uploaded.type == "application/pdf":
            # Guard against pages for which no text is returned.
            parts.extend(page.extract_text() or "" for page in PdfReader(uploaded).pages)
        elif uploaded.type == "text/plain":
            # Streamlit has no `st.read`; take the bytes from the uploaded
            # buffer itself.  Appending (instead of assigning) keeps the text
            # of files processed earlier in the loop.
            parts.append(uploaded.getvalue().decode("utf-8", errors="replace"))
        else:
            st.error("Please provide files of type **.docx**,**.pdf**,**.txt**")
    return "".join(parts)
|
summarizer/load.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pathlib
|
2 |
+
import textwrap
|
3 |
+
import google.generativeai as genai
|
4 |
+
def output(text):
    """Ask the configured Gemini model for a summary of *text*.

    Args:
        text: The text to summarise; it is appended to a fixed prompt.

    Returns:
        The response object returned by ``model.generate_content``.
    """
    response = model.generate_content("Summarize this and also don't make this too short "+text)
    return response


def _load_api_key():
    """Return the API key stored in the GOOGLE_API_KEY environment variable.

    Raises:
        RuntimeError: If the variable is unset or empty.
    """
    import os  # local import keeps this block self-contained

    key = os.environ.get("GOOGLE_API_KEY")
    if not key:
        raise RuntimeError(
            "GOOGLE_API_KEY is not set; provide the API key through the "
            "environment instead of hard-coding it in source."
        )
    return key


# SECURITY: credentials must never be committed to the repository.  Any key
# that was ever stored in this file should be treated as leaked and rotated.
GOOGLE_API_KEY = _load_api_key()
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel('gemini-pro')
|
summarizer/pages/document.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import document_data
|
3 |
+
import load
|
4 |
+
# Document page: summarise whatever text the uploader extracted.
extracted_text = document_data.document_extract()
if extracted_text:
    summary = load.output(extracted_text)
    # Write each part of the model response to the page in order.
    for part in summary:
        st.write(part.text)
|
summarizer/pages/text.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import load
|
2 |
+
import streamlit as st
|
3 |
+
# Text page: summarise text typed directly into the input box.
user_text = st.text_input("Enter the text you want to summarize..")
if user_text:
    summary = load.output(user_text)
    # Write each part of the model response to the page in order.
    for part in summary:
        st.write(part.text)
|
summarizer/pages/url.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import load
|
3 |
+
import website_data
|
4 |
+
# URL page: scrape the given address and summarise the extracted text.
page_url = st.text_input("Enter the url", placeholder="URL....")
if page_url:
    page_text = website_data.scrape(page_url)
    summary = load.output(page_text)
    # Write each part of the model response to the page in order.
    for part in summary:
        st.write(part.text)
|
summarizer/pages/video.py
ADDED
File without changes
|
summarizer/pages/youtube_video.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import load
|
3 |
+
from langchain_community.document_loaders import YoutubeLoader
|
4 |
+
# YouTube page: load a video's transcript and summarise it.
url = st.text_input("Enter the url", placeholder="URL....")
if url:
    loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
    documents = loader.load()
    # Send only the transcript text.  str() of the document list would put
    # the objects' repr (including metadata and escaped characters) into the
    # prompt, and an empty result would be summarised as the literal "[]".
    transcript = "\n".join(doc.page_content for doc in documents)
    if transcript.strip():
        response = load.output(transcript)
        for chunk in response:
            st.write(chunk.text)
    else:
        st.error("No transcript could be loaded for this video.")
|
summarizer/website_data.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
from bs4 import BeautifulSoup
|
3 |
+
|
4 |
+
|
5 |
+
def scrape(url):
    """Download a web page and return its readable text.

    Collects the text of every <p>, <h1>, <h2> and <h3> element in document
    order, plus the <li> items of any <ul>/<ol> encountered after the first
    paragraph.  Each collected entry is followed by a newline.

    Args:
        url: Address of the page to fetch.

    Returns:
        str: The extracted text, or "" when the request fails, the server
        does not answer with status 200, or the page has no <body>.
    """
    # Identify as a desktop browser (Edge on Windows 10).
    headers = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246"}

    try:
        # Without a timeout a stalled server would block the caller forever.
        r = requests.get(url=url, headers=headers, timeout=10)
    except requests.RequestException:
        # Invalid URLs and network failures are reported like any other
        # unsuccessful fetch: with an empty result.
        return ""
    if r.status_code != 200:
        return ""

    soup = BeautifulSoup(r.content, 'html.parser')
    if soup.body is None:
        return ""

    lines = []
    # Lists are only collected once a paragraph has been seen
    # (presumably to skip navigation menus at the top of the page -- confirm).
    seen_paragraph = False
    for tag in soup.body.find_all():
        if tag.name in ('p', 'h1', 'h2', 'h3'):
            lines.append(tag.get_text())
            if tag.name == 'p':
                seen_paragraph = True
        elif tag.name in ('ul', 'ol') and seen_paragraph:
            lines.extend(item.get_text() for item in tag.find_all('li'))
    return "".join(line + "\n" for line in lines)
|