saketh092 committed on
Commit
af827e0
1 Parent(s): 8663f50

Upload 13 files

summarizer/README.md ADDED
@@ -0,0 +1 @@
+ Summariser
summarizer/__pycache__/document_data.cpython-311.pyc ADDED
Binary file (1.43 kB)
 
summarizer/__pycache__/load.cpython-311.pyc ADDED
Binary file (811 Bytes)
 
summarizer/__pycache__/website_data.cpython-311.pyc ADDED
Binary file (1.59 kB)
 
summarizer/app.py ADDED
@@ -0,0 +1,7 @@
+ import streamlit as st
+ st.set_page_config(
+     page_title="Summarize EAZY",
+     page_icon="fav2.jpg",
+     layout="wide",
+ )
+ st.header("Welcome to **SUMMARIZE EAZY**")
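Run with `streamlit run summarizer/app.py`; Streamlit automatically picks up the scripts under `summarizer/pages/` (document.py, text.py, url.py, youtube_video.py) as sidebar pages, which is how the app exposes its different input modes.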
summarizer/document_data.py ADDED
@@ -0,0 +1,22 @@
+ import streamlit as st
+ from PyPDF2 import PdfReader
+ import docx2txt
+
+
+ def document_extract():
+     """Read uploaded .docx/.pdf/.txt files and return their combined text."""
+     uploaded_files = st.file_uploader("Choose a file", accept_multiple_files=True)
+     content = ""
+     if uploaded_files:
+         for x in uploaded_files:
+             if x.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
+                 content += docx2txt.process(x)
+             elif x.type == "application/pdf":
+                 pdf_reader = PdfReader(x)
+                 for page in pdf_reader.pages:
+                     content += page.extract_text()
+             elif x.type == "text/plain":
+                 content += x.read().decode("utf-8")
+             else:
+                 st.error("Please provide files of type **.docx**, **.pdf**, **.txt**")
+     return content
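Outside Streamlit, the PyPDF2 half of this extractor can be sanity-checked against a local file; the path below is illustrative:

```python
# Minimal check of the PDF branch of document_extract, bypassing the
# Streamlit uploader. "sample.pdf" is a placeholder for any local PDF.
from PyPDF2 import PdfReader

reader = PdfReader("sample.pdf")
text = "".join((page.extract_text() or "") for page in reader.pages)
print(text[:200])
```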
summarizer/load.py ADDED
@@ -0,0 +1,16 @@
+ import os
+
+ import google.generativeai as genai
+
+ # Read the API key from the environment rather than hardcoding it in source.
+ genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
+ model = genai.GenerativeModel("gemini-pro")
+
+
+ def output(text):
+     # Stream the summary so callers can render chunks as they arrive.
+     response = model.generate_content(
+         "Summarize this and also don't make this too short " + text,
+         stream=True,
+     )
+     return response
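A minimal sketch of exercising `load.output` from a plain Python session, assuming `GOOGLE_API_KEY` is exported first and the interpreter is started in `summarizer/` so `import load` resolves; the page scripts consume the same streaming response with `st.write` instead of `print`:

```python
# Hypothetical smoke test for load.py. Assumes GOOGLE_API_KEY is set in
# the environment; with stream=True the response yields partial chunks.
import load

response = load.output("Streamlit is an open-source Python framework for data apps.")
for chunk in response:
    print(chunk.text, end="")
```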
summarizer/pages/document.py ADDED
@@ -0,0 +1,10 @@
+ import streamlit as st
+
+ import document_data
+ import load
+
+ data = document_data.document_extract()
+ if data:
+     response = load.output(data)
+     for chunk in response:
+         st.write(chunk.text)
summarizer/pages/text.py ADDED
@@ -0,0 +1,9 @@
+ import streamlit as st
+
+ import load
+
+ text = st.text_input("Enter the text you want to summarize...")
+ if text:
+     response = load.output(text)
+     for chunk in response:
+         st.write(chunk.text)
summarizer/pages/url.py ADDED
@@ -0,0 +1,10 @@
+ import streamlit as st
+
+ import load
+ import website_data
+
+ url = st.text_input("Enter the URL", placeholder="URL...")
+ if url:
+     response = load.output(website_data.scrape(url))
+     for chunk in response:
+         st.write(chunk.text)
summarizer/pages/video.py ADDED
File without changes
summarizer/pages/youtube_video.py ADDED
@@ -0,0 +1,12 @@
+ import streamlit as st
+
+ import load
+ from langchain_community.document_loaders import YoutubeLoader
+
+ url = st.text_input("Enter the URL", placeholder="URL...")
+ if url:
+     loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
+     transcript = loader.load()
+     response = load.output(str(transcript))
+     for chunk in response:
+         st.write(chunk.text)
summarizer/website_data.py ADDED
@@ -0,0 +1,26 @@
+ import requests
+ from bs4 import BeautifulSoup
+
+
+ def scrape(url):
+     """Fetch a page and return the text of its headings, paragraphs, and lists."""
+     # A desktop-browser User-Agent; some sites block the default requests agent.
+     headers = {
+         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246"
+     }
+     r = requests.get(url=url, headers=headers)
+     if r.status_code != 200:
+         return ""
+     soup = BeautifulSoup(r.content, "html.parser")
+     text = ""
+     saw_paragraph = False
+     for tag in soup.body.find_all():
+         if tag.name in ("p", "h1", "h2", "h3"):
+             text += tag.get_text() + "\n"
+             if tag.name == "p":
+                 saw_paragraph = True
+         # Keep lists only once a paragraph has been seen, to skip nav menus.
+         elif tag.name in ("ul", "ol") and saw_paragraph:
+             for li in tag.find_all("li"):
+                 text += li.get_text() + "\n"
+     return text
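`scrape` can also be sanity-checked on its own against any article-style page; the URL below is illustrative only:

```python
# Standalone check of website_data.scrape: print the first 500 characters
# of the extracted headings/paragraphs, or note a failed request.
import website_data

text = website_data.scrape("https://en.wikipedia.org/wiki/Web_scraping")
if text:
    print(text[:500])
else:
    print("request failed or returned a non-200 status")
```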