saketh092 committed on
Commit
4ea8f5c
1 Parent(s): c2a2fb2

Upload 11 files

Browse files
Files changed (11) hide show
  1. README.md +1 -13
  2. app.py +7 -0
  3. document_data.py +19 -0
  4. load.py +9 -0
  5. pages/document.py +8 -0
  6. pages/text.py +7 -0
  7. pages/url.py +8 -0
  8. pages/video.py +0 -0
  9. pages/youtube_video.py +12 -0
  10. video_c.py +26 -0
  11. website_data.py +40 -0
README.md CHANGED
@@ -1,13 +1 @@
1
- ---
2
- title: Summarizer
3
- emoji: 👁
4
- colorFrom: yellow
5
- colorTo: red
6
- sdk: streamlit
7
- sdk_version: 1.33.0
8
- app_file: app.py
9
- pinned: false
10
- license: llama2
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ Summariser
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
# Landing page of the multi-page Streamlit app: set the browser-tab title and
# icon, switch to the wide layout, then show the welcome banner.
page_settings = {
    "page_title": "Summarize EAZY",
    "page_icon": "fav2.jpg",
    "layout": "wide",
}
st.set_page_config(**page_settings)

st.header("Welcome to the **SUMMARIZE EAZY**")
document_data.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ import docx2txt
4
def document_extract():
    """Render a multi-file uploader and return the text of all uploaded files.

    Supported uploads are .docx, .pdf and .txt (matched by MIME type). The
    text of every supported file is concatenated in upload order. For any
    other file type an error is shown in the page and the file is skipped.

    Returns:
        str: The combined text, or an empty string when nothing was uploaded
        or no supported file was found.
    """
    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

    uploaded_files = st.file_uploader("Choose a file", accept_multiple_files=True)

    parts = []
    for uploaded in uploaded_files or []:
        if uploaded.type == docx_mime:
            parts.append(docx2txt.process(uploaded))
        elif uploaded.type == "application/pdf":
            # extract_text() can yield nothing for image-only pages; treat
            # that as empty text instead of failing the concatenation.
            parts.extend(page.extract_text() or "" for page in PdfReader(uploaded).pages)
        elif uploaded.type == "text/plain":
            # Read the uploaded bytes directly (Streamlit has no `st.read`)
            # and append, so text from earlier files is not discarded.
            parts.append(uploaded.getvalue().decode("utf-8", errors="replace"))
        else:
            st.error("Please provide files of type **.docx**,**.pdf**,**.txt**")
    return "".join(parts)
load.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import pathlib
2
+ import textwrap
3
+ import google.generativeai as genai
4
import os

# The API key is taken from the environment so that no secret is committed to
# the repository. Set GOOGLE_API_KEY before starting the app.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel('gemini-pro')


def output(text):
    """Ask the configured Gemini model to summarize *text*.

    Args:
        text: The text to summarize; it is appended to a fixed instruction
            prompt.

    Returns:
        The response object returned by ``model.generate_content``. Callers
        in this project iterate over it and read ``.text`` from each chunk.
    """
    prompt = "Summarize this and also don't make this too short " + text
    return model.generate_content(prompt)
pages/document.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import document_data
3
+ import load
4
# Document page: show the uploader, then summarize whatever text it produced.
data = document_data.document_extract()

# An empty string means there is nothing to summarize yet, so skip the model call.
if data:
    summary = load.output(data)
    for part in summary:
        st.write(part.text)
pages/text.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import load
2
+ import streamlit as st
3
# Text page: summarize text typed directly into the input box.
text = st.text_input("Enter the text you want to summarize..")

# The input is empty until the user submits something; only then call the model.
if text:
    summary = load.output(text)
    for part in summary:
        st.write(part.text)
pages/url.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import load
3
+ import website_data
4
# URL page: scrape the text of a web page and summarize it.
url = st.text_input("Enter the url", placeholder="URL....")

# Nothing to do until a URL has been entered.
if url:
    page_text = website_data.scrape(url)
    summary = load.output(page_text)
    for part in summary:
        st.write(part.text)
pages/video.py ADDED
File without changes
pages/youtube_video.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import load
3
+ from langchain_community.document_loaders import YoutubeLoader
4
# YouTube page: load a video's transcript and summarize it.
url = st.text_input("Enter the url", placeholder="URL....")

if url:
    loader = YoutubeLoader.from_youtube_url(
        url, add_video_info=False
    )
    transcript = loader.load()

    # Send only the transcript text to the model. str(transcript) would pass
    # the repr of the Document list (wrapper text, metadata, escaped
    # newlines) instead of the spoken content.
    transcript_text = "\n".join(doc.page_content for doc in transcript)

    if transcript_text:
        response = load.output(transcript_text)
        for chunk in response:
            st.write(chunk.text)
    else:
        st.warning("No transcript text could be loaded for this video.")
video_c.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from moviepy.editor import *
2
+ import assemblyai as aai
3
+ import tempfile
4
+
5
+
6
import os

# The AssemblyAI key is read from the environment so that no secret is
# committed to the repository. Set ASSEMBLYAI_API_KEY before running.
aai.settings.api_key = os.environ.get("ASSEMBLYAI_API_KEY")

# Location of the mp4 file to transcribe.
VIDEO_PATH = r"C:\Users\HP\OneDrive\Documents\app\summarizer\summarizer\video_file.mp4"

# Reserve a temporary .mp3 path. The handle is closed before anything writes
# to the path, so the audio writer is not competing with an open file handle.
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio_file:
    audio_path = temp_audio_file.name

# Load the mp4 file
video = VideoFileClip(VIDEO_PATH)
try:
    # Extract audio from video and write it to the temporary file
    video.audio.write_audiofile(audio_path, codec='mp3')

    # Transcribe audio to text
    transcriber = aai.Transcriber()
    transcript = transcriber.transcribe(audio_path)
finally:
    # Release the video reader and remove the temporary audio file, which
    # delete=False would otherwise leave behind on disk.
    video.close()
    os.remove(audio_path)

text_from_audio = transcript.text

print(text_from_audio)
website_data.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+
4
+
5
def scrape(url):
    """Download *url* and return the text of its headings, paragraphs and lists.

    The page body is walked in document order. Text of every ``p``, ``h1``,
    ``h2`` and ``h3`` tag is collected. Items of ``ul``/``ol`` lists are
    collected only once at least one paragraph has been seen. Each collected
    piece is followed by a newline.

    Args:
        url: Address of the page to fetch.

    Returns:
        str: The collected text, or an empty string when the response status
        is not 200 or the document has no ``<body>``.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Send a desktop-browser User-Agent with the request.
    headers = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246"}

    # A timeout keeps the page from hanging forever on an unresponsive host.
    r = requests.get(url=url, headers=headers, timeout=30)
    if r.status_code != 200:
        return ""

    soup = BeautifulSoup(r.content, 'html.parser')
    if soup.body is None:
        # Nothing to walk, e.g. an empty or non-HTML response.
        return ""

    pieces = []
    seen_paragraph = False
    for tag in soup.body.find_all():
        if tag.name in ('p', 'h1', 'h2', 'h3'):
            pieces.append(tag.get_text())
            if tag.name == 'p':
                seen_paragraph = True
        elif tag.name in ('ul', 'ol') and seen_paragraph:
            pieces.extend(item.get_text() for item in tag.find_all('li'))

    return "".join(piece + "\n" for piece in pieces)