Spaces:
Configuration error
Configuration error
Upload 11 files
Browse files
- README.md +1 -13
- app.py +7 -0
- document_data.py +19 -0
- load.py +9 -0
- pages/document.py +8 -0
- pages/text.py +7 -0
- pages/url.py +8 -0
- pages/video.py +0 -0
- pages/youtube_video.py +12 -0
- video_c.py +26 -0
- website_data.py +40 -0
README.md
CHANGED
@@ -1,13 +1 @@
|
|
1 |
-
|
2 |
-
title: Summarizer
|
3 |
-
emoji: 👁
|
4 |
-
colorFrom: yellow
|
5 |
-
colorTo: red
|
6 |
-
sdk: streamlit
|
7 |
-
sdk_version: 1.33.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
license: llama2
|
11 |
-
---
|
12 |
-
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
Summariser
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st

# Page chrome must be configured before any other Streamlit call is made.
_PAGE_OPTIONS = {
    "page_title": "Summarize EAZY",
    "page_icon": "fav2.jpg",
    "layout": "wide",
}
st.set_page_config(**_PAGE_OPTIONS)

# Landing header shown on every visit to the app's home page.
st.header("Welcome to the **SUMMARIZE EAZY**")
|
document_data.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
from PyPDF2 import PdfReader
import docx2txt

# MIME types the uploader accepts.
_DOCX_MIME = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
_PDF_MIME = "application/pdf"
_TXT_MIME = "text/plain"


def document_extract():
    """Render a Streamlit file uploader and return the concatenated text
    of every uploaded .docx, .pdf and .txt file.

    Returns:
        str: all extracted text joined together; "" when nothing has been
        uploaded yet. Files of any other MIME type trigger an on-page
        error message and are skipped.
    """
    uploaded_files = st.file_uploader("Choose a file", accept_multiple_files=True)
    content = ""
    if uploaded_files is not None:
        for uploaded in uploaded_files:
            if uploaded.type == _DOCX_MIME:
                content += docx2txt.process(uploaded)
            elif uploaded.type == _PDF_MIME:
                pdf_reader = PdfReader(uploaded)
                for page in pdf_reader.pages:
                    content += page.extract_text()
            elif uploaded.type == _TXT_MIME:
                # BUG FIX: the original called st.read(x) — the streamlit
                # module has no read() — and assigned with "=", discarding
                # text already extracted from earlier files. Decode the
                # uploaded bytes and append instead.
                content += uploaded.read().decode("utf-8", errors="replace")
            else:
                st.error("Please provide files of type **.docx**,**.pdf**,**.txt**")
    return content
|
load.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import pathlib
import textwrap

import google.generativeai as genai

# SECURITY FIX: the original committed a literal Google API key to source
# control. Read it from the environment instead; the leaked key must be
# rotated regardless.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel('gemini-pro')


def output(text):
    """Ask Gemini to summarize *text*.

    Args:
        text: the raw text to be summarized.

    Returns:
        The model response object; callers iterate it to read chunks.
    """
    # BUG FIX (ordering): the original defined output() before `model`
    # existed; defining the model first makes the module read top-to-bottom
    # and fail fast if configuration is broken.
    response = model.generate_content("Summarize this and also don't make this too short " + text)
    return response
|
pages/document.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st

import document_data
import load

# Document-summary page: pull text out of the uploaded files, then stream
# the model's summary onto the page chunk by chunk.
extracted_text = document_data.document_extract()
if extracted_text:
    summary = load.output(extracted_text)
    for piece in summary:
        st.write(piece.text)
|
pages/text.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st

import load

# Free-text summary page: take whatever the user typed and stream the
# model's summary back chunk by chunk.
user_text = st.text_input("Enter the text you want to summarize..")
if user_text:
    summary = load.output(user_text)
    for piece in summary:
        st.write(piece.text)
|
pages/url.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st

import load
import website_data

# URL-summary page: scrape the page's readable text, then stream the
# model's summary chunk by chunk.
page_url = st.text_input("Enter the url", placeholder="URL....")
if page_url:
    scraped = website_data.scrape(page_url)
    summary = load.output(scraped)
    for piece in summary:
        st.write(piece.text)
|
pages/video.py
ADDED
File without changes
|
pages/youtube_video.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st

import load
from langchain_community.document_loaders import YoutubeLoader

# YouTube-summary page: fetch the video transcript (no extra metadata),
# then stream the model's summary chunk by chunk.
video_url = st.text_input("Enter the url", placeholder="URL....")
if video_url:
    loader = YoutubeLoader.from_youtube_url(video_url, add_video_info=False)
    transcript_docs = loader.load()
    summary = load.output(str(transcript_docs))
    for piece in summary:
        st.write(piece.text)
|
video_c.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import tempfile

# IDIOM FIX: import the one name used instead of a wildcard import.
from moviepy.editor import VideoFileClip
import assemblyai as aai

# SECURITY FIX: the original committed a literal AssemblyAI key to source
# control. Read it from the environment; rotate the leaked key.
aai.settings.api_key = os.environ.get("ASSEMBLYAI_API_KEY", "")

# Path of the mp4 to transcribe; overridable via the environment instead of
# being baked in (the original hard-coded an absolute Windows path).
VIDEO_PATH = os.environ.get(
    "VIDEO_PATH",
    r"C:\Users\HP\OneDrive\Documents\app\summarizer\summarizer\video_file.mp4",
)

# Load the mp4 file
video = VideoFileClip(VIDEO_PATH)

# Extract the audio track into a temporary mp3. delete=False so the file
# can be reopened by name afterwards (required on Windows).
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio_file:
    video.audio.write_audiofile(temp_audio_file.name, codec='mp3')
    # NOTE: the redundant explicit close() from the original is gone —
    # the with-statement closes the handle on exit.

try:
    # Transcribe audio to text
    transcriber = aai.Transcriber()
    transcript = transcriber.transcribe(temp_audio_file.name)

    text_from_audio = transcript.text
    print(text_from_audio)
finally:
    # BUG FIX: the original leaked the temporary mp3 (delete=False and no
    # cleanup). Remove it whether or not transcription succeeded.
    os.remove(temp_audio_file.name)
|
website_data.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
from bs4 import BeautifulSoup

# Present a desktop-browser user agent; some sites block the default one.
_HEADERS = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246"}

# Separator appended after each extracted fragment (a single newline, as in
# the original triple-quoted literal).
_SEPARATOR = "\n"

# Tags whose text is always collected.
_TEXT_TAGS = ("p", "h1", "h2", "h3")


def scrape(url):
    """Download *url* and return its readable text.

    Collects the text of every <p>/<h1>/<h2>/<h3> tag in document order,
    plus the <li> items of any <ul>/<ol> that appears after the first
    paragraph has been seen.

    Args:
        url: the page to fetch.

    Returns:
        str: the extracted text, newline-separated; "" when the request
        does not come back with HTTP 200.
    """
    r = requests.get(url=url, headers=_HEADERS)
    if r.status_code != 200:
        return ""

    soup = BeautifulSoup(r.content, 'html.parser')
    # PERF/IDIOM FIX: the original shadowed the builtin `str` and built the
    # result with quadratic `+=` concatenation; accumulate parts and join.
    parts = []
    seen_paragraph = False
    for tag in soup.body.find_all():
        if tag.name in _TEXT_TAGS:
            parts.append(tag.get_text())
            parts.append(_SEPARATOR)
            if tag.name == 'p':
                # Once a paragraph is seen, subsequent lists are treated as
                # content (flag is never reset, matching the original).
                seen_paragraph = True
        elif tag.name in ('ul', 'ol') and seen_paragraph:
            for item in tag.find_all('li'):
                parts.append(item.get_text())
                parts.append(_SEPARATOR)
    return "".join(parts)
|