Spaces:
Sleeping
Sleeping
Commit
•
2beb89b
1
Parent(s):
cc5b5b5
Commit Test SRT
Browse files
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import streamlit as st
|
2 |
from transformers import pipeline
|
3 |
from heapq import nlargest
|
|
|
4 |
|
5 |
# Function to extract text from SRT-formatted text
|
6 |
def extract_text_from_srt_text(srt_text):
|
@@ -13,10 +14,13 @@ def generate_summary(text, summary_length):
|
|
13 |
summarizer = pipeline("summarization")
|
14 |
summary = summarizer(text, max_length=summary_length, min_length=30, do_sample=False)
|
15 |
summary_text = summary[0]["summary_text"]
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
20 |
|
21 |
# Streamlit app
|
22 |
st.title("SRT Summarization")
|
@@ -24,13 +28,8 @@ st.title("SRT Summarization")
|
|
24 |
# Logo image URL
|
25 |
logo_url = "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ6uQl0omK_PHXBbyaCHdmh3VjCo_Yvgwavmcs5XRF9Rkjx5FpflxyO4yfux6d2ojKsCOA&usqp=CAU" # Replace with your logo image URL
|
26 |
|
27 |
-
#
|
28 |
-
st.
|
29 |
-
f'<div style="display: flex; justify-content: center;">'
|
30 |
-
f'<img src="{logo_url}" style="width: 364px;">'
|
31 |
-
f'</div>',
|
32 |
-
unsafe_allow_html=True
|
33 |
-
)
|
34 |
|
35 |
# Text area for user to input SRT-formatted text
|
36 |
srt_text_input = st.text_area("Paste SRT-formatted text here:")
|
@@ -43,12 +42,16 @@ if st.button("Summarize"):
|
|
43 |
with st.spinner("Summarizing..."):
|
44 |
# Extract text from SRT-formatted text
|
45 |
text_to_summarize = extract_text_from_srt_text(srt_text_input)
|
46 |
-
# Generate summary
|
47 |
-
summary
|
48 |
-
# Display summary
|
49 |
st.subheader("Summary:")
|
50 |
st.write(summary)
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
53 |
else:
|
54 |
st.warning("Please enter some SRT-formatted text.")
|
|
|
1 |
import streamlit as st
|
2 |
from transformers import pipeline
|
3 |
from heapq import nlargest
|
4 |
+
from keybert import KeyBERT
|
5 |
|
6 |
# Function to extract text from SRT-formatted text
|
7 |
def extract_text_from_srt_text(srt_text):
|
|
|
14 |
summarizer = pipeline("summarization")
|
15 |
summary = summarizer(text, max_length=summary_length, min_length=30, do_sample=False)
|
16 |
summary_text = summary[0]["summary_text"]
|
17 |
+
return summary_text
|
18 |
+
|
19 |
+
# Function to extract the top N topics (N = n_top_topics) from text
|
20 |
+
def extract_top_topics(text, n_top_topics):
    """Return the top ``n_top_topics`` keyphrases found in ``text``.

    Candidate phrases of one to three words (English stop words removed)
    are ranked by KeyBERT with its Max Sum selection enabled.

    Args:
        text: Plain text to analyse.
        n_top_topics: Number of keyphrases to return.

    Returns:
        The result of ``KeyBERT.extract_keywords`` for ``text``, or an empty
        list when ``text`` is empty or contains only whitespace.
    """
    # Nothing can be extracted from blank input, so skip the expensive
    # model load entirely.
    if not text or not text.strip():
        return []

    model = KeyBERT('distilbert-base-nli-mean-tokens')
    # Max Sum selection requires the candidate pool to be at least as large
    # as the number of keywords requested; keep the original pool of 20 but
    # grow it when the caller asks for more topics than that.
    nr_candidates = max(20, n_top_topics)
    keywords = model.extract_keywords(
        text,
        keyphrase_ngram_range=(1, 3),
        stop_words='english',
        use_maxsum=True,
        nr_candidates=nr_candidates,
        top_n=n_top_topics,
    )
    return keywords
|
24 |
|
25 |
# Streamlit app
|
26 |
st.title("SRT Summarization")
|
|
|
28 |
# Logo image URL
|
29 |
logo_url = "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ6uQl0omK_PHXBbyaCHdmh3VjCo_Yvgwavmcs5XRF9Rkjx5FpflxyO4yfux6d2ojKsCOA&usqp=CAU" # Replace with your logo image URL
|
30 |
|
31 |
+
# Display logo
|
32 |
+
st.image(logo_url, width=364)
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
# Text area for user to input SRT-formatted text
|
35 |
srt_text_input = st.text_area("Paste SRT-formatted text here:")
|
|
|
42 |
with st.spinner("Summarizing..."):
|
43 |
# Extract text from SRT-formatted text
|
44 |
text_to_summarize = extract_text_from_srt_text(srt_text_input)
|
45 |
+
# Generate summary
|
46 |
+
summary = generate_summary(text_to_summarize, 150) # You can adjust the summary length as needed
|
47 |
+
# Display summary
|
48 |
st.subheader("Summary:")
|
49 |
st.write(summary)
|
50 |
+
# Extract top 4 topics
|
51 |
+
top_topics = extract_top_topics(text_to_summarize, 4)
|
52 |
+
# Display top 4 topics
|
53 |
+
st.subheader("Top 4 Topics:")
|
54 |
+
for topic in top_topics:
|
55 |
+
st.write(f"- {topic}")
|
56 |
else:
|
57 |
st.warning("Please enter some SRT-formatted text.")
|