FrancoisHB committed on
Commit
2beb89b
1 Parent(s): cc5b5b5

Commit Test SRT

Browse files
Files changed (1) hide show
  1. app.py +19 -16
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import streamlit as st
2
  from transformers import pipeline
3
  from heapq import nlargest
 
4
 
5
  # Function to extract text from SRT-formatted text
6
  def extract_text_from_srt_text(srt_text):
@@ -13,10 +14,13 @@ def generate_summary(text, summary_length):
13
  summarizer = pipeline("summarization")
14
  summary = summarizer(text, max_length=summary_length, min_length=30, do_sample=False)
15
  summary_text = summary[0]["summary_text"]
16
- sentences = text.split(". ")
17
- top_sentences = nlargest(4, sentences, key=len)
18
- top_subjects = "\n".join(top_sentences)
19
- return summary_text, top_subjects
 
 
 
20
 
21
  # Streamlit app
22
  st.title("SRT Summarization")
@@ -24,13 +28,8 @@ st.title("SRT Summarization")
24
  # Logo image URL
25
  logo_url = "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ6uQl0omK_PHXBbyaCHdmh3VjCo_Yvgwavmcs5XRF9Rkjx5FpflxyO4yfux6d2ojKsCOA&usqp=CAU" # Replace with your logo image URL
26
 
27
- # Center the logo
28
- st.markdown(
29
- f'<div style="display: flex; justify-content: center;">'
30
- f'<img src="{logo_url}" style="width: 364px;">'
31
- f'</div>',
32
- unsafe_allow_html=True
33
- )
34
 
35
  # Text area for user to input SRT-formatted text
36
  srt_text_input = st.text_area("Paste SRT-formatted text here:")
@@ -43,12 +42,16 @@ if st.button("Summarize"):
43
  with st.spinner("Summarizing..."):
44
  # Extract text from SRT-formatted text
45
  text_to_summarize = extract_text_from_srt_text(srt_text_input)
46
- # Generate summary and top subjects
47
- summary, top_subjects = generate_summary(text_to_summarize, 150) # You can adjust the summary length as needed
48
- # Display summary and top subjects
49
  st.subheader("Summary:")
50
  st.write(summary)
51
- st.subheader("Top 4 Subjects:")
52
- st.write(top_subjects, bullet=True) # Display as bullet points
 
 
 
 
53
  else:
54
  st.warning("Please enter some SRT-formatted text.")
 
1
  import streamlit as st
2
  from transformers import pipeline
3
  from heapq import nlargest
4
+ from keybert import KeyBERT
5
 
6
  # Function to extract text from SRT-formatted text
7
  def extract_text_from_srt_text(srt_text):
 
14
  summarizer = pipeline("summarization")
15
  summary = summarizer(text, max_length=summary_length, min_length=30, do_sample=False)
16
  summary_text = summary[0]["summary_text"]
17
+ return summary_text
18
+
19
+ # Function to extract the top n_top_topics topics (keyphrases) from text
20
+ def extract_top_topics(text, n_top_topics):
21
+ model = KeyBERT('distilbert-base-nli-mean-tokens')
22
+ keywords = model.extract_keywords(text, keyphrase_ngram_range=(1, 3), stop_words='english', use_maxsum=True, nr_candidates=20, top_n=n_top_topics)
23
+ return keywords
24
 
25
  # Streamlit app
26
  st.title("SRT Summarization")
 
28
  # Logo image URL
29
  logo_url = "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ6uQl0omK_PHXBbyaCHdmh3VjCo_Yvgwavmcs5XRF9Rkjx5FpflxyO4yfux6d2ojKsCOA&usqp=CAU" # Replace with your logo image URL
30
 
31
+ # Display logo
32
+ st.image(logo_url, width=364)
 
 
 
 
 
33
 
34
  # Text area for user to input SRT-formatted text
35
  srt_text_input = st.text_area("Paste SRT-formatted text here:")
 
42
  with st.spinner("Summarizing..."):
43
  # Extract text from SRT-formatted text
44
  text_to_summarize = extract_text_from_srt_text(srt_text_input)
45
+ # Generate summary
46
+ summary = generate_summary(text_to_summarize, 150) # You can adjust the summary length as needed
47
+ # Display summary
48
  st.subheader("Summary:")
49
  st.write(summary)
50
+ # Extract top 4 topics
51
+ top_topics = extract_top_topics(text_to_summarize, 4)
52
+ # Display top 4 topics
53
+ st.subheader("Top 4 Topics:")
54
+ for topic in top_topics:
55
+ st.write(f"- {topic}")
56
  else:
57
  st.warning("Please enter some SRT-formatted text.")