FrancoisHB committed on
Commit
7c96a81
1 Parent(s): 2677543

Commit Test SRT

Browse files
Files changed (2) hide show
  1. app.py +34 -30
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,12 +1,15 @@
1
  import streamlit as st
 
2
  from transformers import pipeline
3
- from heapq import nlargest
4
  from keybert import KeyBERT
5
 
 
 
 
6
  # Function to extract text from SRT-formatted text
7
  def extract_text_from_srt_text(srt_text):
8
- lines = srt_text.strip().split("\n\n") # Split by empty lines to separate subtitles
9
- texts = [subtitle.split("\n")[2] for subtitle in lines if subtitle.strip()] # Extract text from the third line of each subtitle
10
  return " ".join(texts)
11
 
12
  # Function to generate summary from text
@@ -20,22 +23,11 @@ def generate_summary(text, summary_length):
20
  def extract_top_topics(text, n_top_topics):
21
  model = KeyBERT('distilbert-base-nli-mean-tokens')
22
  keywords = model.extract_keywords(text, keyphrase_ngram_range=(1, 3), stop_words='english', use_maxsum=True, nr_candidates=20, top_n=n_top_topics)
23
- return keywords
24
 
25
  # Streamlit app
26
  st.title("SRT Summarization")
27
 
28
- # Logo image URL
29
- logo_url = "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ6uQl0omK_PHXBbyaCHdmh3VjCo_Yvgwavmcs5XRF9Rkjx5FpflxyO4yfux6d2ojKsCOA&usqp=CAU" # Replace with your logo image URL
30
-
31
- # Center the logo
32
- st.markdown(
33
- f'<div style="display: flex; justify-content: center;">'
34
- f'<img src="{logo_url}" style="width: 364px;">'
35
- f'</div>',
36
- unsafe_allow_html=True
37
- )
38
-
39
  # Text area for user to input SRT-formatted text
40
  srt_text_input = st.text_area("Paste SRT-formatted text here:")
41
 
@@ -43,20 +35,32 @@ srt_text_input = st.text_area("Paste SRT-formatted text here:")
43
  if st.button("Summarize"):
44
  # Check if text area is not empty
45
  if srt_text_input.strip():
46
- # Show loading spinner while processing
47
- with st.spinner("Summarizing..."):
48
- # Extract text from SRT-formatted text
49
- text_to_summarize = extract_text_from_srt_text(srt_text_input)
50
- # Generate summary
51
- summary = generate_summary(text_to_summarize, 150) # You can adjust the summary length as needed
52
- # Display summary
53
- st.subheader("Summary:")
54
- st.write(summary)
55
- # Extract top 4 topics
56
- top_topics = extract_top_topics(text_to_summarize, 4)
57
- # Display top 4 topics
58
- st.subheader("Top 4 Keywords:")
59
- for topic, _ in top_topics:
60
- st.write(f"- {topic}")
61
  else:
62
  st.warning("Please enter some SRT-formatted text.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ from flask import Flask, request, jsonify
3
  from transformers import pipeline
 
4
  from keybert import KeyBERT
5
 
6
+ # Initialize Flask app
7
+ app = Flask(__name__)
8
+
9
  # Function to extract text from SRT-formatted text
def extract_text_from_srt_text(srt_text):
    """Return the spoken text of an SRT document as one space-joined string.

    Cues are runs of non-blank lines separated by blank lines. Within each
    cue the first two lines (sequence number and timing) are discarded and
    every remaining line is kept, so multi-line subtitles are preserved.
    Cues with fewer than three lines contribute nothing instead of raising
    ``IndexError``.

    Args:
        srt_text: SRT-formatted text; any common line-ending style is accepted.

    Returns:
        The subtitle text lines joined with single spaces ("" if none).
    """
    texts = []
    cue_lines = []
    # splitlines() copes with "\n", "\r\n" and "\r"; the trailing "" is a
    # sentinel that flushes the final cue when the input lacks a blank line.
    for raw_line in srt_text.splitlines() + [""]:
        line = raw_line.strip()
        if line:
            cue_lines.append(line)
            continue
        # Blank line: the cue is complete. Skip its index and timing lines.
        texts.extend(cue_lines[2:])
        cue_lines = []
    return " ".join(texts)
14
 
15
  # Function to generate summary from text
 
def extract_top_topics(text, n_top_topics):
    """Return the key phrases KeyBERT extracts from ``text``.

    Args:
        text: The text to analyse.
        n_top_topics: Passed to KeyBERT as ``top_n``.

    Returns:
        A list containing the first element of each pair returned by
        ``extract_keywords`` (presumably the phrase, with its score dropped).
    """
    extractor = KeyBERT('distilbert-base-nli-mean-tokens')
    scored_phrases = extractor.extract_keywords(
        text,
        keyphrase_ngram_range=(1, 3),
        stop_words='english',
        use_maxsum=True,
        nr_candidates=20,
        top_n=n_top_topics,
    )
    phrases = []
    for phrase, _score in scored_phrases:
        phrases.append(phrase)
    return phrases
27
 
28
  # Streamlit app
29
  st.title("SRT Summarization")
30
 
 
 
 
 
 
 
 
 
 
 
 
31
  # Text area for user to input SRT-formatted text
32
  srt_text_input = st.text_area("Paste SRT-formatted text here:")
33
 
 
35
  if st.button("Summarize"):
36
  # Check if text area is not empty
37
  if srt_text_input.strip():
38
+ # Extract text from SRT-formatted text
39
+ text_to_summarize = extract_text_from_srt_text(srt_text_input)
40
+ # Generate summary
41
+ summary = generate_summary(text_to_summarize, 150)
42
+ # Extract top 4 topics
43
+ top_topics = extract_top_topics(text_to_summarize, 4)
44
+ # Display summary and top 4 topics
45
+ st.subheader("Summary:")
46
+ st.write(summary)
47
+ st.subheader("Top 4 Keywords:")
48
+ for topic in top_topics:
49
+ st.write(f"- {topic}")
 
 
 
50
  else:
51
  st.warning("Please enter some SRT-formatted text.")
52
+
53
+ # Define endpoint for REST API
@app.route("/summarize", methods=["POST"])
def summarize():
    """Handle ``POST /summarize``: summarise SRT text sent as JSON.

    Expects a JSON object with a string ``srt_text`` field. Responds with a
    JSON object holding ``summary`` and ``top_topics``, or with a 400 error
    object when the body is not a JSON object, lacks ``srt_text``, or
    ``srt_text`` is not a string.
    """
    # silent=True yields None for a missing/invalid JSON body instead of
    # raising, so every malformed request takes the same 400 path below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or "srt_text" not in data:
        return jsonify({"error": "Missing 'srt_text' parameter"}), 400

    srt_text = data["srt_text"]
    if not isinstance(srt_text, str):
        # The SRT parser calls string methods on this value.
        return jsonify({"error": "'srt_text' must be a string"}), 400

    text_to_summarize = extract_text_from_srt_text(srt_text)
    summary = generate_summary(text_to_summarize, 150)
    top_topics = extract_top_topics(text_to_summarize, 4)
    return jsonify({"summary": summary, "top_topics": top_topics})


# NOTE(review): app.run() blocks. If this script is launched with
# `streamlit run`, it presumably also executes as __main__ — confirm the
# Flask server and the Streamlit UI are meant to share one entry point.
if __name__ == "__main__":
    app.run()
requirements.txt CHANGED
@@ -2,4 +2,5 @@ torch
2
  transformers
3
  streamlit
4
  gradio
5
- keybert
 
 
2
  transformers
3
  streamlit
4
  gradio
5
+ keybert
6
+ flask