Spaces:

FrancoisHB
/

testfhb

Sleeping

File size: 1,660 Bytes

9f27a52
75855be
2a6b055
42c2597
a8924ed
 
f91d5ee
 
a1d11c2
ece6dcf
2a6b055
 
 
 
 
a1d11c2
 
 
 
a8924ed
 
a1d11c2
a8924ed
 
 
 
 
 
2a6b055
 
 
 
 
 
 
a8924ed
2a6b055
 
 
a8924ed

import streamlit as st
from transformers import pipeline
from heapq import nlargest

# Function to extract text from SRT-formatted text
def extract_text_from_srt_text(srt_text):
    """Return the caption text of an SRT document as one space-joined string.

    The input is read as a sequence of cues separated by blank lines. Within
    each cue the first two non-blank lines (the sequence number and the
    timestamp line) are discarded and every remaining line is kept, so
    captions that span several lines are preserved in full.

    Args:
        srt_text: SRT-formatted text. ``\\n``, ``\\r\\n`` and ``\\r`` line
            endings are all accepted.

    Returns:
        All caption lines, stripped of surrounding whitespace and joined with
        single spaces. Cues with fewer than three lines contribute nothing,
        so empty or malformed input yields ``""`` instead of raising.
    """
    texts = []
    cue_lines = []
    # splitlines() copes with every line-ending style; the extra "" acts as a
    # final blank line so the last cue is flushed by the same code path.
    for raw_line in [*srt_text.splitlines(), ""]:
        line = raw_line.strip()
        if line:
            cue_lines.append(line)
            continue
        # Blank (or whitespace-only) line: the current cue is complete.
        # Slicing never raises, so a short/malformed cue is simply skipped.
        texts.extend(cue_lines[2:])
        cue_lines = []
    return " ".join(texts)

# Function to generate summary from text
def generate_summary(text, summary_length):
    """Summarize *text* with a Hugging Face ``summarization`` pipeline.

    Args:
        text: The plain text to summarize.
        summary_length: Upper bound passed to the pipeline as ``max_length``.

    Returns:
        The ``"summary_text"`` field of the first result returned by the
        pipeline.
    """
    # NOTE: the pipeline is constructed on every call; no model name is given,
    # so the library picks its default for the "summarization" task.
    summarizer = pipeline("summarization")
    # The lower bound is normally 30, but it must never exceed the requested
    # maximum or the generation length constraints become contradictory.
    min_length = min(30, summary_length)
    summary = summarizer(
        text,
        max_length=summary_length,
        min_length=min_length,
        do_sample=False,
        # Transcripts are often longer than the model's input window; cut the
        # input to fit instead of failing inside the model.
        truncation=True,
    )
    return summary[0]["summary_text"]

# Streamlit app
st.title("SRT Summarization")

# Text area for user to input SRT-formatted text
srt_text_input = st.text_area("Paste SRT-formatted text here:")

# Button to trigger summarization
if st.button("Summarize"):
    if not srt_text_input.strip():
        st.warning("Please enter some SRT-formatted text.")
    else:
        try:
            text_to_summarize = extract_text_from_srt_text(srt_text_input)
        except IndexError:
            # Defensive: a positional SRT parser can index past the end of a
            # malformed cue; treat that as "no usable text" rather than
            # showing the user a traceback.
            text_to_summarize = ""

        if not text_to_summarize.strip():
            # Nothing to feed the summarizer; say so instead of calling it
            # with an empty string.
            st.warning(
                "No subtitle text could be extracted. "
                "Please check that the input is SRT-formatted."
            )
        else:
            # Generate summary (150 is the maximum summary length; adjust as needed)
            summary = generate_summary(text_to_summarize, 150)

            # Pick the four longest sentences, ignoring empty fragments left
            # over from splitting on ". ".
            sentences = [s for s in text_to_summarize.split(". ") if s.strip()]
            top_sentences = nlargest(4, sentences, key=len)
            # st.write renders strings as Markdown, where a single newline
            # does not break the line; a bullet list keeps each sentence on
            # its own row.
            top_subjects = "\n".join(f"- {sentence}" for sentence in top_sentences)

            # Display summary and top 4 subjects
            st.subheader("Summary:")
            st.write(summary)
            st.subheader("Top 4 Subjects:")
            st.write(top_subjects)