import streamlit as st
import whisper
import os
import torch
from transformers import pipeline, AutoTokenizer
from pydub import AudioSegment
import nltk
from nltk import sent_tokenize

# sent_tokenize needs the punkt tokenizer data; fetch it if it is not already present
nltk.download('punkt', quiet=True)

def transcribe_audio(audiofile):

    st.session_state['audio'] = audiofile
    print(f"audio_file_session_state:{st.session_state['audio'] }")

    st.info("Getting size of file")
    #get size of audio file
    audio_size = round(os.path.getsize(st.session_state['audio'])/(1024*1024),1)
    print(f"audio file size:{audio_size}")

    #determine audio duration
    podcast = AudioSegment.from_mp3(st.session_state['audio'])
    st.session_state['audio_segment'] = podcast
    podcast_duration = podcast.duration_seconds
    print(f"Audio Duration: {podcast_duration}")

    st.info("Transcribing")
    whisper_model = whisper.load_model("small.en")
    transcription = whisper_model.transcribe(audiofile)
    st.session_state['transcription'] = transcription
    print(f"ranscription: {transcription['text']}")
    st.info('Done Transcription')

    return transcription
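
# The "Summarize Podcast" button below reads a cached transcript from
# transcription.txt, but nothing in this script writes that file yet.
# A minimal sketch of a helper that produces it (hypothetical; the filename
# matches what the button handler expects):
def save_transcription(transcription, path="transcription.txt"):
    with open(path, "w") as file:
        file.write(transcription['text'])
    return path
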
def chunk_and_preprocess_text(text, model_name='philschmid/flan-t5-base-samsum'):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    sentences = sent_tokenize(text)

    length = 0
    chunk = ""
    chunks = []

    for sentence in sentences:
        combined_length = len(tokenizer.tokenize(sentence)) + length  # token count if this sentence is added

        if combined_length <= tokenizer.max_len_single_sentence:  # the sentence still fits in the current chunk
            chunk += sentence + " "  # add the sentence to the chunk
            length = combined_length  # update the length counter
        else:
            chunks.append(chunk)  # save the finished chunk
            # start a new chunk with the overflow sentence
            chunk = sentence + " "
            length = len(tokenizer.tokenize(sentence))

    # save the final chunk (covers both a partially filled chunk and an
    # overflow sentence that just started a brand-new chunk)
    if chunk:
        chunks.append(chunk)

    return chunks
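
# Rough usage sketch (kept commented out so it does not run on every
# Streamlit rerun; the example text is purely hypothetical):
# demo_chunks = chunk_and_preprocess_text("First sentence. Second sentence.")
# print(len(demo_chunks), "chunk(s)")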

def summarize_podcast(audiotranscription):
    st.info("Summarizing...")
    # use the GPU if one is available, otherwise fall back to CPU
    summarizer = pipeline("summarization", model="philschmid/flan-t5-base-samsum",
                          device=0 if torch.cuda.is_available() else -1)

    st.info("Chunking text")
    text_chunks = chunk_and_preprocess_text(audiotranscription)

    # the pipeline returns one {'summary_text': ...} dict per chunk;
    # join them into a single summary string
    chunk_summaries = summarizer(text_chunks, max_length=200, min_length=50)
    summarized_text = " ".join(chunk['summary_text'] for chunk in chunk_summaries)
    st.session_state['summary'] = summarized_text
    return summarized_text
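
# The description below promises Q&A with direct quotes from the episode,
# but no Q&A code exists in this file yet. A minimal sketch using an
# extractive question-answering pipeline (the model choice is an assumption,
# not part of the original app); long transcripts would need the same
# chunking used for summarization:
def answer_question(question, context):
    qa = pipeline("question-answering", model="deepset/roberta-base-squad2")
    result = qa(question=question, context=context)
    # the answer is a span copied verbatim from the context, so it can be
    # shown as a direct quote from the transcript
    return result['answer']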
    

st.markdown("# Podcast Q&amp;A")

st.markdown(
        """
        This app helps you make sense of information-dense podcast episodes by doing the following:
        - Transcribes speech to text using the open-source Whisper model
        - Summarizes the episode
        - Answers your questions with direct quotes from the episode

        """
        )

st.audio("marketplace-2023-06-14.mp3") 
if st.button("Process Audio File"):
    podcast_text = transcribe_audio("marketplace-2023-06-14.mp3")
    #write text out
    with st.expander("See Transcription"):
        st.caption(podcast_text['text'])
    
    #Summarize Text
    podcast_summary = summarize_podcast(podcast_text['text'])
    st.markdown("## Summary of Text")
    st.text(podcast_summary)

if st.button("Summarize Podcast"):
    with open('transcription.txt', 'r') as file:
        podcast_text = file.read().rstrip()
    podcast_summary = summarize_podcast(podcast_text)
    st.markdown("## Summary of Text")
    st.text(podcast_summary)

#audio_file = st.file_uploader("Upload audio copy of file", key="upload", type=['.mp3'])


# if audio_file:
#    transcribe_audio(audio_file)