File size: 2,597 Bytes
782d96b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import json
import re
import textwrap
import openai
import gradio as gr

def gpt3_completion(prompt, engine='text-davinci-002', temp=0.6, top_p=1.0, tokens=1000, freq_pen=0.25, pres_pen=0.0, stop=['<<END>>']):
    """Request one completion from the OpenAI Completion endpoint.

    The sampling arguments are forwarded to ``openai.Completion.create``
    instead of being silently replaced by hard-coded literals, so callers
    can actually tune the request.

    Args:
        prompt: Text sent to the model.
        engine: Name of the completion engine.
        temp: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        tokens: Maximum number of tokens to generate.
        freq_pen: Frequency penalty.
        pres_pen: Presence penalty.
        stop: Stop sequence(s) forwarded unchanged.  The default list is
            only read, never mutated, so sharing it across calls is safe.

    Returns:
        The text of the first choice, stripped, with every run of
        whitespace (including newlines) collapsed to a single space.

    Errors raised by the ``openai`` client propagate to the caller.
    """
    response = openai.Completion.create(
        engine=engine,
        prompt=prompt,
        temperature=temp,
        max_tokens=tokens,
        top_p=top_p,
        frequency_penalty=freq_pen,
        presence_penalty=pres_pen,
        stop=stop,
    )
    text = response['choices'][0]['text'].strip()
    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    return re.sub(r'\s+', ' ', text)

# Placeholder labels substituted for speaker names, in order of first
# appearance.  Kept exactly as the original list (J, U and W are absent).
_SPEAKER_LABELS = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M",
                   "N", "O", "P", "Q", "R", "S", "T", "V", "X", "Y", "Z"]

# One WebVTT timestamp: optional hours, then mm:ss.ttt.
_VTT_TIMESTAMP = r"(?:\d{2,}:)?\d\d:\d\d\.\d\d\d"
# A pair of timestamps separated either by whitespace alone (the only form
# the previous pattern accepted) or by the standard WebVTT " --> " arrow.
_VTT_CUE_TIMING = re.compile(
    _VTT_TIMESTAMP + r"[ \t]+(?:-->[ \t]+)?" + _VTT_TIMESTAMP
)


def _speaker_names(text):
    """Return the distinct text-before-first-colon prefixes, in first-seen order."""
    names = []
    seen = set()
    for line in text.split("\n"):
        prefix, colon, _ = line.partition(":")
        name = prefix.strip()
        # An empty name must be skipped: replacing "" would insert the
        # label between every character of the transcript.
        if colon and name and name not in seen:
            seen.add(name)
            names.append(name)
    return names


def _replace_speakers(text, names):
    """Swap each speaker name in *text* for its placeholder label in one pass."""
    # zip() stops at the shorter input, so speakers beyond the available
    # labels simply keep their real name instead of raising IndexError.
    labels = dict(zip(names, _SPEAKER_LABELS))
    if not labels:
        return text
    # Longest names first so "Anna" is not consumed as "Ann" + "a"; the
    # lookarounds stop a short name from matching inside a longer word.
    alternatives = "|".join(
        re.escape(name) for name in sorted(labels, key=len, reverse=True)
    )
    pattern = re.compile(r"(?<!\w)(?:" + alternatives + r")(?!\w)")
    return pattern.sub(lambda match: labels[match.group(0)], text)


def summarize_podcast(podcast):
    """Summarize an uploaded VTT transcript chunk by chunk.

    The transcript is stripped of its ``WEBVTT`` marker, blank lines and cue
    timings; speaker names (the text before the first colon of a line) are
    replaced by single-letter labels; the result is cut into pieces of at
    most 8000 characters and each piece is sent to ``gpt3_completion``.

    Args:
        podcast: Object with a ``read()`` method returning the transcript as
            bytes (decoded as UTF-8, leading BOM dropped) or as str.
            ``None`` is accepted and treated as an empty transcript.

    Returns:
        A string holding, for every chunk, a newline followed by
        ``"chunk : <n>"`` immediately followed by that chunk's summary.
        Empty when there is nothing to summarize.

    The OpenAI key is taken from the ``OPENAI_API_KEY`` environment variable
    when it is set; a secret must never be committed to source control.
    """
    import os  # local import keeps this block self-contained

    if podcast is None:
        return ""

    raw = podcast.read()
    if isinstance(raw, (bytes, bytearray)):
        transcript = raw.decode("utf-8-sig")
    else:
        transcript = raw

    transcript = transcript.replace("WEBVTT", "")
    transcript = "\n".join(filter(None, transcript.splitlines()))
    transcript = _VTT_CUE_TIMING.sub("", transcript)

    names = _speaker_names(transcript)
    legend = "".join(
        f"Speaker {number} {name}\n" for number, name in enumerate(names, start=1)
    )
    # The legend is prepended to the transcript as well as to every prompt,
    # as before, so chunk boundaries stay where they used to fall.
    transcript = legend + _replace_speakers(transcript, names)

    api_key = os.environ.get("OPENAI_API_KEY")
    if api_key:
        openai.api_key = api_key

    summaries = []
    for count, chunk in enumerate(textwrap.wrap(transcript, 8000), start=1):
        # Prompt text is unchanged from the original triple-quoted literal,
        # including its indentation and the quote after the chunk.
        prompt = (
            legend
            + "\n\n"
            + "        Summarize the portion of the podcast. The summary should be around 200 words. use the Name instead of A, B, C...\n"
            + "        Podcast: \n"
            + "\n"
            + "        "
            + str(chunk)
            + "'\n\n"
        )
        summaries.append("chunk : " + str(count) + gpt3_completion(prompt))

    return "".join("\n" + summary for summary in summaries)

# Placeholder sample location; none of the statements below read it.
sample_url = "https://www.example.com/sample.vtt"

# Upload widget whose value is passed to summarize_podcast, and the text box
# that displays the string it returns.
input_file = gr.inputs.File(label="Upload your podcast in VTT format.")
output_text = gr.outputs.Textbox(label="Podcast Summary")

# Wrap the summarizer in a single-function Gradio interface and launch it.
iface = gr.Interface(
    fn=summarize_podcast,
    inputs=input_file,
    outputs=output_text,
    title="Podcast Summarizer",
    description="This tool summarizes a podcast in VTT format, providing a summary of each chunk of text.",
)
iface.launch()