remzicam committed on
Commit
f0396e5
1 Parent(s): 38e354f

Upload 2 files

Browse files
Files changed (2) hide show
  1. TED.png +0 -0
  2. app.py +70 -0
TED.png ADDED
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """TED Talks Summarizer App."""
2
+
3
+ from re import sub
4
+
5
+ from gradio import Interface, Series, Textbox
6
+ from requests import get
7
+
8
+
9
def clean_text(text):
    """Clean the subtitle text of a TED talk into plain prose.

    Drops the first line (the subtitle file header), cue timing lines, blank
    lines and parenthesised annotations such as "(Applause)", then joins the
    remaining words into a single space-separated string.

    Args:
        text (str): subtitle of ted talk

    Returns:
        cleaned_text (str): cleaned version of subtitle text
    """
    # Remove parenthesised annotations (e.g. applause) one pair at a time.
    # A greedy ".*" between the parentheses would also delete the speech
    # sitting between two annotations on the same line, as in
    # "(Laughter) Thank you. (Applause)". Repeating until the text stops
    # changing peels nested parentheses from the inside out.
    previous = None
    while previous != text:
        previous = text
        text = sub(r"\([^()\n]*\)", "", text)
    # Skip the first line, which holds the subtitle file header.
    lines = text.split("\n")[1:]
    words = []
    for line in lines:
        # Timing lines are recognised by the "-->" between their timestamps.
        if "-->" in line:
            continue
        # split() with no argument discards blank and whitespace-only lines
        # (including a lone "\r" from CRLF files) and collapses the gaps left
        # behind by removed annotations.
        words.extend(line.split())
    cleaned_text = " ".join(words)
    return cleaned_text
27
+
28
+
29
def ted_talk_transcriber(link):
    """Creates transcription of ted talks from url.

    Downloads the talk page, extracts the talk id that follows
    "project_masters/" in the page body, downloads the English subtitle file
    for that id and returns its cleaned text.

    Args:
        link (str): url link of ted talks

    Returns:
        cleaned_transcript (str): transcription of the ted talk

    Raises:
        ValueError: if the page body contains no "project_masters/" marker.
        requests.HTTPError: if either request returns an error status.
    """
    # Without a timeout a stalled server would block this call indefinitely.
    request_timeout = 30  # seconds
    # request link of the talk
    page = get(link, timeout=request_timeout)
    page.raise_for_status()
    # extract unique talk id to reach subtitle file; search the raw bytes
    # directly instead of the repr() text that str(bytes) produces
    _, marker, tail = page.content.partition(b"project_masters/")
    if not marker:
        raise ValueError(
            f"Could not find a talk id in the page at {link!r}; "
            "is this a TED talk link?"
        )
    talk_id = tail.split(b"/", 1)[0].decode("utf-8", errors="replace")
    subtitles = get(
        f"https://hls.ted.com/project_masters/{talk_id}/subtitles/en/full.vtt",
        timeout=request_timeout,
    )
    subtitles.raise_for_status()
    # WebVTT files are UTF-8 by specification; set it explicitly so the text
    # is not decoded with an encoding guessed from the response headers.
    subtitles.encoding = "utf-8"
    cleaned_transcript = clean_text(subtitles.text)
    return cleaned_transcript
47
+
48
+
49
# HTML snippet shown as the app description: the TED logo, centered.
logo = "<center><img src='file/TED.png' width=180px></center>"

# First stage: text in (talk url), text out (cleaned transcript).
transcriber = Interface(
    fn=ted_talk_transcriber,
    inputs="text",
    outputs="text",
)

# Second stage: summarization model loaded by its Hugging Face identifier.
summarizer = Interface.load(
    "huggingface/pszemraj/long-t5-tglobal-base-16384-book-summary"
)

# Chain both stages (transcriber first, then summarizer) into one app and
# start it.
pipeline = Series(
    transcriber,
    summarizer,
    inputs=Textbox(label="Type the TED Talks link"),
    examples=[
        "https://www.ted.com/talks/jen_gunter_the_truth_about_yeast_in_your_body"
    ],
    allow_flagging="never",
    description=logo,
)
pipeline.launch()