Spaces:
Sleeping
Sleeping
BoldStudio
committed on
Commit
•
7c264ed
1
Parent(s):
f88eac0
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from gradio_client import Client
|
3 |
+
import re
|
4 |
+
|
5 |
+
|
6 |
+
def call_api(transcript):
    """Send one transcript to the hosted Llama-2 chat Space and return its reply.

    The transcript is coerced to ``str`` and passed as the first positional
    argument of the Space's ``/chat`` endpoint; the fixed instruction prompt
    below is passed as the second.

    Args:
        transcript: The transcript (or transcript chunk) to convert. Any
            object is accepted; it is converted with ``str()``.

    Returns:
        Whatever ``Client.predict`` returns for the ``/chat`` endpoint.
    """
    endpoint = "https://huggingface-projects-llama-2-13b-chat.hf.space/--replicas/dknmg/"
    llm = Client(endpoint)

    # Instruction prompt; it ends with "TRANSCRIPT:" and the transcript itself
    # is supplied separately as the first argument of the request.
    instructions = """
INSTRUCTIONS:
Convert this Transcript into Segment Cards.

IMPORTANT:

- use the same language from the transcription to write the segment cards.
- make sure that the duration is correct.
- dont use special characters or emojis in the description
- write as many segment cards as the transcription has
- only output the segment cards
- dont answer with additional text
- Add a <card> tag before and after every segment card
- be precise and make sure every segment card is complete
- Your Answer should be like the Output Format

CONTENT OF CARD:
{
Title: (max. 50 Characters)
Description: (summary of content max. 140 Characters)
Rating: 1-5 (How Relevant is this content in relation to the transript and how good is this topic for a youtube short video)
Duration: [00:00.000 - 00:00.000]
}
TRANSCRIPT:
"""
    message = str(transcript)

    # NOTE(review): presumably max new tokens, temperature, top-p, top-k and
    # repetition penalty, in that order -- confirm against the Space's API page.
    generation_args = (1024, 0.6, 0.9, 50, 1.2)

    return llm.predict(message, instructions, *generation_args, api_name="/chat")
|
46 |
+
|
47 |
+
|
48 |
+
# Text between one <card> ... </card> pair (may span several lines).
_CARD_RE = re.compile(r'<card>(.*?)</card>', re.DOTALL)

# Fields every segment card is expected to carry, in output order.
_CARD_FIELDS = ("Title", "Description", "Rating", "Duration")

# One pattern per field: the label, optional blanks, then the rest of the line.
_FIELD_RES = {
    name: re.compile(re.escape(name) + r':[ \t]*(.*)') for name in _CARD_FIELDS
}


def _parse_card(card_text):
    """Build a card dict from the text found inside one <card>...</card> pair."""
    card = {}
    for name, pattern in _FIELD_RES.items():
        found = pattern.search(card_text)
        # A field the model left out becomes "" instead of raising, so one
        # incomplete card cannot discard every other card in the reply.
        card[name] = found.group(1).strip() if found else ""
    return card


def transcription_to_segments(segments, *, generate=None):
    """Convert transcript segments into a list of segment-card dicts.

    Each segment is sent to the text-generation backend and every
    ``<card>...</card>`` span in the reply is parsed into a dict with the
    keys ``Title``, ``Description``, ``Rating`` and ``Duration``.

    Args:
        segments: Either a single transcript string or an iterable of
            transcript strings. A plain string is treated as ONE segment;
            iterating it directly would issue one request per character.
            ``None`` and blank strings yield an empty result.
        generate: Optional callable ``segment -> reply text``. Defaults to
            ``call_api``; lets callers plug in a different backend.

    Returns:
        A list of dicts, one per card found, in reply order. Fields missing
        from a card are returned as empty strings.
    """
    if segments is None:
        return []
    if isinstance(segments, str):
        segments = [segments] if segments.strip() else []

    if generate is None:
        generate = call_api

    segment_cards = []
    for segment in segments:
        reply = generate(segment)
        segment_cards.extend(
            _parse_card(card_text) for card_text in _CARD_RE.findall(reply)
        )
    return segment_cards
|
63 |
+
|
64 |
+
|
65 |
+
# Gradio UI: a single text input handled by transcription_to_segments,
# with the resulting list of cards rendered as JSON.
interface = gr.Interface(fn=transcription_to_segments, inputs="text", outputs="json")

# Start serving the UI.
interface.launch()
|