Spaces:
Build error
Build error
juancopi81
committed on
Commit
•
6249bc9
1
Parent(s):
73a3627
Create YouTube Illustrated Summary
Browse files- .gitignore +132 -0
- README.md +4 -4
- app.py +241 -87
- requirements.txt +10 -2
- textprocessor.py +56 -0
- transcriber.py +12 -0
- utils.py +6 -0
- videocreator.py +46 -0
- youtubeaudioextractor.py +25 -0
.gitignore
ADDED
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
pip-wheel-metadata/
|
24 |
+
share/python-wheels/
|
25 |
+
*.egg-info/
|
26 |
+
.installed.cfg
|
27 |
+
*.egg
|
28 |
+
MANIFEST
|
29 |
+
|
30 |
+
# PyInstaller
|
31 |
+
# Usually these files are written by a python script from a template
|
32 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
33 |
+
*.manifest
|
34 |
+
*.spec
|
35 |
+
|
36 |
+
# Installer logs
|
37 |
+
pip-log.txt
|
38 |
+
pip-delete-this-directory.txt
|
39 |
+
|
40 |
+
# Unit test / coverage reports
|
41 |
+
htmlcov/
|
42 |
+
.tox/
|
43 |
+
.nox/
|
44 |
+
.coverage
|
45 |
+
.coverage.*
|
46 |
+
.cache
|
47 |
+
nosetests.xml
|
48 |
+
coverage.xml
|
49 |
+
*.cover
|
50 |
+
*.py,cover
|
51 |
+
.hypothesis/
|
52 |
+
.pytest_cache/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
target/
|
76 |
+
|
77 |
+
# Jupyter Notebook
|
78 |
+
.ipynb_checkpoints
|
79 |
+
|
80 |
+
# IPython
|
81 |
+
profile_default/
|
82 |
+
ipython_config.py
|
83 |
+
|
84 |
+
# pyenv
|
85 |
+
.python-version
|
86 |
+
|
87 |
+
# pipenv
|
88 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
89 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
90 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
91 |
+
# install all needed dependencies.
|
92 |
+
#Pipfile.lock
|
93 |
+
|
94 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
95 |
+
__pypackages__/
|
96 |
+
|
97 |
+
# Celery stuff
|
98 |
+
celerybeat-schedule
|
99 |
+
celerybeat.pid
|
100 |
+
|
101 |
+
# SageMath parsed files
|
102 |
+
*.sage.py
|
103 |
+
|
104 |
+
# Environments
|
105 |
+
.env
|
106 |
+
.venv
|
107 |
+
env/
|
108 |
+
venv/
|
109 |
+
ENV/
|
110 |
+
env.bak/
|
111 |
+
venv.bak/
|
112 |
+
|
113 |
+
# Spyder project settings
|
114 |
+
.spyderproject
|
115 |
+
.spyproject
|
116 |
+
|
117 |
+
# Rope project settings
|
118 |
+
.ropeproject
|
119 |
+
|
120 |
+
# mkdocs documentation
|
121 |
+
/site
|
122 |
+
|
123 |
+
# mypy
|
124 |
+
.mypy_cache/
|
125 |
+
.dmypy.json
|
126 |
+
dmypy.json
|
127 |
+
|
128 |
+
# Pyre type checker
|
129 |
+
.pyre/
|
130 |
+
|
131 |
+
# vscode
|
132 |
+
.vscode/
|
README.md
CHANGED
@@ -1,15 +1,15 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
colorFrom: indigo
|
5 |
colorTo: red
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 3.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
|
|
10 |
tags:
|
11 |
- whisper-event
|
12 |
-
duplicated_from: whisper-event/whisper-demo
|
13 |
---
|
14 |
|
15 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: YouTube to Illustrated Summary
|
3 |
+
emoji: 🏢
|
4 |
colorFrom: indigo
|
5 |
colorTo: red
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 3.14.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
+
license: openrail
|
11 |
tags:
|
12 |
- whisper-event
|
|
|
13 |
---
|
14 |
|
15 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
@@ -1,97 +1,251 @@
|
|
1 |
-
import torch
|
2 |
-
|
3 |
import gradio as gr
|
4 |
-
|
|
|
|
|
5 |
from transformers import pipeline
|
6 |
-
from
|
|
|
7 |
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
lang = "es"
|
10 |
|
11 |
-
device =
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
task="automatic-speech-recognition",
|
14 |
-
model=
|
15 |
chunk_length_s=30,
|
16 |
device=device,
|
17 |
)
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
)
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
)
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
layout="horizontal",
|
84 |
-
theme="huggingface",
|
85 |
-
title="Whisper Demo: Transcribe YouTube",
|
86 |
-
description=(
|
87 |
-
"Transcribe long-form YouTube videos with the click of a button! Demo uses the the fine-tuned checkpoint:"
|
88 |
-
f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files of"
|
89 |
-
" arbitrary length."
|
90 |
-
),
|
91 |
-
allow_flagging="never",
|
92 |
-
)
|
93 |
-
|
94 |
-
with demo:
|
95 |
-
gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
|
96 |
-
|
97 |
-
demo.launch(enable_queue=True)
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
+
from typing import Any
|
3 |
+
|
4 |
+
import torch
|
5 |
from transformers import pipeline
|
6 |
+
from diffusers import StableDiffusionPipeline
|
7 |
+
from TTS.api import TTS
|
8 |
|
9 |
+
import utils
|
10 |
+
from youtubeaudioextractor import PytubeAudioExtractor
|
11 |
+
from transcriber import Transcriber
|
12 |
+
from textprocessor import TextProcessor
|
13 |
+
from videocreator import VideoCreator
|
14 |
+
|
15 |
+
TRANSCRIBER_MODEL_NAME = "juancopi81/whisper-medium-es"
|
16 |
lang = "es"
|
17 |
|
18 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
19 |
+
dtype = torch.float16 if device == "cuda" else torch.float32
|
20 |
+
|
21 |
+
# Detect if code is running in Colab
|
22 |
+
is_colab = utils.is_google_colab()
|
23 |
+
colab_instruction = "" if is_colab else """
|
24 |
+
<p>You can skip the queue using Colab:
|
25 |
+
<a href="">
|
26 |
+
<img data-canonical-src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" src="https://colab.research.google.com/assets/colab-badge.svg"></a></p>"""
|
27 |
+
device_print = "GPU 🔥" if torch.cuda.is_available() else "CPU 🥶"
|
28 |
+
|
29 |
+
# Initialize components
|
30 |
+
audio_extractor = PytubeAudioExtractor()
|
31 |
+
transcription_pipe = pipeline(
|
32 |
task="automatic-speech-recognition",
|
33 |
+
model=TRANSCRIBER_MODEL_NAME,
|
34 |
chunk_length_s=30,
|
35 |
device=device,
|
36 |
)
|
37 |
+
transcription_pipe.model.config.forced_decoder_ids = transcription_pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
|
38 |
+
audio_transcriber = Transcriber(transcription_pipe)
|
39 |
+
openai_model = "text-davinci-003"
|
40 |
+
text_processor = TextProcessor(openai_model)
|
41 |
+
|
42 |
+
image_model_id = "runwayml/stable-diffusion-v1-5"
|
43 |
+
image_pipeline = StableDiffusionPipeline.from_pretrained(image_model_id,
|
44 |
+
torch_dtype=dtype,
|
45 |
+
revision="fp16")
|
46 |
+
|
47 |
+
image_pipeline = image_pipeline.to(device)
|
48 |
+
|
49 |
+
vo_model_name = TTS.list_models()[22]
|
50 |
+
# Init TTS
|
51 |
+
tts = TTS(vo_model_name)
|
52 |
+
video_creator = VideoCreator(tts, image_pipeline)
|
53 |
+
|
54 |
+
def datapipeline(url: str) -> Any:
    """Turn a YouTube URL into an illustrated summary video.

    Chains the module-level components in order: audio extraction,
    transcription, scene generation and video creation. Progress is
    reported on stdout after each of the first three stages.

    Args:
        url: Address of the YouTube video to process.

    Returns:
        A 2-tuple holding the value returned by
        ``video_creator.create_video`` twice, one entry for each Gradio
        output component wired to this callback (video player and file).
    """
    # Stage 1: fetch the audio for the given URL.
    audio_path_file = audio_extractor.extract(url)
    print(f"Audio file created at: {audio_path_file}")

    # Stage 2: speech-to-text on the saved audio file.
    transcript = audio_transcriber.transcribe(audio_path_file)
    print("Audio transcription ready!")

    # Stage 3: have the text processor turn the transcript into scenes.
    scenes = text_processor.get_json_scenes(transcript)
    print("Scenes ready")

    # Stage 4: render the scenes; the same result feeds both outputs.
    result = video_creator.create_video(scenes)
    return result, result
|
63 |
+
|
64 |
+
css = """
|
65 |
+
a {
|
66 |
+
color: inherit;
|
67 |
+
text-decoration: underline;
|
68 |
+
}
|
69 |
+
.gradio-container {
|
70 |
+
font-family: 'IBM Plex Sans', sans-serif;
|
71 |
+
}
|
72 |
+
.gr-button {
|
73 |
+
color: white;
|
74 |
+
border-color: #000000;
|
75 |
+
background: #000000;
|
76 |
+
}
|
77 |
+
input[type='range'] {
|
78 |
+
accent-color: #000000;
|
79 |
+
}
|
80 |
+
.dark input[type='range'] {
|
81 |
+
accent-color: #dfdfdf;
|
82 |
+
}
|
83 |
+
.container {
|
84 |
+
max-width: 730px;
|
85 |
+
margin: auto;
|
86 |
+
padding-top: 1.5rem;
|
87 |
+
}
|
88 |
+
#gallery {
|
89 |
+
min-height: 22rem;
|
90 |
+
margin-bottom: 15px;
|
91 |
+
margin-left: auto;
|
92 |
+
margin-right: auto;
|
93 |
+
border-bottom-right-radius: .5rem !important;
|
94 |
+
border-bottom-left-radius: .5rem !important;
|
95 |
+
}
|
96 |
+
#gallery>div>.h-full {
|
97 |
+
min-height: 20rem;
|
98 |
+
}
|
99 |
+
.details:hover {
|
100 |
+
text-decoration: underline;
|
101 |
+
}
|
102 |
+
.gr-button {
|
103 |
+
white-space: nowrap;
|
104 |
+
}
|
105 |
+
.gr-button:focus {
|
106 |
+
border-color: rgb(147 197 253 / var(--tw-border-opacity));
|
107 |
+
outline: none;
|
108 |
+
box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000);
|
109 |
+
--tw-border-opacity: 1;
|
110 |
+
--tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color);
|
111 |
+
--tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(3px var(--tw-ring-offset-width)) var(--tw-ring-color);
|
112 |
+
--tw-ring-color: rgb(191 219 254 / var(--tw-ring-opacity));
|
113 |
+
--tw-ring-opacity: .5;
|
114 |
+
}
|
115 |
+
#advanced-btn {
|
116 |
+
font-size: .7rem !important;
|
117 |
+
line-height: 19px;
|
118 |
+
margin-top: 12px;
|
119 |
+
margin-bottom: 12px;
|
120 |
+
padding: 2px 8px;
|
121 |
+
border-radius: 14px !important;
|
122 |
+
}
|
123 |
+
#advanced-options {
|
124 |
+
margin-bottom: 20px;
|
125 |
+
}
|
126 |
+
.footer {
|
127 |
+
margin-bottom: 45px;
|
128 |
+
margin-top: 35px;
|
129 |
+
text-align: center;
|
130 |
+
border-bottom: 1px solid #e5e5e5;
|
131 |
+
}
|
132 |
+
.footer>p {
|
133 |
+
font-size: .8rem;
|
134 |
+
display: inline-block;
|
135 |
+
padding: 0 10px;
|
136 |
+
transform: translateY(10px);
|
137 |
+
background: white;
|
138 |
+
}
|
139 |
+
.dark .footer {
|
140 |
+
border-color: #303030;
|
141 |
+
}
|
142 |
+
.dark .footer>p {
|
143 |
+
background: #0b0f19;
|
144 |
+
}
|
145 |
+
.acknowledgments h4{
|
146 |
+
margin: 1.25em 0 .25em 0;
|
147 |
+
font-weight: bold;
|
148 |
+
font-size: 115%;
|
149 |
+
}
|
150 |
+
#container-advanced-btns{
|
151 |
+
display: flex;
|
152 |
+
flex-wrap: wrap;
|
153 |
+
justify-content: space-between;
|
154 |
+
align-items: center;
|
155 |
+
}
|
156 |
+
.animate-spin {
|
157 |
+
animation: spin 1s linear infinite;
|
158 |
+
}
|
159 |
+
@keyframes spin {
|
160 |
+
from {
|
161 |
+
transform: rotate(0deg);
|
162 |
+
}
|
163 |
+
to {
|
164 |
+
transform: rotate(360deg);
|
165 |
+
}
|
166 |
+
}
|
167 |
+
#share-btn-container {
|
168 |
+
display: flex; padding-left: 0.5rem !important; padding-right: 0.5rem !important; background-color: #000000; justify-content: center; align-items: center; border-radius: 9999px !important; width: 13rem;
|
169 |
+
}
|
170 |
+
#share-btn {
|
171 |
+
all: initial; color: #ffffff;font-weight: 600; cursor:pointer; font-family: 'IBM Plex Sans', sans-serif; margin-left: 0.5rem !important; padding-top: 0.25rem !important; padding-bottom: 0.25rem !important;
|
172 |
+
}
|
173 |
+
#share-btn * {
|
174 |
+
all: unset;
|
175 |
+
}
|
176 |
+
.gr-form{
|
177 |
+
flex: 1 1 50%; border-top-right-radius: 0; border-bottom-right-radius: 0;
|
178 |
+
}
|
179 |
+
#prompt-container{
|
180 |
+
gap: 0;
|
181 |
+
}
|
182 |
+
#generated_id{
|
183 |
+
min-height: 700px
|
184 |
+
}
|
185 |
+
"""
|
186 |
+
block = gr.Blocks(css=css)
|
187 |
+
|
188 |
+
with block as demo:
|
189 |
+
gr.HTML(
|
190 |
+
f"""
|
191 |
+
<div style="text-align: center; max-width: 650px; margin: 0 auto;">
|
192 |
+
<div
|
193 |
+
style="
|
194 |
+
display: inline-flex;
|
195 |
+
align-items: center;
|
196 |
+
gap: 0.8rem;
|
197 |
+
font-size: 1.75rem;
|
198 |
+
"
|
199 |
+
>
|
200 |
+
<h1 style="font-weight: 900; margin-bottom: 7px;">
|
201 |
+
YouTube to Illustrated Summary
|
202 |
+
</h1>
|
203 |
+
</div>
|
204 |
+
<p style="margin-bottom: 10px; font-size: 94%">
|
205 |
+
Enter the URL of a YouTube video (Spanish) and you'll receive a video with an illustrated summary.
|
206 |
+
It works for audio books, history lessons, etc. Try it out with a short video (less than 10 minutes).
|
207 |
+
</p>
|
208 |
+
<p style="margin-bottom: 10px; font-size: 94%">
|
209 |
+
Running on <b>{device_print}</b>
|
210 |
+
</p>
|
211 |
+
</div>
|
212 |
+
"""
|
213 |
)
|
214 |
+
with gr.Group():
|
215 |
+
with gr.Box():
|
216 |
+
with gr.Row().style(mobile_collapse=False, equal_height=True):
|
217 |
+
|
218 |
+
url = gr.Textbox(
|
219 |
+
label="Enter the URL of the YouTubeVideo", show_label=False, max_lines=1
|
220 |
+
).style(
|
221 |
+
border=(True, False, True, True),
|
222 |
+
rounded=(True, False, False, True),
|
223 |
+
container=False,
|
224 |
+
)
|
225 |
+
btn = gr.Button("Run").style(
|
226 |
+
margin=False,
|
227 |
+
rounded=(False, True, True, False),
|
228 |
+
)
|
229 |
+
video_output = gr.Video()
|
230 |
+
file_output = gr.File()
|
231 |
+
|
232 |
+
btn.click(datapipeline,
|
233 |
+
inputs=[url],
|
234 |
+
outputs=[video_output, file_output])
|
235 |
+
|
236 |
+
gr.HTML(
|
237 |
+
"""
|
238 |
+
<div class="footer">
|
239 |
+
<p>This demo is part of the Whisper Sprint (Dec. 2022).
|
240 |
+
</p>
|
241 |
+
</div>
|
242 |
+
"""
|
243 |
+
)
|
244 |
+
gr.Markdown('''
|
245 |
+
[![Twitter Follow](https://img.shields.io/twitter/follow/juancopi81?style=social)](https://twitter.com/juancopi81)
|
246 |
+
![visitors](https://visitor-badge.glitch.me/badge?page_id=Juancopi81.yt-illustraded-summary)
|
247 |
+
''')
|
248 |
+
|
249 |
+
if not is_colab:
|
250 |
+
demo.queue(concurrency_count=1)
|
251 |
+
demo.launch(debug=is_colab, share=is_colab)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -1,3 +1,11 @@
|
|
1 |
-
git+https://github.com/huggingface/transformers
|
2 |
torch
|
3 |
-
pytube
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
torch
|
2 |
+
pytube
|
3 |
+
git+https://github.com/huggingface/transformers
|
4 |
+
openai
|
5 |
+
typing
|
6 |
+
git+https://github.com/huggingface/diffusers.git
|
7 |
+
accelerate
|
8 |
+
TTS
|
9 |
+
moviepy
|
10 |
+
imageio==2.4.1
|
11 |
+
tensorboard
|
textprocessor.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import json
|
3 |
+
from typing import Dict
|
4 |
+
|
5 |
+
import openai
|
6 |
+
|
7 |
+
context_prompt = """
|
8 |
+
|
9 |
+
You are a creator of ilustraded books building a series of scenes for your book.
|
10 |
+
Your boss asked you to write a summary and the illustrations of the following text:
|
11 |
+
|
12 |
+
$TRANSCRIPTION
|
13 |
+
|
14 |
+
You have to write the summary using a maximum of 7 scenes, and using the following JSON format:
|
15 |
+
|
16 |
+
Write your answer in JSON format that has: The number of the scene, the summary for each scene, and the Illustration for each scene.
|
17 |
+
The value for "Summary" should be in Spanish and it should not be longer than 30 words.
|
18 |
+
The value for "Illustration" should be in English and no longer than 20 words. It should have a detail description of an illustration for this scene in English with many details, and a artistic style for the illustration that matches the text.
|
19 |
+
|
20 |
+
Just answer with the JSON object, so your boss can easily parse it.
|
21 |
+
|
22 |
+
"""
|
23 |
+
|
24 |
+
openai.api_key = os.getenv("SECRET_KEY_OPENAI")
|
25 |
+
|
26 |
+
class TextProcessor:
    """Ask an OpenAI completion model to split a transcription into scenes.

    The sampling parameters are stored on the instance and forwarded
    unchanged to ``openai.Completion.create`` on every request.
    """

    def __init__(self,
                 model: str = "text-davinci-003",
                 temperature: float = 0.7,
                 max_tokens: int = 2500,
                 top_p: int = 1,
                 frequency_penalty: int = 0,
                 presence_penalty: int = 0) -> None:
        """Store the completion parameters used by ``get_json_scenes``.

        Args:
            model: Name of the OpenAI completion model.
            temperature: Sampling temperature passed to the API.
            max_tokens: Completion length limit passed to the API.
            top_p: Nucleus-sampling value passed to the API.
            frequency_penalty: Frequency penalty passed to the API.
            presence_penalty: Presence penalty passed to the API.
        """
        self.model = model
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.top_p = top_p
        self.frequency_penalty = frequency_penalty
        self.presence_penalty = presence_penalty

    def get_json_scenes(self,
                        prompt: str) -> Dict:
        """Request a scene breakdown for ``prompt`` and parse it.

        Args:
            prompt: Transcribed text; it replaces the ``$TRANSCRIPTION``
                placeholder of the module-level ``context_prompt``.

        Returns:
            The parsed scenes as a dict. A top-level JSON array is
            converted to a dict keyed by its 0-based position.

        Raises:
            json.JSONDecodeError: If no JSON document can be parsed from
                the completion text.
        """
        gpt_prompt = context_prompt.replace("$TRANSCRIPTION", prompt)
        response = openai.Completion.create(
            model=self.model,
            prompt=gpt_prompt,
            temperature=self.temperature,
            max_tokens=self.max_tokens,
            top_p=self.top_p,
            frequency_penalty=self.frequency_penalty,
            presence_penalty=self.presence_penalty
        )
        return self._parse_scenes(response["choices"][0]["text"])

    @staticmethod
    def _parse_scenes(completion_text: str) -> Dict:
        """Parse the completion text into a dict of scenes."""
        try:
            scenes = json.loads(completion_text)
        except json.JSONDecodeError:
            # The completion may surround the JSON with extra text; retry
            # on the span from the first opening bracket to the last
            # closing one before giving up.
            starts = [pos
                      for pos in (completion_text.find("{"),
                                  completion_text.find("["))
                      if pos != -1]
            end = max(completion_text.rfind("}"), completion_text.rfind("]"))
            if not starts or end < min(starts):
                raise
            scenes = json.loads(completion_text[min(starts):end + 1])
        if isinstance(scenes, list):
            scenes = dict(enumerate(scenes))
        return scenes
|
transcriber.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import pipeline
|
2 |
+
|
3 |
+
class Transcriber:
    """Thin wrapper that runs a speech-recognition pipeline on an audio file."""

    # The annotation is quoted so it is not evaluated when the class is
    # defined; ``pipe`` only needs to be callable with a file path and
    # return a mapping that has a "text" entry.
    def __init__(self, pipe: "pipeline") -> None:
        """Keep a reference to the pipeline used by ``transcribe``.

        Args:
            pipe: Callable invoked as ``pipe(file_path)``.
        """
        self.pipe = pipe

    def transcribe(self, file_path: str = "yt_audio.mp3") -> str:
        """Return the text produced by the pipeline for ``file_path``.

        Args:
            file_path: Path of the audio file handed to the pipeline.

        Returns:
            The pipeline's ``"text"`` value, or the fixed string
            ``"ERROR: No audio file found to transcribe"`` when the
            pipeline call fails.
        """
        import logging

        try:
            return self.pipe(file_path)["text"]
        except Exception:
            # Keep the historical sentinel return value for callers, but
            # record the real cause, and let KeyboardInterrupt/SystemExit
            # propagate instead of being swallowed by a bare ``except``.
            logging.getLogger(__name__).exception(
                "Transcription of %r failed", file_path
            )
            return "ERROR: No audio file found to transcribe"
|
utils.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def is_google_colab() -> bool:
    """Report whether the ``google.colab`` module can be imported.

    Returns:
        ``True`` when ``import google.colab`` succeeds, ``False`` when it
        raises ``ImportError``.
    """
    try:
        import google.colab  # noqa: F401 -- imported only to probe availability
    except ImportError:
        # Only a failed import means "not importable"; anything else
        # (e.g. KeyboardInterrupt) is no longer silently swallowed.
        return False
    return True
|
videocreator.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Dict
|
2 |
+
|
3 |
+
from moviepy.editor import VideoFileClip, concatenate_videoclips
|
4 |
+
|
5 |
+
class VideoCreator:
    """Build one video from a mapping of scenes.

    Each scene is rendered as its own clip (text-to-speech audio plus a
    generated background image) and the clips are concatenated into
    ``final_video.mp4``.
    """

    def __init__(self,
                 tts_pipeline,
                 image_pipeline) -> None:
        """Store the generators used for every scene.

        Args:
            tts_pipeline: Object exposing
                ``tts_to_file(text=..., file_path=...)``.
            image_pipeline: Callable taking a text description and
                returning an object whose ``images`` sequence holds items
                with a ``save(path)`` method.
        """
        self.tts_pipeline = tts_pipeline
        self.image_pipeline = image_pipeline

    def create_video(self, scenes: Dict) -> str:
        """Render every scene and merge the results into one file.

        Args:
            scenes: Mapping of scene key to a dict with ``"Summary"`` and
                ``"Illustration"`` entries. Scenes are processed in the
                mapping's iteration order.

        Returns:
            Path of the merged video file.

        Raises:
            ValueError: If ``scenes`` is empty.
        """
        if not scenes:
            raise ValueError("Cannot create a video without scenes")
        videos_dict = {
            key: self._create_video_from_scene(scene)
            for key, scene in scenes.items()
        }
        return self._merge_videos(videos_dict)

    def _create_video_from_scene(self, scene: Dict) -> str:
        """Create the clip for one scene and return what ``make_waveform`` returns."""
        # Local import: this module never imported gradio at the top, so
        # the bare ``gr`` name used here previously raised NameError.
        import gradio as gr

        audio_file = self._get_audio_from_text(scene["Summary"])
        bg_image = self._get_bg_image_from_description(scene["Illustration"])
        return gr.make_waveform(audio=audio_file,
                                bg_image=bg_image)

    def _get_audio_from_text(self, voice_over: str) -> str:
        """Synthesize ``voice_over`` into ``output.wav`` and return that path."""
        # NOTE: the same file name is reused for every scene, so each call
        # overwrites the previous scene's audio.
        self.tts_pipeline.tts_to_file(text=voice_over,
                                      file_path="output.wav")
        return "output.wav"

    def _get_bg_image_from_description(self, img_desc: str):
        """Generate an image for ``img_desc``, save it as ``img.png`` and return that path."""
        images = self.image_pipeline(img_desc)
        print("Image generated!")
        # Only the first generated image is kept; the file name is reused
        # for every scene.
        image_output = images.images[0]
        image_output.save("img.png")
        return "img.png"

    def _merge_videos(self, videos_dict: Dict) -> str:
        """Concatenate the per-scene clips into ``final_video.mp4``.

        Args:
            videos_dict: Mapping of scene key to clip path. Values are
                used in iteration order, so keys need not be integers.

        Returns:
            Path of the written file.
        """
        output_path = "final_video.mp4"
        clips = []
        try:
            for path in videos_dict.values():
                clips.append(VideoFileClip(path))
            final_video = concatenate_videoclips(clips)
            final_video.write_videofile(output_path)
        finally:
            # Release the readers held by each source clip, including
            # when concatenation or writing fails.
            for clip in clips:
                clip.close()
        return output_path
|
youtubeaudioextractor.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from abc import ABC, abstractmethod
|
2 |
+
|
3 |
+
import pytube as pt
|
4 |
+
|
5 |
+
class YouTubeAudioExtractor(ABC):
    """Interface for components that save the audio of a YouTube video."""

    @abstractmethod
    def extract(self, url: str, save_path: str = "yt_audio") -> str:
        """Save the audio for ``url`` and return the path of the file.

        ``save_path`` has a default so that the one-argument call
        ``extract(url)`` is valid against this interface, not only
        against a concrete subclass that happens to supply a default.

        Args:
            url: Address of the YouTube video.
            save_path: Destination path for the saved file.

        Returns:
            Path of the saved file.
        """
|
10 |
+
|
11 |
+
class PytubeAudioExtractor(YouTubeAudioExtractor):
    """Extractor that downloads one stream of a video through ``pytube``."""

    def __init__(self,
                 only_audio: bool = True,
                 extension: str = ".mp3") -> None:
        """Configure stream selection and output naming.

        Args:
            only_audio: Forwarded to the ``only_audio`` stream filter.
            extension: Suffix appended to ``save_path`` to form the
                output file name.
        """
        self.only_audio = only_audio
        self.extension = extension

    def extract(self, url: str,
                save_path: str = "yt_audio") -> str:
        """Download the first matching stream and return its file name.

        Args:
            url: Address of the YouTube video.
            save_path: Output path without extension.

        Returns:
            ``save_path`` with ``self.extension`` appended.

        Raises:
            IndexError: If the filtered stream query is empty.
        """
        yt = pt.YouTube(url)
        stream = yt.streams.filter(only_audio=self.only_audio).first()
        if stream is None:
            # Same exception type the former ``[0]`` lookup raised, now
            # with enough context to diagnose the failure.
            raise IndexError(
                f"No downloadable stream found for {url!r} "
                f"(only_audio={self.only_audio})"
            )
        # The extension only names the file; nothing here converts the
        # stream. NOTE(review): the saved data may therefore not be real
        # MP3 -- confirm that downstream consumers accept it.
        filename = save_path + self.extension
        stream.download(filename=filename)
        return filename
|