Spaces:
Running
Running
Joshua Lochner
committed on
Commit
•
a9123fa
1
Parent(s):
fb87012
Remove duplicated methods from streamlit app
Browse files
app.py
CHANGED
@@ -7,16 +7,15 @@ import sys
|
|
7 |
import os
|
8 |
import json
|
9 |
from urllib.parse import quote
|
10 |
-
from huggingface_hub import hf_hub_download
|
11 |
|
12 |
# Allow direct execution
|
13 |
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'src')) # noqa
|
14 |
|
15 |
-
from predict import SegmentationArguments, ClassifierArguments, predict as pred
|
16 |
from evaluate import EvaluationArguments
|
17 |
-
from shared import
|
18 |
from utils import regex_search
|
19 |
-
from model import get_model_tokenizer
|
20 |
|
21 |
st.set_page_config(
|
22 |
page_title='SponsorBlock ML',
|
@@ -106,22 +105,6 @@ for m in MODELS:
|
|
106 |
CLASSIFIER_PATH = 'Xenova/sponsorblock-classifier'
|
107 |
|
108 |
|
109 |
-
@st.cache(persist=True, allow_output_mutation=True)
|
110 |
-
def download_classifier(classifier_args):
|
111 |
-
# Save classifier and vectorizer
|
112 |
-
hf_hub_download(repo_id=CLASSIFIER_PATH,
|
113 |
-
filename=classifier_args.classifier_file,
|
114 |
-
cache_dir=classifier_args.classifier_dir,
|
115 |
-
force_filename=classifier_args.classifier_file,
|
116 |
-
)
|
117 |
-
hf_hub_download(repo_id=CLASSIFIER_PATH,
|
118 |
-
filename=classifier_args.vectorizer_file,
|
119 |
-
cache_dir=classifier_args.classifier_dir,
|
120 |
-
force_filename=classifier_args.vectorizer_file,
|
121 |
-
)
|
122 |
-
return True
|
123 |
-
|
124 |
-
|
125 |
def predict_function(model_id, model, tokenizer, segmentation_args, classifier_args, video_id):
|
126 |
if video_id not in prediction_cache[model_id]:
|
127 |
prediction_cache[model_id][video_id] = pred(
|
@@ -139,12 +122,11 @@ def load_predict(model_id):
|
|
139 |
# Use default segmentation and classification arguments
|
140 |
evaluation_args = EvaluationArguments(model_path=model_info['repo_id'])
|
141 |
segmentation_args = SegmentationArguments()
|
142 |
-
classifier_args = ClassifierArguments(
|
|
|
143 |
|
144 |
model, tokenizer = get_model_tokenizer(evaluation_args.model_path)
|
145 |
|
146 |
-
download_classifier(classifier_args)
|
147 |
-
|
148 |
prediction_function_cache[model_id] = partial(
|
149 |
predict_function, model_id, model, tokenizer, segmentation_args, classifier_args)
|
150 |
|
@@ -157,7 +139,8 @@ def main():
|
|
157 |
|
158 |
# Display heading and subheading
|
159 |
top.markdown('# SponsorBlock ML')
|
160 |
-
top.markdown(
|
|
|
161 |
|
162 |
# Add controls
|
163 |
model_id = top.selectbox(
|
@@ -174,8 +157,7 @@ def main():
|
|
174 |
|
175 |
# Hide segments with a confidence lower than
|
176 |
confidence_threshold = top.slider(
|
177 |
-
'Confidence Threshold (%):', min_value=0, max_value=100, on_change=output.empty)
|
178 |
-
|
179 |
|
180 |
if len(video_input) == 0: # No input, do not continue
|
181 |
return
|
@@ -184,7 +166,7 @@ def main():
|
|
184 |
with st.spinner('Loading model...'):
|
185 |
predict = load_predict(model_id)
|
186 |
|
187 |
-
with output.container():
|
188 |
video_id = regex_search(video_input, YT_VIDEO_REGEX)
|
189 |
if video_id is None:
|
190 |
st.exception(ValueError('Invalid YouTube URL/ID'))
|
|
|
7 |
import os
|
8 |
import json
|
9 |
from urllib.parse import quote
|
|
|
10 |
|
11 |
# Allow direct execution
|
12 |
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'src')) # noqa
|
13 |
|
14 |
+
from predict import SegmentationArguments, ClassifierArguments, predict as pred # noqa
|
15 |
from evaluate import EvaluationArguments
|
16 |
+
from shared import seconds_to_time, CATGEGORY_OPTIONS
|
17 |
from utils import regex_search
|
18 |
+
from model import get_model_tokenizer, get_classifier_vectorizer
|
19 |
|
20 |
st.set_page_config(
|
21 |
page_title='SponsorBlock ML',
|
|
|
105 |
CLASSIFIER_PATH = 'Xenova/sponsorblock-classifier'
|
106 |
|
107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
def predict_function(model_id, model, tokenizer, segmentation_args, classifier_args, video_id):
|
109 |
if video_id not in prediction_cache[model_id]:
|
110 |
prediction_cache[model_id][video_id] = pred(
|
|
|
122 |
# Use default segmentation and classification arguments
|
123 |
evaluation_args = EvaluationArguments(model_path=model_info['repo_id'])
|
124 |
segmentation_args = SegmentationArguments()
|
125 |
+
classifier_args = ClassifierArguments(
|
126 |
+
min_probability=0) # Filtering done later
|
127 |
|
128 |
model, tokenizer = get_model_tokenizer(evaluation_args.model_path)
|
129 |
|
|
|
|
|
130 |
prediction_function_cache[model_id] = partial(
|
131 |
predict_function, model_id, model, tokenizer, segmentation_args, classifier_args)
|
132 |
|
|
|
139 |
|
140 |
# Display heading and subheading
|
141 |
top.markdown('# SponsorBlock ML')
|
142 |
+
top.markdown(
|
143 |
+
'##### Automatically detect in-video YouTube sponsorships, self/unpaid promotions, and interaction reminders.')
|
144 |
|
145 |
# Add controls
|
146 |
model_id = top.selectbox(
|
|
|
157 |
|
158 |
# Hide segments with a confidence lower than
|
159 |
confidence_threshold = top.slider(
|
160 |
+
'Confidence Threshold (%):', min_value=0, value=50, max_value=100, on_change=output.empty)
|
|
|
161 |
|
162 |
if len(video_input) == 0: # No input, do not continue
|
163 |
return
|
|
|
166 |
with st.spinner('Loading model...'):
|
167 |
predict = load_predict(model_id)
|
168 |
|
169 |
+
with output.container(): # Place all content in output container
|
170 |
video_id = regex_search(video_input, YT_VIDEO_REGEX)
|
171 |
if video_id is None:
|
172 |
st.exception(ValueError('Invalid YouTube URL/ID'))
|