Spaces:

IanRonk
/

sponsoredbye

Runtime error

App Files Community

IanRonk committed on May 22

Commit

e7ccdc6

•

1 Parent(s): 3ab33e3

changed smth

Browse files

Files changed (12) hide show

.gitattributes +0 -37
README.md +0 -12
RNN_model.keras +0 -3
__init__.py +0 -0
app.py +0 -34
functions/model.keras +0 -3
functions/model_infer.py +0 -40
functions/punctuation.py +0 -58
functions/tf_format.keras +0 -3
model.keras +0 -3
requirements.txt +0 -4
tf_format.keras +0 -3

.gitattributes DELETED Viewed

@@ -1,37 +0,0 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
-*.keras filter=lfs diff=lfs merge=lfs -text
-functions/*.keras filter=lfs diff=lfs merge=lfs -text

README.md DELETED Viewed

@@ -1,12 +0,0 @@
----
-title: Sponsoredbye
-emoji: 🦀
-colorFrom: pink
-colorTo: purple
-sdk: gradio
-sdk_version: 4.31.4
-app_file: app.py
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

RNN_model.keras DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:642ec9499996ca6dcd3c8f2874ae3c5d9ca0095064d2f8faae1f12b2fea1e020
-size 3974964

__init__.py DELETED Viewed

File without changes

app.py DELETED Viewed

@@ -1,34 +0,0 @@
-from os import pipe
-import gradio as gr
-from functions.punctuation import punctuate
-from functions.model_infer import predict_from_document
-title = "sponsoredBye - never listen to sponsors again"
-description = "Sponsored sections in videos are annoying and take up a lot of time. Improve your YouTube watching experience, by filling in the youtube url and figure out what segments to skip."
-article = "Check out [the original Rick and Morty Bot](https://huggingface.co/spaces/kingabzpro/Rick_and_Morty_Bot) that this demo is based off of."
-def pipeline(video_url):
-    video_id = video_url.split("?v=")[-1]
-    punctuated_text = punctuate(video_id)
-    sentences = re.split(r"[\.\!\?]\s", punctuated_text)
-    classification = predict_from_document(sentences)
-    #    return punctuated_text
-    return [{"start": "12:05", "end": "12:52", "classification": str(classification)}]
-# print(pipeline("VL5M5ZihJK4"))
-demo = gr.Interface(
-    fn=pipeline,
-    title=title,
-    description=description,
-    inputs="text",
-    #    outputs=gr.Label(num_top_classes=3),
-    outputs="json",
-    examples=[
-        "https://www.youtube.com/watch?v=VL5M5ZihJK4",
-        "https://www.youtube.com/watch?v=VL5M5ZihJK4",
-    ],
-)
-demo.launch(share=True)

functions/model.keras DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5af590bbc6d50b5ca00d2f7cdca06d2e6c8ef94dd6e09019c69b75e816ca5d05
-size 3977504

functions/model_infer.py DELETED Viewed

@@ -1,40 +0,0 @@
-from keras.preprocessing.sequence import pad_sequences
-# import tensorflow as tf
-import os
-import requests
-from keras.models import load_model
-headers = {"Authorization": f"Bearer {os.environ['HF_Token']}"}
-model = load_model("./RNN_model.keras")
-def query_embeddings(texts):
-    payload = {"inputs": texts, "options": {"wait_for_model": True}}
-    model_id = "sentence-transformers/sentence-t5-base"
-    API_URL = (
-        f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
-    )
-    response = requests.post(API_URL, headers=headers, json=payload)
-    return response.json()
-def preprocess(sentences):
-    max_len = 1682
-    embeddings = query_embeddings(sentences)
-    if len(sentences) > max_len:
-        X = embeddings[:max_len]
-    else:
-        X = embeddings
-    X_padded = pad_sequences([X], maxlen=max_len, dtype="float32", padding="post")
-    return X_padded
-def predict_from_document(sentences):
-    preprop = preprocess(sentences)
-    prediction = model.predict(preprop)
-    output = (prediction.flatten()[: len(sentences)] >= 0.5).astype(int)
-    return output

functions/punctuation.py DELETED Viewed

@@ -1,58 +0,0 @@
-import requests
-from youtube_transcript_api import YouTubeTranscriptApi
-import json
-import os
-headers = {
-    "Authorization": f"Bearer {os.environ['HF_Token']}"
-}  # NOTE: put this somewhere else
-def retrieve_transcript(vid_id):
-    try:
-        transcript = YouTubeTranscriptApi.get_transcript(vid_id)
-        return transcript
-    except Exception as e:
-        return None
-def split_transcript(transcript, chunk_size=40):
-    sentences = []
-    for i in range(0, len(transcript), chunk_size):
-        to_add = [x["text"] for x in transcript[i : i + chunk_size]]
-        sentences.append(" ".join(to_add))
-    return sentences
-def query_punctuation(splits):
-    payload = {"inputs": splits}
-    API_URL = "https://api-inference.huggingface.co/models/oliverguhr/fullstop-punctuation-multilang-large"
-    response = requests.post(API_URL, headers=headers, json=payload)
-    return response.json()
-def parse_output(output, comb):
-    total = []
-    # loop over the response from the huggingface api
-    for i, o in enumerate(output):
-        added = 0
-        tt = comb[i]
-        for elem in o:
-            # Loop over the output chunks and add the . and ?
-            if elem["entity_group"] not in ["0", ",", ""]:
-                split = elem["end"] + added
-                tt = tt[:split] + elem["entity_group"] + tt[split:]
-                added += 1
-        total.append(tt)
-    return " ".join(total)
-def punctuate(video_id):
-    transcript = retrieve_transcript(video_id)
-    splits = split_transcript(
-        transcript
-    )  # Get the transcript from the YoutubeTranscriptApi
-    resp = query_punctuation(splits)  # Get the response from the Inference API
-    punctuated_transcript = parse_output(resp, splits)
-    return punctuated_transcript

functions/tf_format.keras DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5af590bbc6d50b5ca00d2f7cdca06d2e6c8ef94dd6e09019c69b75e816ca5d05
-size 3977504

model.keras DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5af590bbc6d50b5ca00d2f7cdca06d2e6c8ef94dd6e09019c69b75e816ca5d05
-size 3977504

requirements.txt DELETED Viewed

@@ -1,4 +0,0 @@
-youtube_transcript_api
-tensorflow==2.15
-keras
-keras-nlp

tf_format.keras DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5af590bbc6d50b5ca00d2f7cdca06d2e6c8ef94dd6e09019c69b75e816ca5d05
-size 3977504