semantic-song-search

Sleeping

App Files Files Community

Shea committed on Apr 17, 2023

Commit

15e1bbc

•

1 Parent(s): 1476156

update

Browse files

Files changed (2) hide show

app.py +14 -73
old_app.py +100 -0

app.py CHANGED Viewed

@@ -1,91 +1,32 @@
-from huggingface_hub import from_pretrained_keras
 import numpy as np
 import gradio as gr
-import transformers
-import tensorflow as tf
-class BertSemanticDataGenerator(tf.keras.utils.Sequence):
-    """Generates batches of data."""
-    def __init__(
-        self,
-        sentence_pairs,
-        labels,
-        batch_size=32,
-        shuffle=True,
-        include_targets=True,
-    ):
-        self.sentence_pairs = sentence_pairs
-        self.labels = labels
-        self.shuffle = shuffle
-        self.batch_size = batch_size
-        self.include_targets = include_targets
-        # Load our BERT Tokenizer to encode the text.
-        # We will use the bert-base-uncased pretrained model.
-        self.tokenizer = transformers.BertTokenizer.from_pretrained(
-            "bert-base-uncased", do_lower_case=True
-        )
-        self.indexes = np.arange(len(self.sentence_pairs))
-        self.on_epoch_end()
-    def __len__(self):
-        # Denotes the number of batches per epoch.
-        return len(self.sentence_pairs) // self.batch_size
-    def __getitem__(self, idx):
-        # Retrieves the batch of index.
-        indexes = self.indexes[idx * self.batch_size : (idx + 1) * self.batch_size]
-        sentence_pairs = self.sentence_pairs[indexes]
-        # With BERT tokenizer's batch_encode_plus batch of both the sentences are
-        # encoded together and separated by [SEP] token.
-        encoded = self.tokenizer.batch_encode_plus(
-            sentence_pairs.tolist(),
-            add_special_tokens=True,
-            max_length=128,
-            return_attention_mask=True,
-            return_token_type_ids=True,
-            pad_to_max_length=True,
-            return_tensors="tf",
-        )
-        # Convert batch of encoded features to numpy array.
-        input_ids = np.array(encoded["input_ids"], dtype="int32")
-        attention_masks = np.array(encoded["attention_mask"], dtype="int32")
-        token_type_ids = np.array(encoded["token_type_ids"], dtype="int32")
-        # Set to true if data generator is used for training/validation.
-        if self.include_targets:
-            labels = np.array(self.labels[indexes], dtype="int32")
-            return [input_ids, attention_masks, token_type_ids], labels
-        else:
-            return [input_ids, attention_masks, token_type_ids]
-model = from_pretrained_keras("keras-io/bert-semantic-similarity")
 labels = ["contradiction", "entailment", "neutral"]
 def predict(sentence1, sentence2):
     sentence_pairs = np.array([[str(sentence1), str(sentence2)]])
-    test_data = BertSemanticDataGenerator(
-        sentence_pairs, labels=None, batch_size=1, shuffle=False, include_targets=False,
-    )
-    probs = model.predict(test_data[0])[0]
-    labels_probs = {labels[i]: float(probs[i]) for i, _ in enumerate(labels)}
-    return labels_probs
-    #idx = np.argmax(proba)
-    #proba = f"{proba[idx]*100:.2f}%"
-    #pred = labels[idx]
-    #return f'The semantic similarity of two input sentences is {pred} with {proba} of probability'
-inputs = [
-         gr.Audio(source = "upload", label='Upload audio file', type="filepath"),
-]
 examples = [["Two women are observing something together.", "Two women are standing with their eyes closed."],
             ["A smiling costumed woman is holding an umbrella", "A happy woman in a fairy costume holds an umbrella"],
             ["A soccer game with multiple males playing", "Some men are playing a sport"],
-]
 gr.Interface(
     fn=predict,

 import numpy as np
 import gradio as gr
+from sentence_transformers import SentenceTransformer
+minilm = SentenceTransformer('all-MiniLM-L12-v2')
+roberta = SentenceTransformer('all-distilroberta-v1')
+glove = SentenceTransformer('average_word_embeddings_glove.840B.300d')
 labels = ["contradiction", "entailment", "neutral"]
 def predict(sentence1, sentence2):
     sentence_pairs = np.array([[str(sentence1), str(sentence2)]])
+    print(sentence1)
+    print(sentence2)
+#    test_data = BertSemanticDataGenerator(
+#        sentence_pairs, labels=None, batch_size=1, shuffle=False, include_targets=False,
+#    )
+#    probs = model.predict(test_data[0])[0]
+#    labels_probs = {labels[i]: float(probs[i]) for i, _ in enumerate(labels)}
+ #   return labels_probs
 examples = [["Two women are observing something together.", "Two women are standing with their eyes closed."],
             ["A smiling costumed woman is holding an umbrella", "A happy woman in a fairy costume holds an umbrella"],
             ["A soccer game with multiple males playing", "Some men are playing a sport"],
+            ]
 gr.Interface(
     fn=predict,

old_app.py ADDED Viewed

	@@ -0,0 +1,100 @@

+from huggingface_hub import from_pretrained_keras
+import numpy as np
+import gradio as gr
+import transformers
+import tensorflow as tf
+class BertSemanticDataGenerator(tf.keras.utils.Sequence):
+    """Generates batches of data."""
+    def __init__(
+        self,
+        sentence_pairs,
+        labels,
+        batch_size=32,
+        shuffle=True,
+        include_targets=True,
+    ):
+        self.sentence_pairs = sentence_pairs
+        self.labels = labels
+        self.shuffle = shuffle
+        self.batch_size = batch_size
+        self.include_targets = include_targets
+        # Load our BERT Tokenizer to encode the text.
+        # We will use the bert-base-uncased pretrained model.
+        self.tokenizer = transformers.BertTokenizer.from_pretrained(
+            "bert-base-uncased", do_lower_case=True
+        )
+        self.indexes = np.arange(len(self.sentence_pairs))
+        self.on_epoch_end()
+    def __len__(self):
+        # Denotes the number of batches per epoch.
+        return len(self.sentence_pairs) // self.batch_size
+    def __getitem__(self, idx):
+        # Retrieves the batch of index.
+        indexes = self.indexes[idx * self.batch_size : (idx + 1) * self.batch_size]
+        sentence_pairs = self.sentence_pairs[indexes]
+        # With BERT tokenizer's batch_encode_plus batch of both the sentences are
+        # encoded together and separated by [SEP] token.
+        encoded = self.tokenizer.batch_encode_plus(
+            sentence_pairs.tolist(),
+            add_special_tokens=True,
+            max_length=128,
+            return_attention_mask=True,
+            return_token_type_ids=True,
+            pad_to_max_length=True,
+            return_tensors="tf",
+        )
+        # Convert batch of encoded features to numpy array.
+        input_ids = np.array(encoded["input_ids"], dtype="int32")
+        attention_masks = np.array(encoded["attention_mask"], dtype="int32")
+        token_type_ids = np.array(encoded["token_type_ids"], dtype="int32")
+        # Set to true if data generator is used for training/validation.
+        if self.include_targets:
+            labels = np.array(self.labels[indexes], dtype="int32")
+            return [input_ids, attention_masks, token_type_ids], labels
+        else:
+            return [input_ids, attention_masks, token_type_ids]
+model = from_pretrained_keras("keras-io/bert-semantic-similarity")
+labels = ["contradiction", "entailment", "neutral"]
+def predict(sentence1, sentence2):
+    sentence_pairs = np.array([[str(sentence1), str(sentence2)]])
+    test_data = BertSemanticDataGenerator(
+        sentence_pairs, labels=None, batch_size=1, shuffle=False, include_targets=False,
+    )
+    probs = model.predict(test_data[0])[0]
+    labels_probs = {labels[i]: float(probs[i]) for i, _ in enumerate(labels)}
+    return labels_probs
+    #idx = np.argmax(proba)
+    #proba = f"{proba[idx]*100:.2f}%"
+    #pred = labels[idx]
+    #return f'The semantic similarity of two input sentences is {pred} with {proba} of probability'
+inputs = [
+         gr.Audio(source = "upload", label='Upload audio file', type="filepath"),
+]
+examples = [["Two women are observing something together.", "Two women are standing with their eyes closed."],
+            ["A smiling costumed woman is holding an umbrella", "A happy woman in a fairy costume holds an umbrella"],
+            ["A soccer game with multiple males playing", "Some men are playing a sport"],
+]
+gr.Interface(
+    fn=predict,
+    title="Semantic Song Search",
+    description = "Search for songs based on the meaning in the song's lyrics using a variety of embeddings",
+    inputs=["text", "text"],
+    examples=examples,
+    #outputs=gr.Textbox(label='Prediction'),
+    outputs=gr.outputs.Label(num_top_classes=3, label='Semantic similarity'),
+    cache_examples=True,
+    article = "Author: @sheacon",
+).launch(debug=True, enable_queue=True)