m-butler committed
Commit: f266d24
1 Parent(s): 6efdf97

Upload folder using huggingface_hub

Files changed (4)
  1. README.md +2 -8
  2. app.py +76 -0
  3. processed_episodes.json +0 -0
  4. summary_embeddings.npy +3 -0
README.md CHANGED
@@ -1,12 +1,6 @@
 ---
-title: Picardle
-emoji: 👁
-colorFrom: red
-colorTo: yellow
+title: picardle
+app_file: app.py
 sdk: gradio
 sdk_version: 4.44.0
-app_file: app.py
-pinned: false
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
app.py ADDED
@@ -0,0 +1,76 @@
+import json
+import numpy as np
+from wordllama import WordLlama
+import gradio as gr
+from numpy.linalg import norm
+import os
+
+# Load episodes data
+with open('processed_episodes.json', 'r') as f:
+    episodes = json.load(f)
+
+# Load the WordLlama model with 256 dimensions
+wl = WordLlama.load(trunc_dim=256)
+
+# Check if embeddings are already cached
+if os.path.exists('summary_embeddings.npy'):
+    # Load embeddings from cache
+    summary_embeddings = np.load('summary_embeddings.npy')
+else:
+    # Compute embeddings for all summaries
+    summaries = [episode['summary'] for episode in episodes]
+    summary_embeddings = wl.embed(summaries)
+    summary_embeddings = np.array(summary_embeddings)
+    # Save embeddings to cache
+    np.save('summary_embeddings.npy', summary_embeddings)
+
+# Define the function to find matching episodes
+def find_matching_episodes(query, top_k=5):
+    top_k = int(top_k)  # the slider may pass a float; slicing needs an int
+    # Compute the embedding for the query
+    query_embedding = wl.embed([query])[0]  # The result is a 1D numpy array
+
+    # Normalize embeddings
+    query_norm = query_embedding / (norm(query_embedding) + 1e-10)
+    summaries_norm = summary_embeddings / (norm(summary_embeddings, axis=1, keepdims=True) + 1e-10)
+
+    # Compute cosine similarities
+    similarities = summaries_norm @ query_norm
+
+    # Get indices of the top_k most similar summaries
+    top_k_indices = np.argsort(similarities)[-top_k:][::-1]
+
+    # Retrieve the matching episodes and their similarity scores
+    matching_episodes = []
+    for idx in top_k_indices:
+        episode = episodes[idx]
+        similarity_score = similarities[idx]
+        # Create a list of values instead of a dictionary
+        result = [
+            episode['episode_number'],
+            episode['title'],
+            f"{similarity_score:.4f}"
+        ]
+        matching_episodes.append(result)
+
+    return matching_episodes
+
+# Create the Gradio interface
+interface = gr.Interface(
+    fn=find_matching_episodes,
+    inputs=[
+        gr.Textbox(lines=2, placeholder="Enter your query here...", label="Search Query"),
+        gr.Slider(minimum=1, maximum=10, value=5, label="Number of Results")
+    ],
+    outputs=gr.Dataframe(
+        headers=["Episode Number", "Title", "Similarity Score"],
+        label="Matching Episodes"
+    ),
+    title="Picardle",
+    description="Enter a query to find matching ST:TNG episodes based on their summaries."
+)
+
+# Launch the app
+if __name__ == "__main__":
+    interface.launch(share=True)
+
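The ranking inside find_matching_episodes is plain cosine similarity: the query vector and the cached summary embeddings are L2-normalized, and the dot product gives the score. A minimal standalone sketch of that top-k step, using made-up toy vectors instead of the WordLlama embeddings and episode data above:

import numpy as np
from numpy.linalg import norm

# Toy stand-ins: 3 "summary" vectors and one query vector (4 dims for brevity).
corpus = np.array([[1.0, 0.0, 0.0, 0.0],
                   [0.7, 0.7, 0.0, 0.0],
                   [0.0, 0.0, 1.0, 0.0]])
query = np.array([1.0, 0.1, 0.0, 0.0])

# Same normalization and dot-product ranking as app.py uses.
corpus_norm = corpus / (norm(corpus, axis=1, keepdims=True) + 1e-10)
query_norm = query / (norm(query) + 1e-10)
similarities = corpus_norm @ query_norm

top_k = 2
top_idx = np.argsort(similarities)[-top_k:][::-1]
print(top_idx, similarities[top_idx])  # row 0 ranks first, then row 1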
processed_episodes.json ADDED
The diff for this file is too large to render. See raw diff
 
summary_embeddings.npy ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb345516827ee150c38b8785b8cc26c7b65f95cb98b90160635e009addf59751
+size 182400
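summary_embeddings.npy is tracked with Git LFS, so the diff only shows the pointer file (oid and size), not the array itself. As a quick sanity check after cloning with LFS enabled, the cache can be loaded and its shape inspected; given trunc_dim=256 in app.py, the second dimension should be 256, while the row count depends on processed_episodes.json:

import numpy as np

emb = np.load('summary_embeddings.npy')
print(emb.shape, emb.dtype)  # expected (num_episodes, 256); exact dtype depends on what wl.embed returns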