Upload folder using huggingface_hub
Browse files
- README.md +2 -8
- app.py +76 -0
- processed_episodes.json +0 -0
- summary_embeddings.npy +3 -0
README.md
CHANGED
@@ -1,12 +1,6 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
|
4 |
-
colorFrom: red
|
5 |
-
colorTo: yellow
|
6 |
sdk: gradio
|
7 |
sdk_version: 4.44.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: picardle
|
3 |
+
app_file: app.py
|
|
|
|
|
4 |
sdk: gradio
|
5 |
sdk_version: 4.44.0
|
|
|
|
|
6 |
---
|
|
|
|
app.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import numpy as np
|
3 |
+
from wordllama import WordLlama
|
4 |
+
import gradio as gr
|
5 |
+
from numpy.linalg import norm
|
6 |
+
import os
|
7 |
+
|
8 |
+
# Load episodes data. JSON text is UTF-8, so the encoding is pinned rather
# than left to the platform's locale default.
with open('processed_episodes.json', 'r', encoding='utf-8') as f:
    episodes = json.load(f)

# Load the WordLlama model with 256 dimensions
wl = WordLlama.load(trunc_dim=256)

# Try the on-disk cache first, but only trust it when it has exactly one row
# per episode: rows are matched to episodes by position, so a stale cache
# would silently pair scores with the wrong episodes.
summary_embeddings = None
if os.path.exists('summary_embeddings.npy'):
    cached_embeddings = np.load('summary_embeddings.npy')
    if cached_embeddings.shape[:1] == (len(episodes),):
        summary_embeddings = cached_embeddings

if summary_embeddings is None:
    # Compute embeddings for all summaries
    summaries = [episode['summary'] for episode in episodes]
    summary_embeddings = np.array(wl.embed(summaries))
    # Save embeddings to cache
    np.save('summary_embeddings.npy', summary_embeddings)
|
26 |
+
|
27 |
+
# Define the function to find matching episodes
|
28 |
+
def find_matching_episodes(query, top_k=5):
    """Return the episodes whose summaries are most similar to ``query``.

    The query is embedded with the module-level ``wl`` model and compared,
    by cosine similarity, against every row of the module-level
    ``summary_embeddings``; rows are mapped back to ``episodes`` by index.

    Args:
        query: Free-text search string.
        top_k: Maximum number of results to return. Coerced with ``int()``
            because a UI slider may deliver a float, which is not a valid
            slice bound.

    Returns:
        A list of ``[episode_number, title, score]`` rows ordered from most
        to least similar, where ``score`` is the similarity formatted to
        four decimal places. The list is empty when ``query`` is blank or
        ``top_k`` is less than 1.
    """
    top_k = int(top_k)
    # Guard before slicing: ``[-0:]`` would select *every* episode, and a
    # blank query has no meaningful embedding to rank against.
    if top_k < 1 or not query or not query.strip():
        return []

    # Compute the embedding for the query (first row of a one-item batch)
    query_embedding = wl.embed([query])[0]

    # Normalize embeddings; the epsilon avoids division by zero
    query_norm = query_embedding / (norm(query_embedding) + 1e-10)
    summaries_norm = summary_embeddings / (
        norm(summary_embeddings, axis=1, keepdims=True) + 1e-10
    )

    # Compute cosine similarities
    similarities = summaries_norm @ query_norm

    # argsort is ascending, so take the last top_k indices and reverse them
    top_k_indices = np.argsort(similarities)[-top_k:][::-1]

    # Rows are plain lists (not dicts) so they can feed a table output
    return [
        [
            episodes[idx]['episode_number'],
            episodes[idx]['title'],
            f"{similarities[idx]:.4f}",
        ]
        for idx in top_k_indices
    ]
|
56 |
+
|
57 |
+
# Create the Gradio interface
interface = gr.Interface(
    fn=find_matching_episodes,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your query here...", label="Search Query"),
        # step=1 keeps the slider on whole numbers; without an explicit step
        # it may emit fractional values, which make no sense as a result count.
        gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Number of Results"),
    ],
    outputs=gr.Dataframe(
        headers=["Episode Number", "Title", "Similarity Score"],
        label="Matching Episodes",
    ),
    title="Picardle",
    description="Enter a query to find matching ST:TNG episodes based on their summaries.",
)

# Launch the app
if __name__ == "__main__":
    interface.launch(share=True)
|
76 |
+
|
processed_episodes.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
summary_embeddings.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb345516827ee150c38b8785b8cc26c7b65f95cb98b90160635e009addf59751
|
3 |
+
size 182400
|