# Spaces: Running
# (page-status text captured along with the listing; kept as a comment so it is
# not parsed as code)
import shutil

import solara

# Start from a clean slate: remove the local data directory left behind by any
# earlier run. ignore_errors=True means a missing directory is not an error.
shutil.rmtree("./data", ignore_errors=True)
import polars as pl

# Load the song dataset as a Polars DataFrame straight from its download URL.
df = pl.read_csv(
    "https://drive.google.com/uc?export=download&id=1uD3h7xYxr9EoZ0Ggoh99JtQXa3AxtxyU"
)
import string

# Normalise the columns in a single pass:
#   * Album / Song: capitalise every word (string.capwords also collapses runs
#     of whitespace and trims the ends);
#   * Lyrics: replace missing values with the literal string "None".
df = df.with_columns(
    pl.Series("Album", list(map(string.capwords, df["Album"]))),
    pl.Series("Song", list(map(string.capwords, df["Song"]))),
    pl.col("Lyrics").fill_null("None"),
)

# Compose the `text` column: a Markdown-style heading "# <Album>: <Song>", a
# blank line, then the lyrics. This runs as a separate step so that it sees the
# normalised columns produced above. (Using the bare Lyrics column as `text`
# was the alternative previously left commented out here.)
df = df.with_columns(
    text=pl.format("# {}: {}\n\n{}", pl.col("Album"), pl.col("Song"), pl.col("Lyrics"))
)
import lancedb
from lancedb.embeddings import get_registry

# Connect to the LanceDB database kept under the local data/ directory.
db = lancedb.connect("data/")

# Embedding function from the "sentence-transformers" registry entry, using the
# TaylorAI/gte-tiny model and pinned to the CPU.
embeddings = get_registry().get("sentence-transformers").create(
    name="TaylorAI/gte-tiny",
    device="cpu",
)
from lancedb.pydantic import LanceModel, Vector


class Songs(LanceModel):
    """Schema for one song record stored in LanceDB.

    The four metadata fields are plain strings. ``text`` is declared as the
    source field of the module-level ``embeddings`` function and ``vector`` as
    its vector field, sized to the model's output dimension.
    """

    # Plain string metadata.
    Song: str
    Lyrics: str
    Album: str
    Artist: str
    # Input to the embedding function.
    text: str = embeddings.SourceField()
    # Embedding vector; its length is read from the model when the class is defined.
    vector: Vector(embeddings.ndims()) = embeddings.VectorField()
# Create the "Songs" table from the model schema, then load the prepared frame
# into it.
table = db.create_table("Songs", schema=Songs)
table.add(df)

# Reactive holder for the search text, seeded with an example question so the
# page shows results on first load.
query = solara.reactive("Which song is about a boy who is having nightmares?")
def Page():
    """Render the song-finder page.

    Shows a heading and a text box bound to the module-level ``query``
    reactive. While the box holds a non-blank query, the ten closest rows of
    the module-level ``table`` are fetched and displayed in a data table.
    """
    # NOTE(review): Solara's documentation declares pages with
    # ``@solara.component``; no decorator is visible on this function. Confirm
    # whether one was lost and restore it if so.
    with solara.Column(margin=10):
        solara.Markdown("# Metallica Song Finder")
        # continuous_update=True pushes every edit into `query`, so the results
        # refresh while the user is still typing.
        solara.InputText("Enter some query:", query, continuous_update=True)

        # Read the reactive once and trim it: a box holding only whitespace is
        # treated like an empty one instead of being sent to the vector search.
        search_text = query.value.strip()
        if search_text:
            hits = (
                table.search(search_text)
                .limit(10)
                .to_polars()
                # Keep only the columns worth showing, in display order.
                .select(["Song", "Album", "_distance", "Lyrics", "Artist"])
            )
            solara.DataFrame(hits, items_per_page=10)