"""Solara app: semantic search over song lyrics stored in a LanceDB table.

At import time the script downloads a CSV of songs, normalises the
``Album`` and ``Song`` capitalisation, builds a ``text`` column that combines
album, song title and lyrics, and loads everything into a fresh LanceDB table
whose ``vector`` column is declared as the embedding of ``text``. The ``Page``
component then renders a text box and shows the ten nearest rows for the
current query.
"""

import shutil
import string

import lancedb
import polars as pl
import solara
from lancedb.embeddings import get_registry
from lancedb.pydantic import LanceModel, Vector

# Clean up all the directories used in this script so the table below is
# always created from scratch ("./data" is the same directory that
# lancedb.connect("data/") opens further down).
shutil.rmtree("./data", ignore_errors=True)

df = pl.read_csv(
    "https://drive.google.com/uc?export=download&id=1uD3h7xYxr9EoZ0Ggoh99JtQXa3AxtxyU"
)

# Normalise capitalisation of the album and song titles. string.capwords is
# applied row by row in Python; it raises on a null value, so these columns
# are assumed to be fully populated -- TODO confirm against the dataset.
for _column in ("Album", "Song"):
    df = df.with_columns(
        pl.Series(_column, [string.capwords(value) for value in df[_column]])
    )

# Missing lyrics are replaced by the literal string "None" so that the
# string concatenation below never yields a null ``text`` for that reason.
df = df.with_columns(pl.col("Lyrics").fill_null("None"))

# ``text`` is the document that gets embedded: a markdown-style heading with
# album and song, a blank line, then the lyrics.
df = df.with_columns(
    text=pl.lit("# ")
    + pl.col("Album")
    + pl.lit(": ")
    + pl.col("Song")
    + pl.lit("\n\n")
    + pl.col("Lyrics")
)

db = lancedb.connect("data/")

# Sentence-transformers embedding function, run on CPU.
embeddings = (
    get_registry()
    .get("sentence-transformers")
    .create(name="TaylorAI/gte-tiny", device="cpu")
)


class Songs(LanceModel):
    """Schema of the ``Songs`` table.

    ``text`` is declared as the embedding source field and ``vector`` as the
    embedding vector field, sized to the embedding model's dimensionality.
    """

    Song: str
    Lyrics: str
    Album: str
    Artist: str
    text: str = embeddings.SourceField()
    vector: Vector(embeddings.ndims()) = embeddings.VectorField()


table = db.create_table("Songs", schema=Songs)
table.add(data=df)

# Reactive holder for the search box content, pre-filled with an example.
query = solara.reactive("Which song is about a boy who is having nightmares?")


@solara.component
def Page():
    """Render the search box and, for a non-empty query, the top 10 matches."""
    with solara.Column(margin=10):
        solara.Markdown("# Metallica Song Finder")
        # NOTE(review): continuous_update=True presumably re-runs the search
        # while the user is typing -- confirm this is the desired trade-off.
        solara.InputText("Enter some query:", query, continuous_update=True)

        # Skip the search entirely while the box is empty.
        if query.value:
            df_results = table.search(query.value).limit(10).to_polars()
            # Keep only the display columns; ``_distance`` comes from the
            # search result rather than from the table schema.
            df_results = df_results.select(
                ["Song", "Album", "_distance", "Lyrics", "Artist"]
            )
            solara.DataFrame(df_results, items_per_page=10)