"""Gradio app that re-embeds the text of a dataset with a sentence-transformers model."""

# NOTE(review): import order is kept exactly as in the original file;
# confirm before re-sorting, since `spaces` is imported ahead of `torch`.
import gradio as gr
import spaces
import torch
import vdf_io
from sentence_transformers import SentenceTransformer
from rich import print as rprint

# Module-level tensor placed on CUDA at import time; the GPU-decorated
# functions below read its device.
zero = torch.Tensor([0]).cuda()
print(zero.device)  # <-- 'cpu' 🤔
print(vdf_io.__version__)


@spaces.GPU
def greet(n):
    """Print the device of ``zero`` and return a greeting containing ``zero + n``."""
    print(zero.device)  # <-- 'cuda:0' 🤗
    return f"Hello {zero + n} Tensor"


@spaces.GPU
def reembed_dataset(ds, model, text_column="text", embedding_column="embedding"):
    """Embed a text column of ``ds`` and return the dataset with the new column.

    Args:
        ds: Object exposing a ``datasets``-style ``map`` method. Either a
            single dataset or a dict-like mapping of split name -> dataset
            (as returned by ``download_dataset``).
        model: Model name or path passed to ``SentenceTransformer``.
        text_column: Name of the column holding the text to embed.
        embedding_column: Name of the column the embeddings are written to.

    Returns:
        The result of ``ds.map(...)``, i.e. the dataset(s) with
        ``embedding_column`` added. Callers that ignore the return value
        keep working as before.
    """
    # Use a separate local so the `model` argument (a name) is not rebound
    # to the loaded model object.
    encoder = SentenceTransformer(model, device=zero.device)
    rprint(encoder)
    rprint(encoder.encode("Hello, World!"))  # smoke test of the loaded model

    # `map` does not modify `ds` in place and its callback must return a
    # dict of column name -> values, so wrap the embeddings in a dict and
    # keep the returned dataset. Batched mode hands `encode` a list of
    # texts per call instead of one string per row.
    embedded = ds.map(
        lambda batch: {embedding_column: encoder.encode(batch[text_column])},
        batched=True,
    )

    # A mapping of splits cannot be indexed by row number; preview the
    # first row of the first split in that case.
    if isinstance(embedded, dict):
        preview = next(iter(embedded.values()), None)
    else:
        preview = embedded
    if preview is not None and len(preview) > 0:
        rprint(preview[0])
    return embedded


def reembed_main(dataset_name, embedding_model, output_username):
    """Download ``dataset_name``, re-embed it with ``embedding_model``, and report success.

    Args:
        dataset_name: Identifier passed to ``download_dataset``.
        embedding_model: Model name forwarded to ``reembed_dataset``.
        output_username: Only logged here.
            TODO(review): not yet used to store the result anywhere.

    Returns:
        A fixed status string shown in the UI.
    """
    print(f"{dataset_name=}, {embedding_model=}, {output_username=}")
    ds = download_dataset(dataset_name)
    reembed_dataset(ds, model=embedding_model)
    return "Dataset re-embedded successfully"


def download_dataset(dataset_name):
    """Load ``dataset_name`` via ``datasets.load_dataset``, print its length, and return it."""
    # Imported lazily, as in the original, so `datasets` is only loaded
    # when a download is actually requested.
    import datasets

    ds = datasets.load_dataset(dataset_name)
    # NOTE(review): with no `split` argument this is presumably the number
    # of splits rather than the number of rows -- confirm.
    print(len(ds))
    return ds


demo = gr.Interface(
    fn=reembed_main,
    inputs=[
        # dataset name
        gr.Textbox(label="Dataset name"),
        # embedding model
        gr.Textbox(label="Embedding model"),
        # output username
        gr.Textbox(label="Output username"),
    ],
    outputs=gr.Textbox(label="Output"),
    title="Re-Embedder",
    description="Re-embed a dataset using a given model and output to a new username's account",
)

demo.launch()