File size: 1,476 Bytes
8e4615b
55edacf
 
10cbe37
8e4615b
55346a2
 
2e811f9
55346a2
 
 
 
 
55edacf
 
55346a2
10cbe37
9447ea8
10cbe37
55346a2
10cbe37
 
 
 
 
 
 
 
55346a2
 
10cbe37
55346a2
 
10cbe37
55346a2
 
10cbe37
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import gradio as gr
import chromadb
import pandas as pd
import json

# Create a Chroma client (default settings) and the collection that will be
# searched by the UI callback.
client = chromadb.Client()
collection = client.create_collection(name="bolivian-recipes")

# Load the recipes table from the dataset's parquet export on the Hub.
df = pd.read_parquet(
    "hf://datasets/asoria/bolivian-recipes@~parquet/default/last/0000.parquet"
)

# The text of this column is what gets stored as the document of each row;
# every other column is attached to the row as metadata.
text_column = "preparation"
ids = list(map(str, range(len(df))))  # "0", "1", ... one id per row
documents = df[text_column].tolist()
metadatas = df.drop(columns=[text_column]).to_dict(orient="records")

collection.add(documents=documents, metadatas=metadatas, ids=ids)


# Build the UI: a query box, a submit button and a table showing the matches
# returned by the Chroma collection.
with gr.Blocks() as demo:
    gr.Markdown(" ## Chroma demo using datasets server parquet files")
    gr.Markdown("Embedding parquet files from https://huggingface.co/datasets/asoria/bolivian-recipes ('preparation' column)")
    query = gr.Textbox(label="query", placeholder="anticucho")
    get_result_button = gr.Button("Submit")
    cached_responses_table = gr.DataFrame()

    def get_result(query: str) -> dict:
        """Query the collection and return an update for the results table.

        Args:
            query: Free-text search string taken from the query textbox.

        Returns:
            A dict mapping ``cached_responses_table`` to a Gradio update whose
            value is a DataFrame with the columns ``ids``, ``distances``,
            ``metadatas`` (JSON-encoded) and ``documents`` for the two
            nearest results.
        """
        result = collection.query(query_texts=[query], n_results=2)

        # Results are grouped per query text; a single text was sent, so the
        # first entry of every field holds the matches for it.
        matched_ids = result["ids"][0]
        matched_distances = result["distances"][0]
        matched_documents = result["documents"][0]
        # Serialise each metadata dict so it fits in a single table cell.
        # ensure_ascii=False keeps accented characters readable instead of
        # rendering them as \uXXXX escape sequences.
        matched_metadatas = [
            json.dumps(metadata, ensure_ascii=False)
            for metadata in result["metadatas"][0]
        ]

        table = pd.DataFrame(
            data={
                "ids": matched_ids,
                "distances": matched_distances,
                "metadatas": matched_metadatas,
                "documents": matched_documents,
            }
        )
        return {cached_responses_table: gr.update(value=table)}

    get_result_button.click(get_result, inputs=query, outputs=[cached_responses_table])

# Launch the Gradio app only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    demo.launch()