File size: 2,663 Bytes
fbc7e49
a5c05cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52a9cd3
 
a5c05cc
 
 
 
 
 
 
81d4c87
a5c05cc
81d4c87
a5c05cc
 
 
52a9cd3
a5c05cc
 
 
 
 
 
fbc7e49
a5c05cc
 
 
 
 
 
81d4c87
a5c05cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
afc3612
a5c05cc
 
 
afc3612
a5c05cc
02b7760
a5c05cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import gradio as gr
from langchain_community.document_loaders import JSONLoader
from langchain_community.vectorstores import Qdrant
from langchain_community.embeddings import HuggingFaceEmbeddings
from sentence_transformers.cross_encoder import CrossEncoder

# Path to the JSON file holding the dish data; it is passed to JSONLoader as
# `file_path` further down in this module.
json_path = "format_food.json"

def metadata_func(record: dict, metadata: dict) -> dict:
    """Copy the dish fields of interest from a raw record into its metadata.

    Args:
        record: One raw record as extracted from the JSON file.
        metadata: The metadata dict to extend; it is updated in place.

    Returns:
        The same ``metadata`` dict, now carrying ``title``, ``cuisine``,
        ``time`` and ``instructions``. A field missing from ``record`` is
        stored as ``None``.
    """
    for field in ("title", "cuisine", "time", "instructions"):
        metadata[field] = record.get(field)
    return metadata

def reranking_results(query, top_k_results, rerank_model):
    """Rerank retrieved documents against the query with the given model.

    Each document is rendered as ``"<title>, <page_content>"`` and the
    resulting strings are handed to ``rerank_model.rank``.

    Args:
        query: The user's search text.
        top_k_results: Iterable of retrieved documents; each must expose
            ``metadata['title']`` and ``page_content``.
        rerank_model: Object providing ``rank(query, documents,
            return_documents=...)`` (presumably a sentence-transformers
            ``CrossEncoder``; the model is supplied by the caller, not loaded here).

    Returns:
        Whatever ``rerank_model.rank`` returns when called with
        ``return_documents=True``.
    """
    candidates = []
    for doc in top_k_results:
        candidates.append(f"{doc.metadata['title']}, {doc.page_content}")
    return rerank_model.rank(query, candidates, return_documents=True)


# Build the document loader over the dish JSON file.
json_path = "format_food.json"
loader = JSONLoader(
    file_path=json_path,
    jq_schema='.dishes[].dish',  # one record per `dish` object inside the top-level `dishes` array
    text_content=False,
    content_key='doc',  # the record's `doc` field supplies the document text (see JSONLoader docs)
    metadata_func=metadata_func  # copies title/cuisine/time/instructions into each document's metadata
)
# Load everything eagerly when the module is executed; `data` feeds the vector store below.
data = loader.load()

# Models
# Model id handed to HuggingFaceEmbeddings for embedding documents and queries.
model_name = "Snowflake/snowflake-arctic-embed-xs"
# Reranking is currently disabled: the cross-encoder below is never loaded, and
# the visible code never calls reranking_results().
# rerank_model = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1")

# Embedding
model_kwargs = {"device": "cpu"}  # run the embedding model on CPU
encode_kwargs = {"normalize_embeddings": True}  # request normalised embedding vectors
hf_embedding = HuggingFaceEmbeddings(
    model_name=model_name, encode_kwargs=encode_kwargs, model_kwargs=model_kwargs
)

# Embed every loaded document and index it in a Qdrant collection. This runs
# when the module is executed, before the UI is built.
qdrant = Qdrant.from_documents(
    data,
    hf_embedding,
    location=":memory:",  # Local mode with in-memory storage only
    collection_name="my_documents",
)

def format_to_markdown(response_list):
    """Render a sequence of strings as a Markdown bullet list.

    Args:
        response_list: Iterable of items to list (e.g. recipe steps). It is
            not modified.

    Returns:
        One ``- item`` line per element, joined with newlines. An empty or
        ``None`` input yields an empty string.
    """
    if not response_list:
        return ""
    # Build the bullets without touching the caller's list: prefixing
    # response_list[0] in place would stack another "- " on every repeat call.
    return "\n".join(f"- {item}" for item in response_list)

def run_query(query):
    """Look up the best-matching dish for a query and format it as Markdown.

    Args:
        query: Free-text description of the meal the user wants.

    Returns:
        A ``(title_and_description, recipe)`` tuple of Markdown strings. The
        first holds the top hit's title and text; the second holds its cooking
        time and bulleted instructions. When the search returns no hits, the
        first string says so and the second is empty.
    """
    print("Running Query")
    answer = qdrant.similarity_search(query=query, k=10)
    if not answer:
        # An empty result list would make answer[0] raise IndexError inside
        # the UI callback; return a readable message instead.
        print("Returning query")
        return "# Best Choice:\nNo matching dish found.", ""

    # Only the top-ranked hit is shown; the remaining hits are unused for now.
    best = answer[0]
    title_and_description = f"# Best Choice:\nA {best.metadata['title']}: {best.page_content}"
    instructions = format_to_markdown(best.metadata['instructions'])
    recipe = f"# Cooking time:\n{best.metadata['time']}\n\n# Recipe:\n{instructions}"
    print("Returning query")
    return title_and_description, recipe

# Gradio UI: a text box for the query, two Markdown panes for the result, and a
# button that wires them to run_query.
with gr.Blocks() as demo:
    gr.Markdown("Start typing below and then click **Run** to see the output.")
    inp = gr.Textbox(placeholder="What sort of meal are you after?")
    title_output = gr.Markdown(label="Title and description")
    instructions_output = gr.Markdown(label="Recipe")
    btn = gr.Button("Run")
    # run_query returns (title_and_description, recipe), matching the two outputs in order.
    btn.click(fn=run_query, inputs=inp, outputs=[title_output, instructions_output])

# Start the app as soon as this module is executed.
demo.launch()