Spaces:
Runtime error
Runtime error
File size: 3,016 Bytes
38585cf ccee973 38585cf 4af69e2 38585cf ccee973 38585cf ccee973 38585cf 9d1c2b2 38585cf ab5d80d 992104c ab5d80d 38585cf 4af69e2 af6ed4f e4a0273 4af69e2 ab5d80d 4af69e2 be701db ab5d80d 38585cf 4af69e2 ab5d80d 38585cf ab5d80d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
import time
import os
import gradio as gr
import torch
from transformers import AutoModel, AutoTokenizer
import meilisearch
# Single checkpoint name shared by the tokenizer and the encoder below.
_EMBEDDING_CHECKPOINT = "BAAI/bge-base-en-v1.5"

tokenizer = AutoTokenizer.from_pretrained(_EMBEDDING_CHECKPOINT)
model = AutoModel.from_pretrained(_EMBEDDING_CHECKPOINT)
model.eval()  # evaluation mode: the model is only used for inference here

# Logged for diagnostics only; this setup code does not move the model to a GPU.
cuda_available = torch.cuda.is_available()
print(f"CUDA available: {cuda_available}")

# Meilisearch handle. A missing MEILISEARCH_KEY environment variable raises
# KeyError right here, at import time.
meilisearch_index_name = "docs-embed"
meilisearch_client = meilisearch.Client(
    "https://edge.meilisearch.com",
    os.environ["MEILISEARCH_KEY"],
)
meilisearch_index = meilisearch_client.index(meilisearch_index_name)

# Output formats offered to the user.
output_options = ["RAG-friendly", "human-friendly"]
def _source_link(hit):
    """Return a Markdown link of the form ``["title"](url)`` for one search hit."""
    title = hit["source_page_title"]
    url = hit["source_page_url"]
    return f'["{title}"]({url})'


def search_embeddings(query_text, output_option):
    """Embed ``query_text`` and return the best-matching documentation chunks.

    The query is encoded with the module-level ``tokenizer``/``model`` and the
    resulting vector is sent to the module-level ``meilisearch_index``.

    Args:
        query_text: The user's query string.
        output_option: ``"human-friendly"`` for a Markdown report with timing
            stats and a source link under every hit, or ``"RAG-friendly"`` for
            the raw hit texts joined by a dashed separator line.

    Returns:
        A ``(body, sources_md)`` tuple of strings: the formatted hits and a
        comma-separated list of Markdown links to the source pages.

    Raises:
        ValueError: If ``output_option`` is not one of the two supported values.
    """
    # Fail fast: falling through both branches used to return None, which the
    # two-output Gradio interface cannot unpack, and only after the model and
    # the search service had already been called.
    if output_option not in ("RAG-friendly", "human-friendly"):
        raise ValueError(
            f"Unknown output_option {output_option!r}; "
            "expected 'RAG-friendly' or 'human-friendly'"
        )

    # step1: tokenize and embed the query
    embedding_start = time.perf_counter()
    query_prefix = 'Represent this sentence for searching code documentation: '
    query_tokens = tokenizer(
        query_prefix + query_text,
        padding=True,
        truncation=True,
        return_tensors='pt',
        max_length=512,
    )
    with torch.no_grad():
        model_output = model(**query_tokens)
        # Use the vector at the first token position as the sentence embedding
        # (presumably the [CLS] token for this checkpoint -- TODO confirm).
        sentence_embeddings = model_output[0][:, 0]
        # L2-normalize each embedding.
        sentence_embeddings = torch.nn.functional.normalize(sentence_embeddings, p=2, dim=1)
    # Only one query is encoded, so take row 0 as a plain list of floats.
    query_vector = sentence_embeddings[0].tolist()
    elapsed_time_embedding = time.perf_counter() - embedding_start

    # step2: search meilisearch
    search_start = time.perf_counter()
    response = meilisearch_index.search(
        "",  # empty text query: only the vector is supplied
        opt_params={
            "vector": query_vector,
            "hybrid": {"semanticRatio": 1.0},
            "limit": 5,
            "attributesToRetrieve": ["text", "source_page_url", "source_page_title", "library"],
        },
    )
    elapsed_time_meilisearch = time.perf_counter() - search_start

    hits = response["hits"]
    sources_md = ", ".join(_source_link(hit) for hit in hits)

    # step3: present the results in markdown
    if output_option == "human-friendly":
        stats = (
            f"Stats:\n\nembedding time: {elapsed_time_embedding:.2f}s\n\n"
            f"meilisearch time: {elapsed_time_meilisearch:.2f}s\n\n---\n\n"
        )
        sections = "".join(
            f"{hit['text']}\n\nsrc: {_source_link(hit)}\n\n---\n\n" for hit in hits
        )
        return stats + sections, sources_md

    # output_option == "RAG-friendly" (validated at the top)
    hit_text_str = "\n------------\n".join(hit["text"] for hit in hits)
    return hit_text_str, sources_md
# Gradio UI: a query box and an output-format selector feed search_embeddings,
# whose two returned strings are rendered in two Markdown panes.
demo = gr.Interface(
    fn=search_embeddings,
    inputs=[
        gr.Textbox(label="enter your query", placeholder="Type Markdown here...", lines=10),
        gr.Radio(label="Select an output option", choices=output_options, value="RAG-friendly"),
    ],
    outputs=[gr.Markdown(), gr.Markdown()],
    title="HF Docs Embeddings Explorer",  # fixed typo: was "Emebddings"
    allow_flagging="never",
)
# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()