import gradio as gr
import os
import time

from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient, models


# Qdrant Cloud client for the collection that stores the question embeddings.
qdrant = QdrantClient(
    url=os.environ['QDRANT_URL'],
    api_key=os.environ['QDRANT_API_KEY'],
)
# Embedding model: 384-dimensional vectors, maximum sequence length 512.
encoder = SentenceTransformer(model_name_or_path='BAAI/bge-small-en-v1.5')


def compute_embedding(sentences, emb_model):
  # Encode one or more sentences into dense vectors with the given model.
  return emb_model.encode(sentences=sentences)


def quantized_vector_search(embedding, top_k=5):
  # Approximate search against the binary-quantized index, rescoring the
  # oversampled candidates with the original full-precision vectors.
  return qdrant.search(
    collection_name='questions-binaryq',
    query_vector=embedding,
    limit=top_k,
    with_payload=True,
    search_params=models.SearchParams(
      exact=False,
      quantization=models.QuantizationSearchParams(
        ignore=False,       # use the quantized index
        rescore=True,       # re-rank candidates with the original vectors
        oversampling=2.0,   # fetch 2x top_k candidates before rescoring
      )
    )
  )
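

# For reference, a minimal sketch (never called by this app) of how the
# 'questions-binaryq' collection could have been built: 384-dim cosine
# vectors with binary quantization, uploaded in batches of 200 as described
# in the Markdown below. The helper name, id scheme, and `always_ram` choice
# are illustrative assumptions, not the deployed indexing code.
def build_collection_sketch(questions):
  qdrant.recreate_collection(
    collection_name='questions-binaryq',
    vectors_config=models.VectorParams(
      size=384,
      distance=models.Distance.COSINE,
    ),
    quantization_config=models.BinaryQuantization(
      binary=models.BinaryQuantizationConfig(always_ram=True),
    ),
  )
  batch_size = 200
  for start in range(0, len(questions), batch_size):
    batch = questions[start:start + batch_size]
    vectors = compute_embedding(batch, encoder)
    qdrant.upsert(
      collection_name='questions-binaryq',
      points=models.Batch(
        ids=list(range(start, start + len(batch))),
        vectors=vectors.tolist(),
        payloads=[{'question': q} for q in batch],
      ),
    )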


def query(question, top_k=5):
  # Return empty results if the collection has not been created yet.
  collection_names = [c.name for c in qdrant.get_collections().collections]
  if 'questions-binaryq' not in collection_names:
    return {}, {}

  # Embed the input question.
  start_time = time.time()
  emb = compute_embedding(question, encoder)
  encoding_time = time.time() - start_time

  # Similarity search against the binary-quantized index.
  start_time = time.time()
  bq_results = quantized_vector_search(emb, top_k)
  bq_query_time = time.time() - start_time

  # Map each retrieved question to its rounded similarity score.
  bq_results_dict = {}
  for bq_result in bq_results:
    bq_results_dict[bq_result.payload['question']] = round(bq_result.score, 3)

  return bq_results_dict, {
    "Encoding Time": str(round(encoding_time, 3)) + " s",
    "Query Time (w/ Binary Quantization)": str(round(bq_query_time, 3)) + " s",
  }
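

# Hedged usage sketch (not wired into the UI): how query() could be smoke-
# tested from a Python shell with QDRANT_URL and QDRANT_API_KEY set. The
# sample question is an illustrative placeholder.
def smoke_test_query():
  results, timings = query("How do I learn to play the guitar?", top_k=3)
  print(results)  # {similar question: similarity score, ...}
  print(timings)  # {"Encoding Time": "... s", "Query Time (w/ Binary Quantization)": "... s"}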


with gr.Blocks() as semantic_search_demo:
  gr.Markdown(
    """
    # Quora Similar Questions Finder using Semantic Search 🔍
    Welcome to the Quora Similar Questions Finder, a tool designed to enhance your search experience.
    This Space uses semantic search to surface the most relevant matches from a pool of {} questions based on your input.
    ## Features
    - **Dataset**: Uses the Quora duplicate questions dataset from Hugging Face `datasets`, ensuring wide coverage of topics and queries.
    - **Advanced NLP Model**: Employs SentenceTransformer's [`BAAI/bge-small-en-v1.5`](https://huggingface.co/BAAI/bge-small-en-v1.5) model to create an embedding for each unique question. The model supports a maximum sequence length of 512 and produces 384-dimensional embeddings, enabling semantic comparison of questions.
    - **Efficient Storage**: Embeddings are stored in a binary-quantized vector index on `Qdrant` cloud, uploaded in batches of 200.
    ## How to Use
    1. **Enter a Question**: Type in your question.
    2. **Select K Value (1-30)**: Choose how many similar questions to retrieve.
    3. **Find Similar Questions**: Hit the button; the system generates an embedding for your question and runs a cosine-distance query against the Qdrant collection to retrieve the K most similar questions.
    Experience the power of semantic search and find the answers you need more efficiently.
    """.format(qdrant.get_collection('questions-binaryq').vectors_count)
  )

  with gr.Row():

    with gr.Column(scale=4):
      bq_results = gr.Label(
        label="Most similar questions w/ Binary Quantization",
        value={}
      )
    
    with gr.Column(scale=1):
      input_question = gr.Textbox(
        label="Question", 
        placeholder="Enter your question here"
      )
      top_k_slider = gr.Slider(
        value=3, 
        minimum=1, 
        maximum=30, 
        label="Top K", 
        interactive=True, 
        step=1
      )
      button = gr.Button("Find similar questions")
      timings_json = gr.JSON(label="Timings")
      gr.Examples(
        examples=[["Obama", 5]],
        inputs=[input_question, top_k_slider]
      )

  # Wire the button to the query function: inputs are the question text and
  # the Top K slider; outputs are the results label and the timing JSON.
  button.click(
    fn=query,
    inputs=[
      input_question,
      top_k_slider
    ],
    outputs=[
      bq_results,
      timings_json
    ]
  )


semantic_search_demo.launch(share=True)