|
from pymilvus import MilvusClient, AnnSearchRequest, RRFRanker |
|
from langchain_community.embeddings.ollama import OllamaEmbeddings |
|
from pymilvus import WeightedRanker |
|
|
|
# Module-level RRFRanker built with k=10. Nothing in the visible part of this
# file reads it: hybrid_search() constructs its own WeightedRanker instead.
reranker = RRFRanker(k=10)


# Disabled scratch example of a plain single-vector search. It lives inside a
# bare string literal, so none of it is executed.
"""
embed_model = OllamaEmbeddings(model="bge-m3")
client = MilvusClient(uri="http://192.168.5.103:19530")

query = "Can I take pills?"
query_embedding = embed_model.embed_query(query)

# single vector search
res = client.search(
    collection_name="t_sur_sex_ed_article_spider",
    data=[query_embedding],
    limit=2,
    search_params={"metric_type": "COSINE", "params": {}},
    anns_field="chunk_vector",
    output_fields=["title", "chunk", "link", "category"]
)
"""
|
|
|
|
|
def hybrid_search(
    query: str,
    embed_model: OllamaEmbeddings,
    collection_name: str,
    client: MilvusClient,
    *,
    anns_fields=("title_vector", "chunk_vector", "tags"),
    weights=(0.3, 0.6, 0.1),
    limit: int = 3,
    output_fields=("title", "chunk", "link"),
):
    """Run a weighted multi-field ANN search against a Milvus collection.

    The query is embedded once and the same embedding is searched against
    every field in ``anns_fields``. The per-field candidate lists are then
    fused by a ``WeightedRanker`` inside ``client.hybrid_search``.

    Args:
        query: Text to embed with ``embed_model.embed_query``.
        embed_model: Embedding model used to vectorise ``query``.
        collection_name: Milvus collection to search.
        client: Connected ``MilvusClient``.
        anns_fields: Fields to search, one ``AnnSearchRequest`` per field.
            NOTE(review): the default includes ``"tags"``, which is searched
            with the same embedding and COSINE metric as the ``*_vector``
            fields -- confirm it really is a vector field in the schema.
        weights: Weights handed positionally to ``WeightedRanker``, in the
            same order as ``anns_fields``.
        limit: Used both as the per-field candidate count and as the final
            number of fused results.
        output_fields: Entity fields to return with each hit.

    Returns:
        Whatever ``client.hybrid_search`` returns, unmodified.

    Raises:
        ValueError: If ``anns_fields`` and ``weights`` differ in length.
    """
    # Fail fast, before paying for an embedding call: the ranker needs exactly
    # one weight per search request.
    if len(anns_fields) != len(weights):
        raise ValueError(
            f"anns_fields has {len(anns_fields)} entries but weights has "
            f"{len(weights)}; one weight is required per field"
        )

    query_embedding = embed_model.embed_query(query)

    # One request per field. The param dict is rebuilt on every iteration so
    # the requests never share a mutable object.
    requests = [
        AnnSearchRequest(
            data=[query_embedding],
            anns_field=field,
            param={
                "metric_type": "COSINE",
                "params": {"nprobe": 10, "level": 3},
            },
            limit=limit,
        )
        for field in anns_fields
    ]

    return client.hybrid_search(
        collection_name=collection_name,
        ranker=WeightedRanker(*weights),
        reqs=requests,
        limit=limit,
        output_fields=list(output_fields),
    )
|
|
|
|
|
def single_vector_search(
    query: str,
    embed_model: OllamaEmbeddings,
    collection_name: str,
    client: MilvusClient,
    anns_field: str,
    *,
    limit: int = 20,
    filter_expr: str = "content_type == 'A'",
):
    """Search one vector field and rank the hits by like/dislike ratio.

    The query is embedded, searched with the COSINE metric against
    ``anns_field``, and the hits for that single query vector are re-sorted
    by ``likes / (dislikes + 1)`` in descending order. The similarity order
    returned by Milvus is therefore kept only among hits with equal ratios
    (``sorted`` is stable).

    Args:
        query: Text to embed with ``embed_model.embed_query``.
        embed_model: Embedding model used to vectorise ``query``.
        collection_name: Milvus collection to search.
        client: Connected ``MilvusClient``.
        anns_field: Name of the vector field to search.
        limit: Maximum number of hits requested from Milvus.
        filter_expr: Boolean filter expression passed as ``filter``.

    Returns:
        A list of hits sorted by like ratio, best first. Empty when the
        search returns no result batches.
    """
    query_embedding = embed_model.embed_query(query)
    batches = client.search(
        collection_name=collection_name,
        data=[query_embedding],
        limit=limit,
        search_params={"metric_type": "COSINE", "params": {}},
        anns_field=anns_field,
        filter=filter_expr,
        output_fields=[
            "title",
            "content",
            "url",
            "content_type",
            "likes",
            "dislikes",
        ],
    )
    # Only one query vector is sent, so only the first batch is of interest.
    # Guard against an empty response instead of raising IndexError.
    if not batches:
        return []

    def like_ratio(hit):
        entity = hit["entity"]
        # A null or absent counter is treated as zero so one incomplete row
        # cannot abort the whole ranking.
        likes = entity.get("likes") or 0
        dislikes = entity.get("dislikes") or 0
        # +1 keeps the ratio defined when there are no dislikes.
        return likes / (dislikes + 1)

    return sorted(batches[0], key=like_ratio, reverse=True)
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: embed one query, run the hybrid search and dump the
    # chunk text of every hit returned for that query.
    embedder = OllamaEmbeddings(model="bge-m3")
    milvus = MilvusClient(uri="http://192.168.5.103:19530")

    question = "How to make a good blow job"
    batches = hybrid_search(
        question,
        embedder,
        "t_sur_sex_ed_article_spider",
        milvus,
    )

    # Only one query was sent, so only the first batch of hits is printed.
    # NOTE(review): the original indentation was lost; the separator print is
    # assumed to belong to the loop body (one separator after each chunk).
    for hit in batches[0]:
        print(hit["entity"]["chunk"])
        print("\n #############################")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|