# searchpic / insert.py
# wangfuqiang
# submit
# 33f7820
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
import towhee
import os
def create_milvus_collection(collection_name: str, dim: int) -> "Collection":
    """Create a fresh Milvus collection for reverse image search.

    Connects to Milvus using the ``milvus.host``, ``milvus.port``,
    ``milvus.user`` and ``milvus.password`` environment variables, then
    (re)creates the collection and builds an IVF_FLAT index on its
    ``embedding`` field.

    WARNING: if a collection named ``collection_name`` already exists it is
    dropped first, so any data stored in it is lost.

    Args:
        collection_name: Name of the collection to (re)create.
        dim: Dimensionality of the vectors stored in the ``embedding`` field.

    Returns:
        The newly created ``Collection`` with the index already built.
    """
    connections.connect(
        alias="default",
        host=os.getenv("milvus.host"),
        port=os.getenv("milvus.port"),
        user=os.getenv("milvus.user"),
        password=os.getenv("milvus.password"),
    )

    # Start from a clean slate: remove any previous collection of this name.
    if utility.has_collection(collection_name):
        utility.drop_collection(collection_name)

    fields = [
        # The image path doubles as the primary key, so it must be unique.
        # NOTE(review): max_length=100 caps the stored path length -- confirm
        # that every path in the dataset fits.
        FieldSchema(
            name='path',
            dtype=DataType.VARCHAR,
            description='ids',  # was misspelled 'descrition' and never applied
            max_length=100,
            is_primary=True,
            auto_id=False,
        ),
        FieldSchema(
            name='embedding',
            dtype=DataType.FLOAT_VECTOR,
            description='embedding vectors',  # was misspelled 'descrition'
            dim=dim,
        ),
    ]
    schema = CollectionSchema(fields=fields, description='reverse image search')
    collection = Collection(name=collection_name, schema=schema)

    # Create an IVF_FLAT index on the vector field using L2 distance.
    index_params = {
        'metric_type': 'L2',
        'index_type': "IVF_FLAT",
        'params': {"nlist": 2048},
    }
    collection.create_index(field_name="embedding", index_params=index_params)
    return collection
# Create the 'reverse_image_search' collection with 2048-dimensional vectors.
# create_milvus_collection drops an existing collection of the same name, so
# running this script discards previously inserted data.
collection = create_milvus_collection('reverse_image_search', 2048)
import pandas as pd
# Load the dataset index; the 'id' and 'path' columns are read below.
df = pd.read_csv('reverse_image_search.csv')
# NOTE(review): cv2 and Image are not referenced in the visible part of this
# file -- confirm whether they are still needed.
import cv2
from towhee._types.image import Image
# Lookup table mapping each row's id to its image path.
# NOTE(review): id_img is not used in the visible part of this file.
id_img = df.set_index('id')['path'].to_dict()
# Ingestion pipeline: read the CSV, embed every image, and insert the
# embeddings into the Milvus collection created above.
dc = (
towhee.read_csv('reverse_image_search.csv')  # read the CSV table (id and path columns; also label, per the original comment)
.set_parallel(3)  # process the data with a parallelism of 3
.runas_op['id', 'id'](func=lambda x: int(x))  # convert each row's id with int() (original comment: from str to int)
.image_decode['path', 'img']()  # load the image at each row's path and decode it into Towhee's image format
.image_embedding.timm['img', 'vec'](model_name='resnet50')  # extract a feature vector with the timm 'resnet50' model
.tensor_normalize['vec', 'vec']()  # normalize the feature vector
.to_milvus['path','vec'](collection=collection, batch=100)  # insert path and vec into the Milvus collection in batches of 100 (path, not id, is the primary key)
)