import os

import cv2
import pandas as pd
import towhee
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
from towhee._types.image import Image


def create_milvus_collection(collection_name, dim):
    """Create a fresh Milvus collection for image embeddings and index it.

    Connects to Milvus under the "default" alias using the environment
    variables ``milvus.host``, ``milvus.port``, ``milvus.user`` and
    ``milvus.password``. If a collection called ``collection_name`` already
    exists it is dropped first, so any data stored in it is lost.

    Args:
        collection_name: Name of the collection to (re)create.
        dim: Dimension of the ``embedding`` float-vector field.

    Returns:
        The newly created ``Collection`` with an IVF_FLAT / L2 index on its
        ``embedding`` field.
    """
    connections.connect(
        alias="default",
        host=os.getenv("milvus.host"),
        port=os.getenv("milvus.port"),
        user=os.getenv("milvus.user"),
        password=os.getenv("milvus.password"),
    )

    # Start from a clean slate: remove any previous collection of that name.
    if utility.has_collection(collection_name):
        utility.drop_collection(collection_name)

    # The image path is the primary key (supplied by the caller, not
    # auto-generated); the embedding is the searchable vector field.
    fields = [
        FieldSchema(
            name='path',
            dtype=DataType.VARCHAR,
            description='ids',
            max_length=100,
            is_primary=True,
            auto_id=False,
        ),
        FieldSchema(
            name='embedding',
            dtype=DataType.FLOAT_VECTOR,
            description='embedding vectors',
            dim=dim,
        ),
    ]
    schema = CollectionSchema(fields=fields, description='reverse image search')
    collection = Collection(name=collection_name, schema=schema)

    # Create an IVF_FLAT index on the embedding field.
    index_params = {
        'metric_type': 'L2',
        'index_type': "IVF_FLAT",
        'params': {"nlist": 2048},
    }
    collection.create_index(field_name="embedding", index_params=index_params)
    return collection


# NOTE(review): 2048 is presumably the embedding size produced by the
# resnet50 model used in the pipeline below -- confirm if the model changes.
collection = create_milvus_collection('reverse_image_search', 2048)

df = pd.read_csv('reverse_image_search.csv')

# Lookup table from image id to image path.
id_img = df.set_index('id')['path'].to_dict()

dc = (
    # Read the CSV table, which contains the id, path and label columns.
    towhee.read_csv('reverse_image_search.csv')
    # Process the data with a parallelism of 3.
    .set_parallel(3)
    # Convert each row's id from str to int.
    .runas_op['id', 'id'](func=lambda x: int(x))
    # Read the image at each row's path and decode it into Towhee's image format.
    .image_decode['path', 'img']()
    # Extract the feature vector.
    .image_embedding.timm['img', 'vec'](model_name='resnet50')
    # Normalize the vector.
    .tensor_normalize['vec', 'vec']()
    # Insert path and vec into the Milvus collection in batches of 100.
    .to_milvus['path', 'vec'](collection=collection, batch=100)
)