from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
import towhee
import os


def create_milvus_collection(collection_name, dim):
    """Create a fresh Milvus collection for image embeddings and index it.

    Connects to Milvus under the "default" alias using the environment
    variables ``milvus.host``, ``milvus.port``, ``milvus.user`` and
    ``milvus.password``. If a collection named ``collection_name`` already
    exists it is DROPPED (all of its data is lost) before a new one is
    created.

    Args:
        collection_name: Name of the collection to (re)create.
        dim: Dimension of the vectors stored in the ``embedding`` field.

    Returns:
        The newly created ``Collection``, with an IVF_FLAT / L2 index
        built on its ``embedding`` field.
    """
    connections.connect(
        alias="default",
        host=os.getenv("milvus.host"),
        port=os.getenv("milvus.port"),
        user=os.getenv("milvus.user"),
        password=os.getenv("milvus.password"),
    )

    # Start from a clean slate: any existing collection with this name is removed.
    if utility.has_collection(collection_name):
        utility.drop_collection(collection_name)

    # The image path (VARCHAR, at most 100 characters) is the user-supplied
    # primary key. The keyword is `description`; the earlier `descrition`
    # spelling was swallowed as an unknown extra kwarg, so the field
    # descriptions were never actually set.
    fields = [
        FieldSchema(name='path', dtype=DataType.VARCHAR, description='ids',
                    max_length=100, is_primary=True, auto_id=False),
        FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR,
                    description='embedding vectors', dim=dim),
    ]
    schema = CollectionSchema(fields=fields, description='reverse image search')
    collection = Collection(name=collection_name, schema=schema)

    # Create an IVF_FLAT index (L2 metric, nlist=2048) on the vector field.
    index_params = {
        'metric_type': 'L2',
        'index_type': "IVF_FLAT",
        'params': {"nlist": 2048}
    }
    collection.create_index(field_name="embedding", index_params=index_params)
    return collection


# (Re)create the 'reverse_image_search' collection with 2048-dimensional vectors.
# Runs at import time and drops any existing collection of the same name.
# NOTE(review): 2048 presumably matches the output size of the embedding
# model used by the pipeline below — confirm.
collection = create_milvus_collection('reverse_image_search', 2048)


import pandas as pd

# Load the image table; the code below relies on its 'id' and 'path' columns.
df = pd.read_csv('reverse_image_search.csv')
# NOTE(review): cv2 and Image are not used in the visible part of this script —
# presumably needed further down; confirm before removing.
import cv2
from towhee._types.image import Image

# Lookup table mapping each row's id to its image path.
id_img = df.set_index('id')['path'].to_dict()

# Ingestion pipeline: read the CSV, decode each image, embed it with a
# ResNet-50 model, normalize the embedding and insert it into Milvus.
dc = (
    towhee.read_csv('reverse_image_search.csv') # read the CSV table (the 'id' and 'path' columns are used below)
 .set_parallel(3)  # process the data with a parallelism of 3
 .runas_op['id', 'id'](func=lambda x: int(x)) # cast each row's id to int (presumably read as str from the CSV)
 .image_decode['path', 'img']() # load the image at each row's path and decode it into the 'img' column
 .image_embedding.timm['img', 'vec'](model_name='resnet50') # extract the feature vector with the timm 'resnet50' model
 .tensor_normalize['vec', 'vec']() # normalize the vector in place
 .to_milvus['path','vec'](collection=collection, batch=100) # insert path and vec into the Milvus collection in batches of 100
)