Spaces:

fuqiang
/

searchpic

Runtime error

searchpic / insert.py

wangfuqiang

submit

33f7820 about 1 year ago

2.02 kB

	from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
	import towhee
	import os


	def create_milvus_collection(collection_name: str, dim: int):
	    """Connect to Milvus and (re)create an indexed image-search collection.

	    Connection settings are read from the ``milvus.host``, ``milvus.port``,
	    ``milvus.user`` and ``milvus.password`` environment variables.

	    WARNING: if a collection called ``collection_name`` already exists it is
	    dropped before the new one is created.

	    Args:
	        collection_name: Name of the collection to create.
	        dim: Dimension of the ``embedding`` float-vector field.

	    Returns:
	        The newly created ``Collection``, with an IVF_FLAT index (L2 metric,
	        ``nlist`` 2048) built on the ``embedding`` field.
	    """
	    connections.connect(
	        alias="default",
	        host=os.getenv("milvus.host"),
	        port=os.getenv("milvus.port"),
	        user=os.getenv("milvus.user"),
	        password=os.getenv("milvus.password"),
	    )

	    # Always start from an empty collection: remove any previous one first.
	    if utility.has_collection(collection_name):
	        utility.drop_collection(collection_name)

	    # NOTE: the keyword is ``description``. The earlier spelling
	    # ``descrition`` was not the documented parameter, so the field
	    # descriptions were never applied.
	    fields = [
	        FieldSchema(
	            name='path',
	            dtype=DataType.VARCHAR,
	            description='ids',
	            max_length=100,
	            is_primary=True,
	            auto_id=False,
	        ),
	        FieldSchema(
	            name='embedding',
	            dtype=DataType.FLOAT_VECTOR,
	            description='embedding vectors',
	            dim=dim,
	        ),
	    ]
	    schema = CollectionSchema(fields=fields, description='reverse image search')
	    collection = Collection(name=collection_name, schema=schema)

	    # Create an IVF_FLAT index on the vector field.
	    index_params = {
	        'metric_type': 'L2',
	        'index_type': "IVF_FLAT",
	        'params': {"nlist": 2048},
	    }
	    collection.create_index(field_name="embedding", index_params=index_params)
	    return collection


	# Build the target collection at import time. dim=2048 presumably matches the
	# size of the resnet50 embeddings inserted further down -- confirm if the
	# model changes.
	collection = create_milvus_collection(
	    collection_name='reverse_image_search',
	    dim=2048,
	)


	import pandas as pd

	# Load the image index table from the CSV file in the working directory.
	df = pd.read_csv('reverse_image_search.csv')
	import cv2
	from towhee._types.image import Image

	# Lookup table keyed by the 'id' column, with the 'path' column as values.
	id_img = df.set_index('id').loc[:, 'path'].to_dict()

	# Embedding/insert pipeline, built one stage at a time; each stage rebinds
	# ``dc`` to the result of the previous one.

	# Read the CSV table (per the original note: id, path and label columns).
	dc = towhee.read_csv('reverse_image_search.csv')
	# Process rows with a parallelism of 3.
	dc = dc.set_parallel(3)
	# Convert each row's id from str to int.
	dc = dc.runas_op['id', 'id'](func=lambda x: int(x))
	# Load the image referenced by each row's path and decode it into 'img'.
	dc = dc.image_decode['path', 'img']()
	# Extract a feature vector from each image with the timm resnet50 model.
	dc = dc.image_embedding.timm['img', 'vec'](model_name='resnet50')
	# Normalize the feature vector in place.
	dc = dc.tensor_normalize['vec', 'vec']()
	# Insert the (path, vec) pairs into the Milvus collection, 100 per batch.
	dc = dc.to_milvus['path', 'vec'](collection=collection, batch=100)