Spaces:
Running
on
Zero
Running
on
Zero
Update smart_breed_matcher.py
Browse files- smart_breed_matcher.py +102 -21
smart_breed_matcher.py
CHANGED
@@ -136,34 +136,34 @@ class SmartBreedMatcher:
|
|
136 |
desc2_embedding = self._get_cached_embedding(breed2_features['description'])
|
137 |
description_similarity = float(util.pytorch_cos_sim(desc1_embedding, desc2_embedding))
|
138 |
|
139 |
-
#
|
140 |
-
size_similarity = self._calculate_size_similarity(
|
141 |
-
|
|
|
|
|
|
|
142 |
|
143 |
-
#
|
|
|
144 |
temp1_embedding = self._get_cached_embedding(breed1_features['temperament'])
|
145 |
temp2_embedding = self._get_cached_embedding(breed2_features['temperament'])
|
146 |
temperament_similarity = float(util.pytorch_cos_sim(temp1_embedding, temp2_embedding))
|
147 |
-
|
148 |
-
# 健康分數相似度
|
149 |
health_score1 = self._calculate_health_score(breed1_features['breed_name'])
|
150 |
health_score2 = self._calculate_health_score(breed2_features['breed_name'])
|
151 |
health_similarity = 1.0 - abs(health_score1 - health_score2)
|
152 |
-
|
153 |
-
# 噪音水平相似度
|
154 |
noise_similarity = self._calculate_noise_similarity(
|
155 |
breed1_features['breed_name'],
|
156 |
breed2_features['breed_name']
|
157 |
)
|
158 |
|
159 |
-
#
|
160 |
weights = {
|
161 |
-
'description': 0.
|
162 |
'temperament': 0.20,
|
163 |
-
'exercise': 0.
|
164 |
-
'size': 0.
|
165 |
-
'health': 0.
|
166 |
-
'noise': 0.
|
167 |
}
|
168 |
|
169 |
final_similarity = (
|
@@ -240,14 +240,95 @@ class SmartBreedMatcher:
|
|
240 |
'scores': {k: round(v, 4) for k, v in scores.items()}
|
241 |
}
|
242 |
|
243 |
-
def
|
244 |
-
|
245 |
-
|
246 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
247 |
|
248 |
-
|
249 |
-
|
250 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
|
252 |
def _calculate_exercise_similarity(self, exercise1: str, exercise2: str) -> float:
|
253 |
exercise_map = {'Low': 1, 'Moderate': 2, 'High': 3, 'Very High': 4}
|
|
|
136 |
desc2_embedding = self._get_cached_embedding(breed2_features['description'])
|
137 |
description_similarity = float(util.pytorch_cos_sim(desc1_embedding, desc2_embedding))
|
138 |
|
139 |
+
# 使用改進後的尺寸相似度計算
|
140 |
+
size_similarity = self._calculate_size_similarity(
|
141 |
+
breed1_features['size'],
|
142 |
+
breed2_features['size'],
|
143 |
+
self._get_preferred_size_range(breed1_features['description'])
|
144 |
+
)
|
145 |
|
146 |
+
# 其他相似度計算
|
147 |
+
exercise_similarity = self._calculate_exercise_similarity(breed1_features['exercise'], breed2_features['exercise'])
|
148 |
temp1_embedding = self._get_cached_embedding(breed1_features['temperament'])
|
149 |
temp2_embedding = self._get_cached_embedding(breed2_features['temperament'])
|
150 |
temperament_similarity = float(util.pytorch_cos_sim(temp1_embedding, temp2_embedding))
|
|
|
|
|
151 |
health_score1 = self._calculate_health_score(breed1_features['breed_name'])
|
152 |
health_score2 = self._calculate_health_score(breed2_features['breed_name'])
|
153 |
health_similarity = 1.0 - abs(health_score1 - health_score2)
|
|
|
|
|
154 |
noise_similarity = self._calculate_noise_similarity(
|
155 |
breed1_features['breed_name'],
|
156 |
breed2_features['breed_name']
|
157 |
)
|
158 |
|
159 |
+
# 調整權重,增加尺寸的重要性
|
160 |
weights = {
|
161 |
+
'description': 0.20, # 降低描述權重
|
162 |
'temperament': 0.20,
|
163 |
+
'exercise': 0.20,
|
164 |
+
'size': 0.20, # 顯著提高尺寸權重
|
165 |
+
'health': 0.10, # 略微降低
|
166 |
+
'noise': 0.10 # 略微降低
|
167 |
}
|
168 |
|
169 |
final_similarity = (
|
|
|
240 |
'scores': {k: round(v, 4) for k, v in scores.items()}
|
241 |
}
|
242 |
|
243 |
+
def _get_preferred_size_range(self, description: str) -> tuple:
|
244 |
+
"""分析描述文本,確定用戶偏好的尺寸範圍"""
|
245 |
+
description = description.lower()
|
246 |
+
|
247 |
+
# 定義關鍵詞匹配
|
248 |
+
size_indicators = {
|
249 |
+
'small': ['small', 'tiny', 'little'],
|
250 |
+
'medium': ['medium', 'medium-sized', 'moderate size'],
|
251 |
+
'medium-large': ['medium to large', 'slightly larger', 'medium-large'],
|
252 |
+
'large': ['large', 'big'],
|
253 |
+
'giant': ['giant', 'huge', 'very large']
|
254 |
+
}
|
255 |
+
|
256 |
+
# 檢測負面提及
|
257 |
+
negative_indicators = {
|
258 |
+
'small': ['not too small', 'not small'],
|
259 |
+
'large': ['not too large', 'not too big', 'not large'],
|
260 |
+
'giant': ['not giant', 'not huge']
|
261 |
+
}
|
262 |
+
|
263 |
+
# 默認為中型
|
264 |
+
preferred_min = 2 # medium
|
265 |
+
preferred_max = 3 # large
|
266 |
+
|
267 |
+
# 分析描述中的尺寸偏好
|
268 |
+
for size, keywords in size_indicators.items():
|
269 |
+
for keyword in keywords:
|
270 |
+
if keyword in description:
|
271 |
+
if size == 'small':
|
272 |
+
preferred_min, preferred_max = 1, 2
|
273 |
+
elif size == 'medium':
|
274 |
+
preferred_min, preferred_max = 2, 2
|
275 |
+
elif size == 'medium-large':
|
276 |
+
preferred_min, preferred_max = 2, 3
|
277 |
+
elif size == 'large':
|
278 |
+
preferred_min, preferred_max = 3, 3
|
279 |
+
elif size == 'giant':
|
280 |
+
preferred_min, preferred_max = 3, 4
|
281 |
+
|
282 |
+
# 檢查負面提及並調整
|
283 |
+
for size, keywords in negative_indicators.items():
|
284 |
+
for keyword in keywords:
|
285 |
+
if keyword in description:
|
286 |
+
if size == 'small':
|
287 |
+
preferred_min = max(2, preferred_min)
|
288 |
+
elif size == 'large':
|
289 |
+
preferred_max = min(2, preferred_max)
|
290 |
+
elif size == 'giant':
|
291 |
+
preferred_max = min(3, preferred_max)
|
292 |
+
|
293 |
+
return (preferred_min, preferred_max)
|
294 |
|
295 |
+
def _calculate_size_similarity(self, size1: str, size2: str, preferred_range: tuple = None) -> float:
|
296 |
+
"""改進的尺寸相似度計算"""
|
297 |
+
# 更細緻的尺寸映射
|
298 |
+
size_map = {
|
299 |
+
'Tiny': 0.5,
|
300 |
+
'Small': 1,
|
301 |
+
'Small-Medium': 1.5,
|
302 |
+
'Medium': 2,
|
303 |
+
'Medium-Large': 2.5,
|
304 |
+
'Large': 3,
|
305 |
+
'Giant': 4
|
306 |
+
}
|
307 |
+
|
308 |
+
# 獲取數值
|
309 |
+
value1 = size_map.get(size1, 2)
|
310 |
+
value2 = size_map.get(size2, 2)
|
311 |
+
|
312 |
+
# 基礎相似度計算
|
313 |
+
base_similarity = 1.0 - (abs(value1 - value2) / 3.5) # 3.5 是最大可能差異
|
314 |
+
|
315 |
+
# 如果有偏好範圍,進行額外調整
|
316 |
+
if preferred_range:
|
317 |
+
preferred_min, preferred_max = preferred_range
|
318 |
+
|
319 |
+
# 檢查是否在偏好範圍內
|
320 |
+
in_range = (preferred_min <= value2 <= preferred_max)
|
321 |
+
|
322 |
+
# 如果不在範圍內,根據距離降低分數
|
323 |
+
if not in_range:
|
324 |
+
distance_to_range = min(
|
325 |
+
abs(value2 - preferred_min),
|
326 |
+
abs(value2 - preferred_max)
|
327 |
+
)
|
328 |
+
penalty = distance_to_range * 0.2 # 每單位差異降低20%
|
329 |
+
base_similarity *= (1 - penalty)
|
330 |
+
|
331 |
+
return max(0.0, min(1.0, base_similarity)) # 確保在 [0, 1] 範圍內
|
332 |
|
333 |
def _calculate_exercise_similarity(self, exercise1: str, exercise2: str) -> float:
|
334 |
exercise_map = {'Low': 1, 'Moderate': 2, 'High': 3, 'Very High': 4}
|