DawnC committed on
Commit
f03d5fa
1 Parent(s): edb9086

Update smart_breed_matcher.py

Browse files
Files changed (1) hide show
  1. smart_breed_matcher.py +129 -107
smart_breed_matcher.py CHANGED
@@ -130,51 +130,66 @@ class SmartBreedMatcher:
130
  # return final_similarity
131
 
132
  def _calculate_breed_similarity(self, breed1_features: Dict, breed2_features: Dict) -> float:
133
- """計算兩個品種之間的相似度,包含健康和噪音因素"""
134
- # 計算描述文本的相似度
135
  desc1_embedding = self._get_cached_embedding(breed1_features['description'])
136
  desc2_embedding = self._get_cached_embedding(breed2_features['description'])
137
  description_similarity = float(util.pytorch_cos_sim(desc1_embedding, desc2_embedding))
138
-
139
- # 使用改進後的尺寸相似度計算
140
- size_similarity = self._calculate_size_similarity(
141
- breed1_features['size'],
142
  breed2_features['size'],
143
- self._get_preferred_size_range(breed1_features['description'])
144
  )
145
-
146
- # 其他相似度計算
147
- exercise_similarity = self._calculate_exercise_similarity(breed1_features['exercise'], breed2_features['exercise'])
 
 
 
 
 
 
 
 
 
 
 
148
  temp1_embedding = self._get_cached_embedding(breed1_features['temperament'])
149
  temp2_embedding = self._get_cached_embedding(breed2_features['temperament'])
150
  temperament_similarity = float(util.pytorch_cos_sim(temp1_embedding, temp2_embedding))
 
151
  health_score1 = self._calculate_health_score(breed1_features['breed_name'])
152
  health_score2 = self._calculate_health_score(breed2_features['breed_name'])
153
  health_similarity = 1.0 - abs(health_score1 - health_score2)
 
154
  noise_similarity = self._calculate_noise_similarity(
155
  breed1_features['breed_name'],
156
  breed2_features['breed_name']
157
  )
158
 
159
- # 調整權重,增加尺寸的重要性
160
  weights = {
161
- 'description': 0.20, # 降低描述權重
162
- 'temperament': 0.20,
163
- 'exercise': 0.20,
164
- 'size': 0.20, # 顯著提高尺寸權重
165
- 'health': 0.10, # 略微降低
166
- 'noise': 0.10 # 略微降低
 
167
  }
168
-
169
  final_similarity = (
170
- description_similarity * weights['description'] +
171
- temperament_similarity * weights['temperament'] +
172
- exercise_similarity * weights['exercise'] +
173
  size_similarity * weights['size'] +
 
 
 
 
174
  health_similarity * weights['health'] +
175
  noise_similarity * weights['noise']
176
  )
177
-
178
  return final_similarity
179
 
180
 
@@ -240,104 +255,111 @@ class SmartBreedMatcher:
240
  'scores': {k: round(v, 4) for k, v in scores.items()}
241
  }
242
 
243
- def _get_preferred_size_range(self, description: str) -> tuple:
244
- """分析描述文本,確定用戶偏好的尺寸範圍"""
245
- description = description.lower()
246
-
247
- # 定義關鍵詞匹配
248
- size_indicators = {
249
- 'small': ['small', 'tiny', 'little'],
250
- 'medium': ['medium', 'medium-sized', 'moderate size'],
251
- 'medium-large': ['medium to large', 'slightly larger', 'medium-large'],
252
- 'large': ['large', 'big'],
253
- 'giant': ['giant', 'huge', 'very large']
254
  }
255
 
256
- # 檢測負面提及
257
- negative_indicators = {
258
- 'small': ['not too small', 'not small'],
259
- 'large': ['not too large', 'not too big', 'not large'],
260
- 'giant': ['not giant', 'not huge']
261
- }
262
 
263
- # 默認為中型
264
- preferred_min = 2 # medium
265
- preferred_max = 3 # large
266
 
267
- # 分析描述中的尺寸偏好
268
- for size, keywords in size_indicators.items():
269
- for keyword in keywords:
270
- if keyword in description:
271
- if size == 'small':
272
- preferred_min, preferred_max = 1, 2
273
- elif size == 'medium':
274
- preferred_min, preferred_max = 2, 2
275
- elif size == 'medium-large':
276
- preferred_min, preferred_max = 2, 3
277
- elif size == 'large':
278
- preferred_min, preferred_max = 3, 3
279
- elif size == 'giant':
280
- preferred_min, preferred_max = 3, 4
281
 
282
- # 檢查負面提及並調整
283
- for size, keywords in negative_indicators.items():
284
- for keyword in keywords:
285
- if keyword in description:
286
- if size == 'small':
287
- preferred_min = max(2, preferred_min)
288
- elif size == 'large':
289
- preferred_max = min(2, preferred_max)
290
- elif size == 'giant':
291
- preferred_max = min(3, preferred_max)
292
 
293
- return (preferred_min, preferred_max)
294
 
295
- def _calculate_size_similarity(self, size1: str, size2: str, preferred_range: tuple = None) -> float:
296
- """改進的尺寸相似度計算"""
297
- # 更細緻的尺寸映射
298
- size_map = {
299
- 'Tiny': 0.5,
300
- 'Small': 1,
301
- 'Small-Medium': 1.5,
302
- 'Medium': 2,
303
- 'Medium-Large': 2.5,
304
- 'Large': 3,
305
- 'Giant': 4
 
 
 
 
 
 
 
 
 
 
 
 
 
306
  }
307
 
308
- # 獲取數值
309
- value1 = size_map.get(size1, 2)
310
- value2 = size_map.get(size2, 2)
311
 
312
- # 基礎相似度計算
313
- base_similarity = 1.0 - (abs(value1 - value2) / 3.5) # 3.5 是最大可能差異
314
 
315
- # 如果有偏好範圍,進行額外調整
316
- if preferred_range:
317
- preferred_min, preferred_max = preferred_range
318
-
319
- # 檢查是否在偏好範圍內
320
- in_range = (preferred_min <= value2 <= preferred_max)
321
-
322
- # 如果不在範圍內,根據距離降低分數
323
- if not in_range:
324
- distance_to_range = min(
325
- abs(value2 - preferred_min),
326
- abs(value2 - preferred_max)
327
- )
328
- penalty = distance_to_range * 0.2 # 每單位差異降低20%
329
- base_similarity *= (1 - penalty)
330
 
331
- return max(0.0, min(1.0, base_similarity)) # 確保在 [0, 1] 範圍內
332
-
333
- def _calculate_exercise_similarity(self, exercise1: str, exercise2: str) -> float:
334
- exercise_map = {'Low': 1, 'Moderate': 2, 'High': 3, 'Very High': 4}
335
- value1 = exercise_map.get(exercise1, 2) # 預設為 'Moderate'
336
- value2 = exercise_map.get(exercise2, 2) # 預設為 'Moderate'
337
 
338
- # 計算相似度
339
- exercise_similarity = 1.0 - abs(value1 - value2) / 3
340
- return max(0.0, exercise_similarity) # 確保相似度在 [0, 1] 範圍內
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
341
 
342
  def _calculate_health_score(self, breed_name: str) -> float:
343
  """計算品種的健康分數"""
 
130
  # return final_similarity
131
 
132
  def _calculate_breed_similarity(self, breed1_features: Dict, breed2_features: Dict) -> float:
133
+ """增強版品種相似度計算"""
134
+ # 基礎相似度計算
135
  desc1_embedding = self._get_cached_embedding(breed1_features['description'])
136
  desc2_embedding = self._get_cached_embedding(breed2_features['description'])
137
  description_similarity = float(util.pytorch_cos_sim(desc1_embedding, desc2_embedding))
138
+
139
+ # 尺寸相似度(加強版)
140
+ size_similarity = self._calculate_size_similarity_enhanced(
141
+ breed1_features['size'],
142
  breed2_features['size'],
143
+ breed2_features['description'] # 加入描述以判斷適應性
144
  )
145
+
146
+ # 運動需求相似度(加強版)
147
+ exercise_similarity = self._calculate_exercise_similarity_enhanced(
148
+ breed1_features['exercise'],
149
+ breed2_features['exercise']
150
+ )
151
+
152
+ # 美容需求相似度
153
+ grooming_similarity = self._calculate_grooming_similarity(
154
+ breed1_features['breed_name'],
155
+ breed2_features['breed_name']
156
+ )
157
+
158
+ # 其他相似度計算保持不變
159
  temp1_embedding = self._get_cached_embedding(breed1_features['temperament'])
160
  temp2_embedding = self._get_cached_embedding(breed2_features['temperament'])
161
  temperament_similarity = float(util.pytorch_cos_sim(temp1_embedding, temp2_embedding))
162
+
163
  health_score1 = self._calculate_health_score(breed1_features['breed_name'])
164
  health_score2 = self._calculate_health_score(breed2_features['breed_name'])
165
  health_similarity = 1.0 - abs(health_score1 - health_score2)
166
+
167
  noise_similarity = self._calculate_noise_similarity(
168
  breed1_features['breed_name'],
169
  breed2_features['breed_name']
170
  )
171
 
172
+ # 調整權重分配
173
  weights = {
174
+ 'size': 0.20, # 仍然重要但不過分主導
175
+ 'exercise': 0.20, # 保持高權重因為這是主要需求
176
+ 'temperament': 0.15, # 保持不變因為性格很重要
177
+ 'grooming': 0.15, # 保持不變
178
+ 'health': 0.15, # 提高一點因為這影響長期生活
179
+ 'description': 0.10, # 保持不變
180
+ 'noise': 0.05 # 保持不變因為不是主要考慮因素
181
  }
182
+
183
  final_similarity = (
 
 
 
184
  size_similarity * weights['size'] +
185
+ exercise_similarity * weights['exercise'] +
186
+ grooming_similarity * weights['grooming'] +
187
+ temperament_similarity * weights['temperament'] +
188
+ description_similarity * weights['description'] +
189
  health_similarity * weights['health'] +
190
  noise_similarity * weights['noise']
191
  )
192
+
193
  return final_similarity
194
 
195
 
 
255
  'scores': {k: round(v, 4) for k, v in scores.items()}
256
  }
257
 
258
+ def _calculate_size_similarity_enhanced(self, size1: str, size2: str, description: str) -> float:
259
+ """增強版尺寸相似度計算"""
260
+ # 更細緻的尺寸映射
261
+ size_map = {
262
+ 'Tiny': 0,
263
+ 'Small': 1,
264
+ 'Small-Medium': 2,
265
+ 'Medium': 3,
266
+ 'Medium-Large': 4,
267
+ 'Large': 5,
268
+ 'Giant': 6
269
  }
270
 
271
+ # 轉換尺寸到數值
272
+ value1 = size_map.get(self._normalize_size(size1), 3) # 預設為 Medium
273
+ value2 = size_map.get(self._normalize_size(size2), 3)
 
 
 
274
 
275
+ # 計算基礎相似度
276
+ base_similarity = 1.0 - (abs(value1 - value2) / 6.0)
 
277
 
278
+ # 根據用戶需求的尺寸偏好調整分數
279
+ if size2 in ['Small', 'Tiny']:
280
+ base_similarity *= 0.5 # 顯著降低小型犬的分數
281
+ elif size2 == 'Giant':
282
+ base_similarity *= 0.6 # 顯著降低巨型犬的分數
283
+ elif size2 in ['Medium', 'Medium-Large']:
284
+ base_similarity *= 1.2 # 提高中型和中大型犬的分數
 
 
 
 
 
 
 
285
 
286
+ # 考慮適應性
287
+ if 'apartment' in description.lower() and size2 in ['Large', 'Giant']:
288
+ base_similarity *= 0.8 # 降低大型犬在公寓的適應性分數
 
 
 
 
 
 
 
289
 
290
+ return min(1.0, base_similarity) # 確保不超過1.0
291
 
292
+ def _normalize_size(self, size: str) -> str:
293
+ """標準化尺寸分類"""
294
+ size = size.lower()
295
+ if 'tiny' in size:
296
+ return 'Tiny'
297
+ elif 'small' in size:
298
+ return 'Small'
299
+ elif 'medium' in size and 'large' in size:
300
+ return 'Medium-Large'
301
+ elif 'medium' in size:
302
+ return 'Medium'
303
+ elif 'giant' in size:
304
+ return 'Giant'
305
+ elif 'large' in size:
306
+ return 'Large'
307
+ return 'Medium' # 預設
308
+
309
+ def _calculate_exercise_similarity_enhanced(self, exercise1: str, exercise2: str) -> float:
310
+ """增強版運動需求相似度計算"""
311
+ exercise_map = {
312
+ 'Low': 1,
313
+ 'Moderate': 2,
314
+ 'High': 3,
315
+ 'Very High': 4
316
  }
317
 
318
+ value1 = exercise_map.get(exercise1, 2)
319
+ value2 = exercise_map.get(exercise2, 2)
 
320
 
321
+ # 基礎相似度
322
+ base_similarity = 1.0 - abs(value1 - value2) / 3.0
323
 
324
+ # 根據用戶需求調整
325
+ if exercise2 in ['High', 'Very High']:
326
+ base_similarity *= 1.2 # 提高高運動量品種的分數
327
+ elif exercise2 == 'Low':
328
+ base_similarity *= 0.7 # 降低低運動量品種的分數
 
 
 
 
 
 
 
 
 
 
329
 
330
+ return min(1.0, base_similarity)
 
 
 
 
 
331
 
332
+ def _calculate_grooming_similarity(self, breed1: str, breed2: str) -> float:
333
+ """計算美容需求相似度"""
334
+ grooming_map = {
335
+ 'Low': 1,
336
+ 'Moderate': 2,
337
+ 'High': 3
338
+ }
339
+
340
+ # 從dog_data中獲取美容需求
341
+ breed1_info = next((dog for dog in self.dog_data if dog[1] == breed1), None)
342
+ breed2_info = next((dog for dog in self.dog_data if dog[1] == breed2), None)
343
+
344
+ if not breed1_info or not breed2_info:
345
+ return 0.5 # 默認中等相似度
346
+
347
+ grooming1 = breed1_info[8] # Grooming_Needs index
348
+ grooming2 = breed2_info[8]
349
+
350
+ value1 = grooming_map.get(grooming1, 2)
351
+ value2 = grooming_map.get(grooming2, 2)
352
+
353
+ # 基礎相似度
354
+ base_similarity = 1.0 - abs(value1 - value2) / 2.0
355
+
356
+ # 根據用戶需求調整
357
+ if grooming2 == 'Moderate':
358
+ base_similarity *= 1.1 # 稍微提高中等美容需求的分數
359
+ elif grooming2 == 'High':
360
+ base_similarity *= 0.9 # 稍微降低高美容需求的分數
361
+
362
+ return min(1.0, base_similarity)
363
 
364
  def _calculate_health_score(self, breed_name: str) -> float:
365
  """計算品種的健康分數"""