fschwartzer committed on
Commit
6baa204
1 Parent(s): f555ad2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -25
app.py CHANGED
@@ -58,38 +58,35 @@ def calcular_fator_avaliacao(titulo, EC, PU):
58
  fator_avaliacao = max((4 * ec_pontuacao + 6 * PVU - 3 * PUB) / 100, VR)
59
  return fator_avaliacao
60
 
61
- def select_nearest_items(df, query):
62
- # Calculate title similarity for relevance
63
  df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
64
- # Filter by title similarity threshold
65
- df_filtered_by_similarity = df[df['Title_Similarity'] > 70] # Adjust similarity threshold as needed
66
-
67
- if df_filtered_by_similarity.empty:
68
- # If no closely matching titles are found, return an empty DataFrame
69
- return pd.DataFrame()
70
-
71
- # Filter by price, considering only those within a reasonable range
72
- reasonable_price_df = df_filtered_by_similarity[df_filtered_by_similarity['Price'] <= df_filtered_by_similarity['Price'].quantile(0.75)]
73
- # Calculate distance from target price for sorting
74
- target_price = reasonable_price_df['Price'].mode().min() if not reasonable_price_df['Price'].mode().empty else reasonable_price_df['Price'].median()
75
- reasonable_price_df['Distance'] = (reasonable_price_df['Price'] - target_price).abs()
76
 
77
- # Initialize container for selected items ensuring different marketplaces
78
- selected_items = []
79
- included_marketplaces = set()
80
 
81
- # Sort by distance to target price, then by title similarity
82
- df_sorted = reasonable_price_df.sort_values(['Distance', 'Title_Similarity'], ascending=[True, False])
 
83
 
84
  for _, row in df_sorted.iterrows():
85
- marketplace = row['Marketplace']
86
- if marketplace not in included_marketplaces:
87
- selected_items.append(row)
88
- included_marketplaces.add(marketplace)
89
- if len(selected_items) >= 5:
90
  break
91
 
92
- return pd.DataFrame(selected_items)
 
93
 
94
  def search_with_fallback(query, df, limit=15):
95
  query_parts = query.split()
 
58
  fator_avaliacao = max((4 * ec_pontuacao + 6 * PVU - 3 * PUB) / 100, VR)
59
  return fator_avaliacao
60
 
61
+ def select_nearest_items_adjusted(df, query):
62
+ # Lower the title similarity threshold if necessary
63
  df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
64
+ df_filtered = df[df['Title_Similarity'] > 50] # Adjusted threshold
65
+
66
+ # Calculate mode price in a more inclusive manner
67
+ mode_price = df_filtered['Price'].mode()
68
+ if mode_price.empty:
69
+ target_price = df_filtered['Price'].median()
70
+ else:
71
+ target_price = mode_price.min()
 
 
 
 
72
 
73
+ df_filtered['Distance'] = (df_filtered['Price'] - target_price).abs()
74
+ df_sorted = df_filtered.sort_values(['Distance', 'Title_Similarity'], ascending=[True, False])
 
75
 
76
+ # Ensure diversity in marketplaces
77
+ marketplaces_selected = set()
78
+ results = []
79
 
80
  for _, row in df_sorted.iterrows():
81
+ if row['Marketplace'] not in marketplaces_selected and len(marketplaces_selected) < 3:
82
+ results.append(row)
83
+ marketplaces_selected.add(row['Marketplace'])
84
+
85
+ if len(results) >= 3:
86
  break
87
 
88
+ return pd.DataFrame(results)
89
+
90
 
91
  def search_with_fallback(query, df, limit=15):
92
  query_parts = query.split()