yoniif committed on
Commit
0d6d77d
·
verified ·
1 Parent(s): 9e29eab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -12
app.py CHANGED
@@ -1,31 +1,74 @@
1
- # 🔧 Install dependencies first (uncomment for local testing)
2
  # !pip install gradio pandas sentence-transformers
3
 
4
- import gradio as gr
 
 
5
  import pandas as pd
 
6
  from sentence_transformers import SentenceTransformer, util
7
 
8
- # Load influencer dataset (replace path with uploaded HF dataset if needed)
9
- df = pd.read_csv("top_100_influencers.csv") # <- upload to HF Space alongside this script
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- # Fill NA just in case
 
 
 
 
 
 
 
 
 
 
 
 
12
  df.fillna("", inplace=True)
13
 
14
- # Combine fields for embedding
 
 
 
 
 
 
15
  df["profile_text"] = df["Name"] + " - " + df["Platform"] + " - " + df["Niche"] + " - " + df["Country"]
16
 
17
- # Load embedding model
 
18
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
19
 
20
- # Precompute embeddings
 
21
  influencer_embeddings = model.encode(df["profile_text"].tolist(), convert_to_tensor=True)
22
 
23
- # 🔍 Recommendation Function
 
24
  def recommend_influencers(brand_description):
25
  query_embedding = model.encode(brand_description, convert_to_tensor=True)
26
  cosine_scores = util.pytorch_cos_sim(query_embedding, influencer_embeddings)[0]
27
  top_indices = cosine_scores.topk(3).indices.tolist()
28
-
29
  recommendations = []
30
  for idx in top_indices:
31
  row = df.iloc[idx]
@@ -39,7 +82,6 @@ def recommend_influencers(brand_description):
39
  })
40
  return recommendations
41
 
42
- # 🖼️ Gradio UI
43
  def format_output(brand_input):
44
  recs = recommend_influencers(brand_input)
45
  output = ""
@@ -53,7 +95,7 @@ def format_output(brand_input):
53
 
54
  demo = gr.Interface(
55
  fn=format_output,
56
- inputs=gr.Textbox(label="Enter your brand description (e.g. 'Sustainable fashion for Gen Z')", placeholder="Describe your brand..."),
57
  outputs=gr.Markdown(label="Top 3 Influencer Matches"),
58
  title="InfluMatch: Influencer Recommender",
59
  description="Describe your brand or campaign and get 3 matching influencer suggestions.",
 
1
+ # 🔧 Install dependencies (uncomment if running locally)
2
  # !pip install gradio pandas sentence-transformers
3
 
4
+ import os
5
+ import zipfile
6
+ import requests
7
  import pandas as pd
8
+ import gradio as gr
9
  from sentence_transformers import SentenceTransformer, util
10
 
11
+ ### STEP 1: Download and unzip the influencer dataset from Hugging Face
12
+
13
+ # Replace this with your actual dataset ZIP URL
14
+ url = "https://huggingface.co/datasets/your-username/influencer-dataset-merged/resolve/main/top_100_influencers.zip"
15
+ zip_path = "top_100_influencers.zip"
16
+
17
+ # Download zip file if not already present
18
+ if not os.path.exists(zip_path):
19
+ print("πŸ“₯ Downloading influencer dataset...")
20
+ r = requests.get(url)
21
+ with open(zip_path, "wb") as f:
22
+ f.write(r.content)
23
+
24
+ # Unzip the file into a folder
25
+ unzip_dir = "influencer_data"
26
+ if not os.path.exists(unzip_dir):
27
+ print("πŸ“¦ Unzipping dataset...")
28
+ with zipfile.ZipFile(zip_path, 'r') as zip_ref:
29
+ zip_ref.extractall(unzip_dir)
30
+
31
+ ### STEP 2: Merge all CSVs into one
32
 
33
+ print("πŸ”— Merging influencer files...")
34
+ all_dfs = []
35
+ for file in os.listdir(unzip_dir):
36
+ if file.endswith(".csv"):
37
+ df = pd.read_csv(os.path.join(unzip_dir, file))
38
+ df["Source File"] = file # Optional: keep track of file origin
39
+ all_dfs.append(df)
40
+
41
+ df = pd.concat(all_dfs, ignore_index=True)
42
+
43
+ # Basic cleanup
44
+ df.drop_duplicates(inplace=True)
45
+ df.dropna(subset=["Name", "Niche"], inplace=True)
46
  df.fillna("", inplace=True)
47
 
48
+ # Save combined dataset (optional)
49
+ df.to_csv("top_100_influencers_combined.csv", index=False)
50
+ print("βœ… Combined dataset ready!")
51
+
52
+ ### STEP 3: Build the recommender engine
53
+
54
+ # Combine fields for semantic embedding
55
  df["profile_text"] = df["Name"] + " - " + df["Platform"] + " - " + df["Niche"] + " - " + df["Country"]
56
 
57
+ # Load sentence embedding model
58
+ print("🧠 Loading embedding model...")
59
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
60
 
61
+ # Precompute influencer embeddings
62
+ print("πŸ”’ Encoding influencer profiles...")
63
  influencer_embeddings = model.encode(df["profile_text"].tolist(), convert_to_tensor=True)
64
 
65
+ ### STEP 4: Define similarity search + UI
66
+
67
  def recommend_influencers(brand_description):
68
  query_embedding = model.encode(brand_description, convert_to_tensor=True)
69
  cosine_scores = util.pytorch_cos_sim(query_embedding, influencer_embeddings)[0]
70
  top_indices = cosine_scores.topk(3).indices.tolist()
71
+
72
  recommendations = []
73
  for idx in top_indices:
74
  row = df.iloc[idx]
 
82
  })
83
  return recommendations
84
 
 
85
  def format_output(brand_input):
86
  recs = recommend_influencers(brand_input)
87
  output = ""
 
95
 
96
  demo = gr.Interface(
97
  fn=format_output,
98
+ inputs=gr.Textbox(label="Enter your brand or campaign description", placeholder="e.g. Sustainable fashion for Gen Z"),
99
  outputs=gr.Markdown(label="Top 3 Influencer Matches"),
100
  title="InfluMatch: Influencer Recommender",
101
  description="Describe your brand or campaign and get 3 matching influencer suggestions.",