nbeuchat committed on
Commit
6631114
1 Parent(s): 6bfabbb

process images

Browse files
Files changed (2) hide show
  1. data/actors_embeddings.csv +3 -0
  2. process_images.py +10 -6
data/actors_embeddings.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b2d6501a7fa59db2646f9d0438afe0e07358bd7d66eb00199227b3af2d1e26f
3
+ size 54033196
process_images.py CHANGED
@@ -3,6 +3,7 @@ import requests
3
  import pandas as pd
4
  from io import BytesIO
5
  from tqdm import tqdm
 
6
 
7
 
8
  def get_image(url: str):
@@ -14,7 +15,8 @@ def get_image(url: str):
14
  def get_embeddings(url: str):
15
  try:
16
  image = get_image(url)
17
- return list(face_recognition.face_encodings(image, num_jitters=5, model="large")[0])
 
18
  except Exception as e:
19
  print(e)
20
 
@@ -32,12 +34,14 @@ def process_all_images(input_file, output_file):
32
  df = df.sample(frac=1) # shuffle so you get some images for everybody while it's running
33
  for i, row in tqdm(df.iterrows(), total=df.shape[0]):
34
  embeddings = get_embeddings(row["contentUrl"])
35
- if embeddings:
36
- new_row = row.copy()
37
- new_row["embeddings"] = embeddings
38
- df_emb = df_emb.append(new_row, ignore_index=True)
 
39
  df_emb.to_csv(output_file, index=False)
40
-
 
41
  return df_emb
42
 
43
  def build_annoy_index():
 
3
  import pandas as pd
4
  from io import BytesIO
5
  from tqdm import tqdm
6
+ from time import time
7
 
8
 
9
  def get_image(url: str):
 
15
  def get_embeddings(url: str):
16
  try:
17
  image = get_image(url)
18
+ embeddings = face_recognition.face_encodings(image, num_jitters=2, model="large")
19
+ return list(embeddings[0])
20
  except Exception as e:
21
  print(e)
22
 
 
34
  df = df.sample(frac=1) # shuffle so you get some images for everybody while it's running
35
  for i, row in tqdm(df.iterrows(), total=df.shape[0]):
36
  embeddings = get_embeddings(row["contentUrl"])
37
+ new_row = row.copy()
38
+ new_row["embeddings"] = embeddings
39
+ df_emb = df_emb.append(new_row, ignore_index=True)
40
+
41
+ if i % 5 == 0:
42
  df_emb.to_csv(output_file, index=False)
43
+
44
+ df_emb.to_csv(output_file, index=False)
45
  return df_emb
46
 
47
  def build_annoy_index():