# Copyright (c) Meta Platforms, Inc. and affiliates

import json
import numpy as np
import os
import random

from tqdm import tqdm


def balance_sampling(matched_entry_ids, entry_prob):
    """Decide whether to keep a sample, given the entries it matched.

    One uniform draw from ``random.random()`` is made per matched entry, in
    iteration order, and the sample is kept as soon as any draw falls
    strictly below that entry's value in ``entry_prob``.

    Args:
        matched_entry_ids: Iterable of entry ids matched by the sample. Each
            id must be a valid index/key into ``entry_prob``.
        entry_prob: Container indexable by entry id (``entry_prob[entry_id]``)
            whose values are compared against a uniform draw in ``[0.0, 1.0)``.
            A value >= 1.0 always keeps the sample; a value <= 0.0 never does.

    Returns:
        ``True`` if at least one matched entry's draw succeeded, ``False``
        otherwise (including when ``matched_entry_ids`` is empty).
    """
    # this can be placed in a pipeline or on-the-fly in a data loader.
    # see a numpy impl. at metaclip.indexing.balance_sampling.balance_sampling
    for entry_id in matched_entry_ids:
        # Stop at the first success so no further random draws are consumed.
        if random.random() < entry_prob[entry_id]:
            return True
    return False