yashonwu
add captioning
9bf9e42
raw
history blame contribute delete
No virus
1.08 kB
from random import uniform
import numpy as np
from collections import OrderedDict, defaultdict
from itertools import tee
import time
# -----------------------------------------------
def find_ngrams(input_list, n):
return zip(*[input_list[i:] for i in range(n)])
def compute_div_n(caps,n=1):
aggr_div = []
for k in caps:
all_ngrams = set()
lenT = 0.
for c in caps[k]:
tkns = c.split()
lenT += len(tkns)
ng = find_ngrams(tkns, n)
all_ngrams.update(ng)
aggr_div.append(float(len(all_ngrams))/ (1e-6 + float(lenT)))
return np.array(aggr_div).mean(), np.array(aggr_div)
def compute_global_div_n(caps,n=1):
aggr_div = []
all_ngrams = set()
lenT = 0.
for k in caps:
for c in caps[k]:
tkns = c.split()
lenT += len(tkns)
ng = find_ngrams(tkns, n)
all_ngrams.update(ng)
if n == 1:
aggr_div.append(float(len(all_ngrams)))
else:
aggr_div.append(float(len(all_ngrams))/ (1e-6 + float(lenT)))
return aggr_div[0], np.repeat(np.array(aggr_div),len(caps))