amphora committed on
Commit
4bf425b
1 Parent(s): 2fabe75

feature: created embed caption

Browse files
Files changed (1) hide show
  1. embed_captions +25 -0
embed_captions ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ import argparse
3
+ from utils import load_model
4
+
5
def main(args):
    """Embed every caption with each KoCLIP model and append the results to TSV files.

    Reads one caption per line from ``args.text_path`` and, for each of the
    ``koclip-base`` and ``koclip-large`` models, appends one row per caption to
    ``<args.out_path>/<model_name>.tsv``. Each row holds the caption text and
    its feature vector serialized as comma-separated numbers.

    Args:
        args: Namespace-like object exposing ``text_path`` (caption file) and
            ``out_path`` (output directory).
    """
    # Imported locally: the file's import header does not bring these names
    # into scope, yet the function needs both.
    import os

    import numpy as np

    # Context manager closes the caption file; explicit UTF-8 keeps the result
    # independent of the platform's default encoding.
    with open(args.text_path, encoding="utf-8") as caption_file:
        captions = [line.strip() for line in caption_file]

    # Create the output directory up front so the open() below cannot fail on
    # a missing folder. An empty path means "current directory".
    if args.out_path:
        os.makedirs(args.out_path, exist_ok=True)

    for model_name in ["koclip-base", "koclip-large"]:
        model, processor = load_model(f"koclip/{model_name}")

        features = []
        for caption in captions:
            inputs = processor(caption, images=None, return_tensors="jax")
            # NOTE(review): get_text_features presumably returns a
            # (1, dim) batch for a single caption -- confirm. Flattening makes
            # the join below emit one number per component instead of the
            # repr of a whole row; it is a no-op for a 1-D result.
            features.append(np.asarray(model.get_text_features(**inputs)).ravel())

        out_file_path = os.path.join(args.out_path, f"{model_name}.tsv")
        # newline="" is what the csv module requires of files it writes to;
        # append mode is kept so repeated runs accumulate rows as before.
        with open(out_file_path, "a+", encoding="utf-8", newline="") as out_file:
            writer = csv.writer(out_file, delimiter="\t")
            for caption, feature in zip(captions, features):
                writer.writerow([caption, ",".join(map(str, feature))])
19
+
20
if __name__ == "__main__":
    # Command-line entry point: register each option with its default value,
    # parse the command line, and hand the resulting namespace to main().
    cli = argparse.ArgumentParser()
    option_defaults = (
        ("--text_path", "text"),
        ("--out_path", "features/text"),
    )
    for flag, fallback in option_defaults:
        cli.add_argument(flag, default=fallback)
    main(cli.parse_args())