File size: 1,602 Bytes
5f735a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from argparse import Namespace

from nomic import atlas
import numpy as np
from transformers import AutoModel

import text

# NOTE(review): this constant is not referenced anywhere else in the visible
# part of this file — confirm whether it is still needed.
num_embeddings = 10000


def download_models():
    """Load the ``silk-road/luotuo-bert`` model through ``AutoModel``.

    ``from_pretrained`` takes care of fetching the model files automatically.
    The call passes ``trust_remote_code=True``, which (per the transformers
    documentation) allows code shipped with the model repository to run.

    Returns:
        The object returned by ``AutoModel.from_pretrained``.
    """
    # Settings forwarded to the model as its ``model_args`` keyword.
    luotuo_args = Namespace(
        do_mlm=None,
        pooler_type="cls",
        temp=0.05,
        mlp_only_train=False,
        init_embeddings_model=None,
    )
    return AutoModel.from_pretrained(
        "silk-road/luotuo-bert",
        trust_remote_code=True,
        model_args=luotuo_args,
    )


# Relative paths handed to ``text.Text`` below.
pkl_path = './pkl/texts.pkl'
maps_path = './pkl/maps.pkl'
dict_path = "../characters/haruhi/text_image_dict.txt"
image_path = "../characters/haruhi/images"

model = download_models()

# NOTE: this rebinds the name ``text`` from the imported module to a ``Text``
# instance, so the module is no longer reachable under that name afterwards.
text = text.Text(
    "../characters/haruhi/texts",
    model=model,
    num_steps=50,
    pkl_path=pkl_path,
    dict_path=dict_path,
    image_path=image_path,
    maps_path=maps_path,
)

# Left disabled on purpose; presumably regenerates the cached maps/embeddings
# that are loaded below — confirm against text.Text before enabling.
# text.read_text(save_maps=True, save_embeddings=True)

# Every value returned by ``load(load_pkl=True)`` is converted with ``.numpy()``
# and the results are collected into a single NumPy array.
embeddings = np.array(
    [vector.numpy() for vector in text.load(load_pkl=True).values()]
)
data = text.load(load_maps=True)

# Upload the embeddings plus their accompanying records to Nomic Atlas.
project = atlas.map_embeddings(
    embeddings=embeddings,
    data=data,
    id_field='id',
    name="Chat-Haruhi",
    description="Embedding Visualization",
    colorable_fields=['titles'],
    build_topic_model=True,
    topic_label_field="titles",
    # reset_project_if_exists=True,
)