cheesexuebao committed on
Commit 8518918
1 Parent(s): fcc1b6e

Gray Testing
.gitattributes CHANGED
@@ -1,3 +1,4 @@
+* text=auto
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,3 @@
+/output/*
+.vscode
+__pycache__
Prediction.py ADDED
@@ -0,0 +1,96 @@
+import pandas as pd
+from tqdm.auto import tqdm
+import torch
+from transformers import BertTokenizerFast as BertTokenizer, BertForSequenceClassification
+import os
+import glob
+
+
+RANDOM_SEED = 42
+pd.RANDOM_SEED = 42
+LABEL_COLUMNS = ["Assertive Tone", "Conversational Tone", "Emotional Tone", "Informative Tone"]
+
+
+@torch.no_grad()
+def predict_csv(data, text_col, tokenizer, model, device, text_bs=16, max_token_len=128):
+    predictions = []
+    post = data[text_col]
+    num_text = len(post)
+    generator = range(0, num_text, text_bs)
+    for i in tqdm(generator, total=len(generator), desc="Processing..."):
+        texts = post[i: min(num_text, i+text_bs)].tolist()
+        encoding = tokenizer(
+            texts,
+            add_special_tokens=True,
+            max_length=max_token_len,
+            return_token_type_ids=False,
+            padding="max_length",
+            truncation=True,
+            return_attention_mask=True,
+            return_tensors='pt',
+        )
+        logits = model(
+            encoding["input_ids"].to(device),
+            encoding["attention_mask"].to(device),
+            return_dict=True
+        ).logits
+        prediction = torch.sigmoid(logits)
+        predictions.append(prediction.detach().cpu())
+
+    final_pred = torch.cat(predictions, dim=0)
+    y_inten = final_pred.numpy().T
+
+    data[LABEL_COLUMNS[0]] = y_inten[0].tolist()
+    data[LABEL_COLUMNS[1]] = y_inten[1].tolist()
+    data[LABEL_COLUMNS[2]] = y_inten[2].tolist()
+    data[LABEL_COLUMNS[3]] = y_inten[3].tolist()
+    return data
+
+@torch.no_grad()
+def predict_single(sentence, tokenizer, model, device, max_token_len=128):
+    encoding = tokenizer(
+        sentence,
+        add_special_tokens=True,
+        max_length=max_token_len,
+        return_token_type_ids=False,
+        padding="max_length",
+        truncation=True,
+        return_attention_mask=True,
+        return_tensors='pt',
+    )
+    logits = model(
+        encoding["input_ids"].to(device),
+        encoding["attention_mask"].to(device),
+        return_dict=True
+    ).logits
+    prediction = torch.sigmoid(logits)
+    y_inten = prediction.flatten().cpu().numpy().T.tolist()
+    return y_inten
+
+def model_factory(local_path, device):
+    manager = {}
+    for model_path in glob.glob(f"{local_path}/*"):
+        base_name = os.path.basename(model_path)
+        model_name = os.path.splitext(base_name)[0]
+        tokenizer = BertTokenizer.from_pretrained(model_path)
+        model = BertForSequenceClassification.from_pretrained(model_path)
+        model = model.to(device)
+        manager[model_name] = {
+            "model": model,
+            "tokenizer": tokenizer
+        }
+    return manager
+
+
+if __name__ == "__main__":
+
+    Data = pd.read_csv("Kickstarter_sentence_level_5000.csv")
+    Data = Data[:20]
+    device = torch.device('cpu')
+
+    manager = model_factory("./models", device)
+    for model_name, dct in manager.items():
+        model, tokenizer = dct['model'], dct['tokenizer']
+        fk_doc_result = predict_csv(Data, "content", tokenizer, model, device)
+        single_response = predict_single("Games of the imagination teach us actions have consequences in a realm that can be reset.", tokenizer, model, device)
+        fk_doc_result.to_csv(f"output/prediction_{model_name}.csv")
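A minimal usage sketch for the helpers above (not taken from the commit; the `models/Kickstarter` directory name and the example sentence are assumptions): `model_factory` builds one `{model, tokenizer}` entry per sub-directory of `models/`, and `predict_single` returns four sigmoid scores aligned with `LABEL_COLUMNS`.

```python
# Hedged sketch; assumes a converted checkpoint exists under models/Kickstarter/.
import torch
from Prediction import model_factory, predict_single, LABEL_COLUMNS

device = torch.device("cpu")
manager = model_factory("./models", device)   # one entry per models/<name>/ directory
entry = manager["Kickstarter"]                # assumed directory name
scores = predict_single("We appreciate your support.", entry["tokenizer"], entry["model"], device)
for label, score in zip(LABEL_COLUMNS, scores):
    print(f"{label}: {score:.3f}")            # independent per-label probabilities
```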
app.py CHANGED
@@ -1,54 +1,132 @@
 import gradio as gr
 import pandas as pd
 import matplotlib.pyplot as plt
-# from Prediction import *
+from Prediction import *
+import os
+from datetime import datetime
 
-# define the processing function
-def process_data(csv_file, num, model_name):
-    # read the CSV file
+
+examples = []
+if os.path.exists("assets/examples.txt"):
+    with open("assets/examples.txt", "r", encoding="utf8") as file:
+        for sentence in file:
+            sentence = sentence.strip()
+            examples.append(sentence)
+else:
+    examples = [
+        "Games of the imagination teach us actions have consequences in a realm that can be reset.",
+        "But New Jersey farmers are retiring and all over the state, development continues to push out dwindling farmland.",
+        "He also is the Head Designer of The Design Trust so-to-speak, besides his regular job ..."
+    ]
+
+device = torch.device('cpu')
+manager = model_factory("./models", device)
+
+
+def single_sentence(sentence, model_select):
+    df = []
+    for model_name in model_select:
+        dct = manager[model_name]
+        model, tokenizer = dct['model'], dct['tokenizer']
+        predictions = predict_single(sentence, tokenizer, model, device)
+        df.append([model_name] + predictions)
+    return df
+
+def csv_process(csv_file, model_select, attr="content"):
+    current_time = datetime.now()
+    formatted_time = current_time.strftime("%Y_%m_%d_%H_%M_%S")
     df = pd.read_csv(csv_file.name)
+    outputs = []
+    for model_name in model_select:
+        data = df.copy(deep=True)
+        dct = manager[model_name]
+        model, tokenizer = dct['model'], dct['tokenizer']
+        predictions = predict_csv(data, attr, tokenizer, model, device)
+        output_path = f"output/prediction_{model_name}_{formatted_time}.csv"
+        predictions.to_csv(output_path)
+        outputs.append(output_path)
+    return outputs
+
 
-    # process the data
-    processed_data = df * num
-
-    # generate the plot
-    plt.plot(processed_data)
-    plt.xlabel('X Label')
-    plt.ylabel('Y Label')
-    plt.title('Processed Data')
-    plt.grid(True)
-    plt.savefig('output.png')
-    plt.close()
-
-    # generate the string result
-    result = pd.DataFrame({'result':[1,2,3]})
-    result.to_csv('output.csv')
-    print(model_name)
-    return 'output.csv', ['output.png','output.png']
-
-my_theme = gr.Theme.from_hub("gstaff/sketch")
-with gr.Blocks(theme=my_theme, title='Test') as demo:
-    gr.Markdown("""# Test
-                xxxx
-                """)
+my_theme = gr.Theme.from_hub("JohnSmith9982/small_and_pretty")
+with gr.Blocks(theme=my_theme, title='XXX') as demo:
+    gr.HTML(
+        """
+        <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
+            <a href="https://github.com/xxx" style="margin-right: 20px; text-decoration: none; display: flex; align-items: center;">
+            </a>
+            <div>
+                <h1 >Place the title of the paper here</h1>
+                <h5 style="margin: 0;">If you like our project, please give us a star ✨ on Github for the latest update.</h5>
+                <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
+                    <a href="https://arxiv.org/abs/xx.xx"><img src="https://img.shields.io/badge/Arxiv-xx.xx-red"></a>
+                    <a href='https://huggingface.co/spaces/cheesexuebao/murphy'><img src='https://img.shields.io/badge/Project_Page-Murphy/xxBert' alt='Project Page'></a>
+                    <a href='https://github.com'><img src='https://img.shields.io/badge/Github-Code-blue'></a>
+                </div>
+            </div>
+        </div>
+        """)
 
     with gr.Tab("Single Sentence"):
-        with gr.Column():
-            csv_input = gr.File(label="CSV file")
-            text_output = gr.File(label="Result")
-            image_output = gr.Gallery(label="Images")
         with gr.Row():
-            seed_input = gr.Slider(minimum=0, maximum=100, step=1, label="seed", info="Different seeds may generate different results")
-            model_input = gr.CheckboxGroup(["ALL_Data", "Facebook", "Kickstarter", "Twitter"], label="Countries", info="Where are they from?")
+            tbox_input = gr.Textbox(label="Input",
+                                    info="Please input a sentence here:")
+            model_select = gr.CheckboxGroup(manager.keys(),
+                                            label="Models:",
+                                            info="Selecting different model variants to obtain aggregated predictions.")
+        tab_output = gr.DataFrame(label='Probability Predictions:',
+                                  headers=["model"] + LABEL_COLUMNS,
+                                  datatype=["str"] * (len(LABEL_COLUMNS)+1),
+                                  interactive=False,
+                                  wrap=True)
+        with gr.Row():
+            button_ss = gr.Button("Submit", variant="primary")
+            button_ss.click(fn=single_sentence, inputs=[tbox_input, model_select], outputs=[tab_output])
+            gr.ClearButton([tbox_input, tab_output])
+
+        gr.Markdown("## Examples")
+        gr.Examples(
+            examples=examples,
+            inputs=tbox_input,
+            examples_per_page=5
+        )
+
+    with gr.Tab("Csv File"):
+        with gr.Row():
+            csv_input = gr.File(label="CSV File:",
+                                file_types=['.csv'],
+                                file_count="single"
+                                )
+            csv_output = gr.File(label="Predictions:")
+
+        model_select = gr.CheckboxGroup(manager.keys(),
+                                        label="Models:",
+                                        info="Selecting different model variants to obtain aggregated predictions.")
 
         with gr.Row():
             button = gr.Button("Submit", variant="primary")
-            button.click(fn=process_data, inputs=[csv_input, seed_input, model_input], outputs=[text_output, image_output])
-        clear = gr.ClearButton([csv_input, text_output, image_output])
+            button.click(fn=csv_process, inputs=[csv_input, model_select], outputs=[csv_output])
+            gr.ClearButton([csv_input, csv_output])
 
-    with gr.Tab("Csv File"):
-        ...
-    with gr.Tab("README"):
-        seed_input = gr.Textbox(label="seed")
+        gr.Markdown("## Examples")
+        gr.Examples(
+            examples=["assets/csv_examples.csv",],
+            inputs=csv_input
+        )
+
+    with gr.Tab("Readme"):
+        gr.Markdown(
+            """
+            # Paper Name
+
+            # Authors
+
+            + First author
+            + Corresponding author
+
+            # Detailed Information
 
+            ...
+            """
+        )
 demo.launch()
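One operational note on the new app: `csv_process` writes `output/prediction_<model>_<timestamp>.csv`, and `/output/*` is ignored by the new `.gitignore`, so a fresh checkout may not contain that directory at all. A small guard such as the following (an assumption, not part of this commit) would avoid `to_csv` failing on a missing folder:

```python
# Hedged sketch: ensure the (gitignored) output directory exists before predictions are written.
import os
os.makedirs("output", exist_ok=True)
```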
assets/Kickstarter_sentence_level_5000.csv ADDED
The diff for this file is too large to render. See raw diff
 
assets/Prediction.py.bak ADDED
@@ -0,0 +1,129 @@
+### install the needed package
+# !pip install transformers
+# !pip install torchmetrics
+# !pip3 install ogb pytorch_lightning -q
+
+
+
+import pandas as pd
+from tqdm.auto import tqdm
+import torch
+import torch.nn as nn
+from torch.utils.data import DataLoader, Dataset
+from transformers import BertTokenizerFast as BertTokenizer, BertModel, AdamW, get_linear_schedule_with_warmup
+# import pytorch_lightning as pl
+
+pd.set_option('display.max_columns', 500)
+
+RANDOM_SEED = 42
+
+
+class ModelTagger(nn.Module):
+    def __init__(self, model_path="bert-base-uncased"):
+        super().__init__()
+
+        self.bert = BertModel.from_pretrained(model_path, return_dict=True)
+        self.classifier = nn.Linear(self.bert.config.hidden_size, 4)
+        self.criterion = nn.BCELoss()
+
+
+    def forward(self, input_ids, attention_mask, labels=None):
+
+        output = self.bert(input_ids, attention_mask=attention_mask)
+        output = self.classifier(output.pooler_output)
+        output = torch.sigmoid(output)
+        loss = 0
+
+        if labels is not None:
+            loss = self.criterion(output, labels)
+        return loss, output
+
+
+class Predict_Dataset(Dataset):
+    def __init__(
+        self,
+        data: pd.DataFrame,
+        text_col: str,
+        tokenizer: BertTokenizer,
+        max_token_len: int = 128
+    ):
+        self.text_col = text_col
+        self.tokenizer = tokenizer
+        self.data = data
+        self.max_token_len = max_token_len
+
+    def __len__(self):
+        return len(self.data)
+
+
+    def __getitem__(self, index: int):
+        data_row = self.data.iloc[index]
+        post = data_row[self.text_col]
+        encoding = self.tokenizer.encode_plus(
+            post,
+            add_special_tokens=True,
+            max_length=self.max_token_len,
+            return_token_type_ids=False,
+            padding="max_length",
+            truncation=True,
+            return_attention_mask=True,
+            return_tensors='pt',
+        )
+        return dict(
+            post=post,
+            input_ids=encoding["input_ids"].flatten(),
+            attention_mask=encoding["attention_mask"].flatten(),
+        )
+
+
+def predict(data, text_col, tokenizer, model, device, LABEL_COLUMNS, max_token_len=128):
+    predictions = []
+
+    df_token = Predict_Dataset(data, text_col, tokenizer, max_token_len=max_token_len)
+    loader = DataLoader(df_token, batch_size=1000, num_workers=0)
+
+    for item in tqdm(loader):
+        _, prediction = model(
+            item["input_ids"].to(device),
+            item["attention_mask"].to(device)
+        )
+        predictions.append(prediction.detach().cpu())
+
+    final_pred = torch.cat(predictions, dim=0)
+    y_inten = final_pred.numpy().T
+
+    return {
+        LABEL_COLUMNS[0]: y_inten[0].tolist(),
+        LABEL_COLUMNS[1]: y_inten[1].tolist(),
+        LABEL_COLUMNS[2]: y_inten[2].tolist(),
+        LABEL_COLUMNS[3]: y_inten[3].tolist()
+    }
+
+
+def get_result(df, result, LABEL_COLUMNS):
+    df[LABEL_COLUMNS[0]] = result[LABEL_COLUMNS[0]]
+    df[LABEL_COLUMNS[1]] = result[LABEL_COLUMNS[1]]
+    df[LABEL_COLUMNS[2]] = result[LABEL_COLUMNS[2]]
+    df[LABEL_COLUMNS[3]] = result[LABEL_COLUMNS[3]]
+    return df
+
+
+Data = pd.read_csv("Kickstarter_sentence_level_5000.csv")
+Data = Data[:20]
+device = torch.device('cpu')
+BERT_MODEL_NAME = 'bert-base-uncased'
+tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)
+LABEL_COLUMNS = ["Assertive Tone", "Conversational Tone", "Emotional Tone", "Informative Tone"]
+
+params = torch.load("checkpoints/Kickstarter.ckpt", map_location='cpu')['state_dict']
+kick_model = ModelTagger()
+kick_model.load_state_dict(params, strict=True)
+kick_model.eval()
+
+kick_model = kick_model.to(device)
+
+kick_fk_doc_result = predict(Data, "content", tokenizer, kick_model, device, LABEL_COLUMNS)
+
+fk_result = get_result(Data, kick_fk_doc_result, LABEL_COLUMNS)
+
+fk_result.to_csv("output/prediction_origin_Kickstarter.csv")
assets/csv_examples.csv ADDED
@@ -0,0 +1,30 @@
+,index,content,word_count
+0,225644,The first prototype did not clip together well and had strength issues so we redesigned it with new sides and a different tabs structure.,24
+1,989071,Maybe you own a shop or perhaps you and your friends want to go in on this together to save some money.,22
+2,332310,"With this campaign we want to propose ""Eternity Dice Regular and Charms Edition"", sculpted by hand in stone, with a polished finish and highly accurate details.",26
+3,101474,"It's hand cut from a thick and reliable high quality calf skin, which is soft and flexible enough for wearing with utmost comfort.",23
+4,1641986,"a# by 5 WHAT SEPARATES US FROM THE COMPETITION a lax-ll 360 AUDIO FLOATABLE Full submergable up to Superior surround sound Counter balanced for optimal 1 meter for 30 minutes audio direction while floating WIRELESS SPECIFICATIONS MATERIALS sarr of whreless Small and compact, with Engineered to perfection streaming range enormous sound with the highest quality materials avalable PRICE-POINT WARRANTY BVURABILITY Affordable technology Cone yearlimited warranty | Rubberized shock absorbing cover PATENTS BUILT-IN MIC BATTERY LIFE Patent.Pending stabalization .",78
+5,1632938,Much of the known world is either from this culture or has converted to the faith.,16
+6,1141502,"The more I play it, the more I want to play it.",12
+7,1424712,"There are weapons all around you, you just never thought about your household goods that way.",16
+8,460625,"In September, I'm going down to Virginia with a bunch of my music buddies to record the album.",18
+9,179267,"It is suitable for use with Cthulhu, Horror, Space and Dungeon - style miniature games.",15
+10,1092530,Games of the imagination teach us actions have consequences in a realm that can be reset.,16
+11,1050585,"Intense cleaning of the existing space, brick repairs, and removal of unneeded materials is also necessary.",16
+12,1126342,These will include color artwork and fully designed stats to help you build exciting and unique Shadowlands encounters.,18
+13,277427,"If you're leaving the backpack unattended, the bag itself can be secured to almost any fixed object using the integrated steel wire and combination lock, making it impossible for opportunistic thieves to access your belongings or steal the bag, without special cutting equipment.",43
+14,307425,Their parents had recruited the police and even had the church issuing official statements forbidding the girls to walk through monastery doors.,22
+15,611566,is a children’s book for elementary school age kids with illustrations appealing to people of all ages.,17
+16,951173,"Thanks to you we reached our original goal, so we got festival fees and insurance covered.",16
+17,1294624," It's been really well-received, and recently won an online award for Best New Tabletop Sports Game of 2013.",19
+18,686912,"But New Jersey farmers are retiring and all over the state, development continues to push out dwindling farmland.",18
+19,1291430,"Support Cards for easily setting initiative and keeping track of hit points, ammo, etc, speeding things up and eliminating the need for any writing/erasing Deep character creation with options designed for interesting roleplaying, and super fast to create (5 minutes or less) Specially laminated Character Cards take the place of the old character sheet, making information extremely easy to find and removing clutter from the gaming table Easily expandable without having to purchase and read through lengthy new books - new equipment, weapons, powers, skills, and opponents can be instantly added to your game with Setting Cards All special rules for equipment, weapons, powers, skills, and opponents printed on cards kept in player hands, so you never have to go searching for them Completely genre neutral, so assets from any setting are completely compatible with any others, making your game infinitely expandable and customizable Tech-themed Resolution Deck Concept Built from the ground up with VTTs (Virtual Table Tops) in mind, with all digital assets ready to drop into your game to integrate seamlessly with groups who play remotely Complete playable module with starter adventure included in backer rewards of $10 or more!",192
+20,1656635,"Their bond of friendship makes the journey more important than the destination as they share their dreams, frustrations and fears. The story goes on to show the dramatic impact this innocent childhood adventure has on their young adult lives.",39
+21,1679298,"He also is the Head Designer of The Design Trust so-to-speak, besides his regular job ...",16
+22,337389,"This year, the film team has plans to produce a short comedy, based on a true story set in the city of Jerusalem.",23
+23,980529,"$12,000 - Roguelike Player Mat This player mat will include extra rules to play Baldrick's Tomb as a solo player Roguelike.",21
+24,1700094,_ Thank you for viewing the project!,7
+25,420192,We appreciate your support and thank you for joining us in helping cause this mission stay in action.,18
+26,1469419,It'll even be foil-wrapped like baseball cards!,7
+27,105008,We believe that the major players with their massive branding campaigns together with the margins applied by distributors and retailers are a business model that doesn’t deliver a fair value to customers.,32
+28,1505209,"If you want to take advantage of the Rhino Slider's versatility, you'll have an option to add extra sets of rails after the campaign ends.",25
assets/examples.txt ADDED
@@ -0,0 +1,14 @@
+Games of the imagination teach us actions have consequences in a realm that can be reset.
+Intense cleaning of the existing space, brick repairs, and removal of unneeded materials is also necessary.
+Thanks to you we reached our original goal, so we got festival fees and insurance covered.
+ It's been really well-received, and recently won an online award for Best New Tabletop Sports Game of 2013.
+But New Jersey farmers are retiring and all over the state, development continues to push out dwindling farmland.
+Our chemical-free process provides unmatched comfort.
+However, this chart does not factor in special ability influence since that varies with the ability being used.
+I'd like to do something similar with pictures.
+This means you can feel more than comfortable putting them in your back pocket or purse.
+She holds a degree from the Advertising University of Madrid.
+Skeleton Birds are heading to Groovebox Studios on March 17th to record and film a live GBS Detroit EP and video.
+Please help support us & make this awesome case a reality!
+So... We're asking for $3,000 per song.
+You also have battle items and action cards to defeat your gnome enemies.
convert.py ADDED
@@ -0,0 +1,28 @@
+import torch
+import glob
+import os
+from transformers import BertTokenizerFast as BertTokenizer, BertForSequenceClassification
+
+LABEL_COLUMNS = ["Assertive Tone", "Conversational Tone", "Emotional Tone", "Informative Tone"]
+
+tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
+model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=4)
+id2label = {i:label for i,label in enumerate(LABEL_COLUMNS)}
+label2id = {label:i for i,label in enumerate(LABEL_COLUMNS)}
+
+for ckpt in glob.glob('checkpoints/*.ckpt'):
+    base_name = os.path.basename(ckpt)
+    # strip the file extension
+    model_name = os.path.splitext(base_name)[0]
+    params = torch.load(ckpt, map_location="cpu")['state_dict']
+    msg = model.load_state_dict(params, strict=True)
+    path = f'models/{model_name}'
+    os.makedirs(path, exist_ok=True)
+
+    torch.save(model.state_dict(), f'{path}/pytorch_model.bin')
+    config = model.config
+    config.architectures = ['BertForSequenceClassification']
+    config.label2id = label2id
+    config.id2label = id2label
+    model.config.to_json_file(f'{path}/config.json')
+    tokenizer.save_vocabulary(path)
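A hedged round-trip check for the export above (the directory name is an assumption): each `models/<name>/` folder written by `convert.py` should load back through the standard `from_pretrained` API, which is what `model_factory` in `Prediction.py` relies on, with the label mapping recorded in `config.json`.

```python
# Hedged sketch; assumes convert.py has already produced models/Kickstarter/.
from transformers import BertTokenizerFast, BertForSequenceClassification

model = BertForSequenceClassification.from_pretrained("models/Kickstarter")
tokenizer = BertTokenizerFast.from_pretrained("models/Kickstarter")

assert model.config.num_labels == 4
assert model.config.label2id["Assertive Tone"] == 0   # mapping written by convert.py
```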
{bert-base-uncased → models/All_Data}/config.json RENAMED
@@ -1,14 +1,28 @@
 {
+  "_name_or_path": "bert-base-uncased",
   "architectures": [
-    "BertForMaskedLM"
+    "BertForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
+  "id2label": {
+    "0": "Assertive Tone",
+    "1": "Conversational Tone",
+    "2": "Emotional Tone",
+    "3": "Informative Tone"
+  },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
+  "label2id": {
+    "Assertive Tone": 0,
+    "Conversational Tone": 1,
+    "Emotional Tone": 2,
+    "Informative Tone": 3
+  },
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
   "model_type": "bert",
@@ -16,7 +30,7 @@
   "num_hidden_layers": 12,
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
-  "transformers_version": "4.6.0.dev0",
+  "transformers_version": "4.36.2",
   "type_vocab_size": 2,
   "use_cache": true,
   "vocab_size": 30522
{bert-base-uncased → models/All_Data}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:097417381d6c7230bd9e3557456d726de6e83245ec8b24f529f60198a67b203a
-size 440473133
+oid sha256:4edf18d14298c9a7057bbbdbc88cddf3b673e452103c6c4b882e1cec14d51c53
+size 438021294
{bert-base-uncased → models/All_Data}/vocab.txt RENAMED
File without changes
bert-base-uncased/bert_config.json → models/Facebook/config.json RENAMED
@@ -1,13 +1,37 @@
 {
+  "_name_or_path": "bert-base-uncased",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
   "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
+  "id2label": {
+    "0": "Assertive Tone",
+    "1": "Conversational Tone",
+    "2": "Emotional Tone",
+    "3": "Informative Tone"
+  },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
+  "label2id": {
+    "Assertive Tone": 0,
+    "Conversational Tone": 1,
+    "Emotional Tone": 2,
+    "Informative Tone": 3
+  },
+  "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
+  "model_type": "bert",
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.36.2",
   "type_vocab_size": 2,
+  "use_cache": true,
   "vocab_size": 30522
 }
models/Facebook/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f511b8b4b91b5fa408c5b3220ce0fe9b61b2f9a3a54dd00acb3a81aa0a2a19e8
+size 438021294
models/Facebook/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/Kickstarter/config.json ADDED
@@ -0,0 +1,37 @@
+{
+  "_name_or_path": "bert-base-uncased",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "Assertive Tone",
+    "1": "Conversational Tone",
+    "2": "Emotional Tone",
+    "3": "Informative Tone"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "Assertive Tone": 0,
+    "Conversational Tone": 1,
+    "Emotional Tone": 2,
+    "Informative Tone": 3
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.36.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}
models/Kickstarter/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b98553cd5a9b23babc4e20ade9abda931497de3103acf09656eb39cfcbb0c485
+size 438021294
models/Kickstarter/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/Twitter/config.json ADDED
@@ -0,0 +1,37 @@
+{
+  "_name_or_path": "bert-base-uncased",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "Assertive Tone",
+    "1": "Conversational Tone",
+    "2": "Emotional Tone",
+    "3": "Informative Tone"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "Assertive Tone": 0,
+    "Conversational Tone": 1,
+    "Emotional Tone": 2,
+    "Informative Tone": 3
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.36.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}
models/Twitter/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6abf83c8c66c4f3fcaba340dcab3b5b1f4f2b66381b21a5aacab086194cf0cbd
+size 438021294
models/Twitter/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -1,4 +1,5 @@
 torch
 transformers
 tqdm
-pandas
+pandas
+datetime