TheAutonomous committed on
Commit
0f9b91a
1 Parent(s): fd71db7

Upload 4 files

Browse files
Inference.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, time, torch, warnings
2
+ from transformers import GPT2LMHeadModel, GPT2Tokenizer
3
+
4
class Inference():
    """Text generation with a fine-tuned GPT-2 model stored next to this file.

    The tokenizer is the stock ``distilgpt2`` tokenizer; the model weights are
    loaded from the ``SaveState`` directory located beside this source file.
    """

    # Number of new tokens requested from the model per generation step.
    # Generating in chunks lets ``callback`` and ``stop_token`` be serviced
    # between steps.
    CHUNK_SIZE = 50

    def __init__(self, silent=False) -> None:
        """Load the tokenizer and model.

        Args:
            silent: When false, print how long loading took.
        """
        start_time = time.perf_counter()
        self.tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
        self.model = GPT2LMHeadModel.from_pretrained(self.local_file_path("SaveState"))
        self.model.eval()
        if not silent:
            print(f"Model Loading Took {time.perf_counter()-start_time} Seconds")

    def local_file_path(self, path):
        """Return ``path`` resolved relative to the directory containing this file."""
        return os.path.join(os.path.dirname(os.path.abspath(__file__)), path)

    def generate(self, prompt, max_length=2000, temperature=0.5, do_sample=True, stop_token=None, callback=None, silent=True):
        """Generate a continuation of ``prompt`` in chunks of ``CHUNK_SIZE`` tokens.

        Args:
            prompt: Text to continue.
            max_length: Upper bound on the total number of tokens (prompt
                plus continuation) in the returned text.
            temperature: Sampling temperature forwarded to the model.
            do_sample: Whether the model samples instead of decoding greedily.
            stop_token: Optional string; generation stops after the first
                chunk whose accumulated continuation contains it. The text
                is not truncated at the stop string.
            callback: Optional callable invoked once per newly generated
                token with that token decoded to a string.
            silent: When false, print how long generation took.

        Returns:
            The prompt followed by the generated continuation, decoded with
            special tokens removed.
        """
        start_time = time.perf_counter()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            generated = self.tokenizer.encode(prompt, return_tensors='pt')
            prompt_length = generated.shape[1]
            eos_id = self.tokenizer.eos_token_id
            # Largest sequence the model accepts; 1024 is used when the model
            # exposes no ``config.n_positions``.
            context_size = getattr(getattr(self.model, "config", None), "n_positions", 1024)

            while generated.shape[1] < max_length:
                chunk = min(self.CHUNK_SIZE, max_length - generated.shape[1])
                # Condition on everything produced so far (not only the last
                # chunk), trimmed so context + new tokens fits the window.
                window = max(1, context_size - chunk)
                context = generated[:, -window:]
                with torch.no_grad():
                    outputs = self.model.generate(
                        context,
                        # ``max_new_tokens`` counts only generated tokens;
                        # ``max_length`` would also count the context.
                        max_new_tokens=chunk,
                        temperature=temperature,
                        do_sample=do_sample,
                        pad_token_id=eos_id,
                    )
                # Keep only what follows the context so the prompt is never
                # appended to the result a second time.
                new_tokens = outputs[0][context.shape[1]:]

                # Drop the end-of-sequence token and anything after it.
                finished = False
                if eos_id is not None:
                    eos_positions = (new_tokens == eos_id).nonzero()
                    if eos_positions.numel() > 0:
                        new_tokens = new_tokens[:eos_positions[0].item()]
                        finished = True

                if callback is not None:
                    for token in new_tokens:
                        callback(self.tokenizer.decode([token]))
                generated = torch.cat((generated, new_tokens.unsqueeze(0)), dim=-1)

                # An empty chunk would never advance the loop, so stop.
                if finished or new_tokens.numel() == 0:
                    break
                # Search only the continuation so a stop string that already
                # occurs in the prompt does not end generation immediately.
                if stop_token is not None and stop_token in self.tokenizer.decode(generated[0][prompt_length:]):
                    break

        if not silent:
            print(f"Generation Took {time.perf_counter()-start_time} Seconds")
        return self.tokenizer.decode(generated[0], skip_special_tokens=True)
38
+
39
# NOTE: this rebinds the module-level name ``Inference`` from the class to a
# ready-to-use instance, so the model is loaded as soon as the module is
# imported and the class itself is no longer reachable under this name.
# Kept as-is because importers may rely on ``Inference.generate(...)``.
Inference = Inference()


def spec(stre):
    """Print ``stre`` without a trailing newline.

    Accepts a single string, so it can be passed as the ``callback``
    argument of ``generate`` to echo tokens as they are produced.
    """
    print(stre, end="")


if __name__ == "__main__":
    # Minimal interactive loop: read a prompt, print the model's continuation.
    while True:
        try:
            prompt = input(">>> ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt ends the session without a traceback.
            print()
            break
        if not prompt.strip():
            # A blank line gives the model nothing to continue; ask again.
            continue
        print(Inference.generate(prompt, max_length=100, temperature=0.8, silent=True))
TrainData.txt ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Scene: Sunshine
2
+ Person1: Ahh beautiful sunshine
3
+ Person2: I love the way it bounces off your - beautiful face
4
+ Person1: You were going to say something else there
5
+ Person2: I resisted the bald joke because it's still quite early in the show but now you've opened the door so here it goes, you have no hair
6
+ Person1: I know I don't have any hair, but no we should not fight. It is our anniversary night.
7
+ Person2: It is our anniversary
8
+ Person1: Yes
9
+ Person2: And we always come to this picnic spot for our anniversary in the sunshine
10
+ Person1: Now come, let us eat some bread and cheese.
11
+ Person2: For we are...
12
+ Both: French
13
+ Person1: Thank You
14
+ Person2: To us
15
+ Person1: To us
16
+ Both: Clink
17
+ Person2: So how's your affair going
18
+ Person1: ha ha
19
+ Person2: I know. *Spits out wine*
20
+ Person1: What did you just do?
21
+ Person2: I kept the wine in my mouth the whole time because I was going to dramatically reveal that it was poisoned. You're going to die in the sunshine for what you did I know about Jean Claude.
22
+ Person1: Jean Claude!? Jean Claude and I are just buddies!
23
+ Person2: Buddies? More like sex buddies! I turn the corner of bakers street and there you two were raw dogging it in the streets!
24
+ Person1: No we weren't! We were just having good times together having a play roll!
25
+ Person2: A play roll!? You think that I am that naive?
26
+ Person1: Oh-
27
+ Person2: Wow this is a very slow acting poison
28
+ Person3: OH MY GOODNESS!! THAT MAN IS BALD!
29
+ **End**
30
+ Scene: Territory
31
+ Person1: Easy there, you're about to step on my territory. You actually have.
32
+ Person2: Well I see I've made some kind of mistake
33
+ Person1: Well all you gotta do is just wander back over there and we ain't got no problems.
34
+ Person2: I just gotta wander back over there and we ain't gonna have a problem?
35
+ Person1: Oh what, are you repeating everything I'm saying? What is there an echo in here?
36
+ Person2: I JUST WANNA BE REAL CLEAR ON WHATS I GOTS TO DO! I JUST HAVE TO WALK OVER THERE AND WE AIN'T GONNA HAVE NO MORE PROBLEMS IS THAT WHAT YA SAID!?
37
+ Person1: I ain't saying we're gonna not have no problems I mean we still is gonna have taxation and relationship troubles. I'm saying the problem of me having a gun pointed on you is gonna diminish awful quick like
38
+ Person2: So you were being over generalized before when you said we ain't gonna have no problems you meant this new specific problem will be gone but we'll still have problems such as inflation.
39
+ Person1: I apologize for not being hyper-specific with my initial sentence. I will try to do better in the future. Now please move exactly two meters to your left...
40
+ Person2: I UNDERSTAND YOUR MISTAKE THAT RHETORIC CAN BE A TRICKY MISTRESS I WANT TO UNDERSTAND PRECISELY WHAT WILL OCCUR IF I'M TO STEP BACK OVER THERE?
41
+ Person3: Hey if you do too far you'll be on my property and I don't want there to be any issues because thats my-
42
+ Person2: DO YOU UNDERSTAND WHAT HE'S SAYIN?
43
+ Person1: I dont give a f*** about what he's saying.
44
+ Person2: We have no idea what your sayin you made this a 3 body problem here which is an unsolvable mathematical issue.
45
+ Person3: I'm just saying that this is my turf right here okay-
46
+ Person2: only you are on my best I do not see why
47
+ Person1: When you say thats your turf do you only own the turf or do you own the land underneath it. If we don't go on the turf are we still allowed over there or is it a property line? Be more specific!
48
+ **End**
49
+ Scene: Be More Specific
50
+ Person1: Just be more specific about it.
51
+ Person2: Okay okay so... like...
52
+ Person1: Cmon!
53
+ Person2: Im leaving for work, right? I hear a knock at the door and its the delivery driver and I say "Oh hello!" and he says "Hello!" and he is delivering a parcel and he says its for me and he hands me a stylus to sign for it and my hand slips and the stylus goes through the device into his heart and now he's dead.
54
+ Person1: Okay you're under arrest.
55
+ **End**
56
+ Scene: Hunting
57
+ Person1: I've been hunting wild and exotic animals for about two weeks now and I realize I really should have prepared a lot more, as you can see I've lost me legs and me arms. Turns out crocodiles, even if they look like they're smiling, aren't actually your friends. Thanks for coming to the Ted Talk. Don't f*** with nature 'cause nature f***s back. Now what I wanna recommend for anyone who wants to do what I did is do your research. Do your reading and start with something small like a doll house. Very unlikely a doll house is going to give you such grievous bodily injuries. Next slide!
58
+ Person2: *Snaps Fingers*
59
+ Person1: There's Jim, he's my assistant. Since the incident I can't click anymore so we made a click activated powerpoint presentation. As you can see this is a photo of the incident, actually about 2 seconds before, and I am now waving at the crocodile in the water.
60
+ Person2: *Snaps Fingers*
61
+ Person1: Now I'm really in water. As you can see one of the arms is gone. Why did I not run away then you might ask? I've always been a stubborn man.
62
+ **End**
63
+ Scene: It's Okay
64
+ Person1: Hey it's okay
65
+ Person2: I just um... I just need this time.
66
+ Person1: You want us to leave?
67
+ Person2: Yeah
68
+ Person1: *Leaves*
69
+ Person3: *Leaves*
70
+ Person2: No wait don't leave.
71
+ Person3: Listen mate it will be okay
72
+ Person2: I don't want to be in a solo scene
73
+ Person1: That was my bad mate I really thought someone might come in at some point
74
+ Person2: I just... sometimes it's funny just to let someone off stage
75
+ Person3: Listen it's alright if you can't think of anything
76
+ Person1: I know its your first time doing improv Person2
77
+ Person2: Its just I've run out of ideas
78
+ Person1: Well I am sure you can think of something
79
+ Person2: You just have to be funny every moment and it's just f***ing exhausting. Plus we are running out of accents we've done Australian, German, French, British...
80
+ Person3: Don't do the offensive ones. Leave those in the nineties
81
+ Person2: I don't know mate I think I should just quit.
82
+ Person1: We don't want you to go... We just gotta do a couple more games in this half then have a break and a lemonade and just come back for the second half, alright?
83
+ **End**
84
+ Scene: Digging
85
+ Person1: That's good Laddy, Listen we need to get these bodies in here by the time the police come alright
86
+ Person2: and these
87
+ Person1: and th- f***in ell how many people?
88
+ Person2: sixpence word 6 pennies
89
+ Person1: My goodness. We've robbed this bank okay and it's gone a bit wrong there's some- we've killed a few people all right this is...
90
+ Person3: violence criminel
91
+ Person1: I know this is what we are. we're violent criminals. We are violent criminals! Person3 are you alright?
92
+ Person3: Volie destruction
93
+ Person1: I know we blew up that bank
94
+ Person3: Ah sense negative
95
+ Person1: let's not In know you put-
96
+ Person2: Do you understand it or do you find it confusing?
97
+ Person3: Oh! Le blue fance!
98
+ Person1: Oh my goodness the police!! Get down! It's the police!
99
+ Person2: She glanced out of the window
100
+ Person1: No No! They're going past just keep quiet!
101
+ Person2: MONEY!
102
+ Person3: VIOLENCE CRIMINEL!
103
+ Person1: NO NO! SHUSH!
104
+ Person2: I EXPECT YOU'LL BE WANTING SOME CHANGE
105
+ Person1: Listen, listen, guys keep your f***ing voices down.
106
+ Person2: MONEYYYYYY!!!!
107
+ Person1: Okay I think they've gone. You want money? Fine here's your money
108
+ Person2: Seventy Seven
109
+ Person1: Seventy Seven pounds. Here you go. Congratulations. And for you-
110
+ Person2: 77!?
111
+ Person1: That's what we agreed-
112
+ Person3: TYRANNY
113
+ Person2: 77!?
114
+ Person1: Guys. Listen it wasn't the biggest bank robbery in the world. We robbed 100 quid. Alright? You get 77
115
+ Person2: Money?
116
+ Person1: I'll get 10 and Person3 gets whatever the rest of the maths is.
117
+ Person3: Uhhhh... 108.
118
+ **End**
__init__.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import torch
4
+ from transformers import GPT2LMHeadModel, GPT2Tokenizer, TextDataset, DataCollatorForLanguageModeling, Trainer, TrainingArguments
5
+
6
class GptHumorTrainer:
    """Fine-tune a GPT-2 language model on a text file and save it to ``SaveState``.

    The ``SaveState`` directory lives beside this source file. It is both the
    place the fine-tuned weights are written to and, when it already exists,
    the checkpoint that training resumes from.
    """

    # Checkpoint used for the tokenizer, and for the model when no
    # ``SaveState`` directory exists yet (i.e. on the very first run).
    BASE_MODEL = "distilgpt2"

    def __init__(self, silent=False) -> None:
        """Load the tokenizer and the model to be trained.

        Args:
            silent: When false, print how long loading took.
        """
        start_time = time.perf_counter()
        self.tokenizer = GPT2Tokenizer.from_pretrained(self.BASE_MODEL)
        save_state = self.local_file_path("SaveState")
        # Resume from earlier fine-tuning when available; otherwise start
        # from the base checkpoint instead of failing on a missing directory.
        model_source = save_state if os.path.isdir(save_state) else self.BASE_MODEL
        self.model = GPT2LMHeadModel.from_pretrained(model_source)
        self.model.eval()
        if not silent:
            print(f"Model Loading Took {time.perf_counter()-start_time} Seconds")

    def local_file_path(self, path):
        """Return ``path`` resolved relative to the directory containing this file."""
        return os.path.join(os.path.dirname(os.path.abspath(__file__)), path)

    def train(self, train_file, epochs=3):
        """Fine-tune the model on ``train_file``, saving after every epoch.

        Args:
            train_file: Path to a plain-text training file.
            epochs: Number of passes over the data. Each pass runs in its own
                ``Trainer`` and is followed by a save to ``SaveState``.
        """
        # NOTE(review): ``Trainer`` is expected to place the model on the
        # device chosen by ``TrainingArguments``, which may override this —
        # confirm if CPU-only training is required.
        device = torch.device("cpu")
        self.model.to(device)

        # Prepare the dataset
        train_dataset = TextDataset(
            tokenizer=self.tokenizer,
            file_path=train_file,
            block_size=128,
        )

        # We use a special data collator for language modeling tasks
        # (``mlm=False`` selects causal rather than masked language modeling).
        data_collator = DataCollatorForLanguageModeling(
            tokenizer=self.tokenizer,
            mlm=False,
        )

        for epoch in range(epochs):
            # Define the training arguments for each epoch
            training_args = TrainingArguments(
                output_dir=f"./results/epoch_{epoch+1}",  # The output directory for this epoch
                overwrite_output_dir=True,                # Overwrite the content of the output directory
                num_train_epochs=1,                       # One epoch per iteration, so ``epochs`` is the true total
                per_device_train_batch_size=3,            # Batch size for training
                # NOTE(review): a non-positive ``save_steps`` looks intended to
                # turn off step-based checkpoints; the model is persisted
                # explicitly below — confirm against the installed version.
                save_steps=-1,
                save_total_limit=None,                    # No limit on the total amount of checkpoints
                prediction_loss_only=True,                # Focus on the prediction loss only
            )

            # Initialize the Trainer
            trainer = Trainer(
                model=self.model,
                args=training_args,
                data_collator=data_collator,
                train_dataset=train_dataset,
            )

            # Train the model for one epoch
            trainer.train()

            # Save the model after each epoch
            self.model.save_pretrained(self.local_file_path("SaveState"))
61
+
62
if __name__ == "__main__":
    # Script entry point: fine-tune for five epochs on the transcript that
    # ships beside this file.
    trainer_instance = GptHumorTrainer()
    training_file = trainer_instance.local_file_path("TrainData.txt")  # Replace with the path to your training file
    trainer_instance.train(training_file, epochs=5)
cached_lm_GPT2Tokenizer_128_TrainData.txt ADDED
Binary file (5.75 kB). View file