chanhen committed
Commit d0c1c22 · 1 Parent(s): 95e1766

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ Senior[[:space:]]Project[[:space:]]-[[:space:]]Final[[:space:]]Report[[:space:]]Example.pdf filter=lfs diff=lfs merge=lfs -text
CPIS 498 - Final Presentation Template .pptx ADDED
Binary file (185 kB).
 
CPIS 498 - Poster Template.pptx ADDED
Binary file (141 kB).
 
README.md CHANGED
@@ -1,12 +1,6 @@
  ---
- title: Echo Chatbot
- emoji: 📉
- colorFrom: gray
- colorTo: blue
+ title: echo-chatbot
+ app_file: app.py
  sdk: gradio
  sdk_version: 4.16.0
- app_file: app.py
- pinned: false
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
Senior Project - Final Report Example.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:491805426b30d23083763aaaf20194ae41426d6d9bdd1f17c9f9d3f5781c7370
+ size 1393475
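The three lines just added are a Git LFS pointer, not the PDF itself: the real bytes live in LFS storage, addressed by the sha256 oid, and `size` is the byte count of the actual file. A minimal sketch of parsing such a pointer (the helper name is ours, purely illustrative):

```python
# Split a Git LFS pointer file into its key/value fields: version, oid, size.
def parse_lfs_pointer(text: str) -> dict:
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:491805426b30d23083763aaaf20194ae41426d6d9bdd1f17c9f9d3f5781c7370\n"
    "size 1393475\n"
)
print(parse_lfs_pointer(pointer)["size"])  # -> 1393475
```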
app.py ADDED
@@ -0,0 +1,6 @@
+ import gradio as gr
+
+ def slow_echo(message, history):
+     return message
+
+ demo = gr.ChatInterface(slow_echo).queue().launch()
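Despite its name, `slow_echo` replies all at once. Gradio's `ChatInterface` also accepts a generator function for streaming output; a minimal sketch along the lines of the Gradio docs example:

```python
import time
import gradio as gr

def slow_echo(message, history):
    # Yield progressively longer prefixes so the reply streams in the UI.
    for i in range(len(message)):
        time.sleep(0.05)
        yield message[: i + 1]

demo = gr.ChatInterface(slow_echo).queue().launch()
```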
chapter1_2.py ADDED
@@ -0,0 +1,201 @@
+ # https://huggingface.co/MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli
+ # from transformers import pipeline
+ # classifier = pipeline("zero-shot-classification", model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli")
+ # sequence_to_classify = "Angela Merkel is a politician in Germany and leader of the CDU"
+ # candidate_labels = ["politics", "economy", "entertainment", "environment"]
+ # output = classifier(sequence_to_classify, candidate_labels, multi_label=False)
+ # print(output)
+
+ # from transformers import pipeline
+
+ # generator = pipeline("text-generation", model="distilgpt2")
+ # output = generator("In this course, we will teach you how to")
+ # print(output)
+
+ # https://huggingface.co/bigscience/bloom-560m
+ # from transformers import AutoTokenizer, AutoModelForCausalLM
+ # import transformers
+ # import torch
+
+ # model = "bigscience/bloom-560m"
+
+ # tokenizer = AutoTokenizer.from_pretrained(model)
+ # pipeline = transformers.pipeline(
+ #     "text-generation",
+ #     model=model,
+ #     tokenizer=tokenizer,
+ #     torch_dtype=torch.bfloat16,
+ #     trust_remote_code=True,
+ #     device_map="auto",
+ # )
+ # sequences = pipeline(
+ #     "Girafatron is obsessed with giraffes, the most glorious animal on the face of this Earth. Giraftron believes all other animals are irrelevant when compared to the glorious majesty of the giraffe.\nDaniel: Hello, Girafatron!\nGirafatron:",
+ #     max_length=200,
+ #     do_sample=True,
+ #     top_k=10,
+ #     num_return_sequences=1,
+ #     eos_token_id=tokenizer.eos_token_id,
+ # )
+ # for seq in sequences:
+ #     print(f"Result: {seq['generated_text']}")
+
+ # https://huggingface.co/bert-base-uncased
+ # from transformers import pipeline
+ # unmasker = pipeline('fill-mask', model='bert-base-multilingual-cased')
+ # output = unmasker("tu es [MASK] homme?")
+
+
+ # named entity recognition
+ # from transformers import pipeline
+
+ # ner = pipeline("ner", grouped_entities=True)
+ # output = ner("My name is Sylvain and I work at Hugging Face in Brooklyn.")
+
+ # https://huggingface.co/facebook/bart-large-cnn
+ from transformers import pipeline
+
+ # summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+ # output = summarizer(
+ #     """
+ #     America has changed dramatically during recent years. Not only has the number of
+ #     graduates in traditional engineering disciplines such as mechanical, civil,
+ #     electrical, chemical, and aeronautical engineering declined, but in most of
+ #     the premier American universities engineering curricula now concentrate on
+ #     and encourage largely the study of engineering science. As a result, there
+ #     are declining offerings in engineering subjects dealing with infrastructure,
+ #     the environment, and related issues, and greater concentration on high
+ #     technology subjects, largely supporting increasingly complex scientific
+ #     developments. While the latter is important, it should not be at the expense
+ #     of more traditional engineering.
+
+ #     Rapidly developing economies such as China and India, as well as other
+ #     industrial countries in Europe and Asia, continue to encourage and advance
+ #     the teaching of engineering. Both China and India, respectively, graduate
+ #     six and eight times as many traditional engineers as does the United States.
+ #     Other industrial countries at minimum maintain their output, while America
+ #     suffers an increasingly serious decline in the number of engineering graduates
+ #     and a lack of well-educated engineers.
+ #     """
+ # )
+
+ # from transformers import pipeline
+
+ # translator = pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en")
+ # output = translator("屌")
+
+ # print(output)
+
+ # from transformers import AutoTokenizer
+
+ # tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
+
+ # sequence = "Using a Transformer network is simple"
+ # tokens = tokenizer.tokenize(sequence)
+ # print(tokens)
+ # ids = tokenizer.convert_tokens_to_ids(tokens)
+ # print(ids)
+ # decoded_string = tokenizer.decode(ids)
+ # print(decoded_string)
+ # print("----------------------")
+
+ # sequence = "Using a Transform network are simple"
+ # tokens = tokenizer.tokenize(sequence)
+ # print(tokens)
+ # ids = tokenizer.convert_tokens_to_ids(tokens)
+ # print(ids)
+ # decoded_string = tokenizer.decode(ids)
+ # print(decoded_string)
+
+ # import torch
+ # from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+ # checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+ # tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+ # model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
+
+ # sequence = "I’ve been waiting for a HuggingFace course my whole life."
+ # tokens = tokenizer.tokenize(sequence)
+ # print(tokens)
+ # sequence1_ids = tokenizer.convert_tokens_to_ids(tokens)
+ # print(sequence1_ids)
+
+ # sequence = "I hate this so much!"
+ # tokens = tokenizer.tokenize(sequence)
+ # print(tokens)
+ # sequence2_ids = tokenizer.convert_tokens_to_ids(tokens)
+ # print(sequence2_ids)
+
+ # sequence1_ids = [[200, 200, 200]]
+ # sequence2_ids = [[200, 200]]
+ # batched_ids = [
+ #     [1045, 1521, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012],
+ #     [1045, 5223, 2023, 2061, 2172, 999, tokenizer.pad_token_id, tokenizer.pad_token_id, tokenizer.pad_token_id, tokenizer.pad_token_id, tokenizer.pad_token_id, tokenizer.pad_token_id, tokenizer.pad_token_id, tokenizer.pad_token_id],
+ # ]
+
+ # attention_mask = [
+ #     [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+ #     [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ # ]
+
+ # outputs = model(torch.tensor(batched_ids), attention_mask=torch.tensor(attention_mask))
+ # print(outputs.logits)
+
+ # from transformers import AutoTokenizer
+
+ # checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+ # tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+
+ # sequence = "I've been waiting for a HuggingFace course my whole life."
+
+ # model_inputs = tokenizer(sequence)
+
+ # print(model_inputs)
+
+ # sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
+ # Will pad the sequences up to the maximum sequence length
+ # model_inputs = tokenizer(sequences, padding="longest")
+ # print(model_inputs)
+ # print("-------------------------")
+
+ # Will pad the sequences up to the specified max length
+ # model_inputs = tokenizer(sequences, padding="max_length", max_length=8)
+ # print(model_inputs)
+
+ # from transformers import AutoTokenizer
+ # checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+ # tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+
+ # sequence = "I've been waiting for a HuggingFace course my whole life."
+
+ # model_inputs = tokenizer(sequence)
+ # print("model_inputs = tokenizer(sequence)")
+ # print(model_inputs)
+ # print(model_inputs["input_ids"])
+
+ # tokens = tokenizer.tokenize(sequence)
+ # print("tokens = tokenizer.tokenize(sequence)")
+ # print(tokens)
+ # ids = tokenizer.convert_tokens_to_ids(tokens)
+ # print(sequence)
+ # print(ids)
+
+ # import torch
+ # from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+ # checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+ # tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+ # model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
+ # sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
+
+ # tokens = tokenizer(sequences, padding=True, truncation=True, return_tensors="pt")
+ # output = model(**tokens)
+ # print(output)
+
+ from transformers import AutoTokenizer, AutoModel
+
+ tokenizer = AutoTokenizer.from_pretrained("gpt2")
+ model = AutoModel.from_pretrained("gpt2")
+
+ encoded = tokenizer("Hey!", return_tensors="pt")
+ result = model(**encoded)
+ print(result)
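The live block at the end of chapter1_2.py prints the entire output object. For a bare `AutoModel` (GPT-2 here, with no task head), the field of interest is `last_hidden_state`; a minimal sketch of inspecting just that:

```python
import torch
from transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModel.from_pretrained("gpt2")

encoded = tokenizer("Hey!", return_tensors="pt")
with torch.no_grad():  # inference only, no gradients needed
    result = model(**encoded)

# Shape is (batch_size, sequence_length, hidden_size); hidden_size is 768 for GPT-2.
print(result.last_hidden_state.shape)
```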
chapter3.py ADDED
@@ -0,0 +1,247 @@
+ # import torch
+ # from transformers import AdamW, AutoTokenizer, AutoModelForSequenceClassification
+
+ # # Same as before
+ # checkpoint = "bert-base-uncased"
+ # tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+ # model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
+ # sequences = [
+ #     "I've been waiting for a HuggingFace course my whole life.",
+ #     "This course is amazing!",
+ # ]
+ # batch = tokenizer(sequences, padding=True, truncation=True, return_tensors="pt")
+
+ # # This is new
+ # batch["labels"] = torch.tensor([1, 1])
+
+ # optimizer = AdamW(model.parameters())
+ # loss = model(**batch).loss
+ # loss.backward()
+ # optimizer.step()
+
+ from datasets import load_dataset
+
+ # raw_datasets = load_dataset("glue", "sst2")
+ # raw_datasets
+ # raw_train_dataset = raw_datasets["train"]
+ # output = raw_train_dataset[0]['sentence']
+ # print(output)
+
+ # raw_train_dataset = raw_datasets["validation"]
+ # output = raw_train_dataset[87]
+
+ # print(raw_train_dataset.features)
+
+ # from transformers import AutoTokenizer
+
+ # checkpoint = "bert-base-uncased"
+ # tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+ # print(tokenizer(output))
+ # inputs = tokenizer(output)
+ # print(tokenizer.convert_ids_to_tokens(inputs["input_ids"]))
+
+ # inputs = tokenizer("This is the first sentence.")
+ # print(inputs)
+ # print(tokenizer.convert_ids_to_tokens(inputs["input_ids"]))
+ # # tokenized_sentences_1 = tokenizer(raw_datasets["train"]["sentence1"])
+ # # tokenized_sentences_2 = tokenizer(raw_datasets["train"]["sentence2"])
+
+ # # inputs = tokenizer("This is the first sentence.", "This is the second one.")
+ # # inputs = tokenizer.convert_ids_to_tokens(inputs["input_ids"])
+ # # print(inputs)
+
+ # def tokenize_function(example):
+ #     return tokenizer(example["sentence"], truncation=True)
+
+ # tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
+ # print(tokenized_datasets)
+
+ # from transformers import DataCollatorWithPadding
+
+ # data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
+ # samples = tokenized_datasets["train"][:8]
+ # samples = {k: v for k, v in samples.items() if k not in ["idx", "sentence1", "sentence2"]}
+
+ # print([len(x) for x in samples["input_ids"]])
+
+ # batch = data_collator(samples)
+ # print(batch)
+ # print({k: v.shape for k, v in batch.items()})
+
+ # # Try it yourself
+ from datasets import load_dataset
+
+ raw_datasets = load_dataset("glue", "sst2")
+ raw_train_dataset = raw_datasets["train"]
+ output = raw_train_dataset[0]['sentence']
+ # print(output)
+
+ from transformers import AutoTokenizer
+
+ checkpoint = "bert-base-uncased"
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+ # print(tokenizer(output))
+ inputs = tokenizer(output)
+ # print(tokenizer.convert_ids_to_tokens(inputs["input_ids"]))
+
+ tokenized_dataset = tokenizer(
+     output,
+     padding=True,
+     truncation=True,
+ )
+
+ def tokenize_function(example):
+     return tokenizer(example["sentence"], truncation=True)
+
+ tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
+ # print(tokenized_datasets)
+
+
+ # from datasets import load_dataset
+ # from transformers import AutoTokenizer, DataCollatorWithPadding
+
+ # raw_datasets = load_dataset("glue", "mrpc")
+ # checkpoint = "bert-base-uncased"
+ # tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+
+
+ # def tokenize_function(example):
+ #     return tokenizer(example["sentence1"], example["sentence2"], truncation=True)
+
+
+ # tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
+ # data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
+
+ # from transformers import TrainingArguments
+
+ # training_args = TrainingArguments("test-trainer")
+
+ # from transformers import AutoModelForSequenceClassification
+
+ # model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
+
+ # from transformers import Trainer
+
+ # trainer = Trainer(
+ #     model,
+ #     training_args,
+ #     train_dataset=tokenized_datasets["train"],
+ #     eval_dataset=tokenized_datasets["validation"],
+ #     data_collator=data_collator,
+ #     tokenizer=tokenizer,
+ # )
+ # predictions = trainer.predict(tokenized_datasets["validation"])
+ # print(predictions.predictions.shape, predictions.label_ids.shape)
+
+ # import numpy as np
+
+ # preds = np.argmax(predictions.predictions, axis=-1)
+
+ # import evaluate
+
+ # metric = evaluate.load("glue", "mrpc")
+ # metric.compute(predictions=preds, references=predictions.label_ids)
+
+ # def compute_metrics(eval_preds):
+ #     metric = evaluate.load("glue", "mrpc")
+ #     logits, labels = eval_preds
+ #     predictions = np.argmax(logits, axis=-1)
+ #     return metric.compute(predictions=predictions, references=labels)
+
+ # training_args = TrainingArguments("test-trainer", evaluation_strategy="epoch")
+ # model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
+
+ # trainer = Trainer(
+ #     model,
+ #     training_args,
+ #     train_dataset=tokenized_datasets["train"],
+ #     eval_dataset=tokenized_datasets["validation"],
+ #     data_collator=data_collator,
+ #     tokenizer=tokenizer,
+ #     compute_metrics=compute_metrics,
+ # )
+ # trainer.train()
+ from transformers import AutoTokenizer, DataCollatorWithPadding
+
+ data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
+ tokenized_datasets = tokenized_datasets.remove_columns(["sentence", "idx"])
+ tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
+ tokenized_datasets.set_format("torch")
+ tokenized_datasets["train"].column_names
+
+ from torch.utils.data import DataLoader
+
+ train_dataloader = DataLoader(
+     tokenized_datasets["train"], shuffle=True, batch_size=8, collate_fn=data_collator
+ )
+ eval_dataloader = DataLoader(
+     tokenized_datasets["validation"], batch_size=8, collate_fn=data_collator
+ )
+ for batch in train_dataloader:
+     break
+ output = {k: v.shape for k, v in batch.items()}
+ # print(output)
+
+ from transformers import AutoModelForSequenceClassification
+
+ model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
+
+ outputs = model(**batch)
+ # print(outputs.loss, outputs.logits.shape)
+
+ from transformers import AdamW  # deprecated in newer releases; torch.optim.AdamW is the drop-in replacement
+
+ optimizer = AdamW(model.parameters(), lr=5e-5)
+
+ from transformers import get_scheduler
+
+ num_epochs = 3
+ num_training_steps = num_epochs * len(train_dataloader)
+ lr_scheduler = get_scheduler(
+     "linear",
+     optimizer=optimizer,
+     num_warmup_steps=0,
+     num_training_steps=num_training_steps,
+ )
+ print(num_training_steps)
+
+ # The training loop
+ import torch
+
+ device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+ model.to(device)
+ # print(device)
+
+
+ from tqdm.auto import tqdm
+
+ progress_bar = tqdm(range(num_training_steps))
+
+ model.train()
+ for epoch in range(num_epochs):
+     for batch in train_dataloader:
+         batch = {k: v.to(device) for k, v in batch.items()}
+         outputs = model(**batch)
+         loss = outputs.loss
+         loss.backward()
+
+         optimizer.step()
+         lr_scheduler.step()
+         optimizer.zero_grad()
+         progress_bar.update(1)
+
+ # The evaluation loop
+ import evaluate
+
+ metric = evaluate.load("glue", "sst2")  # use the metric that matches the dataset in use (GLUE SST-2)
+ model.eval()
+ for batch in eval_dataloader:
+     batch = {k: v.to(device) for k, v in batch.items()}
+     with torch.no_grad():
+         outputs = model(**batch)
+
+     logits = outputs.logits
+     predictions = torch.argmax(logits, dim=-1)
+     metric.add_batch(predictions=predictions, references=batch["labels"])
+
+ print(metric.compute())
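The script trains and evaluates but never persists the fine-tuned weights, so a rerun starts from scratch. A minimal sketch of saving them at the end (the directory name is ours, purely illustrative):

```python
# Save the fine-tuned model and tokenizer for later reuse.
save_dir = "sst2-finetuned-bert"  # hypothetical output path
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)
```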
chapter4.py ADDED
@@ -0,0 +1,4 @@
+ from transformers import AutoTokenizer, AutoModelForMaskedLM
+
+ tokenizer = AutoTokenizer.from_pretrained("camembert-base", force_download=True, resume_download=False)
+ model = AutoModelForMaskedLM.from_pretrained("camembert-base")
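chapter4.py downloads CamemBERT but never runs it. A minimal sketch of exercising the pair with a fill-mask pipeline (the example sentence is the one from the camembert-base model card; CamemBERT's mask token is `<mask>`):

```python
from transformers import pipeline

# Reuse the already-downloaded model and tokenizer in a fill-mask pipeline.
unmasker = pipeline("fill-mask", model=model, tokenizer=tokenizer)
for candidate in unmasker("Le camembert est <mask> :)"):
    print(candidate["token_str"], round(candidate["score"], 3))
```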
chat.py ADDED
@@ -0,0 +1,66 @@
+ import random
+
+ def random_response(message, history):
+     return random.choice(["Yes", "No"])
+
+ import time
+ import gradio as gr
+
+ def yes_man(message, history):
+     if message.endswith("?"):
+         return "Yes"
+     else:
+         return "Ask me anything!"
+
+ def echo(message, history, system_prompt, tokens):
+     response = f"System prompt: {system_prompt}\n Message: {message}."
+     for i in range(min(len(response), int(tokens))):
+         time.sleep(0.05)
+         yield response[: i + 1]
+
+ # from langchain.chat_models import ChatOpenAI
+ # from langchain.schema import AIMessage, HumanMessage
+ # import openai
+ # import gradio as gr
+ # import os
+
+ # os.environ["OPENAI_API_KEY"] = "sk-..."  # Replace with your key
+
+ # llm = ChatOpenAI(temperature=1.0, model='gpt-3.5-turbo-0613')
+
+ # def predict(message, history):
+ #     history_langchain_format = []
+ #     for human, ai in history:
+ #         history_langchain_format.append(HumanMessage(content=human))
+ #         history_langchain_format.append(AIMessage(content=ai))
+ #     history_langchain_format.append(HumanMessage(content=message))
+ #     gpt_response = llm(history_langchain_format)
+ #     return gpt_response.content
+
+ # gr.ChatInterface(predict).launch()
+
+ import openai
+ import gradio as gr
+
+ openai.api_key = "sk-..."  # Replace with your key
+
+ from langchain.chat_models import ChatOpenAI
+ from langchain.schema import AIMessage, HumanMessage
+ import openai
+ import gradio as gr
+ import os
+
+ os.environ["OPENAI_API_KEY"] = "sk-..."  # Replace with your key
+
+ llm = ChatOpenAI(temperature=1.0, model='gpt-3.5-turbo-0613')
+
+ def predict(message, history):
+     history_langchain_format = []
+     for human, ai in history:
+         history_langchain_format.append(HumanMessage(content=human))
+         history_langchain_format.append(AIMessage(content=ai))
+     history_langchain_format.append(HumanMessage(content=message))
+     gpt_response = llm(history_langchain_format)
+     return gpt_response.content
+
+ gr.ChatInterface(predict).launch()
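Hard-coding an API key in a committed file publishes it to anyone who can read the repo; the usual fix is to read it from the environment and fail fast when it is missing. A minimal sketch:

```python
import os
import openai

# Read the key from the environment so it never lands in version control.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if openai_api_key is None:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable first.")
openai.api_key = openai_api_key
```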
get-pip.py ADDED
The diff for this file is too large to render. See raw diff
 
gradio_cached_examples/16/log.csv ADDED
@@ -0,0 +1,4 @@
+ component 0,flag,username,timestamp
+ "[[""Hello"",""Ask me anything!""]]",,,2024-01-28 21:12:34.005450
+ "[[""Want a fuck?"",""Yes""]]",,,2024-01-28 21:12:34.005450
+ "[[""Why so fucking sexy?"",""Yes""]]",,,2024-01-28 21:12:34.021247
test-trainer/checkpoint-500/config.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "_name_or_path": "bert-base-uncased",
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.37.1",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 30522
+ }
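Because checkpoint-500 contains config.json, the model weights, and the tokenizer files, it loads like any Hub model. A minimal sketch (path relative to the repo root):

```python
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load the half-trained classifier straight from the local checkpoint directory.
ckpt = "test-trainer/checkpoint-500"
model = AutoModelForSequenceClassification.from_pretrained(ckpt)
tokenizer = AutoTokenizer.from_pretrained(ckpt)
```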
test-trainer/checkpoint-500/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:26d438974d3ca04d8c051e587f62520fc01652131e9b1e0ba7811f4cbbc47510
+ size 437958648
test-trainer/checkpoint-500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d3a88fbdf3bc025085a10bc736f4c05b4cd1e0fafdf303bccf70e2272e5de73e
+ size 876032762
test-trainer/checkpoint-500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b8ce914157a8fa18ed7f5bc895c6169e23a29396d283307ed2eadabfbf64eece
+ size 13990
test-trainer/checkpoint-500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fdb639f9f8f7e062a303535330b8bbcc38edb9ca16539b24483e41c8655ec97d
+ size 1064
test-trainer/checkpoint-500/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
test-trainer/checkpoint-500/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
test-trainer/checkpoint-500/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
test-trainer/checkpoint-500/trainer_state.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 1.0893246187363834,
+   "eval_steps": 500,
+   "global_step": 500,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.09,
+       "learning_rate": 3.184458968772695e-05,
+       "loss": 0.4912,
+       "step": 500
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 1377,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 3,
+   "save_steps": 500,
+   "total_flos": 147381246548880.0,
+   "train_batch_size": 8,
+   "trial_name": null,
+   "trial_params": null
+ }
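trainer_state.json records a run stopped mid-training (step 500 of 1377, epoch ~1.09 of 3). With the Trainer API, such a run can pick up where it left off; a minimal sketch, assuming a `trainer` built as in the commented-out setup in chapter3.py:

```python
# Resume training from the saved checkpoint instead of restarting from step 0.
trainer.train(resume_from_checkpoint="test-trainer/checkpoint-500")
```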
test-trainer/checkpoint-500/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5817e64c3b9b34fc6bf81ac86329df992d56705557d2276ff5fae264646f7b08
+ size 4728
test-trainer/checkpoint-500/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
test-trainer/runs/Jan27_22-08-26_DESKTOP-KTM59NT/events.out.tfevents.1706382507.DESKTOP-KTM59NT.3612.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1f1636178a7430729e8acf5a52f10f6c9661d1c0295049a135bfb233d27c43c7
+ size 4492
test-trainer/runs/Jan28_06-39-34_DESKTOP-KTM59NT/events.out.tfevents.1706413175.DESKTOP-KTM59NT.17992.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6dd42909f5f35f4ff8a387e96c95ae5809f9d8e3ba2923a207e71e71dc70407d
+ size 4335