Spaces:
Sleeping
Sleeping
smhavens
committed on
Commit
•
7a2e05d
1
Parent(s):
f47dc44
Testing dataset and model fine-tuning
Browse files
app.py
CHANGED
@@ -41,6 +41,10 @@ def training():
|
|
41 |
dataset = dataset["train"]
|
42 |
tokenized_datasets = dataset.map(tokenize_function, batched=True)
|
43 |
|
|
|
|
|
|
|
|
|
44 |
small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
|
45 |
small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))
|
46 |
|
@@ -50,9 +54,11 @@ def training():
|
|
50 |
|
51 |
|
52 |
def finetune(train, eval):
|
53 |
-
model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=5)
|
|
|
|
|
54 |
|
55 |
-
training_args = TrainingArguments(output_dir="test_trainer")
|
56 |
|
57 |
# USE THIS LINK
|
58 |
# https://huggingface.co/blog/how-to-train-sentence-transformers
|
@@ -140,6 +146,8 @@ def main():
|
|
140 |
text_button.click(check_answer, inputs=[text_input], outputs=[text_output, text_guesses])
|
141 |
# iface = gr.Interface(fn=greet, inputs="text", outputs="text")
|
142 |
iface.launch()
|
|
|
|
|
143 |
|
144 |
|
145 |
|
|
|
41 |
dataset = dataset["train"]
|
42 |
tokenized_datasets = dataset.map(tokenize_function, batched=True)
|
43 |
|
44 |
+
print(f"- The {dataset_id} dataset has {dataset['train'].num_rows} examples.")
|
45 |
+
print(f"- Each example is a {type(dataset['train'][0])} with a {type(dataset['train'][0]['set'])} as value.")
|
46 |
+
print(f"- Examples look like this: {dataset['train'][0]}")
|
47 |
+
|
48 |
small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
|
49 |
small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))
|
50 |
|
|
|
54 |
|
55 |
|
56 |
def finetune(train, eval):
|
57 |
+
# model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=5)
|
58 |
+
model_id = "sentence-transformers/all-MiniLM-L6-v2"
|
59 |
+
model = SentenceTransformer(model_id)
|
60 |
|
61 |
+
# training_args = TrainingArguments(output_dir="test_trainer")
|
62 |
|
63 |
# USE THIS LINK
|
64 |
# https://huggingface.co/blog/how-to-train-sentence-transformers
|
|
|
146 |
text_button.click(check_answer, inputs=[text_input], outputs=[text_output, text_guesses])
|
147 |
# iface = gr.Interface(fn=greet, inputs="text", outputs="text")
|
148 |
iface.launch()
|
149 |
+
|
150 |
+
training()
|
151 |
|
152 |
|
153 |
|