Spaces:
Runtime error
Runtime error
import random | |
def repeat(text, n_max_gram=3, n_max_repeat=3):
    """Duplicate a randomly chosen n-gram of ``text`` at random positions.

    The text is split on whitespace, one contiguous run of tokens (the
    n-gram) is picked at random, and copies of that run are inserted at
    randomly chosen positions in the token list.

    Args:
        text: Input string; tokens are its whitespace-separated pieces.
        n_max_gram: Upper bound (inclusive) on the n-gram length. The
            effective bound is capped at the number of tokens in ``text``.
        n_max_repeat: Upper bound (inclusive) on how many copies of the
            n-gram are inserted; at least one copy is always inserted.

    Returns:
        The perturbed tokens joined by single spaces, or ``""`` when
        ``text`` contains no tokens.
    """
    tokens = text.split()
    if not tokens:
        # Nothing to repeat; randint below would fail on an empty range.
        return ""
    # Cap the n-gram size at the token count so the start-index range
    # ``[0, len(tokens) - n_gram]`` is never empty for short inputs.
    n_gram = random.randint(1, min(n_max_gram, len(tokens)))
    repeat_token_idx = random.randint(0, len(tokens) - n_gram)
    repeated_tokens = tokens[repeat_token_idx:repeat_token_idx + n_gram]
    n_repeat = random.randint(1, n_max_repeat)
    for _ in range(n_repeat):
        # randint is inclusive, so ``len(tokens)`` means "append at the end".
        insert_idx = random.randint(0, len(tokens))
        tokens[insert_idx:insert_idx] = repeated_tokens
    return " ".join(tokens)
def remove(text, n_max_gram=3):
    """Delete one randomly chosen n-gram from ``text``.

    The text is split on whitespace and a single contiguous run of tokens
    is removed from a random position.

    Args:
        text: Input string; tokens are its whitespace-separated pieces.
        n_max_gram: Upper bound (inclusive) on the number of tokens
            removed. The effective bound is capped at the number of
            tokens in ``text``.

    Returns:
        The remaining tokens joined by single spaces. This is ``""`` when
        ``text`` has no tokens or when every token was removed.
    """
    tokens = text.split()
    if not tokens:
        # Nothing to remove; randint below would fail on an empty range.
        return ""
    # Cap the n-gram size at the token count so the start-index range
    # ``[0, len(tokens) - n_gram]`` is never empty for short inputs.
    n_gram = random.randint(1, min(n_max_gram, len(tokens)))
    remove_token_idx = random.randint(0, len(tokens) - n_gram)
    del tokens[remove_token_idx:remove_token_idx + n_gram]
    return " ".join(tokens)
def insert(text, vocab, n_max_tokens=3):
    """Insert randomly chosen vocabulary tokens at random positions.

    The text is split on whitespace and between 1 and ``n_max_tokens``
    tokens drawn from ``vocab`` are inserted one at a time. Each insertion
    position is drawn uniformly from every slot of the current token list,
    including before the first token and after the last one.

    Args:
        text: Input string; tokens are its whitespace-separated pieces.
            May be empty, in which case the result consists only of
            inserted tokens.
        vocab: Non-empty sequence of tokens to draw from.
        n_max_tokens: Upper bound (inclusive) on the number of inserted
            tokens; at least one token is always inserted.

    Returns:
        The resulting tokens joined by single spaces.

    Raises:
        IndexError: If ``vocab`` is empty (raised by ``random.choice``).
    """
    tokens = text.split()
    n_insert_token = random.randint(1, n_max_tokens)
    for _ in range(n_insert_token):
        # randint is inclusive: an upper bound of ``len(tokens)`` allows
        # appending at the end and keeps the range valid for empty input.
        insert_token_idx = random.randint(0, len(tokens))
        tokens.insert(insert_token_idx, random.choice(vocab))
    return " ".join(tokens)
def swap(text, vocab, n_max_tokens=3):
    """Replace randomly chosen tokens with different vocabulary tokens.

    The text is split on whitespace and between 1 and ``n_max_tokens``
    replacement rounds are performed. Each round picks a random position
    and overwrites the token there with a vocabulary entry that differs
    from it. The same position may be picked in more than one round.

    Args:
        text: Input string; tokens are its whitespace-separated pieces.
        vocab: Non-empty sequence of replacement tokens.
        n_max_tokens: Upper bound (inclusive) on the number of
            replacement rounds.

    Returns:
        The resulting tokens joined by single spaces, or ``""`` when
        ``text`` contains no tokens. A round is skipped when ``vocab``
        holds no entry different from the token at the chosen position.

    Raises:
        IndexError: If ``vocab`` is empty.
    """
    if not vocab:
        # Keep the error type random.choice would raise on an empty vocab.
        raise IndexError("Cannot choose from an empty sequence")
    tokens = text.split()
    if not tokens:
        # No position to swap; randint below would fail on an empty range.
        return ""
    n_swap_tokens = random.randint(1, n_max_tokens)
    for _ in range(n_swap_tokens):
        swap_token_idx = random.randint(0, len(tokens) - 1)
        current = tokens[swap_token_idx]
        # Choosing among the entries that differ from the current token
        # gives the same distribution as retry-until-different, but cannot
        # spin forever when every vocab entry equals the current token.
        candidates = [token for token in vocab if token != current]
        if not candidates:
            continue
        tokens[swap_token_idx] = random.choice(candidates)
    return " ".join(tokens)
def shuffle(text):
    """Return ``text`` with its whitespace-separated tokens in random order.

    The tokens are permuted in place with ``random.shuffle`` and re-joined
    with single spaces, so runs of whitespace in the input are collapsed.
    """
    words = text.split()
    random.shuffle(words)
    return " ".join(words)