|
import re |
|
|
|
|
|
|
|
|
|
|
|
def answer_cleansing_zero_shot(dataset, pred, must_choice=False):
    """Extract the first answer candidate from a raw prediction string.

    Args:
        dataset: Task name selecting the extraction rule. One of
            "commonsense-mc", "arithmetic", "commonsense-verify",
            "symbolic-coin" or "symbolic-letter" (exact match).
        pred: Raw prediction text.
        must_choice: Only used for "arithmetic". When true, the answer is
            a choice letter A-D instead of a number.

    Returns:
        The first candidate found, with one trailing "." removed, or ""
        when no candidate is found.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
    """
    pred = pred.strip()

    # Compare task names exactly: `dataset in ("commonsense-mc")` is a
    # substring test on a plain string (no tuple without a trailing comma),
    # which would accept names such as "" or "mc".
    if dataset == "commonsense-mc":
        candidates = re.findall(r"[A-E]", pred)
    elif dataset == "arithmetic":
        if must_choice:
            candidates = re.findall(r"[A-D]", pred)
        else:
            # Drop thousands separators so "1,234" is read as one number.
            candidates = re.findall(r"-?\d+\.?\d*", pred.replace(",", ""))
    elif dataset in ("commonsense-verify", "symbolic-coin"):
        # Turn quotes, punctuation and whitespace into single-space
        # separators, then keep only the yes/no tokens.
        tokens = re.sub(r"[\"'\n.\s:,]", " ", pred.lower()).split(" ")
        candidates = [tok for tok in tokens if tok in ("yes", "no")]
    elif dataset == "symbolic-letter":
        # The whole prediction, minus quotes, periods and whitespace, is
        # the single candidate.
        candidates = [re.sub(r"[\"'\n.\s]", "", pred)]
    else:
        raise ValueError("dataset is not properly defined ...")

    # Only the first candidate is kept; no candidate yields "".
    answer = candidates[0] if candidates else ""

    # The number pattern can capture a sentence-ending period ("42.").
    if answer.endswith("."):
        answer = answer[:-1]

    return answer
|
|
|
def type_cleasing(type):
    """Return the first known task-type name found in ``type``.

    The text is searched for "arithmetic", "commonsense-mc",
    "commonsense-verify", "symbolic-coin" or "symbolic-letter"; the
    leftmost occurrence is returned. If none is present, "UNDEFINED" is
    returned.
    """
    found = re.search(
        r'arithmetic|commonsense-mc|commonsense-verify|symbolic-coin|symbolic-letter',
        type,
    )
    if found is None:
        return "UNDEFINED"
    return found.group(0)
|
|
|
|
|
def entity_cleansing(ent):
    """Split a raw entity listing into a list of trimmed, non-empty names.

    Newlines, hyphens (with any surrounding whitespace), periods and commas
    all act as separators. Each piece is stripped of surrounding
    whitespace, and pieces that are empty after stripping are dropped.

    Args:
        ent: Raw text containing the entities.

    Returns:
        List of entity strings in order of appearance.
    """
    pieces = re.split(r"\n|\s*-\s*|\.|,", ent)
    # Strip before filtering so whitespace-only pieces (e.g. the gap in
    # "a, ,b") are discarded instead of surviving as "".
    trimmed = (piece.strip() for piece in pieces)
    return [name for name in trimmed if name]
|
|
|
def knowledge_cleansing(knowledge):
    """Normalise a knowledge string.

    Surrounding whitespace is stripped, a single leading "No, " is removed
    if present, and every remaining whitespace character is replaced by a
    plain space.

    Args:
        knowledge: Raw knowledge text.

    Returns:
        The cleaned text.
    """
    prefix = "No, "
    knowledge = knowledge.strip()
    if knowledge.startswith(prefix):
        # Slice off only the leading prefix; a pattern substitution would
        # also delete any later "No, " inside the text.
        knowledge = knowledge[len(prefix):]
    # Each whitespace character (newline, tab, ...) becomes one space.
    return re.sub(r"\s", " ", knowledge)
|
|
|
|
|
|
|
|
|
|