idiomify / explore /explore_fetch_pie_annotate.py
eubinecto's picture
[#5] literal2idiomatic:d-1-3 done (annotating with special tokens). Some of the data, however, are erroneous.
fca50f9
from idiomify.fetchers import fetch_pie
from preprocess import annotate
def main() -> None:
    """Print the 'Idiomatic_Sent' column of the annotated PIE data.

    Takes the object returned by ``fetch_pie()``, pipes it through
    ``annotate`` with ``boi_token="<idiom>"`` and ``eoi_token="</idiom>"``,
    and prints every 'Idiomatic_Sent' value on its own line.

    NOTE(review): ``annotate`` lives in ``preprocess``; presumably it wraps
    the idiom span of each sentence in the two tokens -- confirm there.
    """
    pie_df = fetch_pie()
    pie_df = pie_df.pipe(annotate, boi_token="<idiom>", eoi_token="</idiom>")
    # Only one column is read, so iterate it directly instead of building a
    # full per-row Series with iterrows().
    for sent in pie_df['Idiomatic_Sent']:
        print(sent)
# Script entry point: run the exploration only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()