assert [i[0] for i in output_text] == split_full_text AssertionError

#2
by liaoch - opened

python3.8 punctuation.py "3qHkcs3kG44.transcript.en"

f.read() return type is: <class 'str'>
Traceback (most recent call last):
File "punctuation.py", line 24, in <module>
results = rp.punctuate(text, lang='en')
File "/home/liao/.local/lib/python3.8/site-packages/rpunct/punctuate.py", line 45, in punctuate
combined_preds = self.combine_results(text, preds_lst)
File "/home/liao/.local/lib/python3.8/site-packages/rpunct/punctuate.py", line 129, in combine_results
assert [i[0] for i in output_text] == split_full_text
AssertionError

# A Python program to test bert-restore-punctuation from Hugging Face.
# This only works for Python 3.8, not 3.10.
#
# Usage: python3 punctuation.py <filename>
# Reads <filename>, restores punctuation with rpunct, and writes the result
# to "<filename>.punctuated.txt".

# Check that the interpreter is Python 3.8 before importing rpunct.
import sys

if sys.version_info[:2] != (3, 8):
    print("This only works for python 3.8, not 3.10")
    sys.exit(1)

# Validate the command line first, so a usage error is reported without
# importing rpunct or constructing RestorePuncts.
if len(sys.argv) != 2:
    print("Usage: python3 punctuation.py <filename>")
    sys.exit(1)

from rpunct import RestorePuncts

input_file = sys.argv[1]

# Read the whole input; the file is closed again before punctuation runs.
# The encoding is explicit so the result does not depend on the locale.
with open(input_file, 'r', encoding='utf-8') as f:
    text = f.read()

# Collapse every run of whitespace (newlines, tabs, repeated spaces) into a
# single space so the input is a plain space-separated word stream.
# NOTE(review): presumably rpunct's word alignment assumes single-space
# separated words, and this avoids the AssertionError raised in
# combine_results -- confirm against the installed rpunct version.
text = " ".join(text.split())

rp = RestorePuncts()
results = rp.punctuate(text, lang='en')

# Save the results to a file next to the input.
output_file = input_file + ".punctuated.txt"
with open(output_file, 'w', encoding='utf-8') as f:
    f.write(results)

The input consists of the text fields from the JSON transcript of https://www.youtube.com/watch?v=3qHkcs3kG44.

Sign up or log in to comment