Spaces:
Runtime error
Runtime error
File size: 456 Bytes
90653e1 174d40b 90653e1 9f10a81 90653e1 |
1 2 3 4 5 6 7 8 9 10 11 |
import numpy as np
import pandas as pd
# Load the records from data/regItems.json (path is relative to the current
# working directory).
df = pd.read_json(r"data/regItems.json")

# Treat empty strings as missing, then drop every row that has a missing
# value in ANY column (not only in 'paragraphText').
df = df.replace(to_replace="", value=np.nan).dropna(axis=0)

# NOTE(review): a second clean-up step was disabled in the original script.
# If it is ever re-enabled, use a raw string and the character class
# [a-zA-Z] (the original "[a-zA-z]" also matches "[", "\", "]", "^", "_", "`"):
#   df['paragraphText'] = df['paragraphText'].str.replace(r"[a-zA-Z]\d\w+", ". ", regex=True)

# Strip "OLD SECTION" and everything after it up to the end of that line
# ("." does not match newlines). The pattern is case-sensitive, so it has to
# run before the text is lower-cased.
df['paragraphText'] = (
    df['paragraphText']
    .str.replace(r"OLD SECTION.*", "", regex=True)
    .str.lower()
)

# Plain Python list of the cleaned paragraph strings.
data = df['paragraphText'].tolist()