# Spaces: Runtime error
import numpy as np
import pandas as pd
from pandas import json_normalize

from iblou_merger import ent_merge
def word(df) -> pd.DataFrame:
    """Flatten the ``words`` records of *df* into a DataFrame.

    Args:
        df: JSON-like data (a dict or a list of dicts) as accepted by
            ``pandas.json_normalize``; records are read from its ``words``
            key.

    Returns:
        A DataFrame with one row per record found under ``words``.  If pandas
        labelled the resulting column ``0`` (which it does for scalar
        records), that column is renamed to ``"words"``.
    """
    word_data = json_normalize(df, record_path=['words'])
    # Scalar records come back under the integer label 0; give it a real name.
    return word_data.rename(columns={0: 'words'})
def ent(df) -> pd.DataFrame:
    """Flatten the ``entities`` records of *df*, keeping ``text`` alongside.

    Args:
        df: JSON-like data (a dict or a list of dicts) as accepted by
            ``pandas.json_normalize``; records are read from its
            ``entities`` key and ``text`` is carried along as metadata.

    Returns:
        A DataFrame with one row per record found under ``entities`` plus a
        ``text`` column repeated for every record of the same parent.  If
        pandas labelled the record column ``0`` (which it does for scalar
        records), that column is renamed to ``"entities"``.
    """
    ent_data = json_normalize(df, record_path=['entities'], meta="text")
    # Scalar records come back under the integer label 0; give it a real name.
    return ent_data.rename(columns={0: 'entities'})
def merge(word_data, ent_data):
    """Join the word and entity frames row-by-row on their indexes.

    Args:
        word_data: DataFrame used as the left side of the join.
        ent_data: DataFrame used as the right side of the join.

    Returns:
        The result of ``pd.merge`` on both indexes (pandas' default inner
        join), so only index labels present in both frames are kept.
    """
    return pd.merge(word_data, ent_data, left_index=True, right_index=True)
def process_df(df) -> pd.DataFrame:
    """Build a cleaned word/entity table from JSON-like input.

    The input is flattened with ``word`` and ``ent``, joined with ``merge``,
    and passed through ``ent_merge`` (imported from ``iblou_merger``), whose
    output is loaded into a two-column ``words`` / ``entity`` frame.

    Args:
        df: JSON-like data forwarded unchanged to ``word`` and ``ent``.

    Returns:
        A DataFrame with columns ``words``, ``entity``, ``clean_words`` and
        ``clean_entity``, restricted to rows that hold at least one truthy
        value, with a fresh 0..n-1 index.
    """
    new_df = merge(word(df), ent(df))
    # NOTE(review): ent_merge is assumed to yield (word, entity) pairs, given
    # the two column names applied here -- confirm against iblou_merger.
    new_df = pd.DataFrame(ent_merge(new_df), columns=["words", "entity"])

    # With a non-string replacement, a regex match blanks the whole cell, so
    # any word containing a character outside [\w\s] becomes NaN.
    # np.nan is the canonical spelling and works on every NumPy version.
    new_df["clean_words"] = new_df['words'].replace(r'[^\w\s]+', np.nan, regex=True)
    # The pattern only matches the empty string: empty entities become NaN.
    new_df["clean_entity"] = new_df['entity'].replace(r'^(?![\s\S])', np.nan, regex=True)

    # Keep only rows where at least one column is truthy.
    new_df = new_df[new_df.any(axis=1)]
    # reset_index returns a new frame; the result must be used, otherwise the
    # filtered rows keep their original (gapped) index labels.
    return new_df.reset_index(drop=True)