from genbit.genbit_metrics import GenBitMetrics
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots


def plot_genbit(result_json):
    """Build a pie chart of the gender-definition word percentages.

    Args:
        result_json: Mapping of GenBit metric names to values. The keys
            ``percentage_of_female_gender_definition_words``,
            ``percentage_of_male_gender_definition_words`` and
            ``percentage_of_non_binary_gender_definition_words`` are read.

    Returns:
        The Plotly Express pie figure with one slice per gender category,
        labelled inside each slice with its percentage and category name.

    Raises:
        KeyError: If any of the three keys above is missing.
    """
    # Slice label -> metric value; insertion order fixes the slice order.
    gender_shares = {
        "Female Gender": result_json[
            "percentage_of_female_gender_definition_words"
        ],
        "Male Gender": result_json[
            "percentage_of_male_gender_definition_words"
        ],
        "Non-Binary Gender": result_json[
            "percentage_of_non_binary_gender_definition_words"
        ],
    }
    shares_df = pd.DataFrame(
        {
            "Metric": list(gender_shares),
            "Value": list(gender_shares.values()),
        }
    )

    fig = px.pie(
        shares_df,
        names="Metric",
        values="Value",
        title="Combined Gender Definition Words Distribution",
    )
    fig.update_traces(textposition="inside", textinfo="percent+label")
    return fig


def eval_genbit(data):
    """Compute GenBit metrics for the first column of ``data``.

    The values of the first column are passed to GenBit as untokenized
    input, using English (``"en"``), a context window of 5, a distance
    weight of 0.95 and a percentile cutoff of 80.

    Args:
        data: Table-like object exposing ``columns`` and column indexing
            (presumably a ``pandas.DataFrame`` of text samples; confirm
            against the caller). Only the first column is used.

    Returns:
        A ``(result_df, result_plot)`` tuple: ``result_df`` is a DataFrame
        with one row per metric in columns ``Metric`` and ``Value``, and
        ``result_plot`` is the figure returned by ``plot_genbit``.
    """
    genbit_metrics = GenBitMetrics(
        language_code="en",
        context_window=5,
        distance_weight=0.95,
        percentile_cutoff=80,
    )

    samples = data[data.columns[0]].to_list()
    genbit_metrics.add_data(samples, tokenized=False)
    result_json = genbit_metrics.get_metrics(output_word_list=False)

    # One row per metric: the dict keys become the "Metric" column and the
    # single value column (named 0 by from_dict) becomes "Value".
    result_df = (
        pd.DataFrame.from_dict(result_json, orient="index")
        .reset_index()
        .rename(columns={"index": "Metric", 0: "Value"})
    )
    result_plot = plot_genbit(result_json)
    return result_df, result_plot