Spaces:
Sleeping
Sleeping
File size: 7,650 Bytes
ab7200a 32bf8cb ab7200a d043bf5 ab7200a 32bf8cb ab7200a d043bf5 ab7200a d043bf5 32bf8cb d043bf5 ab7200a d043bf5 ab7200a 32bf8cb ab7200a d043bf5 32bf8cb d043bf5 ab7200a d043bf5 ab7200a d043bf5 ab7200a d043bf5 ab7200a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
import streamlit as st
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
class LLaMAScoreAnalyzer:
    """Streamlit dashboard that loads per-language, per-model score CSVs and plots them.

    The sidebar widgets choose which languages, models and score columns are
    shown; the main area renders density plots of the score columns and,
    optionally, count plots of the categorical columns.
    """

    def __init__(self):
        """Define the selectable options and the CSV path of every score file."""
        self.languages = ["Nepali", "Hindi"]
        self.models = ["Baseline", "LoRA"]
        # Columns offered in the "Select Score Category" widget and drawn as densities.
        self.scores_gpt = ["relevance_score", "cc_score", "syntax_score", "complete_score"]
        self.rouge_bleu = ["rougeL", "bleu"]
        # Columns offered in the "Select Category" widget and drawn as count plots.
        self.categories = ["hallucination_type", "is_repeat"]
        self.DATA_PATH = {
            "Nepali": {
                "Baseline": "./data/nepali_baseline_all_scores.csv",
                "LoRA": "./data/nepali_lora_all_scores.csv",
            },
            "Hindi": {
                "Baseline": "./data/hindi_baseline_all_scores.csv",
                # Was "./data/nepali_baseline_all_scores.csv", which made the
                # Hindi LoRA charts display Nepali baseline data.
                # NOTE(review): filename follows the pattern of the other three
                # entries - confirm the file exists under ./data.
                "LoRA": "./data/hindi_lora_all_scores.csv",
            },
        }

    def load_samples(self, lang: str) -> None:
        """Write up to five random rows of every model's score file for ``lang``.

        One table is rendered per model listed in ``DATA_PATH[lang]``. Each table
        holds the prompt columns, the reference output (renamed from ``output``
        to ``expected_output``), that model's response as ``<model>_Response``
        and whichever score/category columns are present in the file.

        Raises:
            KeyError: if ``lang`` is not in ``DATA_PATH`` or a file has no
                ``cleaned_response`` column.
        """
        metric_cols = self.scores_gpt + self.rouge_bleu + ["is_repeat", "hallucination_type"]
        for model, csv_path in self.DATA_PATH[lang].items():
            # rename() returns a new frame and needs ``columns=`` to target headers.
            df = pd.read_csv(csv_path).rename(columns={"output": "expected_output"})
            response_col = f"{model}_Response"
            df[response_col] = df["cleaned_response"]
            wanted = ["instruction", "input", "expected_output", response_col, *metric_cols]
            # Files are not required to carry every column; show what is there.
            df = df[[col for col in wanted if col in df.columns]]
            # sample() raises when asked for more rows than the frame holds.
            st.write(df.sample(min(5, len(df))))

    def load_data(self, lang: str, model: str) -> pd.DataFrame:
        """Read the score file for ``lang``/``model`` and tag every row with both.

        Returns:
            The CSV contents with two extra columns, ``Language`` and ``Model``.
        """
        df = pd.read_csv(self.DATA_PATH[lang][model])
        df['Language'] = lang
        df['Model'] = model
        return df

    def draw_specific_plots(self, data: pd.DataFrame, categories, x_variable: str, title: str) -> "plt.Figure":
        """Overlay one filled KDE curve per (score column, group) pair on a single axis.

        Args:
            data: rows to plot.
            categories: score column names to draw.
            x_variable: column whose unique values define the groups.
            title: axis title.

        Returns:
            The matplotlib figure holding the plot.
        """
        fig, ax = plt.subplots(figsize=(12, 6))
        groups = data[x_variable].unique()  # computed once instead of per iteration
        palette = sns.color_palette("pastel", len(categories) * len(groups))
        for i, category in enumerate(categories):
            for j, group in enumerate(groups):
                subset = data[data[x_variable] == group]
                sns.kdeplot(data=subset, x=category, fill=True, common_norm=False, alpha=0.5,
                            ax=ax, color=palette[i * len(groups) + j],
                            label=f"{category} ({group})")
        ax.set_title(title, fontsize=16)
        ax.set_xlabel("Score", fontsize=12)
        ax.set_ylabel("Density", fontsize=12)
        ax.legend(title="Category (Language/Model)")
        return fig

    def draw_combined_density_plot(self, data: pd.DataFrame, title: str) -> "plt.Figure":
        """Overlay a filled KDE curve for every column in ``self.scores_gpt``.

        Returns:
            The matplotlib figure holding the plot.
        """
        fig, ax = plt.subplots(figsize=(12, 8))
        palette = sns.color_palette("pastel", len(self.scores_gpt))
        for category, color in zip(self.scores_gpt, palette):
            sns.kdeplot(data=data, x=category, fill=True, common_norm=False, alpha=0.5,
                        ax=ax, label=category, color=color)
        ax.set_title(title, fontsize=16)
        ax.set_xlabel("Score", fontsize=12)
        ax.set_ylabel("Density", fontsize=12)
        ax.legend(title="Score Categories")
        return fig

    def draw_bar_plot(self, data: pd.DataFrame, categories, x_variable: str, title: str) -> "plt.Figure":
        """Draw one count plot per category column, stacked vertically, split by ``x_variable``.

        Args:
            data: rows to count.
            categories: categorical column names, one subplot each.
            x_variable: column used as the hue.
            title: text appended to each subplot title.

        Returns:
            The matplotlib figure holding all subplots.

        Raises:
            ValueError: if ``categories`` is empty (there would be nothing to draw).
        """
        if len(categories) == 0:
            raise ValueError("draw_bar_plot needs at least one category")
        # squeeze=False always yields a 2-D axes array, so no single-plot special case.
        fig, axs = plt.subplots(len(categories), 1, figsize=(10, 6 * len(categories)), squeeze=False)
        # One colour per hue level; sizing the palette by row count was a mistake.
        palette = sns.color_palette("pastel", max(1, data[x_variable].nunique()))
        for ax, category in zip(axs[:, 0], categories):
            sns.countplot(data=data, x=category, hue=x_variable, palette=palette, ax=ax)
            ax.set_title(f"Distribution of {category} for {title}", fontsize=16)
            ax.set_xlabel(category, fontsize=12)
            ax.set_ylabel("Count", fontsize=12)
            ax.legend(title=x_variable)
        fig.tight_layout()
        return fig

    def _show_figure(self, fig) -> None:
        """Render ``fig`` in the app, then close it."""
        st.pyplot(fig)
        # pyplot keeps every figure it created alive until it is closed, and the
        # script is re-run on each widget interaction.
        plt.close(fig)

    def _render_comparisons(self, combined_data, languages, models, draw, columns, title_prefix=""):
        """Render one chart per language split by model, then per model split by language.

        The per-model charts are only drawn when more than one language is selected.
        ``draw`` is called as ``draw(subset, columns, hue_column, title)``.
        """
        for language in languages:
            subset = combined_data[combined_data['Language'] == language]
            self._show_figure(draw(subset, columns, 'Model', f"{title_prefix}{language}"))
        if len(languages) > 1:
            for model in models:
                subset = combined_data[combined_data['Model'] == model]
                self._show_figure(draw(subset, columns, 'Language', f"{title_prefix}{model}"))

    def score_analyzer(self) -> None:
        """Build the sidebar controls and render the charts they select.

        Without "Enable Detailed Charts View" a combined density plot of all
        score columns is drawn for every selected language/model pair. With it,
        the chosen score columns are compared across models and languages, and
        an extra checkbox enables count plots of the categorical columns.
        """
        st.sidebar.markdown("""
This App was created as a part of the project: "Fine-tuning LLaMA 3 with Low-Rank Adaptation for Nepali and Hindi"
""")
        st.title("Findings from Fine-tuning LLaMA 3 with Low-Rank Adaptation for Nepali and Hindi! ")
        st.markdown("""
Full post here:
""")
        show_samples = st.sidebar.checkbox("Show Sample Data", value=False)
        detailed_view = st.sidebar.checkbox("Enable Detailed Charts View", value=False)
        selected_languages = st.sidebar.multiselect("Select Languages", self.languages, default=["Nepali"])
        selected_gpt_scoring = st.sidebar.multiselect("Select Score Category", self.scores_gpt, default=["relevance_score"])
        selected_models = st.sidebar.multiselect("Select Models", self.models, default=["Baseline"])

        dfs = [self.load_data(lang, model) for lang in selected_languages for model in selected_models]

        if show_samples:
            for lang in selected_languages:
                st.write(f"Sample data for {lang}")
                self.load_samples(lang)

        # pd.concat raises on an empty list, which happens as soon as the user
        # clears either multiselect.
        if not dfs:
            st.warning("Select at least one language and one model to display results.")
            return
        combined_data = pd.concat(dfs, ignore_index=True)

        if detailed_view:
            self._render_comparisons(combined_data, selected_languages, selected_models,
                                     self.draw_specific_plots, selected_gpt_scoring,
                                     title_prefix="Distribution of Scores for ")
            st.sidebar.markdown("""
Show additional evaluation scores and categories below:
""")
            additional_score_categories = st.sidebar.checkbox("Hallucination and Instruction Repeat Statistics", value=False)
            if additional_score_categories:
                additional_categories = st.sidebar.multiselect("Select Category", self.categories, default=["hallucination_type"])
                if additional_categories:
                    self._render_comparisons(combined_data, selected_languages, selected_models,
                                             self.draw_bar_plot, additional_categories)
                else:
                    # A figure with zero subplots cannot be created.
                    st.warning("Select at least one category to display.")
        else:
            for language in selected_languages:
                for model in selected_models:
                    title = f"Distribution of Scores for Different Evaluation Criterias for {language} [{model} Model]"
                    mask = (combined_data['Language'] == language) & (combined_data['Model'] == model)
                    self._show_figure(self.draw_combined_density_plot(combined_data[mask], title))
def main():
    """Entry point: put the project heading in the sidebar, then run the dashboard."""
    heading = "Findings from Fine-tuning LLaMA 3 with Low-Rank Adaptation for Nepali and Hindi!"
    st.sidebar.header(heading)
    LLaMAScoreAnalyzer().score_analyzer()


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()
|