File size: 7,650 Bytes
ab7200a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32bf8cb
ab7200a
 
d043bf5
ab7200a
 
 
 
 
 
 
 
 
32bf8cb
ab7200a
 
 
 
 
 
 
 
d043bf5
ab7200a
 
 
 
 
 
 
 
 
 
d043bf5
 
 
 
32bf8cb
d043bf5
 
 
ab7200a
d043bf5
 
 
 
 
 
 
 
 
 
ab7200a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32bf8cb
ab7200a
 
d043bf5
 
 
32bf8cb
d043bf5
 
ab7200a
 
 
 
 
 
 
 
 
d043bf5
 
ab7200a
d043bf5
 
 
 
 
 
ab7200a
 
 
d043bf5
ab7200a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import streamlit as st

import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd


class LLaMAScoreAnalyzer:
    """Streamlit dashboard comparing evaluation scores of model variants.

    Results are read from one CSV file per (language, model) pair, as listed
    in ``DATA_PATH``, and rendered as density plots and count plots.
    """

    def __init__(self):
        """Set up the selectable options and the CSV path of every run."""
        self.languages = ["Nepali", "Hindi"]
        self.models = ["Baseline", "LoRA"]
        # Numeric score columns offered in the "Select Score Category" widget.
        self.scores_gpt = ["relevance_score", "cc_score", "syntax_score", "complete_score"]
        # Extra metric columns shown in the sample table when present.
        self.rouge_bleu = ["rougeL", "bleu"]
        # Categorical columns offered for the count plots.
        self.categories = ["hallucination_type", "is_repeat"]
        self.DATA_PATH = {
            "Nepali": {
                "Baseline": "./data/nepali_baseline_all_scores.csv",
                "LoRA": "./data/nepali_lora_all_scores.csv",
            },
            "Hindi": {
                "Baseline": "./data/hindi_baseline_all_scores.csv",
                # This entry used to point at the Nepali baseline file, so the
                # "Hindi / LoRA" charts silently showed Nepali baseline data.
                # NOTE(review): the name follows the pattern of the other
                # three entries -- confirm the file exists under ./data.
                "LoRA": "./data/hindi_lora_all_scores.csv",
            },
        }

    def _sample_frame(self, lang):
        """Build the side-by-side sample table for ``lang``.

        Args:
            lang: Key of ``DATA_PATH`` whose model CSVs should be combined.

        Returns:
            A DataFrame holding the prompt columns, one ``<model>_Response``
            column per model, and whichever score/category columns exist.
            The score/category values are those of the last model listed for
            ``lang``; only the response text is collected from every model.
        """
        responses = {}
        frame = None
        for model, path in self.DATA_PATH[lang].items():
            frame = pd.read_csv(path)
            responses[f"{model}_Response"] = frame["cleaned_response"]
        if frame is None:
            return pd.DataFrame()

        # rename() returns a new frame and needs ``columns=`` to target
        # column labels; without both it has no effect.
        frame = frame.rename(columns={"output": "expected_output"})
        # Assumes the per-model CSVs of one language list the same prompts in
        # the same row order -- TODO confirm. Assignment aligns on row index.
        for column, values in responses.items():
            frame[column] = values

        wanted = [
            "instruction",
            "input",
            "expected_output",
            *responses,
            *self.scores_gpt,
            *self.rouge_bleu,
            "is_repeat",
            "hallucination_type",
        ]
        return frame[[column for column in wanted if column in frame.columns]]

    def load_samples(self, lang):
        """Write up to five random sample rows for ``lang`` to the page."""
        frame = self._sample_frame(lang)
        # DataFrame.sample raises when asked for more rows than exist.
        st.write(frame.sample(min(5, len(frame))))

    def load_data(self, lang, model):
        """Read the CSV for ``lang``/``model`` and tag it with both labels.

        Returns:
            The loaded DataFrame with added ``Language`` and ``Model`` columns.
        """
        df = pd.read_csv(self.DATA_PATH[lang][model])
        df['Language'] = lang
        df['Model'] = model
        return df

    def draw_specific_plots(self, data, categories, x_variable, title):
        """Overlay one density curve per (score column, group) pair.

        Args:
            data: Frame containing the score columns and ``x_variable``.
            categories: Score column names to plot.
            x_variable: Column whose distinct values split each score column
                into separate curves.
            title: Title placed above the axes.

        Returns:
            The matplotlib figure holding the plot.
        """
        fig, ax = plt.subplots(figsize=(12, 6))

        # Computed once; the original recomputed it on every loop iteration.
        groups = data[x_variable].unique()
        palette = sns.color_palette("pastel", len(categories) * len(groups))

        for i, category in enumerate(categories):
            for j, group in enumerate(groups):
                subset = data[data[x_variable] == group]
                sns.kdeplot(data=subset, x=category, fill=True, common_norm=False, alpha=0.5,
                            ax=ax, color=palette[i * len(groups) + j],
                            label=f"{category} ({group})")

        ax.set_title(title, fontsize=16)
        ax.set_xlabel("Score", fontsize=12)
        ax.set_ylabel("Density", fontsize=12)
        ax.legend(title="Category (Language/Model)")

        return fig

    def draw_combined_density_plot(self, data, title):
        """Overlay the density curves of every column in ``self.scores_gpt``.

        Args:
            data: Frame containing the score columns.
            title: Title placed above the axes.

        Returns:
            The matplotlib figure holding the plot.
        """
        fig, ax = plt.subplots(figsize=(12, 8))

        palette = sns.color_palette("pastel", len(self.scores_gpt))

        for color, category in zip(palette, self.scores_gpt):
            sns.kdeplot(data=data, x=category, fill=True, common_norm=False, alpha=0.5,
                        ax=ax, label=category, color=color)

        ax.set_title(title, fontsize=16)
        ax.set_xlabel("Score", fontsize=12)
        ax.set_ylabel("Density", fontsize=12)
        ax.legend(title="Score Categories")

        return fig

    def draw_bar_plot(self, data, categories, x_variable, title):
        """Draw one count plot per category, stacked vertically.

        Args:
            data: Frame containing the category columns and ``x_variable``.
            categories: Categorical column names; one subplot is drawn each.
            x_variable: Column used as the hue (bar grouping) of every plot.
            title: Text appended to each subplot title.

        Returns:
            The matplotlib figure holding all subplots.

        Raises:
            ValueError: If ``categories`` is empty.
        """
        if not categories:
            raise ValueError("draw_bar_plot needs at least one category to plot")

        # squeeze=False always yields a 2-D axes array, so a single category
        # needs no special-casing.
        fig, axs = plt.subplots(len(categories), 1, figsize=(10, 6 * len(categories)),
                                squeeze=False)

        # One colour per hue level; the palette was previously sized by the
        # number of rows in ``data``.
        palette = sns.color_palette("pastel", data[x_variable].nunique())

        for ax, category in zip(axs[:, 0], categories):
            sns.countplot(data=data, x=category, hue=x_variable, palette=palette, ax=ax)
            ax.set_title(f"Distribution of {category} for {title}", fontsize=16)
            ax.set_xlabel(category, fontsize=12)
            ax.set_ylabel("Count", fontsize=12)
            ax.legend(title=x_variable)

        fig.tight_layout()
        return fig

    @staticmethod
    def _render(fig):
        """Show ``fig`` on the page, then close it.

        Figures made through ``plt.subplots`` stay registered with pyplot
        until closed, so without this they pile up on every rerun.
        """
        st.pyplot(fig)
        plt.close(fig)

    def score_analyzer(self):
        """Render the sidebar controls and the charts they select."""
        st.sidebar.markdown("""
                    This App was created as a part of the project: "Fine-tuning LLaMA 3 with Low-Rank Adaptation for Nepali and Hindi"
                            """)
        st.title("Findings from Fine-tuning LLaMA 3 with Low-Rank Adaptation for Nepali and Hindi!")
        st.markdown("""
                    Full post here: 
            """)
        show_samples = st.sidebar.checkbox("Show Sample Data", value=False)
        detailed_view = st.sidebar.checkbox("Enable Detailed Charts View", value=False)

        selected_languages = st.sidebar.multiselect("Select Languages", self.languages, default=["Nepali"])
        selected_gpt_scoring = st.sidebar.multiselect("Select Score Category", self.scores_gpt, default=["relevance_score"])
        selected_models = st.sidebar.multiselect("Select Models", self.models, default=["Baseline"])

        if show_samples:
            for lang in selected_languages:
                st.write(f"Sample data for {lang}")
                self.load_samples(lang)

        dfs = [
            self.load_data(lang, model)
            for lang in selected_languages
            for model in selected_models
        ]
        if not dfs:
            # pd.concat raises on an empty list; this happens when the user
            # clears either multiselect.
            st.warning("Select at least one language and one model to display charts.")
            return

        combined_data = pd.concat(dfs, ignore_index=True)
        if detailed_view:
            for language in selected_languages:
                language_data = combined_data[combined_data['Language'] == language]
                title = f"Distribution of Scores for {language}"
                self._render(self.draw_specific_plots(language_data, selected_gpt_scoring, 'Model', title))
            if len(selected_languages) > 1:
                for model in selected_models:
                    model_data = combined_data[combined_data['Model'] == model]
                    title = f"Distribution of Scores for {model}"
                    self._render(self.draw_specific_plots(model_data, selected_gpt_scoring, 'Language', title))

            st.sidebar.markdown("""
                    Show additional evaluation scores and categories below:
                            """)
            additional_score_categories = st.sidebar.checkbox("Hallucination and Instruction Repeat Statistics", value=False)
            if additional_score_categories:
                additional_categories = st.sidebar.multiselect("Select Category", self.categories, default=["hallucination_type"])
                if not additional_categories:
                    # draw_bar_plot cannot build a figure with zero subplots.
                    st.warning("Select at least one category to display the count plots.")
                else:
                    for language in selected_languages:
                        language_data = combined_data[combined_data['Language'] == language]
                        title = f"{language}"
                        self._render(self.draw_bar_plot(language_data, additional_categories, 'Model', title))
                    if len(selected_languages) > 1:
                        for model in selected_models:
                            model_data = combined_data[combined_data['Model'] == model]
                            title = f"{model}"
                            self._render(self.draw_bar_plot(model_data, additional_categories, 'Language', title))
        else:
            for language in selected_languages:
                for model in selected_models:
                    title = f"Distribution of Scores for Different Evaluation Criterias for {language} [{model} Model]"
                    selection = combined_data[(combined_data['Language'] == language) &
                                              (combined_data['Model'] == model)]
                    self._render(self.draw_combined_density_plot(selection, title))

        


def main():
    """Put the project headline in the sidebar, then render the dashboard."""
    headline = "Findings from Fine-tuning LLaMA 3 with Low-Rank Adaptation for Nepali and Hindi!"
    st.sidebar.header(headline)

    # The analyzer needs no arguments; build it and render in one step.
    LLaMAScoreAnalyzer().score_analyzer()


    
# Entry point: call main() only when this module runs as "__main__",
# so importing the module does not render anything.
if __name__ == "__main__":
    main()