File size: 3,420 Bytes
0fba077
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# Copyright 2023 by Jan Philip Wahle, https://jpwahle.com/
# All rights reserved.

import os

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from scipy.stats import gaussian_kde

dirname = os.path.dirname(__file__)

# Load the csv file into a pandas DataFrame
papers_df = pd.read_csv(
    os.path.join(dirname, "data/nlp_papers_field_diversity.csv")
)

# Compute the mean CFDI (mean of the "incoming_diversity" column)
mean_cfdi = papers_df["incoming_diversity"].mean()

# Load the citation-age values that generate_maoc_plot uses as its reference
# distribution: one float per non-empty line of the text file.
with open(
    os.path.join(dirname, "data/nlp_papers_citation_age.txt"),
    "r",
    encoding="utf-8",
) as filehandle:
    # float() ignores surrounding whitespace, so the trailing newline needs no
    # manual slicing. Slicing with line[:-1] would chop the last digit off a
    # final line that has no newline. Blank lines are skipped instead of
    # raising ValueError.
    mean_citation_ages = [float(line) for line in filehandle if line.strip()]


def generate_cfdi_plot(input_cfdi):
    """
    Plot the CFDI density of the reference papers and mark one CFDI value.

    A Gaussian kernel density estimate is fitted to the strictly positive
    "incoming_diversity" values of the module-level ``papers_df`` and drawn
    as a filled curve. ``input_cfdi`` is highlighted with a red star at the
    height of the curve and a dashed vertical line from y=0 up to the star.

    Args:
        input_cfdi: CFDI value to highlight on the distribution.

    Returns:
        The newly created matplotlib figure. It is not closed here, so the
        caller decides when to release it.
    """
    sns.set(font_scale=1.3, style="whitegrid")

    # Only strictly positive diversity values enter the density estimate
    data = papers_df[papers_df["incoming_diversity"] > 0]["incoming_diversity"]
    kde = gaussian_kde(data)
    x_vals = np.linspace(data.min(), data.max(), 1000)
    y_vals = kde.evaluate(x_vals)

    fig, ax = plt.subplots()  # create a new figure and axis

    ax.fill_between(x_vals, y_vals, color="skyblue", alpha=0.3)
    ax.plot(x_vals, y_vals, color="skyblue", linewidth=2, label="Distribution")

    # Height of the curve at the highlighted value. np.interp clamps to the
    # curve's end points when input_cfdi lies outside the sampled range.
    marker_y = np.interp(input_cfdi, x_vals, y_vals)
    ax.scatter(
        input_cfdi,
        marker_y,
        c="r",
        marker="*",
        linewidths=1,
        zorder=2,
    )
    ax.vlines(input_cfdi, 0, marker_y, color="tomato", ls="--", lw=1.5)

    ax.set_xlabel("Citation Field Diversity Index (CFDI)", fontsize=15)
    ax.set_ylabel("Density", fontsize=15)
    sns.despine(left=True, bottom=True, right=True, top=True)

    return fig


def generate_maoc_plot(input_maoc):
    """
    Plot the mAoC density of the reference papers and mark one mAoC value.

    A Gaussian kernel density estimate is fitted to the module-level
    ``mean_citation_ages`` list and drawn as a filled curve. ``input_maoc``
    is highlighted with a red star at the height of the curve and a dashed
    vertical line from y=0 up to the star.

    Args:
        input_maoc: Mean age of citation value to highlight on the
            distribution.

    Returns:
        The newly created matplotlib figure. It is not closed here, so the
        caller decides when to release it.
    """
    sns.set(font_scale=1.3, style="whitegrid")

    # A Series gives the same values as a one-column DataFrame, minus the
    # column lookup.
    data = pd.Series(mean_citation_ages)
    kde = gaussian_kde(data)
    x_vals = np.linspace(data.min(), data.max(), 1000)
    y_vals = kde.evaluate(x_vals)

    fig, ax = plt.subplots()  # create a new figure and axis
    ax.fill_between(x_vals, y_vals, color="skyblue", alpha=0.3)
    ax.plot(x_vals, y_vals, color="skyblue", linewidth=2, label="Distribution")

    # Height of the curve at the highlighted value. np.interp clamps to the
    # curve's end points when input_maoc lies outside the sampled range.
    marker_y = np.interp(input_maoc, x_vals, y_vals)
    ax.scatter(
        input_maoc,
        marker_y,
        c="r",
        marker="*",
        linewidths=1,
        zorder=2,
    )
    ax.vlines(input_maoc, 0, marker_y, color="tomato", ls="--", lw=1.5)

    ax.set_xlabel("Mean Age of Citation (mAoC)", fontsize=15)
    ax.set_ylabel("Density", fontsize=15)
    sns.despine(left=True, bottom=True, right=True, top=True)

    return fig