Spaces:

jpwahle
/

field-diversity

Running

App Files Files Community

field-diversity / plots.py

jpwahle

Comment ages

1380b5a 7 months ago

raw history blame contribute delete

No virus

4.56 kB

	# Copyright 2023 by Jan Philip Wahle, https://jpwahle.com/
	# All rights reserved.

	import os

	import numpy as np
	import pandas as pd
	import seaborn as sns
	from matplotlib import pyplot as plt
	from scipy.stats import gaussian_kde

	# Directory that holds this module; data files are resolved relative to it.
	dirname = os.path.dirname(__file__)

	# Reference table read from the CSV stored under data/ next to this module.
	papers_df = pd.read_csv(
	    os.path.join(dirname, "data/nlp_papers_field_diversity.csv"),
	)

	# Mean of the "incoming_diversity" column over every row of the table.
	mean_cfdi = papers_df.loc[:, "incoming_diversity"].mean()

	# Reference sample of mean citation ages. The loader that used to fill this
	# list is disabled (see below), so the list is empty after import.
	mean_citation_ages = []

	# Disabled loader, kept for reference. It parsed one float per line of the
	# citation-age text file and appended it to mean_citation_ages.
	# with open(
	#     os.path.join(dirname, "data/nlp_papers_citation_age.txt"),
	#     "r",
	#     encoding="utf-8",
	# ) as filehandle:
	#     for line in filehandle:
	#         temp = float(line[:-1])
	#         mean_citation_ages.append(temp)


	def _plot_density_with_marker(reference, value, xlabel, compute_type):
	    """
	    Plot the density of a reference sample and mark one value on it.

	    A Gaussian KDE of ``reference`` is drawn as a filled curve. The sample
	    mean is marked with a grey dashed line labelled "Avg." and ``value`` is
	    marked with a red star, a dashed line and the label "This <compute_type>".

	    Args:
	        reference: 1-D collection of numbers forming the reference sample.
	        value: The number to highlight (assumed to be a scalar).
	        xlabel: Label of the x axis.
	        compute_type: Word used in the marker label, e.g. "paper".

	    Returns:
	        The matplotlib figure holding the plot. If the reference sample has
	        fewer than two points or no spread, no density curve and no average
	        are drawn and the marker is placed at density 0.
	    """
	    sns.set(font_scale=1.3, style="whitegrid")

	    data = np.asarray(reference, dtype=float)

	    # gaussian_kde cannot be fitted to fewer than two points or to a sample
	    # with zero variance, so in those cases the curve is skipped entirely.
	    curve = None
	    if data.size >= 2 and data.min() != data.max():
	        kde = gaussian_kde(data)
	        # Stretch the grid so it always contains the highlighted value.
	        # Otherwise np.interp would clamp to the density at the edge of the
	        # sample and the marker would float beside the drawn curve.
	        x_vals = np.linspace(
	            min(data.min(), value), max(data.max(), value), 1000
	        )
	        curve = (x_vals, kde.evaluate(x_vals))

	    fig, ax = plt.subplots()  # create a new figure and axis

	    marker_y = 0.0
	    if curve is not None:
	        x_vals, y_vals = curve
	        ax.fill_between(x_vals, y_vals, color="skyblue", alpha=0.3)
	        ax.plot(
	            x_vals, y_vals, color="skyblue", linewidth=2, label="Distribution"
	        )
	        # Height of the curve at the highlighted value.
	        marker_y = np.interp(value, x_vals, y_vals)

	    ax.scatter(
	        value,
	        marker_y,
	        c="r",
	        marker="*",
	        linewidths=2,
	        zorder=2,
	        s=32,
	    )
	    ax.vlines(value, 0, marker_y, color="tomato", ls="--", lw=1.5)

	    # Small offset that keeps the text labels off the tips of the lines.
	    epsilon = 0.005

	    if curve is not None:
	        # Mark the sample mean with a grey dashed line up to the curve.
	        mean_val = data.mean()
	        mean_y = np.interp(mean_val, x_vals, y_vals)
	        ax.vlines(mean_val, 0, mean_y, color="grey", ls="--", lw=1.5)
	        ax.text(
	            mean_val + epsilon,
	            mean_y + epsilon,
	            "Avg.",
	            color="grey",
	            fontsize=13,
	            ha="left",
	        )

	    ax.text(
	        value + epsilon,
	        marker_y + epsilon,
	        f"This {compute_type}",
	        color="#DC143C",
	        fontsize=13,
	        ha="left",
	    )

	    ax.set_xlabel(xlabel, fontsize=15)
	    ax.set_ylabel("Density", fontsize=15)
	    # Pass the axis explicitly so the spines are removed from this figure and
	    # not from whatever figure pyplot currently considers active.
	    sns.despine(ax=ax, left=True, bottom=True, right=True, top=True)

	    return fig


	def generate_cfdi_plot(input_cfdi, compute_type="paper"):
	    """
	    Generate the CFDI plot for one value.

	    The reference sample is the "incoming_diversity" column of ``papers_df``
	    restricted to values greater than 0.

	    Args:
	        input_cfdi: CFDI value to highlight on the distribution.
	        compute_type: Word used in the marker label ("This <compute_type>").

	    Returns:
	        The matplotlib figure with the density curve, the average and the
	        highlighted value.
	    """
	    diversity = papers_df["incoming_diversity"]
	    return _plot_density_with_marker(
	        diversity[diversity > 0],
	        input_cfdi,
	        "Citation Field Diversity Index (CFDI)",
	        compute_type,
	    )


	def generate_maoc_plot(input_maoc, compute_type="paper"):
	    """
	    Generate the mAoC plot for one value.

	    The reference sample is the module-level list ``mean_citation_ages``.
	    When that list holds fewer than two values (for example, when it is
	    empty), the figure contains only the highlighted value.

	    Args:
	        input_maoc: Mean age of citation to highlight on the distribution.
	        compute_type: Word used in the marker label ("This <compute_type>").

	    Returns:
	        The matplotlib figure for the highlighted value.
	    """
	    return _plot_density_with_marker(
	        mean_citation_ages,
	        input_maoc,
	        "Mean Age of Citation (mAoC)",
	        compute_type,
	    )