Spaces:
Paused
Paused
import json

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from matplotlib.ticker import FuncFormatter
# Plot the distribution of "time to vote" from a JSONL log.
#
# Each non-blank line of `infile` is a JSON object providing
# data["left"]["finish"], data["right"]["finish"] and data["timestamp"].
# The duration recorded for a line is the timestamp minus the later of the
# two finish values.  (Presumably all three are epoch seconds, matching the
# x-axis label below -- confirm against whatever produces the log.)

infile = "output.jsonl"
date = "2024-03"  # used in the plot title and in the output file name

durations = []
with open(infile, encoding="utf-8") as f:
    for line in f:
        # Tolerate blank lines (e.g. a trailing newline at end of file)
        # instead of failing inside json.loads.
        if not line.strip():
            continue
        data = json.loads(line)
        left_finish = data["left"]["finish"]
        right_finish = data["right"]["finish"]
        vote_time = data["timestamp"]
        durations.append(vote_time - max(left_finish, right_finish))

if not durations:
    # np.max raises an opaque ValueError on an empty sequence; fail clearly.
    raise SystemExit(f"No records found in {infile}; nothing to plot.")

# Statistics are reported on the raw values, before any overflow clipping.
print(
    f"Avg: {np.mean(durations)}, Median: {np.median(durations)}, Max: {np.max(durations)}"
)

# Histogram layout: `num_bins_inside_cutoff` equal-width bins span
# [0, cutoff], followed by one extra bin of the same width that collects
# every duration above the cutoff.
cutoff = 200.0
num_bins_inside_cutoff = 20
bin_width = cutoff / num_bins_inside_cutoff
overflow_cap = cutoff + bin_width  # right edge of the overflow bin

# Move every value above the cutoff to the centre of the overflow bin so the
# long tail is drawn as a single bar instead of stretching the x-axis.
clipped_durations = np.where(
    np.asarray(durations) > cutoff, cutoff + 0.5 * bin_width, durations
)

# Regular bin edges from 0 to cutoff, plus the overflow bin's right edge.
bin_edges = np.append(
    np.linspace(0, cutoff, num_bins_inside_cutoff + 1), overflow_cap
)

# KDE is turned off so the individual bars stay clearly visible.
sns.histplot(clipped_durations, bins=bin_edges, kde=False)
plt.title(f'Distribution of "time to vote" {date}')
plt.xlabel("Duration (seconds)")
plt.ylabel("Frequency")

# Mark where the overflow bin starts.
plt.axvline(x=cutoff, color="red", linestyle="--")
plt.text(cutoff + 1, plt.ylim()[1] * 0.9, "Overflow", color="red", ha="left")


def _format_tick(value, _pos):
    """Return the x tick label, blanking the artificial overflow-bin edge.

    The overflow bin's right edge (`overflow_cap`) is not a real duration,
    so a tick that lands on it gets no label.  The value is derived from
    `cutoff` rather than hard-coded, and a formatter keeps labels tied to
    the tick positions matplotlib actually chooses at draw time.
    """
    if np.isclose(value, overflow_cap):
        return ""
    return f"{value:g}"


ax = plt.gca()
ax.xaxis.set_major_formatter(FuncFormatter(_format_tick))

# Ensure nothing is cut off in the plot.
plt.tight_layout()

# Save the plot to a file with high resolution.
plt.savefig(f"duration_distribution_time_to_vote_{date}.png", dpi=300)