| | |
| |
|
| | r'''############################################################################ |
| | ################################################################################ |
| | # |
| | # |
| | # Tegridy Plots Python Module (TPLOTS) |
| | # Version 1.0 |
| | # |
| | # Project Los Angeles |
| | # |
| | # Tegridy Code 2025 |
| | # |
| | # https://github.com/asigalov61/tegridy-tools |
| | # |
| | # |
| | ################################################################################ |
| | # |
| | # Copyright 2024 Project Los Angeles / Tegridy Code |
| | # |
| | # Licensed under the Apache License, Version 2.0 (the "License"); |
| | # you may not use this file except in compliance with the License. |
| | # You may obtain a copy of the License at |
| | # |
| | # http://www.apache.org/licenses/LICENSE-2.0 |
| | # |
| | # Unless required by applicable law or agreed to in writing, software |
| | # distributed under the License is distributed on an "AS IS" BASIS, |
| | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| | # See the License for the specific language governing permissions and |
| | # limitations under the License. |
| | # |
| | ################################################################################ |
| | ################################################################################ |
| | # |
| | # Critical dependencies |
| | # |
| | # !pip install numpy==1.24.4 |
| | # !pip install scipy |
| | # !pip install matplotlib |
| | # !pip install networkx |
| | # !pip3 install scikit-learn |
| | # |
| | ################################################################################ |
| | # |
| | # Future critical dependencies |
| | # |
| | # !pip install umap-learn |
| | # !pip install alphashape |
| | # |
| | ################################################################################ |
| | ''' |
| |
|
| | |
| | |
| | |
| |
|
| | import os |
| | from collections import Counter |
| | from itertools import groupby |
| |
|
| | import numpy as np |
| |
|
| | import networkx as nx |
| |
|
| | from sklearn.manifold import TSNE |
| | from sklearn import metrics |
| | from sklearn.preprocessing import MinMaxScaler |
| | from sklearn.decomposition import PCA |
| |
|
| | from scipy.ndimage import zoom |
| | from scipy.spatial import distance_matrix |
| | from scipy.sparse.csgraph import minimum_spanning_tree |
| | from scipy.stats import zscore |
| |
|
| | import matplotlib.pyplot as plt |
| | from PIL import Image |
| |
|
| | |
| | |
| | |
| |
|
# All recognized pitch-class chords (each chord is a sorted list of pitch
# classes 0-11).  Used by tones_chord_type() as the lookup table of valid
# chords; membership is tested with a sorted tones chord.
ALL_CHORDS_FULL = [[0], [0, 3], [0, 3, 5], [0, 3, 5, 8], [0, 3, 5, 9], [0, 3, 5, 10], [0, 3, 6],
                   [0, 3, 6, 9], [0, 3, 6, 10], [0, 3, 7], [0, 3, 7, 10], [0, 3, 8], [0, 3, 9],
                   [0, 3, 10], [0, 4], [0, 4, 6], [0, 4, 6, 9], [0, 4, 6, 10], [0, 4, 7],
                   [0, 4, 7, 10], [0, 4, 8], [0, 4, 9], [0, 4, 10], [0, 5], [0, 5, 8], [0, 5, 9],
                   [0, 5, 10], [0, 6], [0, 6, 9], [0, 6, 10], [0, 7], [0, 7, 10], [0, 8], [0, 9],
                   [0, 10], [1], [1, 4], [1, 4, 6], [1, 4, 6, 9], [1, 4, 6, 10], [1, 4, 6, 11],
                   [1, 4, 7], [1, 4, 7, 10], [1, 4, 7, 11], [1, 4, 8], [1, 4, 8, 11], [1, 4, 9],
                   [1, 4, 10], [1, 4, 11], [1, 5], [1, 5, 8], [1, 5, 8, 11], [1, 5, 9],
                   [1, 5, 10], [1, 5, 11], [1, 6], [1, 6, 9], [1, 6, 10], [1, 6, 11], [1, 7],
                   [1, 7, 10], [1, 7, 11], [1, 8], [1, 8, 11], [1, 9], [1, 10], [1, 11], [2],
                   [2, 5], [2, 5, 8], [2, 5, 8, 11], [2, 5, 9], [2, 5, 10], [2, 5, 11], [2, 6],
                   [2, 6, 9], [2, 6, 10], [2, 6, 11], [2, 7], [2, 7, 10], [2, 7, 11], [2, 8],
                   [2, 8, 11], [2, 9], [2, 10], [2, 11], [3], [3, 5], [3, 5, 8], [3, 5, 8, 11],
                   [3, 5, 9], [3, 5, 10], [3, 5, 11], [3, 6], [3, 6, 9], [3, 6, 10], [3, 6, 11],
                   [3, 7], [3, 7, 10], [3, 7, 11], [3, 8], [3, 8, 11], [3, 9], [3, 10], [3, 11],
                   [4], [4, 6], [4, 6, 9], [4, 6, 10], [4, 6, 11], [4, 7], [4, 7, 10], [4, 7, 11],
                   [4, 8], [4, 8, 11], [4, 9], [4, 10], [4, 11], [5], [5, 8], [5, 8, 11], [5, 9],
                   [5, 10], [5, 11], [6], [6, 9], [6, 10], [6, 11], [7], [7, 10], [7, 11], [8],
                   [8, 11], [9], [10], [11]]
| |
|
| | |
| |
|
# Chord/tone color classes; tones_chord_type() and tone_type() return an
# index into this list when their *_index flag is True.
CHORDS_TYPES = ['WHITE', 'BLACK', 'UNKNOWN', 'MIXED WHITE', 'MIXED BLACK', 'MIXED GRAY']

# Pitch classes of the white piano keys (C major scale).
WHITE_NOTES = [0, 2, 4, 5, 7, 9, 11]

# Pitch classes of the black piano keys.
BLACK_NOTES = [1, 3, 6, 8, 10]
| |
|
| | |
| | |
| | |
| |
|
def tones_chord_type(tones_chord,
                     return_chord_type_index=True,
                     ):

    """
    Returns tones chord type

    Classifies a pitch-class chord (list of ints 0-11) as one of
    CHORDS_TYPES: 'WHITE', 'BLACK', 'MIXED WHITE', 'MIXED BLACK',
    'MIXED GRAY', or 'UNKNOWN' for chords not in ALL_CHORDS_FULL.

    If return_chord_type_index is True (default), returns the index of the
    type in CHORDS_TYPES; otherwise returns the type string.

    NOTE(review): the second check below is an `if`, not `elif`, and
    MX covers all 12 pitch classes, so every valid chord with more than one
    tone falls into one of the MIXED categories — a pure-white multi-note
    chord is labelled 'MIXED WHITE', not 'WHITE'.  This looks intentional
    ('WHITE'/'BLACK' reserved for single tones) but should be confirmed.
    """

    WN = WHITE_NOTES
    BN = BLACK_NOTES
    MX = WHITE_NOTES + BLACK_NOTES  # all 12 pitch classes

    CHORDS = ALL_CHORDS_FULL

    tones_chord = sorted(tones_chord)

    ctype = 'UNKNOWN'

    if tones_chord in CHORDS:

        # Chord made entirely of white keys.
        if sorted(set(tones_chord) & set(WN)) == tones_chord:
            ctype = 'WHITE'

        # Chord made entirely of black keys.
        elif sorted(set(tones_chord) & set(BN)) == tones_chord:
            ctype = 'BLACK'

        # Multi-note chords: classify by white/black key balance.
        if len(tones_chord) > 1 and sorted(set(tones_chord) & set(MX)) == tones_chord:

            if len(sorted(set(tones_chord) & set(WN))) == len(sorted(set(tones_chord) & set(BN))):
                ctype = 'MIXED GRAY'

            elif len(sorted(set(tones_chord) & set(WN))) > len(sorted(set(tones_chord) & set(BN))):
                ctype = 'MIXED WHITE'

            elif len(sorted(set(tones_chord) & set(WN))) < len(sorted(set(tones_chord) & set(BN))):
                ctype = 'MIXED BLACK'

    if return_chord_type_index:
        return CHORDS_TYPES.index(ctype)

    else:
        return ctype
| |
|
| | |
| |
|
def tone_type(tone,
              return_tone_type_index=True
              ):

    """
    Return the key color ('WHITE' or 'BLACK') of a single MIDI tone.

    The tone is reduced to its pitch class (mod 12) first.  When
    return_tone_type_index is True (default), the index of the type in
    CHORDS_TYPES is returned instead of the string.
    """

    pitch_class = tone % 12

    label = "BLACK" if pitch_class in BLACK_NOTES else "WHITE"

    if return_tone_type_index:
        return CHORDS_TYPES.index(label)

    return label
| |
|
| | |
| |
|
def find_closest_points(points, return_points=True):

    """
    Find closest 2D points

    For every point, finds its nearest neighbour (by Euclidean distance,
    excluding itself).  Returns either the matched points (default) or the
    matched indices.
    """

    coords = np.array(points)

    # Pairwise Euclidean distances via broadcasting; the diagonal is set to
    # infinity so a point can never match itself.
    pair_diffs = coords[:, None, :] - coords[None, :, :]
    dist_table = np.linalg.norm(pair_diffs, axis=-1)
    np.fill_diagonal(dist_table, np.inf)

    nearest_idx = np.argmin(dist_table, axis=1)

    if return_points:
        return coords[nearest_idx].tolist()

    return nearest_idx.tolist()
| |
|
| | |
| |
|
def reduce_dimensionality_tsne(list_of_valies,
                               n_comp=2,
                               n_iter=5000,
                               verbose=True
                               ):

    """
    Reduces the dimensionality of the values using t-SNE.

    Returns the embedded coordinates as a nested list
    (len(list_of_valies) x n_comp).

    NOTE(review): the parameter name 'list_of_valies' is a typo, but it is
    part of the public keyword interface, so it is left unchanged.
    NOTE(review): TSNE's 'n_iter' argument was renamed 'max_iter' in
    scikit-learn >= 1.5; this call assumes an older scikit-learn — confirm
    against the pinned dependency versions.
    """

    vals = np.array(list_of_valies)

    tsne = TSNE(n_components=n_comp,
                n_iter=n_iter,
                verbose=verbose)

    reduced_vals = tsne.fit_transform(vals)

    return reduced_vals.tolist()
| |
|
| | |
| |
|
def compute_mst_edges(similarity_scores_list):

    """
    Computes the Minimum Spanning Tree (MST) edges based on the similarity
    scores.

    Builds a complete graph whose edge weights are (1 - similarity) and
    returns the MST edges as a list of (i, j) node-index pairs.
    """

    token_count = len(similarity_scores_list[0])

    sim_graph = nx.Graph()

    # Complete graph over all tokens; higher similarity -> lower weight.
    for a in range(token_count):
        for b in range(a + 1, token_count):
            sim_graph.add_edge(a, b, weight=1 - similarity_scores_list[a][b])

    spanning_tree = nx.minimum_spanning_tree(sim_graph)

    return list(spanning_tree.edges(data=False))
| |
|
| | |
| |
|
def square_binary_matrix(binary_matrix,
                         matrix_size=128,
                         interpolation_order=5,
                         return_square_matrix_points=False
                         ):

    """
    Reduces an arbitrary binary matrix to a square binary matrix

    Rescales the row dimension to matrix_size via spline interpolation and
    re-binarizes at 0.5.  Returns either the resized matrix (ndarray) or,
    when return_square_matrix_points is True, the [row, col] coordinates of
    the 1-cells after zero-padding columns out to matrix_size.

    NOTE(review): assumes the input has at most matrix_size columns — the
    padding step raises otherwise.
    """

    # Rescale rows only; columns are left untouched.
    factors = (matrix_size / len(binary_matrix), 1)
    rescaled = zoom(binary_matrix, factors, order=interpolation_order)
    rescaled = (rescaled > 0.5).astype(int)

    padded = np.zeros((matrix_size, matrix_size), dtype=int)
    padded[:, :rescaled.shape[1]] = rescaled

    ones_coords = np.column_stack(np.where(padded == 1)).tolist()

    if return_square_matrix_points:
        return ones_coords

    return rescaled
| |
|
| | |
| |
|
def square_matrix_points_colors(square_matrix_points):

    """
    Returns colors for square matrix points

    Points sharing an x coordinate are treated as one chord; each point gets
    the palette color of its chord's mean pitch class.  The returned list is
    parallel to the input points.
    """

    palette = generate_colors(12)

    tone_chords = []
    seen_chords = set()
    group_sizes = []

    # Consecutive points with the same x value form one chord.
    for _, grp in groupby(square_matrix_points, key=lambda pt: pt[0]):
        pitches = [pt[1] for pt in grp]
        tchord = tuple(sorted(set(p % 12 for p in sorted(set(pitches)))))
        seen_chords.add(tchord)
        tone_chords.append(tchord)
        group_sizes.append(len(pitches))

    seen_chords = sorted(seen_chords)  # kept for parity with the original code

    colors = []

    for tchord, size in zip(tone_chords, group_sizes):
        # One color per point, keyed by the chord's rounded mean pitch class.
        colors.extend([palette[round(sum(tchord) / len(tchord))]] * size)

    return colors
| |
|
| | |
| |
|
def hsv_to_rgb(h, s, v):

    """
    Convert an HSV color (h, s, v each in [0, 1]) to an RGB triple.
    """

    # Achromatic (gray) case.
    if s == 0.0:
        return v, v, v

    sector = int(h * 6.0)
    frac = (h * 6.0) - sector

    low = v * (1.0 - s)
    mid_down = v * (1.0 - s * frac)
    mid_up = v * (1.0 - s * (1.0 - frac))

    sector %= 6

    if sector == 0:
        return (v, mid_up, low)
    if sector == 1:
        return (mid_down, v, low)
    if sector == 2:
        return (low, v, mid_up)
    if sector == 3:
        return (low, mid_down, v)
    if sector == 4:
        return (mid_up, low, v)
    return (v, low, mid_down)
| |
|
| | |
| |
|
def generate_colors(n):
    """Return n fully saturated, full-value RGB colors with evenly spaced hues."""
    colors = []
    for i in range(n):
        colors.append(hsv_to_rgb(i / n, 1, 1))
    return colors
| |
|
| | |
| |
|
def add_arrays(a, b):
    """Element-wise sum of two equal-length sequences, as a list."""
    return [x + y for x, y in zip(a, b)]
| |
|
| | |
| |
|
def calculate_similarities(lists_of_values, metric='cosine'):
    # NOTE(review): despite the name, sklearn's pairwise_distances returns
    # pairwise DISTANCES (for 'cosine': 1 - cosine similarity), as a nested
    # list of shape (n, n).
    return metrics.pairwise_distances(lists_of_values, metric=metric).tolist()
| |
|
| | |
| |
|
def get_tokens_embeddings(x_transformer_model):
    # Extracts the token embedding matrix as a nested Python list
    # (num_tokens x embedding_dim).
    # NOTE(review): assumes the model exposes net.token_emb.emb — verify
    # against the x-transformers version in use.
    return x_transformer_model.net.token_emb.emb.weight.detach().cpu().tolist()
| |
|
| | |
| |
|
def minkowski_distance_matrix(X, p=3):

    """
    Pairwise Minkowski distance matrix (order p) of the rows of X,
    returned as a nested list.
    """

    data = np.array(X)

    # Broadcast pairwise absolute differences into an (n, n, d) tensor and
    # reduce along the feature axis.
    pair_diffs = np.abs(data[:, None, :] - data[None, :, :])
    dists = np.sum(pair_diffs ** p, axis=-1) ** (1 / p)

    return dists.tolist()
| |
|
| | |
| |
|
def robust_normalize(values):

    """
    Min-max normalize values to [0, 1] using an outlier-resistant min/max.

    The scaling min/max are taken from the IQR-filtered (Tukey fence)
    inliers; all values are then scaled and clipped into [0, 1], so
    outliers saturate at the range edges.
    """

    arr = np.array(values)

    q1 = np.percentile(arr, 25)
    q3 = np.percentile(arr, 75)
    iqr = q3 - q1

    # Tukey fences at 1.5 * IQR.
    inliers = arr[(arr >= q1 - 1.5 * iqr) & (arr <= q3 + 1.5 * iqr)]

    lo = np.min(inliers)
    hi = np.max(inliers)
    scaled = (arr - lo) / (hi - lo)

    return np.clip(scaled, 0, 1).tolist()
| |
|
| | |
| |
|
def min_max_normalize(values):

    """
    Column-wise min-max scaling of values into [0, 1].

    NOTE(review): sklearn's MinMaxScaler expects a 2D array-like
    (n_samples, n_features); 1D input must be reshaped by the caller.
    """

    scaler = MinMaxScaler()

    return scaler.fit_transform(values).tolist()
| |
|
| | |
| |
|
def remove_points_outliers(points, z_score_threshold=3):

    """
    Drop points whose coordinate z-score reaches the threshold on any axis.
    Returns the surviving points as a nested list.
    """

    pts = np.array(points)

    # Per-axis z-scores; a point survives only if every axis is in range.
    scores = np.abs(zscore(pts, axis=0))
    keep_mask = (scores < z_score_threshold).all(axis=1)

    return pts[keep_mask].tolist()
| |
|
| | |
| |
|
def generate_labels(lists_of_values,
                    return_indices_labels=False
                    ):

    """
    String labels for a list of values: either the positional indices
    ("0", "1", ...) when return_indices_labels is True, or the str() of
    each value (default).
    """

    if return_indices_labels:
        return [str(idx) for idx in range(len(lists_of_values))]

    return [str(val) for val in lists_of_values]
| |
|
| | |
| |
|
def reduce_dimensionality_pca(list_of_values, n_components=2):

    """
    Reduces the dimensionality of the values using PCA.

    Fits a fresh PCA on the input and returns the projected values as a
    nested list (len(list_of_values) x n_components).
    """

    pca = PCA(n_components=n_components)
    pca_data = pca.fit_transform(list_of_values)

    return pca_data.tolist()
| |
|
def reduce_dimensionality_simple(list_of_values,
                                 return_means=True,
                                 return_std_devs=True,
                                 return_medians=False,
                                 return_vars=False
                                 ):

    '''
    Reduces dimensionality of the values in a simple way:
    each enabled per-row summary statistic (mean, std, median, variance)
    contributes one output column, in that order.
    '''

    data = np.array(list_of_values)

    columns = []

    if return_means:
        columns.append(np.mean(data, axis=1))

    if return_std_devs:
        columns.append(np.std(data, axis=1))

    if return_medians:
        columns.append(np.median(data, axis=1))

    if return_vars:
        columns.append(np.var(data, axis=1))

    return np.column_stack(columns).tolist()
| |
|
| | |
| |
|
def reduce_dimensionality_2d_distance(list_of_values, p=5):

    '''
    Reduces the dimensionality of the values using 2d distance:
    returns the [i, j] index pairs of the minimum-spanning-tree edges of
    the point cloud under the Minkowski-p metric.
    '''

    vals = np.array(list_of_values)

    dists = distance_matrix(vals, vals, p=p)

    tree = minimum_spanning_tree(dists).toarray()

    # Non-zero entries of the (sparse) MST matrix are the tree edges,
    # enumerated in row-major order like the original nested loops.
    rows, cols = np.nonzero(tree)

    return [[int(r), int(c)] for r, c in zip(rows, cols)]
| |
|
| | |
| |
|
def normalize_to_range(values, n):

    """
    Linearly rescale values so they span the range [-n, n].
    """

    lo = min(values)
    hi = max(values)
    span = hi - lo

    # [lo, hi] -> [0, 1] -> [-n, n]
    return [((v - lo) / span * 2 * n) - n for v in values]
| |
|
| | |
| |
|
def reduce_dimensionality_simple_pca(list_of_values, n_components=2):

    '''
    Reduces the dimensionality of the values using simple PCA:
    each row is normalized, quantized, and summarized by its n_components
    most frequent quantization levels (zero-padded if fewer exist).
    '''

    reduced = []

    for row in list_of_values:

        # Normalize into [-k, k] and quantize to integer levels.
        quantized = [round(v * len(row)) for v in normalize_to_range(row, (n_components + 1) // 2)]

        # Most frequent levels, rescaled back, truncated/padded to size.
        levels = [lvl / len(row) for lvl, _ in Counter(quantized).most_common()]
        levels = levels[:n_components]
        levels = levels + [0] * (n_components - len(levels))

        reduced.append(levels)

    return reduced
| |
|
| | |
| |
|
def filter_and_replace_values(list_of_values,
                              threshold,
                              replace_value,
                              replace_above_threshold=False
                              ):

    """
    Replace values strictly below the threshold (default) — or strictly
    above it when replace_above_threshold is True — with replace_value.
    Returns a new nested list; the input is not modified.
    """

    result = np.array(list_of_values).copy()

    if replace_above_threshold:
        result[result > threshold] = replace_value
    else:
        result[result < threshold] = replace_value

    return result.tolist()
| |
|
| | |
| |
|
def find_shortest_constellation_path(points,
                                     start_point_idx,
                                     end_point_idx,
                                     p=5,
                                     return_path_length=False,
                                     return_path_points=False,
                                     ):

    """
    Finds the shortest path between two points of the points constellation.

    The constellation is the minimum spanning tree of the points under the
    Minkowski-p metric; the path is searched over the MST edges only.
    Returns the node-index path by default, the path length when
    return_path_length is True, or the path coordinates when
    return_path_points is True (points flag takes precedence).
    """

    pts = np.array(points)

    dists = distance_matrix(pts, pts, p=p)
    tree = minimum_spanning_tree(dists).toarray()

    # Rebuild the MST as a weighted networkx graph for path search.
    graph = nx.Graph()
    for a in range(len(pts)):
        for b in range(len(pts)):
            if tree[a, b] > 0:
                graph.add_edge(a, b, weight=tree[a, b])

    path = nx.shortest_path(graph,
                            source=start_point_idx,
                            target=end_point_idx,
                            weight='weight'
                            )

    path_length = nx.shortest_path_length(graph,
                                          source=start_point_idx,
                                          target=end_point_idx,
                                          weight='weight')

    path_points = pts[np.array(path)].tolist()

    if return_path_points:
        return path_points

    if return_path_length:
        return path_length

    return path
| |
|
| | |
| | |
| | |
| |
|
def plot_ms_SONG(ms_song,
                 preview_length_in_notes=0,
                 block_lines_times_list=None,
                 plot_title='ms Song',
                 max_num_colors=129,
                 drums_color_num=128,
                 plot_size=(11, 4),
                 note_height=0.75,
                 show_grid_lines=False,
                 return_plt=False,
                 timings_multiplier=1,
                 save_plt='',
                 save_only_plt_image=True,
                 save_transparent=False
                 ):

    '''ms SONG plot

    Draws a piano-roll of the 'note' events in ms_song (each note expected as
    ['note', start_ms, dur_ms, ?, pitch, ?, patch]) colored by patch.
    Optionally marks a preview boundary and block lines, saves to save_plt,
    and/or returns the matplotlib figure when return_plt is True.
    '''

    notes = [s for s in ms_song if s[0] == 'note']

    # Every note must carry 7 fields (the 7th is the patch).  BUG FIX: the
    # original used `and`, which let songs with MIXED note lengths through
    # and crash below on s[6]; `or` rejects any inconsistency.
    if (len(max(notes, key=len)) != 7) or (len(min(notes, key=len)) != 7):
        print('The song notes do not have patches information')
        print('Please add patches to the notes in the song')

    else:

        # Timings are in ms; convert to seconds after applying the multiplier.
        start_times = [(s[1] * timings_multiplier) / 1000 for s in notes]
        durations = [(s[2] * timings_multiplier) / 1000 for s in notes]
        pitches = [s[4] for s in notes]
        patches = [s[6] for s in notes]

        colors = generate_colors(max_num_colors)
        colors[drums_color_num] = (1, 1, 1)  # drums rendered white

        # Time of the preview boundary note.
        pbl = (notes[preview_length_in_notes][1] * timings_multiplier) / 1000

        fig, ax = plt.subplots(figsize=plot_size)

        # One rectangle per note: x = time, y = pitch, color = patch.
        for start, duration, pitch, patch in zip(start_times, durations, pitches, patches):
            rect = plt.Rectangle((start, pitch), duration, note_height, facecolor=colors[patch])
            ax.add_patch(rect)

        ax.set_xlim([min(start_times), max(add_arrays(start_times, durations))])
        ax.set_ylim([min(pitches) - 1, max(pitches) + 1])

        ax.set_facecolor('black')
        fig.patch.set_facecolor('white')

        if preview_length_in_notes > 0:
            ax.axvline(x=pbl, c='white')

        if block_lines_times_list:
            for bl in block_lines_times_list:
                ax.axvline(x=bl, c='white')

        if show_grid_lines:
            ax.grid(color='white')

        plt.xlabel('Time (s)', c='black')
        plt.ylabel('MIDI Pitch', c='black')

        plt.title(plot_title)

        if save_plt != '':
            if save_only_plt_image:
                # Strip axes/title so only the roll image is written.
                plt.axis('off')
                plt.title('')
                plt.savefig(save_plt,
                            transparent=save_transparent,
                            bbox_inches='tight',
                            pad_inches=0,
                            facecolor='black'
                            )
                plt.close()

            else:
                plt.savefig(save_plt)
                plt.close()

        if return_plt:
            return fig

        plt.show()
        plt.close()
| |
|
| | |
| |
|
def plot_square_matrix_points(list_of_points,
                              list_of_points_colors,
                              plot_size=(7, 7),
                              point_size = 10,
                              show_grid_lines=False,
                              plot_title = 'Square Matrix Points Plot',
                              return_plt=False,
                              save_plt='',
                              save_only_plt_image=True,
                              save_transparent=False
                              ):

    '''Square matrix points plot

    Scatter-plots [x, y] points (e.g. from square_binary_matrix) on a black
    background, one color per point.  Optionally saves the figure to
    save_plt (axes stripped when save_only_plt_image is True) and/or
    returns it when return_plt is True; otherwise shows it.
    '''

    fig, ax = plt.subplots(figsize=plot_size)

    ax.set_facecolor('black')

    if show_grid_lines:
        ax.grid(color='white')

    plt.xlabel('Time Step', c='black')
    plt.ylabel('MIDI Pitch', c='black')

    plt.title(plot_title)

    plt.scatter([p[0] for p in list_of_points],
                [p[1] for p in list_of_points],
                c=list_of_points_colors,
                s=point_size
                )

    if save_plt != '':
        if save_only_plt_image:
            # Strip axes/title so only the point cloud image is written.
            plt.axis('off')
            plt.title('')
            plt.savefig(save_plt,
                        transparent=save_transparent,
                        bbox_inches='tight',
                        pad_inches=0,
                        facecolor='black'
                        )
            plt.close()

        else:
            plt.savefig(save_plt)
            plt.close()

    if return_plt:
        return fig

    plt.show()
    plt.close()
| |
|
| | |
| |
|
def plot_cosine_similarities(lists_of_values,
                             plot_size=(7, 7),
                             save_plot=''
                             ):

    """
    Cosine similarities plot

    Renders the pairwise matrix of the input rows as a heat map.
    NOTE(review): pairwise_distances returns cosine DISTANCES
    (1 - similarity), so darker cells are more similar under 'inferno'.
    """

    cos_sim = metrics.pairwise_distances(lists_of_values, metric='cosine')

    plt.figure(figsize=plot_size)

    plt.imshow(cos_sim, cmap="inferno", interpolation="nearest")

    # Scale the colorbar to the image aspect ratio.
    im_ratio = cos_sim.shape[0] / cos_sim.shape[1]

    plt.colorbar(fraction=0.046 * im_ratio, pad=0.04)

    plt.xlabel("Index")
    plt.ylabel("Index")

    plt.tight_layout()

    if save_plot != '':
        plt.savefig(save_plot, bbox_inches="tight")
        plt.close()

    # NOTE(review): plt.show() still runs after a save+close above,
    # displaying an empty figure — presumably harmless but worth confirming.
    plt.show()
    plt.close()
| |
|
| | |
| |
|
def plot_points_with_mst_lines(points,
                               points_labels,
                               points_mst_edges,
                               plot_size=(20, 20),
                               labels_size=24,
                               save_plot=''
                               ):

    """
    Plots 2D points with labels and MST lines.

    points: list of [x, y] pairs; points_labels: one label per point;
    points_mst_edges: (i, j) index pairs (e.g. from compute_mst_edges).
    Saves to save_plot when given, otherwise shows the figure.
    """

    plt.figure(figsize=plot_size)

    # One scatter call per point so each gets its own default color.
    for i, label in enumerate(points_labels):
        plt.scatter(points[i][0], points[i][1])
        plt.annotate(label, (points[i][0], points[i][1]), fontsize=labels_size)

    # Draw the spanning-tree edges as semi-transparent black lines.
    for edge in points_mst_edges:
        i, j = edge
        plt.plot([points[i][0], points[j][0]], [points[i][1], points[j][1]], 'k-', alpha=0.5)

    plt.title('Points Map with MST Lines', fontsize=labels_size)
    plt.xlabel('X-axis', fontsize=labels_size)
    plt.ylabel('Y-axis', fontsize=labels_size)

    if save_plot != '':
        plt.savefig(save_plot, bbox_inches="tight")
        plt.close()

    plt.show()

    plt.close()
| |
|
| | |
| |
|
def plot_points_constellation(points,
                              points_labels,
                              p=5,
                              plot_size=(15, 15),
                              labels_size=12,
                              show_grid=False,
                              save_plot=''
                              ):

    """
    Plots 2D points constellation

    Scatter-plots labelled 2D points and overlays the edges of their
    minimum spanning tree under the Minkowski-p metric.
    Saves to save_plot when given, otherwise shows the figure.
    """

    points = np.array(points)

    dist_matrix = distance_matrix(points, points, p=p)

    mst = minimum_spanning_tree(dist_matrix).toarray()

    plt.figure(figsize=plot_size)

    plt.scatter(points[:, 0], points[:, 1], color='blue')

    # Label each point slightly above its marker.
    for i, label in enumerate(points_labels):
        plt.annotate(label, (points[i, 0], points[i, 1]),
                     textcoords="offset points",
                     xytext=(0, 10),
                     ha='center',
                     fontsize=labels_size
                     )

    # Dashed lines along the MST edges (non-zero entries of the tree matrix).
    for i in range(len(points)):
        for j in range(len(points)):
            if mst[i, j] > 0:
                plt.plot([points[i, 0], points[j, 0]], [points[i, 1], points[j, 1]], 'k--')

    plt.xlabel('X-axis', fontsize=labels_size)
    plt.ylabel('Y-axis', fontsize=labels_size)
    plt.title('2D Coordinates with Minimum Spanning Tree', fontsize=labels_size)

    plt.grid(show_grid)

    if save_plot != '':
        plt.savefig(save_plot, bbox_inches="tight")
        plt.close()

    plt.show()

    plt.close()
| |
|
| | |
| |
|
def binary_matrix_to_images(matrix,
                            step,
                            overlap,
                            output_folder='./Dataset/',
                            output_img_prefix='image',
                            output_img_ext='.png',
                            save_to_array=False,
                            verbose=True
                            ):

    """
    Slice a binary matrix into (step x width) row windows taken every
    `overlap` rows and save each as a 1-bit image, or collect them as
    arrays when save_to_array is True (then returned as a nested list).

    Image filenames are output_folder + output_img_prefix + '_' + width +
    '_' + zero-padded start row + output_img_ext.
    NOTE(review): windows shorter than 128 rows are zero-padded to 128x128,
    so the matrix is assumed to be 128 columns wide — confirm for other widths.
    """

    if not save_to_array:

        if verbose:
            print('=' * 70)
            print('Checking output folder dir...')

        os.makedirs(os.path.dirname(output_folder), exist_ok=True)

        if verbose:
            print('Done!')

    if verbose:
        print('=' * 70)
        print('Writing images...')

    matrix = np.array(matrix, dtype=np.uint8)

    image_array = []

    # A window starts every `overlap` rows; `step` is the window height.
    for i in range(0, max(1, matrix.shape[0]), overlap):

        submatrix = matrix[i:i+step, :]

        if submatrix.shape[0] < 128:
            zeros_array = np.zeros((128-submatrix.shape[0], 128))
            submatrix = np.vstack((submatrix, zeros_array))

        # Scale 0/1 to 0/255 and binarize into a 1-bit PIL image.
        img = Image.fromarray(submatrix * 255).convert('1')

        if save_to_array:
            image_array.append(np.array(img))

        else:
            img.save(output_folder + output_img_prefix + '_' + str(matrix.shape[1]) + '_' + str(i).zfill(7) + output_img_ext)

    if verbose:
        print('Done!')
        print('=' * 70)
        # BUG FIX: corrected 'imges' typo in the status message.
        print('Saved', (matrix.shape[0] // min(step, overlap))+1, 'images!')
        print('=' * 70)

    if save_to_array:
        return np.array(image_array).tolist()
| |
|
| | |
| |
|
def images_to_binary_matrix(list_of_images):

    """
    Concatenate a list of 2D images row-wise back into one binary matrix
    (inverse of binary_matrix_to_images with save_to_array=True).
    """

    stacked_rows = []

    for image in np.array(list_of_images):
        stacked_rows.extend(np.array(image).tolist())

    return stacked_rows
| |
|
| | |
| |
|
def square_image_matrix(image_matrix,
                        matrix_size=128,
                        num_pca_components=5,
                        filter_out_zero_rows=False,
                        return_square_matrix_points=False
                        ):

    """
    Reduces an arbitrary image matrix to a square image matrix

    Groups the rows into matrix_size contiguous bins and keeps, from each
    bin, the single row that contributes most along the bin's mean PCA
    component.  Returns the compressed matrix as a nested list, or the
    non-zero [row, value] points when return_square_matrix_points is True.

    NOTE(review): assumes matrix.shape[0] >= matrix_size so every bin is
    non-empty — PCA on an empty group would raise.
    """

    matrix = np.array(image_matrix)

    if filter_out_zero_rows:
        matrix = matrix[~np.all(matrix == 0, axis=1)]

    target_rows = matrix_size

    rows_per_group = matrix.shape[0] // target_rows

    compressed_matrix = np.zeros((target_rows, matrix.shape[1]), dtype=np.int32)

    for i in range(target_rows):
        start_row = i * rows_per_group
        end_row = (i + 1) * rows_per_group
        group = matrix[start_row:end_row, :]

        # Representative row = the one most aligned with the mean of the
        # group's principal components.
        pca = PCA(n_components=num_pca_components)
        pca.fit(group)

        principal_component = np.mean(pca.components_, axis=0)
        contributions = np.dot(group, principal_component)
        selected_row_index = np.argmax(contributions)

        compressed_matrix[i, :] = group[selected_row_index, :]

    if return_square_matrix_points:
        filtered_matrix = compressed_matrix[~np.all(compressed_matrix == 0, axis=1)]

        # Points pair each non-zero cell's row index with its VALUE
        # (not its column), matching downstream pitch-style plotting.
        row_indexes, col_indexes = np.where(filtered_matrix != 0)
        points = np.column_stack((row_indexes, filtered_matrix[row_indexes, col_indexes])).tolist()

        return points

    else:
        return compressed_matrix.tolist()
| |
|
| | |
| |
|
def image_matrix_to_images(image_matrix,
                           step,
                           overlap,
                           num_img_channels=3,
                           output_folder='./Dataset/',
                           output_img_prefix='image',
                           output_img_ext='.png',
                           save_to_array=False,
                           verbose=True
                           ):

    """
    Slice an integer image matrix into (step x width) row windows taken
    every `overlap` rows and save each window as an image, or collect the
    windows as arrays when save_to_array is True (then returned as a
    nested list).

    When num_img_channels > 1, each cell is treated as a packed 24-bit
    value and split into RGB planes; otherwise cells are written as 8-bit
    grayscale.  Filenames follow the same scheme as binary_matrix_to_images.
    NOTE(review): windows shorter than 128 rows are zero-padded to 128x128,
    so the matrix is assumed to be 128 columns wide — confirm for other widths.
    """

    # Anything beyond one channel is rendered as packed RGB.
    if num_img_channels > 1:
        n_mat_channels = 3

    else:
        n_mat_channels = 1

    if not save_to_array:

        if verbose:
            print('=' * 70)
            print('Checking output folder dir...')

        os.makedirs(os.path.dirname(output_folder), exist_ok=True)

        if verbose:
            print('Done!')

    if verbose:
        print('=' * 70)
        print('Writing images...')

    matrix = np.array(image_matrix)

    image_array = []

    # A window starts every `overlap` rows; `step` is the window height.
    for i in range(0, max(1, matrix.shape[0]), overlap):

        submatrix = matrix[i:i+step, :]

        if submatrix.shape[0] < 128:
            zeros_array = np.zeros((128-submatrix.shape[0], 128))
            submatrix = np.vstack((submatrix, zeros_array))

        if n_mat_channels == 3:

            # Unpack each 24-bit cell into its R, G, B bytes.
            r = (submatrix // (256*256)) % 256
            g = (submatrix // 256) % 256
            b = submatrix % 256

            rgb_image = np.stack((r, g, b), axis=-1).astype(np.uint8)
            img = Image.fromarray(rgb_image, 'RGB')

        else:
            grayscale_image = submatrix.astype(np.uint8)
            img = Image.fromarray(grayscale_image, 'L')

        if save_to_array:
            image_array.append(np.array(img))

        else:
            img.save(output_folder + output_img_prefix + '_' + str(matrix.shape[1]) + '_' + str(i).zfill(7) + output_img_ext)

    if verbose:
        print('Done!')
        print('=' * 70)
        # BUG FIX: corrected 'imges' typo in the status message.
        print('Saved', (matrix.shape[0] // min(step, overlap))+1, 'images!')
        print('=' * 70)

    if save_to_array:
        return np.array(image_array).tolist()
| |
|
| | |
| |
|
def images_to_image_matrix(list_of_images,
                           num_img_channels=3
                           ):

    """
    Concatenate image arrays row-wise back into one packed-integer matrix
    (inverse of image_matrix_to_images with save_to_array=True).

    When num_img_channels > 1, RGB images are re-packed into 24-bit integers
    (R*65536 + G*256 + B); otherwise images are used as-is.
    Returns a list of 1D row arrays.
    """

    # Mirror image_matrix_to_images: anything beyond one channel is RGB.
    if num_img_channels > 1:
        n_mat_channels = 3

    else:
        n_mat_channels = 1

    image_array = np.array(list_of_images)

    original_matrix = []

    for img in image_array:

        # BUG FIX: the original tested `num_img_channels == 3` here, so e.g.
        # num_img_channels=2 was packed as 3-channel on write but decoded as
        # grayscale on read; use the derived channel count instead.
        if n_mat_channels == 3:

            rgb_array = np.array(img)

            # Re-pack R, G, B bytes into one 24-bit integer per cell.
            matrix = (rgb_array[..., 0].astype(np.int64) * 256*256 +
                      rgb_array[..., 1].astype(np.int64) * 256 +
                      rgb_array[..., 2].astype(np.int64))

        else:
            matrix = np.array(img)

        original_matrix.extend(matrix)

    return original_matrix
| |
|
| | |
| |
|
def square_matrix_to_RGB_matrix(square_matrix):

    """
    Decode a matrix of packed 24-bit integers into an RGB matrix.

    At most `width` rows are kept (forcing a square), then each cell is
    split into its R, G, B bytes; returns a nested list of [R, G, B].
    """

    mat = np.array(square_matrix)
    mat = mat[:mat.shape[1]]  # truncate rows to the column count

    red = (mat // 65536) % 256
    green = (mat // 256) % 256
    blue = mat % 256

    return np.stack((red, green, blue), axis=-1).tolist()
| |
|
| | |
| |
|
def upsample_square_matrix(square_matrix, upsampling_factor=4):

    """
    Upsample a square matrix by integer pixel replication (Kronecker
    product with an all-ones block); returns a nested int list.
    """

    mat = np.array(square_matrix)
    mat = mat[:mat.shape[1]]  # truncate rows to the column count

    ones_block = np.ones((upsampling_factor, upsampling_factor))
    upsampled = np.kron(mat, ones_block).astype('int')

    return upsampled.tolist()
| |
|
| | |
| |
|
def downsample_square_matrix(square_matrix, downsampling_factor=4):

    """
    Downsample a square matrix by keeping every k-th row and column;
    returns a nested int list.
    """

    mat = np.array(square_matrix)
    mat = mat[:mat.shape[1]]  # truncate rows to the column count

    decimated = mat[::downsampling_factor, ::downsampling_factor].astype('int')

    return decimated.tolist()
| |
|
| | |
| |
|
def plot_parsons_code(parsons_code,
                      start_pitch=60,
                      return_plot_dict=False,
                      return_plot_string=False,
                      plot_size=(10, 10),
                      labels_size=16,
                      save_plot=''
                      ):

    '''
    Plot parsons code string

    Builds a melodic contour from a Parsons code string ("*" start marker
    followed by "U"p / "D"own / "R"epeat).  Depending on the flags:
    return_plot_dict -> {(row, col): glyph} contour dictionary;
    return_plot_string -> multi-line ASCII contour;
    otherwise plots the contour with matplotlib (optionally saving it).
    Returns None for input that does not start with "*".
    '''

    # A valid Parsons code starts with the reference-note marker '*'.
    if parsons_code[0] != "*":
        return None

    contour_dict = {}
    pitch = 0
    index = 0

    # Row extent of the drawing: "up" DECREASES the row number, so the top
    # row is the most negative value and the bottom row the most positive.
    top_row = 0
    bottom_row = 0

    contour_dict[(pitch, index)] = "*"

    for point in parsons_code:
        if point == "R":
            index += 1
            contour_dict[(pitch, index)] = "-"

            index += 1
            contour_dict[(pitch, index)] = "*"

        elif point == "U":
            index += 1
            pitch -= 1
            contour_dict[(pitch, index)] = "/"

            index += 1
            pitch -= 1
            contour_dict[(pitch, index)] = "*"

            if pitch < top_row:
                top_row = pitch

        elif point == "D":
            index += 1
            pitch += 1
            contour_dict[(pitch, index)] = "\\"

            index += 1
            pitch += 1
            contour_dict[(pitch, index)] = "*"

            if pitch > bottom_row:
                bottom_row = pitch

    if return_plot_dict:
        return contour_dict

    if return_plot_string:

        # BUG FIX: the original overwrote plot_string on every row
        # (`plot_string = "".join(line)`), so only the LAST row was
        # returned; accumulate all rows and join them with newlines.
        rows = []

        for row_pitch in range(top_row, bottom_row + 1):
            line = [" " for _ in range(index + 1)]
            for pos in range(index + 1):
                if (row_pitch, pos) in contour_dict:
                    line[pos] = contour_dict[(row_pitch, pos)]
            rows.append("".join(line))

        return "\n".join(rows)

    # Convert the contour glyphs into a pitch curve starting at start_pitch.
    labels = []
    pitches = []
    positions = []
    cur_pitch = start_pitch
    pitch_idx = 0

    for k, v in contour_dict.items():

        if v != '*':

            pitches.append(cur_pitch)
            positions.append(pitch_idx)

            if v == '/':
                cur_pitch += 1
                labels.append('U')

            elif v == '\\':
                cur_pitch -= 1
                labels.append('D')

            elif v == '-':
                labels.append('R')

            pitch_idx += 1

    plt.figure(figsize=plot_size)

    plt.plot(pitches)

    for i, point in enumerate(zip(positions, pitches)):
        plt.annotate(labels[i], point, fontsize=labels_size)

    plt.title('Parsons Code with Labels', fontsize=labels_size)
    plt.xlabel('Position', fontsize=labels_size)
    plt.ylabel('Pitch', fontsize=labels_size)

    if save_plot != '':
        plt.savefig(save_plot, bbox_inches="tight")
        plt.close()

    plt.show()

    plt.close()
| | |
| | |
| |
|
def plot_tokens_embeddings_constellation(tokens_embeddings,
                                         start_token,
                                         end_token,
                                         plot_size=(10, 10),
                                         labels_size=12,
                                         show_grid=False,
                                         save_plot=''):

    """
    Plots token embeddings constellation using MST and graph layout
    without dimensionality reduction.

    Tokens in [start_token, end_token) are laid out with a Kamada-Kawai
    layout over the MST of their cosine-distance matrix, labelled with
    their token indices.  Saves to save_plot when given, otherwise shows.
    """

    # NOTE(review): this local name shadows scipy.spatial.distance_matrix
    # imported at module level — harmless here, but easy to trip over.
    distance_matrix = metrics.pairwise_distances(tokens_embeddings[start_token:end_token], metric='cosine')

    token_labels = [str(i) for i in range(start_token, end_token)]

    mst = minimum_spanning_tree(distance_matrix).toarray()

    n = distance_matrix.shape[0]
    G = nx.Graph()

    # MST edges weighted by inverse distance (closer tokens pull together
    # more strongly in the layout); epsilon guards division by zero.
    for i in range(n):
        for j in range(n):
            if mst[i, j] > 0:
                weight = 1 / (distance_matrix[i, j] + 1e-8)
                G.add_edge(i, j, weight=weight)

    pos = nx.kamada_kawai_layout(G, weight='weight')

    points = np.array([pos[i] for i in range(n)])

    plt.figure(figsize=plot_size)
    plt.scatter(points[:, 0], points[:, 1], color='blue')

    for i, label in enumerate(token_labels):
        plt.annotate(label, (points[i, 0], points[i, 1]),
                     textcoords="offset points",
                     xytext=(0, 10),
                     ha='center',
                     fontsize=labels_size)

    # Dashed lines along the MST edges.
    for i in range(n):
        for j in range(n):
            if mst[i, j] > 0:
                plt.plot([points[i, 0], points[j, 0]],
                         [points[i, 1], points[j, 1]],
                         'k--', alpha=0.5)

    plt.title('Token Embeddings Constellation with MST', fontsize=labels_size)
    plt.grid(show_grid)

    if save_plot:
        plt.savefig(save_plot, bbox_inches="tight")
        plt.close()

    else:
        plt.show()

    plt.close()
| | |
| | |
| |
|
def find_token_path(tokens_embeddings,
                    start_token,
                    end_token,
                    verbose=False
                    ):
    
    """
    Finds the path of tokens between start_token and end_token using
    the Minimum Spanning Tree (MST) derived from the distance matrix.

    Parameters:
        tokens_embeddings: 2D array-like of token embedding vectors.
        start_token (int): Index of the token where the path starts.
        end_token (int): Index of the token where the path ends.
        verbose (bool): If True, prints the total token count.

    Returns:
        list[int]: Token indices along the MST path from start to end
                   (inclusive of both endpoints).

    Raises:
        ValueError: If start_token or end_token is not a valid token index.
    """
    
    # Pairwise cosine distances between all token embeddings
    distance_matrix = metrics.pairwise_distances(tokens_embeddings, metric='cosine')
    
    n = distance_matrix.shape[0]
    
    if verbose:
        print('Total number of tokens:', n)
    
    # Sparse MST over the distance matrix, densified for edge extraction
    mst = minimum_spanning_tree(distance_matrix).toarray()
    
    G = nx.Graph()
    
    # Add each MST edge once; weight is inverse distance (closer = heavier).
    # np.nonzero replaces the original O(n^2) double loop over the matrix.
    for i, j in zip(*np.nonzero(mst)):
        G.add_edge(int(i), int(j), weight=1 / (distance_matrix[i, j] + 1e-8))
    
    # Validate the endpoints directly instead of the original round-trip
    # through a list of stringified indices; the ValueError contract is kept.
    try:
        start_idx = int(start_token)
        end_idx = int(end_token)
        
        if not (0 <= start_idx < n and 0 <= end_idx < n):
            raise ValueError
    
    except (ValueError, TypeError):
        raise ValueError("Start or end token not found in the provided token labels.")
    
    # NOTE(review): shortest_path is called without weight=, so the path is
    # by hop count even though edge weights are set — confirm this is intended.
    path_indices = nx.shortest_path(G, source=start_idx, target=end_idx)
    
    token_path = [int(idx) for idx in path_indices]
    
    return token_path
| |
|
| | |
| | |
| | |
| |
|
| | ''' |
| | import umap |
| | |
| | def reduce_dimensionality_umap(list_of_values, |
| | n_comp=2, |
| | n_neighbors=15, |
| | ): |
| | |
| | """ |
| | Reduces the dimensionality of the values using UMAP. |
| | """ |
| | |
| | vals = np.array(list_of_values) |
| | |
| | umap_reducer = umap.UMAP(n_components=n_comp, |
| | n_neighbors=n_neighbors, |
| | n_epochs=5000, |
| | verbose=True |
| | ) |
| | |
| | reduced_vals = umap_reducer.fit_transform(vals) |
| | |
| | return reduced_vals.tolist() |
| | ''' |
| |
|
| | |
| |
|
| | ''' |
| | import alphashape |
| | from shapely.geometry import Point |
| | from matplotlib.tri import Triangulation, LinearTriInterpolator |
| | from scipy.stats import zscore |
| | |
| | #=============================================================================== |
| | |
| | coordinates = points |
| | |
| | dist_matrix = minkowski_distance_matrix(coordinates, p=3) # You can change the value of p as needed |
| | |
| | # Centering matrix |
| | n = dist_matrix.shape[0] |
| | H = np.eye(n) - np.ones((n, n)) / n |
| | |
| | # Apply double centering |
| | B = -0.5 * H @ dist_matrix**2 @ H |
| | |
| | # Eigen decomposition |
| | eigvals, eigvecs = np.linalg.eigh(B) |
| | |
| | # Sort eigenvalues and eigenvectors |
| | idx = np.argsort(eigvals)[::-1] |
| | eigvals = eigvals[idx] |
| | eigvecs = eigvecs[:, idx] |
| | |
| | # Select the top 2 eigenvectors |
| | X_transformed = eigvecs[:, :2] * np.sqrt(eigvals[:2]) |
| | |
| | #=============================================================================== |
| | |
| | src_points = X_transformed |
| | src_values = np.array([[p[1]] for p in points]) #np.random.rand(X_transformed.shape[0]) |
| | |
| | #=============================================================================== |
| | |
| | # Normalize the points to the range [0, 1] |
| | scaler = MinMaxScaler() |
| | points_normalized = scaler.fit_transform(src_points) |
| | |
| | values_normalized = custom_normalize(src_values) |
| | |
| | # Remove outliers based on z-score |
| | z_scores = np.abs(zscore(points_normalized, axis=0)) |
| | filtered_points = points_normalized[(z_scores < 3).all(axis=1)] |
| | filtered_values = values_normalized[(z_scores < 3).all(axis=1)] |
| | |
| | # Compute the concave hull (alpha shape) |
| | alpha = 8 # Adjust alpha as needed |
| | hull = alphashape.alphashape(filtered_points, alpha) |
| | |
| | # Create a triangulation |
| | tri = Triangulation(filtered_points[:, 0], filtered_points[:, 1]) |
| | |
| | # Interpolate the values on the triangulation |
| | interpolator = LinearTriInterpolator(tri, filtered_values[:, 0]) |
| | xi, yi = np.meshgrid(np.linspace(0, 1, 100), np.linspace(0, 1, 100)) |
| | zi = interpolator(xi, yi) |
| | |
| | # Mask out points outside the concave hull |
| | mask = np.array([hull.contains(Point(x, y)) for x, y in zip(xi.flatten(), yi.flatten())]) |
| | zi = np.ma.array(zi, mask=~mask.reshape(zi.shape)) |
| | |
| | # Plot the filled contour based on the interpolated values |
| | plt.contourf(xi, yi, zi, levels=50, cmap='viridis') |
| | |
| | # Plot the original points |
| | #plt.scatter(filtered_points[:, 0], filtered_points[:, 1], c=filtered_values, edgecolors='k') |
| | |
| | plt.title('Filled Contour Plot with Original Values') |
| | plt.xlabel('X-axis') |
| | plt.ylabel('Y-axis') |
| | plt.colorbar(label='Value') |
| | plt.show() |
| | ''' |
| |
|
| | |
| |
|
def plot_tree_horizontal(data, plot_size=(12, 8)):
    
    """
    Given data as a list of levels (each level is a tuple or list of
    displacements for each branch), this function computes the cumulative
    value per branch (starting from 0) and plots each branch
    with the tree level mapped to the x-axis and the cumulative value mapped
    to the y-axis. This gives a left-to-right tree with branches spanning up
    (positive) and down (negative).
    
    Parameters:
        data (list of tuple/list): Each element represents a tree level.
                                   It is assumed every level has the same length.
        plot_size (tuple): Figure size in inches (width, height).
                           Defaults to the original hard-coded (12, 8).

    Raises:
        ValueError: If data is empty or its levels are not all the same length.
    """
    
    data = np.array(data)
    
    # Ragged or empty input produces a non-2D array; fail with a clear message
    # instead of an opaque unpacking error.
    if data.ndim != 2:
        raise ValueError('data must be a non-empty list of equal-length levels')
    
    n_levels, n_branches = data.shape
    
    # Cumulative value per branch, with an all-zero root row prepended.
    # np.cumsum replaces the original explicit accumulation loop.
    cum = np.zeros((n_levels + 1, n_branches))
    cum[1:, :] = np.cumsum(data, axis=0)
    
    plt.figure(figsize=plot_size)
    
    x = np.arange(n_levels + 1)
    
    for j in range(n_branches):
        plt.plot(x, cum[:, j], marker='o', label=f'Branch {j}')
    
    plt.title("Horizontal Tree Visualization: Branches Spanning Up and Down", fontsize=14)
    plt.xlabel("Tree Level (Left = Root)")
    plt.ylabel("Cumulative Value (Up = Positive, Down = Negative)")
    
    # Branch labels were assigned above but never rendered; show them.
    plt.legend()
    
    # Root baseline
    plt.axhline(0, color="gray", linestyle="--")
    
    plt.tight_layout()
    plt.show()
    
    # Free the figure, matching the module's other plotting helpers.
    plt.close()
| | |
| | |
| | |
| | |