ohollo commited on
Commit
681b241
·
1 Parent(s): a7d861a

Update assets and fix up scoring

Browse files
assets/chords_20251119.index ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1170d567f73e344462260571285ece153c468a79be65cd21cf6d52fc435bce6
3
+ size 281385005
assets/quantile_transformers_20251122.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20b23160289a8a3b3c56ce9a27193c1e2eb658824f843fd2c8b8db103c15818d
3
+ size 164145
src/__pycache__/analysis.cpython-311.pyc ADDED
Binary file (2.41 kB). View file
 
src/__pycache__/convert.cpython-311.pyc ADDED
Binary file (2.13 kB). View file
 
src/__pycache__/methodology.cpython-311.pyc ADDED
Binary file (3.74 kB). View file
 
src/__pycache__/neighbours.cpython-311.pyc ADDED
Binary file (3.93 kB). View file
 
src/__pycache__/scorer.cpython-311.pyc ADDED
Binary file (2.77 kB). View file
 
src/__pycache__/utils.cpython-311.pyc ADDED
Binary file (1.47 kB). View file
 
src/analysis.py CHANGED
@@ -3,7 +3,7 @@ from dataclasses import dataclass
3
  import pandas as pd
4
  from src.methodology import SimpleMethodology
5
  from src.neighbours import EmbeddingClosestNeighbours
6
- from src.scorer import EmbeddingScorer
7
 
8
 
9
 
@@ -14,11 +14,11 @@ class EmbeddingsAnalysis:
14
  self._ecn = EmbeddingClosestNeighbours(index, all_labels_np, all_lengths_np, lookup, close_threshold=close_threshold)
15
  specific_scalers = {i: scaler for (l, r), scaler in scalers.items() for i in range(l, r)}
16
  sm = SimpleMethodology(specific_scalers, specific_scalers[99])
17
- self._scorer = EmbeddingScorer(index, all_labels_np, sm)
18
 
19
 
20
- def get_score(self, embeddings, lengths):
21
- score = self._scorer.score(embeddings, lengths)
22
  return score
23
 
24
  def get_neighbours(self, embeddings, limit=None):
 
3
  import pandas as pd
4
  from src.methodology import SimpleMethodology
5
  from src.neighbours import EmbeddingClosestNeighbours
6
+ from src.scorer import EmbeddingsOriginalityScorer
7
 
8
 
9
 
 
14
  self._ecn = EmbeddingClosestNeighbours(index, all_labels_np, all_lengths_np, lookup, close_threshold=close_threshold)
15
  specific_scalers = {i: scaler for (l, r), scaler in scalers.items() for i in range(l, r)}
16
  sm = SimpleMethodology(specific_scalers, specific_scalers[99])
17
+ self._scorer = EmbeddingsOriginalityScorer(index, all_labels_np, sm)
18
 
19
 
20
+ def get_scores(self, embeddings, lengths):
21
+ score = self._scorer.score(embeddings, pd.Series(lengths))
22
  return score
23
 
24
  def get_neighbours(self, embeddings, limit=None):
src/convert.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from gradio_client import Client
3
+ import os
4
+ import json
5
+
6
+ _CONSTANT_GAP_SECS = 2
7
+ _SEQ_EMBED_SPACE = 'ohollo/chord-seq-embed'
8
+
9
+
10
+ _client = Client(_SEQ_EMBED_SPACE)
11
+
12
def _call_embedding_service(chords_w_timestamps):
    """Send chord/timestamp payloads to the remote embedding Space and return the decoded JSON response."""
    raw_response = _client.predict(json.dumps(chords_w_timestamps), api_name="/predict")
    return json.loads(raw_response)
15
+
16
+
17
def get_embeddings_from_chord_sequences(chord_sequences: list[list[str]], constant_gap_secs: float = _CONSTANT_GAP_SECS) -> np.ndarray:
    """
    Convert chord sequences into their corresponding embeddings.

    Each sequence is paired with synthetic, evenly spaced timestamps
    (0, gap, 2*gap, ...) before being sent to the remote embedding service,
    since the service expects timed chord events.

    :param chord_sequences: List of chord sequences, each a list of chord labels.
    :param constant_gap_secs: Fixed gap in seconds between consecutive synthetic timestamps.
    :return: 2-d numpy array of embeddings, one row per chord sequence.
    """
    chords_w_timestamps = [
        {
            'label': chord_sequence,
            # Fabricate evenly spaced timestamps; only relative timing matters here.
            'timestamp': [i * constant_gap_secs for i in range(len(chord_sequence))],
        }
        for chord_sequence in chord_sequences
    ]
    return np.array(_call_embedding_service(chords_w_timestamps)['embeddings'])
src/methodology.py CHANGED
@@ -31,7 +31,8 @@ class SimpleMethodology(CountBasedMethodology):
31
 
32
  def execute(self, neighbours_df: pd.DataFrame, lengths: pd.Series) -> pd.Series:
33
  unscaled = (neighbours_df['0.8'] - 1 ) * 1 + (neighbours_df['0.85'] - 1) * 2 + (neighbours_df['0.9'] - 1) * 3 + (neighbours_df['0.925'] - 1) * 4 + (neighbours_df['0.95'] - 1) * 5
34
- scaled = unscaled.apply(
 
35
  lambda row: self._scalers.get(row['length'], self._fallback_scaler).transform(pd.DataFrame({_SCALER_X_LABEL: row['unscaled']}, index=[0]))[0][0],
36
  axis=1
37
  )
 
31
 
32
  def execute(self, neighbours_df: pd.DataFrame, lengths: pd.Series) -> pd.Series:
33
  unscaled = (neighbours_df['0.8'] - 1 ) * 1 + (neighbours_df['0.85'] - 1) * 2 + (neighbours_df['0.9'] - 1) * 3 + (neighbours_df['0.925'] - 1) * 4 + (neighbours_df['0.95'] - 1) * 5
34
+ concat = pd.concat([unscaled.rename('unscaled'), lengths.rename('length')], axis=1)
35
+ scaled = concat.apply(
36
  lambda row: self._scalers.get(row['length'], self._fallback_scaler).transform(pd.DataFrame({_SCALER_X_LABEL: row['unscaled']}, index=[0]))[0][0],
37
  axis=1
38
  )
src/neighbours.py CHANGED
@@ -7,6 +7,8 @@ import pandas as pd
7
 
8
  from src.utils import indices_distances_gen
9
 
 
 
10
 
11
  class Neighbour(NamedTuple):
12
  distance: float
@@ -24,7 +26,8 @@ class EmbeddingClosestNeighbours:
24
  :param metadata: Pandas DataFrame containing metadata for each indexed entry. Index should be aligned with labels.
25
  :param close_threshold: Similarity threshold to consider embeddings as "close".
26
  """
27
- def __init__(self, index: faiss.Index, labels: np.ndarray, lengths: np.ndarray, metadata: pd.DataFrame, close_threshold: float = CLOSE_THRESHOLD):
 
28
  self._index = index
29
  self._labels = labels
30
  self._lengths = lengths
@@ -32,7 +35,7 @@ class EmbeddingClosestNeighbours:
32
  self._close_threshold = close_threshold
33
 
34
  def get(self, embeddings: np.ndarray, limit: int = None) -> list[list[Neighbour]]:
35
- lims, D, I = self._index.range_search(embeddings, self._close_threshold)
36
  all_neighbours = []
37
  for indices_, distances_ in indices_distances_gen(embeddings, self._close_threshold, self._index):
38
  lengths_ = self._lengths[indices_]
 
7
 
8
  from src.utils import indices_distances_gen
9
 
10
+ _CLOSE_THRESHOLD_DEFAULT = 0.99
11
+
12
 
13
  class Neighbour(NamedTuple):
14
  distance: float
 
26
  :param metadata: Pandas DataFrame containing metadata for each indexed entry. Index should be aligned with labels.
27
  :param close_threshold: Similarity threshold to consider embeddings as "close".
28
  """
29
+ def __init__(self, index: faiss.Index, labels: np.ndarray, lengths: np.ndarray,
30
+ metadata: pd.DataFrame, close_threshold: float = _CLOSE_THRESHOLD_DEFAULT):
31
  self._index = index
32
  self._labels = labels
33
  self._lengths = lengths
 
35
  self._close_threshold = close_threshold
36
 
37
  def get(self, embeddings: np.ndarray, limit: int = None) -> list[list[Neighbour]]:
38
+ # lims, D, I = self._index.range_search(embeddings, self._close_threshold)
39
  all_neighbours = []
40
  for indices_, distances_ in indices_distances_gen(embeddings, self._close_threshold, self._index):
41
  lengths_ = self._lengths[indices_]
src/scorer.py CHANGED
@@ -14,7 +14,7 @@ def _count_unique_neighbours(embeddings, radius, index, all_labels):
14
  return res
15
 
16
 
17
- class EmbeddingScorer:
18
  """
19
  Scores embeddings based on their originality. Specifically using counts of unique neighbours within certain radii.
20
 
 
14
  return res
15
 
16
 
17
+ class EmbeddingsOriginalityScorer:
18
  """
19
  Scores embeddings based on their originality. Specifically using counts of unique neighbours within certain radii.
20
 
src/utils.py CHANGED
@@ -3,7 +3,15 @@ import numpy as np
3
 
4
 
5
  def indices_distances_gen(embeddings: np.ndarray, radius: float, index: faiss.Index):
6
- lims, D, I = index.range_search(embeddings, radius)
 
 
 
 
 
 
 
 
7
  # Iterate over lims and get indices per embedding
8
  for i in range(len(lims) - 1):
9
  start = lims[i]
 
3
 
4
 
5
  def indices_distances_gen(embeddings: np.ndarray, radius: float, index: faiss.Index):
6
+ """
7
+ Generator that yields indices and distances of neighbors within a given radius for each embedding.
8
+ :param embeddings: 2-d Numpy array where each row is an embedding to search neighbors for.
9
+ :param radius: Similarity radius to search within.
10
+ :param index: FAISS index for similarity search.
11
+ """
12
+ embeddings_copy = embeddings.copy().astype(np.float32)
13
+ faiss.normalize_L2(embeddings_copy)
14
+ lims, D, I = index.range_search(embeddings_copy, radius)
15
  # Iterate over lims and get indices per embedding
16
  for i in range(len(lims) - 1):
17
  start = lims[i]