Spaces:
Running
Running
Introduce score power
Browse files
- app.py +5 -5
- cfg.py +4 -3
- src/analysis.py +2 -2
- src/convert.py +4 -1
- src/methodology.py +4 -3
app.py
CHANGED
|
@@ -20,7 +20,7 @@ logging.basicConfig(level=logging.INFO)
|
|
| 20 |
logger = logging.getLogger(__name__)
|
| 21 |
|
| 22 |
# Load models and data
|
| 23 |
-
|
| 24 |
all_labels = pd.read_csv(cfg.LABELS_LOCATION)
|
| 25 |
scalers = joblib.load(cfg.SCALER_DICT_LOCATION)
|
| 26 |
index = faiss.read_index(cfg.INDEX_LOCATION)
|
|
@@ -28,8 +28,8 @@ ds = load_dataset(cfg.LOOKUP_DS_NAME)
|
|
| 28 |
lookup = ds['train'].to_pandas().set_index('track_id')[['title', 'artist']]
|
| 29 |
|
| 30 |
# Initialize analysis
|
| 31 |
-
ea = EmbeddingsAnalysis(index, all_labels, lookup, scalers, cfg.RADII, close_threshold=cfg.CLOSE_THRESHOLD)
|
| 32 |
-
|
| 33 |
|
| 34 |
# Load how it works content
|
| 35 |
with open(cfg.HOW_IT_WORKS_MD_LOCATION, 'r') as f:
|
|
@@ -130,8 +130,8 @@ def analyze_chord_sequence_text(chord_text: str) -> tuple[Optional[float], Optio
|
|
| 130 |
embeddings = get_embeddings_from_chord_sequences([chords])
|
| 131 |
neighbour_embeddings = None
|
| 132 |
if len(chords) < cfg.MIN_SEQUENCE_LENGTH_FOR_NEIGHBOURS:
|
| 133 |
-
|
| 134 |
-
neighbour_embeddings = get_embeddings_from_chord_sequences([
|
| 135 |
score, neighbours = _perform_analysis(embeddings, [len(chords)], neighbour_embeddings)
|
| 136 |
return score, neighbours
|
| 137 |
except AppError as e:
|
|
|
|
| 20 |
logger = logging.getLogger(__name__)
|
| 21 |
|
| 22 |
# Load models and data
|
| 23 |
+
logging.info("Loading models and data...")
|
| 24 |
all_labels = pd.read_csv(cfg.LABELS_LOCATION)
|
| 25 |
scalers = joblib.load(cfg.SCALER_DICT_LOCATION)
|
| 26 |
index = faiss.read_index(cfg.INDEX_LOCATION)
|
|
|
|
| 28 |
lookup = ds['train'].to_pandas().set_index('track_id')[['title', 'artist']]
|
| 29 |
|
| 30 |
# Initialize analysis
|
| 31 |
+
ea = EmbeddingsAnalysis(index, all_labels, lookup, scalers, cfg.RADII, close_threshold=cfg.CLOSE_THRESHOLD, score_power=cfg.SCORE_POWER)
|
| 32 |
+
logging.info("Models loaded successfully!")
|
| 33 |
|
| 34 |
# Load how it works content
|
| 35 |
with open(cfg.HOW_IT_WORKS_MD_LOCATION, 'r') as f:
|
|
|
|
| 130 |
embeddings = get_embeddings_from_chord_sequences([chords])
|
| 131 |
neighbour_embeddings = None
|
| 132 |
if len(chords) < cfg.MIN_SEQUENCE_LENGTH_FOR_NEIGHBOURS:
|
| 133 |
+
chords = _pad_sequence_by_repetition(chords, cfg.MIN_SEQUENCE_LENGTH_FOR_NEIGHBOURS)
|
| 134 |
+
neighbour_embeddings = get_embeddings_from_chord_sequences([chords])
|
| 135 |
score, neighbours = _perform_analysis(embeddings, [len(chords)], neighbour_embeddings)
|
| 136 |
return score, neighbours
|
| 137 |
except AppError as e:
|
cfg.py
CHANGED
|
@@ -6,6 +6,7 @@ SCALER_DICT_LOCATION = './assets/quantile_transformers.joblib'
|
|
| 6 |
MIN_SEQUENCE_LENGTH_FOR_NEIGHBOURS = 18
|
| 7 |
HOW_IT_WORKS_MD_LOCATION = './how_it_works.md'
|
| 8 |
HOW_IT_WORKS_SVG_LOCATION = './assets/harmonic_analysis_simple.svg'
|
| 9 |
-
RADII = (0.8, 0.85, 0.9, 0.925, 0.95)
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
| 6 |
MIN_SEQUENCE_LENGTH_FOR_NEIGHBOURS = 18
|
| 7 |
HOW_IT_WORKS_MD_LOCATION = './how_it_works.md'
|
| 8 |
HOW_IT_WORKS_SVG_LOCATION = './assets/harmonic_analysis_simple.svg'
|
| 9 |
+
# RADII = (0.8, 0.85, 0.9, 0.925, 0.95)
|
| 10 |
+
RADII = (0.85, 0.9) + tuple(range(1, 50)) + (0.925, 0.95, 0.975)
|
| 11 |
+
SCORE_POWER = 0.5
|
| 12 |
+
|
src/analysis.py
CHANGED
|
@@ -17,12 +17,12 @@ class EmbeddingsAnalysis:
|
|
| 17 |
:param scalers: Dictionary mapping length ranges to quantile transformers for score normalization.
|
| 18 |
:param close_threshold: Similarity threshold for neighbor search.
|
| 19 |
"""
|
| 20 |
-
def __init__(self, index, all_labels, lookup, scalers, radii, close_threshold=0.95):
|
| 21 |
all_labels_np = all_labels['track_id'].to_numpy()
|
| 22 |
all_lengths_np = all_labels['length'].to_numpy()
|
| 23 |
self._ecn = EmbeddingClosestNeighbours(index, all_labels_np, all_lengths_np, lookup, close_threshold=close_threshold)
|
| 24 |
specific_scalers = {i: scaler for (l, r), scaler in scalers.items() for i in range(l, r)}
|
| 25 |
-
sm = SimpleMethodology(specific_scalers, specific_scalers[_FALLBACK_INDEX])
|
| 26 |
self._scorer = EmbeddingsOriginalityScorer(index, all_labels_np, radii, sm)
|
| 27 |
|
| 28 |
|
|
|
|
| 17 |
:param scalers: Dictionary mapping length ranges to quantile transformers for score normalization.
|
| 18 |
:param close_threshold: Similarity threshold for neighbor search.
|
| 19 |
"""
|
| 20 |
+
def __init__(self, index, all_labels, lookup, scalers, radii, close_threshold=0.95, score_power=1.0):
|
| 21 |
all_labels_np = all_labels['track_id'].to_numpy()
|
| 22 |
all_lengths_np = all_labels['length'].to_numpy()
|
| 23 |
self._ecn = EmbeddingClosestNeighbours(index, all_labels_np, all_lengths_np, lookup, close_threshold=close_threshold)
|
| 24 |
specific_scalers = {i: scaler for (l, r), scaler in scalers.items() for i in range(l, r)}
|
| 25 |
+
sm = SimpleMethodology(specific_scalers, specific_scalers[_FALLBACK_INDEX], score_power=score_power)
|
| 26 |
self._scorer = EmbeddingsOriginalityScorer(index, all_labels_np, radii, sm)
|
| 27 |
|
| 28 |
|
src/convert.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import numpy as np
|
| 2 |
from gradio_client import Client
|
| 3 |
import os
|
|
@@ -5,6 +6,8 @@ import json
|
|
| 5 |
import time
|
| 6 |
import httpx
|
| 7 |
|
|
|
|
|
|
|
| 8 |
from chord_extractor.extractors import Chordino
|
| 9 |
from chord_extractor import clear_conversion_cache, LabelledChordSequence
|
| 10 |
|
|
@@ -28,7 +31,7 @@ def _create_client():
|
|
| 28 |
_client = _create_client()
|
| 29 |
|
| 30 |
def _call_embedding_service(chords_w_timestamps):
|
| 31 |
-
|
| 32 |
result = _client.predict(json.dumps(chords_w_timestamps), api_name="/predict")
|
| 33 |
return json.loads(result)
|
| 34 |
|
|
|
|
| 1 |
+
import logging
|
| 2 |
import numpy as np
|
| 3 |
from gradio_client import Client
|
| 4 |
import os
|
|
|
|
| 6 |
import time
|
| 7 |
import httpx
|
| 8 |
|
| 9 |
+
logger = logging.getLogger(__name__)
|
| 10 |
+
|
| 11 |
from chord_extractor.extractors import Chordino
|
| 12 |
from chord_extractor import clear_conversion_cache, LabelledChordSequence
|
| 13 |
|
|
|
|
| 31 |
_client = _create_client()
|
| 32 |
|
| 33 |
def _call_embedding_service(chords_w_timestamps):
|
| 34 |
+
logger.info(chords_w_timestamps)
|
| 35 |
result = _client.predict(json.dumps(chords_w_timestamps), api_name="/predict")
|
| 36 |
return json.loads(result)
|
| 37 |
|
src/methodology.py
CHANGED
|
@@ -18,15 +18,16 @@ class CountBasedMethodology(ABC):
|
|
| 18 |
|
| 19 |
|
| 20 |
class SimpleMethodology(CountBasedMethodology):
|
| 21 |
-
def __init__(self, scalers: dict[int, _TransformerProtocol], fallback_scaler: _TransformerProtocol):
|
| 22 |
self._scalers = scalers
|
| 23 |
self._fallback_scaler = fallback_scaler
|
|
|
|
| 24 |
|
| 25 |
def execute(self, neighbours_df: pd.DataFrame, lengths: pd.Series) -> pd.Series:
|
| 26 |
-
unscaled = sum(
|
| 27 |
concat = pd.concat([unscaled.rename('unscaled'), lengths.rename('length')], axis=1)
|
| 28 |
scaled = concat.apply(
|
| 29 |
lambda row: self._scalers.get(row['length'], self._fallback_scaler).transform(pd.DataFrame({_SCALER_X_LABEL: row['unscaled']}, index=[0]))[0][0],
|
| 30 |
axis=1
|
| 31 |
)
|
| 32 |
-
return 1 - scaled
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
class SimpleMethodology(CountBasedMethodology):
|
| 21 |
+
def __init__(self, scalers: dict[int, _TransformerProtocol], fallback_scaler: _TransformerProtocol, score_power: float = 1.0):
|
| 22 |
self._scalers = scalers
|
| 23 |
self._fallback_scaler = fallback_scaler
|
| 24 |
+
self._score_power = score_power
|
| 25 |
|
| 26 |
def execute(self, neighbours_df: pd.DataFrame, lengths: pd.Series) -> pd.Series:
|
| 27 |
+
unscaled = sum(neighbours_df[col] * (i + 1) for i, col in enumerate(neighbours_df.columns))
|
| 28 |
concat = pd.concat([unscaled.rename('unscaled'), lengths.rename('length')], axis=1)
|
| 29 |
scaled = concat.apply(
|
| 30 |
lambda row: self._scalers.get(row['length'], self._fallback_scaler).transform(pd.DataFrame({_SCALER_X_LABEL: row['unscaled']}, index=[0]))[0][0],
|
| 31 |
axis=1
|
| 32 |
)
|
| 33 |
+
return 1 - scaled ** self._score_power
|