add src

cnb.bofCdSsphPA
Commit 31a72045 ... 31a720458be99c6fb7c980d570e9db42ed40eed4 authored 2026-06-02 11:51:49 +0800 by cnb.bofCdSsphPA
Showing 188 changed files with 1994 additions and 43 deletions
acr-engine/__pycache__/run_demo.cpython-310.pyc
acr-engine/configs/default.yaml
acr-engine/data/dataset_registry.json
acr-engine/data/external/modelscope_music/manifests/bootstrap.json
acr-engine/data/index_api/chromaprint.pkl
acr-engine/data/index_api/reference_embs.npy
acr-engine/data/index_api/reference_ids.npy
acr-engine/data/index_v3/chromaprint.pkl
acr-engine/data/index_v3/reference_embs.npy
acr-engine/data/index_v3/reference_ids.npy
acr-engine/data/models_v3/best_model.pt
acr-engine/data/models_v3/song_to_idx.json
acr-engine/data/synthetic_v2/catalog.json
acr-engine/data/synthetic_v2/segments/song_0000_seg_00.wav
acr-engine/data/synthetic_v2/segments/song_0000_seg_01.wav
acr-engine/data/synthetic_v2/segments/song_0000_seg_02_augmented.wav
acr-engine/data/synthetic_v2/segments/song_0000_seg_03_humming_like.wav
acr-engine/data/synthetic_v2/segments/song_0000_seg_04_confused.wav
acr-engine/data/synthetic_v2/segments/song_0001_seg_00.wav
acr-engine/data/synthetic_v2/segments/song_0001_seg_01.wav
--- a/acr-engine/__pycache__/run_demo.cpython-310.pyc 0 → 100644
View file @31a7204
+++ b/acr-engine/__pycache__/run_demo.cpython-310.pyc 0 → 100644
View file @31a7204
--- a/acr-engine/configs/default.yaml
View file @31a7204
+++ b/acr-engine/configs/default.yaml
View file @31a7204
@@ -38,8 +38,9 @@ engine:
    n_fft: 1024
    hop_length: 256
  hybrid:
-    chroma_weight: 0.3
-    ecapa_weight: 0.7
+    chroma_weight: 0.25
+    ecapa_weight: 0.5
+    melody_weight: 0.25
    reject_threshold: 0.4

 augmentation:
--- a/acr-engine/data/dataset_registry.json 0 → 100644
View file @31a7204
+++ b/acr-engine/data/dataset_registry.json 0 → 100644
View file @31a7204
+[
+  {
+    "name": "FMA",
+    "source_url": "https://github.com/mdeff/fma",
+    "license": "Track-dependent / metadata CC BY 4.0; verify per subset",
+    "commercial_use": "review_required",
+    "notes": "Good first realistic MIR baseline"
+  },
+  {
+    "name": "MTG-Jamendo",
+    "source_url": "https://github.com/MTG/mtg-jamendo-dataset",
+    "license": "Creative Commons source tracks; verify exact subset terms",
+    "commercial_use": "review_required",
+    "notes": "Good retrieval/tagging corpus with scripts"
+  },
+  {
+    "name": "CCMusic",
+    "source_url": "https://ccmusic-database.github.io/en/database/ccm.html",
+    "license": "varies / application may be required",
+    "commercial_use": "review_required",
+    "notes": "Useful Chinese MIR source, needs permission review"
+  },
+  {
+    "name": "ModelScope-music",
+    "source_url": "https://modelscope.cn/search?page=1&search=music&type=dataset",
+    "license": "varies by dataset",
+    "commercial_use": "deny_until_whitelisted",
+    "notes": "Discovery surface only until per-dataset review is complete"
+  }
+]
\ No newline at end of file
--- a/acr-engine/data/external/modelscope_music/manifests/bootstrap.json 0 → 100644
View file @31a7204
+++ b/acr-engine/data/external/modelscope_music/manifests/bootstrap.json 0 → 100644
View file @31a7204
+{
+  "dataset": "modelscope_music",
+  "root": "data/external/modelscope_music",
+  "status": "initialized",
+  "next_steps": [
+    "download raw audio according to upstream license terms",
+    "convert to catalog/query manifests",
+    "record license evidence before training"
+  ]
+}
\ No newline at end of file
--- a/acr-engine/data/index_api/chromaprint.pkl 0 → 100644
View file @31a7204
+++ b/acr-engine/data/index_api/chromaprint.pkl 0 → 100644
View file @31a7204
--- a/acr-engine/data/index_api/reference_embs.npy 0 → 100644
View file @31a7204
+++ b/acr-engine/data/index_api/reference_embs.npy 0 → 100644
View file @31a7204
--- a/acr-engine/data/index_api/reference_ids.npy 0 → 100644
View file @31a7204
+++ b/acr-engine/data/index_api/reference_ids.npy 0 → 100644
View file @31a7204
--- a/acr-engine/data/index_v3/chromaprint.pkl 0 → 100644
View file @31a7204
+++ b/acr-engine/data/index_v3/chromaprint.pkl 0 → 100644
View file @31a7204
--- a/acr-engine/data/index_v3/reference_embs.npy 0 → 100644
View file @31a7204
+++ b/acr-engine/data/index_v3/reference_embs.npy 0 → 100644
View file @31a7204
--- a/acr-engine/data/index_v3/reference_ids.npy 0 → 100644
View file @31a7204
+++ b/acr-engine/data/index_v3/reference_ids.npy 0 → 100644
View file @31a7204
--- a/acr-engine/data/models_v3/best_model.pt 0 → 100644
View file @31a7204
+++ b/acr-engine/data/models_v3/best_model.pt 0 → 100644
View file @31a7204
--- a/acr-engine/data/models_v3/song_to_idx.json 0 → 100644
View file @31a7204
+++ b/acr-engine/data/models_v3/song_to_idx.json 0 → 100644
View file @31a7204
+{
+  "song_0000": 0,
+  "song_0001": 1,
+  "song_0002": 2,
+  "song_0003": 3,
+  "song_0004": 4,
+  "song_0005": 5,
+  "song_0006": 6,
+  "song_0007": 7,
+  "song_0008": 8,
+  "song_0009": 9,
+  "song_0010": 10,
+  "song_0011": 11,
+  "song_0012": 12,
+  "song_0013": 13,
+  "song_0014": 14,
+  "song_0015": 15
+}
\ No newline at end of file
--- a/acr-engine/data/synthetic_v2/catalog.json 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/catalog.json 0 → 100644
View file @31a7204
+[
+  {
+    "song_id": "song_0000",
+    "audio_path": "songs/song_0000.wav",
+    "duration": 15.0,
+    "base_freq": 130.81,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0001",
+    "audio_path": "songs/song_0001.wav",
+    "duration": 15.0,
+    "base_freq": 146.83,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0002",
+    "audio_path": "songs/song_0002.wav",
+    "duration": 15.0,
+    "base_freq": 164.81,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0003",
+    "audio_path": "songs/song_0003.wav",
+    "duration": 15.0,
+    "base_freq": 174.61,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0004",
+    "audio_path": "songs/song_0004.wav",
+    "duration": 15.0,
+    "base_freq": 196.0,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0005",
+    "audio_path": "songs/song_0005.wav",
+    "duration": 15.0,
+    "base_freq": 220.0,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0006",
+    "audio_path": "songs/song_0006.wav",
+    "duration": 15.0,
+    "base_freq": 246.94,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0007",
+    "audio_path": "songs/song_0007.wav",
+    "duration": 15.0,
+    "base_freq": 261.63,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0008",
+    "audio_path": "songs/song_0008.wav",
+    "duration": 15.0,
+    "base_freq": 293.66,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0009",
+    "audio_path": "songs/song_0009.wav",
+    "duration": 15.0,
+    "base_freq": 329.63,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0010",
+    "audio_path": "songs/song_0010.wav",
+    "duration": 15.0,
+    "base_freq": 349.23,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0011",
+    "audio_path": "songs/song_0011.wav",
+    "duration": 15.0,
+    "base_freq": 392.0,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0012",
+    "audio_path": "songs/song_0012.wav",
+    "duration": 15.0,
+    "base_freq": 440.0,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0013",
+    "audio_path": "songs/song_0013.wav",
+    "duration": 15.0,
+    "base_freq": 493.88,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0014",
+    "audio_path": "songs/song_0014.wav",
+    "duration": 15.0,
+    "base_freq": 523.25,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0015",
+    "audio_path": "songs/song_0015.wav",
+    "duration": 15.0,
+    "base_freq": 587.33,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0016",
+    "audio_path": "songs/song_0016.wav",
+    "duration": 15.0,
+    "base_freq": 659.25,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0017",
+    "audio_path": "songs/song_0017.wav",
+    "duration": 15.0,
+    "base_freq": 698.46,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0018",
+    "audio_path": "songs/song_0018.wav",
+    "duration": 15.0,
+    "base_freq": 783.99,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0019",
+    "audio_path": "songs/song_0019.wav",
+    "duration": 15.0,
+    "base_freq": 880.0,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0020",
+    "audio_path": "songs/song_0020.wav",
+    "duration": 15.0,
+    "base_freq": 987.77,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0021",
+    "audio_path": "songs/song_0021.wav",
+    "duration": 15.0,
+    "base_freq": 146.8292605393491,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0022",
+    "audio_path": "songs/song_0022.wav",
+    "duration": 15.0,
+    "base_freq": 164.81110255326524,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0023",
+    "audio_path": "songs/song_0023.wav",
+    "duration": 15.0,
+    "base_freq": 184.99297018186778,
+    "type": "reference"
+  }
+]
\ No newline at end of file
--- a/acr-engine/data/synthetic_v2/segments/song_0000_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0000_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0000_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0000_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0000_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0000_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0000_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0000_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0000_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0000_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0001_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0001_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0001_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0001_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0001_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0001_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0001_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0001_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0001_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0001_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0002_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0002_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0002_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0002_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0002_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0002_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0002_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0002_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0002_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0002_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0003_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0003_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0003_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0003_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0003_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0003_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0003_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0003_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0003_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0003_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0004_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0004_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0004_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0004_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0004_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0004_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0004_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0004_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0004_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0004_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0005_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0005_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0005_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0005_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0005_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0005_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0005_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0005_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0005_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0005_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0006_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0006_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0006_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0006_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0006_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0006_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0006_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0006_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0006_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0006_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0007_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0007_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0007_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0007_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0007_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0007_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0007_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0007_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0007_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0007_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0008_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0008_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0008_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0008_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0008_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0008_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0008_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0008_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0008_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0008_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0009_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0009_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0009_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0009_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0009_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0009_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0009_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0009_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0009_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0009_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0010_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0010_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0010_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0010_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0010_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0010_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0010_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0010_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0010_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0010_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0011_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0011_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0011_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0011_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0011_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0011_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0011_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0011_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0011_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0011_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0012_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0012_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0012_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0012_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0012_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0012_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0012_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0012_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0012_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0012_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0013_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0013_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0013_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0013_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0013_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0013_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0013_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0013_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0013_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0013_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0014_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0014_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0014_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0014_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0014_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0014_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0014_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0014_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0014_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0014_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0015_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0015_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0015_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0015_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0015_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0015_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0015_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0015_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0015_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0015_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0016_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0016_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0016_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0016_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0016_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0016_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0016_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0016_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0016_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0016_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0017_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0017_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0017_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0017_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0017_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0017_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0017_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0017_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0017_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0017_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0018_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0018_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0018_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0018_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0018_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0018_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0018_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0018_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0018_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0018_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0019_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0019_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0019_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0019_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0019_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0019_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0019_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0019_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0019_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0019_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0020_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0020_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0020_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0020_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0020_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0020_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0020_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0020_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0020_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0020_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0021_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0021_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0021_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0021_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0021_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0021_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0021_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0021_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0021_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0021_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0022_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0022_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0022_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0022_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0022_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0022_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0022_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0022_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0022_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0022_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0023_seg_00.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0023_seg_00.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0023_seg_01.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0023_seg_01.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0023_seg_02_augmented.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0023_seg_02_augmented.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0023_seg_03_humming_like.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0023_seg_03_humming_like.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/segments/song_0023_seg_04_confused.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/segments/song_0023_seg_04_confused.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0000.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0000.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0001.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0001.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0002.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0002.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0003.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0003.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0004.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0004.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0005.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0005.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0006.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0006.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0007.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0007.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0008.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0008.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0009.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0009.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0010.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0010.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0011.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0011.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0012.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0012.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0013.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0013.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0014.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0014.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0015.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0015.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0016.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0016.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0017.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0017.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0018.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0018.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0019.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0019.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0020.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0020.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0021.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0021.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0022.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0022.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/songs/song_0023.wav 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/songs/song_0023.wav 0 → 100644
View file @31a7204
--- a/acr-engine/data/synthetic_v2/test.json 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/test.json 0 → 100644
View file @31a7204
+[
+  {
+    "song_id": "song_0020",
+    "audio_path": "segments/song_0020_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 4.349828784349853,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0020",
+    "audio_path": "segments/song_0020_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 9.642182747327407,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0020",
+    "audio_path": "segments/song_0020_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 2.367717347418965,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0020",
+    "audio_path": "segments/song_0020_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 3.180577192661006,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0020",
+    "audio_path": "segments/song_0020_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 4.660551124366617,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0020",
+    "audio_path": "songs/song_0020.wav",
+    "duration": 15.0,
+    "base_freq": 987.77,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0021",
+    "audio_path": "segments/song_0021_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 5.631088908640184,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0021",
+    "audio_path": "segments/song_0021_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 1.8823366490525628,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0021",
+    "audio_path": "segments/song_0021_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 9.88006210404643,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0021",
+    "audio_path": "segments/song_0021_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 0.9025737685090285,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0021",
+    "audio_path": "segments/song_0021_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 1.3048954561918258,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0021",
+    "audio_path": "songs/song_0021.wav",
+    "duration": 15.0,
+    "base_freq": 146.8292605393491,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0022",
+    "audio_path": "segments/song_0022_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 3.9746734850812295,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0022",
+    "audio_path": "segments/song_0022_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 4.890968121206573,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0022",
+    "audio_path": "segments/song_0022_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 6.610400547460049,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0022",
+    "audio_path": "segments/song_0022_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 2.6329596668288424,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0022",
+    "audio_path": "segments/song_0022_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 0.8570731183991709,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0022",
+    "audio_path": "songs/song_0022.wav",
+    "duration": 15.0,
+    "base_freq": 164.81110255326524,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0023",
+    "audio_path": "segments/song_0023_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 4.461034326075292,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0023",
+    "audio_path": "segments/song_0023_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 9.605203782802876,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0023",
+    "audio_path": "segments/song_0023_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 4.7458228906154805,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0023",
+    "audio_path": "segments/song_0023_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 8.308702013555955,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0023",
+    "audio_path": "segments/song_0023_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 2.213510770155481,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0023",
+    "audio_path": "songs/song_0023.wav",
+    "duration": 15.0,
+    "base_freq": 184.99297018186778,
+    "type": "reference"
+  }
+]
\ No newline at end of file
--- a/acr-engine/data/synthetic_v2/train.json 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/train.json 0 → 100644
View file @31a7204
+[
+  {
+    "song_id": "song_0000",
+    "audio_path": "segments/song_0000_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 9.538159275210802,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0000",
+    "audio_path": "segments/song_0000_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 8.75852940378194,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0000",
+    "audio_path": "segments/song_0000_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 2.6338905075109076,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0000",
+    "audio_path": "segments/song_0000_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 6.389494948660052,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0000",
+    "audio_path": "segments/song_0000_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 5.303536721951775,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0000",
+    "audio_path": "songs/song_0000.wav",
+    "duration": 15.0,
+    "base_freq": 130.81,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0001",
+    "audio_path": "segments/song_0001_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 5.227827155319589,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0001",
+    "audio_path": "segments/song_0001_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 9.347062577364273,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0001",
+    "audio_path": "segments/song_0001_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 2.042591994235364,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0001",
+    "audio_path": "segments/song_0001_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 3.1617719627185403,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0001",
+    "audio_path": "segments/song_0001_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 0.73260721099633,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0001",
+    "audio_path": "songs/song_0001.wav",
+    "duration": 15.0,
+    "base_freq": 146.83,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0002",
+    "audio_path": "segments/song_0002_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 3.0928466220865323,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0002",
+    "audio_path": "segments/song_0002_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 4.083929086192168,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0002",
+    "audio_path": "segments/song_0002_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 4.024003870577246,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0002",
+    "audio_path": "segments/song_0002_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 9.028055457325827,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0002",
+    "audio_path": "segments/song_0002_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 4.2988814998983464,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0002",
+    "audio_path": "songs/song_0002.wav",
+    "duration": 15.0,
+    "base_freq": 164.81,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0003",
+    "audio_path": "segments/song_0003_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 0.1938328705001069,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0003",
+    "audio_path": "segments/song_0003_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 5.394190479225337,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0003",
+    "audio_path": "segments/song_0003_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 9.999078285092093,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0003",
+    "audio_path": "segments/song_0003_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 9.496117327159888,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0003",
+    "audio_path": "segments/song_0003_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 2.1796454090650363,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0003",
+    "audio_path": "songs/song_0003.wav",
+    "duration": 15.0,
+    "base_freq": 174.61,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0004",
+    "audio_path": "segments/song_0004_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 9.654976431382948,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0004",
+    "audio_path": "segments/song_0004_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 2.524783904929726,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0004",
+    "audio_path": "segments/song_0004_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 8.617229646275131,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0004",
+    "audio_path": "segments/song_0004_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 1.5172700695095642,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0004",
+    "audio_path": "segments/song_0004_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 4.161740214103284,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0004",
+    "audio_path": "songs/song_0004.wav",
+    "duration": 15.0,
+    "base_freq": 196.0,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0005",
+    "audio_path": "segments/song_0005_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 5.088720150695117,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0005",
+    "audio_path": "segments/song_0005_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 2.734248967132742,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0005",
+    "audio_path": "segments/song_0005_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 8.347239455766944,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0005",
+    "audio_path": "segments/song_0005_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 5.08240891592894,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0005",
+    "audio_path": "segments/song_0005_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 9.3424839368252,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0005",
+    "audio_path": "songs/song_0005.wav",
+    "duration": 15.0,
+    "base_freq": 220.0,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0006",
+    "audio_path": "segments/song_0006_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 2.5062680004361604,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0006",
+    "audio_path": "segments/song_0006_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 7.555773237416772,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0006",
+    "audio_path": "segments/song_0006_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 7.674707744954641,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0006",
+    "audio_path": "segments/song_0006_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 0.33364531245632434,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0006",
+    "audio_path": "segments/song_0006_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 2.007947946500762,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0006",
+    "audio_path": "songs/song_0006.wav",
+    "duration": 15.0,
+    "base_freq": 246.94,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0007",
+    "audio_path": "segments/song_0007_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 6.589030736792923,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0007",
+    "audio_path": "segments/song_0007_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 3.016303290280887,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0007",
+    "audio_path": "segments/song_0007_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 6.433406842054888,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0007",
+    "audio_path": "segments/song_0007_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 4.435623293630087,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0007",
+    "audio_path": "segments/song_0007_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 5.8536468854812105,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0007",
+    "audio_path": "songs/song_0007.wav",
+    "duration": 15.0,
+    "base_freq": 261.63,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0008",
+    "audio_path": "segments/song_0008_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 0.42302261562791377,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0008",
+    "audio_path": "segments/song_0008_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 0.18741536585645702,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0008",
+    "audio_path": "segments/song_0008_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 9.211624345024124,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0008",
+    "audio_path": "segments/song_0008_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 4.176939598434806,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0008",
+    "audio_path": "segments/song_0008_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 8.320259130717071,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0008",
+    "audio_path": "songs/song_0008.wav",
+    "duration": 15.0,
+    "base_freq": 293.66,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0009",
+    "audio_path": "segments/song_0009_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 5.076897127246463,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0009",
+    "audio_path": "segments/song_0009_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 5.397707584136711,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0009",
+    "audio_path": "segments/song_0009_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 7.3864400300146755,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0009",
+    "audio_path": "segments/song_0009_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 5.9724644107162845,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0009",
+    "audio_path": "segments/song_0009_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 7.21182997805427,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0009",
+    "audio_path": "songs/song_0009.wav",
+    "duration": 15.0,
+    "base_freq": 329.63,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0010",
+    "audio_path": "segments/song_0010_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 3.1007588293689183,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0010",
+    "audio_path": "segments/song_0010_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 3.9822405568601704,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0010",
+    "audio_path": "segments/song_0010_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 8.154060806559823,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0010",
+    "audio_path": "segments/song_0010_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 2.7321660611387344,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0010",
+    "audio_path": "segments/song_0010_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 9.564787178236601,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0010",
+    "audio_path": "songs/song_0010.wav",
+    "duration": 15.0,
+    "base_freq": 349.23,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0011",
+    "audio_path": "segments/song_0011_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 8.949259168211244,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0011",
+    "audio_path": "segments/song_0011_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 8.459337061558657,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0011",
+    "audio_path": "segments/song_0011_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 2.5060530898199906,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0011",
+    "audio_path": "segments/song_0011_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 5.0257314474126265,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0011",
+    "audio_path": "segments/song_0011_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 8.42530004113389,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0011",
+    "audio_path": "songs/song_0011.wav",
+    "duration": 15.0,
+    "base_freq": 392.0,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0012",
+    "audio_path": "segments/song_0012_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 7.253242125518553,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0012",
+    "audio_path": "segments/song_0012_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 6.880436512027717,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0012",
+    "audio_path": "segments/song_0012_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 0.26647154963833186,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0012",
+    "audio_path": "segments/song_0012_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 7.214001122963067,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0012",
+    "audio_path": "segments/song_0012_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 1.4777570830033182,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0012",
+    "audio_path": "songs/song_0012.wav",
+    "duration": 15.0,
+    "base_freq": 440.0,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0013",
+    "audio_path": "segments/song_0013_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 3.3711217932975037,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0013",
+    "audio_path": "segments/song_0013_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 2.95024257658282,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0013",
+    "audio_path": "segments/song_0013_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 6.7440113989474435,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0013",
+    "audio_path": "segments/song_0013_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 3.27926658740176,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0013",
+    "audio_path": "segments/song_0013_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 0.06830120539555451,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0013",
+    "audio_path": "songs/song_0013.wav",
+    "duration": 15.0,
+    "base_freq": 493.88,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0014",
+    "audio_path": "segments/song_0014_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 4.389628114874606,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0014",
+    "audio_path": "segments/song_0014_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 5.397598089074283,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0014",
+    "audio_path": "segments/song_0014_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 7.543857087472844,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0014",
+    "audio_path": "segments/song_0014_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 5.77474814637882,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0014",
+    "audio_path": "segments/song_0014_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 5.212510542649235,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0014",
+    "audio_path": "songs/song_0014.wav",
+    "duration": 15.0,
+    "base_freq": 523.25,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0015",
+    "audio_path": "segments/song_0015_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 5.3221248501273655,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0015",
+    "audio_path": "segments/song_0015_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 4.113385082174164,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0015",
+    "audio_path": "segments/song_0015_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 0.16726147602629915,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0015",
+    "audio_path": "segments/song_0015_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 4.305732086760379,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0015",
+    "audio_path": "segments/song_0015_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 6.197808424119352,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0015",
+    "audio_path": "songs/song_0015.wav",
+    "duration": 15.0,
+    "base_freq": 587.33,
+    "type": "reference"
+  }
+]
\ No newline at end of file
--- a/acr-engine/data/synthetic_v2/val.json 0 → 100644
View file @31a7204
+++ b/acr-engine/data/synthetic_v2/val.json 0 → 100644
View file @31a7204
+[
+  {
+    "song_id": "song_0016",
+    "audio_path": "segments/song_0016_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 7.208994524555927,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0016",
+    "audio_path": "segments/song_0016_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 4.958024367228626,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0016",
+    "audio_path": "segments/song_0016_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 6.1666879203579,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0016",
+    "audio_path": "segments/song_0016_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 8.621983105655142,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0016",
+    "audio_path": "segments/song_0016_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 3.004352846791234,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0016",
+    "audio_path": "songs/song_0016.wav",
+    "duration": 15.0,
+    "base_freq": 659.25,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0017",
+    "audio_path": "segments/song_0017_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 5.277150196277827,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0017",
+    "audio_path": "segments/song_0017_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 6.391085856661506,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0017",
+    "audio_path": "segments/song_0017_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 5.969708292829935,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0017",
+    "audio_path": "segments/song_0017_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 6.1736267933642495,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0017",
+    "audio_path": "segments/song_0017_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 1.1786165266165671,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0017",
+    "audio_path": "songs/song_0017.wav",
+    "duration": 15.0,
+    "base_freq": 698.46,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0018",
+    "audio_path": "segments/song_0018_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 6.641438208318426,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0018",
+    "audio_path": "segments/song_0018_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 3.582227293409872,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0018",
+    "audio_path": "segments/song_0018_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 0.6333068606017467,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0018",
+    "audio_path": "segments/song_0018_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 3.3775515517078736,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0018",
+    "audio_path": "segments/song_0018_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 6.825519260932059,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0018",
+    "audio_path": "songs/song_0018.wav",
+    "duration": 15.0,
+    "base_freq": 783.99,
+    "type": "reference"
+  },
+  {
+    "song_id": "song_0019",
+    "audio_path": "segments/song_0019_seg_00.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 6.405372883123518,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0019",
+    "audio_path": "segments/song_0019_seg_01.wav",
+    "duration": 5.0,
+    "type": "clean",
+    "offset": 5.376553581360508,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0019",
+    "audio_path": "segments/song_0019_seg_02_augmented.wav",
+    "duration": 5.0,
+    "type": "augmented",
+    "offset": 1.5268044380447066,
+    "segment_type": "intro"
+  },
+  {
+    "song_id": "song_0019",
+    "audio_path": "segments/song_0019_seg_03_humming_like.wav",
+    "duration": 5.0,
+    "type": "humming_like",
+    "offset": 5.864371630124319,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0019",
+    "audio_path": "segments/song_0019_seg_04_confused.wav",
+    "duration": 5.0,
+    "type": "confused",
+    "offset": 4.37486043050575,
+    "segment_type": "mid"
+  },
+  {
+    "song_id": "song_0019",
+    "audio_path": "songs/song_0019.wav",
+    "duration": 15.0,
+    "base_freq": 880.0,
+    "type": "reference"
+  }
+]
\ No newline at end of file
--- a/acr-engine/data/tmp_catalog.json 0 → 100644
View file @31a7204
+++ b/acr-engine/data/tmp_catalog.json 0 → 100644
View file @31a7204
+[
+  {
+    "song_id": "foo",
+    "audio_path": "raw/foo.wav",
+    "duration": 10.5,
+    "type": "reference",
+    "source_dataset": "fma"
+  }
+]
\ No newline at end of file
--- a/acr-engine/evaluate.py
View file @31a7204
+++ b/acr-engine/evaluate.py
View file @31a7204
@@ -71,6 +71,7 @@ def main():
            })

    total = len(queries)
+    confusion_focus = {k:v for k,v in by_type.items() if k in {"confused", "humming_like"}}
    report = {
        "split": args.split,
        "num_queries": total,
@@ -84,6 +85,10 @@ def main():
            }
            for k, v in by_type.items()
        },
+        "hard_case_summary": {
+            k: {"n": v["n"], "top1": round(v["top1"]/v["n"],4) if v["n"] else 0.0, "topk": round(v["topk"]/v["n"],4) if v["n"] else 0.0}
+            for k,v in confusion_focus.items()
+        },
        "sample_failures": failures[:10],
    }
    print(json.dumps(report, ensure_ascii=False, indent=2))
--- a/acr-engine/requirements.txt
View file @31a7204
+++ b/acr-engine/requirements.txt
View file @31a7204
@@ -4,3 +4,6 @@ soundfile>=0.12
 librosa>=0.10
 tqdm>=4.66
 torch>=2.3
+fastapi>=0.115
+uvicorn>=0.30
+pydantic>=2.8
--- a/acr-engine/src/__pycache__/__init__.cpython-310.pyc 0 → 100644
View file @31a7204
+++ b/acr-engine/src/__pycache__/__init__.cpython-310.pyc 0 → 100644
View file @31a7204
--- a/acr-engine/src/data/__pycache__/__init__.cpython-310.pyc 0 → 100644
View file @31a7204
+++ b/acr-engine/src/data/__pycache__/__init__.cpython-310.pyc 0 → 100644
View file @31a7204
--- a/acr-engine/src/data/__pycache__/dataset.cpython-310.pyc 0 → 100644
View file @31a7204
+++ b/acr-engine/src/data/__pycache__/dataset.cpython-310.pyc 0 → 100644
View file @31a7204
--- a/acr-engine/src/data/__pycache__/synthetic.cpython-310.pyc 0 → 100644
View file @31a7204
+++ b/acr-engine/src/data/__pycache__/synthetic.cpython-310.pyc 0 → 100644
View file @31a7204
--- a/acr-engine/src/data/external_adapters.py 0 → 100644
View file @31a7204
+++ b/acr-engine/src/data/external_adapters.py 0 → 100644
View file @31a7204
+"""Dataset adapter skeletons for external/open music corpora."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, asdict
+from pathlib import Path
+from typing import Dict, List
+import argparse
+import json
+
+
+@dataclass
+class DatasetRecord:
+    """One row of the dataset licensing registry.
+
+    Field order is significant: it fixes the positional constructor signature
+    and the key order produced by ``dataclasses.asdict``.
+    """
+
+    # Human-readable dataset name.
+    name: str
+    # Upstream landing page or repository URL.
+    source_url: str
+    # Free-text summary of the license situation.
+    license: str
+    # Commercial-use status tag; the registry in this module uses
+    # "review_required" and "deny_until_whitelisted".
+    commercial_use: str
+    # Free-text remarks about the dataset.
+    notes: str
+
+
+class BaseAdapter:
+    """Common behaviour for external dataset adapters.
+
+    Subclasses set ``name`` (used as the dataset key in the bootstrap manifest)
+    and override ``describe``.
+    """
+
+    name = "base"
+
+    def describe(self) -> Dict:
+        """Return a metadata dict describing the dataset.
+
+        Raises:
+            NotImplementedError: always, in the base class.
+        """
+        raise NotImplementedError
+
+    def init_layout(self, root: Path) -> Dict:
+        """Create the on-disk directory skeleton for this dataset.
+
+        Creates ``root`` (with parents) and the ``raw``, ``processed``,
+        ``manifests`` and ``licenses`` sub-directories, then writes
+        ``manifests/bootstrap.json``.
+
+        Args:
+            root: Directory that will hold the dataset.
+
+        Returns:
+            The manifest dict that was written to ``bootstrap.json``.
+        """
+        root.mkdir(parents=True, exist_ok=True)
+        for sub in ("raw", "processed", "manifests", "licenses"):
+            (root / sub).mkdir(exist_ok=True)
+        manifest = {
+            "dataset": self.name,
+            "root": str(root),
+            "status": "initialized",
+            "next_steps": [
+                "download raw audio according to upstream license terms",
+                "convert to catalog/query manifests",
+                "record license evidence before training",
+            ],
+        }
+        # ensure_ascii=False writes non-ASCII characters (e.g. in ``root``)
+        # verbatim, so the encoding is pinned instead of left to the locale.
+        with open(root / "manifests" / "bootstrap.json", "w", encoding="utf-8") as f:
+            json.dump(manifest, f, indent=2, ensure_ascii=False)
+        return manifest
+
+
+class FMAAdapter(BaseAdapter):
+    """Adapter for the FMA corpus."""
+
+    name = "fma"
+
+    def describe(self) -> Dict:
+        """Return the static onboarding description for FMA."""
+        return dict(
+            name="FMA",
+            source_url="https://github.com/mdeff/fma",
+            recommended_subset="fma_small",
+            catalog_strategy="full tracks as references; random 5-15s crops as queries",
+            license_policy="review per subset/track before commercial training",
+        )
+
+
+class MTGJamendoAdapter(BaseAdapter):
+    """Adapter for the MTG-Jamendo corpus."""
+
+    name = "mtg_jamendo"
+
+    def describe(self) -> Dict:
+        """Return the static onboarding description for MTG-Jamendo."""
+        return dict(
+            name="MTG-Jamendo",
+            source_url="https://github.com/MTG/mtg-jamendo-dataset",
+            recommended_subset="small curated slice",
+            catalog_strategy="download upstream audio subset then build catalog/query manifests",
+            license_policy="verify CC terms for intended commercial use",
+        )
+
+
+class CCMusicAdapter(BaseAdapter):
+    """Adapter for the CCMusic database."""
+
+    name = "ccmusic"
+
+    def describe(self) -> Dict:
+        """Return the static onboarding description for CCMusic."""
+        return dict(
+            name="CCMusic",
+            source_url="https://ccmusic-database.github.io/en/database/ccm.html",
+            recommended_subset="whitelisted approved subset only",
+            catalog_strategy="use approved corpora only; normalize to project manifests",
+            license_policy="application/permission review required before use",
+        )
+
+
+class ModelScopeMusicAdapter(BaseAdapter):
+    """Adapter for music datasets discovered through ModelScope."""
+
+    name = "modelscope_music"
+
+    def describe(self) -> Dict:
+        """Return the static onboarding description for ModelScope music datasets."""
+        return dict(
+            name="ModelScope music datasets",
+            source_url="https://modelscope.cn/search?page=1&search=music&type=dataset",
+            recommended_subset="manual whitelist only",
+            catalog_strategy="treat as discovery surface; add per-dataset adapter after legal review",
+            license_policy="deny until whitelisted",
+        )
+
+
+# CLI dataset key -> adapter instance. Each key equals the adapter's ``name``
+# attribute, and insertion order matches the order listed here.
+ADAPTERS = {
+    adapter.name: adapter
+    for adapter in (
+        FMAAdapter(),
+        MTGJamendoAdapter(),
+        CCMusicAdapter(),
+        ModelScopeMusicAdapter(),
+    )
+}
+
+# Licensing registry rows; ``write_registry`` serializes them in this order.
+_FMA_RECORD = DatasetRecord(
+    name="FMA",
+    source_url="https://github.com/mdeff/fma",
+    license="Track-dependent / metadata CC BY 4.0; verify per subset",
+    commercial_use="review_required",
+    notes="Good first realistic MIR baseline",
+)
+_MTG_JAMENDO_RECORD = DatasetRecord(
+    name="MTG-Jamendo",
+    source_url="https://github.com/MTG/mtg-jamendo-dataset",
+    license="Creative Commons source tracks; verify exact subset terms",
+    commercial_use="review_required",
+    notes="Good retrieval/tagging corpus with scripts",
+)
+_CCMUSIC_RECORD = DatasetRecord(
+    name="CCMusic",
+    source_url="https://ccmusic-database.github.io/en/database/ccm.html",
+    license="varies / application may be required",
+    commercial_use="review_required",
+    notes="Useful Chinese MIR source, needs permission review",
+)
+_MODELSCOPE_RECORD = DatasetRecord(
+    name="ModelScope-music",
+    source_url="https://modelscope.cn/search?page=1&search=music&type=dataset",
+    license="varies by dataset",
+    commercial_use="deny_until_whitelisted",
+    notes="Discovery surface only until per-dataset review is complete",
+)
+
+REGISTRY: List[DatasetRecord] = [
+    _FMA_RECORD,
+    _MTG_JAMENDO_RECORD,
+    _CCMUSIC_RECORD,
+    _MODELSCOPE_RECORD,
+]
+
+
+def write_registry(output_path: str):
+    """Serialize ``REGISTRY`` to a JSON file.
+
+    Args:
+        output_path: Destination file; missing parent directories are created.
+
+    Returns:
+        The destination as a ``Path``.
+    """
+    out = Path(output_path)
+    out.parent.mkdir(parents=True, exist_ok=True)
+    # ensure_ascii=False emits non-ASCII characters verbatim, so the encoding
+    # is pinned to UTF-8 rather than inherited from the platform locale.
+    with open(out, "w", encoding="utf-8") as f:
+        json.dump([asdict(x) for x in REGISTRY], f, indent=2, ensure_ascii=False)
+    return out
+
+
+def main():
+    """CLI entry point exposing the ``registry``, ``init`` and ``describe`` subcommands."""
+    parser = argparse.ArgumentParser()
+    commands = parser.add_subparsers(dest="cmd", required=True)
+
+    registry_cmd = commands.add_parser("registry")
+    registry_cmd.add_argument("--output", default="data/dataset_registry.json")
+
+    init_cmd = commands.add_parser("init")
+    init_cmd.add_argument("dataset", choices=sorted(ADAPTERS))
+    init_cmd.add_argument("--root", default="data/external")
+
+    describe_cmd = commands.add_parser("describe")
+    describe_cmd.add_argument("dataset", choices=sorted(ADAPTERS))
+
+    args = parser.parse_args()
+
+    # "registry" prints the written path as-is; the other two print JSON.
+    if args.cmd == "registry":
+        print(write_registry(args.output))
+        return
+
+    if args.cmd == "init":
+        dataset_root = Path(args.root) / args.dataset
+        payload = ADAPTERS[args.dataset].init_layout(dataset_root)
+    elif args.cmd == "describe":
+        payload = ADAPTERS[args.dataset].describe()
+    else:
+        return
+    print(json.dumps(payload, indent=2, ensure_ascii=False))
+
+
+if __name__ == "__main__":
+    main()
--- a/acr-engine/src/data/manifest_tools.py 0 → 100644
View file @31a7204
+++ b/acr-engine/src/data/manifest_tools.py 0 → 100644
View file @31a7204
+"""External dataset manifest conversion templates."""
+
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+from pathlib import Path
+from typing import List, Dict
+
+
+def write_catalog(records: List[Dict], output_path: Path):
+    """Write catalog records to ``output_path`` as indented UTF-8 JSON.
+
+    Args:
+        records: JSON-serializable catalog entries.
+        output_path: Destination file; missing parent directories are created.
+    """
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    # ensure_ascii=False writes non-ASCII song ids/paths verbatim, so the file
+    # encoding is pinned rather than left to the platform locale.
+    with open(output_path, "w", encoding="utf-8") as f:
+        json.dump(records, f, indent=2, ensure_ascii=False)
+
+
+def csv_to_catalog(
+    csv_path: Path,
+    output_path: Path,
+    path_field: str = "audio_path",
+    id_field: str = "song_id",
+    encoding: str = "utf-8-sig",
+):
+    """Convert a CSV manifest into a reference catalog JSON file.
+
+    Every row becomes a record of type ``"reference"``.
+
+    Args:
+        csv_path: Input CSV with a header row.
+        output_path: Destination catalog JSON.
+        path_field: CSV column holding the audio path.
+        id_field: CSV column holding the song id.
+        encoding: Text encoding of the CSV. The default ``utf-8-sig`` reads
+            plain UTF-8 and also strips a leading byte-order mark, which would
+            otherwise corrupt the first header name.
+
+    Returns:
+        Number of records written.
+
+    Raises:
+        KeyError: if a row lacks ``id_field`` or ``path_field``.
+    """
+    records = []
+    with open(csv_path, newline="", encoding=encoding) as f:
+        for row in csv.DictReader(f):
+            records.append(
+                {
+                    "song_id": row[id_field],
+                    "audio_path": row[path_field],
+                    # Blank or missing cells fall back to the default.
+                    "duration": float(row.get("duration") or 0.0),
+                    "type": "reference",
+                    "source_dataset": row.get("source_dataset") or "external",
+                }
+            )
+    write_catalog(records, output_path)
+    return len(records)
+
+
+def main():
+    """CLI entry point; the only subcommand is ``csv-to-catalog``."""
+    parser = argparse.ArgumentParser()
+    commands = parser.add_subparsers(dest="cmd", required=True)
+
+    convert_cmd = commands.add_parser("csv-to-catalog")
+    convert_cmd.add_argument("csv_path")
+    convert_cmd.add_argument("output_path")
+    convert_cmd.add_argument("--path-field", default="audio_path")
+    convert_cmd.add_argument("--id-field", default="song_id")
+
+    args = parser.parse_args()
+    if args.cmd != "csv-to-catalog":
+        return
+
+    source = Path(args.csv_path)
+    destination = Path(args.output_path)
+    count = csv_to_catalog(source, destination, args.path_field, args.id_field)
+    # Report the outcome as a single JSON line on stdout.
+    print(json.dumps({"status": "ok", "records": count}, ensure_ascii=False))
+
+
+if __name__ == "__main__":
+    main()
--- a/acr-engine/src/engines/__pycache__/__init__.cpython-310.pyc 0 → 100644
View file @31a7204
+++ b/acr-engine/src/engines/__pycache__/__init__.cpython-310.pyc 0 → 100644
View file @31a7204
--- a/acr-engine/src/engines/__pycache__/chromaprint_matcher.cpython-310.pyc 0 → 100644
View file @31a7204
+++ b/acr-engine/src/engines/__pycache__/chromaprint_matcher.cpython-310.pyc 0 → 100644
View file @31a7204
--- a/acr-engine/src/engines/__pycache__/ecapa_embedder.cpython-310.pyc 0 → 100644
View file @31a7204
+++ b/acr-engine/src/engines/__pycache__/ecapa_embedder.cpython-310.pyc 0 → 100644
View file @31a7204
--- a/acr-engine/src/engines/__pycache__/hybrid_engine.cpython-310.pyc 0 → 100644
View file @31a7204
+++ b/acr-engine/src/engines/__pycache__/hybrid_engine.cpython-310.pyc 0 → 100644
View file @31a7204
--- a/acr-engine/src/engines/hybrid_engine.py
View file @31a7204
+++ b/acr-engine/src/engines/hybrid_engine.py
View file @31a7204
-"""
-Hybrid ACR Engine: Chromaprint fast pre-filter + ECAPA-TDNN deep re-ranking.
-"""
+"""Hybrid ACR Engine: Chromaprint + ECAPA + melody-aware re-ranking."""

 import json
 import time
+from pathlib import Path
 from typing import Dict, List, Optional

 import librosa
 import numpy as np

+from src.utils.audio import AudioProcessor
+

 class Candidate:
-    def __init__(self, song_id: str, chroma_score: float = 0.0, ecapa_score: float = 0.0):
+    def __init__(self, song_id: str, chroma_score: float = 0.0, ecapa_score: float = 0.0, melody_score: float = 0.0):
        self.song_id = song_id
        self.chroma_score = chroma_score
        self.ecapa_score = ecapa_score
+        self.melody_score = melody_score
        self.metadata: Dict = {}

-    def combined_score(self, chroma_weight: float, ecapa_weight: float) -> float:
-        return chroma_weight * self.chroma_score + ecapa_weight * self.ecapa_score
-
-    def __repr__(self):
-        return f"Candidate({self.song_id}, chroma={self.chroma_score:.3f}, ecapa={self.ecapa_score:.3f})"
+    def combined_score(self, chroma_weight: float, ecapa_weight: float, melody_weight: float) -> float:
+        return (
+            chroma_weight * self.chroma_score
+            + ecapa_weight * self.ecapa_score
+            + melody_weight * self.melody_score
+        )


 class HybridEngine:
@@ -32,8 +35,9 @@ class HybridEngine:
        ref_embs: Optional[np.ndarray] = None,
        ref_ids: Optional[List[str]] = None,
        sr: int = 16000,
-        chroma_weight: float = 0.35,
-        ecapa_weight: float = 0.65,
+        chroma_weight: float = 0.25,
+        ecapa_weight: float = 0.5,
+        melody_weight: float = 0.25,
        reject_threshold: float = 0.35,
    ):
        self.chroma = chroma_matcher
@@ -43,12 +47,16 @@ class HybridEngine:
        self.sr = sr
        self.chroma_weight = chroma_weight
        self.ecapa_weight = ecapa_weight
+        self.melody_weight = melody_weight
        self.reject_threshold = reject_threshold
        self.song_metadata: Dict[str, Dict] = {}
+        self.song_audio_paths: Dict[str, str] = {}
+        self.audio = AudioProcessor(sr=sr)

    def load_metadata(self, metadata_path: str):
        with open(metadata_path) as f:
            items = json.load(f)
+        base_dir = str(Path(metadata_path).parent)
        for item in items:
            sid = item["song_id"]
            existing = self.song_metadata.get(sid, {})
@@ -59,15 +67,15 @@ class HybridEngine:
                    "audio_path": item.get("audio_path", existing.get("audio_path", "")),
                    "type": item.get("type", existing.get("type", "unknown")),
                }
+            if item.get("type") == "reference":
+                self.song_audio_paths[sid] = str(Path(base_dir) / item["audio_path"])

    @staticmethod
-    def _normalize_scores(score_pairs: List[tuple], invert: bool = False) -> Dict[str, float]:
+    def _normalize_scores(score_pairs: List[tuple]) -> Dict[str, float]:
        if not score_pairs:
            return {}
        ids = [sid for sid, _ in score_pairs]
        values = np.array([float(score) for _, score in score_pairs], dtype=np.float32)
-        if invert:
-            values = -values
        if len(values) == 1:
            return {ids[0]: 1.0}
        vmin = float(values.min())
@@ -77,12 +85,18 @@ class HybridEngine:
        norm = (values - vmin) / (vmax - vmin)
        return {sid: float(score) for sid, score in zip(ids, norm)}

-    def recognize(
-        self,
-        audio_path: str,
-        top_n: int = 5,
-        mode: str = "auto",
-    ) -> Dict:
+    def _melody_scores(self, query_y: np.ndarray, candidate_ids: List[str]) -> Dict[str, float]:
+        scores = []
+        for song_id in candidate_ids:
+            ref_path = self.song_audio_paths.get(song_id)
+            if not ref_path or not Path(ref_path).exists():
+                continue
+            ref_y, _ = librosa.load(ref_path, sr=self.sr, mono=True, duration=8.0)
+            score = self.audio.melody_similarity(query_y, ref_y)
+            scores.append((song_id, score))
+        return self._normalize_scores(scores)
+
+    def recognize(self, audio_path: str, top_n: int = 5, mode: str = "auto") -> Dict:
        del mode
        start = time.time()
        y, _ = librosa.load(audio_path, sr=self.sr, mono=True)
@@ -96,41 +110,45 @@ class HybridEngine:
            ref_norm = self.ref_embs / (np.linalg.norm(self.ref_embs, axis=1, keepdims=True) + 1e-12)
            query_norm = query_emb / (np.linalg.norm(query_emb) + 1e-12)
            scores = query_norm @ ref_norm.T
-            top_indices = np.argsort(-scores)[: max(top_n * 5, 20)]
+            top_indices = np.argsort(-scores)[: max(top_n * 10, 30)]
            ecapa_matches = [(self.ref_ids[idx], float(scores[idx])) for idx in top_indices]
        ecapa_norm = self._normalize_scores(ecapa_matches)

-        all_song_ids = set(chroma_norm) | set(ecapa_norm)
+        candidate_pool = list(set(list(chroma_norm.keys())[: top_n * 8] + list(ecapa_norm.keys())[: top_n * 8]))
+        melody_norm = self._melody_scores(y, candidate_pool)
+
+        all_song_ids = set(candidate_pool) | set(melody_norm)
        combined: List[Candidate] = []
        for song_id in all_song_ids:
            candidate = Candidate(
                song_id=song_id,
                chroma_score=chroma_norm.get(song_id, 0.0),
                ecapa_score=ecapa_norm.get(song_id, 0.0),
+                melody_score=melody_norm.get(song_id, 0.0),
            )
            candidate.metadata = self.song_metadata.get(song_id, {})
            combined.append(candidate)

-        combined.sort(key=lambda c: c.combined_score(self.chroma_weight, self.ecapa_weight), reverse=True)
+        combined.sort(
+            key=lambda c: c.combined_score(self.chroma_weight, self.ecapa_weight, self.melody_weight),
+            reverse=True,
+        )
        results = combined[:top_n]
        elapsed = (time.time() - start) * 1000

        output = []
        for c in results:
-            fused = c.combined_score(self.chroma_weight, self.ecapa_weight)
+            fused = c.combined_score(self.chroma_weight, self.ecapa_weight, self.melody_weight)
            output.append(
                {
                    "song_id": c.song_id,
                    "confidence": round(fused, 4),
                    "chromaprint_score": round(c.chroma_score, 4),
                    "ecapa_score": round(c.ecapa_score, 4),
+                    "melody_score": round(c.melody_score, 4),
                    "accepted": fused >= self.reject_threshold,
                    "metadata": c.metadata,
                }
            )

-        return {
-            "candidates": output,
-            "processing_time_ms": round(elapsed, 1),
-            "num_candidates": len(results),
-        }
+        return {"candidates": output, "processing_time_ms": round(elapsed, 1), "num_candidates": len(results)}
--- a/acr-engine/src/models/__pycache__/__init__.cpython-310.pyc 0 → 100644
View file @31a7204
+++ b/acr-engine/src/models/__pycache__/__init__.cpython-310.pyc 0 → 100644
View file @31a7204
--- a/acr-engine/src/models/__pycache__/ecapa_tdnn.cpython-310.pyc 0 → 100644
View file @31a7204
+++ b/acr-engine/src/models/__pycache__/ecapa_tdnn.cpython-310.pyc 0 → 100644
View file @31a7204
--- a/acr-engine/src/models/__pycache__/losses.cpython-310.pyc 0 → 100644
View file @31a7204
+++ b/acr-engine/src/models/__pycache__/losses.cpython-310.pyc 0 → 100644
View file @31a7204
--- a/acr-engine/src/service/__pycache__/app.cpython-310.pyc 0 → 100644
View file @31a7204
+++ b/acr-engine/src/service/__pycache__/app.cpython-310.pyc 0 → 100644
View file @31a7204
--- a/acr-engine/src/service/app.py 0 → 100644
View file @31a7204
+++ b/acr-engine/src/service/app.py 0 → 100644
View file @31a7204
+from pathlib import Path
+from typing import Optional
+
+import numpy as np
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+
+from src.engines.chromaprint_matcher import ChromaprintMatcher
+from src.engines.ecapa_embedder import ECAPAEmbedder
+from src.engines.hybrid_engine import HybridEngine
+
+
+class RecognizeRequest(BaseModel):
+    # Request body for POST /recognize. All paths are checked and opened on
+    # the server's own filesystem.
+    # Audio file to identify.
+    query_path: str
+    # Directory searched for catalog.json / train.json / val.json / test.json metadata.
+    data_dir: str = "data/synthetic_v2"
+    # Model checkpoint handed to ECAPAEmbedder.
+    model_path: str = "data/models_v3/best_model.pt"
+    # Prefix for "<prefix>_embs.npy" / "<prefix>_ids.npy"; chromaprint.pkl is
+    # looked up in the prefix's parent directory.
+    index_prefix: str = "data/index_v3/reference"
+    # Number of candidates requested from the engine.
+    top_n: int = 5
+    # Device string handed to ECAPAEmbedder.
+    device: str = "cpu"
+
+
+class BuildIndexRequest(BaseModel):
+    # Request body for POST /index/build. All paths are used on the server's
+    # own filesystem.
+    # Dataset directory passed to the index builders.
+    data_dir: str
+    # Model checkpoint passed to the embedding index builder.
+    model_path: str
+    # Directory that receives the index files; created if missing.
+    output_dir: str
+    # Device string passed to the embedding index builder.
+    device: str = "cpu"
+
+
+app = FastAPI(title="ACR Service", version="0.1.0")
+
+
+def _load_engine(data_dir: str, model_path: str, index_prefix: str, device: str) -> HybridEngine:
+    """Assemble a ``HybridEngine`` from on-disk artifacts.
+
+    Expects ``chromaprint.pkl`` in the parent directory of ``index_prefix``,
+    the model at ``model_path``, and ``<index_prefix>_embs.npy`` /
+    ``<index_prefix>_ids.npy``. Any of ``catalog.json``, ``train.json``,
+    ``val.json`` and ``test.json`` found under ``data_dir`` is loaded as
+    metadata, in that order.
+
+    Raises:
+        HTTPException: status 400 when the chromaprint index, the model, or
+            either embedding index file is missing.
+    """
+    # NOTE(review): every path here comes from the request body, and the ids
+    # array is read with allow_pickle=True (unpickling can execute arbitrary
+    # code); chromaprint.pkl is presumably a pickle as well. Confirm that
+    # callers are trusted or restrict the paths before exposing this service.
+    matcher = ChromaprintMatcher()
+    index_dir = Path(index_prefix).parent
+    chroma_path = str(index_dir / "chromaprint.pkl")
+    if not Path(chroma_path).exists():
+        raise HTTPException(status_code=400, detail=f"Missing chromaprint index: {chroma_path}")
+    matcher.load(chroma_path)
+
+    if not Path(model_path).exists():
+        raise HTTPException(status_code=400, detail=f"Missing model: {model_path}")
+    embedder = ECAPAEmbedder(model_path=model_path, device=device)
+
+    embs_path, ids_path = (f"{index_prefix}_{part}.npy" for part in ("embs", "ids"))
+    if not (Path(embs_path).exists() and Path(ids_path).exists()):
+        raise HTTPException(status_code=400, detail="Missing embedding index files")
+
+    ref_embs = np.load(embs_path)
+    ref_ids = np.load(ids_path, allow_pickle=True).tolist()
+    engine = HybridEngine(matcher, embedder, ref_embs, ref_ids)
+
+    # Load whichever manifests exist; absent ones are skipped silently.
+    manifest_names = ("catalog.json", "train.json", "val.json", "test.json")
+    for manifest in (Path(data_dir) / fname for fname in manifest_names):
+        if manifest.exists():
+            engine.load_metadata(str(manifest))
+    return engine
+
+
+@app.get("/health")
+def health():
+    """Liveness probe: always reports status ``ok``."""
+    return dict(status="ok")
+
+
+@app.post("/recognize")
+def recognize(req: RecognizeRequest):
+    """Identify the audio file at ``req.query_path``.
+
+    A fresh engine is built from the request's paths on every call.
+
+    Raises:
+        HTTPException: status 400 when the query file does not exist, or when
+            ``_load_engine`` reports a missing artifact.
+    """
+    query_file = Path(req.query_path)
+    if query_file.exists():
+        engine = _load_engine(req.data_dir, req.model_path, req.index_prefix, req.device)
+        return engine.recognize(req.query_path, top_n=req.top_n)
+    raise HTTPException(status_code=400, detail=f"Missing query file: {req.query_path}")
+
+
+@app.post("/index/build")
+def build_index(req: BuildIndexRequest):
+    """Build the chromaprint and embedding indexes into ``req.output_dir``.
+
+    Returns:
+        A dict with ``status``, the number of reference ids, and the embedding
+        width (0 when the embedding array has fewer than two dimensions).
+    """
+    # Imported inside the handler, so importing this module does not import run_demo.
+    from run_demo import build_chroma_index, build_embedding_index
+
+    source_dir, target_dir = Path(req.data_dir), Path(req.output_dir)
+    target_dir.mkdir(parents=True, exist_ok=True)
+    build_chroma_index(source_dir, target_dir)
+    _unused, ref_embs, ref_ids = build_embedding_index(
+        source_dir, Path(req.model_path), target_dir / "reference", req.device
+    )
+
+    window_count = len(ref_ids)
+    embedding_dim = 0
+    if len(ref_embs.shape) > 1:
+        embedding_dim = int(ref_embs.shape[1])
+    return {"status": "ok", "num_reference_windows": window_count, "embedding_dim": embedding_dim}
--- a/acr-engine/src/utils/__pycache__/__init__.cpython-310.pyc 0 → 100644
View file @31a7204
+++ b/acr-engine/src/utils/__pycache__/__init__.cpython-310.pyc 0 → 100644
View file @31a7204
--- a/acr-engine/src/utils/__pycache__/audio.cpython-310.pyc 0 → 100644
View file @31a7204
+++ b/acr-engine/src/utils/__pycache__/audio.cpython-310.pyc 0 → 100644
View file @31a7204
--- a/acr-engine/src/utils/__pycache__/augment.cpython-310.pyc 0 → 100644
View file @31a7204
+++ b/acr-engine/src/utils/__pycache__/augment.cpython-310.pyc 0 → 100644
View file @31a7204
--- a/acr-engine/src/utils/audio.py
View file @31a7204
+++ b/acr-engine/src/utils/audio.py
View file @31a7204
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import numpy as np
 import librosa
+import numpy as np
+import torch
 from typing import List, Optional, Tuple


 class AudioProcessor:
-    def __init__(self, sr: int = 16000, n_mels: int = 80, n_fft: int = 512, hop_length: int = 160):
+    def __init__(self, sr: int = 16000, n_mels: int = 128, n_fft: int = 512, hop_length: int = 160):
        self.sr = sr
        self.n_mels = n_mels
        self.n_fft = n_fft
@@ -19,8 +17,7 @@ class AudioProcessor:

    def to_mel(self, y: np.ndarray) -> np.ndarray:
        mel = librosa.feature.melspectrogram(
-            y=y, sr=self.sr, n_mels=self.n_mels,
-            n_fft=self.n_fft, hop_length=self.hop_length
+            y=y, sr=self.sr, n_mels=self.n_mels, n_fft=self.n_fft, hop_length=self.hop_length
        )
        return librosa.power_to_db(mel, ref=np.max)

@@ -36,7 +33,7 @@ class AudioProcessor:
            y = np.pad(y, (0, pad))
        windows = []
        for start in range(0, len(y) - win_len + 1, stride):
-            windows.append(y[start:start + win_len])
+            windows.append(y[start : start + win_len])
        if not windows:
            windows.append(y[:win_len])
        return windows
@@ -47,10 +44,32 @@ class AudioProcessor:
        return self.to_mel_tensor(y), duration

    def extract_chroma(self, y: np.ndarray) -> np.ndarray:
-        chroma = librosa.feature.chroma_cqt(y=y, sr=self.sr)
-        return chroma
+        return librosa.feature.chroma_cqt(y=y, sr=self.sr)

    def extract_f0(self, y: np.ndarray, fmin=65, fmax=2093) -> np.ndarray:
        f0, _, _ = librosa.pyin(y, sr=self.sr, fmin=fmin, fmax=fmax)
-        f0 = np.nan_to_num(f0, nan=0.0)
-        return f0
+        return np.nan_to_num(f0, nan=0.0)
+
+    def melody_signature(self, y: np.ndarray) -> np.ndarray:
+        f0 = self.extract_f0(y)
+        if f0.size == 0:
+            return np.zeros(32, dtype=np.float32)
+        nonzero = f0[f0 > 0]
+        if nonzero.size == 0:
+            return np.zeros(32, dtype=np.float32)
+        contour = np.diff(np.log2(nonzero + 1e-6), prepend=np.log2(nonzero[0] + 1e-6))
+        contour = np.clip(contour, -0.5, 0.5)
+        if contour.size < 32:
+            contour = np.pad(contour, (0, 32 - contour.size))
+        else:
+            idx = np.linspace(0, contour.size - 1, 32).astype(int)
+            contour = contour[idx]
+        return contour.astype(np.float32)
+
+    def melody_similarity(self, y1: np.ndarray, y2: np.ndarray) -> float:
+        s1 = self.melody_signature(y1)
+        s2 = self.melody_signature(y2)
+        denom = float(np.linalg.norm(s1) * np.linalg.norm(s2) + 1e-12)
+        if denom <= 1e-12:
+            return 0.0
+        return float(np.dot(s1, s2) / denom)
--- a/docs/CHANGELOG.md
View file @31a7204
+++ b/docs/CHANGELOG.md
View file @31a7204
@@ -53,3 +53,25 @@
 结论：
 - 结构性错误（catalog/index/fusion/评测缺失）已明显改善
 - 当前主要剩余短板是 humming_like / confused 的鲁棒识别
+
+## 2026-06-02
+
+### Stage: 工业化服务骨架 + 外部 manifest 转换模板
+
+完成项：
+- 新增 FastAPI 服务骨架：`acr-engine/src/service/app.py`
+- 新增 manifest 转换工具：`acr-engine/src/data/manifest_tools.py`
+- 新增工业 benchmark 文档：`docs/industrial-benchmark-spec.md`
+- 扩展外部 dataset adapter CLI：`acr-engine/src/data/external_adapters.py`
+- 新增服务 API 文档：`docs/service-api.md`
+- requirements 增加 FastAPI / uvicorn / pydantic
+
+验证结果：
+- `external_adapters.py registry` 成功
+- `external_adapters.py describe ccmusic` 成功
+- `external_adapters.py init modelscope_music` 成功
+- `manifest_tools.py csv-to-catalog` 成功生成 catalog
+- `service.app health()` 返回 `{"status":"ok"}`
+- API `build_index(...)` 成功返回 reference window 数量
+- API `recognize(...)` 成功返回候选结果
+- `train.py --dry-run` 成功
--- a/docs/dataset-sources-and-licensing.md 0 → 100644
View file @31a7204
+++ b/docs/dataset-sources-and-licensing.md 0 → 100644
View file @31a7204
+# Dataset Sources and Licensing Notes
+
+> 更新：2026-06-02
+
+## 注意
+以下仅为工程接入与研究规划说明，不等于法律意见。实际商用前需要逐条复核原始 license、dataset terms 和再训练约束。
+
+## 候选数据源
+
+### 1. FMA
+- URL: https://github.com/mdeff/fma
+- 特点: 开放、MIR 常用、适合 retrieval baseline
+- 风险: 音频 license 按 artist/track 可能不同，需逐条核验
+
+### 2. MTG-Jamendo
+- URL: https://github.com/MTG/mtg-jamendo-dataset
+- 特点: Creative Commons 来源，适合音乐检索/标签任务
+- 风险: 仍需按具体曲目用途与商业场景做 license 审查
+
+### 3. CCMusic
+- 论文/介绍: https://transactions.ismir.net/articles/10.5334/tismir.194
+- 主页: https://ccmusic-database.github.io/en/database/ccm.html
+- 特点: 中国音乐 MIR 数据资源丰富
+- 风险: 部分数据集可能需要申请或存在使用边界，必须单独核验
+
+### 4. ModelScope music datasets
+- 入口: https://www.modelscope.cn/datasets
+- 搜索: https://modelscope.cn/search?page=1&search=music&type=dataset
+- 特点: 数据发现方便，可扩充中文生态
+- 风险: license 分散，不能默认可商用；接入前必须建立白名单
+
+## 接入原则
+
+- 只接入 license 明确的数据集
+- 默认拒绝“来源不明 / 不允许商业使用 / 禁止训练衍生模型”的数据
+- 训练前把数据集及许可信息落盘到 registry
--- a/docs/industrial-benchmark-spec.md 0 → 100644
View file @31a7204
+++ b/docs/industrial-benchmark-spec.md 0 → 100644
View file @31a7204
+# Industrial Benchmark Spec
+
+> 更新：2026-06-02
+
+## 目标
+为工业级可商用 ACR 设立持续基准，不只看总体 top1/top5，还看场景化与风险化指标。
+
+## Benchmark 维度
+
+### 1. Retrieval Quality
+- top1
+- top5
+- MRR
+- recall@k
+
+### 2. Scenario Buckets
+- clean
+- noisy
+- compressed
+- time-stretched
+- pitch-shifted
+- humming_like
+- confused
+- partial-overlap
+- far-field / device-recorded
+
+### 3. Catalog Scale Buckets
+- 1K songs
+- 10K songs
+- 100K songs
+- 1M+ songs
+
+### 4. Operational Metrics
+- p50 / p95 latency
+- indexing throughput
+- incremental update time
+- memory / disk footprint
+
+### 5. Business Safety Metrics
+- false accept rate
+- rejection quality
+- near-duplicate confusion rate
+- license provenance coverage
+
+## Required Artifacts per Model Release
+- dataset registry snapshot
+- training config snapshot
+- benchmark report JSON
+- benchmark summary markdown
+- model card
+- license review manifest
+
+## Minimum Go/No-Go Gate
+- clean top1 >= 0.95
+- noisy top1 >= 0.85
+- confused top1 >= 0.70
+- humming_like top1 >= 0.60
+- top5 >= 0.95 on all production-relevant buckets
+- false accept below agreed threshold
--- a/docs/industrialization-roadmap.md 0 → 100644
View file @31a7204
+++ b/docs/industrialization-roadmap.md 0 → 100644
View file @31a7204
+# ACR 工业级可商用演进路线
+
+> 更新：2026-06-02
+
+## 1. 目标定义
+
+把当前原型升级为一个可商用的工业级 ACR 系统，满足：
+
+- 可扩展曲库管理
+- 可重复训练 / 评测 / 部署
+- 多数据源接入（synthetic / FMA / Jamendo / CCMusic / ModelScope）
+- 更强鲁棒性（噪声、失真、哼唱、混淆）
+- 检索服务化
+- 商用合规与授权边界可审计
+
+## 2. 工业级分层
+
+### 2.1 数据层
+- `catalog.json` / query manifests
+- 外部 dataset adapters
+- license / usage tracking
+- 数据版本与快照
+
+### 2.2 训练层
+- baseline encoder
+- foundation-model encoder
+- retrieval-first losses
+- hard negative mining
+- 数据平衡与生成增强
+
+### 2.3 索引层
+- window-level embeddings
+- ANN index (Faiss/HNSW)
+- 指纹索引与向量索引双路
+- 增量入库
+
+### 2.4 服务层
+- FastAPI / gRPC
+- batch ingest
+- recognize API
+- top-k candidate + rejection
+- metadata lookup
+
+### 2.5 质量层
+- regression benchmark
+- hard-case benchmark
+- online shadow evaluation
+- 数据/模型回滚机制
+
+## 3. 数据集策略
+
+### 第一梯队（优先）
+- FMA small / medium
+- MTG-Jamendo
+- CCMusic（需核验申请/授权方式）
+- ModelScope music datasets（按 license 白名单接入）
+
+### 第二梯队
+- humming / QBSH 数据集
+- instrument / structure / singing datasets 作为辅助监督
+
+## 4. 商用必做项
+
+- 每个 dataset 记录：
+  - 来源 URL
+  - license
+  - 是否允许商业使用
+  - 再分发限制
+  - 模型训练用途限制
+- 每个模型版本记录训练数据组成
+- 每次上线保留评测报告与可追溯哈希
+
+## 5. 当前到工业化的缺口
+
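+- 缺 dataset adapter 层（`acr-engine/src/data/external_adapters.py` 目前仅为骨架，尚无下载与转换实现）
+- 缺 ANN 检索
+- 缺 API 服务（`acr-engine/src/service/app.py` 目前仅为 FastAPI 骨架）
+- 缺 license registry（`acr-engine/data/dataset_registry.json` 仅为初版，各数据集仍待逐条核验）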
+- 缺 foundation-model baseline
+- 缺真正的 hard-negative mining
+- 缺真实开源数据 benchmark
--- a/docs/service-api.md 0 → 100644
View file @31a7204
+++ b/docs/service-api.md 0 → 100644
View file @31a7204
+# ACR Service API
+
+## Endpoints
+
+### GET /health
+返回服务健康状态。
+
+### POST /recognize
+请求体（所有路径均为服务端本地文件系统路径；接口不接收音频上传）：
+
+```json
+{
+  "query_path": "data/synthetic_v2/segments/song_0019_seg_02_augmented.wav",
+  "data_dir": "data/synthetic_v2",
+  "model_path": "data/models_v3/best_model.pt",
+  "index_prefix": "data/index_v3/reference",
+  "top_n": 5,
+  "device": "cpu"
+}
+```
+
+### POST /index/build
+请求体：
+
+```json
+{
+  "data_dir": "data/synthetic_v2",
+  "model_path": "data/models_v3/best_model.pt",
+  "output_dir": "data/index_v3",
+  "device": "cpu"
+}
+```