postgres_db_schema_samples.md 23.8 KB

PostgreSQL Schema Samples / song-centric 4 表 DDL 与样例

更新:2026-06-04
SQL 文件:acr-engine/sql/acr_pg_schema_songcentric_v1.sql


1. 一页结论

当前默认物理模型:

media_entity -> audio_object -> feature_fact -> set_membership

当前默认逻辑语义:

song -> asset -> window -> fingerprint / embedding

其中:

  • audio_object 统一承载原始音频和切片
  • feature_fact 统一承载 exact/semantic 特征
  • set_membership 统一承载 reference/eval/hot 集关系

2. 切片 / 模型 / feature 落在哪张表

对象 关键字段 示例
song media_entity entity_type='song' song_000001
asset audio_object object_type='asset' 一首歌的原始 wav/mp3/flac
window audio_object object_type='window' 0-5000ms, 2500-7500ms
fingerprint feature_fact feature_type='fingerprint' chromaprint
embedding feature_fact feature_type='embedding' MERT/MuQ/fallback vector
model feature_fact model_name, model_version mert-v1-95m, muq-base, local_wavehash_embed
feature set feature_fact feature_set_name, feature_schema_ver mert_5s_hop2.5_v1

3. DDL

3.1 media_entity

create table if not exists media_entity (
    entity_id           bigserial primary key,
    entity_type         text not null check (entity_type in ('song', 'work', 'recording')),
    root_song_id        bigint,
    parent_entity_id    bigint,
    biz_key             text,
    title               text not null,
    artist_name         text,
    entity_status       text not null default 'active',
    metadata_json       jsonb not null default '{}'::jsonb,
    created_at          timestamptz not null default now(),
    updated_at          timestamptz not null default now(),
    constraint fk_media_entity_root_song
        foreign key (root_song_id) references media_entity(entity_id),
    constraint fk_media_entity_parent
        foreign key (parent_entity_id) references media_entity(entity_id)
);

3.2 audio_object

create table if not exists audio_object (
    object_id             bigserial primary key,
    object_type           text not null check (object_type in ('asset', 'window')),
    song_id               bigint not null references media_entity(entity_id),
    parent_object_id      bigint references audio_object(object_id),
    source_type           text,
    storage_uri           text,
    storage_scheme        text,
    checksum              text,
    codec                 text,
    sample_rate           integer,
    channels              integer,
    duration_ms           integer,
    start_ms              integer,
    end_ms                integer,
    object_status         text not null default 'ready',
    metadata_json         jsonb not null default '{}'::jsonb,
    created_at            timestamptz not null default now(),
    updated_at            timestamptz not null default now(),
    constraint ck_audio_object_window_parent
        check (
            (object_type = 'asset' and parent_object_id is null)
            or (object_type = 'window' and parent_object_id is not null)
        )
);

3.3 feature_fact

create table if not exists feature_fact (
    feature_id            bigserial primary key,
    feature_type          text not null check (feature_type in ('fingerprint', 'embedding')),
    object_id             bigint not null references audio_object(object_id),
    song_id               bigint not null references media_entity(entity_id),
    model_name            text not null,
    model_version         text not null,
    feature_set_name      text not null,
    feature_schema_ver    text not null default 'v1',
    embedding_dim         integer,
    fingerprint_value     text,
    embedding_uri         text,
    vector_table_name     text,
    checksum              text,
    feature_status        text not null default 'ready',
    metadata_json         jsonb not null default '{}'::jsonb,
    created_at            timestamptz not null default now(),
    updated_at            timestamptz not null default now(),
    constraint ck_feature_payload
        check (
            (feature_type = 'fingerprint' and fingerprint_value is not null)
            or (feature_type = 'embedding' and (embedding_uri is not null or vector_table_name is not null))
        )
);

3.4 set_membership

create table if not exists set_membership (
    membership_id         bigserial primary key,
    set_type              text not null check (set_type in ('reference_set', 'eval_set', 'hot_set')),
    set_name              text not null,
    member_type           text not null check (member_type in ('song', 'asset', 'window', 'feature')),
    member_id             bigint not null,
    song_id               bigint references media_entity(entity_id),
    is_active             boolean not null default true,
    priority              integer not null default 100,
    metadata_json         jsonb not null default '{}'::jsonb,
    created_at            timestamptz not null default now(),
    updated_at            timestamptz not null default now()
);

4. 典型写入流程图

4.1 表写入顺序

flowchart TD
    A[insert song] --> B[insert asset]
    B --> C[insert windows]
    C --> D1[insert fingerprint facts]
    C --> D2[insert embedding facts]
    A --> E[insert set_membership]
    B --> E
    C --> E

4.2 查询回溯顺序

flowchart LR
    A[query features] --> B[feature_fact]
    B --> C[window]
    C --> D[asset]
    D --> E[song]

5. 样例数据

5.1 写 song

insert into media_entity (
    entity_type, biz_key, title, artist_name, metadata_json
) values (
    'song', 'song_000001', 'Song Alpha', 'Artist A',
    '{"source":"catalog_import","language":"zh"}'::jsonb
)
returning entity_id;

5.2 写 asset

insert into audio_object (
    object_type, song_id, source_type, storage_uri, storage_scheme,
    checksum, codec, sample_rate, channels, duration_ms, metadata_json
) values (
    'asset', :song_id, 'catalog_master',
    's3://bucket/song_alpha/clip1.wav', 's3',
    'sha256:asset001', 'wav', 44100, 2, 183000,
    '{"uploader":"pipeline_v1"}'::jsonb
)
returning object_id;

5.3 写 window

insert into audio_object (
    object_type, song_id, parent_object_id,
    start_ms, end_ms, duration_ms, metadata_json
) values
    ('window', :song_id, :asset_id, 0,    5000, 5000, '{"hop_ms":2500}'::jsonb),
    ('window', :song_id, :asset_id, 2500, 7500, 5000, '{"hop_ms":2500}'::jsonb)
returning object_id;

5.4 写 fingerprint

insert into feature_fact (
    feature_type, object_id, song_id,
    model_name, model_version, feature_set_name, feature_schema_ver,
    fingerprint_value, checksum, metadata_json
) values (
    'fingerprint', :window_id, :song_id,
    'chromaprint', '1.0', 'chromaprint_5s_v1', 'v1',
    'AQAAE0mUaEkSZSo...', 'sha256:fp001',
    '{"lane":"exact"}'::jsonb
);

5.5 写 embedding

insert into feature_fact (
    feature_type, object_id, song_id,
    model_name, model_version, feature_set_name, feature_schema_ver,
    embedding_dim, embedding_uri, vector_table_name, checksum, metadata_json
) values (
    'embedding', :window_id, :song_id,
    'mert-v1-95m', 'hf-main', 'mert_5s_hop2.5_v1', 'v1',
    768, 's3://bucket/embeddings/song_alpha_win0001.npy', 'audio_embedding_vector_768',
    'sha256:emb001', '{"lane":"semantic"}'::jsonb
);

5.6 写 set membership

insert into set_membership (
    set_type, set_name, member_type, member_id, song_id, priority, metadata_json
) values
    ('reference_set', 'phase1_hot_reference_v1', 'song',   :song_id,   :song_id, 100, '{}'::jsonb),
    ('reference_set', 'phase1_hot_reference_v1', 'asset',  :asset_id,  :song_id, 100, '{}'::jsonb),
    ('reference_set', 'phase1_hot_reference_v1', 'window', :window_id, :song_id, 100, '{}'::jsonb);

6. 典型查询

6.1 查看某首歌有哪些 asset

select object_id, storage_uri, checksum, duration_ms
from audio_object
where song_id = :song_id
  and object_type = 'asset'
order by object_id;

6.2 查看某个 asset 切了哪些 window

select object_id, start_ms, end_ms, duration_ms
from audio_object
where parent_object_id = :asset_id
  and object_type = 'window'
order by start_ms;

6.3 查看某个 window 被哪些模型编码过

select feature_type, model_name, model_version, feature_set_name, embedding_dim,
       fingerprint_value, embedding_uri, vector_table_name
from feature_fact
where object_id = :window_id
order by feature_type, model_name, model_version;

6.4 从 feature 回查 song

select ff.feature_id,
       ff.feature_type,
       ff.model_name,
       w.object_id as window_id,
       w.start_ms,
       w.end_ms,
       a.object_id as asset_id,
       a.storage_uri,
       s.entity_id as song_id,
       s.title,
       s.artist_name
from feature_fact ff
join audio_object w
  on w.object_id = ff.object_id
 and w.object_type = 'window'
join audio_object a
  on a.object_id = w.parent_object_id
 and a.object_type = 'asset'
join media_entity s
  on s.entity_id = ff.song_id
where ff.feature_id = :feature_id;

6.5 查询 reference set 中的全部 window

select sm.set_name,
       sm.member_id as window_id,
       sm.song_id,
       ao.parent_object_id as asset_id,
       ao.start_ms,
       ao.end_ms
from set_membership sm
join audio_object ao
  on ao.object_id = sm.member_id
 and sm.member_type = 'window'
where sm.set_type = 'reference_set'
  and sm.set_name = 'phase1_hot_reference_v1'
  and sm.is_active = true
order by sm.song_id, ao.start_ms;

7. 一个最小存储样例怎么理解

song(Song Alpha)
  -> asset(clip1.wav)
    -> window(0-5000ms)
      -> fingerprint(chromaprint)
      -> embedding(mert-v1-95m)
    -> window(2500-7500ms)
      -> fingerprint(chromaprint)
      -> embedding(mert-v1-95m)

落表后意味着:

  • songmedia_entity
  • asset/window 都在 audio_object
  • chromaprint/mert 都在 feature_fact
  • 它们是否属于 hot reference 在 set_membership

8. 设计意图总结

这套结构主要解决:

  • 同一 song 下多个音频文件
  • 同一 asset 下多个切片窗口
  • 同一 window 被多个模型重复编码
  • fingerprint / embedding 统一落库
  • reference/eval/hot 统一集合治理
  • 查询后快速归属到 song_id

9. 当前最该关注的后续演进点

  1. 保持 4 表主链不变
  2. 给 semantic lane 接真实 MERT / MuQ adapter
  3. 继续复用 feature_fact.model_name/model_version/feature_set_name 做模型演进
  4. 必要时再补更重的 registry / vector table 治理

10. 相关文档


11. 在线检索回溯样例

11.1 从命中的 feature 回查 song

flowchart LR
    A[feature_fact] --> B[window]
    B --> C[asset]
    C --> D[song]

11.2 典型在线查询 SQL

select ff.feature_id,
       ff.feature_type,
       ff.model_name,
       ff.feature_set_name,
       w.object_id as window_id,
       w.start_ms,
       w.end_ms,
       a.object_id as asset_id,
       a.storage_uri,
       s.entity_id as song_id,
       s.title,
       s.artist_name
from feature_fact ff
join audio_object w
  on w.object_id = ff.object_id
 and w.object_type = 'window'
join audio_object a
  on a.object_id = w.parent_object_id
 and a.object_type = 'asset'
join media_entity s
  on s.entity_id = ff.song_id
where ff.feature_id = :feature_id;

11.3 典型 song-level 聚合 SQL

select ff.song_id,
       s.title,
       s.artist_name,
       count(*) as matched_windows
from feature_fact ff
join media_entity s
  on s.entity_id = ff.song_id
where ff.feature_id = any(:matched_feature_ids)
group by ff.song_id, s.title, s.artist_name
order by matched_windows desc
limit 20;

12. exact + semantic 双通道融合样例

12.1 融合流程图

flowchart TD
    A[exact candidates] --> C[song aggregation]
    B[semantic candidates] --> C
    C --> D[rerank]
    D --> E[topK song_ids]

12.2 推荐的 Phase-1 融合口径

final_song_score =
    0.55 * exact_score_norm
  + 0.35 * semantic_score_norm
  + 0.10 * coverage_score_norm

12.3 融合聚合 SQL 骨架

with matched as (
    select ff.song_id,
           ff.feature_type,
           w.object_id as window_id,
           w.parent_object_id as asset_id,
           w.start_ms,
           w.end_ms,
           :score_map[ff.feature_id]::double precision as raw_score
    from feature_fact ff
    join audio_object w
      on w.object_id = ff.object_id
     and w.object_type = 'window'
    where ff.feature_id = any(:matched_feature_ids)
)
select m.song_id,
       s.title,
       s.artist_name,
       count(*) filter (where m.feature_type = 'fingerprint') as exact_hit_count,
       count(*) filter (where m.feature_type = 'embedding') as semantic_hit_count,
       max(raw_score) filter (where m.feature_type = 'fingerprint') as exact_best_score,
       max(raw_score) filter (where m.feature_type = 'embedding') as semantic_best_score,
       max(end_ms) - min(start_ms) as offset_coverage_ms
from matched m
join media_entity s
  on s.entity_id = m.song_id
group by m.song_id, s.title, s.artist_name
order by coalesce(max(raw_score) filter (where m.feature_type = 'fingerprint'), 0) desc,
         coalesce(max(raw_score) filter (where m.feature_type = 'embedding'), 0) desc,
         offset_coverage_ms desc
limit 20;

13. 绑定关系与开源模型落库样例

13.1 最小绑定关系

media_entity(song)
  -> audio_object(asset)
    -> audio_object(window)
      -> feature_fact(chromaprint)
      -> feature_fact(mert-v1-95m)
      -> feature_fact(muq-base)

13.2 具体样例

Step 1: song

insert into media_entity (
    entity_type, biz_key, title, artist_name
) values (
    'song', 'song_000123', 'Demo Song', 'Demo Artist'
)
returning entity_id;

Step 2: asset

insert into audio_object (
    object_type, song_id, storage_uri, checksum, codec, sample_rate, channels, duration_ms
) values (
    'asset', :song_id, 's3://bucket/demo_song/master.wav', 'sha256:asset-demo', 'wav', 44100, 2, 210000
)
returning object_id;

Step 3: window

insert into audio_object (
    object_type, song_id, parent_object_id, start_ms, end_ms, duration_ms
) values (
    'window', :song_id, :asset_id, 0, 5000, 5000
)
returning object_id;

Step 4: chromaprint fingerprint

insert into feature_fact (
    feature_type, object_id, song_id,
    model_name, model_version, feature_set_name, feature_schema_ver,
    fingerprint_value
) values (
    'fingerprint', :window_id, :song_id,
    'chromaprint', '1.0', 'chromaprint_5s_v1', 'v1',
    'AQAAE0mUaEkSZSo...'
);

Step 5: MERT embedding

insert into feature_fact (
    feature_type, object_id, song_id,
    model_name, model_version, feature_set_name, feature_schema_ver,
    embedding_dim, embedding_uri, vector_table_name
) values (
    'embedding', :window_id, :song_id,
    'mert-v1-95m', 'hf-main', 'mert_5s_hop2.5_v1', 'v1',
    768, 's3://bucket/emb/demo_song_win0001_mert.npy', 'audio_embedding_vector_768'
);

Step 6: MuQ embedding

insert into feature_fact (
    feature_type, object_id, song_id,
    model_name, model_version, feature_set_name, feature_schema_ver,
    embedding_dim, embedding_uri, vector_table_name
) values (
    'embedding', :window_id, :song_id,
    'muq-base', 'hf-main', 'muq_5s_hop2.5_v1', 'v1',
    768, 's3://bucket/emb/demo_song_win0001_muq.npy', 'audio_embedding_vector_768'
);

Step 7: fallback embedding

insert into feature_fact (
    feature_type, object_id, song_id,
    model_name, model_version, feature_set_name, feature_schema_ver,
    embedding_dim, embedding_uri, vector_table_name
) values (
    'embedding', :window_id, :song_id,
    'local_wavehash_embed', 'phase1_local', 'wavehash_5s_hop2.5_v1', 'v1',
    8, 'file:///tmp/demo_song_win0001_wavehash.npy', 'audio_embedding_vector_8_placeholder'
);

13.3 查询某个 window 已经被哪些开源模型编码过

select object_id,
       song_id,
       feature_type,
       model_name,
       model_version,
       feature_set_name,
       embedding_dim,
       fingerprint_value,
       embedding_uri,
       vector_table_name
from feature_fact
where object_id = :window_id
order by feature_type, model_name;

13.4 查询某个 song 当前有哪些模型特征

select ff.song_id,
       ff.model_name,
       ff.model_version,
       ff.feature_type,
       count(*) as feature_rows
from feature_fact ff
where ff.song_id = :song_id
group by ff.song_id, ff.model_name, ff.model_version, ff.feature_type
order by ff.feature_type, ff.model_name;

14. 一个完整的多 asset / 多 window / 多 model 样例

假设:

  • 同一个 song_id = 1001
  • 有 2 个音频文件:master.wavugc_clip.mp3
  • 每个 asset 切成 2 个 window
  • 每个 window 都跑 chromaprint + mert-v1-95m + muq-base

14.1 逻辑结构

song(1001)
  -> asset(2001, master.wav)
    -> window(3001, 0-5000)
      -> chromaprint
      -> mert-v1-95m
      -> muq-base
    -> window(3002, 2500-7500)
      -> chromaprint
      -> mert-v1-95m
      -> muq-base
  -> asset(2002, ugc_clip.mp3)
    -> window(3003, 10000-15000)
      -> chromaprint
      -> mert-v1-95m
      -> muq-base
    -> window(3004, 12500-17500)
      -> chromaprint
      -> mert-v1-95m
      -> muq-base

14.2 会落成多少行

行数 说明
media_entity 1 一个 song
audio_object 6 2 个 asset + 4 个 window
feature_fact 12 4 个 window × 3 个模型
set_membership 视需要 可给 song/asset/window 挂 reference_set

14.3 查询某个 song 的全量树状数据

select s.entity_id as song_id,
       s.title,
       a.object_id as asset_id,
       a.storage_uri,
       w.object_id as window_id,
       w.start_ms,
       w.end_ms,
       ff.feature_type,
       ff.model_name,
       ff.model_version,
       ff.feature_set_name
from media_entity s
join audio_object a
  on a.song_id = s.entity_id
 and a.object_type = 'asset'
join audio_object w
  on w.parent_object_id = a.object_id
 and w.object_type = 'window'
left join feature_fact ff
  on ff.object_id = w.object_id
where s.entity_id = :song_id
order by a.object_id, w.start_ms, ff.feature_type, ff.model_name;

14.4 查询哪些 window 缺某个模型

这个 SQL 很适合做补算任务扫描,比如检查哪些 window 还没跑 muq-base

select w.object_id as window_id,
       w.song_id,
       w.parent_object_id as asset_id,
       w.start_ms,
       w.end_ms
from audio_object w
where w.object_type = 'window'
  and not exists (
      select 1
      from feature_fact ff
      where ff.object_id = w.object_id
        and ff.feature_type = 'embedding'
        and ff.model_name = 'muq-base'
        and ff.model_version = 'hf-main'
        and ff.feature_set_name = 'muq_5s_hop2.5_v1'
  )
order by w.song_id, w.parent_object_id, w.start_ms;

14.5 查询某个 asset 下每个 window 已经具备哪些模型

select w.object_id as window_id,
       w.start_ms,
       w.end_ms,
       string_agg(ff.model_name || ':' || ff.feature_type, ', ' order by ff.model_name) as ready_features
from audio_object w
left join feature_fact ff
  on ff.object_id = w.object_id
where w.object_type = 'window'
  and w.parent_object_id = :asset_id
group by w.object_id, w.start_ms, w.end_ms
order by w.start_ms;

15. 批量入库与索引建设样例

15.1 推荐批量顺序

batch-1: media_entity(song)
batch-2: audio_object(asset)
batch-3: audio_object(window)
batch-4: feature_fact(chromaprint)
batch-5: feature_fact(mert-v1-95m)
batch-6: feature_fact(muq-base)

15.2 推荐补充索引

create index if not exists idx_feature_fact_model_lookup
    on feature_fact(model_name, model_version, feature_set_name, feature_type, song_id);

15.3 主链完整性巡检

没有 window 的 asset

select a.object_id as asset_id, a.song_id, a.storage_uri
from audio_object a
where a.object_type = 'asset'
  and not exists (
      select 1
      from audio_object w
      where w.parent_object_id = a.object_id
        and w.object_type = 'window'
  );

没有 chromaprint 的 window

select w.object_id as window_id, w.song_id
from audio_object w
where w.object_type = 'window'
  and not exists (
      select 1
      from feature_fact ff
      where ff.object_id = w.object_id
        and ff.feature_type = 'fingerprint'
        and ff.model_name = 'chromaprint'
  );

没有 MERT 的 window

select w.object_id as window_id, w.song_id
from audio_object w
where w.object_type = 'window'
  and not exists (
      select 1
      from feature_fact ff
      where ff.object_id = w.object_id
        and ff.feature_type = 'embedding'
        and ff.model_name = 'mert-v1-95m'
        and ff.model_version = 'hf-main'
        and ff.feature_set_name = 'mert_5s_hop2.5_v1'
  );

15.4 冷热分层口径

hot_set       -> 高频版权曲
reference_set -> 主 reference catalog
cold          -> 长尾曲库,先保主链 + exact

16. vector table / embedding 文件存储样例

16.1 feature_fact 中怎么记录 embedding 位置

insert into feature_fact (
    feature_type,
    object_id,
    song_id,
    model_name,
    model_version,
    feature_set_name,
    feature_schema_ver,
    embedding_dim,
    embedding_uri,
    vector_table_name,
    checksum,
    metadata_json
) values (
    'embedding',
    :window_id,
    :song_id,
    'mert-v1-95m',
    'hf-main',
    'mert_5s_hop2.5_v1',
    'v1',
    768,
    's3://acr-emb/phase1/mert-v1-95m/song_1001/asset_2001/window_3001_mert_5s_hop2.5_v1.npy',
    'audio_embedding_vector_768',
    'sha256:emb-demo-001',
    '{"storage_tier":"hot"}'::jsonb
);

16.2 一个推荐的向量侧表样例

这里是逻辑样例,真实向量类型可按你的 pgvector 版本落地。

create table if not exists audio_embedding_vector_768 (
    embedding_row_id bigserial primary key,
    feature_id       bigint not null references feature_fact(feature_id),
    vector_dim       integer not null default 768,
    embedding_vector vector(768),
    created_at       timestamptz not null default now()
);

16.3 feature_fact 与 vector table 的关联查询

select ff.feature_id,
       ff.song_id,
       ff.object_id as window_id,
       ff.model_name,
       ff.feature_set_name,
       ff.embedding_dim,
       ff.embedding_uri,
       ff.vector_table_name,
       v.embedding_row_id
from feature_fact ff
left join audio_embedding_vector_768 v
  on v.feature_id = ff.feature_id
where ff.feature_type = 'embedding'
  and ff.model_name = 'mert-v1-95m'
  and ff.feature_set_name = 'mert_5s_hop2.5_v1';

16.4 查哪些 embedding 还没进入 vector table

select ff.feature_id,
       ff.song_id,
       ff.object_id as window_id,
       ff.embedding_uri
from feature_fact ff
left join audio_embedding_vector_768 v
  on v.feature_id = ff.feature_id
where ff.feature_type = 'embedding'
  and ff.embedding_dim = 768
  and ff.vector_table_name = 'audio_embedding_vector_768'
  and v.feature_id is null
order by ff.song_id, ff.object_id;

16.5 查哪些冷层 embedding 可以先不进热索引

select ff.feature_id,
       ff.song_id,
       ff.object_id,
       ff.embedding_uri,
       ff.metadata_json
from feature_fact ff
where ff.feature_type = 'embedding'
  and coalesce(ff.metadata_json->>'storage_tier', 'cold') = 'cold'
  and ff.vector_table_name is null
order by ff.song_id, ff.object_id;