job3_mert_preflight_attempt.json 7.06 KB
{
  "worker": "run_embedding_job",
  "schema": "acr_test",
  "job": {
    "extraction_job_id": 3,
    "feature_set_id": 4,
    "target_scope": "reference_set:phase1_hot_reference_v1",
    "job_status": "pending",
    "shard_key": "phase1/reference/mert/v1-95m/10s_5s",
    "job_metadata": {
      "lane": "semantic",
      "role": "long_context_validation",
      "phase": "phase1"
    },
    "feature_name": "semantic_embedding",
    "feature_level": "window",
    "extraction_granularity": "sliding_window",
    "window_sec": 10.0,
    "hop_sec": 5.0,
    "embedding_dim": 768,
    "distance_metric": "cosine",
    "feature_config": {
      "role": "long_context_validation"
    },
    "model_id": 3,
    "model_name": "mert",
    "model_version": "v1-95m",
    "model_family": "music_ssl",
    "input_sample_rate": 24000,
    "output_embedding_dim": 768,
    "model_metadata": {
      "lane": "semantic",
      "role": "primary_baseline",
      "phase": "phase1"
    }
  },
  "target_scope_summary": {
    "scope_type": "reference_set",
    "scope_value": "phase1_hot_reference_v1",
    "reference_set_id": 2,
    "reference_set_name": "phase1_hot_reference_v1",
    "recording_count": 20,
    "ready_asset_count": 20,
    "active_window_count": 20
  },
  "scope_window_count": 20,
  "status_after_start": {
    "extraction_job_id": 3,
    "job_status": "running",
    "input_count": 20,
    "output_count": null,
    "started_at": "2026-06-04T14:23:29.790262+08:00",
    "finished_at": null,
    "log_uri": null,
    "metadata_json": {
      "lane": "semantic",
      "role": "long_context_validation",
      "phase": "phase1",
      "worker": "run_embedding_job",
      "dry_run": false,
      "vector_table": "audio_embedding_vector_768",
      "output_target": "audio_embedding",
      "execution_mode": "preflight",
      "runtime_report": {
        "ready": false,
        "model_name": "mert",
        "availability": {
          "numpy": true,
          "torch": false,
          "torchaudio": false,
          "transformers": false
        },
        "requirements": [
          "numpy",
          "torch",
          "torchaudio",
          "transformers"
        ],
        "missing_dependencies": [
          "torch",
          "torchaudio",
          "transformers"
        ]
      },
      "scope_window_count": 20,
      "vector_table_report": {
        "reason": null,
        "resolved": true,
        "expected_dim": 768,
        "table_exists": true,
        "allowed_vector_tables": [
          "audio_embedding_vector_192",
          "audio_embedding_vector_768"
        ],
        "requested_vector_table": "audio_embedding_vector_768"
      },
      "target_scope_summary": {
        "scope_type": "reference_set",
        "scope_value": "phase1_hot_reference_v1",
        "recording_count": 20,
        "reference_set_id": 2,
        "ready_asset_count": 20,
        "reference_set_name": "phase1_hot_reference_v1",
        "active_window_count": 20
      }
    }
  },
  "status_after_complete": null,
  "status_after_failed": {
    "extraction_job_id": 3,
    "job_status": "failed",
    "input_count": 20,
    "output_count": 0,
    "started_at": "2026-06-04T14:23:29.790262+08:00",
    "finished_at": "2026-06-04T14:23:29.791468+08:00",
    "log_uri": null,
    "metadata_json": {
      "lane": "semantic",
      "role": "long_context_validation",
      "phase": "phase1",
      "worker": "run_embedding_job",
      "dry_run": false,
      "artifact_dir": "data/pgvector_eval/music20/phase1_embeddings",
      "vector_table": "audio_embedding_vector_768",
      "output_target": "audio_embedding",
      "execution_mode": "preflight_failure",
      "failure_reason": "preflight_failed",
      "runtime_report": {
        "ready": false,
        "model_name": "mert",
        "availability": {
          "numpy": true,
          "torch": false,
          "torchaudio": false,
          "transformers": false
        },
        "requirements": [
          "numpy",
          "torch",
          "torchaudio",
          "transformers"
        ],
        "missing_dependencies": [
          "torch",
          "torchaudio",
          "transformers"
        ]
      },
      "preflight_blockers": [
        "unreadable_audio_assets",
        "model_runtime_unavailable"
      ],
      "scope_window_count": 20,
      "write_target_table": "audio_embedding",
      "vector_table_report": {
        "reason": null,
        "resolved": true,
        "expected_dim": 768,
        "table_exists": true,
        "allowed_vector_tables": [
          "audio_embedding_vector_192",
          "audio_embedding_vector_768"
        ],
        "requested_vector_table": "audio_embedding_vector_768"
      },
      "missing_window_count": 20,
      "target_scope_summary": {
        "scope_type": "reference_set",
        "scope_value": "phase1_hot_reference_v1",
        "recording_count": 20,
        "reference_set_id": 2,
        "ready_asset_count": 20,
        "reference_set_name": "phase1_hot_reference_v1",
        "active_window_count": 20
      },
      "missing_window_samples": [
        {
          "reason": "missing_audio",
          "asset_id": 1,
          "window_id": 1,
          "storage_uri": "/workspace/downloads/100/type_11/93dfdeb0-7da5-42a8-9c71-cf12af57dd191650256918.wav"
        },
        {
          "reason": "missing_audio",
          "asset_id": 2,
          "window_id": 2,
          "storage_uri": "/workspace/downloads/101/type_11/83c0c07f-4f96-4ff4-998c-58db910f3cfa1650256915.wav"
        },
        {
          "reason": "missing_audio",
          "asset_id": 3,
          "window_id": 3,
          "storage_uri": "/workspace/downloads/102/type_11/43440ec5-70b4-4d50-8683-d3e41cad29411650256908.wav"
        },
        {
          "reason": "missing_audio",
          "asset_id": 4,
          "window_id": 4,
          "storage_uri": "/workspace/downloads/103/type_11/19876dbb-fffc-40f8-9530-9322c9ed77681650256912.wav"
        },
        {
          "reason": "missing_audio",
          "asset_id": 5,
          "window_id": 5,
          "storage_uri": "/workspace/downloads/104/type_11/4c1d3e22-045f-445b-ab87-ba1ae3ee09b31650256912.wav"
        }
      ]
    }
  },
  "resolved_vector_table": "audio_embedding_vector_768",
  "vector_table_report": {
    "requested_vector_table": "audio_embedding_vector_768",
    "expected_dim": 768,
    "allowed_vector_tables": [
      "audio_embedding_vector_192",
      "audio_embedding_vector_768"
    ],
    "resolved": true,
    "table_exists": true,
    "reason": null
  },
  "runtime_report": {
    "model_name": "mert",
    "requirements": [
      "numpy",
      "torch",
      "torchaudio",
      "transformers"
    ],
    "availability": {
      "numpy": true,
      "torch": false,
      "torchaudio": false,
      "transformers": false
    },
    "missing_dependencies": [
      "torch",
      "torchaudio",
      "transformers"
    ],
    "ready": false
  },
  "processed_windows": [],
  "notes": [
    "this worker now validates planner -> job -> scope windows -> PostgreSQL failure semantics",
    "real model inference should replace the guarded failure path without changing the job contract or idempotent upsert keys"
  ]
}