# bpm_analyzer_tools.py 36.5 KB

# Raw Blame History Permalink

#!/usr/bin/env python3
"""
Realtime BPM Analyzer - Python 测试程序

基于 realtime-bpm-analyzer (https://github.com/dlepaux/realtime-bpm-analyzer)
的 Python 实现，用于快速测试音频文件的 BPM。

功能：
1. 快速 BPM 识别
2. 实时特征提取
3. 多算法融合
4. 详细结果导出

使用方法：
    python bpm_analyzer_test.py --file music.mp3
    python bpm_analyzer_test.py --file music.mp3 --output result.json
    python bpm_analyzer_test.py --file music.mp3 --verbose
    python bpm_analyzer_test.py --dir /path/to/music/folder
"""

import os
import sys
import json
import logging
import argparse
from pathlib import Path
from typing import Dict, List, Any, Optional, Tuple
from datetime import datetime
import numpy as np

# 导入音频处理库
try:
    import librosa
    import librosa.beat
    import librosa.feature
    import librosa.onset
except ImportError:
    print("❌ librosa 库未安装，请运行: pip install librosa")
    sys.exit(1)

from scipy.signal import find_peaks, correlate

# Configure logging: root handler at INFO with timestamp/level/message,
# plus the module-level logger used by every function in this file.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


class RealtimeBPMAnalyzerTest:
    """Realtime BPM Analyzer - Python 版本"""

    # BPM 范围（参考 realtime-bpm-analyzer）
    BPM_MIN = 30.0
    BPM_MAX = 200.0

    # 置信度阈值
    CONFIDENCE_THRESHOLD = 0.5

    def __init__(self, verbose: bool = False):
        """
        Set up the analyzer with its fixed analysis resolution.

        Args:
            verbose: when true, the module logger is switched to DEBUG so the
                intermediate diagnostics emitted by the analysis steps show up.
        """
        # Sample rate and hop length shared by every analysis call in the class.
        self.sr, self.hop_length = 22050, 512
        self.verbose = verbose

        if self.verbose:
            logger.setLevel(logging.DEBUG)

        logger.info("✓ Realtime BPM Analyzer Test 已初始化")

    def print_header(self, title: str, width: int = 80):
        """打印标题"""
        print("\n" + "=" * width)
        print(f"  {title}")
        print("=" * width)

    def analyze_file(self, file_path: str) -> Dict[str, Any]:
        """
        Run the complete BPM pipeline on a single audio file.

        The file is loaded at ``self.sr`` in mono, analysed by the fast and the
        detailed detectors, and the two estimates are fused into one result
        which is also logged.

        Args:
            file_path: path of the audio file to analyse.

        Returns:
            On success a dict with ``success=True``, file metadata, the two
            intermediate results and ``final_result``. On failure (missing
            file or any exception during analysis)
            ``{'success': False, 'error': <message>}``.
        """
        self.print_header("🎵 Realtime BPM Analyzer - 测试程序")

        # Guard clause: nothing to do for a path that does not exist.
        if not os.path.exists(file_path):
            logger.error(f"❌ 文件不存在: {file_path}")
            return {'success': False, 'error': '文件不存在'}

        audio_path = Path(file_path)
        size_mb = os.path.getsize(file_path) / (1024 * 1024)
        logger.info(f"📄 音频文件: {audio_path.name}")
        logger.info(f"📊 文件大小: {size_mb:.2f} MB")
        logger.info(f"📁 文件路径: {audio_path.absolute()}")

        self.print_header("📊 分析过程", 80)

        try:
            logger.info("🔄 加载音频文件...")
            samples, rate = librosa.load(file_path, sr=self.sr, mono=True)
            seconds = len(samples) / rate
            logger.info(f"✓ 音频加载成功，时长: {seconds:.2f} 秒")

            logger.info("📈 快速 BPM 检测...")
            quick = self._fast_bpm_detection(samples, rate)

            logger.info("📊 详细 BPM 分析...")
            thorough = self._detailed_bpm_analysis(samples, rate)

            logger.info("🔀 融合分析结果...")
            fused = self._fuse_results(quick, thorough, y=samples)

            report = {
                'success': True,
                'file_path': str(audio_path.absolute()),
                'file_name': audio_path.name,
                'file_size_mb': round(size_mb, 2),
                'duration_seconds': round(seconds, 2),
                'sample_rate': rate,
                'timestamp': datetime.now().isoformat(),
                'fast_detection': quick,
                'detailed_analysis': thorough,
                'final_result': fused
            }

            self.print_header("📈 分析结果", 80)
            self._display_results(report)
            return report

        except Exception as exc:
            # Top-level boundary: report the failure instead of aborting a batch run.
            logger.error(f"❌ 分析失败: {str(exc)}")
            if self.verbose:
                import traceback
                traceback.print_exc()
            return {'success': False, 'error': str(exc)}

    def analyze_directory(self, dir_path: str) -> List[Dict[str, Any]]:
        """
        Analyse every audio file found under a directory tree.

        Files are discovered recursively with ``os.walk`` and selected by a
        case-insensitive extension match; each one is passed to
        ``analyze_file``.

        Args:
            dir_path: root directory to scan.

        Returns:
            One ``analyze_file`` result per discovered file, in discovery
            order; an empty list when ``dir_path`` is not a directory.
        """
        self.print_header("🎵 Realtime BPM Analyzer - 批量分析", 80)

        if not os.path.isdir(dir_path):
            logger.error(f"❌ 文件夹不存在: {dir_path}")
            return []

        # Recognised audio suffixes (compared against the lower-cased file name).
        suffixes = ('.mp3', '.wav', '.flac', '.m4a', '.aac', '.ogg')
        found = [
            os.path.join(folder, name)
            for folder, _subdirs, names in os.walk(dir_path)
            for name in names
            if name.lower().endswith(suffixes)
        ]
        total = len(found)
        logger.info(f"📂 找到 {total} 个音频文件")

        reports = []
        for position, path in enumerate(found, 1):
            logger.info(f"\n[{position}/{total}] 正在分析...")
            reports.append(self.analyze_file(path))

        return reports

    def analyze_bpm(
        self,
        file_path: str = None,
        y: np.ndarray = None,
        sr: int = None,
    ) -> Dict[str, Any]:
        """
        统一 BPM 分析入口（供其他模块调用）

        支持两种调用方式：
        1. 传入 file_path，内部以 sr=22050 加载音频
        2. 传入已加载的 y, sr（避免重复加载）

        Returns:
            {
                'bpm': float,           # 最终 BPM（经过融合+纠正）
                'original_bpm': float,  # 快速检测的原始 BPM
                'confidence': float,
                'beat_times': list,     # 节拍时间点列表
            }
        """
        try:
            if y is None and file_path is not None:
                if not os.path.exists(file_path):
                    return {'bpm': 120.0, 'original_bpm': 120.0,
                            'confidence': 0.0, 'beat_times': []}
                y, sr = librosa.load(file_path, sr=self.sr, mono=True)
            elif y is None:
                return {'bpm': 120.0, 'original_bpm': 120.0,
                        'confidence': 0.0, 'beat_times': []}

            # 快速检测
            fast_result = self._fast_bpm_detection(y, sr)

            # 详细分析
            detailed_result = self._detailed_bpm_analysis(y, sr)

            # 融合
            final_result = self._fuse_results(fast_result, detailed_result, y=y)

            final_bpm = final_result.get('bpm', 120.0)
            original_bpm = fast_result.get('original_bpm', final_bpm)

            # 获取 beat_times：从 _fast_bpm_detection 内部的 beat_track 获取
            _, beat_frames = librosa.beat.beat_track(
                y=y, sr=sr, hop_length=self.hop_length
            )
            if isinstance(beat_frames, np.ndarray) and beat_frames.size > 0:
                beat_times = librosa.frames_to_time(
                    beat_frames, sr=sr, hop_length=self.hop_length
                ).tolist()
            else:
                beat_times = []

            # 如果 BPM 被减半了，节拍时间点也每隔一个取一个
            if final_bpm < original_bpm * 0.75:
                beat_times = beat_times[::2]

            return {
                'bpm': final_bpm,
                'original_bpm': original_bpm,
                'confidence': final_result.get('confidence', 0.0),
                'beat_times': beat_times,
            }
        except Exception as e:
            logger.warning(f"analyze_bpm 失败: {e}")
            return {'bpm': 120.0, 'original_bpm': 120.0,
                    'confidence': 0.0, 'beat_times': []}

    def _fast_bpm_detection(self, y: np.ndarray, sr: int) -> Dict[str, Any]:
        """
        Fast BPM estimate from ``librosa.beat.beat_track`` plus beat-level correction.

        The tracked tempo is passed, together with the beat times, to
        ``_detect_beat_level_errors`` which may replace it with a slower tempo.

        Args:
            y: mono audio signal.
            sr: sample rate of ``y``.

        Returns:
            On success a dict with ``bpm`` (corrected, rounded to 0.1),
            ``original_bpm`` (tracker output, rounded to 0.1), a fixed
            ``confidence`` of 0.85, ``method``, ``beat_count``,
            ``beat_level_correction`` (the correction reason, or ``None`` when
            the tempo was kept) and a fixed ``duration_ms`` of 100.
            On any exception a dict with ``bpm=0``, ``confidence=0``,
            ``method`` and ``error``.
        """
        try:
            # Tempo and beat frames from the beat tracker.
            tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr, hop_length=self.hop_length)

            # The tempo can be a scalar or an array. Flattening covers a plain
            # float, a 1-d array and also a 0-d array, for which ``tempo[0]``
            # would raise IndexError and turn a good detection into a failure.
            tempo_values = np.asarray(tempo, dtype=float).ravel()
            bpm = float(tempo_values[0]) if tempo_values.size > 0 else 120.0

            # Beat frames -> beat times in seconds (empty list when no beats).
            if isinstance(beat_frames, np.ndarray) and beat_frames.size > 0:
                beat_times = np.asarray(
                    librosa.frames_to_time(beat_frames, sr=sr, hop_length=self.hop_length)
                ).tolist()
            else:
                beat_times = []

            # Beat-level check and correction (the audio is passed for onset analysis).
            corrected_bpm, correction_reason = self._detect_beat_level_errors(beat_times, bpm, y)

            return {
                'bpm': round(corrected_bpm, 1),
                'original_bpm': round(bpm, 1),
                'confidence': 0.85,
                'method': 'librosa.beat.tempo()',
                'beat_count': len(beat_times),
                'beat_level_correction': correction_reason if correction_reason != 'beat_level_ok' else None,
                'duration_ms': 100
            }
        except Exception as e:
            # Deliberate best-effort: a failed detector is reported as bpm 0 so
            # that the fusion step can fall back to the other detector.
            logger.warning(f"⚠️  快速检测失败: {str(e)}")
            return {
                'bpm': 0,
                'confidence': 0,
                'method': 'librosa.beat.tempo()',
                'error': str(e)
            }

    def _detect_beat_level_errors(self, beat_times: list, bpm: float, y: np.ndarray = None) -> Tuple[float, str]:
        """
        Detect and correct beat-level errors (e.g. 8th notes tracked instead of quarter notes).

        Improved version that combines several features:
        1. Alternating beat-strength pattern (odd/even strength ratio)
        2. The originally detected BPM range
        3. Spectral centroid (the rules treat a low centroid as a slow-song cue)
        4. Onset alignment score of the detected tempo versus half of it

        Args:
            beat_times: beat positions in seconds.
            bpm: tempo reported by the beat tracker.
            y: audio signal used for onset-strength and spectral-centroid
                analysis; without it the tempo is returned unchanged.

        Returns:
            ``(tempo, reason)``. ``reason`` is ``"insufficient_beats"`` when
            fewer than two beats are given, ``"beat_level_ok"`` when the tempo
            is kept, and otherwise a string naming the rule that produced the
            halved (``bpm / 2``) or two-thirds (``bpm * 2 / 3``) tempo.

        NOTE(review): frame conversions here use ``self.sr``, not the sample
        rate of ``y`` - confirm callers always pass audio sampled at ``self.sr``.
        """
        if not beat_times or len(beat_times) < 2:
            return bpm, "insufficient_beats"

        # Regularity of the beat grid, expressed as coefficient of variation.
        beat_intervals = np.diff(beat_times)
        mean_interval = np.mean(beat_intervals)
        std_interval = np.std(beat_intervals)
        coeff_variation = std_interval / mean_interval if mean_interval > 0 else 1.0

        beat_count = len(beat_times)

        if self.verbose:
            logger.debug(f"Beat level analysis: {beat_count} beats, CV={coeff_variation:.3f}, BPM={bpm:.1f}")

        # Condition 1: very regular intervals + BPM > 100 + more than 20 beats
        # (the beat-count threshold was lowered to support short clips).
        # Anything else is left untouched.
        if not (coeff_variation < 0.15 and bpm > 100 and beat_count > 20):
            return bpm, "beat_level_ok"

        # Without audio data, stay conservative and keep the tempo.
        if y is None:
            return bpm, "beat_level_ok"

        # Only consider halving when the halved tempo stays in (40, 160).
        halved_bpm = bpm / 2
        if not (40 < halved_bpm < 160):
            return bpm, "beat_level_ok"

        # Compute the onset strength envelope.
        onset_env = librosa.onset.onset_strength(y=y, sr=self.sr, hop_length=self.hop_length)

        # Onset strength at each beat: maximum within +/- `window` frames.
        beat_frames = librosa.time_to_frames(beat_times, sr=self.sr, hop_length=self.hop_length)
        beat_strengths = []
        window = 3

        for frame in beat_frames:
            if frame < len(onset_env):
                start = max(0, frame - window)
                end = min(len(onset_env), frame + window + 1)
                beat_strengths.append(np.max(onset_env[start:end]))

        if len(beat_strengths) < 10:
            return bpm, "beat_level_ok"

        beat_strengths = np.array(beat_strengths)

        # Alternating-strength pattern: beats at indices 0, 2, 4, ... versus
        # beats at indices 1, 3, 5, ...
        odd_beats = beat_strengths[::2]
        even_beats = beat_strengths[1::2]
        mean_odd = np.mean(odd_beats)
        mean_even = np.mean(even_beats)
        strength_ratio = mean_odd / mean_even if mean_even > 0 else 1.0

        # Spectral centroid - used to tell fast songs from slow songs.
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=self.sr, hop_length=self.hop_length)
        mean_centroid = np.mean(spectral_centroid)

        if self.verbose:
            logger.debug(f"Beat strength ratio={strength_ratio:.3f}, spectral_centroid={mean_centroid:.1f}")

        # Combined decision logic. The branches below form one elif chain, so
        # only the first matching rule is evaluated.
        should_halve = False
        reason = ""

        # Rule 1: very pronounced alternating pattern (ratio > 1.8 or < 0.55).
        if strength_ratio > 1.8 or strength_ratio < 0.55:
            should_halve = True
            reason = f"strong_alternating_pattern (ratio={strength_ratio:.2f})"

        # Rule 1b: BPM > 150 + moderate alternating pattern -> halve.
        # E.g. "春娇与志明" (172.3 BPM, ratio=1.406, ref=85).
        # "Home - Headhunterz" (152 BPM, ratio=1.098) does not trigger it.
        elif bpm > 150 and (strength_ratio > 1.25 or strength_ratio < 0.8):
            should_halve = True
            reason = f"very_high_bpm_with_alternating (bpm={bpm:.1f}, ratio={strength_ratio:.2f})"

        # Rule 2: BPM within 125-150 + strong alternating pattern (ratio > 1.25).
        # High onset density (>= 3.0/s) together with a high spectral centroid
        # (>= 2200) indicates a genuinely fast song that must not be changed,
        # e.g. "爱在西元前" (129.2 BPM, centroid=2527, onset_density=3.8, ratio=1.29).
        # Otherwise the branch below picks either /2 or *2/3 from the centroid.
        elif 125 <= bpm <= 150 and strength_ratio > 1.25:
            onset_frames = librosa.onset.onset_detect(onset_envelope=onset_env, sr=self.sr, hop_length=self.hop_length)
            duration = len(y) / self.sr
            onset_density = len(onset_frames) / duration if duration > 0 else 0
            if onset_density >= 3.0 and mean_centroid >= 2200:
                if self.verbose:
                    logger.debug(f"规则2跳过: 高onset密度({onset_density:.1f}/s) + 高谱质心({mean_centroid:.0f})，判定为快歌")
            else:
                # Correction strategy chosen by spectral centroid:
                # Low centroid (< 2200): dark-sounding slow song where librosa
                #   locked onto 3/2 of the tempo; correct with *2/3.
                #   E.g. "该死的爱情" (129.2, centroid=1986, ratio=1.67) -> 86.1 (ref=84)
                # High centroid (>= 2200) + low onset density (< 3.0): brightly
                #   produced slow song where librosa locked onto 2x; correct with /2.
                #   E.g. "你要的全拿走" (136.0, centroid=2678, density=2.17, ratio=1.49) -> 68.0 (ref=76)
                if mean_centroid >= 2200:
                    # Bright but rhythmically sparse -> plain halving.
                    should_halve = True
                    reason = f"rule2_bright_slow (bpm={bpm:.1f}, ratio={strength_ratio:.2f}, centroid={mean_centroid:.0f}, density={onset_density:.1f})"
                else:
                    # Dark timbre -> correct with 2/3 and return immediately
                    # (this path bypasses the halving logic further down).
                    two_thirds_bpm = round(bpm * 2 / 3, 1)
                    should_halve = False
                    logger.info(
                        f"🔧 节拍层级纠正(2/3): {bpm:.1f} BPM → {two_thirds_bpm:.1f} BPM "
                        f"(ratio={strength_ratio:.2f}, centroid={mean_centroid:.0f})"
                    )
                    return two_thirds_bpm, f"rule2_two_thirds (bpm={bpm:.1f}, result={two_thirds_bpm:.1f}, ratio={strength_ratio:.2f})"

        # Same BPM band with the inverse alternation (ratio < 0.8) and a low centroid.
        elif 125 <= bpm <= 150 and strength_ratio < 0.8 and mean_centroid < 2200:
            should_halve = True
            reason = f"mid_bpm_low_ratio_low_centroid (bpm={bpm:.1f}, ratio={strength_ratio:.2f}, centroid={mean_centroid:.0f})"

        # Rule 2b: BPM > 130 + low spectral centroid (< 1800): slow-song
        # characteristics although a high BPM was detected.
        # Catches songs like "嚣张": BPM=136 but centroid=1653.
        elif bpm > 130 and mean_centroid < 1800:
            should_halve = True
            reason = f"high_bpm_low_centroid (bpm={bpm:.1f}, centroid={mean_centroid:.0f})"

        # Rule 3: the 115-125 BPM band needs stricter conditions.
        elif 115 <= bpm < 125:
            # Rule 3a: very strong alternation (ratio > 1.5 or < 0.65) halves
            # regardless of the centroid. Catches songs like "想你的夜" that
            # alternate strongly but have a fairly high centroid.
            if strength_ratio > 1.5 or strength_ratio < 0.65:
                should_halve = True
                reason = f"strong_alternating_in_mid_bpm (bpm={bpm:.1f}, ratio={strength_ratio:.2f})"
            # Rule 3b: moderate alternation + low centroid (slow-song cue).
            elif mean_centroid < 2000 and (strength_ratio > 1.4 or strength_ratio < 0.7):
                should_halve = True
                reason = f"slow_song_detected (centroid={mean_centroid:.0f}, ratio={strength_ratio:.2f})"
            # Otherwise keep the tempo (possibly a true mid-tempo song such as
            # "有什么奇怪" or "中巴车").

        # Rule 3c: the 100-115 BPM band (possibly a slow song detected at 2x,
        # e.g. "嘉禾望岗" 56 BPM -> 112 BPM). Decided with onset alignment.
        elif 100 <= bpm < 115:
            score_detected = self._compute_onset_alignment_score(onset_env, bpm)
            score_halved = self._compute_onset_alignment_score(onset_env, halved_bpm)

            if score_detected > 0 and score_halved > 0:
                alignment_ratio = score_halved / score_detected
                if self.verbose:
                    logger.debug(f"Onset alignment (100-115 BPM): detected={score_detected:.3f}, halved={score_halved:.3f}, ratio={alignment_ratio:.3f}")

                # A better alignment score for the halved BPM (ratio > 1.0)
                # suggests the true BPM is half; the alternating pattern is
                # required as a supporting cue.
                if alignment_ratio > 1.0 and (strength_ratio > 1.2 or strength_ratio < 0.83):
                    should_halve = True
                    reason = f"slow_song_100_115_range (alignment_ratio={alignment_ratio:.3f}, strength_ratio={strength_ratio:.2f})"
                # Even without a clear alternating pattern, halve when the
                # alignment score is clearly better.
                elif alignment_ratio > 1.08:
                    should_halve = True
                    reason = f"onset_alignment_strongly_favors_half (ratio={alignment_ratio:.3f})"

        # Rule 4: compare onset alignment of BPM versus BPM/2 (only for BPM > 130).
        # A clearly better score for BPM/2 means half-beats were detected.
        # Restricted to BPM > 130 to avoid hurting mid-tempo songs such as
        # "中巴车" (117.5 BPM). This is a separate `if`, so it also runs after
        # a rule above matched its band without setting should_halve.
        if not should_halve and bpm > 130:
            score_detected = self._compute_onset_alignment_score(onset_env, bpm)
            score_halved = self._compute_onset_alignment_score(onset_env, halved_bpm)

            if score_detected > 0 and score_halved > 0:
                alignment_ratio = score_halved / score_detected
                if self.verbose:
                    logger.debug(f"Onset alignment: detected={score_detected:.3f}, halved={score_halved:.3f}, ratio={alignment_ratio:.3f}")

                # A high centroid (>= 2000) indicates fast/electronic music, so a
                # higher alignment ratio is required before halving. Avoids
                # hurting e.g. "Home - Headhunterz" (152 BPM, centroid=2290, ratio=1.102).
                ratio_threshold = 1.15 if mean_centroid >= 2000 else 1.04
                if alignment_ratio > ratio_threshold and 40 < halved_bpm < 160:
                    should_halve = True
                    reason = f"onset_alignment_favors_half (ratio={alignment_ratio:.3f})"

        if should_halve:
            logger.info(f"🔧 节拍层级纠正: {bpm:.1f} BPM → {halved_bpm:.1f} BPM ({reason})")
            return halved_bpm, reason

        return bpm, "beat_level_ok"

    def _compute_onset_alignment_score(self, onset_env: np.ndarray, bpm: float) -> float:
        """
        计算给定BPM与onset strength的对齐度分数

        原理：真实的节拍应该对应onset strength的峰值
        分数越高表示对齐度越好
        """
        frame_rate = self.sr / self.hop_length
        beat_interval_frames = int((60.0 / bpm) * frame_rate)

        if beat_interval_frames < 1 or beat_interval_frames > len(onset_env):
            return 0.0

        # 在每个节拍位置采样onset strength
        beat_strengths = []
        off_beat_strengths = []

        for i in range(0, len(onset_env) - beat_interval_frames, beat_interval_frames):
            # 节拍位置（在一个小窗口内找最大值）
            window_size = max(1, beat_interval_frames // 8)
            start = max(0, i - window_size)
            end = min(len(onset_env), i + window_size)
            beat_strengths.append(np.max(onset_env[start:end]))

            # 非节拍位置（节拍之间的中点）
            mid_point = i + beat_interval_frames // 2
            if mid_point < len(onset_env):
                start_off = max(0, mid_point - window_size)
                end_off = min(len(onset_env), mid_point + window_size)
                off_beat_strengths.append(np.max(onset_env[start_off:end_off]))

        if not beat_strengths or not off_beat_strengths:
            return 0.0

        # 分数 = 节拍位置平均强度 / 非节拍位置平均强度
        # 比值越高，说明节拍位置的onset越明显
        mean_beat = np.mean(beat_strengths)
        mean_off_beat = np.mean(off_beat_strengths)

        if mean_off_beat < 1e-6:
            return mean_beat

        score = mean_beat / mean_off_beat
        return float(score)

    def _detailed_bpm_analysis(self, y: np.ndarray, sr: int) -> Dict[str, Any]:
        """
        Second, independent BPM estimate from the tempogram's autocorrelation.

        The tempogram is flattened, autocorrelated and normalised by its
        zero-lag value; the highest peak above 0.2 (lag >= 1) is converted to
        BPM as ``60 * frame_rate / lag`` and clipped to
        ``[BPM_MIN, BPM_MAX]``.

        Args:
            y: mono audio signal.
            sr: sample rate of ``y``.

        Returns:
            On success a dict with ``bpm`` (rounded to 0.1), ``confidence``
            (height of the chosen peak clipped to [0, 1], rounded to 0.01; 0.3
            with a 120 BPM default when no peak qualifies), ``method`` and
            ``peaks_count``. On any exception a dict with ``bpm=0``,
            ``confidence=0``, ``method`` and ``error``.
        """
        try:
            # Onset strength envelope, computed once and reused for the
            # tempogram (the original computed it and then let tempogram
            # derive the same envelope from ``y`` a second time).
            onset_env = librosa.onset.onset_strength(
                y=y, sr=sr, hop_length=self.hop_length
            )

            tempogram = librosa.feature.tempogram(
                onset_envelope=onset_env, sr=sr, hop_length=self.hop_length
            )

            # Autocorrelation of the flattened tempogram: keep non-negative
            # lags and normalise so that lag 0 equals ~1.
            tempogram_flat = tempogram.flatten()
            acf = correlate(tempogram_flat, tempogram_flat, mode='full')
            acf = acf[len(acf)//2:]
            acf = acf / (acf[0] + 1e-8)

            # Peak picking skips lag 0; indices are shifted back afterwards.
            peaks, _ = find_peaks(acf[1:], height=0.2, distance=5)
            peaks = peaks + 1

            if len(peaks) > 0:
                frame_rate = sr / self.hop_length
                peak_heights = acf[peaks]
                strongest = int(np.argmax(peak_heights))
                bpm = 60.0 * frame_rate / peaks[strongest]
                confidence = float(peak_heights[strongest])
            else:
                bpm = 120.0
                confidence = 0.3

            # Keep the estimate within the supported range and hand back a
            # plain Python float rather than a numpy scalar.
            bpm = float(np.clip(bpm, self.BPM_MIN, self.BPM_MAX))

            return {
                'bpm': round(bpm, 1),
                'confidence': round(float(np.clip(confidence, 0, 1)), 2),
                'method': 'Tempogram Autocorrelation',
                'peaks_count': int(len(peaks))
            }
        except Exception as e:
            # Deliberate best-effort: a failed detector is reported as bpm 0 so
            # that the fusion step can fall back to the other detector.
            logger.warning(f"⚠️  详细分析失败: {str(e)}")
            return {
                'bpm': 0,
                'confidence': 0,
                'method': 'Tempogram Autocorrelation',
                'error': str(e)
            }

    def _fuse_results(
        self,
        fast_result: Dict[str, Any],
        detailed_result: Dict[str, Any],
        y: np.ndarray = None,
    ) -> Dict[str, Any]:
        """融合快速和详细分析的结果，带倍频检测和纠正"""
        results = []

        if fast_result.get('bpm', 0) > 0:
            results.append({
                'bpm': fast_result['bpm'],
                'original_bpm': fast_result.get('original_bpm', fast_result['bpm']),
                'confidence': fast_result['confidence'],
                'method': fast_result['method'],
                'beat_level_correction': fast_result.get('beat_level_correction')
            })

        if detailed_result.get('bpm', 0) > 0:
            results.append({
                'bpm': detailed_result['bpm'],
                'confidence': detailed_result['confidence'],
                'method': detailed_result['method']
            })

        if not results:
            return {
                'bpm': 120.0,
                'confidence': 0.0,
                'note': '无法检测 BPM，使用默认值'
            }

        # 如果快速检测已经进行了beat level纠正，直接使用纠正后的结果
        beat_level_correction = results[0].get('beat_level_correction') if results else None
        if beat_level_correction:
            original_bpm = results[0].get('original_bpm', results[0]['bpm'])
            corrected_bpm = results[0]['bpm']
            return {
                'bpm': corrected_bpm,
                'confidence': results[0]['confidence'],
                'primary_method': results[0]['method'],
                'supporting_methods': len(results) - 1,
                'all_candidates': results,
                'octave_correction': {
                    'from': original_bpm,
                    'to': corrected_bpm,
                    'reason': f'节拍层级纠正: {original_bpm:.1f} → {corrected_bpm:.1f} ({beat_level_correction})'
                }
            }

        # 如果只有一个结果
        if len(results) == 1:
            best = results[0]
            return {
                'bpm': best['bpm'],
                'confidence': best['confidence'],
                'primary_method': best['method'],
                'supporting_methods': 0,
                'all_candidates': results,
                'octave_correction': None
            }

        # 检测倍频关系
        fast_bpm = results[0]['bpm']  # librosa.beat.tempo 通常更准确
        detailed_bpm = results[1]['bpm'] if len(results) > 1 else None

        if detailed_bpm and fast_bpm > 0:
            ratio = max(fast_bpm, detailed_bpm) / min(fast_bpm, detailed_bpm)

            # 检查是否是倍频关系（1/2, 1/3, 1/4, 2x, 3x, 4x 等）
            octave_correction = None
            is_octave = False
            chosen_bpm = fast_bpm  # 默认使用快速检测结果

            # 特殊情况：当 detailed_bpm 很低（< 40）且 fast_bpm 在 100-120 范围时
            # 可能是慢歌被检测为2倍，此时 detailed_bpm × 2 可能是正确答案
            # 例如：嘉禾望岗 实际56 BPM，fast=112.3，detailed=30，30×2=60更接近
            # 注意：需要排除中速/快歌被误纠正的情况（如 中巴车带我回家, fast=117.5, detailed=30, ref=115）
            # 使用 onset alignment 来验证：如果 halved BPM 的对齐度明显优于 fast BPM，才执行纠正
            if detailed_bpm < 40 and 100 <= fast_bpm <= 120 and y is not None:
                # 计算谱质心来判断是否真的是慢歌
                spectral_centroid = librosa.feature.spectral_centroid(
                    y=y, sr=self.sr, hop_length=self.hop_length
                )
                mean_centroid = float(np.mean(spectral_centroid))

                doubled_detailed = detailed_bpm * 2
                # 检查 doubled_detailed 是否在合理的慢歌范围内 (50-70 BPM)
                # 且谱质心较低（< 2200），确认是慢歌特征
                if 50 <= doubled_detailed <= 70 and mean_centroid < 2200:
                    # 检查 fast_bpm 是否约等于 doubled_detailed × 2
                    if abs(fast_bpm - doubled_detailed * 2) / fast_bpm < 0.1:
                        # 额外验证：用 onset alignment 确认 halved BPM 确实更好
                        # 避免误纠正如"中巴车带我回家"(fast=117.5, ref=115)
                        onset_env = librosa.onset.onset_strength(
                            y=y, sr=self.sr, hop_length=self.hop_length
                        )
                        score_fast = self._compute_onset_alignment_score(onset_env, fast_bpm)
                        score_halved = self._compute_onset_alignment_score(onset_env, doubled_detailed)
                        alignment_ratio = score_halved / score_fast if score_fast > 0 else 0

                        if self.verbose:
                            logger.debug(
                                f"慢歌倍频验证: score_fast={score_fast:.3f}, "
                                f"score_halved={score_halved:.3f}, ratio={alignment_ratio:.3f}"
                            )

                        # 只有当 halved BPM 的对齐度明显更好时才纠正
                        # 中巴车: alignment_ratio=1.042，不触发（实际BPM=115）
                        # 嘉禾望岗: halved=56 对齐度应该明显更好，会触发
                        if alignment_ratio > 1.08:
                            chosen_bpm = doubled_detailed
                            is_octave = True
                            octave_correction = {
                                'from': fast_bpm,
                                'to': doubled_detailed,
                                'reason': f'慢歌倍频纠正: fast={fast_bpm:.1f} ≈ detailed×4={detailed_bpm:.1f}×4，使用 detailed×2={doubled_detailed:.1f} (alignment={alignment_ratio:.3f})'
                            }
                            logger.info(f"\n🔧 慢歌倍频纠正: {fast_bpm:.1f} BPM → {doubled_detailed:.1f} BPM")
                            logger.info(f"   原因: {octave_correction['reason']}")
                            return {
                                'bpm': chosen_bpm,
                                'confidence': results[1]['confidence'],
                                'primary_method': 'Tempogram + 倍频纠正',
                                'supporting_methods': 1,
                                'all_candidates': results,
                                'octave_correction': octave_correction
                            }
                        else:
                            if self.verbose:
                                logger.debug(
                                    f"慢歌倍频纠正跳过: fast BPM({fast_bpm:.1f})对齐度更好，保持原值"
                                )

            # 检查常见倍频关系：detailed_bpm 应该 ≈ fast_bpm * multiplier
            for multiplier in [0.25, 0.33, 0.5, 1.0, 2.0, 3.0, 4.0]:
                expected_bpm = fast_bpm * multiplier
                # 检查 detailed_bpm 是否接近 expected_bpm（10% 容差）
                if abs(detailed_bpm - expected_bpm) / expected_bpm < 0.1:
                    is_octave = True
                    if multiplier != 1.0:  # 非 1 倍关系表示倍频
                        # 使用快速检测的结果
                        corrected_bpm = fast_bpm
                        octave_correction = {
                            'from': detailed_bpm,
                            'to': corrected_bpm,
                            'reason': f'倍频关系检测: {detailed_bpm:.1f} ≈ {fast_bpm:.1f} × {multiplier}，使用快速检测结果'
                        }
                    break

            # 如果检测到倍频，使用快速检测结果（通常更准确）
            if is_octave and octave_correction:
                logger.info(f"\n🔧 倍频纠正: {octave_correction['from']:.1f} BPM → {octave_correction['to']:.1f} BPM")
                logger.info(f"   原因: {octave_correction['reason']}")
                return {
                    'bpm': fast_bpm,
                    'confidence': results[0]['confidence'],
                    'primary_method': results[0]['method'],
                    'supporting_methods': 1,
                    'all_candidates': results,
                    'octave_correction': octave_correction
                }

        # 如果没有倍频关系，优先使用快速检测（librosa.beat.tempo 是金标准）
        # 快速检测通常比详细分析更准确
        best = results[0]  # 快速检测

        return {
            'bpm': best['bpm'],
            'confidence': best['confidence'],
            'primary_method': best['method'],
            'supporting_methods': len(results) - 1,
            'all_candidates': results,
            'octave_correction': None
        }

    def _display_results(self, result: Dict[str, Any]):
        """
        Log a human-readable summary of an ``analyze_file`` result.

        Logs the error for an unsuccessful result; otherwise the file summary,
        the final BPM and confidence, the deciding method (when present), any
        note attached to the final result, the tempo correction (when one was
        applied) and, in verbose mode, both intermediate estimates.

        Args:
            result: dict as built by ``analyze_file``.
        """
        if not result.get('success'):
            logger.error(f"❌ 分析失败: {result.get('error')}")
            return

        file_info = (
            f"文件: {result['file_name']} "
            f"({result['file_size_mb']} MB) "
            f"时长: {result['duration_seconds']} 秒"
        )
        logger.info(file_info)

        final = result['final_result']
        logger.info(f"\n🎵 最终结果:")
        logger.info(f"   BPM: {final['bpm']}")
        logger.info(f"   置信度: {final['confidence']:.0%}")

        # The "no BPM detected" default from _fuse_results carries neither
        # 'primary_method' nor 'supporting_methods'; indexing them directly
        # raised KeyError and made analyze_file report a failed analysis.
        if 'primary_method' in final:
            logger.info(f"   主要方法: {final['primary_method']}")
        if 'supporting_methods' in final:
            logger.info(f"   支持方法数: {final['supporting_methods']}")
        if final.get('note'):
            logger.info(f"   {final['note']}")

        # Tempo correction details, when one was applied.
        correction = final.get('octave_correction')
        if correction:
            logger.info(f"\n🔧 倍频纠正:")
            logger.info(f"   原始检测: {correction['from']:.1f} BPM")
            logger.info(f"   纠正后: {correction['to']:.1f} BPM")
            logger.info(f"   原因: {correction['reason']}")

        if self.verbose:
            logger.debug(f"\n📊 快速检测: {result['fast_detection']['bpm']} BPM")
            logger.debug(f"📊 详细分析: {result['detailed_analysis']['bpm']} BPM")

    def export_results(
        self,
        results: Any,
        output_path: str
    ):
        """
        Write analysis results to ``output_path`` as UTF-8, indented JSON.

        Numpy arrays and numpy scalar values (integer, floating, boolean)
        nested anywhere inside dicts, lists or tuples are converted to plain
        Python values first. Failures are logged, not raised.

        Args:
            results: a result dict, or any nesting of dicts/lists/tuples of them.
            output_path: destination file path.
        """
        try:
            def convert_numpy(obj):
                # Recursively replace numpy containers/scalars with builtins.
                if isinstance(obj, np.ndarray):
                    return obj.tolist()
                elif isinstance(obj, np.bool_):
                    # np.bool_ is neither np.integer nor a Python bool, so the
                    # JSON encoder rejects it unless converted explicitly.
                    return bool(obj)
                elif isinstance(obj, np.integer):
                    return int(obj)
                elif isinstance(obj, np.floating):
                    return float(obj)
                elif isinstance(obj, dict):
                    return {k: convert_numpy(v) for k, v in obj.items()}
                elif isinstance(obj, (list, tuple)):
                    return [convert_numpy(v) for v in obj]
                return obj

            results_converted = convert_numpy(results)

            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(results_converted, f, ensure_ascii=False, indent=2)

            logger.info(f"✓ 结果已导出到: {Path(output_path).absolute()}")

        except Exception as e:
            # Deliberate best-effort: an export problem must not abort the run.
            logger.error(f"❌ 导出失败: {str(e)}")


def main():
    """
    Command-line entry point.

    Parses ``--file`` / ``--dir`` / ``--output`` / ``--verbose``, runs the
    analysis for a single file (``--file`` takes precedence) or a directory,
    and exports the result as JSON - to ``--output`` when given, otherwise to
    a default file name in the current directory. Exits with status 1 when
    neither input option is given or when an unexpected error occurs.
    """
    usage_examples = """
示例用法：
  # 分析单个文件
  python bpm_analyzer_test.py --file music.mp3

  # 分析并输出结果
  python bpm_analyzer_test.py --file music.mp3 --output result.json

  # 显示详细信息
  python bpm_analyzer_test.py --file music.mp3 --verbose

  # 批量分析文件夹
  python bpm_analyzer_test.py --dir /path/to/music
        """
    parser = argparse.ArgumentParser(
        description='Realtime BPM Analyzer - Python 测试程序',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples
    )

    # Option table: (flags, keyword arguments for add_argument).
    option_specs = (
        (('--file',), {'type': str, 'help': '音频文件路径'}),
        (('--dir',), {'type': str, 'help': '音频文件夹路径（批量分析）'}),
        (('-o', '--output'), {'type': str, 'help': '输出 JSON 文件路径'}),
        (('-v', '--verbose'), {'action': 'store_true', 'help': '显示详细信息'}),
    )
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)

    args = parser.parse_args()

    # At least one input source is required.
    if not (args.file or args.dir):
        parser.print_help()
        sys.exit(1)

    analyzer = RealtimeBPMAnalyzerTest(verbose=args.verbose)

    try:
        if args.file:
            results = analyzer.analyze_file(args.file)
        else:
            batch = analyzer.analyze_directory(args.dir)
            results = {
                'success': True,
                'total_files': len(batch),
                'results': batch
            }

        # Explicit output path wins; otherwise derive a default file name.
        destination = args.output
        if not destination:
            if args.file:
                destination = f"bpm_result_{Path(args.file).stem}.json"
            else:
                destination = "bpm_results.json"
        analyzer.export_results(results, destination)

        banner = "=" * 80
        print("\n" + banner)
        print("✅ 分析完成！")
        print(banner + "\n")

    except Exception as exc:
        logger.error(f"❌ 执行失败: {str(exc)}")
        if args.verbose:
            import traceback
            traceback.print_exc()
        sys.exit(1)


if __name__ == '__main__':
    main()