# bpm_analyzer_tools.py (36.5 KB)
#!/usr/bin/env python3
"""
Realtime BPM Analyzer - Python 测试程序

基于 realtime-bpm-analyzer (https://github.com/dlepaux/realtime-bpm-analyzer)
的 Python 实现,用于快速测试音频文件的 BPM。

功能:
1. 快速 BPM 识别
2. 实时特征提取
3. 多算法融合
4. 详细结果导出

使用方法:
    python bpm_analyzer_test.py --file music.mp3
    python bpm_analyzer_test.py --file music.mp3 --output result.json
    python bpm_analyzer_test.py --file music.mp3 --verbose
    python bpm_analyzer_test.py --dir /path/to/music/folder
"""

import os
import sys
import json
import logging
import argparse
from pathlib import Path
from typing import Dict, List, Any, Optional, Tuple
from datetime import datetime
import numpy as np

# 导入音频处理库
try:
    import librosa
    import librosa.beat
    import librosa.feature
    import librosa.onset
except ImportError:
    print("❌ librosa 库未安装,请运行: pip install librosa")
    sys.exit(1)

from scipy.signal import find_peaks, correlate

# 配置日志
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


class RealtimeBPMAnalyzerTest:
    """Realtime BPM Analyzer - Python 版本"""

    # BPM 范围(参考 realtime-bpm-analyzer)
    BPM_MIN = 30.0
    BPM_MAX = 200.0

    # 置信度阈值
    CONFIDENCE_THRESHOLD = 0.5

    def __init__(self, verbose: bool = False):
        """
        Create an analyzer with its fixed audio-processing parameters.

        Args:
            verbose: When true, the module logger is switched to DEBUG level.
        """
        # Parameters handed to every librosa call made by this class.
        self.sr, self.hop_length = 22050, 512  # sample rate, hop length
        self.verbose = verbose

        if self.verbose:
            logger.setLevel(logging.DEBUG)

        logger.info("✓ Realtime BPM Analyzer Test 已初始化")

    def print_header(self, title: str, width: int = 80):
        """打印标题"""
        print("\n" + "=" * width)
        print(f"  {title}")
        print("=" * width)

    def analyze_file(self, file_path: str) -> Dict[str, Any]:
        """
        Run the complete BPM pipeline on a single audio file.

        The file is loaded through librosa at ``self.sr`` (mono), analysed by
        the fast detector and the detailed detector, and both outcomes are then
        fused. Progress and the final summary are written to the log.

        Args:
            file_path: Path of the audio file to analyse.

        Returns:
            On success a dict with ``success=True``, file metadata and the
            ``fast_detection``, ``detailed_analysis`` and ``final_result``
            sub-dicts. On failure ``{'success': False, 'error': <message>}``.
        """
        self.print_header("🎵 Realtime BPM Analyzer - 测试程序")

        # Guard clause: nothing to do when the path is missing.
        if not os.path.exists(file_path):
            logger.error(f"❌ 文件不存在: {file_path}")
            return {'success': False, 'error': '文件不存在'}

        file_size_mb = os.path.getsize(file_path) / (1024 * 1024)
        audio_path = Path(file_path)
        logger.info(f"📄 音频文件: {audio_path.name}")
        logger.info(f"📊 文件大小: {file_size_mb:.2f} MB")
        logger.info(f"📁 文件路径: {audio_path.absolute()}")

        self.print_header("📊 分析过程", 80)

        try:
            # Stage 1: decode the audio.
            logger.info("🔄 加载音频文件...")
            samples, sample_rate = librosa.load(file_path, sr=self.sr, mono=True)
            duration = len(samples) / sample_rate
            logger.info(f"✓ 音频加载成功,时长: {duration:.2f} 秒")

            # Stage 2: fast detector.
            logger.info("📈 快速 BPM 检测...")
            fast_result = self._fast_bpm_detection(samples, sample_rate)

            # Stage 3: detailed detector.
            logger.info("📊 详细 BPM 分析...")
            detailed_result = self._detailed_bpm_analysis(samples, sample_rate)

            # Stage 4: merge both estimates.
            logger.info("🔀 融合分析结果...")
            final_result = self._fuse_results(fast_result, detailed_result, y=samples)

            report = {
                'success': True,
                'file_path': str(audio_path.absolute()),
                'file_name': audio_path.name,
                'file_size_mb': round(file_size_mb, 2),
                'duration_seconds': round(duration, 2),
                'sample_rate': sample_rate,
                'timestamp': datetime.now().isoformat(),
                'fast_detection': fast_result,
                'detailed_analysis': detailed_result,
                'final_result': final_result
            }

            self.print_header("📈 分析结果", 80)
            self._display_results(report)
            return report

        except Exception as exc:
            # Any failure in the pipeline is reported as an unsuccessful result.
            logger.error(f"❌ 分析失败: {str(exc)}")
            if self.verbose:
                import traceback
                traceback.print_exc()
            return {'success': False, 'error': str(exc)}

    def analyze_directory(self, dir_path: str) -> List[Dict[str, Any]]:
        """
        Analyse every audio file found under a directory tree.

        The tree is walked recursively and files are matched by extension
        (case-insensitive); each match is passed to ``analyze_file``.

        Args:
            dir_path: Root directory to scan.

        Returns:
            One ``analyze_file`` result per audio file, in discovery order.
            An empty list when ``dir_path`` is not a directory.
        """
        self.print_header("🎵 Realtime BPM Analyzer - 批量分析", 80)

        if not os.path.isdir(dir_path):
            logger.error(f"❌ 文件夹不存在: {dir_path}")
            return []

        # Collect every file whose name ends in a supported audio extension.
        supported_suffixes = ('.mp3', '.wav', '.flac', '.m4a', '.aac', '.ogg')
        audio_files = [
            os.path.join(folder, name)
            for folder, _subdirs, names in os.walk(dir_path)
            for name in names
            if name.lower().endswith(supported_suffixes)
        ]
        total = len(audio_files)
        logger.info(f"📂 找到 {total} 个音频文件")

        results = []
        for position, audio_file in enumerate(audio_files, start=1):
            logger.info(f"\n[{position}/{total}] 正在分析...")
            results.append(self.analyze_file(audio_file))
        return results

    def analyze_bpm(
        self,
        file_path: str = None,
        y: np.ndarray = None,
        sr: int = None,
    ) -> Dict[str, Any]:
        """
        统一 BPM 分析入口(供其他模块调用)

        支持两种调用方式:
        1. 传入 file_path,内部以 sr=22050 加载音频
        2. 传入已加载的 y, sr(避免重复加载)

        Returns:
            {
                'bpm': float,           # 最终 BPM(经过融合+纠正)
                'original_bpm': float,  # 快速检测的原始 BPM
                'confidence': float,
                'beat_times': list,     # 节拍时间点列表
            }
        """
        try:
            if y is None and file_path is not None:
                if not os.path.exists(file_path):
                    return {'bpm': 120.0, 'original_bpm': 120.0,
                            'confidence': 0.0, 'beat_times': []}
                y, sr = librosa.load(file_path, sr=self.sr, mono=True)
            elif y is None:
                return {'bpm': 120.0, 'original_bpm': 120.0,
                        'confidence': 0.0, 'beat_times': []}

            # 快速检测
            fast_result = self._fast_bpm_detection(y, sr)

            # 详细分析
            detailed_result = self._detailed_bpm_analysis(y, sr)

            # 融合
            final_result = self._fuse_results(fast_result, detailed_result, y=y)

            final_bpm = final_result.get('bpm', 120.0)
            original_bpm = fast_result.get('original_bpm', final_bpm)

            # 获取 beat_times:从 _fast_bpm_detection 内部的 beat_track 获取
            _, beat_frames = librosa.beat.beat_track(
                y=y, sr=sr, hop_length=self.hop_length
            )
            if isinstance(beat_frames, np.ndarray) and beat_frames.size > 0:
                beat_times = librosa.frames_to_time(
                    beat_frames, sr=sr, hop_length=self.hop_length
                ).tolist()
            else:
                beat_times = []

            # 如果 BPM 被减半了,节拍时间点也每隔一个取一个
            if final_bpm < original_bpm * 0.75:
                beat_times = beat_times[::2]

            return {
                'bpm': final_bpm,
                'original_bpm': original_bpm,
                'confidence': final_result.get('confidence', 0.0),
                'beat_times': beat_times,
            }
        except Exception as e:
            logger.warning(f"analyze_bpm 失败: {e}")
            return {'bpm': 120.0, 'original_bpm': 120.0,
                    'confidence': 0.0, 'beat_times': []}

    def _fast_bpm_detection(self, y: np.ndarray, sr: int) -> Dict[str, Any]:
        """
        Fast BPM detection via ``librosa.beat.beat_track`` plus beat-level correction.

        Args:
            y: Audio samples.
            sr: Sample rate of ``y``.

        Returns:
            On success a dict with 'bpm' (after beat-level correction),
            'original_bpm' (raw tracker output), 'confidence', 'method',
            'beat_count', 'beat_level_correction' (reason string, or None when
            no correction was applied) and 'duration_ms'.
            On failure a dict with 'bpm' = 0, 'confidence' = 0, 'method' and
            'error'; the exception is logged, not raised.
        """
        try:
            # Tempo estimate and beat positions (as frame indices).
            tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr, hop_length=self.hop_length)

            # The tempo may arrive as a plain number or as an ndarray. Flatten
            # it first so that a 0-d array (which cannot be indexed with [0])
            # is handled the same way as a scalar or a 1-d array.
            tempo_values = np.atleast_1d(tempo).ravel()
            bpm = float(tempo_values[0]) if tempo_values.size > 0 else 120.0

            # Convert beat frames to seconds; anything else means "no beats".
            if isinstance(beat_frames, np.ndarray) and beat_frames.size > 0:
                beat_times = np.asarray(
                    librosa.frames_to_time(beat_frames, sr=sr, hop_length=self.hop_length)
                ).tolist()
            else:
                beat_times = []

            # Beat-level check (the audio is passed along for onset analysis).
            corrected_bpm, correction_reason = self._detect_beat_level_errors(beat_times, bpm, y)

            return {
                'bpm': round(corrected_bpm, 1),
                'original_bpm': round(bpm, 1),
                # Fixed values: neither is measured by this method.
                'confidence': 0.85,
                'method': 'librosa.beat.tempo()',
                'beat_count': len(beat_times),
                'beat_level_correction': correction_reason if correction_reason != 'beat_level_ok' else None,
                'duration_ms': 100
            }
        except Exception as e:
            # Best-effort: a failed fast detection is reported as bpm=0 so the
            # fusion step can fall back to the other detector.
            logger.warning(f"⚠️  快速检测失败: {str(e)}")
            return {
                'bpm': 0,
                'confidence': 0,
                'method': 'librosa.beat.tempo()',
                'error': str(e)
            }

    def _detect_beat_level_errors(self, beat_times: list, bpm: float, y: np.ndarray = None) -> Tuple[float, str]:
        """
        Detect and correct beat-level errors (e.g. 8th notes tracked instead of quarter notes).

        Several features are combined to reach a decision:
        1. Alternating beat-strength pattern (odd/even strength ratio)
        2. Range of the original BPM (different thresholds per range)
        3. Spectral centroid (slow songs usually have a lower centroid)
        4. Comparison of onset-alignment scores for BPM vs BPM/2

        Args:
            beat_times: Beat positions in seconds.
            bpm: Tempo reported by the beat tracker.
            y: Audio samples; when None no correction is attempted.
                NOTE(review): all librosa calls here use ``self.sr``, so ``y``
                is assumed to be sampled at ``self.sr`` - confirm with callers.

        Returns:
            ``(bpm, reason)``. ``bpm`` is the input tempo, half of it, or two
            thirds of it. ``reason`` is "insufficient_beats" for fewer than two
            beats, "beat_level_ok" when nothing was changed, otherwise a string
            naming the rule that fired.
        """
        if not beat_times or len(beat_times) < 2:
            return bpm, "insufficient_beats"

        # Regularity of the beat grid: coefficient of variation of the intervals.
        beat_intervals = np.diff(beat_times)
        mean_interval = np.mean(beat_intervals)
        std_interval = np.std(beat_intervals)
        coeff_variation = std_interval / mean_interval if mean_interval > 0 else 1.0

        beat_count = len(beat_times)

        if self.verbose:
            logger.debug(f"Beat level analysis: {beat_count} beats, CV={coeff_variation:.3f}, BPM={bpm:.1f}")

        # Condition 1: very regular intervals + BPM > 100 + more than 20 beats
        # (threshold lowered to support short clips). Otherwise leave the BPM alone.
        if not (coeff_variation < 0.15 and bpm > 100 and beat_count > 20):
            return bpm, "beat_level_ok"

        # Without audio data fall back to the conservative choice: no correction.
        if y is None:
            return bpm, "beat_level_ok"

        # Only consider halving when the halved tempo stays in a plausible range.
        halved_bpm = bpm / 2
        if not (40 < halved_bpm < 160):
            return bpm, "beat_level_ok"

        # Onset strength envelope of the whole signal.
        onset_env = librosa.onset.onset_strength(y=y, sr=self.sr, hop_length=self.hop_length)

        # Onset strength at each beat: the maximum within +/- `window` frames.
        beat_frames = librosa.time_to_frames(beat_times, sr=self.sr, hop_length=self.hop_length)
        beat_strengths = []
        window = 3

        for frame in beat_frames:
            # Beats that fall past the end of the envelope are skipped.
            if frame < len(onset_env):
                start = max(0, frame - window)
                end = min(len(onset_env), frame + window + 1)
                beat_strengths.append(np.max(onset_env[start:end]))

        if len(beat_strengths) < 10:
            return bpm, "beat_level_ok"

        beat_strengths = np.array(beat_strengths)

        # Alternating strength pattern: mean strength of beats 0, 2, 4, ...
        # divided by mean strength of beats 1, 3, 5, ...
        odd_beats = beat_strengths[::2]
        even_beats = beat_strengths[1::2]
        mean_odd = np.mean(odd_beats)
        mean_even = np.mean(even_beats)
        strength_ratio = mean_odd / mean_even if mean_even > 0 else 1.0

        # Spectral centroid - used to tell fast songs from slow songs.
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=self.sr, hop_length=self.hop_length)
        mean_centroid = np.mean(spectral_centroid)

        if self.verbose:
            logger.debug(f"Beat strength ratio={strength_ratio:.3f}, spectral_centroid={mean_centroid:.1f}")

        # Combined decision logic. The branches below form one if/elif chain,
        # so only the first matching rule is evaluated.
        should_halve = False
        reason = ""

        # Rule 1: very pronounced alternating pattern (ratio > 1.8 or < 0.55).
        if strength_ratio > 1.8 or strength_ratio < 0.55:
            should_halve = True
            reason = f"strong_alternating_pattern (ratio={strength_ratio:.2f})"

        # Rule 1b: BPM > 150 + moderate alternating pattern -> halve.
        # e.g. "春娇与志明" (172.3 BPM, ratio=1.406, ref=85)
        # Home - Headhunterz (152 BPM, ratio=1.098) does not trigger this rule.
        elif bpm > 150 and (strength_ratio > 1.25 or strength_ratio < 0.8):
            should_halve = True
            reason = f"very_high_bpm_with_alternating (bpm={bpm:.1f}, ratio={strength_ratio:.2f})"

        # Rule 2: BPM in 125-150 + strong alternating pattern (ratio > 1.25).
        # High onset density (>= 3.0/s) together with a high spectral centroid
        # (>= 2200) indicates a genuinely fast song, which must not be corrected,
        # e.g. "爱在西元前" (129.2 BPM, centroid=2527, onset_density=3.8, ratio=1.29).
        # Otherwise the tempo is corrected; a bpm*2/3 correction covers songs
        # with a 3:2 rhythmic relationship:
        # e.g. "该死的爱情" (129.2 BPM, ratio=1.668, centroid=1986, ref=84) -> 2/3 = 86.1
        # e.g. "你要的全拿走" (136.0 BPM, ratio=1.485, centroid=2678, ref=76) -> 2/3 = 90.7
        elif 125 <= bpm <= 150 and strength_ratio > 1.25:
            onset_frames = librosa.onset.onset_detect(onset_envelope=onset_env, sr=self.sr, hop_length=self.hop_length)
            duration = len(y) / self.sr
            onset_density = len(onset_frames) / duration if duration > 0 else 0
            if onset_density >= 3.0 and mean_centroid >= 2200:
                if self.verbose:
                    logger.debug(f"规则2跳过: 高onset密度({onset_density:.1f}/s) + 高谱质心({mean_centroid:.0f}),判定为快歌")
            else:
                # The correction strategy depends on the spectral centroid:
                # Low centroid (< 2200): dark-sounding slow song, the tracker
                #   locked onto 3/2 of the tempo, corrected with *2/3,
                #   e.g. "该死的爱情" (129.2, centroid=1986, ratio=1.67) -> 86.1 (ref=84)
                # High centroid (>= 2200) + low onset density (< 3.0): brightly
                #   produced slow song, the tracker locked onto 2x, corrected with /2,
                #   e.g. "你要的全拿走" (136.0, centroid=2678, density=2.17, ratio=1.49) -> 68.0 (ref=76)
                if mean_centroid >= 2200:
                    # Bright but rhythmically sparse -> plain halving.
                    should_halve = True
                    reason = f"rule2_bright_slow (bpm={bpm:.1f}, ratio={strength_ratio:.2f}, centroid={mean_centroid:.0f}, density={onset_density:.1f})"
                else:
                    # Dark timbre -> correct with 2/3 and return immediately
                    # (rule 4 below is not evaluated for this case).
                    two_thirds_bpm = round(bpm * 2 / 3, 1)
                    should_halve = False
                    logger.info(
                        f"🔧 节拍层级纠正(2/3): {bpm:.1f} BPM → {two_thirds_bpm:.1f} BPM "
                        f"(ratio={strength_ratio:.2f}, centroid={mean_centroid:.0f})"
                    )
                    return two_thirds_bpm, f"rule2_two_thirds (bpm={bpm:.1f}, result={two_thirds_bpm:.1f}, ratio={strength_ratio:.2f})"

        # BPM in 125-150 with the weaker beats first (ratio < 0.8) and a low
        # spectral centroid -> halve.
        elif 125 <= bpm <= 150 and strength_ratio < 0.8 and mean_centroid < 2200:
            should_halve = True
            reason = f"mid_bpm_low_ratio_low_centroid (bpm={bpm:.1f}, ratio={strength_ratio:.2f}, centroid={mean_centroid:.0f})"

        # Rule 2b: BPM > 130 + low spectral centroid (< 1800): slow-song
        # characteristics although a high BPM was detected.
        # Catches songs like "嚣张": BPM=136 but centroid=1653.
        elif bpm > 130 and mean_centroid < 1800:
            should_halve = True
            reason = f"high_bpm_low_centroid (bpm={bpm:.1f}, centroid={mean_centroid:.0f})"

        # Rule 3: the 115-125 BPM range needs stricter conditions.
        elif 115 <= bpm < 125:
            # Rule 3a: very strong alternating pattern (ratio > 1.5) -> halve
            # regardless of the centroid. This catches songs like "想你的夜"
            # that alternate strongly but have a rather high centroid.
            if strength_ratio > 1.5 or strength_ratio < 0.65:
                should_halve = True
                reason = f"strong_alternating_in_mid_bpm (bpm={bpm:.1f}, ratio={strength_ratio:.2f})"
            # Rule 3b: moderate alternating pattern + low centroid (slow-song trait).
            elif mean_centroid < 2000 and (strength_ratio > 1.4 or strength_ratio < 0.7):
                should_halve = True
                reason = f"slow_song_detected (centroid={mean_centroid:.0f}, ratio={strength_ratio:.2f})"
            # Otherwise keep the value (probably a genuine mid-tempo song such
            # as 有什么奇怪 or 中巴车).

        # Rule 3c: BPM in 100-115 (possibly a slow song detected at 2x, e.g.
        # 嘉禾望岗 56 BPM -> 112 BPM). Decided via onset alignment.
        elif 100 <= bpm < 115:
            score_detected = self._compute_onset_alignment_score(onset_env, bpm)
            score_halved = self._compute_onset_alignment_score(onset_env, halved_bpm)

            if score_detected > 0 and score_halved > 0:
                alignment_ratio = score_halved / score_detected
                if self.verbose:
                    logger.debug(f"Onset alignment (100-115 BPM): detected={score_detected:.3f}, halved={score_halved:.3f}, ratio={alignment_ratio:.3f}")

                # If the halved BPM aligns better (ratio > 1.0) the true BPM is
                # taken to be half; the alternating pattern is required as
                # supporting evidence.
                if alignment_ratio > 1.0 and (strength_ratio > 1.2 or strength_ratio < 0.83):
                    should_halve = True
                    reason = f"slow_song_100_115_range (alignment_ratio={alignment_ratio:.3f}, strength_ratio={strength_ratio:.2f})"
                # Even without a clear alternating pattern, halve when the
                # alignment score is clearly better.
                elif alignment_ratio > 1.08:
                    should_halve = True
                    reason = f"onset_alignment_strongly_favors_half (ratio={alignment_ratio:.3f})"

        # Rule 4: compare onset alignment of BPM vs BPM/2 (only for BPM > 130).
        # A clearly better score for BPM/2 means half-beats were tracked.
        # Restricted to BPM > 130 so mid-tempo songs such as "中巴车"
        # (117.5 BPM) are not affected. This is a separate `if`, so it also
        # runs after a rule above matched its range without deciding to halve.
        if not should_halve and bpm > 130:
            score_detected = self._compute_onset_alignment_score(onset_env, bpm)
            score_halved = self._compute_onset_alignment_score(onset_env, halved_bpm)

            if score_detected > 0 and score_halved > 0:
                alignment_ratio = score_halved / score_detected
                if self.verbose:
                    logger.debug(f"Onset alignment: detected={score_detected:.3f}, halved={score_halved:.3f}, ratio={alignment_ratio:.3f}")

                # A high spectral centroid (>= 2000) suggests fast/electronic
                # music, so a larger alignment ratio is required before halving.
                # Avoids hitting e.g. "Home - Headhunterz" (152 BPM, centroid=2290, ratio=1.102).
                ratio_threshold = 1.15 if mean_centroid >= 2000 else 1.04
                if alignment_ratio > ratio_threshold and 40 < halved_bpm < 160:
                    should_halve = True
                    reason = f"onset_alignment_favors_half (ratio={alignment_ratio:.3f})"

        if should_halve:
            logger.info(f"🔧 节拍层级纠正: {bpm:.1f} BPM → {halved_bpm:.1f} BPM ({reason})")
            return halved_bpm, reason

        return bpm, "beat_level_ok"

    def _compute_onset_alignment_score(self, onset_env: np.ndarray, bpm: float) -> float:
        """
        计算给定BPM与onset strength的对齐度分数

        原理:真实的节拍应该对应onset strength的峰值
        分数越高表示对齐度越好
        """
        frame_rate = self.sr / self.hop_length
        beat_interval_frames = int((60.0 / bpm) * frame_rate)

        if beat_interval_frames < 1 or beat_interval_frames > len(onset_env):
            return 0.0

        # 在每个节拍位置采样onset strength
        beat_strengths = []
        off_beat_strengths = []

        for i in range(0, len(onset_env) - beat_interval_frames, beat_interval_frames):
            # 节拍位置(在一个小窗口内找最大值)
            window_size = max(1, beat_interval_frames // 8)
            start = max(0, i - window_size)
            end = min(len(onset_env), i + window_size)
            beat_strengths.append(np.max(onset_env[start:end]))

            # 非节拍位置(节拍之间的中点)
            mid_point = i + beat_interval_frames // 2
            if mid_point < len(onset_env):
                start_off = max(0, mid_point - window_size)
                end_off = min(len(onset_env), mid_point + window_size)
                off_beat_strengths.append(np.max(onset_env[start_off:end_off]))

        if not beat_strengths or not off_beat_strengths:
            return 0.0

        # 分数 = 节拍位置平均强度 / 非节拍位置平均强度
        # 比值越高,说明节拍位置的onset越明显
        mean_beat = np.mean(beat_strengths)
        mean_off_beat = np.mean(off_beat_strengths)

        if mean_off_beat < 1e-6:
            return mean_beat

        score = mean_beat / mean_off_beat
        return float(score)

    def _detailed_bpm_analysis(self, y: np.ndarray, sr: int) -> Dict[str, Any]:
        """
        Detailed BPM analysis based on the autocorrelation of a tempogram.

        Args:
            y: Audio samples.
            sr: Sample rate of ``y``.

        Returns:
            On success a dict with 'bpm' (clipped to ``BPM_MIN``..``BPM_MAX``
            and rounded to one decimal), 'confidence' (clipped to 0..1),
            'method' and 'peaks_count'. When no autocorrelation peak is found
            the defaults ``bpm=120.0`` / ``confidence=0.3`` are used.
            On failure a dict with 'bpm' = 0, 'confidence' = 0, 'method' and
            'error'; the exception is logged, not raised.
        """
        try:
            # The tempogram is computed directly from the signal. The former
            # separate onset_strength() call was never used and has been dropped.
            tempogram = librosa.feature.tempogram(
                y=y, sr=sr, hop_length=self.hop_length
            )

            # Normalised autocorrelation of the flattened tempogram (lags >= 0).
            # NOTE(review): flatten() merges both tempogram axes, so peak
            # indices are offsets into the flattened array, yet the BPM formula
            # below treats them as lags in frames - confirm this is intended.
            tempogram_flat = tempogram.flatten()
            acf = correlate(tempogram_flat, tempogram_flat, mode='full')
            acf = acf[len(acf)//2:]
            acf = acf / (acf[0] + 1e-8)

            # Peak search skips lag 0; indices are shifted back afterwards.
            peaks, _ = find_peaks(acf[1:], height=0.2, distance=5)
            peaks = peaks + 1

            if len(peaks) > 0:
                frame_rate = sr / self.hop_length
                best_peak_idx = peaks[np.argmax(acf[peaks])]
                bpm = 60.0 * frame_rate / best_peak_idx
                confidence = float(np.max(acf[peaks]))
            else:
                bpm = 120.0
                confidence = 0.3

            # Keep the estimate inside the supported range; cast so the result
            # holds a builtin float rather than a numpy scalar.
            bpm = float(np.clip(bpm, self.BPM_MIN, self.BPM_MAX))

            return {
                'bpm': round(bpm, 1),
                'confidence': round(float(np.clip(confidence, 0, 1)), 2),
                'method': 'Tempogram Autocorrelation',
                'peaks_count': int(len(peaks))
            }
        except Exception as e:
            # Best-effort: a failed analysis is reported as bpm=0 so the fusion
            # step can fall back to the other detector.
            logger.warning(f"⚠️  详细分析失败: {str(e)}")
            return {
                'bpm': 0,
                'confidence': 0,
                'method': 'Tempogram Autocorrelation',
                'error': str(e)
            }

    def _fuse_results(
        self,
        fast_result: Dict[str, Any],
        detailed_result: Dict[str, Any],
        y: np.ndarray = None,
    ) -> Dict[str, Any]:
        """融合快速和详细分析的结果,带倍频检测和纠正"""
        results = []

        if fast_result.get('bpm', 0) > 0:
            results.append({
                'bpm': fast_result['bpm'],
                'original_bpm': fast_result.get('original_bpm', fast_result['bpm']),
                'confidence': fast_result['confidence'],
                'method': fast_result['method'],
                'beat_level_correction': fast_result.get('beat_level_correction')
            })

        if detailed_result.get('bpm', 0) > 0:
            results.append({
                'bpm': detailed_result['bpm'],
                'confidence': detailed_result['confidence'],
                'method': detailed_result['method']
            })

        if not results:
            return {
                'bpm': 120.0,
                'confidence': 0.0,
                'note': '无法检测 BPM,使用默认值'
            }

        # 如果快速检测已经进行了beat level纠正,直接使用纠正后的结果
        beat_level_correction = results[0].get('beat_level_correction') if results else None
        if beat_level_correction:
            original_bpm = results[0].get('original_bpm', results[0]['bpm'])
            corrected_bpm = results[0]['bpm']
            return {
                'bpm': corrected_bpm,
                'confidence': results[0]['confidence'],
                'primary_method': results[0]['method'],
                'supporting_methods': len(results) - 1,
                'all_candidates': results,
                'octave_correction': {
                    'from': original_bpm,
                    'to': corrected_bpm,
                    'reason': f'节拍层级纠正: {original_bpm:.1f} → {corrected_bpm:.1f} ({beat_level_correction})'
                }
            }

        # 如果只有一个结果
        if len(results) == 1:
            best = results[0]
            return {
                'bpm': best['bpm'],
                'confidence': best['confidence'],
                'primary_method': best['method'],
                'supporting_methods': 0,
                'all_candidates': results,
                'octave_correction': None
            }

        # 检测倍频关系
        fast_bpm = results[0]['bpm']  # librosa.beat.tempo 通常更准确
        detailed_bpm = results[1]['bpm'] if len(results) > 1 else None

        if detailed_bpm and fast_bpm > 0:
            ratio = max(fast_bpm, detailed_bpm) / min(fast_bpm, detailed_bpm)

            # 检查是否是倍频关系(1/2, 1/3, 1/4, 2x, 3x, 4x 等)
            octave_correction = None
            is_octave = False
            chosen_bpm = fast_bpm  # 默认使用快速检测结果

            # 特殊情况:当 detailed_bpm 很低(< 40)且 fast_bpm 在 100-120 范围时
            # 可能是慢歌被检测为2倍,此时 detailed_bpm × 2 可能是正确答案
            # 例如:嘉禾望岗 实际56 BPM,fast=112.3,detailed=30,30×2=60更接近
            # 注意:需要排除中速/快歌被误纠正的情况(如 中巴车带我回家, fast=117.5, detailed=30, ref=115)
            # 使用 onset alignment 来验证:如果 halved BPM 的对齐度明显优于 fast BPM,才执行纠正
            if detailed_bpm < 40 and 100 <= fast_bpm <= 120 and y is not None:
                # 计算谱质心来判断是否真的是慢歌
                spectral_centroid = librosa.feature.spectral_centroid(
                    y=y, sr=self.sr, hop_length=self.hop_length
                )
                mean_centroid = float(np.mean(spectral_centroid))

                doubled_detailed = detailed_bpm * 2
                # 检查 doubled_detailed 是否在合理的慢歌范围内 (50-70 BPM)
                # 且谱质心较低(< 2200),确认是慢歌特征
                if 50 <= doubled_detailed <= 70 and mean_centroid < 2200:
                    # 检查 fast_bpm 是否约等于 doubled_detailed × 2
                    if abs(fast_bpm - doubled_detailed * 2) / fast_bpm < 0.1:
                        # 额外验证:用 onset alignment 确认 halved BPM 确实更好
                        # 避免误纠正如"中巴车带我回家"(fast=117.5, ref=115)
                        onset_env = librosa.onset.onset_strength(
                            y=y, sr=self.sr, hop_length=self.hop_length
                        )
                        score_fast = self._compute_onset_alignment_score(onset_env, fast_bpm)
                        score_halved = self._compute_onset_alignment_score(onset_env, doubled_detailed)
                        alignment_ratio = score_halved / score_fast if score_fast > 0 else 0

                        if self.verbose:
                            logger.debug(
                                f"慢歌倍频验证: score_fast={score_fast:.3f}, "
                                f"score_halved={score_halved:.3f}, ratio={alignment_ratio:.3f}"
                            )

                        # 只有当 halved BPM 的对齐度明显更好时才纠正
                        # 中巴车: alignment_ratio=1.042,不触发(实际BPM=115)
                        # 嘉禾望岗: halved=56 对齐度应该明显更好,会触发
                        if alignment_ratio > 1.08:
                            chosen_bpm = doubled_detailed
                            is_octave = True
                            octave_correction = {
                                'from': fast_bpm,
                                'to': doubled_detailed,
                                'reason': f'慢歌倍频纠正: fast={fast_bpm:.1f} ≈ detailed×4={detailed_bpm:.1f}×4,使用 detailed×2={doubled_detailed:.1f} (alignment={alignment_ratio:.3f})'
                            }
                            logger.info(f"\n🔧 慢歌倍频纠正: {fast_bpm:.1f} BPM → {doubled_detailed:.1f} BPM")
                            logger.info(f"   原因: {octave_correction['reason']}")
                            return {
                                'bpm': chosen_bpm,
                                'confidence': results[1]['confidence'],
                                'primary_method': 'Tempogram + 倍频纠正',
                                'supporting_methods': 1,
                                'all_candidates': results,
                                'octave_correction': octave_correction
                            }
                        else:
                            if self.verbose:
                                logger.debug(
                                    f"慢歌倍频纠正跳过: fast BPM({fast_bpm:.1f})对齐度更好,保持原值"
                                )

            # 检查常见倍频关系:detailed_bpm 应该 ≈ fast_bpm * multiplier
            for multiplier in [0.25, 0.33, 0.5, 1.0, 2.0, 3.0, 4.0]:
                expected_bpm = fast_bpm * multiplier
                # 检查 detailed_bpm 是否接近 expected_bpm(10% 容差)
                if abs(detailed_bpm - expected_bpm) / expected_bpm < 0.1:
                    is_octave = True
                    if multiplier != 1.0:  # 非 1 倍关系表示倍频
                        # 使用快速检测的结果
                        corrected_bpm = fast_bpm
                        octave_correction = {
                            'from': detailed_bpm,
                            'to': corrected_bpm,
                            'reason': f'倍频关系检测: {detailed_bpm:.1f} ≈ {fast_bpm:.1f} × {multiplier},使用快速检测结果'
                        }
                    break

            # 如果检测到倍频,使用快速检测结果(通常更准确)
            if is_octave and octave_correction:
                logger.info(f"\n🔧 倍频纠正: {octave_correction['from']:.1f} BPM → {octave_correction['to']:.1f} BPM")
                logger.info(f"   原因: {octave_correction['reason']}")
                return {
                    'bpm': fast_bpm,
                    'confidence': results[0]['confidence'],
                    'primary_method': results[0]['method'],
                    'supporting_methods': 1,
                    'all_candidates': results,
                    'octave_correction': octave_correction
                }

        # 如果没有倍频关系,优先使用快速检测(librosa.beat.tempo 是金标准)
        # 快速检测通常比详细分析更准确
        best = results[0]  # 快速检测

        return {
            'bpm': best['bpm'],
            'confidence': best['confidence'],
            'primary_method': best['method'],
            'supporting_methods': len(results) - 1,
            'all_candidates': results,
            'octave_correction': None
        }

    def _display_results(self, result: Dict[str, Any]):
        """
        Log a human-readable summary of one analysis result.

        Args:
            result: Dict as produced by ``analyze_file``. When its 'success'
                flag is false only the error is logged.

        The fused result may be the reduced fallback dict (only 'bpm',
        'confidence' and 'note'), so the optional fields are read with
        defaults instead of raising KeyError.
        """
        if not result['success']:
            logger.error(f"❌ 分析失败: {result.get('error')}")
            return

        file_info = (
            f"文件: {result['file_name']} "
            f"({result['file_size_mb']} MB) "
            f"时长: {result['duration_seconds']} 秒"
        )
        logger.info(file_info)

        final = result['final_result']
        logger.info(f"\n🎵 最终结果:")
        logger.info(f"   BPM: {final['bpm']}")
        logger.info(f"   置信度: {final['confidence']:.0%}")
        # 'primary_method' / 'supporting_methods' are absent from the fallback
        # result returned when no detector produced a BPM.
        logger.info(f"   主要方法: {final.get('primary_method', 'N/A')}")
        logger.info(f"   支持方法数: {final.get('supporting_methods', 0)}")
        if final.get('note'):
            logger.info(f"   备注: {final['note']}")

        # Octave / beat-level correction details, when one was applied.
        if final.get('octave_correction'):
            correction = final['octave_correction']
            logger.info(f"\n🔧 倍频纠正:")
            logger.info(f"   原始检测: {correction['from']:.1f} BPM")
            logger.info(f"   纠正后: {correction['to']:.1f} BPM")
            logger.info(f"   原因: {correction['reason']}")

        if self.verbose:
            logger.debug(f"\n📊 快速检测: {result['fast_detection']['bpm']} BPM")
            logger.debug(f"📊 详细分析: {result['detailed_analysis']['bpm']} BPM")

    def export_results(
        self,
        results: Any,
        output_path: str
    ):
        """
        Write ``results`` to ``output_path`` as UTF-8 JSON (indent 2).

        numpy arrays and numpy integer/float scalars anywhere inside nested
        dicts, lists and tuples are converted to plain Python values first.
        Failures are logged and not raised.

        Args:
            results: Result structure to serialise.
            output_path: Destination file path.
        """
        def to_native(value):
            # Containers are rebuilt recursively; tuples come back as lists.
            if isinstance(value, dict):
                return {key: to_native(item) for key, item in value.items()}
            if isinstance(value, (list, tuple)):
                return [to_native(item) for item in value]
            # numpy values become their builtin counterparts.
            if isinstance(value, np.ndarray):
                return value.tolist()
            if isinstance(value, np.integer):
                return int(value)
            if isinstance(value, np.floating):
                return float(value)
            return value

        try:
            serialisable = to_native(results)

            with open(output_path, 'w', encoding='utf-8') as handle:
                json.dump(serialisable, handle, ensure_ascii=False, indent=2)

            logger.info(f"✓ 结果已导出到: {Path(output_path).absolute()}")

        except Exception as exc:
            logger.error(f"❌ 导出失败: {str(exc)}")


def main():
    """Command-line entry point.

    Parses ``--file`` / ``--dir`` / ``--output`` / ``--verbose``, runs the
    analyzer on a single file (``--file`` wins when both inputs are given)
    or on a directory, and exports the outcome as JSON. When no output path
    is supplied, a default file name in the current directory is used.

    Exits with status 1 when neither input option is given (after printing
    the help text) or when the analysis raises an exception.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='Realtime BPM Analyzer - Python 测试程序',
        epilog="""
示例用法:
  # 分析单个文件
  python bpm_analyzer_test.py --file music.mp3

  # 分析并输出结果
  python bpm_analyzer_test.py --file music.mp3 --output result.json

  # 显示详细信息
  python bpm_analyzer_test.py --file music.mp3 --verbose

  # 批量分析文件夹
  python bpm_analyzer_test.py --dir /path/to/music
        """
    )
    cli.add_argument('--file', type=str, help='音频文件路径')
    cli.add_argument('--dir', type=str, help='音频文件夹路径(批量分析)')
    cli.add_argument('-o', '--output', type=str, help='输出 JSON 文件路径')
    cli.add_argument(
        '-v', '--verbose', action='store_true', help='显示详细信息'
    )
    opts = cli.parse_args()

    # At least one input source is required.
    if not (opts.file or opts.dir):
        cli.print_help()
        sys.exit(1)

    analyzer = RealtimeBPMAnalyzerTest(verbose=opts.verbose)

    try:
        # Single-file mode takes precedence over directory mode.
        if opts.file:
            results = analyzer.analyze_file(opts.file)
        else:
            per_file = analyzer.analyze_directory(opts.dir)
            results = {
                'success': True,
                'total_files': len(per_file),
                'results': per_file
            }

        # Fall back to a default file name when no output path was given.
        destination = opts.output
        if not destination:
            if opts.file:
                destination = f"bpm_result_{Path(opts.file).stem}.json"
            else:
                destination = "bpm_results.json"
        analyzer.export_results(results, destination)

        rule = "=" * 80
        print("\n" + rule)
        print("✅ 分析完成!")
        print(rule + "\n")

    except Exception as exc:
        logger.error(f"❌ 执行失败: {exc!s}")
        if opts.verbose:
            import traceback
            traceback.print_exc()
        sys.exit(1)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()