# audio_features.py
"""
音频特征提取模块
提供音频特征提取、节奏强度和能量级别计算功能
"""

import os
import warnings
import numpy as np
import librosa
from typing import Any, Dict, List, Optional, Tuple
from dataclasses import dataclass

from .bpm_analyzer_tools import RealtimeBPMAnalyzerTest

# 抑制 librosa 的 audioread 弃用警告
warnings.filterwarnings("ignore", category=FutureWarning, module="librosa")


@dataclass
class AudioFeatures:
    """Frame-level and global audio features extracted from one track."""

    # Time-domain features
    rms_energy: np.ndarray  # per-frame RMS energy in dB (relative to peak; see extract_audio_features)
    rms_times: np.ndarray  # timestamp (seconds) of each RMS frame

    # Frequency-domain features
    spectral_centroid: np.ndarray  # per-frame spectral centroid (perceived brightness)
    spectral_rolloff: np.ndarray  # per-frame spectral rolloff frequency
    spectral_bandwidth: np.ndarray  # per-frame spectral bandwidth

    # Rhythm features
    onset_strength: np.ndarray  # per-frame onset-strength envelope
    tempo: float  # estimated BPM (octave-corrected by the BPM analyzer)

    # Summary info
    duration: float  # total track duration in seconds
    sr: int  # sample rate in Hz


def extract_audio_features(audio_path: str, hop_length: int = 512) -> AudioFeatures:
    """
    Extract frame-level and global audio features from an audio file.

    Args:
        audio_path: path to the audio file
        hop_length: hop size in samples (default 512 ≈ 11.6 ms @ 44.1 kHz)

    Returns:
        AudioFeatures: populated feature container
    """
    # Load at the file's native sample rate, mixed down to mono.
    signal, sample_rate = librosa.load(audio_path, sr=None, mono=True)

    # 1. Time-domain loudness: per-frame RMS, later converted to dB vs. peak.
    rms_frames = librosa.feature.rms(y=signal, hop_length=hop_length)[0]
    frame_times = librosa.frames_to_time(
        np.arange(len(rms_frames)), sr=sample_rate, hop_length=hop_length
    )

    # 2. Spectral shape descriptors (all share the same framing).
    spec_args = dict(y=signal, sr=sample_rate, hop_length=hop_length)
    centroid = librosa.feature.spectral_centroid(**spec_args)[0]
    rolloff = librosa.feature.spectral_rolloff(**spec_args)[0]
    bandwidth = librosa.feature.spectral_bandwidth(**spec_args)[0]

    # 3. Onset-strength envelope for downstream rhythm analysis.
    onset_envelope = librosa.onset.onset_strength(
        y=signal, sr=sample_rate, hop_length=hop_length
    )

    # Unified BPM entry point (includes octave-error correction).
    analyzer = RealtimeBPMAnalyzerTest(verbose=False)
    tempo_bpm = analyzer.analyze_bpm(y=signal, sr=sample_rate).get('bpm', 120.0)

    return AudioFeatures(
        rms_energy=librosa.amplitude_to_db(rms_frames, ref=np.max),
        rms_times=frame_times,
        spectral_centroid=centroid,
        spectral_rolloff=rolloff,
        spectral_bandwidth=bandwidth,
        onset_strength=onset_envelope,
        tempo=tempo_bpm,
        duration=librosa.get_duration(y=signal, sr=sample_rate),
        sr=int(sample_rate),
    )


def calculate_rhythm_intensity(features: AudioFeatures) -> int:
    """
    Rate rhythm intensity on a 1-5 scale from extracted audio features.

    Blends three signals:
    - BPM (speed)
    - onset strength (rhythmic density)
    - RMS variation (dynamic range)

    Args:
        features: extracted audio features

    Returns:
        int: rhythm intensity (1-5)
    """
    def _grade(value, bounds):
        # bounds are descending thresholds mapping to scores 5, 4, 3, 2;
        # anything below all of them (including NaN) scores 1.
        score = 5
        for bound in bounds:
            if value >= bound:
                return score
            score -= 1
        return 1

    # 1. BPM score (roughly 40-200 BPM mapped onto 1-5).
    tempo_score = _grade(features.tempo, (160, 130, 100, 70))

    # 2. Onset-density score: mean onset strength relative to its peak.
    onset = features.onset_strength
    peak = np.max(onset) if len(onset) > 0 else 1
    density = np.mean(onset) / peak if peak > 0 else 0
    density_score = _grade(density, (0.5, 0.4, 0.3, 0.2))

    # 3. Dynamic-range score from the spread of the RMS curve (dB).
    dynamic_score = _grade(np.std(features.rms_energy), (15, 12, 9, 6))

    # Weighted blend: BPM 40%, density 35%, dynamics 25%.
    blended = tempo_score * 0.4 + density_score * 0.35 + dynamic_score * 0.25
    return int(round(blended))


def calculate_energy_level(
    features: AudioFeatures,
) -> Tuple[int, Dict[str, float]]:
    """
    Compute an energy level (1-5) plus per-component details.

    Args:
        features: extracted audio features

    Returns:
        Tuple[int, Dict]: (energy level 1-5, detail dict with component scores)
    """
    # 1. Loudness score from RMS (dB): map roughly -60..-10 dB onto 0..5,
    #    then take the 75th percentile so louder passages dominate.
    rms_db = features.rms_energy
    loudness_normalized = np.clip((rms_db + 60) / 10, 0, 5)
    loudness_score = float(np.percentile(loudness_normalized, 75))

    # 2. Brightness score from spectral centroid (normalized at 4 kHz).
    centroid = features.spectral_centroid
    centroid_normalized = np.clip(centroid / 4000, 0, 1)
    brightness_score = float(np.mean(centroid_normalized)) * 5

    # 3. Rhythm score from onset strength, normalized by its 90th percentile.
    #    Guard against an empty or all-zero onset envelope (e.g. silence),
    #    which previously divided by zero and produced NaN scores.
    onset = features.onset_strength
    onset_ref = float(np.percentile(onset, 90)) if len(onset) > 0 else 0.0
    if onset_ref > 0:
        onset_normalized = np.clip(onset / onset_ref, 0, 1)
        rhythm_score = float(np.mean(onset_normalized)) * 5
    else:
        rhythm_score = 0.0

    # 4. Tempo multiplier: faster music inflates the composite score.
    tempo = features.tempo
    if tempo > 140:
        tempo_factor = 1.3
    elif tempo > 120:
        tempo_factor = 1.15
    elif tempo > 100:
        tempo_factor = 1.0
    elif tempo > 80:
        tempo_factor = 0.9
    else:
        tempo_factor = 0.8

    # Weighted composite of the three component scores.
    weights = {"loudness": 0.40, "brightness": 0.25, "rhythm": 0.35}

    composite_score = (
        weights["loudness"] * loudness_score
        + weights["brightness"] * brightness_score
        + weights["rhythm"] * rhythm_score
    ) * tempo_factor

    # Map the composite score onto discrete levels 1-5.
    if composite_score < 1.5:
        energy_level = 1
    elif composite_score < 2.5:
        energy_level = 2
    elif composite_score < 3.5:
        energy_level = 3
    elif composite_score < 4.5:
        energy_level = 4
    else:
        energy_level = 5

    details = {
        "loudness_score": round(loudness_score, 2),
        "brightness_score": round(brightness_score, 2),
        "rhythm_score": round(rhythm_score, 2),
        "tempo_factor": tempo_factor,
        "composite_score": round(composite_score, 2),
    }

    return energy_level, details


def energy_level_to_string(level: int) -> str:
    """
    Convert a numeric energy level into its descriptive label.

    Args:
        level: energy level (1-5)

    Returns:
        str: energy-density description (falls back to the mid label
             for out-of-range input)
    """
    try:
        return {
            1: "舒缓",
            2: "柔和",
            3: "律动",
            4: "强烈",
            5: "爆发",
        }[level]
    except KeyError:
        return "律动"


@dataclass
class BeatInfo:
    """Beat-tracking results for one audio track."""
    beat_timestamps: List[float]      # all beat times, in seconds
    downbeat_timestamps: List[float]  # downbeat times (first beat of each bar)
    tempo: float                       # BPM
    beat_intervals: List[float]       # inter-beat intervals (for tempo-change detection)


@dataclass
class EmotionCurve:
    """Sliding-window emotion curve data."""
    timestamps: List[float]           # window start times, in seconds
    energy_values: List[float]        # energy (0-1)
    valence_values: List[float]       # valence (0-1; low = sad, high = happy)
    arousal_values: List[float]       # arousal (0-1; low = calm, high = excited)
    smoothed_curve: List[float]       # smoothed composite emotion curve


@dataclass
class SegmentEmotion:
    """Per-segment emotion data (aligned to songformer segment boundaries)."""
    start: float                      # segment start time (seconds)
    end: float                        # segment end time (seconds)
    label: str                        # segment label (intro/verse/chorus/bridge/outro)
    intensity: float                  # emotional intensity (0-1)
    energy: float                     # energy (0-1)
    valence: float                    # valence (0-1)
    arousal: float                    # arousal (0-1)
    trend: str                        # emotion trend (rising/falling/stable/peak)


@dataclass
class BeatDensityInfo:
    """Per-segment beat density info, used to plan shot durations."""
    segment_label: str                # segment label
    start: float                      # start time (seconds)
    end: float                        # end time (seconds)
    beat_count: int                   # number of beats in the segment
    avg_interval: float               # average inter-beat interval (seconds)
    density_level: str                # sparse/normal/dense/very_dense
    recommended_shot_duration: str    # recommended shot duration (human-readable)


@dataclass
class EnhancedClimaxInfo:
    """Climax point enriched with buildup / sustain / wind-down durations."""
    time: float                       # climax time point (seconds)
    intensity: str                    # strong/strongest
    buildup_start: float              # buildup start time (seconds)
    buildup_duration: float           # buildup duration (seconds)
    climax_duration: float            # climax sustain duration (seconds)
    winddown_duration: float          # wind-down duration (seconds)


def extract_beat_timestamps(audio_path: str) -> BeatInfo:
    """
    Extract beat timestamps (sync points) from an audio file.

    Uses the unified BPM analyzer (with octave-error correction).

    Args:
        audio_path: path to the audio file

    Returns:
        BeatInfo: beat timing information
    """
    signal, sample_rate = librosa.load(audio_path, sr=22050, mono=True)

    # Unified BPM entry point (octave correction + beat_times included).
    analysis = RealtimeBPMAnalyzerTest(verbose=False).analyze_bpm(y=signal, sr=sample_rate)
    tempo_bpm = analysis.get('bpm', 120.0)

    # beat_times has already been downsampled by analyze_bpm when the BPM
    # was halved during octave correction.
    beats = np.array(analysis.get('beat_times', []))

    # Downbeats: every 4th beat, assuming 4/4 time (and that the first
    # detected beat falls on a bar boundary).
    downbeats = beats[::4].tolist() if len(beats) > 0 else []

    # Inter-beat intervals, used downstream to detect tempo changes.
    intervals = np.diff(beats).tolist() if len(beats) > 1 else []

    return BeatInfo(
        beat_timestamps=beats.tolist(),
        downbeat_timestamps=downbeats,
        tempo=tempo_bpm,
        beat_intervals=intervals,
    )


def extract_emotion_curve(
    audio_path: str,
    window_size: float = 2.0,  # analysis window length (seconds)
    hop_size: float = 0.5      # hop between windows (seconds)
) -> EmotionCurve:
    """
    Build an emotion curve from sliding-window audio features.

    Feature-to-emotion mapping:
    - Energy: RMS level -> emotional intensity
    - Valence: spectral centroid (brighter = more positive)
    - Arousal: onset density (busier = more excited)

    Args:
        audio_path: path to the audio file
        window_size: sliding-window length in seconds
        hop_size: hop between windows in seconds

    Returns:
        EmotionCurve: per-window emotion values plus a smoothed composite curve
    """
    signal, sample_rate = librosa.load(audio_path, sr=None, mono=True)

    win = int(window_size * sample_rate)
    hop = int(hop_size * sample_rate)

    times: List[float] = []
    energies: List[float] = []
    valences: List[float] = []
    arousals: List[float] = []

    # NOTE: tracks shorter than one window produce empty curves.
    for offset in range(0, len(signal) - win, hop):
        chunk = signal[offset:offset + win]
        times.append(offset / sample_rate)

        # 1. Energy: RMS amplitude, normalized so 0.1 RMS maps to 1.0.
        rms = np.sqrt(np.mean(chunk ** 2))
        energies.append(float(min(rms / 0.1, 1.0)))

        # 2. Valence: spectral centroid (high = bright = positive), capped at 4 kHz.
        centroid = librosa.feature.spectral_centroid(y=chunk, sr=sample_rate)[0]
        valences.append(float(min(np.mean(centroid) / 4000, 1.0)))

        # 3. Arousal: mean onset strength, capped at 2.0.
        onsets = librosa.onset.onset_strength(y=chunk, sr=sample_rate)
        arousals.append(float(min(np.mean(onsets) / 2.0, 1.0)))

    # 4. Composite curve: energy 40%, arousal 40%, valence 20%.
    combined = [
        e * 0.4 + a * 0.4 + v * 0.2
        for e, a, v in zip(energies, arousals, valences)
    ]

    # Smooth with a centered 3-point moving average.
    if len(combined) >= 3:
        combined = np.convolve(combined, np.ones(3) / 3, mode='same').tolist()

    return EmotionCurve(
        timestamps=times,
        energy_values=energies,
        valence_values=valences,
        arousal_values=arousals,
        smoothed_curve=combined,
    )


def aggregate_emotion_by_segments(
    emotion_curve: EmotionCurve,
    segments: List[Dict[str, Any]],
) -> List[SegmentEmotion]:
    """
    Aggregate a raw emotion curve over songformer segment boundaries.

    Args:
        emotion_curve: raw emotion curve data
        segments: songformer segment list, e.g.
                  [{"start": 0.0, "end": 30.5, "label": "intro"}, ...]

    Returns:
        List[SegmentEmotion]: one aggregated emotion entry per segment
    """
    if not segments or not emotion_curve.timestamps:
        return []

    times = np.array(emotion_curve.timestamps)
    energy_arr = np.array(emotion_curve.energy_values)
    valence_arr = np.array(emotion_curve.valence_values)
    arousal_arr = np.array(emotion_curve.arousal_values)
    smoothed_arr = np.array(emotion_curve.smoothed_curve)

    aggregated: List[SegmentEmotion] = []
    for seg in segments:
        seg_start = float(seg.get("start", 0))
        seg_end = float(seg.get("end", 0))
        seg_label = str(seg.get("label", "unknown"))

        # Indices of curve samples that fall inside [start, end).
        idx = np.where((times >= seg_start) & (times < seg_end))[0]

        if len(idx) == 0:
            # No samples inside this segment: fall back to neutral defaults.
            aggregated.append(SegmentEmotion(
                start=seg_start,
                end=seg_end,
                label=seg_label,
                intensity=0.5,
                energy=0.5,
                valence=0.5,
                arousal=0.5,
                trend="stable",
            ))
            continue

        # Segment means for each emotion dimension.
        seg_intensity = float(np.mean(smoothed_arr[idx]))
        aggregated.append(SegmentEmotion(
            start=seg_start,
            end=seg_end,
            label=seg_label,
            intensity=round(seg_intensity, 3),
            energy=round(float(np.mean(energy_arr[idx])), 3),
            valence=round(float(np.mean(valence_arr[idx])), 3),
            arousal=round(float(np.mean(arousal_arr[idx])), 3),
            trend=_calculate_trend(smoothed_arr[idx], seg_intensity),
        ))

    return aggregated


def _calculate_trend(values: np.ndarray, avg_intensity: float) -> str:
    """
    Classify the emotional trend within one segment.

    Args:
        values: emotion values inside the segment
        avg_intensity: mean emotional intensity of the segment

    Returns:
        str: one of "rising", "falling", "stable", "peak"
    """
    threshold = 0.05  # 5% change threshold

    if len(values) < 3:
        return "stable"

    # Compare the mean of the first half against the second half.
    half = len(values) // 2
    delta = float(np.mean(values[half:])) - float(np.mean(values[:half]))

    # Sustained high intensity with little movement reads as a peak.
    if avg_intensity > 0.7 and abs(delta) < threshold:
        return "peak"
    if delta > threshold:
        return "rising"
    if delta < -threshold:
        return "falling"
    return "stable"


def extract_segment_emotions(
    audio_path: str,
    segments: List[Dict[str, Any]],
) -> List[SegmentEmotion]:
    """
    Convenience wrapper: extract the emotion curve and aggregate it per segment.

    Args:
        audio_path: path to the audio file
        segments: songformer segment list

    Returns:
        List[SegmentEmotion]: per-segment aggregated emotion data
    """
    return aggregate_emotion_by_segments(extract_emotion_curve(audio_path), segments)


def calculate_beat_density_by_segments(
    beat_timestamps: List[float],
    segments: List[Dict[str, Any]],
    tempo: float = 120.0,
) -> List[BeatDensityInfo]:
    """
    Compute per-segment beat density to guide shot-duration planning.

    Args:
        beat_timestamps: beat times in seconds
        segments: songformer segment list, e.g.
                  [{"start": 0.0, "end": 30.5, "label": "intro"}, ...]
        tempo: BPM, used as a fallback interval estimate and as a
               secondary signal when grading density

    Returns:
        List[BeatDensityInfo]: per-segment beat density info
    """
    if not segments or not beat_timestamps:
        return []

    beats = np.array(beat_timestamps)
    densities: List[BeatDensityInfo] = []

    for seg in segments:
        seg_start = float(seg.get("start", 0))
        seg_end = float(seg.get("end", 0))

        # Beats falling inside [start, end).
        in_segment = beats[(beats >= seg_start) & (beats < seg_end)]
        count = len(in_segment)

        # Average inter-beat interval; with fewer than two beats, fall back
        # to the interval implied by the global BPM.
        if count >= 2:
            mean_interval = float(np.mean(np.diff(in_segment)))
        else:
            mean_interval = 60.0 / tempo

        # Grade density: smaller interval or faster tempo = denser,
        # which recommends shorter shots.
        if mean_interval <= 0.3 or tempo >= 160:
            level, shot = "very_dense", "2-4秒"
        elif mean_interval <= 0.45 or tempo >= 130:
            level, shot = "dense", "3-5秒"
        elif mean_interval <= 0.6 or tempo >= 100:
            level, shot = "normal", "4-6秒"
        else:
            level, shot = "sparse", "6-10秒"

        densities.append(BeatDensityInfo(
            segment_label=str(seg.get("label", "unknown")),
            start=round(seg_start, 2),
            end=round(seg_end, 2),
            beat_count=count,
            avg_interval=round(mean_interval, 3),
            density_level=level,
            recommended_shot_duration=shot,
        ))

    return densities


def enhance_climax_points(
    climax_points: List[Dict[str, Any]],
    segments: List[Dict[str, Any]],
    music_duration: float,
) -> List[EnhancedClimaxInfo]:
    """
    Enrich climax points with buildup / sustain / wind-down duration guidance.

    Args:
        climax_points: raw climax points, e.g.
                       [{"time": 60.0, "intensity": "strong"}, ...]
        segments: songformer segment list (currently unused; kept for
                  API compatibility)
        music_duration: total music length in seconds

    Returns:
        List[EnhancedClimaxInfo]: enriched climax points, sorted by time
    """
    if not climax_points:
        return []

    # Phase durations keyed by intensity: (buildup, climax, winddown) seconds.
    phase_durations = {
        "strongest": (10.0, 20.0, 10.0),  # strongest climax: longer phases
        "strong": (5.0, 10.0, 5.0),       # default climax
    }
    default_phases = phase_durations["strong"]

    result: List[EnhancedClimaxInfo] = []

    # Process climax points in chronological order.
    sorted_climax = sorted(climax_points, key=lambda x: float(x.get("time", 0)))

    for i, climax in enumerate(sorted_climax):
        climax_time = float(climax.get("time", 0))
        intensity = str(climax.get("intensity", "strong"))
        buildup_duration, climax_duration, winddown_duration = phase_durations.get(
            intensity, default_phases
        )

        # Buildup cannot start before the track begins.
        buildup_start = max(0, climax_time - buildup_duration)

        # Avoid overlapping the previous climax's wind-down phase.
        if i > 0:
            prev = sorted_climax[i - 1]
            prev_winddown = phase_durations.get(
                str(prev.get("intensity", "strong")), default_phases
            )[2]
            prev_end = float(prev.get("time", 0)) + prev_winddown

            if buildup_start < prev_end:
                # Shift the buildup later, clamped to the climax time so the
                # buildup duration can never go negative (the previous code
                # produced negative durations when the prior wind-down ran
                # past this climax).
                buildup_start = min(prev_end, climax_time)
                buildup_duration = climax_time - buildup_start

        # Keep climax + wind-down within the end of the track; scale both
        # proportionally. remaining is clamped at 0 so a climax at or past
        # the track end yields zero-length phases instead of overrunning.
        overrun_window = climax_duration + winddown_duration
        if climax_time + overrun_window > music_duration:
            remaining = max(music_duration - climax_time, 0.0)
            ratio = remaining / overrun_window
            climax_duration *= ratio
            winddown_duration *= ratio

        result.append(EnhancedClimaxInfo(
            time=round(climax_time, 2),
            intensity=intensity,
            buildup_start=round(buildup_start, 2),
            buildup_duration=round(buildup_duration, 2),
            climax_duration=round(climax_duration, 2),
            winddown_duration=round(winddown_duration, 2),
        ))

    return result


def format_beat_density_for_prompt(beat_density_list: List[BeatDensityInfo]) -> str:
    """
    Render beat-density info as prompt-ready text, one line per segment.

    Args:
        beat_density_list: per-segment beat density info

    Returns:
        str: formatted text block
    """
    if not beat_density_list:
        return "(无节拍密度数据)"

    return "\n".join(
        f"- [{item.segment_label}] {item.start:.1f}s-{item.end:.1f}s: "
        f"节拍数={item.beat_count}, 平均间隔={item.avg_interval:.2f}s, "
        f"密度={item.density_level}, 推荐分镜时长={item.recommended_shot_duration}"
        for item in beat_density_list
    )


def format_enhanced_climax_for_prompt(enhanced_climax_list: List[EnhancedClimaxInfo]) -> str:
    """
    Render enhanced climax info as prompt-ready text, one multi-line entry
    per climax point (buildup / climax / wind-down phases).

    Args:
        enhanced_climax_list: enriched climax point info

    Returns:
        str: formatted text block
    """
    if not enhanced_climax_list:
        return "(无高潮点数据)"

    entries = []
    for item in enhanced_climax_list:
        climax_end = item.time + item.climax_duration
        winddown_end = climax_end + item.winddown_duration
        entries.append(
            f"- 高潮点 {item.time:.1f}s ({item.intensity}):\n"
            f"  · 铺垫阶段: {item.buildup_start:.1f}s - {item.time:.1f}s (约{item.buildup_duration:.1f}秒)\n"
            f"  · 高潮阶段: {item.time:.1f}s - {climax_end:.1f}s (约{item.climax_duration:.1f}秒)\n"
            f"  · 缓冲阶段: {climax_end:.1f}s - {winddown_end:.1f}s (约{item.winddown_duration:.1f}秒)"
        )
    return "\n".join(entries)