# gain/scripts/step3_generate_audio.py

import json
import html
from pathlib import Path
from google.cloud import texttospeech
from google.oauth2 import service_account

def generate_ssml_content(item_en, item_zh, english_voice_1, english_voice_2, chinese_voice):
    """Build the SSML document that drills one vocabulary item.

    The spoken sequence is: ``item_en`` by ``english_voice_1`` at medium
    rate, ``item_en`` by ``english_voice_2`` at 70% rate, ``item_zh`` by
    ``chinese_voice`` (pitch raised by two semitones), and finally
    ``item_en`` by ``english_voice_2`` at 110% rate, with pauses in between.

    The texts are interpolated verbatim; no escaping is performed here, so
    callers must pass XML-safe strings.
    """
    outer = " " * 4
    inner = " " * 8
    deepest = " " * 12

    def pause(duration):
        # One silent gap of the given SSML duration.
        return f'{inner}<break time="{duration}"/>'

    def spoken(voice, rate, pitch, text):
        # One <voice> element wrapping a single <prosody> utterance.
        return [
            f'{inner}<voice name="{voice}">',
            f'{deepest}<prosody rate="{rate}" pitch="{pitch}">{text}</prosody>',
            f'{inner}</voice>',
        ]

    # The leading empty string and the trailing indent-only entry reproduce
    # the newline after the opening quotes and the indent before the closing
    # quotes of a triple-quoted template.
    pieces = ["", f"{outer}<speak>", pause("2s")]
    pieces.extend(spoken(english_voice_1, "medium", "medium", item_en))
    pieces.append(pause("2s"))
    pieces.extend(spoken(english_voice_2, "70%", "medium", item_en))
    pieces.append(pause("2s"))
    pieces.extend(spoken(chinese_voice, "medium", "+2st", item_zh))
    pieces.append(pause("1.5s"))
    pieces.extend(spoken(english_voice_2, "110%", "medium", item_en))
    pieces.append(pause("1s"))
    pieces.extend([f"{outer}</speak>", outer])
    return "\n".join(pieces)
def run_step3_generate_audio(
    project_path: Path,
    google_creds_path,
    english_voice_1: str = "en-US-Wavenet-I",
    english_voice_2: str = "en-US-Wavenet-F",
    chinese_voice: str = "cmn-TW-Wavenet-B",
):
    """Generate one WAV file per vocabulary item in ``data.json``.

    Reads ``<project_path>/data.json``, takes the newline-separated strings
    stored under its ``"en"`` and ``"zh"`` keys, pairs them line by line and
    synthesizes each pair into ``<project_path>/audio/vocab_NN.wav`` (NN is
    the zero-based index of the pair, padded to two digits).

    Args:
        project_path: Project directory containing ``data.json``. A plain
            string path is accepted as well as a ``Path``.
        google_creds_path: Service-account key file, passed to
            ``Credentials.from_service_account_file``.
        english_voice_1: Voice name for the first English reading.
        english_voice_2: Voice name for the slow and fast English readings.
        chinese_voice: Voice name for the Chinese reading.

    Returns:
        A ``(success, message)`` tuple. Failures, including unexpected
        exceptions, are reported as ``(False, message)`` instead of being
        raised. The ``audio`` folder is created before the input is
        validated, so it exists even when validation fails.
    """
    try:
        # Accept str paths too; otherwise `/` below raises TypeError and the
        # caller only sees the generic "unexpected error" message.
        project_path = Path(project_path)

        # 1. Resolve input and output locations.
        json_file_path = project_path / "data.json"
        output_audio_folder = project_path / "audio"
        output_audio_folder.mkdir(parents=True, exist_ok=True)

        # 2. Load the source data.
        if not json_file_path.exists():
            return False, f"錯誤：找不到 JSON 檔案 {json_file_path}"

        with open(json_file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # "en" and "zh" each hold a single newline-separated string; a
        # missing key is treated as an empty string.
        en_text = data.get("en", "")
        zh_text = data.get("zh", "")

        # Split into stripped lines, dropping blank ones.
        en_lines = [line.strip() for line in en_text.split('\n') if line.strip()]
        zh_lines = [line.strip() for line in zh_text.split('\n') if line.strip()]

        # Both lists must be non-empty and pair up one to one.
        if not en_lines or not zh_lines or len(en_lines) != len(zh_lines):
            return False, "錯誤：JSON 檔案中的英文和中文句子列表為空或長度不匹配。"

        # 3. Initialise the Google Text-to-Speech client.
        creds = service_account.Credentials.from_service_account_file(google_creds_path)
        client = texttospeech.TextToSpeechClient(credentials=creds)

        # These request settings are identical for every item, so build them
        # once instead of on every iteration.
        voice_params = texttospeech.VoiceSelectionParams(language_code="en-US")
        audio_config = texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.LINEAR16,
            sample_rate_hertz=24000,
        )

        # 4. Synthesize each item into its own file.
        total_files = len(en_lines)
        for i, (item_en, item_zh) in enumerate(zip(en_lines, zh_lines)):
            print(f"正在處理第 {i+1}/{total_files} 個檔案: {item_en}")

            # The text is embedded in SSML markup, so escape XML metacharacters.
            ssml_content = generate_ssml_content(
                html.escape(item_en),
                html.escape(item_zh),
                english_voice_1,
                english_voice_2,
                chinese_voice,
            )
            response = client.synthesize_speech(
                input=texttospeech.SynthesisInput(ssml=ssml_content),
                voice=voice_params,
                audio_config=audio_config,
            )

            output_file = output_audio_folder / f"vocab_{i:02d}.wav"
            with open(output_file, "wb") as out:
                out.write(response.audio_content)

        return True, f"成功！已在 '{output_audio_folder}' 資料夾中生成 {total_files} 個音訊檔案。"

    except Exception as e:
        # Top-level boundary: report any failure to the caller as a message.
        error_message = f"生成音訊時發生未預期的錯誤: {e}"
        print(error_message)
        return False, error_message