import json
import html
from pathlib import Path

from google.cloud import texttospeech
from google.oauth2 import service_account


def generate_ssml_content(item_en, item_zh, english_voice_1, english_voice_2, chinese_voice):
    """Build the SSML document that reads one vocabulary item aloud.

    The item is spoken four times, in this order: English, English,
    Chinese, English. Each utterance is wrapped in its own ``<voice>``
    element so the requested voices are actually applied.

    Args:
        item_en: English text, already XML-escaped by the caller.
        item_zh: Chinese text, already XML-escaped by the caller.
        english_voice_1: Voice name for the first and last English reading.
        english_voice_2: Voice name for the second English reading.
        chinese_voice: Voice name for the Chinese reading.

    Returns:
        A string holding a complete ``<speak>`` document.
    """

    def voiced(voice_name, escaped_text):
        # The voice name lands inside an attribute value, so escape it
        # (including quotes). The text is NOT escaped again here, because
        # the caller has already done it and double-escaping would make
        # the engine read out the entity names.
        safe_name = html.escape(voice_name, quote=True)
        return f'<voice name="{safe_name}">{escaped_text}</voice>'

    # NOTE(review): which English voice reads the final repetition is a
    # choice made here (voice 1); confirm against the intended lesson format.
    segments = [
        voiced(english_voice_1, item_en),
        voiced(english_voice_2, item_en),
        voiced(chinese_voice, item_zh),
        voiced(english_voice_1, item_en),
    ]
    return "<speak>\n" + "\n".join(segments) + "\n</speak>"


def run_step3_generate_audio(
    project_path: Path,
    google_creds_path,
    english_voice_1: str = "en-US-Wavenet-I",
    english_voice_2: str = "en-US-Wavenet-F",
    chinese_voice: str = "cmn-TW-Wavenet-B",
):
    """Generate one audio file per vocabulary item.

    Reads ``data.json`` from ``project_path``. Its ``"en"`` and ``"zh"``
    values are newline-separated strings; blank lines are dropped and the
    remaining lines are paired up by position. Each pair is synthesized
    with Google Cloud Text-to-Speech and written to
    ``<project_path>/audio/vocab_NN.wav``, where ``NN`` is the zero-based
    index padded to two digits.

    Args:
        project_path: Directory containing ``data.json``; the ``audio``
            sub-directory is created inside it if missing.
        google_creds_path: Path to the service-account JSON key file used
            to authenticate the Text-to-Speech client.
        english_voice_1: Voice name passed to ``generate_ssml_content``.
        english_voice_2: Voice name passed to ``generate_ssml_content``.
        chinese_voice: Voice name passed to ``generate_ssml_content``.

    Returns:
        A ``(success, message)`` tuple. ``success`` is ``False`` when the
        JSON file is missing, when the line lists are empty or of unequal
        length, or when any exception is raised; ``message`` describes the
        outcome. No exception propagates to the caller.
    """
    # The broad ``except`` below is deliberate: callers rely on the
    # (bool, message) return value rather than on exceptions.
    try:
        # 1. Resolve the input file and make sure the output folder exists.
        json_file_path = project_path / "data.json"
        output_audio_folder = project_path / "audio"
        output_audio_folder.mkdir(parents=True, exist_ok=True)

        # 2. Load the source text.
        if not json_file_path.exists():
            return False, f"錯誤：找不到 JSON 檔案 {json_file_path}"

        with open(json_file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Missing keys fall back to an empty string, which then fails the
        # validation below with a readable message.
        en_text = data.get("en", "")
        zh_text = data.get("zh", "")

        # Split on newlines, trimming whitespace and discarding blank lines.
        en_lines = [line.strip() for line in en_text.split('\n') if line.strip()]
        zh_lines = [line.strip() for line in zh_text.split('\n') if line.strip()]

        # Lines are paired by position, so both lists must be non-empty and
        # the same length.
        if not en_lines or not zh_lines or len(en_lines) != len(zh_lines):
            return False, "錯誤：JSON 檔案中的英文和中文句子列表為空或長度不匹配。"

        # 3. Create the authenticated Text-to-Speech client.
        creds = service_account.Credentials.from_service_account_file(google_creds_path)
        client = texttospeech.TextToSpeechClient(credentials=creds)

        # These request settings are identical for every item, so build
        # them once instead of on each loop iteration.
        # The language code is the request-level setting; the per-utterance
        # voices are named by the <voice> elements in the SSML.
        voice_params = texttospeech.VoiceSelectionParams(language_code="en-US")
        audio_config = texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.LINEAR16,
            sample_rate_hertz=24000,
        )

        # 4. Synthesize and save one file per vocabulary item.
        total_files = len(en_lines)
        for i, (item_en, item_zh) in enumerate(zip(en_lines, zh_lines)):
            print(f"正在處理第 {i+1}/{total_files} 個檔案: {item_en}")

            # Escape the text so characters such as '&' or '<' cannot break
            # the SSML markup.
            safe_item_en = html.escape(item_en)
            safe_item_zh = html.escape(item_zh)

            ssml_content = generate_ssml_content(
                safe_item_en,
                safe_item_zh,
                english_voice_1,
                english_voice_2,
                chinese_voice,
            )
            synthesis_input = texttospeech.SynthesisInput(ssml=ssml_content)

            response = client.synthesize_speech(
                input=synthesis_input,
                voice=voice_params,
                audio_config=audio_config,
            )

            output_file = output_audio_folder / f"vocab_{i:02d}.wav"
            with open(output_file, "wb") as out:
                out.write(response.audio_content)

        return True, f"成功！已在 '{output_audio_folder}' 資料夾中生成 {total_files} 個音訊檔案。"

    except Exception as e:
        error_message = f"生成音訊時發生未預期的錯誤: {e}"
        print(error_message)
        return False, error_message