Files
gain/scripts/step3_generate_audio.py
2025-07-08 15:27:03 +08:00

104 lines
4.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import html
from pathlib import Path
from google.cloud import texttospeech
from google.oauth2 import service_account
def generate_ssml_content(item_en, item_zh, english_voice_1, english_voice_2, chinese_voice):
    """Build the SSML document for one vocabulary item.

    The spoken sequence is: 2s pause, English at medium rate
    (``english_voice_1``), 2s pause, English at 70% rate
    (``english_voice_2``), 2s pause, Chinese with pitch ``+2st``
    (``chinese_voice``), 1.5s pause, English at 110% rate
    (``english_voice_2``), 1s pause.

    Args:
        item_en: English text. Inserted verbatim as element content, so the
            caller must pass text that is already XML-escaped.
        item_zh: Chinese text. Same escaping requirement as ``item_en``.
        english_voice_1: Voice name used for the first English reading.
        english_voice_2: Voice name used for the slow and fast English readings.
        chinese_voice: Voice name used for the Chinese reading.

    Returns:
        The SSML markup as a string (begins and ends with a newline).
    """
    # Voice names end up inside double-quoted XML attributes; escape them
    # (including quotes) so an unusual name cannot break the markup.
    # For ordinary voice names this is a no-op.
    voice_en_1 = html.escape(str(english_voice_1), quote=True)
    voice_en_2 = html.escape(str(english_voice_2), quote=True)
    voice_zh = html.escape(str(chinese_voice), quote=True)
    return f"""
<speak>
<break time="2s"/>
<voice name="{voice_en_1}">
<prosody rate="medium" pitch="medium">{item_en}</prosody>
</voice>
<break time="2s"/>
<voice name="{voice_en_2}">
<prosody rate="70%" pitch="medium">{item_en}</prosody>
</voice>
<break time="2s"/>
<voice name="{voice_zh}">
<prosody rate="medium" pitch="+2st">{item_zh}</prosody>
</voice>
<break time="1.5s"/>
<voice name="{voice_en_2}">
<prosody rate="110%" pitch="medium">{item_en}</prosody>
</voice>
<break time="1s"/>
</speak>
"""
def run_step3_generate_audio(
project_path: Path,
google_creds_path,
english_voice_1: str = "en-US-Wavenet-I",
english_voice_2: str = "en-US-Wavenet-F",
chinese_voice: str = "cmn-TW-Wavenet-B",
):
"""
為每個詞彙項目生成獨立的音訊檔案。
"""
try:
# 1. 定義路徑
json_file_path = project_path / "data.json"
output_audio_folder = project_path / "audio"
output_audio_folder.mkdir(parents=True, exist_ok=True)
# 2. 從 JSON 檔案載入資料
if not json_file_path.exists():
return False, f"錯誤:找不到 JSON 檔案 {json_file_path}"
with open(json_file_path, 'r', encoding='utf-8') as f:
data = json.load(f)
# --- ✨ 核心修改處:將字串分割成列表 ---
# 首先獲取完整的字串,如果鍵不存在則返回空字串
en_text = data.get("en", "")
zh_text = data.get("zh", "")
# 使用換行符 '\n' 將字串分割成列表,並過濾掉空行
en_lines = [line.strip() for line in en_text.split('\n') if line.strip()]
zh_lines = [line.strip() for line in zh_text.split('\n') if line.strip()]
# 現在 en_lines 和 zh_lines 是我們期望的列表格式了
# 進行驗證
if not en_lines or not zh_lines or len(en_lines) != len(zh_lines):
return False, "錯誤JSON 檔案中的英文和中文句子列表為空或長度不匹配。"
# 3. 初始化 Google Text-to-Speech 客戶端
creds = service_account.Credentials.from_service_account_file(google_creds_path)
client = texttospeech.TextToSpeechClient(credentials=creds)
# 4. 迴圈遍歷每個詞彙項目並合成音訊
total_files = len(en_lines)
for i, (item_en, item_zh) in enumerate(zip(en_lines, zh_lines)):
print(f"正在處理第 {i+1}/{total_files} 個檔案: {item_en}")
safe_item_en = html.escape(item_en)
safe_item_zh = html.escape(item_zh)
ssml_content = generate_ssml_content(safe_item_en, safe_item_zh, english_voice_1, english_voice_2, chinese_voice)
synthesis_input = texttospeech.SynthesisInput(ssml=ssml_content)
voice_params = texttospeech.VoiceSelectionParams(language_code="en-US")
audio_config = texttospeech.AudioConfig(
audio_encoding=texttospeech.AudioEncoding.LINEAR16,
sample_rate_hertz=24000
)
response = client.synthesize_speech(
input=synthesis_input,
voice=voice_params,
audio_config=audio_config
)
output_file = output_audio_folder / f"vocab_{i:02d}.wav"
with open(output_file, "wb") as out:
out.write(response.audio_content)
return True, f"成功!已在 '{output_audio_folder}' 資料夾中生成 {total_files} 個音訊檔案。"
except Exception as e:
error_message = f"生成音訊時發生未預期的錯誤: {e}"
print(error_message)
return False, error_message