104 lines
4.0 KiB
Python
104 lines
4.0 KiB
Python
import json
|
||
import html
|
||
from pathlib import Path
|
||
from google.cloud import texttospeech
|
||
from google.oauth2 import service_account
|
||
|
||
def generate_ssml_content(item_en, item_zh, english_voice_1, english_voice_2, chinese_voice):
    """Build the SSML document that reads one vocabulary item aloud.

    The document plays, in order: the English text with ``english_voice_1``
    at medium rate, the English text with ``english_voice_2`` at 70% rate,
    the Chinese text with ``chinese_voice`` at pitch ``+2st``, and the
    English text once more with ``english_voice_2`` at 110% rate. A pause
    precedes the first segment and follows every segment.

    The texts and voice names are interpolated verbatim; no escaping is
    performed here.

    Returns:
        The SSML markup as a string, wrapped in a ``<speak>`` element.
    """
    # One row per spoken segment:
    # (voice name, speaking rate, pitch, text, pause after the segment).
    segments = (
        (english_voice_1, "medium", "medium", item_en, "2s"),
        (english_voice_2, "70%", "medium", item_en, "2s"),
        (chinese_voice, "medium", "+2st", item_zh, "1.5s"),
        (english_voice_2, "110%", "medium", item_en, "1s"),
    )

    # The leading and trailing empty entries reproduce the newline right
    # after the opening and right before the closing of the markup.
    parts = ["", "<speak>", '<break time="2s"/>']
    for voice, rate, pitch, text, pause in segments:
        parts.append(f'<voice name="{voice}">')
        parts.append(f'<prosody rate="{rate}" pitch="{pitch}">{text}</prosody>')
        parts.append("</voice>")
        parts.append(f'<break time="{pause}"/>')
    parts.append("</speak>")
    parts.append("")
    return "\n".join(parts)
|
||
def _split_sentences(raw):
    """Normalise one language's entry from ``data.json`` into a sentence list.

    Accepts a newline-separated string, a list/tuple of strings (a JSON
    array), or ``None`` (a JSON ``null``). Each sentence is stripped and
    blank ones are dropped.

    Raises:
        TypeError: if ``raw`` is any other type.
    """
    if raw is None:
        return []
    if isinstance(raw, str):
        candidates = raw.split('\n')
    elif isinstance(raw, (list, tuple)):
        candidates = raw
    else:
        raise TypeError(f"unsupported sentence container: {type(raw).__name__}")
    stripped = (text.strip() for text in candidates)
    return [text for text in stripped if text]


def run_step3_generate_audio(
    project_path: Path,
    google_creds_path,
    english_voice_1: str = "en-US-Wavenet-I",
    english_voice_2: str = "en-US-Wavenet-F",
    chinese_voice: str = "cmn-TW-Wavenet-B",
):
    """Generate a separate audio file for each vocabulary item.

    Reads ``data.json`` from ``project_path``, pairs the sentences under its
    ``"en"`` and ``"zh"`` keys, synthesises one SSML document per pair with
    Google Text-to-Speech, and writes the results to
    ``<project_path>/audio/vocab_NN.wav`` (``NN`` is the zero-based index).

    Args:
        project_path: Project directory containing ``data.json``.
        google_creds_path: Path to the service-account credentials file.
        english_voice_1: Voice name for the first English reading.
        english_voice_2: Voice name for the slow and fast English readings.
        chinese_voice: Voice name for the Chinese reading.

    Returns:
        A ``(success, message)`` tuple. Failures, including unexpected
        exceptions, are reported through the tuple rather than raised.
    """
    try:
        # 1. Define paths.
        json_file_path = project_path / "data.json"
        output_audio_folder = project_path / "audio"
        output_audio_folder.mkdir(parents=True, exist_ok=True)

        # 2. Load the data from the JSON file.
        if not json_file_path.exists():
            return False, f"錯誤:找不到 JSON 檔案 {json_file_path}"

        with open(json_file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Each key may hold a newline-separated string or a JSON array; a
        # missing key or a null value is treated as "no sentences" so that it
        # is reported by the validation below rather than as a crash.
        en_lines = _split_sentences(data.get("en", ""))
        zh_lines = _split_sentences(data.get("zh", ""))

        # Validate: both lists must be non-empty and pair up one-to-one.
        if not en_lines or not zh_lines or len(en_lines) != len(zh_lines):
            return False, "錯誤:JSON 檔案中的英文和中文句子列表為空或長度不匹配。"

        # 3. Initialise the Google Text-to-Speech client.
        creds = service_account.Credentials.from_service_account_file(google_creds_path)
        client = texttospeech.TextToSpeechClient(credentials=creds)

        # These request parameters are identical for every item, so they are
        # built once. language_code is fixed to en-US here; the per-segment
        # voices are named inside the SSML itself.
        voice_params = texttospeech.VoiceSelectionParams(language_code="en-US")
        audio_config = texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.LINEAR16,
            sample_rate_hertz=24000,
        )

        # 4. Synthesise audio for each vocabulary item in turn.
        total_files = len(en_lines)
        for i, (item_en, item_zh) in enumerate(zip(en_lines, zh_lines)):
            print(f"正在處理第 {i+1}/{total_files} 個檔案: {item_en}")

            # Escape the text so characters such as & or < cannot break the
            # SSML markup it is embedded in.
            safe_item_en = html.escape(item_en)
            safe_item_zh = html.escape(item_zh)

            ssml_content = generate_ssml_content(
                safe_item_en, safe_item_zh, english_voice_1, english_voice_2, chinese_voice
            )
            synthesis_input = texttospeech.SynthesisInput(ssml=ssml_content)

            response = client.synthesize_speech(
                input=synthesis_input,
                voice=voice_params,
                audio_config=audio_config,
            )

            output_file = output_audio_folder / f"vocab_{i:02d}.wav"
            output_file.write_bytes(response.audio_content)

        return True, f"成功!已在 '{output_audio_folder}' 資料夾中生成 {total_files} 個音訊檔案。"

    except Exception as e:
        # Top-level boundary: callers expect a (False, message) result, not an
        # exception, so every failure is reported through the return value.
        error_message = f"生成音訊時發生未預期的錯誤: {e}"
        print(error_message)
        return False, error_message