# scripts/step5_generate_ass.py
import json
import re
from pathlib import Path

import librosa
import pysubs2  # the .ass file is built with the pysubs2 library

# U+24EA is CIRCLED DIGIT ZERO; U+2460..U+2468 are CIRCLED DIGIT ONE..NINE.
_CIRCLED_ZERO = '\u24ea'
_CIRCLED_ONE_CODEPOINT = 0x2460


def number_to_unicode_circled(num):
    """
    Convert a non-negative integer to Unicode circled-digit characters.

    Each decimal digit is mapped on its own, so multi-digit numbers become a
    sequence of circled digits (for example 10 -> "①⓪"); the dedicated
    code points for circled 10-20 are not used.

    Args:
        num: The integer to convert.

    Returns:
        The circled-digit string, or None when ``num`` is negative.
    """
    if num < 0:
        return None
    if num == 0:
        return _CIRCLED_ZERO

    glyphs = []
    while num > 0:
        num, digit = divmod(num, 10)
        if digit == 0:
            glyphs.append(_CIRCLED_ZERO)
        else:
            glyphs.append(chr(_CIRCLED_ONE_CODEPOINT + digit - 1))
    # Digits were collected least-significant first.
    return ''.join(reversed(glyphs))


def _split_lines(data, key):
    """Return the non-blank, stripped lines of the text stored under ``key``."""
    return [line.strip() for line in data.get(key, "").split('\n') if line.strip()]


def run_step5_generate_ass(project_path: Path):
    """
    Generate a four-layer .ass subtitle file from the project's audio clips
    and JSON text.

    Reads ``data.json`` (keys "en", "zh" and "ipa", each a newline-separated
    block of text) and the ``audio/vocab_NN.wav`` files under
    ``project_path``. Every clip gets four simultaneous events (EN, IPA, ZH
    and a circled index number), and the clips are laid out back to back on
    the timeline in numeric file order. The result is written to
    ``output/subtitles.ass``.

    Args:
        project_path (Path): Root directory of the project.

    Returns:
        tuple[bool, str, Path | None]: (whether the operation succeeded,
        a message, the output file path or None on failure). Exceptions are
        not propagated; they are reported through the failure tuple.
    """
    try:
        # 1. Define paths.
        json_file_path = project_path / "data.json"
        audio_folder = project_path / "audio"
        output_dir = project_path / "output"
        final_ass_path = output_dir / "subtitles.ass"

        # Make sure the output folder exists.
        output_dir.mkdir(parents=True, exist_ok=True)

        # 2. Load the JSON data and split each text block into lines.
        with open(json_file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        en_lines = _split_lines(data, "en")
        zh_lines = _split_lines(data, "zh")
        ipa_lines = _split_lines(data, "ipa")
        number_list = [number_to_unicode_circled(i + 1) for i in range(len(en_lines))]

        # 3. Collect the audio files and sort them by their numeric suffix.
        file_pattern = r"vocab_(\d{2})\.wav"
        pattern = re.compile(file_pattern)
        numbered_files = []
        for candidate in audio_folder.iterdir():
            match = pattern.fullmatch(candidate.name)
            if match and candidate.is_file():
                numbered_files.append((int(match.group(1)), candidate))
        numbered_files.sort(key=lambda pair: pair[0])
        wav_files = [path for _, path in numbered_files]

        # 4. Check that every source has the same number of entries.
        if not (len(wav_files) == len(en_lines) == len(zh_lines) == len(ipa_lines)):
            msg = f"錯誤:音訊({len(wav_files)}), EN({len(en_lines)}), ZH({len(zh_lines)}), IPA({len(ipa_lines)}) 數量不一致!"
            return False, msg, None

        # 5. Build the .ass file with pysubs2.
        subs = pysubs2.SSAFile()

        # Script resolution (meant to match the video).
        subs.info["PlayResX"] = "1920"
        subs.info["PlayResY"] = "1080"

        # Style definitions (copied from gen_ass.py).
        subs.styles["EN"] = pysubs2.SSAStyle(
            fontname="Noto Sans", fontsize=140,
            primarycolor=pysubs2.Color(255, 248, 231),
            outlinecolor=pysubs2.Color(255, 248, 231),
            outline=2, borderstyle=1,
            alignment=pysubs2.Alignment.TOP_CENTER, marginv=280,
        )
        subs.styles["IPA"] = pysubs2.SSAStyle(
            fontname="Noto Sans", fontsize=110,
            primarycolor=pysubs2.Color(255, 140, 0),
            outlinecolor=pysubs2.Color(255, 140, 0),
            outline=1, borderstyle=1,
            alignment=pysubs2.Alignment.TOP_CENTER, marginv=340,
        )
        subs.styles["ZH"] = pysubs2.SSAStyle(
            fontname="Noto Sans TC", fontsize=140,
            primarycolor=pysubs2.Color(102, 128, 153),
            outlinecolor=pysubs2.Color(102, 128, 153),
            outline=1, borderstyle=1,
            alignment=pysubs2.Alignment.TOP_CENTER, marginv=440,
        )
        subs.styles["NUMBER"] = pysubs2.SSAStyle(
            fontname="Segoe UI Symbol", fontsize=120,
            primarycolor=pysubs2.Color(204, 136, 0),
            outlinecolor=pysubs2.Color(204, 136, 0),
            bold=True, scalex=120, outline=1, borderstyle=1,
            alignment=pysubs2.Alignment.TOP_RIGHT,
            marginl=0, marginr=260, marginv=160,
        )

        # 6. Walk the audio files and emit the subtitle events.
        # The running total is kept in seconds and each boundary is rounded
        # once. Truncating every clip to whole milliseconds before summing
        # would lose up to 1 ms per clip and make later cues drift early.
        elapsed_s = 0.0
        start_time = 0
        for wav_path, en_text, ipa_text, zh_text, number_text in zip(
            wav_files, en_lines, ipa_lines, zh_lines, number_list
        ):
            elapsed_s += librosa.get_duration(path=str(wav_path))
            end_time = round(elapsed_s * 1000)

            # Four stacked layers sharing the same time span.
            subs.append(pysubs2.SSAEvent(start=start_time, end=end_time, text=en_text, style="EN"))
            subs.append(pysubs2.SSAEvent(start=start_time, end=end_time, text=f"[{ipa_text}]", style="IPA"))
            subs.append(pysubs2.SSAEvent(start=start_time, end=end_time, text=zh_text, style="ZH"))
            subs.append(pysubs2.SSAEvent(start=start_time, end=end_time, text=number_text, style="NUMBER"))

            # The next clip starts exactly where this one ends.
            start_time = end_time

        # 7. Save the .ass file.
        subs.save(str(final_ass_path))

        return True, f"✅ ASS 字幕檔已成功生成並儲存至 '{final_ass_path}'", final_ass_path

    except Exception as e:
        # Boundary handler: callers expect a status tuple, not an exception.
        return False, f"❌ 在生成 ASS 字幕時發生未預期的錯誤: {e}", None