Files
gain/scripts/step4_concatenate_audio.py
2025-07-08 15:27:03 +08:00

79 lines
3.0 KiB
Python

# scripts/step4_concatenate_audio.py
import re
from pathlib import Path
from pydub import AudioSegment
def _concatenate_audio_files(
    audio_folder: Path,
    file_pattern: str,
    output_path: Path,
    delay_ms: int = 500,
    end_delay_ms: int = 2000,
):
    """Find, numerically sort, and concatenate audio files into one file.

    Every regular file directly inside ``audio_folder`` whose *entire* name
    matches ``file_pattern`` is collected, ordered by the integer captured in
    group 1 of the pattern, and appended to a single track that is then
    exported to ``output_path``.

    Args:
        audio_folder: Directory that holds the source audio files.
        file_pattern: Regular expression applied to each file name. It must
            contain a capture group 1 that captures the digits used for
            ordering.
        output_path: Destination file. Missing parent directories are
            created. The export format follows the file suffix for
            ``.wav``, ``.mp3``, ``.ogg`` and ``.flac``; any other suffix
            falls back to ``mp3``.
        delay_ms: Milliseconds of silence placed before the first file.
        end_delay_ms: Milliseconds of silence placed after the last file.

    Raises:
        FileNotFoundError: ``audio_folder`` is not a directory, or no file
            name in it matches ``file_pattern``.
        RuntimeError: Files matched, but none of them could be decoded.
    """
    if not audio_folder.is_dir():
        raise FileNotFoundError(f"Audio source folder '{audio_folder}' does not exist.")

    # Collect (path, sequence number) pairs. fullmatch() is used so that
    # leftovers such as "vocab_01.wav.bak" are not mistaken for real inputs.
    compiled_pattern = re.compile(file_pattern)
    matching_files = []
    for filepath in audio_folder.iterdir():
        if not filepath.is_file():
            continue
        match = compiled_pattern.fullmatch(filepath.name)
        if match is None:
            continue
        number_text = match.group(1)
        # An optional group that did not participate yields None.
        if number_text and number_text.isdigit():
            matching_files.append((filepath, int(number_text)))

    if not matching_files:
        raise FileNotFoundError(f"No files matching pattern '{file_pattern}' found in '{audio_folder}'.")

    # Numeric order first; the file name breaks ties so the result does not
    # depend on the arbitrary order returned by iterdir().
    matching_files.sort(key=lambda item: (item[1], item[0].name))

    print("Found and sorted the following files for concatenation:")
    for file_path, _ in matching_files:
        print(f"- {file_path.name}")

    # Leading silence, then every file that can be decoded.
    combined_audio = AudioSegment.silent(duration=delay_ms)
    loaded_count = 0
    for audio_file_path, _ in matching_files:
        try:
            segment = AudioSegment.from_file(audio_file_path)
            combined_audio += segment
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole run.
            print(f"Warning: Could not process file '{audio_file_path.name}'. Skipping. Error: {e}")
        else:
            loaded_count += 1

    # Exporting pure silence and reporting success would hide a total failure.
    if loaded_count == 0:
        raise RuntimeError(
            f"None of the {len(matching_files)} matching files in '{audio_folder}' could be decoded."
        )

    # Trailing silence.
    combined_audio += AudioSegment.silent(duration=end_delay_ms)

    # Pick the encoder from the destination suffix so that a ".wav" path really
    # contains WAV data; unknown suffixes keep the historical "mp3" default.
    suffix_to_format = {".wav": "wav", ".mp3": "mp3", ".ogg": "ogg", ".flac": "flac"}
    export_format = suffix_to_format.get(output_path.suffix.lower(), "mp3")

    output_path.parent.mkdir(parents=True, exist_ok=True)
    combined_audio.export(output_path, format=export_format)
    print(f"Successfully concatenated audio to '{output_path}'")
def run_step4_concatenate_audio(project_path: Path):
    """Run Step 4: merge the project's ``vocab_NN.wav`` clips into one file.

    Looks in ``<project_path>/audio`` for files named ``vocab_NN.wav`` (two
    digits), concatenates them in numeric order with no leading silence, and
    writes the result to ``<project_path>/output/combined_audio.wav``. The
    actual encoding of that file is decided by ``_concatenate_audio_files``.

    Args:
        project_path: Root directory of the project.

    Returns:
        A ``(success, message)`` tuple. ``success`` is ``True`` when the
        combined file was written and ``False`` otherwise; ``message`` is a
        human-readable status line. Both the success and the failure paths
        return the same two-element shape so callers can always unpack it
        the same way.
    """
    try:
        audio_folder = project_path / "audio"
        output_dir = project_path / "output"
        output_wav_path = output_dir / "combined_audio.wav"

        # Naming scheme of the audio files this step consumes.
        file_pattern = r"vocab_(\d{2})\.wav"

        _concatenate_audio_files(
            audio_folder=audio_folder,
            file_pattern=file_pattern,
            output_path=output_wav_path,
            delay_ms=0,  # No leading silence.
        )
    except FileNotFoundError as e:
        # Missing audio folder or no matching clips: report, do not raise.
        return False, f"❌ Error: {e}"
    except Exception as e:
        # Top-level boundary for this step: convert any failure to a status.
        return False, f"❌ An unexpected error occurred during audio concatenation: {e}"

    return True, f"✅ Audio successfully concatenated and saved to '{output_wav_path}'"