79 lines
3.0 KiB
Python
79 lines
3.0 KiB
Python
# scripts/step4_concatenate_audio.py
|
|
import re
|
|
from pathlib import Path
|
|
from pydub import AudioSegment
|
|
|
|
def _concatenate_audio_files(
    audio_folder: Path,
    file_pattern: str,
    output_path: Path,
    delay_ms: int = 500,
    end_delay_ms: int = 2000,
):
    """Concatenate numbered audio files from a folder into one audio file.

    Files in ``audio_folder`` whose *entire* name matches ``file_pattern`` are
    ordered by the integer captured in the pattern's first group, joined back
    to back between a leading and a trailing stretch of silence, and exported
    to ``output_path``. Files that cannot be decoded are reported on stdout
    and skipped.

    Args:
        audio_folder: Directory holding the source audio files.
        file_pattern: Regular expression applied to each file name. Its first
            capture group must capture the numeric index used for ordering.
        output_path: Destination file; missing parent directories are created.
            The export format is taken from the path's suffix (``.wav`` ->
            ``"wav"``); a path without a suffix is exported as ``"mp3"``.
        delay_ms: Length of the leading silence, in milliseconds.
        end_delay_ms: Length of the trailing silence, in milliseconds.

    Raises:
        FileNotFoundError: If ``audio_folder`` is not a directory, or no file
            name in it matches ``file_pattern``.
        RuntimeError: If files matched but none of them could be decoded, so
            the result would contain nothing but silence.
    """
    if not audio_folder.is_dir():
        raise FileNotFoundError(f"Audio source folder '{audio_folder}' does not exist.")

    # Find all files matching the pattern and extract the number for sorting.
    compiled_pattern = re.compile(file_pattern)
    matching_files = []
    for filepath in audio_folder.iterdir():
        if not filepath.is_file():
            continue
        # fullmatch, not match: a prefix match would also accept names with
        # trailing junk such as "vocab_01.wav.bak".
        match = compiled_pattern.fullmatch(filepath.name)
        if match and match.group(1).isdigit():
            matching_files.append((filepath, int(match.group(1))))

    if not matching_files:
        raise FileNotFoundError(f"No files matching pattern '{file_pattern}' found in '{audio_folder}'.")

    # Sort numerically; the file name breaks ties so the order does not depend
    # on the (arbitrary) directory listing order.
    matching_files.sort(key=lambda item: (item[1], item[0].name))

    print("Found and sorted the following files for concatenation:")
    for file_path, _ in matching_files:
        print(f"- {file_path.name}")

    # Start with a silent segment (delay).
    combined_audio = AudioSegment.silent(duration=delay_ms)

    # Concatenate all sorted audio files, skipping the ones that fail to load.
    loaded_count = 0
    for audio_file_path, _ in matching_files:
        try:
            combined_audio += AudioSegment.from_file(audio_file_path)
        except Exception as e:
            print(f"Warning: Could not process file '{audio_file_path.name}'. Skipping. Error: {e}")
        else:
            loaded_count += 1

    if loaded_count == 0:
        # Exporting now would produce a file of pure silence and report success.
        raise RuntimeError(
            f"None of the {len(matching_files)} matching files in '{audio_folder}' could be processed."
        )

    # End with a silent segment (delay).
    combined_audio += AudioSegment.silent(duration=end_delay_ms)

    # Encode according to the destination's extension so the file contents
    # agree with its name (previously MP3 data was written whatever the suffix).
    export_format = output_path.suffix.lstrip(".").lower() or "mp3"

    # Export the final combined audio file.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    combined_audio.export(output_path, format=export_format)
    print(f"Successfully concatenated audio to '{output_path}'")
|
|
|
|
|
|
def run_step4_concatenate_audio(project_path: Path):
    """Run Step 4: merge the project's ``vocab_NN.wav`` clips into one file.

    Looks in ``<project_path>/audio`` for files named ``vocab_NN.wav`` (``NN``
    being a two-digit index), has ``_concatenate_audio_files`` join them in
    numeric order with no leading silence, and writes the result to
    ``<project_path>/output/combined_audio.wav``.

    Args:
        project_path: Root directory of the project being processed.

    Returns:
        A ``(success, message)`` tuple on every path: ``(True, <message>)``
        when the combined file was written, ``(False, <message>)`` when the
        audio folder or matching files are missing or any other error occurs.
        Errors are reported through the message; nothing is raised.
    """
    try:
        audio_folder = project_path / "audio"
        output_dir = project_path / "output"
        output_wav_path = output_dir / "combined_audio.wav"

        # Define the pattern for the audio files created in Step 3.
        file_pattern = r"vocab_(\d{2})\.wav"

        _concatenate_audio_files(
            audio_folder=audio_folder,
            file_pattern=file_pattern,
            output_path=output_wav_path,
            delay_ms=0,  # no leading silence before the first clip
        )
    except FileNotFoundError as e:
        # Same tuple arity as the success path, so callers can always unpack
        # `ok, message = run_step4_concatenate_audio(...)`.
        return False, f"❌ Error: {e}"
    except Exception as e:
        return False, f"❌ An unexpected error occurred during audio concatenation: {e}"

    return True, f"✅ Audio successfully concatenated and saved to '{output_wav_path}'"
|
|
|