# scripts/step4_concatenate_audio.py
"""Step 4: concatenate a project's numbered audio clips into a single file."""

import re
from pathlib import Path
from typing import Optional, Tuple

from pydub import AudioSegment


def _concatenate_audio_files(
    audio_folder: Path,
    file_pattern: str,
    output_path: Path,
    delay_ms: int = 500,
    end_delay_ms: int = 2000,
    export_format: Optional[str] = None,
) -> None:
    """Find, numerically sort, and concatenate the audio files in a folder.

    Args:
        audio_folder: Directory that is scanned (non-recursively) for clips.
        file_pattern: Regular expression that must match the *entire* file
            name. Its first capture group must capture the number used for
            ordering; names whose group 1 is not all digits are ignored.
        output_path: Destination file. Missing parent directories are created.
        delay_ms: Length of the silence placed before the first clip.
        end_delay_ms: Length of the silence appended after the last clip.
        export_format: Format name handed to ``AudioSegment.export``. When
            omitted it is taken from the suffix of ``output_path`` so that the
            file contents agree with its extension; ``"mp3"`` is used if the
            path has no suffix.

    Raises:
        FileNotFoundError: If ``audio_folder`` is not a directory or contains
            no file matching ``file_pattern``.
        RuntimeError: If files matched but none of them could be loaded.
    """
    if not audio_folder.is_dir():
        raise FileNotFoundError(f"Audio source folder '{audio_folder}' does not exist.")

    # Collect (path, number) pairs for every file whose name fits the pattern.
    compiled_pattern = re.compile(file_pattern)
    matching_files = []
    for filepath in audio_folder.iterdir():
        if not filepath.is_file():
            continue
        # fullmatch rather than match: match() only anchors the start, which
        # would also accept names such as "vocab_01.wav.bak".
        match = compiled_pattern.fullmatch(filepath.name)
        if match and match.group(1).isdigit():
            matching_files.append((filepath, int(match.group(1))))

    if not matching_files:
        raise FileNotFoundError(f"No files matching pattern '{file_pattern}' found in '{audio_folder}'.")

    # Order by the extracted number; the file name breaks ties so the result
    # does not depend on the arbitrary order returned by iterdir().
    matching_files.sort(key=lambda item: (item[1], item[0].name))

    print("Found and sorted the following files for concatenation:")
    for file_path, _ in matching_files:
        print(f"- {file_path.name}")

    # Leading silence, then every clip that can be decoded.
    combined_audio = AudioSegment.silent(duration=delay_ms)
    loaded_count = 0
    for audio_file_path, _ in matching_files:
        try:
            segment = AudioSegment.from_file(audio_file_path)
            combined_audio += segment
            loaded_count += 1
        except Exception as e:
            # Best effort: one unreadable clip must not abort the whole run.
            print(f"Warning: Could not process file '{audio_file_path.name}'. Skipping. Error: {e}")

    # Without this check the function would export pure silence and still
    # report success when every single clip failed to load.
    if loaded_count == 0:
        raise RuntimeError(
            f"None of the {len(matching_files)} matching audio files in "
            f"'{audio_folder}' could be loaded."
        )

    # Trailing silence.
    combined_audio += AudioSegment.silent(duration=end_delay_ms)

    # Encode in the format the file name promises unless told otherwise.
    if export_format is None:
        export_format = output_path.suffix.lstrip(".").lower() or "mp3"

    output_path.parent.mkdir(parents=True, exist_ok=True)
    combined_audio.export(output_path, format=export_format)
    print(f"Successfully concatenated audio to '{output_path}'")


def run_step4_concatenate_audio(project_path: Path) -> Tuple[bool, str]:
    """Concatenate the project's 'vocab_xx.wav' clips into one WAV file.

    Reads every ``vocab_<two digits>.wav`` file in ``<project_path>/audio``,
    joins them in numeric order with no leading silence, and writes the result
    to ``<project_path>/output/combined_audio.wav``.

    Args:
        project_path: Root directory of the project.

    Returns:
        A ``(success, message)`` pair. The pair has the same shape on success
        and on failure; errors are reported through the message rather than
        raised.
    """
    try:
        audio_folder = project_path / "audio"
        output_dir = project_path / "output"
        output_wav_path = output_dir / "combined_audio.wav"

        # Name pattern of the clips this step consumes; group 1 is the index.
        file_pattern = r"vocab_(\d{2})\.wav"

        _concatenate_audio_files(
            audio_folder=audio_folder,
            file_pattern=file_pattern,
            output_path=output_wav_path,
            delay_ms=0,  # no leading silence
        )
        return True, f"✅ Audio successfully concatenated and saved to '{output_wav_path}'"
    except FileNotFoundError as e:
        return False, f"❌ Error: {e}"
    except Exception as e:
        # Boundary handler: turn any other failure into a status message.
        return False, f"❌ An unexpected error occurred during audio concatenation: {e}"