# gain/scripts/step4_concatenate_audio.py

# scripts/step4_concatenate_audio.py
import re
from pathlib import Path
from pydub import AudioSegment

def _concatenate_audio_files(
    audio_folder: Path,
    file_pattern: str,
    output_path: Path,
    delay_ms: int = 500,
    end_delay_ms: int = 2000,
    export_format: "str | None" = None,
):
    """
    Find, numerically sort, and concatenate the audio files in a folder.

    Files directly inside ``audio_folder`` whose name matches ``file_pattern``
    are ordered by the integer captured in the pattern's first group, joined
    back to back between a leading and a trailing stretch of silence, and
    written to ``output_path``. A file that cannot be loaded is skipped with a
    printed warning so one bad clip does not abort the whole run.

    Args:
        audio_folder: Directory to scan (not recursive).
        file_pattern: Regular expression applied with ``re.match``, i.e.
            anchored at the start of the file name only. Its first capture
            group must capture the digits used for ordering.
        output_path: Destination file. Missing parent directories are created.
        delay_ms: Length of the leading silence, in milliseconds.
        end_delay_ms: Length of the trailing silence, in milliseconds.
        export_format: Format name handed to ``AudioSegment.export``. When
            ``None`` it is taken from ``output_path``'s suffix (``.wav`` ->
            ``"wav"``), falling back to ``"mp3"`` if the path has no suffix.
            Pass it explicitly when the suffix is not a valid format name.

    Raises:
        FileNotFoundError: If ``audio_folder`` is not a directory, or no file
            in it matches ``file_pattern``.
        RuntimeError: If files matched but none of them could be loaded.
    """
    if not audio_folder.is_dir():
        raise FileNotFoundError(f"Audio source folder '{audio_folder}' does not exist.")

    # Find all files matching the pattern and extract the number for sorting
    compiled_pattern = re.compile(file_pattern)
    matching_files = []
    for filepath in audio_folder.iterdir():
        if not filepath.is_file():
            continue
        match = compiled_pattern.match(filepath.name)
        if match and match.group(1).isdigit():
            matching_files.append((filepath, int(match.group(1))))

    if not matching_files:
        raise FileNotFoundError(f"No files matching pattern '{file_pattern}' found in '{audio_folder}'.")

    # Sort numerically; the file name breaks ties so the order never depends
    # on the (unspecified) order in which the directory was listed.
    matching_files.sort(key=lambda item: (item[1], item[0].name))

    print("Found and sorted the following files for concatenation:")
    for file_path, _ in matching_files:
        print(f"- {file_path.name}")

    # Start with a silent segment (delay)
    combined_audio = AudioSegment.silent(duration=delay_ms)

    # Concatenate all sorted audio files, skipping the ones that fail to load
    loaded_count = 0
    for audio_file_path, _ in matching_files:
        try:
            segment = AudioSegment.from_file(audio_file_path)
        except Exception as e:
            print(f"Warning: Could not process file '{audio_file_path.name}'. Skipping. Error: {e}")
            continue
        combined_audio += segment
        loaded_count += 1

    # Writing a file that holds nothing but silence and reporting success
    # would hide the failure from the caller.
    if loaded_count == 0:
        raise RuntimeError(
            f"None of the {len(matching_files)} matching files in '{audio_folder}' could be loaded."
        )

    # End with a silent segment (delay)
    combined_audio += AudioSegment.silent(duration=end_delay_ms)

    # Keep the encoded data consistent with the file extension instead of
    # always writing MP3 data, whatever the output file is called.
    if export_format is None:
        export_format = output_path.suffix.lstrip(".").lower() or "mp3"

    # Export the final combined audio file
    output_path.parent.mkdir(parents=True, exist_ok=True)
    combined_audio.export(output_path, format=export_format)
    print(f"Successfully concatenated audio to '{output_path}'")


def run_step4_concatenate_audio(project_path: Path):
    """
    Main function for Step 4. Concatenates the project's 'vocab_xx.wav' files.

    Looks in ``<project_path>/audio`` for files named ``vocab_NN.wav`` (two
    digits) and has ``_concatenate_audio_files`` join them, in numeric order
    and without leading silence, into
    ``<project_path>/output/combined_audio.wav``. How the output file is
    encoded is decided by ``_concatenate_audio_files``.

    Args:
        project_path: Root folder of the project.

    Returns:
        A ``(success, message)`` tuple on every path: ``(True, ...)`` when the
        combined file was written, ``(False, ...)`` with a description of the
        problem otherwise. Errors are reported through the tuple and are not
        raised to the caller.
    """
    try:
        audio_folder = project_path / "audio"
        output_dir = project_path / "output"
        output_wav_path = output_dir / "combined_audio.wav"

        # Define the pattern for the audio files created in Step 3
        file_pattern = r"vocab_(\d{2})\.wav"

        _concatenate_audio_files(
            audio_folder=audio_folder,
            file_pattern=file_pattern,
            output_path=output_wav_path,
            delay_ms=0  # No leading silence before the first clip
        )

        return True, f"✅ Audio successfully concatenated and saved to '{output_wav_path}'"

    # Failures return the same two-element shape as success, so a caller that
    # unpacks ``success, message`` does not crash exactly when an error occurs.
    except FileNotFoundError as e:
        return False, f"❌ Error: {e}"
    except Exception as e:
        return False, f"❌ An unexpected error occurred during audio concatenation: {e}"