import gradio as gr
import numpy as np
import librosa
import soundfile as sf
import pandas as pd

def _rms(signal):
    """Return the root-mean-square level of a 1-D signal (0.0 for an empty one)."""
    return float(np.sqrt(np.mean(signal**2))) if len(signal) else 0.0


def _prepare_clip(effect, target_rms, clip_samples, fade_in_duration, fade_out_duration, volume_factor):
    """Cut, level-match, attenuate and fade one sound-effect clip.

    Args:
        effect: full sound-effect signal (1-D float array).
        target_rms: RMS of the narration; the effect is scaled to match it
            before volume_factor is applied (skipped if either RMS is 0,
            which would otherwise divide by zero on a silent file).
        clip_samples: number of samples to keep from the start of the effect.
        fade_in_duration / fade_out_duration: fractions (0-1) of the clip
            length over which to ramp the volume up / down.
        volume_factor: final gain relative to the narration level.

    Returns:
        A new array (the input is never mutated in place).
    """
    effect_rms = _rms(effect)
    if effect_rms > 0 and target_rms > 0:
        effect = effect * (target_rms / effect_rms)

    # The multiplication copies the slice, so the in-place fades below are safe.
    clip = effect[:clip_samples] * volume_factor

    fade_in_samples = int(fade_in_duration * len(clip))
    fade_out_samples = int(fade_out_duration * len(clip))
    if fade_in_samples > 0:
        clip[:fade_in_samples] *= np.linspace(0, 1, fade_in_samples)
    if fade_out_samples > 0:
        clip[-fade_out_samples:] *= np.linspace(1, 0, fade_out_samples)
    return clip


def _mix_at(base, clip, start_idx):
    """Add clip into base (in place) starting at start_idx.

    A clip that runs past the end of base is truncated rather than dropped,
    so effects near the end of the narration are still (partially) audible.
    """
    if start_idx >= len(base):
        return
    end_idx = min(start_idx + len(clip), len(base))
    base[start_idx:end_idx] += clip[:end_idx - start_idx]


def generate_audio(clip_length=4.0, fade_in_duration=0.5, fade_out_duration=0.5, volume_factor=0.3):
    """Mix faded sound-effect clips over the narration track.

    Loads 'narration.wav', 'baa.wav' and 'murmur.wav' (the effects are
    resampled to the narration's native sample rate), overlays the first
    clip_length seconds of baa at 0:05 and of murmur at 0:15 — each
    RMS-normalized to the narration, scaled by volume_factor and given
    linear fade-in/out ramps — then peak-normalizes if the mix clips.

    Returns:
        (sample_rate, samples) tuple as expected by gr.Audio.
    """
    narration, sr = librosa.load('narration.wav', sr=None)
    baa, _ = librosa.load('baa.wav', sr=sr)
    murmur, _ = librosa.load('murmur.wav', sr=sr)

    narration_rms = _rms(narration)
    clip_samples = int(clip_length * sr)

    # Narration is the baseline of the mix.
    combined = np.zeros(len(narration))
    combined += narration

    baa_clip = _prepare_clip(baa, narration_rms, clip_samples,
                             fade_in_duration, fade_out_duration, volume_factor)
    _mix_at(combined, baa_clip, int(5 * sr))

    murmur_clip = _prepare_clip(murmur, narration_rms, clip_samples,
                                fade_in_duration, fade_out_duration, volume_factor)
    _mix_at(combined, murmur_clip, int(15 * sr))

    # Peak-normalize only when the sum actually exceeds full scale.
    max_val = np.max(np.abs(combined))
    if max_val > 1.0:
        combined = combined / max_val

    return (sr, combined)

def visualize_sfx(sound_effect_clip_length, fade_in_duration, fade_out_duration, sound_effect_volume_factor):
    """Compute the volume envelope of one sound-effect clip for plotting.

    Mirrors how generate_audio actually shapes a clip: a linear fade-in
    ramp and a linear fade-out ramp are applied MULTIPLICATIVELY, so when
    the two fade regions overlap (fade_in_duration + fade_out_duration > 1)
    the envelope is the product of both ramps — the previous if/elif
    version gave fade-in absolute priority, producing a discontinuity that
    did not match the rendered audio.

    Args:
        sound_effect_clip_length: clip length in seconds.
        fade_in_duration / fade_out_duration: fade lengths as fractions
            (0-1) of the clip length.
        sound_effect_volume_factor: peak gain of the clip.

    Returns:
        pandas DataFrame with "time" and "volume" columns for gr.LinePlot.
    """
    fade_in_seconds = fade_in_duration * sound_effect_clip_length
    fade_out_seconds = fade_out_duration * sound_effect_clip_length

    # 10 ms resolution keeps the plotted ramps smooth.
    time_resolution = 0.01
    times = np.arange(0, sound_effect_clip_length + time_resolution, time_resolution)

    # Fade-in ramp: 0 -> 1 over fade_in_seconds, then flat at 1.
    fade_in_env = np.ones_like(times)
    if fade_in_seconds > 0:
        fade_in_env = np.minimum(1.0, times / fade_in_seconds)

    # Fade-out ramp: flat at 1, then 1 -> 0 over the final fade_out_seconds.
    # Clamp at 0 in case float fuzz pushes the last sample past the clip end.
    fade_out_env = np.ones_like(times)
    if fade_out_seconds > 0:
        fade_out_env = np.clip((sound_effect_clip_length - times) / fade_out_seconds, 0.0, 1.0)

    volumes = sound_effect_volume_factor * fade_in_env * fade_out_env

    return pd.DataFrame({
        "time": times,
        "volume": volumes
    })

# UI layout: left column holds the four envelope controls, the live envelope
# plot and the generate button; right column holds the rendered audio output.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # Slider values are factors, not seconds, for the two fade controls
            # (see their `info` text); clip length and volume feed generate_audio
            # and visualize_sfx with identical meanings.
            sound_effect_clip_length = gr.Slider(minimum=0.5, maximum=5, value=4.0, step=0.1, label="Sound Effect Clip Length (seconds)")
            fade_in_duration = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.05, label="Fade In Duration Factor", info="0.0 = no fade in, 1.0 = fade in over entire clip")
            fade_out_duration = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.05, label="Fade Out Duration Factor", info="0.0 = no fade out, 1.0 = fade out over entire clip")
            sound_effect_volume_factor = gr.Slider(minimum=0.1, maximum=1.0, value=0.15, step=0.05, label="Sound Effect Volume Factor", info="0.1 is 10% of the narration volume, 1.0 is 100% of the original volume")
            visualization = gr.LinePlot(label="Sound Effect Volume Envelope", x="time", y="volume", y_lim=[0, 1])
            generate_button = gr.Button("Generate Audio")
        with gr.Column():
            output = gr.Audio()

    # Redraw the envelope plot on page load and whenever any slider changes;
    # gr.on binds one handler to all five triggers at once.
    gr.on(
        [demo.load, sound_effect_clip_length.change, fade_in_duration.change, fade_out_duration.change, sound_effect_volume_factor.change],
        fn=visualize_sfx,
        inputs=[sound_effect_clip_length, fade_in_duration, fade_out_duration, sound_effect_volume_factor], 
        outputs=visualization
    )
    # Audio rendering is expensive (file loads + mixing), so it only runs on
    # an explicit button click rather than on every slider change.
    generate_button.click(generate_audio, inputs=[sound_effect_clip_length, fade_in_duration, fade_out_duration, sound_effect_volume_factor], outputs=output)

if __name__ == "__main__":
    demo.launch()