Mo2294 committed
Commit 0800432 · verified · 1 Parent(s): 9d5ea3d

Update app.py

Files changed (1): app.py (+188, -135)

app.py CHANGED
@@ -1,7 +1,20 @@
 #!/usr/bin/env python3
 """
 HuggingFace Spaces app.py for IndexTTS2 with Auto-Processing and Combined Audio
+- Auto-process TXT files from dataset
+- Generate per-affirmation WAVs (named by sanitized affirmation text)
+- Create combined WAV chunks with:
+    - 1.5s pause between affirmations
+    - max duration 29:50 (1790s) per combined
+- Upload combined WAV + matching TXT (same basename) per combined:
+    combined_001.wav + combined_001.txt
+  where TXT contains the original affirmations in the exact audio order:
+    i am worthy.-
+    I am blessed.-
+    ...
+- Move processed TXT files to done/
 """
+
 import os
 import sys
 import subprocess
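
Note: the sidecar TXT format described in the new docstring, as a tiny runnable sketch (file name and texts are illustrative; it mirrors the flush logic introduced later in this diff):

    texts = ["i am worthy", "I am blessed"]
    with open("combined_001.txt", "w", encoding="utf-8") as f:
        for t in texts:
            f.write(f"{t}.-\n")
    # combined_001.txt now reads:
    # i am worthy.-
    # I am blessed.-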
@@ -36,9 +49,16 @@ auto_process_thread = None
 current_status = "Ready"
 tts_model = None
 
+# ---------------------------------------------------------------------
 # Constants
-MAX_COMBINED_DURATION = 30 * 60  # 30 minutes in seconds
-PAUSE_DURATION = 3.0  # 3 seconds pause between audios
+# ---------------------------------------------------------------------
+# Combined-audio constraints (as requested)
+PAUSE_DURATION = 1.5  # 1.5 seconds between each affirmation in combined audio
+MAX_COMBINED_DURATION = 29 * 60 + 50  # 29:50 = 1790 seconds max per combined
+
+# (kept for reference; no longer used for combining)
+MAX_COMBINED_DURATION_OLD = 30 * 60
+PAUSE_DURATION_OLD = 3.0
 
 
 def sanitize_filename(text: str, max_len: int = 120) -> str:
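
Note: a minimal sketch (not part of the commit) of the capacity math these constants imply: n clips whose speech totals T seconds fit in one combined file when T + (n - 1) * PAUSE_DURATION <= MAX_COMBINED_DURATION.

    PAUSE_DURATION = 1.5
    MAX_COMBINED_DURATION = 29 * 60 + 50  # 1790 s

    def fits(durations):
        """True if the clips plus inter-clip pauses fit in one combined file."""
        total = sum(durations) + PAUSE_DURATION * (len(durations) - 1)
        return total <= MAX_COMBINED_DURATION

    print(fits([10.0] * 150))  # 1500 s speech + 223.5 s of pauses = 1723.5 s -> True
    print(fits([10.0] * 160))  # 1600 s speech + 238.5 s of pauses = 1838.5 s -> False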
@@ -129,116 +149,131 @@ def parse_audio_duration_from_log(log_line: str):
     return None
 
 
-def create_combined_audios(audio_files_info):
+def create_combined_audios(audio_items):
     """
-    Create combined audio file(s) with 3-second pauses,
-    without changing pitch, samplerate or bitdepth.
-
-    audio_files_info: List[(file_path, duration_in_seconds)]
+    Create combined audio file(s) with 1.5-second pauses between affirmations,
+    max duration 29:50 per combined, without changing pitch/samplerate/bitdepth.
+
+    audio_items: List[dict] where each item is:
+        {
+            "file_path": <temp wav path>,
+            "duration": <seconds float>,
+            "text": <original affirmation text (no trailing .-)>,
+        }
+
+    Returns:
+        List[dict]:
+            {
+                "wav_path": <combined wav filename>,
+                "txt_path": <combined txt filename>,
+                "duration": <duration seconds float>,
+                "texts": <list[str]>,
+            }
     """
+    if not audio_items:
+        return []
 
-    # 1) Read the samplerate of the first file correctly (e.g. 22050 Hz from BigVGAN)
-    first_file = audio_files_info[0][0]
+    # Read samplerate from first file
+    first_file = audio_items[0]["file_path"]
     _, sr = sf.read(first_file, dtype="int16")
 
-    # Generate 3 seconds of silence at the ORIGINAL samplerate
-    silence_3s = np.zeros(int(sr * PAUSE_DURATION), dtype=np.int16)
+    # Silence between each affirmation in combined audio (original SR, int16)
+    silence_pause = np.zeros(int(sr * PAUSE_DURATION), dtype=np.int16)
+
+    combined_results = []
+    combined_index = 1
 
-    combined_files = []
     current_files = []
+    current_texts = []
     current_duration = 0.0
-    combined_index = 1
 
-    for file_path, duration in audio_files_info:
-        # "What would the length be if we added this file?"
-        new_length = current_duration
-        if current_files:
-            new_length += PAUSE_DURATION
-        new_length += duration
-
-        # If too long → save & start a new combined file
-        if new_length > MAX_COMBINED_DURATION and current_files:
-            combined_name = (
-                "temp_combined.wav"
-                if combined_index == 1 and len(audio_files_info) <= 30
-                else f"temp_combined_{combined_index:03d}.wav"
-            )
-
-            audio_out = []
-
-            # 1.5 seconds of intro silence before the first audio
-            silence_intro = np.zeros(int(sr * 1.5), dtype=np.int16)
-            audio_out.append(silence_intro)
-
-            for i, fp in enumerate(current_files):
-                data, _ = sf.read(fp, dtype="int16")
-                audio_out.append(data)
-
-                # 3-second pause between affirmations
-                if i < len(current_files) - 1:
-                    audio_out.append(silence_3s)
-
-            final_audio = np.concatenate(audio_out)
-            sf.write(combined_name, final_audio, sr, subtype="PCM_16")
-
-            combined_files.append((combined_name, current_duration))
-            print(
-                f"Created combined file {combined_index}: "
-                f"{int(current_duration // 60)}:{int(current_duration % 60):02d}"
-            )
-            combined_index += 1
-
-            # Start a new combined group with the current file
-            current_files = [file_path]
-            current_duration = duration
-
-        else:
-            current_files.append(file_path)
-            if len(current_files) == 1:
-                current_duration = duration
-            else:
-                current_duration += PAUSE_DURATION + duration
-
-    # Save the last combined file
-    if current_files:
-        combined_name = (
-            "temp_combined.wav"
-            if combined_index == 1 and len(audio_files_info) <= 30
-            else f"temp_combined_{combined_index:03d}.wav"
-        )
-
-        audio_out = []
-
-        # 1.5 seconds of intro silence before the first audio
-        silence_intro = np.zeros(int(sr * 1.5), dtype=np.int16)
-        audio_out.append(silence_intro)
-
-        for i, fp in enumerate(current_files):
-            data, _ = sf.read(fp, dtype="int16")
-            audio_out.append(data)
-
-            # 3-second pause between affirmations
-            if i < len(current_files) - 1:
-                audio_out.append(silence_3s)
-
-        final_audio = np.concatenate(audio_out)
-        sf.write(combined_name, final_audio, sr, subtype="PCM_16")
-
-        combined_files.append((combined_name, current_duration))
-        print(
-            f"Created combined file {combined_index}: "
-            f"{int(current_duration // 60)}:{int(current_duration % 60):02d}"
-        )
-
-    return combined_files
+    def flush_current():
+        nonlocal combined_index, current_files, current_texts, current_duration
+
+        if not current_files:
+            return
+
+        audio_out = []
+
+        # IMPORTANT: no intro silence requested now; only 1.5s between affirmations
+        for i, fp in enumerate(current_files):
+            data, _ = sf.read(fp, dtype="int16")
+            audio_out.append(data)
+            if i < len(current_files) - 1:
+                audio_out.append(silence_pause)
+
+        final_audio = np.concatenate(audio_out) if len(audio_out) > 1 else audio_out[0]
+
+        wav_name = f"combined_{combined_index:03d}.wav"
+        txt_name = f"combined_{combined_index:03d}.txt"
+
+        sf.write(wav_name, final_audio, sr, subtype="PCM_16")
+
+        # TXT must contain original affirmation text in exact order, each ending with .-
+        with open(txt_name, "w", encoding="utf-8") as f:
+            for t in current_texts:
+                f.write(f"{t}.-\n")
+
+        combined_results.append(
+            {
+                "wav_path": wav_name,
+                "txt_path": txt_name,
+                "duration": current_duration,
+                "texts": list(current_texts),
+            }
+        )
+
+        print(
+            f"Created combined file {combined_index}: "
+            f"{int(current_duration // 60)}:{int(current_duration % 60):02d}"
+        )
+
+        combined_index += 1
+        current_files = []
+        current_texts = []
+        current_duration = 0.0
+
+    for item in audio_items:
+        file_path = item["file_path"]
+        duration = float(item["duration"])
+        text = item["text"]
+
+        projected = current_duration
+        if current_files:
+            projected += PAUSE_DURATION
+        projected += duration
+
+        # If adding this would exceed max, flush first
+        if projected > MAX_COMBINED_DURATION and current_files:
+            flush_current()
+
+        current_files.append(file_path)
+        current_texts.append(text)
+
+        if len(current_files) == 1:
+            current_duration = duration
+        else:
+            current_duration += PAUSE_DURATION + duration
+
+    # flush final
+    flush_current()
+
+    return combined_results
 
 
 def auto_process_dataset():
     """
-    Auto-process TXT files from Monarchtaba22/rawAffirmation
-    Generate audio for each sentence (split by .-) and upload to output dataset
-    Create combined audio(s) with 3s pauses, max 30 min each
-    Move processed TXT files to /done folder
+    Auto-process TXT files from dataset:
+    - input: Mo2294/rawAffirmation
+    - output: Mo2294/outputAffirmation
+
+    For each TXT:
+    - Split affirmations by ".-"
+    - Generate individual WAV named by sanitized affirmation text
+    - Create combined WAV chunks (max 29:50) with 1.5s pauses
+    - For each combined WAV, create a matching combined_XXX.txt with the
+      original affirmations in order (each line ends ".-")
+    - Upload all WAVs + combined WAVs + combined TXTs
+    - Move processed TXT to done/
     """
     global auto_process_running, current_status, tts_model
 
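Note: a hypothetical call against the new create_combined_audios contract (paths, durations, and texts are made up, and the temp WAVs are assumed to already exist on disk):

    audio_items = [
        {"file_path": "temp_i_am_worthy.wav", "duration": 4.2, "text": "i am worthy"},
        {"file_path": "temp_I_am_blessed.wav", "duration": 3.8, "text": "I am blessed"},
    ]
    for c in create_combined_audios(audio_items):
        # e.g. combined_001.wav plus combined_001.txt listing "i am worthy.-" ...
        print(c["wav_path"], c["txt_path"], f"{c['duration']:.1f}s", len(c["texts"]))
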
@@ -253,6 +288,8 @@ def auto_process_dataset():
         return
 
     api = HfApi(token=token)
+
+    # TODO: replace with your Monarchtaba22 ids if different
     input_dataset_id = "Mo2294/rawAffirmation"
     output_dataset_id = "Mo2294/outputAffirmation"
 
@@ -271,7 +308,6 @@ def auto_process_dataset():
         repo_files = list_repo_files(
             repo_id=input_dataset_id, repo_type="dataset", token=token
         )
-        # Filter for TXT files not in /done folder
        txt_files = [
            f
            for f in repo_files
@@ -296,6 +332,9 @@ def auto_process_dataset():
             txt_name = Path(txt_file).stem
             current_status = f"Processing: {txt_name}"
 
+            temp_files = []
+            commit_operations = []
+
             try:
                 # Download TXT file
                 txt_path = hf_hub_download(
@@ -309,17 +348,17 @@ def auto_process_dataset():
                 with open(txt_path, "r", encoding="utf-8") as f:
                     content = f.read()
 
-                # IMPROVED SPLITTING - preserve the actual text
+                # Split by ".-" and preserve original text (minus trailing '.'/'-')
                 raw_sentences = content.split(".-")
                 sentences = []
-
                 for s in raw_sentences:
                     cleaned = s.strip()
                     if cleaned:
                         # Remove only trailing punctuation if it's a single dash or dot
                         if cleaned.endswith("-") or cleaned.endswith("."):
                             cleaned = cleaned[:-1].rstrip()
-                        sentences.append(cleaned)
+                        if cleaned:
+                            sentences.append(cleaned)
 
                 if not sentences:
                     current_status = f"No sentences found in {txt_name}"
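
Note: the ".-" splitting above, run standalone on illustrative input:

    content = "i am worthy.- I am blessed.- I am calm."
    sentences = []
    for s in content.split(".-"):
        cleaned = s.strip()
        if cleaned:
            # strip a single trailing dash or dot, as in the hunk above
            if cleaned.endswith("-") or cleaned.endswith("."):
                cleaned = cleaned[:-1].rstrip()
            if cleaned:
                sentences.append(cleaned)
    print(sentences)  # ['i am worthy', 'I am blessed', 'I am calm']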
@@ -328,11 +367,7 @@ def auto_process_dataset():
                 current_status = f"Found {len(sentences)} sentences in {txt_name}"
                 print(f"Processing sentences from {txt_name}:")
 
-                temp_files = []
-                audio_files_info = []  # still used for durations/logging, not for combining
-                commit_operations = []
-
-                # Track used filenames to avoid duplicates within same TXT
+                audio_items = []  # used for combined creation: text + duration + temp wav path
                 used_names = set()
 
                 # Process each sentence
@@ -351,7 +386,6 @@ def auto_process_dataset():
                     # Filename should be the affirmation text (before adding punctuation)
                     base_name = sanitize_filename(sentence)
                     if base_name in used_names:
-                        # avoid overwriting if identical sentence appears multiple times
                        suffix = 2
                        while f"{base_name}_{suffix}" in used_names:
                            suffix += 1
@@ -365,7 +399,7 @@ def auto_process_dataset():
 
                     print(f" Sentence {idx+1}: '{tts_sentence}'")
 
-                    # Generate audio using IndexTTS2
+                    # Generate audio
                     output_filename = f"temp_{base_name}.wav"
 
                     # Capture stdout to get audio duration
@@ -381,7 +415,6 @@ def auto_process_dataset():
                             verbose=True,
                         )
 
-                        # Parse duration from output
                        output_log = buf.getvalue()
                        duration = None
                        for line in output_log.split("\n"):
@@ -396,10 +429,9 @@ def auto_process_dataset():
 
                         print(f" Generated audio: {duration:.2f} seconds")
 
-                        audio_files_info.append((output_filename, duration))
                         temp_files.append(output_filename)
 
-                        # Upload path: use affirmation name, no numbering
+                        # Upload individual WAV (named by affirmation text)
                         output_path = f"Affirmations/{txt_name}/{base_name}.wav"
                         commit_operations.append(
                             CommitOperationAdd(
@@ -408,26 +440,53 @@ def auto_process_dataset():
                             )
                         )
 
+                        # For combined creation we must preserve original text + order
+                        audio_items.append(
+                            {
+                                "file_path": output_filename,
+                                "duration": duration,
+                                "text": sentence,  # ORIGINAL text (case preserved) for combined TXT
+                            }
+                        )
+
                     except Exception as e:
                         current_status = f"Error generating audio for sentence {idx+1}: {e}"
                         print(f"Generation error: {e}")
                         continue
 
-                # NO MORE COMBINED AUDIO CREATION HERE
-                # (combined generation removed/disabled as requested)
-
-                # Upload all generated files
-                if commit_operations and auto_process_running:
-                    total_individual = len(commit_operations)
-
-                    current_status = f"Uploading {total_individual} audio files for {txt_name}..."
-
+                # Create combined WAV(s) + TXT(s)
+                if audio_items and auto_process_running:
+                    current_status = f"Creating combined audio(s) for {txt_name}..."
+                    combined_results = create_combined_audios(audio_items)
+
+                    for c in combined_results:
+                        # upload combined wav
+                        commit_operations.append(
+                            CommitOperationAdd(
+                                path_in_repo=f"Affirmations/{txt_name}/{Path(c['wav_path']).name}",
+                                path_or_fileobj=c["wav_path"],
+                            )
+                        )
+                        # upload combined txt (same basename)
+                        commit_operations.append(
+                            CommitOperationAdd(
+                                path_in_repo=f"Affirmations/{txt_name}/{Path(c['txt_path']).name}",
+                                path_or_fileobj=c["txt_path"],
+                            )
+                        )
+
+                        temp_files.append(c["wav_path"])
+                        temp_files.append(c["txt_path"])
+
+                # Upload all generated files
+                if commit_operations and auto_process_running:
+                    current_status = f"Uploading files for {txt_name}..."
                     try:
                         api.create_commit(
                             repo_id=output_dataset_id,
                             repo_type="dataset",
                             operations=commit_operations,
-                            commit_message=f"Add {total_individual} audio files for {txt_name}",
+                            commit_message=f"Add affirmations + combined for {txt_name}",
                             token=token,
                         )
                         current_status = f"Successfully uploaded files for {txt_name}"
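
Note: a minimal standalone sketch of the batched-commit pattern this hunk relies on; a single create_commit call uploads every queued file in one dataset commit (token and file names are placeholders):

    from huggingface_hub import HfApi, CommitOperationAdd

    api = HfApi(token="hf_...")  # placeholder token
    ops = [
        CommitOperationAdd(path_in_repo="Affirmations/demo/i_am_worthy.wav",
                           path_or_fileobj="temp_i_am_worthy.wav"),
        CommitOperationAdd(path_in_repo="Affirmations/demo/combined_001.wav",
                           path_or_fileobj="combined_001.wav"),
    ]
    api.create_commit(repo_id="Mo2294/outputAffirmation", repo_type="dataset",
                      operations=ops, commit_message="Add demo assets")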
@@ -454,7 +513,11 @@ def auto_process_dataset():
                             token=token,
                         )
 
-                        current_status = f"✅ Completed {txt_name}: {total_individual} audio files"
+                        current_status = (
+                            f"✅ Completed {txt_name}: "
+                            f"{len(audio_items)} individual + "
+                            f"{len(commit_operations) - len(audio_items)} combined assets"
+                        )
 
                     except Exception as e:
                         current_status = f"Upload/Move error for {txt_name}: {e}"
@@ -473,6 +536,15 @@ def auto_process_dataset():
             except Exception as e:
                 current_status = f"Error processing {txt_name}: {e}"
                 print(f"Error: {e}")
+
+                # Cleanup on failure too
+                for temp_file in temp_files:
+                    try:
+                        if os.path.exists(temp_file):
+                            os.remove(temp_file)
+                    except Exception:
+                        pass
+
                 continue
 
         if auto_process_running:
@@ -559,9 +631,7 @@ def manual_generate(text, reference_audio, emotion_audio, emo_alpha, use_emo_tex
 # Create Gradio interface
 with gr.Blocks(title="IndexTTS2 with Auto-Processing") as demo:
     gr.Markdown("# 🎤 IndexTTS2 Voice Synthesis")
-    gr.Markdown(
-        "State-of-the-art TTS with auto-processing and combined audio generation"
-    )
+    gr.Markdown("State-of-the-art TTS with auto-processing and combined audio generation")
 
     # Manual tab
     with gr.Tab("Manual Processing"):
@@ -592,25 +662,15 @@ with gr.Blocks(title="IndexTTS2 with Auto-Processing") as demo:
                     step=0.1,
                     label="Emotion strength",
                 )
-                use_emo_text = gr.Checkbox(
-                    label="Use text-based emotion", value=False
-                )
+                use_emo_text = gr.Checkbox(label="Use text-based emotion", value=False)
 
             with gr.Column():
-                generate_btn = gr.Button(
-                    "🎙️ Generate", variant="primary", size="lg"
-                )
+                generate_btn = gr.Button("🎙️ Generate", variant="primary", size="lg")
                 output_audio = gr.Audio(label="Generated audio", type="numpy")
 
         generate_btn.click(
             manual_generate,
-            inputs=[
-                text_input,
-                reference_audio,
-                emotion_audio,
-                emo_alpha,
-                use_emo_text,
-            ],
+            inputs=[text_input, reference_audio, emotion_audio, emo_alpha, use_emo_text],
             outputs=output_audio,
         )
 
@@ -628,8 +688,9 @@ with gr.Blocks(title="IndexTTS2 with Auto-Processing") as demo:
                 - 🎙️ Voice: `Mo.wav`
                 - ✂️ Delimiter: `.-`
                 - 📝 Structure: `/Affirmations/[name]/`
-                - ⏰ Combined: Max 30 min chunks
-                - ⏸️ Pauses: 3 seconds between audios
+                - ⏰ Combined: Max 29:50 chunks
+                - ⏸️ Pauses: 1.5 seconds between audios
+                - 🧾 TXT: one `combined_XXX.txt` per combined wav
                 """
             )
 
 
@@ -642,23 +703,15 @@ with gr.Blocks(title="IndexTTS2 with Auto-Processing") as demo:
642
  )
643
 
644
  with gr.Row():
645
- start_btn = gr.Button(
646
- "▶️ Start Processing", variant="primary", scale=2
647
- )
648
  stop_btn = gr.Button("⏹️ Stop", variant="stop", scale=1)
649
  refresh_btn = gr.Button("🔄 Refresh", scale=1)
650
 
651
- message_display = gr.Textbox(
652
- label="Message", interactive=False, visible=False
653
- )
654
 
655
  # Event handlers
656
- start_btn.click(
657
- start_auto_process, outputs=[message_display, status_display]
658
- )
659
- stop_btn.click(
660
- stop_auto_process, outputs=[message_display, status_display]
661
- )
662
  refresh_btn.click(get_status, outputs=status_display)
663
 
664
  # Footer
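
Note: the one-line event wiring this section settles on, as a self-contained sketch (component names are illustrative):

    import gradio as gr

    with gr.Blocks() as demo:
        status_display = gr.Textbox(label="Status", interactive=False)
        refresh_btn = gr.Button("🔄 Refresh")
        # click(fn, outputs=...) calls fn and routes its return value to outputs
        refresh_btn.click(lambda: "Ready", outputs=status_display)

    demo.launch()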