Spaces:
Running
Running
| from transformers import pipeline | |
| import torch | |
| import gc | |
| import librosa | |
| from model_api import clear_gpu_cache, get_device_and_dtype | |
def transcribe_audio(audio_path: str, device: str = "cuda", torch_dtype: torch.dtype = torch.float16) -> str:
    """
    Transcribe an audio file with the Whisper-small ASR pipeline.

    Args:
        audio_path: Path to the audio file (any format the pipeline's
            audio loader accepts, e.g. MP3 or WAV).
        device: Device for inference (e.g. "cuda" for GPU, "cpu" for CPU).
        torch_dtype: Torch data type used for model computations.

    Returns:
        The transcribed text, or an empty string if transcription fails.
    """
    # Pre-bind so the `finally` cleanup is safe even when pipeline()
    # itself raises (model download failure, invalid device, ...).
    # Without this, `del pipe` raised UnboundLocalError and masked
    # the original exception.
    pipe = None
    try:
        # Build the ASR pipeline with explicit device/dtype; long audio is
        # processed in overlapping chunks (25 s windows, 2 s stride).
        pipe = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-small",
            device=device,
            chunk_length_s=25,
            stride_length_s=2,
            torch_dtype=torch_dtype,
        )
        # return_timestamps=True lets the pipeline stitch chunked output;
        # only the plain text is returned to the caller.
        result = pipe(audio_path, return_timestamps=True)
        return result['text']
    except Exception as e:
        # Best-effort API: report the failure and return an empty
        # transcript rather than propagating (callers rely on "" on error).
        print(f"Error during transcription: {str(e)}")
        return ""
    finally:
        # Drop the model reference (if created) and free GPU memory.
        if pipe is not None:
            del pipe
        clear_gpu_cache()
if __name__ == "__main__":
    # Demo entry point: pick the best available device/dtype, transcribe a
    # sample file, and print the resulting text.
    device, dtype = get_device_and_dtype()
    audio_file = "/workspaces/Video_Analyser/app_srv/downloads/45677153-510d-4f47-95ee-c1b4b0843433/audio.mp3.mp3"
    print(transcribe_audio(audio_file, device, dtype))