Spaces:
Running
Running
| from transformers import pipeline | |
| import torch | |
| import gc | |
| import librosa | |
| from model_api import clear_gpu_cache, get_device_and_dtype | |
def transcribe_audio(audio_path: str, device: str = "cuda", torch_dtype: torch.dtype = torch.float16) -> str:
    """
    Transcribe an audio file with the Whisper-small ASR pipeline.

    Args:
        audio_path: Path to the audio file (any format the pipeline's
            audio loader accepts, e.g. MP3 or WAV).
        device: Device for inference (e.g. "cuda" for GPU, "cpu" for CPU).
        torch_dtype: Torch data type used for model computations.

    Returns:
        The transcribed text, or an empty string if transcription fails.
    """
    # Pre-bind so the `finally` cleanup is safe even when pipeline()
    # itself raises (model download failure, invalid device, ...).
    # Without this, `del pipe` raised UnboundLocalError and masked
    # the original exception.
    pipe = None
    try:
        # Build the ASR pipeline with explicit device/dtype; long audio is
        # processed in overlapping chunks (25 s windows, 2 s stride).
        pipe = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-small",
            device=device,
            chunk_length_s=25,
            stride_length_s=2,
            torch_dtype=torch_dtype,
        )
        # return_timestamps=True lets the pipeline stitch chunked output;
        # only the plain text is returned to the caller.
        result = pipe(audio_path, return_timestamps=True)
        return result['text']
    except Exception as e:
        # Best-effort API: report the failure and return an empty
        # transcript rather than propagating (callers rely on "" on error).
        print(f"Error during transcription: {str(e)}")
        return ""
    finally:
        # Drop the model reference (if created) and free GPU memory.
        if pipe is not None:
            del pipe
        clear_gpu_cache()
if __name__ == "__main__":
    # Demo entry point: pick the best available device/dtype, transcribe a
    # sample file, and print the resulting text.
    device, dtype = get_device_and_dtype()
    audio_file = "/workspaces/Video_Analyser/app_srv/downloads/45677153-510d-4f47-95ee-c1b4b0843433/audio.mp3.mp3"
    print(transcribe_audio(audio_file, device, dtype))