bezzam HF Staff committed on
Commit
1b8c6ec
·
verified ·
1 Parent(s): 15f59d4

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +7 -8
README.md CHANGED
@@ -59,13 +59,12 @@ pip install diffusers
59
 
60
  ```python
61
  import torch
62
- from transformers import VibeVoiceFeatureExtractor, VibeVoiceAcousticTokenizerModel
63
  from transformers.audio_utils import load_audio_librosa
64
  from scipy.io import wavfile
65
 
66
 
67
- model_path = "bezzam/VibeVoice-AcousticTokenizer"
68
- fe_path = "bezzam/VibeVoice-1.5B"
69
  sampling_rate = 24000
70
 
71
  # load audio
@@ -75,11 +74,11 @@ audio = load_audio_librosa(
75
  )
76
 
77
  # load model
78
- torch_device = "cuda" if torch.cuda.is_available() else "cpu"
79
- feature_extractor = VibeVoiceFeatureExtractor.from_pretrained(fe_path)
80
  model = VibeVoiceAcousticTokenizerModel.from_pretrained(
81
- model_path, device_map=torch_device,
82
- ).to(torch_device).eval()
83
 
84
  # preprocess audio
85
  inputs = feature_extractor(
@@ -89,7 +88,7 @@ inputs = feature_extractor(
89
  pad_to_multiple_of=3200,
90
  return_attention_mask=False,
91
  return_tensors="pt"
92
- ).to(torch_device)
93
  print("Input audio shape:", inputs.input_features.shape)
94
  # Input audio shape: torch.Size([1, 1, 224000])
95
 
 
59
 
60
  ```python
61
  import torch
62
+ from transformers import AutoFeatureExtractor, VibeVoiceAcousticTokenizerModel
63
  from transformers.audio_utils import load_audio_librosa
64
  from scipy.io import wavfile
65
 
66
 
67
+ model_id = "bezzam/VibeVoice-AcousticTokenizer"
 
68
  sampling_rate = 24000
69
 
70
  # load audio
 
74
  )
75
 
76
  # load model
77
+ device = "cuda" if torch.cuda.is_available() else "cpu"
78
+ feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
79
  model = VibeVoiceAcousticTokenizerModel.from_pretrained(
80
+ model_id, device_map=device,
81
+ ).eval()
82
 
83
  # preprocess audio
84
  inputs = feature_extractor(
 
88
  pad_to_multiple_of=3200,
89
  return_attention_mask=False,
90
  return_tensors="pt"
91
+ ).to(device)
92
  print("Input audio shape:", inputs.input_features.shape)
93
  # Input audio shape: torch.Size([1, 1, 224000])
94