prince-canuma committed on
Commit
e39f7e5
·
verified ·
1 Parent(s): b1048aa

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ - zh
6
+ base_model:
7
+ - openbmb/MiniCPM4-0.5B
8
+ pipeline_tag: text-to-speech
9
+ library_name: voxcpm1.5
10
+ tags:
11
+ - text-to-speech
12
+ - speech
13
+ - speech generation
14
+ - voice cloning
15
+ - mlx
16
+ ---
17
+
18
+ # mlx-community/VoxCPM1.5-6bit
19
+ This model was converted to MLX format from [`openbmb/VoxCPM1.5`](https://huggingface.co/openbmb/VoxCPM1.5) using mlx-audio version **0.2.7**.
20
+ Refer to the [original model card](https://huggingface.co/openbmb/VoxCPM1.5) for more details on the model.
21
+ ## Use with mlx
22
+
23
+ ```bash
24
+ pip install -U mlx-audio
25
+ ```
26
+
27
+ ```bash
28
+ python -m mlx_audio.tts.generate --model mlx-community/VoxCPM1.5-6bit --text "Hello! This speech was generated with VoxCPM running on MLX."
29
+ ```
config.json ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architecture": "voxcpm",
3
+ "audio_vae_config": {
4
+ "encoder_dim": 64,
5
+ "encoder_rates": [
6
+ 2,
7
+ 3,
8
+ 6,
9
+ 7,
10
+ 7
11
+ ],
12
+ "latent_dim": 64,
13
+ "decoder_dim": 2048,
14
+ "decoder_rates": [
15
+ 7,
16
+ 7,
17
+ 6,
18
+ 3,
19
+ 2
20
+ ],
21
+ "sample_rate": 44100
22
+ },
23
+ "device": "cuda",
24
+ "dit_config": {
25
+ "hidden_dim": 1024,
26
+ "ffn_dim": 4096,
27
+ "num_heads": 16,
28
+ "num_layers": 8,
29
+ "cfm_config": {
30
+ "sigma_min": 1e-06,
31
+ "solver": "euler",
32
+ "t_scheduler": "log-norm",
33
+ "inference_cfg_rate": 2.0
34
+ }
35
+ },
36
+ "dtype": "bfloat16",
37
+ "encoder_config": {
38
+ "hidden_dim": 1024,
39
+ "ffn_dim": 4096,
40
+ "num_heads": 16,
41
+ "num_layers": 8
42
+ },
43
+ "feat_dim": 64,
44
+ "lm_config": {
45
+ "bos_token_id": 1,
46
+ "eos_token_id": 2,
47
+ "hidden_size": 1024,
48
+ "intermediate_size": 4096,
49
+ "max_position_embeddings": 32768,
50
+ "num_attention_heads": 16,
51
+ "num_hidden_layers": 24,
52
+ "num_key_value_heads": 2,
53
+ "rms_norm_eps": 1e-05,
54
+ "rope_theta": 10000,
55
+ "rope_scaling": {
56
+ "type": "longrope",
57
+ "long_factor": [
58
+ 1.0004360675811768,
59
+ 1.0668443441390991,
60
+ 1.1631425619125366,
61
+ 1.3025742769241333,
62
+ 1.5040205717086792,
63
+ 1.7941505908966064,
64
+ 2.2101221084594727,
65
+ 2.802666664123535,
66
+ 3.6389970779418945,
67
+ 4.804192543029785,
68
+ 6.39855432510376,
69
+ 8.527148246765137,
70
+ 11.277542114257812,
71
+ 14.684998512268066,
72
+ 18.69317054748535,
73
+ 23.13019371032715,
74
+ 27.72362518310547,
75
+ 32.1606559753418,
76
+ 36.168827056884766,
77
+ 39.57627868652344,
78
+ 42.32667541503906,
79
+ 44.45526885986328,
80
+ 46.04962921142578,
81
+ 47.21482849121094,
82
+ 48.05115509033203,
83
+ 48.64370346069336,
84
+ 49.05967712402344,
85
+ 49.34980392456055,
86
+ 49.551246643066406,
87
+ 49.69068145751953,
88
+ 49.78697967529297,
89
+ 49.85338592529297
90
+ ],
91
+ "short_factor": [
92
+ 1.0004360675811768,
93
+ 1.0668443441390991,
94
+ 1.1631425619125366,
95
+ 1.3025742769241333,
96
+ 1.5040205717086792,
97
+ 1.7941505908966064,
98
+ 2.2101221084594727,
99
+ 2.802666664123535,
100
+ 3.6389970779418945,
101
+ 4.804192543029785,
102
+ 6.39855432510376,
103
+ 8.527148246765137,
104
+ 11.277542114257812,
105
+ 14.684998512268066,
106
+ 18.69317054748535,
107
+ 23.13019371032715,
108
+ 27.72362518310547,
109
+ 32.1606559753418,
110
+ 36.168827056884766,
111
+ 39.57627868652344,
112
+ 42.32667541503906,
113
+ 44.45526885986328,
114
+ 46.04962921142578,
115
+ 47.21482849121094,
116
+ 48.05115509033203,
117
+ 48.64370346069336,
118
+ 49.05967712402344,
119
+ 49.34980392456055,
120
+ 49.551246643066406,
121
+ 49.69068145751953,
122
+ 49.78697967529297,
123
+ 49.85338592529297
124
+ ],
125
+ "original_max_position_embeddings": 32768
126
+ },
127
+ "vocab_size": 73448,
128
+ "scale_emb": 12,
129
+ "dim_model_base": 256,
130
+ "scale_depth": 1.4,
131
+ "use_mup": false
132
+ },
133
+ "max_length": 8192,
134
+ "patch_size": 4,
135
+ "quantization": {
136
+ "group_size": 64,
137
+ "bits": 6,
138
+ "mode": "affine"
139
+ },
140
+ "quantization_config": {
141
+ "group_size": 64,
142
+ "bits": 6,
143
+ "mode": "affine"
144
+ },
145
+ "residual_lm_num_layers": 8,
146
+ "scalar_quantization_latent_dim": 256,
147
+ "scalar_quantization_scale": 9
148
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b5c83684b36ab1c1add49dd320941859c21167ba7eb987e68e2307822796af5
3
+ size 823822876
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<|im_end|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<|im_start|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "<|tool_call|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "<|execute_start|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "<|execute_end|>",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ {
39
+ "content": "<|fim_prefix|>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ {
46
+ "content": "<|fim_middle|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false
51
+ },
52
+ {
53
+ "content": "<|fim_suffix|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false
58
+ }
59
+ ],
60
+ "bos_token": {
61
+ "content": "<s>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false
66
+ },
67
+ "eos_token": {
68
+ "content": "</s>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false
73
+ },
74
+ "unk_token": {
75
+ "content": "<unk>",
76
+ "lstrip": false,
77
+ "normalized": false,
78
+ "rstrip": false,
79
+ "single_word": false
80
+ }
81
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "101": {
30
+ "content": "<|audio_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "102": {
38
+ "content": "<|audio_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "103": {
46
+ "content": "<|audio_prompt_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "104": {
54
+ "content": "<|audio_prompt_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "105": {
62
+ "content": "<|background|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "106": {
70
+ "content": "<|/background|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "107": {
78
+ "content": "<|characters|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "108": {
86
+ "content": "<|/characters|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "109": {
94
+ "content": "<|speaker_id|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "110": {
102
+ "content": "<|/speaker_id|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "111": {
110
+ "content": "<|span|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "112": {
118
+ "content": "<|/span|>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": true
124
+ },
125
+ "73440": {
126
+ "content": "<|im_end|>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": true
132
+ },
133
+ "73441": {
134
+ "content": "<|im_start|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": true
140
+ },
141
+ "73442": {
142
+ "content": "<|tool_call|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": true
148
+ },
149
+ "73443": {
150
+ "content": "<|execute_start|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": true
156
+ },
157
+ "73444": {
158
+ "content": "<|execute_end|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": true
164
+ },
165
+ "73445": {
166
+ "content": "<|fim_prefix|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": true
172
+ },
173
+ "73446": {
174
+ "content": "<|fim_middle|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": true
180
+ },
181
+ "73447": {
182
+ "content": "<|fim_suffix|>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": true
188
+ }
189
+ },
190
+ "additional_special_tokens": [
191
+ "<|im_end|>",
192
+ "<|im_start|>",
193
+ "<|tool_call|>",
194
+ "<|execute_start|>",
195
+ "<|execute_end|>",
196
+ "<|fim_prefix|>",
197
+ "<|fim_middle|>",
198
+ "<|fim_suffix|>"
199
+ ],
200
+ "bos_token": "<s>",
201
+ "clean_up_tokenization_spaces": false,
202
+ "eos_token": "<|im_end|>",
203
+ "legacy": true,
204
+ "model_max_length": 1000000000000000019884624838656,
205
+ "pad_token": null,
206
+ "sp_model_kwargs": {},
207
+ "spaces_between_special_tokens": false,
208
+ "tokenizer_class": "LlamaTokenizer",
209
+ "unk_token": "<unk>",
210
+ "use_default_system_prompt": false,
211
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
212
+ }