{ "model_family": "Helion", "version": "2.5", "release_type": "research_and_development", "variants": { "base": { "name": "Helion-2.5-Rnd", "full_name": "DeepXR/Helion-2.5-Rnd", "description": "Base research model with full precision (FP16)", "parameters": "70B", "precision": "float16", "context_length": 131072, "safetensors_shards": 83, "shard_naming": "shard_00 to shard_82", "shard_size_gb": 1.69, "shard_size_gib": 1.57, "total_size_gb": 140.27, "status": "active", "recommended_use": [ "Research", "Development", "High-accuracy inference" ], "hardware_requirements": { "min_vram_gb": 145, "min_gpus": 2, "recommended_gpu": "A100 80GB" } }, "instruct": { "name": "Helion-2.5-Rnd-Instruct", "full_name": "DeepXR/Helion-2.5-Rnd-Instruct", "description": "Instruction-tuned variant optimized for following instructions", "parameters": "70B", "precision": "bfloat16", "context_length": 131072, "status": "planned", "recommended_use": [ "Instruction following", "Task completion", "Structured outputs" ], "fine_tuning": { "type": "supervised", "data_focus": "instruction_pairs" } }, "chat": { "name": "Helion-2.5-Rnd-Chat", "full_name": "DeepXR/Helion-2.5-Rnd-Chat", "description": "Conversational variant optimized for multi-turn dialogue", "parameters": "70B", "precision": "bfloat16", "context_length": 131072, "status": "planned", "recommended_use": [ "Conversational AI", "Customer service", "Interactive applications" ], "fine_tuning": { "type": "rlhf", "data_focus": "conversational_data" } }, "code": { "name": "Helion-2.5-Rnd-Code", "full_name": "DeepXR/Helion-2.5-Rnd-Code", "description": "Code-specialized variant with enhanced programming capabilities", "parameters": "70B", "precision": "bfloat16", "context_length": 131072, "status": "planned", "recommended_use": [ "Code generation", "Code review", "Bug fixing", "Documentation" ], "fine_tuning": { "type": "supervised", "data_focus": "code_repositories" }, "enhanced_languages": [ "Python", "JavaScript", "TypeScript", "Rust", "Go", "Java" ] }, "math": { "name": "Helion-2.5-Rnd-Math", "full_name": "DeepXR/Helion-2.5-Rnd-Math", "description": "Mathematics-specialized variant for advanced problem solving", "parameters": "70B", "precision": "bfloat16", "context_length": 131072, "status": "planned", "recommended_use": [ "Mathematical reasoning", "Proof generation", "Problem solving", "Educational applications" ], "fine_tuning": { "type": "supervised", "data_focus": "mathematical_proofs" } } }, "deployment_configurations": { "production": { "description": "Production-ready configuration with optimizations", "settings": { "tensor_parallel_size": 4, "gpu_memory_utilization": 0.95, "max_batch_size": 32, "enable_prefix_caching": true, "enable_chunked_prefill": true } }, "development": { "description": "Development configuration for testing", "settings": { "tensor_parallel_size": 2, "gpu_memory_utilization": 0.85, "max_batch_size": 8, "enable_prefix_caching": false, "enable_chunked_prefill": false } }, "research": { "description": "Research configuration for experimentation", "settings": { "tensor_parallel_size": 2, "gpu_memory_utilization": 0.90, "max_batch_size": 4, "enable_prefix_caching": false, "enable_chunked_prefill": false, "enable_logging": true } } }, "comparison_matrix": { "base_vs_instruct": { "base_advantages": [ "More flexible for fine-tuning", "Better for creative tasks", "Less constrained outputs" ], "instruct_advantages": [ "Better instruction following", "More structured outputs", "Improved task completion" ] }, "base_vs_chat": { "base_advantages": [ "Better for single-turn tasks", "More diverse outputs", "Flexible formatting" ], "chat_advantages": [ "Better conversation coherence", "Improved context awareness", "Natural dialogue flow" ] } }, "migration_guide": { "from_base_to_instruct": { "steps": [ "Update prompt format to instruction style", "Adjust temperature (typically lower)", "Add explicit task descriptions", "Use structured output formats" ], "example_prompt_change": { "base": "Write a function to sort a list", "instruct": "### Instruction:\nWrite a Python function that sorts a list in ascending order.\n\n### Response:" } }, "from_base_to_chat": { "steps": [ "Convert to chat message format", "Add system prompts", "Maintain conversation history", "Use appropriate message roles" ], "example_format_change": { "base": "Hello, how are you?", "chat": [ { "role": "system", "content": "You are a helpful assistant." }, { "role": "user", "content": "Hello, how are you?" } ] } } }, "version_history": { "2.5.0-rnd": { "release_date": "2025-01-30", "status": "current", "changes": [ "Initial research release", "70B parameter model", "131K context with YARN", "SafeTensors format (96 shards)", "Full precision (BF16)" ] } }, "roadmap": { "upcoming_variants": [ { "name": "Helion-2.5-Rnd-Instruct", "expected": "Q2 2025", "status": "in_development" }, { "name": "Helion-2.5-Rnd-Chat", "expected": "Q2 2025", "status": "planned" }, { "name": "Helion-2.5-Rnd-Code", "expected": "Q3 2025", "status": "planned" } ], "future_features": [ "Multi-modal capabilities", "Extended context to 256K", "Improved multilingual support", "Domain-specific variants" ] } }