# Copyright (c) 2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Reusable workflow (triggered only through `workflow_call`): builds the CI
# container once, then fans out one job per entry of the matrix below.
name: NeMo E2E NeMo2 Tests

on:
  workflow_call:
    inputs:
      # Forwarded unchanged to the test-template action as `tests_to_run`.
      test_to_run:
        required: true
        type: string
      # Image name handed to both the container build and the test-template.
      image-name:
        required: false
        default: nemo_container_nemo2
        type: string

jobs:
  build:
    uses: ./.github/workflows/_build_container.yml
    with:
      image-name: ${{ inputs.image-name }}
      dockerfile: docker/Dockerfile.ci

  e2e-tests:
    # Entries flagged `is-optional` get a PLEASEFIXME_ prefix in the job name.
    name: ${{ matrix.is-optional && 'PLEASEFIXME_' || '' }}${{ matrix.script }}
    needs: [build]
    runs-on: ${{ matrix.runner }}
    strategy:
      # Let every matrix job run to completion even when a sibling fails.
      fail-fast: false
      matrix:
        # Keys per entry:
        #   script       test script name; also used as the job name
        #   runner       runner label the job is scheduled on (`runs-on`)
        #   is-optional  (optional) forwarded as `is_optional`; false if unset
        #   timeout      (optional) forwarded as `timeout`; 10 if unset
        #                (units are defined by the test-template action)
        include:
          - script: L2_NeMo_2_GPT_Pretraining_no_transformer_engine
            runner: self-hosted-azure
          - script: L2_NeMo_2_llama3_pretraining_recipe
            runner: self-hosted-azure
          # - script: L2_NeMo_2_llama3_pytorch_profiler
          #   runner: self-hosted-azure
          #   timeout: 20
          - script: L2_NeMo_2_llama3_fault_tolerance_plugin
            runner: self-hosted-azure
          - script: L2_NeMo_2_llama3_straggler_detection
            runner: self-hosted-azure
          - script: L2_NeMo_2_llama3_local_ckpt
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_DDP_Param_Parity_check
            runner: self-hosted-azure
          - script: L2_NeMo_2_Hyena_Conversion_from_HF
            runner: self-hosted-azure
          - script: L2_NeMo_2_Hyena_DDP_Pretraining_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Hyena_Mixer_Test
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_Hyena_PP_Pretraining_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Hyena_TP_Pretraining_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Hyena_CP_Pretraining_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_SSM_Pretraining
            runner: self-hosted-azure
          - script: L2_NeMo_2_SSM_Finetuning
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_HF_MODEL_IMPORT
            runner: self-hosted-azure
          - script: L2_NeMo_2_jit_callback
            runner: self-hosted-azure
          - script: L2_NeMo_2_T5_Pretraining
            runner: self-hosted-azure
          - script: L2_NeMo_2_T5_MockData_Pretraining
            runner: self-hosted-azure
          - script: L2_NeMo_2_T5_Finetuning
            runner: self-hosted-azure
          - script: L2_NeMo_2_T5_Squad
            runner: self-hosted-azure
          - script: L2_NeMo_2_T5_LoRA
            runner: self-hosted-azure
          - script: L2_NeMo_2_BERT_Pretraining_Megatron
            runner: self-hosted-azure
          - script: L2_NeMo_2_BERT_Pretraining_HuggingFace
            runner: self-hosted-azure
          - script: L2_NeMo_2_NEVA_MOCK_PRETRAIN_TP2
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_MOCK_PRETRAIN_PP2
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_MOCK_PRETRAIN_CP2
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_MOCK_FINETUNE_TP2
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_ENERGON_FINETUNE_TP2
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_MOCK_FINETUNE_PP2
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_MOCK_FINETUNE_CP2
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_PRELOADED_FINETUNE_PP2_SEQPACK_PAD
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_PRELOADED_FINETUNE_PP2_SEQPACK_TRUNC
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_LOAD_GENERATE
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_LLAVA_IMPORT
            runner: self-hosted-azure-gpus-1
          - script: L2_NEMO_2_MLLAMA_Inference
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_MLLAMA_MOCK_FINETUNE_TP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_MLLAMA_PRELOADED_FINETUNE_TP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_MLLAMA_ENERGON_FINETUNE_TP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_MLLAMA_IMPORT
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_Mixtral_Pretraining
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_SFT_TP1PP1_MBS1
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_SFT_TP1PP1_MBS2
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_SFT_TP1PP2_MBS2
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_SFT_TP2PP1_MBS2
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_SFT_TP1PP1_MBS1_PACKED
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS2
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_LoRA_TP1PP2_MBS2
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_LoRA_TP2PP1_MBS2
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_PACKED
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_DoRA_TP1PP1_MBS1_PACKED
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_CLoRA_TP1PP1_MBS1_PACKED
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_Chat
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_TE_op_fuser
            runner: self-hosted-azure
          - script: L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2_exclude
            runner: self-hosted-azure
          - script: L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2
            runner: self-hosted-azure
          - script: L2_NeMo_2_Mixtral_LoRA_TP1PP1_MBS1
            runner: self-hosted-azure
          - script: L2_NeMo_2_Mixtral_LoRA_TP2PP1_MBS1
            runner: self-hosted-azure
          - script: L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1
            runner: self-hosted-azure
          - script: L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1_exclude
            runner: self-hosted-azure
          - script: L2_NeMo_2_Mistral_LoRA_TP2PP1_MBS1
            runner: self-hosted-azure
          - script: L2_NEMO_2_LoRA_MERGE
            runner: self-hosted-azure
          - script: L2_NEMO_2_LoRA_Inference
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_NeMo_Mcore_Mixtral_bitexact
            runner: self-hosted-azure
            is-optional: true
          - script: L2_NeMo_2_PTQ_Llama2_FP8_trtllm
            runner: self-hosted-azure
          - script: L2_NeMo_2_PTQ_Llama2_FP8_nemo
            runner: self-hosted-azure
          - script: L2_NeMo_2_Distill_Llama3_TP1PP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_Prune_Llama_TP1PP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_Speculative_Llama3_TP2PP1
            runner: self-hosted-azure
          - script: L2_NeMo_2_LLAVA_NEXT_MOCK_TRAINING
            runner: self-hosted-azure
          - script: L2_NeMo_2_LLAVA_NEXT_HF_CONVERSION
            runner: self-hosted-azure
          - script: L2_NeMo_2_LLAVA_NEXT_ENERGON_TRAIN
            runner: self-hosted-azure
          - script: L2_NeMo_2_LLAVA_NEXT_ENERGON_PACKED_TRAIN
            runner: self-hosted-azure
          - script: L2_NeMo_2_AVLM_MOCK_TRAINING
            runner: self-hosted-azure
          - script: L2_NeMo_2_AVLM_ENERGON_TRAIN
            runner: self-hosted-azure
          - script: L2_NeMo_2_AVLM_ENERGON_CP2_TRAIN
            runner: self-hosted-azure
          - script: L2_NeMo_2_CLIP_PRETRAIN
            runner: self-hosted-azure
            timeout: 20
          - script: L2_NeMo_2_CLIP_INFER
            runner: self-hosted-azure
          - script: L2_NeMo_2_Auto_Configurator_llama_TP1_PP1_MBS124
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_Auto_Configurator_bert_TP1_PP1_MBS124
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_Auto_Configurator_t5_TP1_PP1_MBS124
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_Auto_Configurator_callbacks
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_Conversion_Test_Baichuan2
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_ChatGLM
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_DeepSeek
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Gemma
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Gemma2
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Gemma3_llm
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Gemma3_vlm
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Mistral
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Llama
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Llama_Embedding
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Llama4
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Llama4_Text
            runner: self-hosted-azure
          - script: L2_NeMo_2_PTQ_Llama4_FP8_nemo
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Nemotron
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Nemotron_H_4B
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Phi3Mini
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Qwen2
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Qwen3
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Starcoder
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Starcoder2
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_BERT
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_T5
            runner: self-hosted-azure
          - script: L2_NeMo_2_QWEN2VL_MOCK_FINETUNE_TP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_QWEN2VL_PRELOADED_FINETUNE_TP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_QWEN2VL_ENERGON_FINETUNE_TP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_LLAMA4_MOCK_FINETUNE_PP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_LLAMA4_MOCK_FINETUNE_CP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_LLAMA4_ENERGON_FINETUNE_EP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_Diffusion_Recipe_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Diffusion_Taskencoder_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Flux_Import_Test
            runner: self-hosted-azure
            is-optional: true
          - script: L2_NeMo_2_Flux_Inference_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Flux_Training_DDP_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Flux_Training_FSDP_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Flux_ControlNet_Training_DDP_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Flux_ControlNet_Training_FSDP_Test
            runner: self-hosted-azure
            is-optional: true
    steps:
      # Check the repository out into a directory named after the run id.
      - name: Checkout
        uses: actions/checkout@v4
        with:
          path: ${{ github.run_id }}

      # Hand the selected matrix entry over to the shared test-template action.
      - name: main
        uses: NVIDIA/NeMo/.github/actions/test-template@main
        with:
          runner: ${{ runner.name }}
          script: ${{ matrix.script }}
          tests_to_run: ${{ inputs.test_to_run }}
          image: ${{ inputs.image-name }}
          # Matrix entries that omit these keys fall back to false / 10.
          is_optional: ${{ matrix.is-optional || false }}
          timeout: ${{ matrix.timeout || 10 }}