Refactor prepare_pt2, prepare_traced_pt2 (#13006) #14307

Workflow file for this run

name: trunk
on:
push:
branches:
- main
- release/*
tags:
- ciflow/trunk/*
pull_request:
paths:
- .ci/docker/ci_commit_pins/pytorch.txt
- .ci/scripts/**
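# Note: on pull requests, trunk only runs when the PyTorch pin or CI scripts
# change; pushes to main/release branches and ciflow/trunk tags always run it.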
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
cancel-in-progress: true
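# Note: the group key yields one group per PR (by PR number) or per commit SHA on
# push; the two boolean suffixes keep workflow_dispatch and schedule runs in their
# own groups, so cancel-in-progress only cancels runs of the same kind.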
jobs:
test-models-macos-cpu:
name: test-models-macos-cpu
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
strategy:
matrix:
# Mac runners are expensive, limited, and unreliable.
# Do some basic testing in the macOS jobs and rely mostly on the
# test-models-linux-aarch64 job instead.
model: [emformer_join, ic4, llama2, mobilebert, mv3, resnet50, vit, w2l]
backend: [xnnpack-quantization-delegation]
include:
- model: efficient_sam
backend: portable
- model: llama
backend: portable
- model: llama3_2_vision_encoder
backend: portable
- model: mv3
backend: portable
fail-fast: false
with:
runner: macos-m1-stable
python-version: '3.11'
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
script: |
MODEL_NAME=${{ matrix.model }}
BUILD_TOOL=cmake
BACKEND=${{ matrix.backend }}
bash .ci/scripts/setup-conda.sh
# Set up macOS dependencies, as there is no Docker support on macOS at the moment
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
# Build and test ExecuTorch
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}"
test-models-arm-zephyr:
name: test-models-arm-zephyr
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
strategy:
matrix:
model: [add]
fail-fast: false
with:
runner: linux.2xlarge
docker-image: ci-image:executorch-ubuntu-22.04-zephyr-sdk
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 120
script: |
MODEL_NAME=${{ matrix.model }}
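# Pick the most recently created conda env (the last entry in `conda env list`) and activate it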
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
source .ci/scripts/utils.sh
source .ci/scripts/zephyr-utils.sh
mkdir -p zephyr_scratch/
cd zephyr_scratch
export ZEPHYR_PROJ_ROOT=$(realpath $(pwd))
download_arm_zephyr_sdk
./zephyr-sdk-0.16.0/setup.sh -c -t arm-zephyr-eabi
cd $ZEPHYR_PROJ_ROOT
setup_zephyr_et_module
cd $ZEPHYR_PROJ_ROOT/modules/lib/executorch
install_executorch "--use-pt-pinned-commit"
.ci/scripts/setup-arm-baremetal-tools.sh --target-toolchain zephyr
source examples/arm/ethos-u-scratch/setup_path.sh
source $ZEPHYR_PROJ_ROOT/zephyr/zephyr-env.sh
cd $ZEPHYR_PROJ_ROOT/zephyr/samples/modules/executorch/arm/hello_world
west build -p always -b mps3/corstone300/fvp
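# Run the built ELF on the Corstone-300/Ethos-U55 FVP headlessly: visualisation and
# telnet are disabled, UART output is captured in sim.out, and the simulation is
# capped at 120 seconds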
FVP_Corstone_SSE-300_Ethos-U55 -a build/zephyr/zephyr.elf -C mps3_board.visualisation.disable-visualisation=1 -C mps3_board.telnetterminal0.start_telnet=0 -C mps3_board.uart0.out_file='sim.out' -C cpu0.CFGITCMSZ=15 -C cpu0.CFGDTCMSZ=15 --simlimit 120
grep -qF "Output[0][0]: (float) 2.000000" sim.out
exit_status=$? # store 0 if found (success), 1 if not (failure)
exit $exit_status
test-models-linux-aarch64:
name: test-models-linux-aarch64
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
strategy:
matrix:
model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe]
backend: [portable, xnnpack-quantization-delegation]
runner: [linux.arm64.2xlarge]
include:
- model: lstm
backend: portable
runner: linux.arm64.2xlarge
- model: mul
backend: portable
runner: linux.arm64.2xlarge
- model: softmax
backend: portable
runner: linux.arm64.2xlarge
- model: phi_4_mini
backend: portable
runner: linux.arm64.m7g.4xlarge
- model: qwen2_5
backend: portable
runner: linux.arm64.2xlarge
- model: llama3_2_vision_encoder
backend: portable
runner: linux.arm64.2xlarge
fail-fast: false
with:
runner: ${{ matrix.runner }}
docker-image: ci-image:executorch-ubuntu-22.04-gcc11-aarch64
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
script: |
# The generic Linux job uses the base conda env, not the one set up by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
MODEL_NAME=${{ matrix.model }}
BUILD_TOOL="cmake"
BACKEND=${{ matrix.backend }}
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
# Build and test ExecuTorch
PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}"
test-custom-ops-macos:
name: test-custom-ops-macos
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
strategy:
matrix:
include:
- build-tool: cmake
fail-fast: false
with:
runner: macos-m1-stable
python-version: '3.11'
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
BUILD_TOOL=${{ matrix.build-tool }}
bash .ci/scripts/setup-conda.sh
# Set up macOS dependencies, as there is no Docker support on macOS at the moment
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
# Build and test custom ops
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}"
test-selective-build-macos:
name: test-selective-build-macos
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
strategy:
matrix:
include:
- build-tool: cmake
fail-fast: false
with:
runner: macos-m1-stable
python-version: '3.11'
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
BUILD_TOOL=${{ matrix.build-tool }}
bash .ci/scripts/setup-conda.sh
# Set up macOS dependencies, as there is no Docker support on macOS at the moment
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
# Build and test selective build
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
test-demo-backend-delegation:
name: test-demo-backend-delegation
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
strategy:
matrix:
include:
- build-tool: buck2
- build-tool: cmake
fail-fast: false
with:
runner: linux.2xlarge
docker-image: ci-image:executorch-ubuntu-22.04-clang12
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
# The generic Linux job uses the base conda env, not the one set up by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
BUILD_TOOL=${{ matrix.build-tool }}
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
# Test demo backend delegation
PYTHON_EXECUTABLE=python bash examples/portable/scripts/test_demo_backend_delegation.sh "${BUILD_TOOL}"
test-arm-backend:
name: test-arm-backend
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
strategy:
matrix:
include:
- test_arm_baremetal: test_pytest_ops_ethosu_fvp
- test_arm_baremetal: test_pytest_models_ethosu_fvp
- test_arm_baremetal: test_run_ethosu_fvp
- test_arm_baremetal: test_models_tosa
- test_arm_baremetal: test_models_ethos-u55
- test_arm_baremetal: test_models_ethos-u85
fail-fast: false
with:
runner: linux.2xlarge.memory
docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 120
script: |
# The generic Linux job uses the base conda env, not the one set up by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
source .ci/scripts/utils.sh
install_executorch "--use-pt-pinned-commit"
.ci/scripts/setup-arm-baremetal-tools.sh
# Increase the number of files a user can monitor to bypass buck failures.
# Hopefully this is high enough for this setup.
sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024
ARM_TEST=${{ matrix.test_arm_baremetal }}
# Run test_arm_baremetal.sh with the selected test
backends/arm/test/test_arm_baremetal.sh "${ARM_TEST}"
test-arm-cortex-m-size-test:
name: test-arm-cortex-m-size-test
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
strategy:
matrix:
os: [bare_metal, zephyr-preset]
fail-fast: false
with:
runner: linux.2xlarge
docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
script: |
# The generic Linux job uses the base conda env, not the one set up by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
cxx_flags="-fno-exceptions -fno-rtti -Wall -Werror -Wno-int-in-bool-context -DET_HAVE_PREAD=0"
setup_script_args=""
if [[ ${{ matrix.os }} == "bare_metal" ]]; then
toolchain_prefix=arm-none-eabi-
threshold="109000"
toolchain_cmake=examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake
elif [[ ${{ matrix.os }} == "zephyr-preset" ]]; then
setup_script_args="--target-toolchain zephyr"
toolchain_prefix=arm-zephyr-eabi-
threshold="135000"
toolchain_cmake=examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake
else
echo "Fail unsupport OS selection ${{ matrix.os }}"
exit 1
fi
source .ci/scripts/utils.sh
install_executorch "--use-pt-pinned-commit"
.ci/scripts/setup-arm-baremetal-tools.sh ${setup_script_args}
source examples/arm/ethos-u-scratch/setup_path.sh
# Verify the toolchain is on PATH
${toolchain_prefix}c++ --version
# Resolve the CMake toolchain file to an absolute path
toolchain_cmake=$(realpath ${toolchain_cmake})
# Build and run size test
if [[ ${{ matrix.os }} == "bare_metal" ]]; then
bash test/build_size_test.sh "-DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON"
elif [[ ${{ matrix.os }} == "zephyr-preset" ]]; then
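# Zephyr preset: build and install the ExecuTorch core first, then configure the
# size test against the install tree with the Zephyr cross-toolchain file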
CXXFLAGS=${cxx_flags} cmake --preset zephyr -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_OPTIMIZE_SIZE=ON -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out .
cmake --build cmake-out -j9 --target install --config Release
CXXFLAGS=${cxx_flags} cmake -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out/test test
cmake --build cmake-out/test -j9 --config Release
else
echo "Fail unsupport OS selection ${{ matrix.os }}"
exit 1
fi
elf="cmake-out/test/size_test"
# Dump basic info
ls -al ${elf}
${toolchain_prefix}size ${elf}
# Dump symbols
python .github/scripts/run_nm.py -e ${elf}
python .github/scripts/run_nm.py -e ${elf} -f "executorch" -p "${toolchain_prefix}"
python .github/scripts/run_nm.py -e ${elf} -f "executorch_text" -p "${toolchain_prefix}"
# Add basic guard - TODO: refine this!
${toolchain_prefix}strip ${elf}
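# Measure the stripped ELF: the 5th field of `ls -la` output is the file size in bytes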
output=$(ls -la ${elf})
arr=($output)
size=${arr[4]}
echo "size: $size, threshold: $threshold"
if [[ "$size" -le "$threshold" ]]; then
echo "Success $size <= $threshold"
else
echo "Fail $size > $threshold"
exit 1
fi
test-arm-ootb-linux:
name: test-arm-ootb-linux
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
with:
runner: linux.2xlarge
docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
script: |
# The generic Linux job uses the base conda env, not the one set up by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
# Follow the steps required before running the notebooks,
# mirroring them as closely as possible
source .ci/scripts/utils.sh
install_executorch "--use-pt-pinned-commit"
.ci/scripts/setup-arm-baremetal-tools.sh
source examples/arm/ethos-u-scratch/setup_path.sh
# Install requirements for converting notebooks
pip install notebook
# Run OOTB tests
backends/arm/test/test_arm_ootb.sh
test-coreml-delegate:
name: test-coreml-delegate
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
with:
runner: macos-latest-xlarge
python-version: '3.11'
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
script: |
BUILD_TOOL=cmake
bash .ci/scripts/setup-conda.sh
# Set up macOS dependencies, as there is no Docker support on macOS at the moment
GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
# Build and test the Core ML delegate
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh
test-static-llama-ane:
name: test-static-llama-ane
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
with:
runner: macos-m1-stable
python-version: '3.11'
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
set -eux
bash .ci/scripts/setup-conda.sh
eval "$(conda shell.bash hook)"
# Install requirements
${CONDA_RUN} sh install_requirements.sh
${CONDA_RUN} sh backends/apple/coreml/scripts/install_requirements.sh
${CONDA_RUN} python install_executorch.py
${CONDA_RUN} sh examples/models/llama/install_requirements.sh
# Test ANE llama
${CONDA_RUN} sh .ci/scripts/test_ane_static_llama.sh
test-llama-torchao-lowbit:
name: test-llama-torchao-lowbit
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
with:
runner: macos-m1-stable
python-version: '3.11'
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
set -eux
bash .ci/scripts/setup-conda.sh
eval "$(conda shell.bash hook)"
# Install requirements
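# EXECUTORCH_BUILD_TORCHAO=1 builds the torchao low-bit kernels into the install,
# which the lowbit test below depends on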
${CONDA_RUN} EXECUTORCH_BUILD_TORCHAO=1 python install_executorch.py
${CONDA_RUN} sh examples/models/llama/install_requirements.sh
# Run test
${CONDA_RUN} sh .ci/scripts/test_llama_torchao_lowbit.sh
test-llama-runner-linux:
# Test both Linux x86 and Linux aarch64
name: test-llama-runner-linux
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
strategy:
matrix:
dtype: [fp32]
mode: [portable, xnnpack+custom]
runner: [linux.2xlarge, linux.arm64.2xlarge]
docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64]
include:
- dtype: bf16
mode: portable
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-clang12
- dtype: bf16
mode: portable
runner: linux.arm64.2xlarge
docker-image: executorch-ubuntu-22.04-gcc11-aarch64
- dtype: bf16
mode: custom
runner: linux.arm64.2xlarge
docker-image: executorch-ubuntu-22.04-gcc11-aarch64
# Excluding specific runner + docker image combinations that don't make sense:
# - Excluding the ARM64 gcc image on the x86 runner (linux.2xlarge)
# - Excluding the x86 clang image on the ARM64 runner (linux.arm64.2xlarge)
exclude:
- runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-gcc11-aarch64
- runner: linux.arm64.2xlarge
docker-image: executorch-ubuntu-22.04-clang12
fail-fast: false
with:
runner: ${{ matrix.runner }}
docker-image: ci-image:${{ matrix.docker-image }}
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 900
script: |
# The generic Linux job uses the base conda env, not the one set up by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
DTYPE=${{ matrix.dtype }}
BUILD_TOOL="cmake"
MODE=${{ matrix.mode }}
ARTIFACTS_DIR_NAME="artifacts-to-be-uploaded/${DTYPE}-${MODE}"
ARTIFACTS_DIR_NAME="${ARTIFACTS_DIR_NAME/+/-}"
# Set up ExecuTorch
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
# Install requirements for export_llama
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}"
test-llama-runner-macos:
name: test-llama-runner-mac
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
strategy:
matrix:
dtype: [fp32]
mode: [mps, coreml, xnnpack+custom+quantize_kv]
fail-fast: false
with:
runner: macos-m1-stable
python-version: '3.11'
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 900
script: |
DTYPE=${{ matrix.dtype }}
MODE=${{ matrix.mode }}
bash .ci/scripts/setup-conda.sh
# Set up ExecuTorch
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool cmake
if [[ "${MODE}" == "coreml" ]]; then
# Install the Core ML delegate requirements
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh
echo "Finishing installing coreml."
fi
# Install requirements for export_llama
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}"
# # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner.
# test-llava-runner-macos:
# name: test-llava-runner-macos
# uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
# strategy:
# fail-fast: false
# with:
# runner: macos-14-xlarge
# python-version: '3.11'
# submodules: 'recursive'
# ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
# timeout: 900
# script: |
# BUILD_TOOL=cmake
# bash .ci/scripts/setup-conda.sh
# # Set up macOS dependencies, as there is no Docker support on macOS at the moment
# GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
# # install Llava requirements
# ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
# ${CONDA_RUN} bash examples/models/llava/install_requirements.sh
# # run python unittest
# ${CONDA_RUN} python -m unittest examples.models.llava.test.test_llava
# # run e2e (export, tokenizer and runner)
# PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llava.sh
test-qnn-model:
name: test-qnn-model
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
strategy:
matrix:
dtype: [fp32]
model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l]
fail-fast: false
with:
runner: linux.2xlarge
docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 900
script: |
# The generic Linux job uses the base conda env, not the one set up by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
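# setup-qnn-deps.sh fetches the Qualcomm AI Engine Direct (QNN) SDK and
# build-qnn-sdk.sh builds the ExecuTorch QNN backend against it (a summary based
# on the script names; the details live in the scripts themselves)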
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn"
test-qnn-optimum-model:
name: test-qnn-optimum-model
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
strategy:
matrix:
dtype: [fp32]
model: [cvt, dit, efficientnet, focalnet, mobilevit_v1, mobilevit_v2, pvt, swin, albert, bert, distilbert, roberta] # eurobert requires transformers >= 4.48.0, skip for now
fail-fast: false
with:
runner: linux.2xlarge
docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 900
script: |
# The generic Linux job uses the base conda env, not the one set up by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn"
test-models-macos-coreml:
name: test-models-macos-coreml
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
strategy:
matrix:
model: [dl3, edsr, efficient_sam, emformer_join, emformer_transcribe, ic3, ic4, mobilebert, mv2, mv3, resnet50, vit, w2l]
fail-fast: false
with:
runner: macos-m1-stable
python-version: '3.11'
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
script: |
MODEL_NAME=${{ matrix.model }}
BUILD_TOOL=cmake
BACKEND="coreml-pybind"
# Set model-specific overrides
if [[ "${MODEL_NAME}" == "mobilebert" ]]; then
# See https://github.com/pytorch/executorch/issues/12907
# mobilebert produces NaN output on fp16 and high MSE on fp32, so the runtime test is disabled for now
BACKEND="coreml"
fi
if [[ "${MODEL_NAME}" == "efficient_sam" ]]; then
# See https://github.com/pytorch/executorch/issues/12906
# efficient_sam fails to run on CoreML
BACKEND="coreml"
fi
bash .ci/scripts/setup-conda.sh
# Set up macOS dependencies, as there is no Docker support on macOS at the moment
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh
echo "Finishing installing coreml."
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}"
test-models-macos-mps:
name: test-models-macos-mps
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
strategy:
fail-fast: false
with:
runner: macos-m1-stable
python-version: '3.11'
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
script: |
BUILD_TOOL=cmake
bash .ci/scripts/setup-conda.sh
# Set up macOS dependencies, as there is no Docker support on macOS at the moment
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
# Build and test MPS models
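# ::group:: / ::endgroup:: fold each model's logs into a collapsible section in the
# GitHub Actions UI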
for MODEL_NAME in mv3 ic4 resnet50 edsr mobilebert w2l; do
echo "::group::Exporting mps model: $MODEL_NAME"
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "mps"
echo "::endgroup::"
done
test-huggingface-transformers:
# NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
if: ${{ !github.event.pull_request.head.repo.fork }}
name: test-huggingface-transformers
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
secrets: inherit
strategy:
matrix:
hf_model_id: [
google/gemma-3-1b-it,
Qwen/Qwen3-0.6B,
HuggingFaceTB/SmolLM2-135M,
meta-llama/Llama-3.2-1B,
allenai/OLMo-1B-hf,
]
fail-fast: false
with:
secrets-env: EXECUTORCH_HF_TOKEN
runner: linux.2xlarge.memory
docker-image: ci-image:executorch-ubuntu-22.04-clang12
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
upload-artifact: profiling-artifacts-${{ strategy.job-index }}
script: |
echo "::group::Set up ExecuTorch"
# The generic Linux job uses the base conda env, not the one set up by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
# Build executor_runner with ETDump enabled
PYTHON_EXECUTABLE=python cmake -DPYTHON_EXECUTABLE=python \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DEXECUTORCH_ENABLE_LOGGING=1 \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-DEXECUTORCH_BUILD_XNNPACK=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-DEXECUTORCH_BUILD_DEVTOOLS=ON \
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
-Bcmake-out .
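# DEVTOOLS and EVENT_TRACER are what allow executor_runner to emit the ETDump
# consumed by the profiling step below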
cmake --build cmake-out -j16 --target install --config Release
echo "::endgroup::"
echo "::group::Set up Hugging Face"
pip install -U "huggingface_hub[cli]"
huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
OPTIMUM_ET_COMMIT=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
git clone https://github.com/huggingface/optimum-executorch
pushd optimum-executorch
# There is no release yet; for CI stability, always test from the same commit on main
git checkout $OPTIMUM_ET_COMMIT
python install_dev.py --skip_override_torch
popd
pip list
echo "::endgroup::"
echo "::group::Export to ExecuTorch"
# Pass matrix variable as environment variable
export MODEL_ID="${{ matrix.hf_model_id }}"
export OUTPUT_DIR="$(pwd)/${MODEL_ID}_custom_sdpa_kv_cache_8da4w"
pushd optimum-executorch
ARGS=(
"--model" "${MODEL_ID}"
"--task" "text-generation"
"--recipe" "xnnpack"
"--use_custom_sdpa"
"--use_custom_kv_cache"
"--qlinear" "8da4w"
"--qembedding" "8w"
"--output_dir" "${OUTPUT_DIR}"
)
optimum-cli export executorch "${ARGS[@]}"
ls -FlAGhp ${OUTPUT_DIR}
popd
echo "::endgroup::"
echo "::group::Inference using python API"
pushd optimum-executorch
python -c "
import os
from optimum.executorch import ExecuTorchModelForCausalLM
from transformers import AutoTokenizer
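# MODEL_ID and OUTPUT_DIR are exported by the surrounding shell step above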
model_id = os.getenv('MODEL_ID')
pte_dir = os.getenv('OUTPUT_DIR')
print(f'Loading model {model_id} from {pte_dir}.')
model = ExecuTorchModelForCausalLM.from_pretrained(pte_dir)
generated_text = model.text_generation(
tokenizer=AutoTokenizer.from_pretrained(model_id),
prompt='Simply put, the theory of relativity states that',
max_seq_len=64
)
print(generated_text)
"
popd
echo "::endgroup::"
echo "::group::Inference using executor_runner with ETDump"
./cmake-out/executor_runner \
--model_path ${OUTPUT_DIR}/model.pte \
--etdump_path ${OUTPUT_DIR}/etdump.etdp
export TSV_PATH=artifacts-to-be-uploaded/${MODEL_ID}_op_prof.tsv
mkdir -p $(dirname "$TSV_PATH")
python3 -m devtools.inspector.inspector_cli \
--etdump_path ${OUTPUT_DIR}/etdump.etdp \
--tsv_path ${TSV_PATH}
echo "::endgroup::"
test-llama-runner-qnn-linux:
name: test-llama-runner-qnn-linux
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
strategy:
matrix:
dtype: [fp32]
pt2e_quantize: [qnn_16a16w, qnn_8a8w]
mode: [qnn]
fail-fast: false
with:
runner: linux.2xlarge
docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 900
script: |
# The generic Linux job uses the base conda env, not the one set up by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
BUILD_TOOL="cmake"
DTYPE=${{ matrix.dtype }}
MODE=${{ matrix.mode }}
PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
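# --use-pt-pinned-commit installs the PyTorch commit pinned for CI (presumably the
# pin in .ci/docker/ci_commit_pins/pytorch.txt) instead of the default nightly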
./install_requirements.sh --use-pt-pinned-commit
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
# Set up ExecuTorch
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
# Install requirements for export_llama
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
unittest-release:
uses: ./.github/workflows/_unittest.yml
permissions:
id-token: write
contents: read
with:
build-mode: Release
build-tool: cmake
docker-image: ci-image:executorch-ubuntu-22.04-clang12