From 24f2be018266b0057d76e16d9ae2c349ab7954a5 Mon Sep 17 00:00:00 2001
From: Misha Chornyi <mchornyi@nvidia.com>
Date: Thu, 24 Jul 2025 14:35:35 -0700
Subject: [PATCH 01/10] Update docker SDK image

---
 Dockerfile.sdk | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/Dockerfile.sdk b/Dockerfile.sdk
index d1c4b5a189..a694f4cfb3 100644
--- a/Dockerfile.sdk
+++ b/Dockerfile.sdk
@@ -86,18 +86,9 @@ RUN apt-get update && \
             python3-pdfkit \
             openjdk-11-jdk \
             maven && \
-    pip3 install --upgrade "grpcio-tools<1.68"
-
-# Client build requires recent version of CMake (FetchContent required)
-# Using CMAKE installation instruction from:: https://apt.kitware.com/
-RUN apt update -q=2 \
-    && apt install -y gpg wget \
-    && wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - |  tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null \
-    && . /etc/os-release \
-    && echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $UBUNTU_CODENAME main" | tee /etc/apt/sources.list.d/kitware.list >/dev/null \
-    && apt-get update -q=2 \
-    && apt-get install -y --no-install-recommends cmake=3.28.3* cmake-data=3.28.3* \
-    && cmake --version
+    pip3 install --upgrade "grpcio-tools<1.68" cmake==4.0.3
+
+ENV CMAKE_POLICY_MINIMUM_REQUIRED=3.5
 
 # Build expects "python" executable (not python3).
 RUN rm -f /usr/bin/python && \

From 4e0ef2873e538abe2f92bc8f254a9e855945aae9 Mon Sep 17 00:00:00 2001
From: Misha Chornyi <mchornyi@nvidia.com>
Date: Thu, 24 Jul 2025 14:37:55 -0700
Subject: [PATCH 02/10] Update perl version

---
 Dockerfile.sdk | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Dockerfile.sdk b/Dockerfile.sdk
index a694f4cfb3..eb3126f056 100644
--- a/Dockerfile.sdk
+++ b/Dockerfile.sdk
@@ -226,6 +226,7 @@ RUN apt-get update && \
             wget \
             python3-pdfkit \
             maven \
+            perl \
             default-jdk && \
     pip3 install "grpcio<1.68" "grpcio-tools<1.68"
 

From 385a0a25a6c6d048aad3aed664180b246d61efad Mon Sep 17 00:00:00 2001
From: Misha Chornyi <mchornyi@nvidia.com>
Date: Thu, 24 Jul 2025 15:18:04 -0700
Subject: [PATCH 03/10] Update readme and version

---
 Dockerfile.sdk                                              | 2 +-
 TRITON_VERSION                                              | 2 +-
 build.py                                                    | 6 +++---
 deploy/aws/values.yaml                                      | 2 +-
 deploy/fleetcommand/Chart.yaml                              | 2 +-
 deploy/fleetcommand/values.yaml                             | 6 +++---
 deploy/gcp/values.yaml                                      | 2 +-
 .../benchmark/perf-analyzer-script/triton_client.yaml       | 2 +-
 .../gke-marketplace-app/server-deployer/build_and_push.sh   | 4 ++--
 .../server-deployer/chart/triton/Chart.yaml                 | 2 +-
 .../server-deployer/chart/triton/values.yaml                | 6 +++---
 .../server-deployer/data-test/schema.yaml                   | 2 +-
 deploy/gke-marketplace-app/server-deployer/schema.yaml      | 4 ++--
 deploy/gke-marketplace-app/trt-engine/README.md             | 6 +++---
 deploy/k8s-onprem/values.yaml                               | 2 +-
 deploy/oci/values.yaml                                      | 2 +-
 docs/introduction/compatibility.md                          | 1 +
 python/openai/README.md                                     | 2 +-
 qa/common/gen_jetson_trt_models                             | 2 +-
 qa/common/gen_qa_custom_ops                                 | 2 +-
 qa/common/gen_qa_model_repository                           | 2 +-
 21 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/Dockerfile.sdk b/Dockerfile.sdk
index eb3126f056..011bb6f62b 100644
--- a/Dockerfile.sdk
+++ b/Dockerfile.sdk
@@ -29,7 +29,7 @@
 #
 
 # Base image on the minimum Triton container
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.06-py3-min
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.07-py3-min
 
 ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
 ARG TRITON_PA_REPO_SUBDIR=perfanalyzerrepo
diff --git a/TRITON_VERSION b/TRITON_VERSION
index e7a9ca6a62..3b4ce49e5d 100644
--- a/TRITON_VERSION
+++ b/TRITON_VERSION
@@ -1 +1 @@
-2.59.0
+2.59.1
diff --git a/build.py b/build.py
index 5ced5327f2..f848ff05f8 100755
--- a/build.py
+++ b/build.py
@@ -71,9 +71,9 @@
 #
 
 DEFAULT_TRITON_VERSION_MAP = {
-    "release_version": "2.59.0",
-    "triton_container_version": "25.06",
-    "upstream_container_version": "25.06",
+    "release_version": "2.59.1",
+    "triton_container_version": "25.07",
+    "upstream_container_version": "25.07",
     "ort_version": "1.22.0",
     "ort_openvino_version": "2025.1.0",
     "standalone_openvino_version": "2025.1.0",
diff --git a/deploy/aws/values.yaml b/deploy/aws/values.yaml
index c24b0ed7b8..eb9afb78e0 100644
--- a/deploy/aws/values.yaml
+++ b/deploy/aws/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:25.06-py3
+  imageName: nvcr.io/nvidia/tritonserver:25.07-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: s3://triton-inference-server-repository/model_repository
   numGpus: 1
diff --git a/deploy/fleetcommand/Chart.yaml b/deploy/fleetcommand/Chart.yaml
index b819cbb4a5..1b1af554ee 100644
--- a/deploy/fleetcommand/Chart.yaml
+++ b/deploy/fleetcommand/Chart.yaml
@@ -26,7 +26,7 @@
 
 apiVersion: v1
 # appVersion is the Triton version; update when changing release
-appVersion: "2.59.0"
+appVersion: "2.59.1"
 description: Triton Inference Server (Fleet Command)
 name: triton-inference-server
 # version is the Chart version; update when changing anything in the chart
diff --git a/deploy/fleetcommand/values.yaml b/deploy/fleetcommand/values.yaml
index d2300ef421..03569ab343 100644
--- a/deploy/fleetcommand/values.yaml
+++ b/deploy/fleetcommand/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:25.06-py3
+  imageName: nvcr.io/nvidia/tritonserver:25.07-py3
   pullPolicy: IfNotPresent
   numGpus: 1
   serverCommand: tritonserver
@@ -47,13 +47,13 @@ image:
     #
     # To set model control mode, uncomment and configure below
     # TODO: Fix the following url, it is invalid
-    # See https://github.com/triton-inference-server/server/blob/r25.06/docs/user_guide/model_management.md
+    # See https://github.com/triton-inference-server/server/blob/r25.07/docs/user_guide/model_management.md
     #  for more details
     #- --model-control-mode=explicit|poll|none
     #
     # Additional server args
     #
-    # see https://github.com/triton-inference-server/server/blob/r25.06/README.md
+    # see https://github.com/triton-inference-server/server/blob/r25.07/README.md
     #  for more details
 
 service:
diff --git a/deploy/gcp/values.yaml b/deploy/gcp/values.yaml
index 73948fb39d..55d0919205 100644
--- a/deploy/gcp/values.yaml
+++ b/deploy/gcp/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:25.06-py3
+  imageName: nvcr.io/nvidia/tritonserver:25.07-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: gs://triton-inference-server-repository/model_repository
   numGpus: 1
diff --git a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
index 8be23e0f2c..951f04898e 100644
--- a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
+++ b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
@@ -33,7 +33,7 @@ metadata:
   namespace: default
 spec:
   containers:
-  - image: nvcr.io/nvidia/tritonserver:25.06-py3-sdk
+  - image: nvcr.io/nvidia/tritonserver:25.07-py3-sdk
     imagePullPolicy: Always
     name: nv-triton-client
     securityContext:
diff --git a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
index b31804b9a2..659e3a40eb 100755
--- a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
+++ b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
@@ -28,8 +28,8 @@
 export REGISTRY=gcr.io/$(gcloud config get-value project | tr ':' '/')
 export APP_NAME=tritonserver
 export MAJOR_VERSION=2.59
-export MINOR_VERSION=2.59.0
-export NGC_VERSION=25.06-py3
+export MINOR_VERSION=2.59.1
+export NGC_VERSION=25.07-py3
 
 docker pull nvcr.io/nvidia/$APP_NAME:$NGC_VERSION
 
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
index 3206248935..218a45c7a5 100644
--- a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
@@ -28,4 +28,4 @@ apiVersion: v1
 appVersion: "2.59"
 description: Triton Inference Server
 name: triton-inference-server
-version: 2.59.0
+version: 2.59.1
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
index 52b1ab2f21..94e5132d4d 100644
--- a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
@@ -31,14 +31,14 @@ maxReplicaCount: 3
 tritonProtocol: HTTP
 # HPA GPU utilization autoscaling target
 HPATargetAverageValue: 85
-modelRepositoryPath: gs://triton_sample_models/25.06
-publishedVersion: '2.59.0'
+modelRepositoryPath: gs://triton_sample_models/25.07
+publishedVersion: '2.59.1'
 gcpMarketplace: true
 
 image:
   registry: gcr.io
   repository: nvidia-ngc-public/tritonserver
-  tag: 25.06-py3
+  tag: 25.07-py3
   pullPolicy: IfNotPresent
   # modify the model repository here to match your GCP storage bucket
   numGpus: 1
diff --git a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
index 36ba6b6a63..edfcbe164b 100644
--- a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
@@ -27,7 +27,7 @@
 x-google-marketplace:
   schemaVersion: v2
   applicationApiVersion: v1beta1
-  publishedVersion: '2.59.0'
+  publishedVersion: '2.59.1'
   publishedVersionMetadata:
     releaseNote: >-
       Initial release.
diff --git a/deploy/gke-marketplace-app/server-deployer/schema.yaml b/deploy/gke-marketplace-app/server-deployer/schema.yaml
index 629303952d..ab21637bf3 100644
--- a/deploy/gke-marketplace-app/server-deployer/schema.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/schema.yaml
@@ -27,7 +27,7 @@
 x-google-marketplace:
   schemaVersion: v2
   applicationApiVersion: v1beta1
-  publishedVersion: '2.59.0'
+  publishedVersion: '2.59.1'
   publishedVersionMetadata:
     releaseNote: >-
       Initial release.
@@ -89,7 +89,7 @@ properties:
   modelRepositoryPath:
     type: string
     title: Bucket where models are stored. Please make sure the user/service account to create the GKE app has permission to this GCS bucket. Read Triton documentation on configs and formatting details, supporting TensorRT, TensorFlow, Pytorch, Onnx ... etc.
-    default: gs://triton_sample_models/25.06
+    default: gs://triton_sample_models/25.07
   image.ldPreloadPath:
     type: string
     title: Leave this empty by default. Triton allows users to create custom layers for backend such as TensorRT plugin or Tensorflow custom ops, the compiled shared library must be provided via LD_PRELOAD environment variable.
diff --git a/deploy/gke-marketplace-app/trt-engine/README.md b/deploy/gke-marketplace-app/trt-engine/README.md
index dba63cf63e..c80da18f89 100644
--- a/deploy/gke-marketplace-app/trt-engine/README.md
+++ b/deploy/gke-marketplace-app/trt-engine/README.md
@@ -33,7 +33,7 @@
 ```
 docker run --gpus all -it --network host \
     --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
-    -v ~:/scripts nvcr.io/nvidia/tensorrt:25.06-py3
+    -v ~:/scripts nvcr.io/nvidia/tensorrt:25.07-py3
 
 pip install onnx six torch tf2onnx tensorflow
 
@@ -57,7 +57,7 @@ mkdir -p engines
 
 python3 builder.py -m models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/model.ckpt -o engines/bert_large_int8_bs1_s128.engine -b 1 -s 128 -c models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/ -v models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/vocab.txt --int8 --fp16 --strict --calib-num 1 -iln -imh
 
-gsutil cp bert_large_int8_bs1_s128.engine gs://triton_sample_models/25.06/bert/1/model.plan
+gsutil cp bert_large_int8_bs1_s128.engine gs://triton_sample_models/25.07/bert/1/model.plan
 ```
 
-For each Triton upgrade, container version used to generate the model, and the model path in GCS `gs://triton_sample_models/25.06/` should be updated accordingly with the correct version.
+For each Triton upgrade, container version used to generate the model, and the model path in GCS `gs://triton_sample_models/25.07/` should be updated accordingly with the correct version.
diff --git a/deploy/k8s-onprem/values.yaml b/deploy/k8s-onprem/values.yaml
index ac6d7a3480..84d6c62f28 100644
--- a/deploy/k8s-onprem/values.yaml
+++ b/deploy/k8s-onprem/values.yaml
@@ -30,7 +30,7 @@ tags:
   openshift: false
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:25.06-py3
+  imageName: nvcr.io/nvidia/tritonserver:25.07-py3
   pullPolicy: IfNotPresent
   modelRepositoryServer: < Replace with the IP Address of your file server >
   modelRepositoryPath: /srv/models
diff --git a/deploy/oci/values.yaml b/deploy/oci/values.yaml
index be6eb13a0e..093791e1be 100644
--- a/deploy/oci/values.yaml
+++ b/deploy/oci/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:25.06-py3
+  imageName: nvcr.io/nvidia/tritonserver:25.07-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: s3://https://<OCI_NAMESPACE>.compat.objectstorage.<OCI_REGION>.oraclecloud.com:443/triton-inference-server-repository
   numGpus: 1
diff --git a/docs/introduction/compatibility.md b/docs/introduction/compatibility.md
index 4332d4c8ae..5671b4e77d 100644
--- a/docs/introduction/compatibility.md
+++ b/docs/introduction/compatibility.md
@@ -38,6 +38,7 @@
 
 | Triton release version	 | NGC Tag	 | Python version	 | Torch version | TensorRT version | TensorRT-LLM version | CUDA version | CUDA Driver version | Size |
 | --- | ---  | --- | --- | --- | --- | --- | --- | --- |
+| 25.06 | nvcr.io/nvidia/tritonserver:25.07-trtllm-python-py3 | Python 3.12.3  | 2.7.0a0+79aa17489c.nv25.4 | 10.10.0.31 | 0.20.0 | 12.9.0.036 | 575.51.03 | 18.3G |
 | 25.06 | nvcr.io/nvidia/tritonserver:25.06-trtllm-python-py3 | Python 3.12.3  | 2.7.0a0+79aa17489c.nv25.4 | 10.10.0.31 | 0.20.0 | 12.9.0.036 | 575.51.03 | 18.3G |
 | 25.05 | nvcr.io/nvidia/tritonserver:25.05-trtllm-python-py3 | Python 3.12.3  | 2.7.0a0+7c8ec84dab.nv25.3 | 10.9.0.34 | 0.19.0 | 12.8.1.012 | 570.124.06 | 17G |
 | 25.04 | nvcr.io/nvidia/tritonserver:25.04-trtllm-python-py3 | Python 3.12.3  | 2.7.0a0+7c8ec84dab.nv25.3 | 10.9.0.34 | 0.18.2 | 12.8.1.012 | 570.124.06 | 17G |
diff --git a/python/openai/README.md b/python/openai/README.md
index 5851918ff2..db2ca8817e 100644
--- a/python/openai/README.md
+++ b/python/openai/README.md
@@ -51,7 +51,7 @@
 docker run -it --net=host --gpus all --rm \
   -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
   -e HF_TOKEN \
-  nvcr.io/nvidia/tritonserver:25.06-vllm-python-py3
+  nvcr.io/nvidia/tritonserver:25.07-vllm-python-py3
 ```
 
 2. Launch the OpenAI-compatible Triton Inference Server:
diff --git a/qa/common/gen_jetson_trt_models b/qa/common/gen_jetson_trt_models
index b081fe9c4e..5bbfb4c74f 100755
--- a/qa/common/gen_jetson_trt_models
+++ b/qa/common/gen_jetson_trt_models
@@ -34,7 +34,7 @@
 # Make all generated files accessible outside of container
 umask 0000
 # Set the version of the models
-TRITON_VERSION=${TRITON_VERSION:=25.06}
+TRITON_VERSION=${TRITON_VERSION:=25.07}
 # Set the CUDA device to use
 CUDA_DEVICE=${RUNNER_ID:=0}
 # Set TensorRT image
diff --git a/qa/common/gen_qa_custom_ops b/qa/common/gen_qa_custom_ops
index 108b1daa53..6b3f349f44 100755
--- a/qa/common/gen_qa_custom_ops
+++ b/qa/common/gen_qa_custom_ops
@@ -37,7 +37,7 @@
 ##
 ############################################################################
 
-TRITON_VERSION=${TRITON_VERSION:=25.06}
+TRITON_VERSION=${TRITON_VERSION:=25.07}
 NVIDIA_UPSTREAM_VERSION=${NVIDIA_UPSTREAM_VERSION:=$TRITON_VERSION}
 PYTORCH_IMAGE=${PYTORCH_IMAGE:=nvcr.io/nvidia/pytorch:$NVIDIA_UPSTREAM_VERSION-py3}
 UBUNTU_IMAGE=${UBUNTU_IMAGE:=ubuntu:24.04}
diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository
index 2deeb97da5..8660124fc5 100755
--- a/qa/common/gen_qa_model_repository
+++ b/qa/common/gen_qa_model_repository
@@ -48,7 +48,7 @@
 ##
 ############################################################################
 
-TRITON_VERSION=${TRITON_VERSION:=25.06}
+TRITON_VERSION=${TRITON_VERSION:=25.07}
 
 # ONNX. Use ONNX_OPSET 0 to use the default for ONNX version
 ONNX_VERSION=1.16.1

From 31dc97639c6a4b89032518dd1e668b7a6d879437 Mon Sep 17 00:00:00 2001
From: Misha Chornyi <mchornyi@nvidia.com>
Date: Thu, 24 Jul 2025 16:26:37 -0700
Subject: [PATCH 04/10] Update version

---
 Dockerfile.sdk | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/Dockerfile.sdk b/Dockerfile.sdk
index 011bb6f62b..693ad662d2 100644
--- a/Dockerfile.sdk
+++ b/Dockerfile.sdk
@@ -60,32 +60,33 @@ ENV PIP_BREAK_SYSTEM_PACKAGES=1
 
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-            ca-certificates \
-            software-properties-common \
             autoconf \
             automake \
             build-essential \
+            ca-certificates \
             curl \
             git \
             gperf \
             libb64-dev \
             libgoogle-perftools-dev \
-            libopencv-dev \
             libopencv-core-dev \
+            libopencv-dev \
             libssl-dev \
             libtool \
+            maven \
+            openjdk-11-jdk \
+            perl \
             pkg-config \
             python3 \
-            python3-pip \
             python3-dev \
-            python3-wheel \
+            python3-pdfkit \
+            python3-pip \
             python3-setuptools \
+            python3-wheel \
             rapidjson-dev \
+            software-properties-common \
             vim \
-            wget \
-            python3-pdfkit \
-            openjdk-11-jdk \
-            maven && \
+            wget && \
     pip3 install --upgrade "grpcio-tools<1.68" cmake==4.0.3
 
 ENV CMAKE_POLICY_MINIMUM_REQUIRED=3.5

From ca0d205501b4e1284fefa9be3b87d87aed554161 Mon Sep 17 00:00:00 2001
From: Misha Chornyi <mchornyi@nvidia.com>
Date: Thu, 24 Jul 2025 22:07:26 -0700
Subject: [PATCH 05/10] Update Dockerfile configuration

---
 Dockerfile.sdk | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/Dockerfile.sdk b/Dockerfile.sdk
index 693ad662d2..bf80d047e2 100644
--- a/Dockerfile.sdk
+++ b/Dockerfile.sdk
@@ -75,7 +75,6 @@ RUN apt-get update && \
             libtool \
             maven \
             openjdk-11-jdk \
-            perl \
             pkg-config \
             python3 \
             python3-dev \
@@ -86,8 +85,8 @@ RUN apt-get update && \
             rapidjson-dev \
             software-properties-common \
             vim \
-            wget && \
-    pip3 install --upgrade "grpcio-tools<1.68" cmake==4.0.3
+            wget &&
+    pip3 install --upgrade "grpcio-tools<1.68" cmake==3.28.3
 
 ENV CMAKE_POLICY_MINIMUM_REQUIRED=3.5
 
@@ -129,8 +128,7 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
           -DTRITON_ENABLE_JAVA_HTTP=ON \
           -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \
           -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client
-RUN make -j16 cc-clients java-clients && \
-    rm -fr ~/.m2
+RUN cmake -v --parallel cc-clients java-clients
 
 # TODO: PA will rebuild the CC clients since it depends on it.
 # This should be optimized so that we do not have to build
@@ -148,6 +146,7 @@ RUN if [ "$TRITON_PERF_ANALYZER_BUILD" = "1" ]; then \
           -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
           -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
           -DTRITON_CLIENT_REPO_TAG=${TRITON_CLIENT_REPO_TAG} \
+          -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
           -DTRITON_ENABLE_PERF_ANALYZER_C_API=ON \
           -DTRITON_ENABLE_PERF_ANALYZER_TFS=ON \
           -DTRITON_ENABLE_PERF_ANALYZER_TS=ON \
@@ -159,7 +158,7 @@ RUN if [ "$TRITON_PERF_ANALYZER_BUILD" = "1" ]; then \
           -DTRITON_PACKAGE_PERF_ANALYZER=ON \
           -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
         /workspace/perf_analyzer && \
-        make -j16 perf-analyzer python-clients && \
+        cmake -v --parallel perf-analyzer python-clients && \
         pip3 install build && \
         cd /workspace/perf_analyzer/genai-perf && \
         python3 -m build --wheel --outdir /workspace/install/python; \
@@ -172,12 +171,13 @@ RUN if [ "$TRITON_PERF_ANALYZER_BUILD" = "1" ]; then \
           -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
           -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
           -DTRITON_CLIENT_REPO_TAG=${TRITON_CLIENT_REPO_TAG} \
+          -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
           -DTRITON_ENABLE_PYTHON_HTTP=ON \
           -DTRITON_ENABLE_PYTHON_GRPC=ON \
           -DTRITON_PACKAGE_PERF_ANALYZER=ON \
           -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
         /workspace/perf_analyzer && \
-        make -j16 python-clients && \
+        cmake -v --parallel python-clients && \
         mkdir -p /workspace/install/python && \
         cp /workspace/perf_analyzer/genai_perf-*.whl /workspace/install/python/; \
     fi
@@ -208,27 +208,27 @@ ARG TRITON_ENABLE_GPU
 
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-            software-properties-common \
             curl \
+            default-jdk \
             git \
             gperf \
             libb64-dev \
             libgoogle-perftools-dev \
-            libopencv-dev \
             libopencv-core-dev \
+            libopencv-dev \
             libssl-dev \
             libtool \
+            maven \
+            perl \
             python3 \
-            python3-pip \
             python3-dev \
-            python3-wheel \
+            python3-pdfkit \
+            python3-pip \
             python3-setuptools \
+            python3-wheel \
+            software-properties-common \
             vim \
-            wget \
-            python3-pdfkit \
-            maven \
-            perl \
-            default-jdk && \
+            wget && \
     pip3 install "grpcio<1.68" "grpcio-tools<1.68"
 
 WORKDIR /workspace

From fcf996d886f755554d53a31de642f1b63830e5ed Mon Sep 17 00:00:00 2001
From: Misha Chornyi <mchornyi@nvidia.com>
Date: Thu, 24 Jul 2025 22:25:22 -0700
Subject: [PATCH 06/10] Update version

---
 docs/introduction/compatibility.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/introduction/compatibility.md b/docs/introduction/compatibility.md
index 5671b4e77d..450944756d 100644
--- a/docs/introduction/compatibility.md
+++ b/docs/introduction/compatibility.md
@@ -38,7 +38,7 @@
 
 | Triton release version	 | NGC Tag	 | Python version	 | Torch version | TensorRT version | TensorRT-LLM version | CUDA version | CUDA Driver version | Size |
 | --- | ---  | --- | --- | --- | --- | --- | --- | --- |
-| 25.06 | nvcr.io/nvidia/tritonserver:25.07-trtllm-python-py3 | Python 3.12.3  | 2.7.0a0+79aa17489c.nv25.4 | 10.10.0.31 | 0.20.0 | 12.9.0.036 | 575.51.03 | 18.3G |
+| 25.07 | nvcr.io/nvidia/tritonserver:25.07-trtllm-python-py3 | Python 3.12.3  | 2.7.0a0+79aa17489c.nv25.4 | 10.10.0.31 | 0.20.0 | 12.9.0.036 | 575.51.03 | 18.3G |
 | 25.06 | nvcr.io/nvidia/tritonserver:25.06-trtllm-python-py3 | Python 3.12.3  | 2.7.0a0+79aa17489c.nv25.4 | 10.10.0.31 | 0.20.0 | 12.9.0.036 | 575.51.03 | 18.3G |
 | 25.05 | nvcr.io/nvidia/tritonserver:25.05-trtllm-python-py3 | Python 3.12.3  | 2.7.0a0+7c8ec84dab.nv25.3 | 10.9.0.34 | 0.19.0 | 12.8.1.012 | 570.124.06 | 17G |
 | 25.04 | nvcr.io/nvidia/tritonserver:25.04-trtllm-python-py3 | Python 3.12.3  | 2.7.0a0+7c8ec84dab.nv25.3 | 10.9.0.34 | 0.18.2 | 12.8.1.012 | 570.124.06 | 17G |

From d224e2f8d202ce1dae890f7a47e7ce8a9afd5c6d Mon Sep 17 00:00:00 2001
From: Misha Chornyi <mchornyi@nvidia.com>
Date: Fri, 25 Jul 2025 07:28:34 -0700
Subject: [PATCH 07/10] Update config file

---
 Dockerfile.sdk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile.sdk b/Dockerfile.sdk
index bf80d047e2..24c33b393b 100644
--- a/Dockerfile.sdk
+++ b/Dockerfile.sdk
@@ -85,7 +85,7 @@ RUN apt-get update && \
             rapidjson-dev \
             software-properties-common \
             vim \
-            wget &&
+            wget && \
     pip3 install --upgrade "grpcio-tools<1.68" cmake==3.28.3
 
 ENV CMAKE_POLICY_MINIMUM_REQUIRED=3.5

From 74b90ba7e98bba1270b6b1cda135df6f3c15713f Mon Sep 17 00:00:00 2001
From: Misha Chornyi <mchornyi@nvidia.com>
Date: Fri, 25 Jul 2025 08:00:22 -0700
Subject: [PATCH 08/10] Bring back the missed flag

---
 Dockerfile.sdk | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Dockerfile.sdk b/Dockerfile.sdk
index 24c33b393b..166bf9466e 100644
--- a/Dockerfile.sdk
+++ b/Dockerfile.sdk
@@ -128,7 +128,7 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
           -DTRITON_ENABLE_JAVA_HTTP=ON \
           -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \
           -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client
-RUN cmake -v --parallel cc-clients java-clients
+RUN cmake -v --parallel --build cc-clients java-clients
 
 # TODO: PA will rebuild the CC clients since it depends on it.
 # This should be optimized so that we do not have to build
@@ -158,7 +158,7 @@ RUN if [ "$TRITON_PERF_ANALYZER_BUILD" = "1" ]; then \
           -DTRITON_PACKAGE_PERF_ANALYZER=ON \
           -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
         /workspace/perf_analyzer && \
-        cmake -v --parallel perf-analyzer python-clients && \
+        cmake -v --parallel --build perf-analyzer python-clients && \
         pip3 install build && \
         cd /workspace/perf_analyzer/genai-perf && \
         python3 -m build --wheel --outdir /workspace/install/python; \
@@ -177,7 +177,7 @@ RUN if [ "$TRITON_PERF_ANALYZER_BUILD" = "1" ]; then \
           -DTRITON_PACKAGE_PERF_ANALYZER=ON \
           -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
         /workspace/perf_analyzer && \
-        cmake -v --parallel python-clients && \
+        cmake -v --parallel --build python-clients && \
         mkdir -p /workspace/install/python && \
         cp /workspace/perf_analyzer/genai_perf-*.whl /workspace/install/python/; \
     fi

From 20fb6b10891981ef672044a15578a3af820737c3 Mon Sep 17 00:00:00 2001
From: Misha Chornyi <mchornyi@nvidia.com>
Date: Fri, 25 Jul 2025 08:20:42 -0700
Subject: [PATCH 09/10] Adjust execution for cmake

---
 Dockerfile.sdk | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Dockerfile.sdk b/Dockerfile.sdk
index 166bf9466e..18402a3e00 100644
--- a/Dockerfile.sdk
+++ b/Dockerfile.sdk
@@ -128,7 +128,7 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
           -DTRITON_ENABLE_JAVA_HTTP=ON \
           -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \
           -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client
-RUN cmake -v --parallel --build cc-clients java-clients
+RUN cmake --build . -v --parallel cc-clients java-clients
 
 # TODO: PA will rebuild the CC clients since it depends on it.
 # This should be optimized so that we do not have to build
@@ -158,7 +158,7 @@ RUN if [ "$TRITON_PERF_ANALYZER_BUILD" = "1" ]; then \
           -DTRITON_PACKAGE_PERF_ANALYZER=ON \
           -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
         /workspace/perf_analyzer && \
-        cmake -v --parallel --build perf-analyzer python-clients && \
+        cmake --build . -v --parallel perf-analyzer python-clients && \
         pip3 install build && \
         cd /workspace/perf_analyzer/genai-perf && \
         python3 -m build --wheel --outdir /workspace/install/python; \
@@ -177,7 +177,7 @@ RUN if [ "$TRITON_PERF_ANALYZER_BUILD" = "1" ]; then \
           -DTRITON_PACKAGE_PERF_ANALYZER=ON \
           -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
         /workspace/perf_analyzer && \
-        cmake -v --parallel --build python-clients && \
+        cmake --build . -v --parallel python-clients && \
         mkdir -p /workspace/install/python && \
         cp /workspace/perf_analyzer/genai_perf-*.whl /workspace/install/python/; \
     fi

From a025faa08413656d073b2e14fc9b39363dff6173 Mon Sep 17 00:00:00 2001
From: Misha Chornyi <mchornyi@nvidia.com>
Date: Fri, 25 Jul 2025 08:24:24 -0700
Subject: [PATCH 10/10] Forgot about target flag

---
 Dockerfile.sdk | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Dockerfile.sdk b/Dockerfile.sdk
index 18402a3e00..e24d5c1982 100644
--- a/Dockerfile.sdk
+++ b/Dockerfile.sdk
@@ -128,7 +128,7 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
           -DTRITON_ENABLE_JAVA_HTTP=ON \
           -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \
           -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client
-RUN cmake --build . -v --parallel cc-clients java-clients
+RUN cmake --build . -v --parallel --target cc-clients java-clients
 
 # TODO: PA will rebuild the CC clients since it depends on it.
 # This should be optimized so that we do not have to build
@@ -158,7 +158,7 @@ RUN if [ "$TRITON_PERF_ANALYZER_BUILD" = "1" ]; then \
           -DTRITON_PACKAGE_PERF_ANALYZER=ON \
           -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
         /workspace/perf_analyzer && \
-        cmake --build . -v --parallel perf-analyzer python-clients && \
+        cmake --build . -v --parallel --target perf-analyzer python-clients && \
         pip3 install build && \
         cd /workspace/perf_analyzer/genai-perf && \
         python3 -m build --wheel --outdir /workspace/install/python; \
@@ -177,7 +177,7 @@ RUN if [ "$TRITON_PERF_ANALYZER_BUILD" = "1" ]; then \
           -DTRITON_PACKAGE_PERF_ANALYZER=ON \
           -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
         /workspace/perf_analyzer && \
-        cmake --build . -v --parallel python-clients && \
+        cmake --build . -v --parallel --target python-clients && \
         mkdir -p /workspace/install/python && \
         cp /workspace/perf_analyzer/genai_perf-*.whl /workspace/install/python/; \
     fi