-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Build: Build using the PA binaries and whl if available. #8043
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 12 commits
302f309
4b96d44
1497df3
608717f
ace72c1
11237ab
c3d66df
d261aa9
d94cee0
2f7e71a
dfe4569
9281c60
d66db9c
1192011
d9afc19
b8ba70f
e7181bb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,7 +42,7 @@ ARG TRITON_MODEL_ANALYZER_REPO_TAG=main | |
ARG TRITON_ENABLE_GPU=ON | ||
ARG JAVA_BINDINGS_MAVEN_VERSION=3.8.4 | ||
ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG=1.5.8 | ||
|
||
ARG PERF_ANALYZER_BUILD=ON | ||
# DCGM version to install for Model Analyzer | ||
ARG DCGM_VERSION=3.3.6 | ||
|
||
|
@@ -131,11 +131,11 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \ | |
-DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \ | ||
-DTRITON_ENABLE_PERF_ANALYZER=OFF \ | ||
-DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON \ | ||
-DTRITON_ENABLE_PYTHON_HTTP=OFF -DTRITON_ENABLE_PYTHON_GRPC=OFF \ | ||
-DTRITON_ENABLE_PYTHON_HTTP=ON -DTRITON_ENABLE_PYTHON_GRPC=ON \ | ||
-DTRITON_ENABLE_JAVA_HTTP=ON \ | ||
-DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \ | ||
-DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client | ||
RUN make -j16 cc-clients java-clients && \ | ||
RUN make -j16 cc-clients java-clients python-clients && \ | ||
rm -fr ~/.m2 | ||
|
||
# TODO: PA will rebuild the CC clients since it depends on it. | ||
|
@@ -145,7 +145,8 @@ RUN make -j16 cc-clients java-clients && \ | |
# the python client until now. Post-migration we should focus | ||
# effort on de-tangling these flows. | ||
WORKDIR /workspace/pa_build | ||
RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \ | ||
RUN if [ "$PERF_ANALYZER_BUILD" = "ON" ]; then \ | ||
cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \ | ||
-DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \ | ||
-DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \ | ||
-DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \ | ||
|
@@ -161,12 +162,15 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \ | |
-DTRITON_ENABLE_PYTHON_GRPC=ON \ | ||
-DTRITON_PACKAGE_PERF_ANALYZER=ON \ | ||
-DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \ | ||
/workspace/perf_analyzer | ||
RUN make -j16 perf-analyzer python-clients | ||
|
||
RUN pip3 install build \ | ||
&& cd /workspace/perf_analyzer/genai-perf \ | ||
&& python3 -m build --wheel --outdir /workspace/install/python | ||
/workspace/perf_analyzer && \ | ||
make -j16 perf-analyzer python-clients && \ | ||
pip3 install build \ | ||
&& cd /workspace/perf_analyzer/genai-perf && \ | ||
python3 -m build --wheel --outdir /workspace/install/python; \ | ||
else \ | ||
tar -xzf /workspace/perf_analyzer/perf_analyzer*.tar.gz -C /workspace/install/bin && \ | ||
echo "Perf Analyzer binaries was extracted and not build"; \ | ||
fi | ||
|
||
# Install Java API Bindings | ||
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \ | ||
|
@@ -243,6 +247,15 @@ RUN pip3 install install/python/genai_perf-*.whl | |
# Install the dependencies needed to run the client examples. These | ||
# are not needed for building but including them allows this image to | ||
# be used to run the client examples. | ||
|
||
RUN if [ "$PERF_ANALYZER_BUILD" = "ON" ]; then \ | ||
pip3 install install/python/genai_perf-*.whl; \ | ||
else \ | ||
mkdir -p /workspace/install/python && \ | ||
cp perf_analyzer/genai_perf-*.whl /workspace/install/python/ && \ | ||
pip3 install /workspace/install/python/genai_perf-*.whl; \ | ||
fi | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This condition confuses me a bit. |
||
RUN pip3 install --upgrade "numpy<2" pillow attrdict && \ | ||
find install/python/ -maxdepth 1 -type f -name \ | ||
"tritonclient-*linux*.whl" | xargs printf -- '%s[all]' | \ | ||
|
@@ -285,4 +298,4 @@ ENV LD_LIBRARY_PATH=/workspace/install/lib:${LD_LIBRARY_PATH} | |
ENV LD_LIBRARY_PATH=/opt/hpcx/ompi/lib:${LD_LIBRARY_PATH} | ||
|
||
# Set TCMALLOC_RELEASE_RATE for users setting LD_PRELOAD with tcmalloc | ||
ENV TCMALLOC_RELEASE_RATE=200 | ||
ENV TCMALLOC_RELEASE_RATE=200 |
This file was deleted.
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why are we building the `python-clients` target twice? Can we avoid it?