Skip to content

ModelError Parameter model_name is required #10

@akqjxx

Description

@akqjxx

Batch transform fails with the following error:

2022-08-30T09:01:17.792:[sagemaker logs]: MaxConcurrentTransforms=1, MaxPayloadInMB=50, BatchStrategy=MULTI_RECORD
2022-08-30T09:01:17.883:[sagemaker logs]: st-s3/trainingPlatform/model/ba0ba70ebb2c48f69c61240a199f7a24/inference/dataset/21f9bb9e2a39407691bdb18e04e1b672/202208170843490.jpg: ClientError: 400
2022-08-30T09:01:17.883:[sagemaker logs]: st-s3/trainingPlatform/model/ba0ba70ebb2c48f69c61240a199f7a24/inference/dataset/21f9bb9e2a39407691bdb18e04e1b672/202208170843490.jpg:
2022-08-30T09:01:17.883:[sagemaker logs]: st-s3/trainingPlatform/model/ba0ba70ebb2c48f69c61240a199f7a24/inference/dataset/21f9bb9e2a39407691bdb18e04e1b672/202208170843490.jpg: Message:
2022-08-30T09:01:17.883:[sagemaker logs]: st-s3/trainingPlatform/model/ba0ba70ebb2c48f69c61240a199f7a24/inference/dataset/21f9bb9e2a39407691bdb18e04e1b672/202208170843490.jpg: {
2022-08-30T09:01:17.883:[sagemaker logs]: st-s3/trainingPlatform/model/ba0ba70ebb2c48f69c61240a199f7a24/inference/dataset/21f9bb9e2a39407691bdb18e04e1b672/202208170843490.jpg: "code": 400,
2022-08-30T09:01:17.884:[sagemaker logs]: st-s3/trainingPlatform/model/ba0ba70ebb2c48f69c61240a199f7a24/inference/dataset/21f9bb9e2a39407691bdb18e04e1b672/202208170843490.jpg: "type": "BadRequestException",
2022-08-30T09:01:17.884:[sagemaker logs]: st-s3/trainingPlatform/model/ba0ba70ebb2c48f69c61240a199f7a24/inference/dataset/21f9bb9e2a39407691bdb18e04e1b672/202208170843490.jpg: "message": "Parameter model_name is required."
2022-08-30T09:01:17.884:[sagemaker logs]: st-s3/trainingPlatform/model/ba0ba70ebb2c48f69c61240a199f7a24/inference/dataset/21f9bb9e2a39407691bdb18e04e1b672/202208170843490.jpg: }

My Dockerfile is:

FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu16.04

# NCCL_VERSION=2.4.7, CUDNN_VERSION=7.6.2.24
LABEL maintainer="Amazon AI"
LABEL dlc_major_version="1"
# Let SageMaker bind the serving container to an arbitrary port.
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true

# Add arguments to achieve the version, python and url
ARG PYTHON=python3
ARG PYTHON_VERSION=3.7.3
ARG OPEN_MPI_VERSION=4.0.1
ARG TS_VERSION="0.3.1"
ARG PT_INFERENCE_URL=https://aws-pytorch-binaries.s3-us-west-2.amazonaws.com/r1.6.0_inference/20200727-223446/b0251e7e070e57f34ee08ac59ab4710081b41918/gpu/torch-1.6.0-cp36-cp36m-manylinux1_x86_64.whl
ARG PT_VISION_URL=https://torchvision-build.s3.amazonaws.com/1.6.0/gpu/torchvision-0.7.0-cp36-cp36m-linux_x86_64.whl

# See http://bugs.python.org/issue19846
ENV LANG=C.UTF-8
ENV LD_LIBRARY_PATH=/opt/conda/lib/:$LD_LIBRARY_PATH
ENV PATH=/opt/conda/bin:$PATH
# Module the SageMaker PyTorch serving container starts at runtime.
ENV SAGEMAKER_SERVING_MODULE=sagemaker_pytorch_serving_container.serving:main
ENV TEMP=/home/model-server/tmp

# OS packages needed for building and serving; clean the apt lists in the
# same layer so the cache does not bloat the image.
RUN apt-get update \
 && apt-get install -y --no-install-recommends software-properties-common \
 && add-apt-repository ppa:openjdk-r/ppa \
 && apt-get update \
 && apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \
    build-essential \
    ca-certificates \
    cmake \
    curl \
    emacs \
    git \
    jq \
    libgl1-mesa-glx \
    libglib2.0-0 \
    libgomp1 \
    libibverbs-dev \
    libnuma1 \
    libnuma-dev \
    libsm6 \
    libxext6 \
    libxrender-dev \
    openjdk-11-jdk \
    vim \
    wget \
    unzip \
    zlib1g-dev \
 && rm -rf /var/lib/apt/lists/*

# Convert the shared Java cacerts store to JKS so OpenJDK 11 can read it.
# See docker-library/openjdk#261 and https://github.com/docker-library/openjdk/pull/263/files
RUN keytool -importkeystore -srckeystore /etc/ssl/certs/java/cacerts -destkeystore /etc/ssl/certs/java/cacerts.jks -deststoretype JKS -srcstorepass changeit -deststorepass changeit -noprompt; \
    mv /etc/ssl/certs/java/cacerts.jks /etc/ssl/certs/java/cacerts; \
    /var/lib/dpkg/info/ca-certificates-java.postinst configure;

# Build Open MPI from source into /home/.openmpi; remove the source tree
# in the same layer to keep the image small.
RUN wget https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-$OPEN_MPI_VERSION.tar.gz \
 && gunzip -c openmpi-$OPEN_MPI_VERSION.tar.gz | tar xf - \
 && cd openmpi-$OPEN_MPI_VERSION \
 && ./configure --prefix=/home/.openmpi \
 && make all install \
 && cd .. \
 && rm openmpi-$OPEN_MPI_VERSION.tar.gz \
 && rm -rf openmpi-$OPEN_MPI_VERSION

ENV PATH="$PATH:/home/.openmpi/bin"
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/.openmpi/lib/"

# Install OpenSSH. Allow OpenSSH to talk to containers without asking for confirmation.
# BUG FIX: the rewritten config must be moved back to /etc/ssh/ssh_config
# (the original wrote "/etc/ssh/ssh_configs", so the StrictHostKeyChecking
# change was silently ignored by the ssh client).
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
    openssh-client \
    openssh-server \
 && rm -rf /var/lib/apt/lists/* \
 && mkdir -p /var/run/sshd \
 && cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new \
 && echo "    StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new \
 && mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config

# Install Miniconda into /opt/conda (already first on PATH via ENV above).
RUN curl -L -o ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-4.6.14-Linux-x86_64.sh \
 && chmod +x ~/miniconda.sh \
 && ~/miniconda.sh -b -p /opt/conda \
 && rm ~/miniconda.sh

# Pin Python to $PYTHON_VERSION and install the core scientific stack.
RUN /opt/conda/bin/conda update conda \
 && /opt/conda/bin/conda install -c conda-forge \
    python=$PYTHON_VERSION \
 && /opt/conda/bin/conda install -y \
    cython==0.29.12 \
    ipython==7.7.0 \
    mkl-include==2019.4 \
    mkl==2019.4 \
    numpy==1.19.1 \
    scipy==1.3.0 \
    typing==3.6.4 \
 && /opt/conda/bin/conda clean -ya

# CUDA-matched magma, OpenCV, and common ML/data libraries; expose conda's
# pip as pip3 so later "pip install" steps target the conda environment.
RUN conda install -c \
    pytorch magma-cuda101 \
 && conda install -c \
    conda-forge \
    opencv==4.0.1 \
 && conda install -y \
    scikit-learn==0.21.2 \
    pandas==0.25.0 \
    h5py==2.9.0 \
    requests==2.22.0 \
 && conda clean -ya \
 && /opt/conda/bin/conda config --set ssl_verify False \
 && pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
 && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
 && pip install packaging==20.4 \
    enum-compat==0.0.3 \
    ruamel-yaml
# Uninstall and re-install torch and torchvision (cp37 wheels, matching the
# python=3.7.3 pin above); then install TorchServe in place of the default
# multi-model-server stack.
RUN pip install --no-cache-dir -U https://pypi.tuna.tsinghua.edu.cn/packages/5d/5e/35140615fc1f925023f489e71086a9ecc188053d263d3594237281284d82/torch-1.6.0-cp37-cp37m-manylinux1_x86_64.whl#sha256=87d65c01d1b70bb46070824f28bfd93c86d3c5c56b90cbbe836a3f2491d91c76
RUN pip uninstall -y torchvision \
 && pip install --no-deps --no-cache-dir -U https://mirrors.aliyun.com/pypi/packages/4d/b5/60d5eb61f1880707a5749fea43e0ec76f27dfe69391cdec953ab5da5e676/torchvision-0.7.0-cp37-cp37m-manylinux1_x86_64.whl#sha256=0d1a5adfef4387659c7a0af3b72e16caa0c67224a422050ab65184d13ac9fb13

RUN pip uninstall -y model-archiver multi-model-server \
 && pip install captum \
 && pip install torchserve==$TS_VERSION \
 && pip install torch-model-archiver==$TS_VERSION

# Create the unprivileged user TorchServe runs as and the directories it owns.
RUN useradd -m model-server \
 && mkdir -p /home/model-server/tmp /opt/ml/model \
 && chown -R model-server /home/model-server /opt/ml/model

# SageMaker entrypoint and TorchServe configuration.
COPY torchserve-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
COPY config.properties /home/model-server

RUN chmod +x /usr/local/bin/dockerd-entrypoint.py

ADD https://raw.githubusercontent.com/aws/deep-learning-containers/master/src/deep_learning_container.py /usr/local/bin/deep_learning_container.py

RUN chmod +x /usr/local/bin/deep_learning_container.py

# SageMaker inference toolkit (provides the serving module set in ENV above).
RUN pip install --no-cache-dir "sagemaker-pytorch-inference>=2"

RUN curl https://aws-dlc-licenses.s3.amazonaws.com/pytorch-1.6.0/license.txt -o /license.txt

RUN conda install -y -c conda-forge "pyyaml>5.4,<5.5"
RUN pip install pillow==8.2.0 "awscli<2"

# Detectron2 wheel built for CUDA 10.1 / torch 1.6.
RUN python3 -m pip install detectron2==0.4 -f \
    https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.6/index.html

# OSS license-compliance tooling (standard AWS DLC step).
# BUG FIX: the unzip call was missing "$" and the archive path, and the
# generate_oss_compliance.sh invocation was garbled to "{HOME_DIR} ${PYTHON}".
RUN HOME_DIR=/root \
 && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
 && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
 && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
 && chmod +x /usr/local/bin/testOSSCompliance \
 && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
 && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
 && rm -rf ${HOME_DIR}/oss_compliance*

# 8080 = TorchServe inference API, 8081 = management API (documentation only).
EXPOSE 8080 8081
# Exec-form entrypoint; CMD supplies the default TorchServe launch arguments.
ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
CMD ["torchserve", "--start", "--ts-config", "/home/model-server/config.properties", "--model-store", "/home/model-server/"]

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions