marian-nmt · rhenry-nv · Jan 20, 2021 · Jan 21, 2021 · Jan 21, 2021 · Jan 21, 2021
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 ## [Unreleased]
 
 ### Added
+- Add support for returning sentences as soon as translation is done in beam search.
 - Support for RMSNorm as drop-in replace for LayerNorm from `Biao Zhang; Rico Sennrich (2019). Root Mean Square Layer Normalization`. Enabled in Transformer model via `--transformer-postprocess dar` instead of `dan`.
 - Extend suppression of unwanted output symbols, specifically "\n" from default vocabulary if generated by SentencePiece with byte-fallback. Deactivates with --allow-special
 - Allow for fine-grained CPU intrinsics overrides when BUILD_ARCH != native e.g. -DBUILD_ARCH=x86-64 -DCOMPILE_AVX512=off

diff --git a/contrib/triton-aml/Dockerfile b/contrib/triton-aml/Dockerfile
@@ -1,7 +1,7 @@
 # It is recommended to use a machine which supports CUDA to build this image.
-FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 AS BUILDER
+FROM nvidia/cuda:11.1-cudnn8-devel-ubuntu18.04 AS BUILDER
 RUN apt-get update --fix-missing
-RUN apt-get install -y curl git autoconf automake libtool curl make g++ unzip cmake build-essential cpio
+RUN apt-get install -y curl git autoconf automake libtool curl make g++ unzip cmake build-essential cpio libgoogle-perftools-dev
 RUN apt-get -y clean && \
     rm -rf /var/lib/apt/lists/*
 
@@ -42,10 +42,9 @@ RUN ./b2 install --prefix=/usr --with-system --with-thread --with-date_time --wi
 
 # Marian install
 WORKDIR /
-RUN git clone --no-checkout https://github.com/marian-nmt/marian-dev
+RUN git clone --no-checkout https://github.com/marian-nmt/marian-dev.git 
 WORKDIR marian-dev
-RUN git checkout youki/quantize-embedding
-RUN git checkout dad48865fd3b7f1d7b891de81040f7651e824510
+RUN git checkout master
 RUN mkdir src/static
 RUN mkdir build
 COPY src/cmarian.cpp /marian-dev/src/static
@@ -54,7 +53,10 @@ RUN rm src/CMakeLists.txt
 COPY src/CMakeLists.txt /marian-dev/src
 
 WORKDIR /marian-dev/build
-RUN cmake .. -DCOMPILE_CPU=on -DCOMPILE_CUDA=on -DUSE_SENTENCEPIECE=on -DUSE_STATIC_LIBS=off -DCOMPILE_SERVER=off -DUSE_FBGEMM=on -DCUDA_cublas_device_LIBRARY=/usr/lib/x86_64-linux-gnu/libcublas.so
+RUN cmake .. -DCOMPILE_CPU=on -DCOMPILE_CUDA=on -DUSE_SENTENCEPIECE=on -DUSE_STATIC_LIBS=off -DCOMPILE_SERVER=off -DUSE_FBGEMM=on \
+             -DCOMPILE_CUDA_SM35=off -DCOMPILE_CUDA_SM50=off -DCOMPILE_CUDA_SM60=off -DCOMPILE_CUDA_SM70=on -DCOMPILE_CUDA_SM75=on \
+             -DCUDA_cublas_device_LIBRARY=/usr/local/cuda/lib64/libcublas.so
+
 RUN make -j $(grep -c ^processor /proc/cpuinfo)
 
 # build cmarian static library
@@ -66,6 +68,7 @@ COPY --from=BUILDER /marian-dev/build/src/3rd_party/fbgemm/libfbgemm.a /usr/lib
 COPY --from=BUILDER /marian-dev/build/src/3rd_party/fbgemm/asmjit/libasmjit.a /usr/lib
 COPY --from=BUILDER /marian-dev/build/src/3rd_party/sentencepiece/src/libsentencepiece_train.a /usr/lib
 COPY --from=BUILDER /marian-dev/build/src/3rd_party/sentencepiece/src/libsentencepiece.a /usr/lib
+COPY --from=BUILDER /marian-dev/build/src/3rd_party/intgemm/libintgemm.a /usr/lib
 COPY --from=BUILDER /marian-dev/build/libmarian.a /usr/lib/libcmarian.a
 COPY --from=BUILDER /marian-dev/build/src/libmarian_cuda.a /usr/lib/libcmarian_cuda.a
 

diff --git a/contrib/triton-aml/README.md b/contrib/triton-aml/README.md
@@ -27,9 +27,18 @@ For the AzureML Inference team members, you can put it into the following place
 
 Where <backend_directory> is by default /opt/tritonserver/backends.
 
+This backend will return sentences as soon as they are done with translation by default. To only return when the
+entire batch is finished translating, set the `async` parameter to "false" by adding the following to your config.pbtxt file.
+
+parameters [
+  {
+    key: "async"
+    value: { string_value : "false" }
+  }
+]
 ## Make changes
 
-If you want to compile with another version of Marian, you need to replace `RUN git checkout youki/quantize-embedding` in the Dockerfile, then copy the new CMakeLists.txt replace the old one, add src/cmarian.cpp into CMakeLists.txt and make some changes to make sure it will build a static library of Marian.
+If you want to compile with another version of Marian, you need to replace `RUN git checkout master` in the Dockerfile, then copy the new CMakeLists.txt to replace the old one, add src/cmarian.cpp into CMakeLists.txt and make some changes to make sure it will build a static library of Marian.
 
 ## Limitation
 

diff --git a/contrib/triton-aml/marian_backend/CMakeLists.txt b/contrib/triton-aml/marian_backend/CMakeLists.txt
@@ -94,6 +94,7 @@ target_link_libraries(
     fbgemm
     asmjit
     protobuf
+    intgemm
 )
-Original file line number
+Diff line change
@@ Expand Up / @@ -94,6 +94,7 @@ target_link_libraries( @@
         fbgemm
         asmjit
         protobuf
+        intgemm
     )
@@ Expand Down @@