dmlc · barry-jin · Mar 19, 2020 · Mar 27, 2020 · Mar 28, 2020 · Apr 1, 2020
@@ -0,0 +1,70 @@
+name: continuous build
+
+on: [push, pull_request]
+
+defaults:
+  run:
+    shell: bash
+
+jobs:
+  unittest:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        # TODO Add windows test by using "windows-latest"
+        os: [ubuntu-latest, macos-latest]
+        python-version: ['3.6', '3.7', '3.8']
+        # Versions are quoted so they stay strings like the matrix entries
+        # above; net effect: ubuntu runs 3.6 and 3.8, macOS runs 3.7.
+        exclude:
+          - os: macos-latest
+            python-version: '3.6'
+          - os: macos-latest
+            python-version: '3.8'
+          - os: ubuntu-latest
+            python-version: '3.7'
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v2
+
+      - name: Compilation cache
+        uses: actions/cache@v2
+        with:
+          path: ~/.ccache
+          # We include the commit sha in the cache key, as new cache entries are
+          # only created if there is no existing entry for the key yet.
+          key: ${{ runner.os }}-ccache-${{ github.sha }}
+          # Restore any ccache cache entry, if none for
+          # ${{ runner.os }}-ccache-${{ github.sha }} exists
+          restore-keys: |
+            ${{ runner.os }}-ccache
+      - name: Setup python
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+          architecture: x64
+      - name: Install Other Dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install setuptools pytest pytest-cov contextvars
+          python -m pip install --pre "mxnet>=1.9.0b20210220" -f https://dist.mxnet.io/python
+          python -m pip install -U -e .[extras,dev]
+          python -m pip install -v -e .
+          python -m pip install horovod --no-cache-dir -U
+          python -m spacy download en
+          python -m spacy download de
+          python -m nltk.downloader all
+
+      - name: Run Unittests
+        run: |
+          pytest -n 4 -m 'not (gpu or serial)' --durations=30 --cov=./ --cov-report=xml tests/unittest
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v1.0.10
+        # env_vars lists environment variable *names*; define them here so the
+        # upload is tagged with the actual matrix OS and Python version.
+        env:
+          OS: ${{ matrix.os }}
+          PYTHON: ${{ matrix.python-version }}
+        with:
+          env_vars: OS,PYTHON
@@ -57,8 +57,7 @@ ratcheck: ci/rat/apache-rat.jar
         echo "SUCCESS: There are no files with an Unknown License."; \
     fi
 
-docs: compile_notebooks distribute
-	make -C docs html SPHINXOPTS=-W
+docs: docs_local
 	for f in $(shell find docs/examples -type f -name '*.md' -print) ; do \
 		FILE=`echo $$f | sed 's/docs\///g'` ; \
 		DIR=`dirname $$FILE` ; \
@@ -80,8 +79,11 @@ docs: compile_notebooks distribute
 	sed -i.bak 's/33\,150\,243/23\,141\,201/g' docs/_build/html/_static/material-design-lite-1.3.0/material.blue-deep_orange.min.css;
 	sed -i.bak 's/2196f3/178dc9/g' docs/_build/html/_static/sphinx_materialdesign_theme.css;
 
+docs_local: compile_notebooks distribute
+	make -C docs html SPHINXOPTS=-W
+
 clean:
-	git clean -ff -d -x --exclude="$(ROOTDIR)/tests/data/*" --exclude="$(ROOTDIR)/conda/"
+	git clean -ff -d -x --exclude="$(ROOTDIR)/tests/data/*" --exclude="$(ROOTDIR)/conda/" --exclude="$(ROOTDIR)/.idea/"
 
 compile_notebooks:
 	for f in $(shell find docs/examples -type f -name '*.md' -print) ; do \
@@ -93,7 +95,7 @@ compile_notebooks:
 		if [ -f $$TARGETNAME ]; then \
 			echo $$TARGETNAME exists. Skipping compilation of $$BASENAME in Makefile. ; \
 		else \
-			python $(MD2IPYNB) $$BASENAME ; \
+			python3 $(MD2IPYNB) $(MD2IPYNB_OPTION) $$BASENAME ; \
 		fi ; \
 		cd - ; \
 	done;
@@ -110,4 +112,4 @@ test:
 	py.test -v --capture=no --durations=0  tests/unittest scripts
 
 distribute: dist_scripts dist_notebooks
-	python setup.py sdist
+	python3 setup.py sdist
@@ -215,4 +215,4 @@ The bibtex entry for the `reference paper <https://arxiv.org/abs/1907.04433>`__
 New to Deep Learning or NLP?
 ============================
 
-For background knowledge of deep learning or NLP, please refer to the open source book `Dive into Deep Learning <http://en.diveintodeeplearning.org/>`__.
+For background knowledge of deep learning or NLP, please refer to the open source book `Dive into Deep Learning <https://d2l.ai/>`__ (`中文版 <https://zh.d2l.ai>`__).
@@ -16,7 +16,7 @@ FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
           libxft-dev &&\
       rm -rf /var/lib/apt/lists/*
 
- RUN curl -o ~/miniconda.sh -O  https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh  && \
+ RUN curl -o ~/miniconda.sh -O  https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh  && \
       chmod +x ~/miniconda.sh && \
       ~/miniconda.sh -b -p /opt/conda && \
       rm ~/miniconda.sh && \

@@ -21,7 +21,7 @@
 // See documents at https://jenkins.io/doc/book/pipeline/jenkinsfile/
 
 // timeout in minutes
-max_time = 120
+max_time = 180
 
 node {
   // Loading the utilities requires a node context unfortunately

@@ -70,7 +70,7 @@ core_logic: {
                 conda activate ./conda/cpu/py3-master
 
                 python3 ci/batch/submit-job.py --region us-east-1 --wait \
-                  --timeout 1800 --saved-output ./docs/examples --conda-env docker/py3 \
+                  --timeout 3600 --saved-output ./docs/examples --conda-env docker/py3 \
                   --name GluonNLP-${env.BRANCH_NAME}-${env.BUILD_NUMBER} \
                   --save-path batch/${env.BRANCH_NAME}/${env.BUILD_NUMBER}/docs/examples \
                   --work-dir . --source-ref refs/pull/${env.CHANGE_ID}/head \
@@ -103,7 +103,7 @@ core_logic: {
                 conda activate ./conda/cpu/py3-master
 
                 python3 ci/batch/submit-job.py --region us-east-1 --wait \
-                  --timeout 1800 --saved-output ./docs/examples --conda-env docker/py3 \
+                  --timeout 3600 --saved-output ./docs/examples --conda-env docker/py3 \
                   --name GluonNLP-${env.BRANCH_NAME}-${env.BUILD_NUMBER} \
                   --save-path batch/${env.BRANCH_NAME}/${env.BUILD_NUMBER}/docs/examples \
                   --work-dir . --source-ref ${env.BRANCH_NAME} \

@@ -21,7 +21,7 @@
 // See documents at https://jenkins.io/doc/book/pipeline/jenkinsfile/
 
 // timeout in minutes
-max_time = 120
+max_time = 180
 
 node {
   // Loading the utilities requires a node context unfortunately

@@ -11,13 +11,16 @@ export MXNET_HOME=$PWD/tests/data
 export HOROVOD_WITHOUT_TENSORFLOW=1
 export HOROVOD_WITHOUT_PYTORCH=1
 export HOROVOD_WITH_MXNET=1
+export HOROVOD_WITH_GLOO=1  # NOTE(review): Horovod build flag, presumably forces the Gloo backend - confirm
+export HOROVOD_WITHOUT_MPI=1  # NOTE(review): presumably skips building the MPI backend - confirm
 
 make clean
 conda env update --prune -p conda/${env_name} -f env/${env_name}.yml
 conda activate ./conda/${env_name}
 conda list
 printenv
 
+pip install cmake  # NOTE(review): presumably needed to compile Horovod with Gloo below - confirm
 pip install -v -e .
 pip install horovod --no-cache-dir -U
 python -m spacy download en

@@ -63,6 +63,7 @@
 nbsphinx_kernel_name = 'python3'
 nbsphinx_allow_errors = True
 nbsphinx_timeout = 1200
+nbsphinx_execute = 'never'  # never run notebooks at Sphinx build time; presumably pre-executed by md2ipynb - confirm
 html_sourcelink_suffix = ''
 
 html_context = {
@@ -172,8 +173,8 @@
     'header_links' : [
         ('Install', 'install/install-more', False, ''),
         ('API', 'api/index', False, ''),
-        ('Community', 'community/index', False, ''),
-        ('Contribute', 'community/contribute', False, ''),
+        ('Community', 'website/index', False, ''),
+        ('Contribute', 'website/contribute', False, ''),
         ('GitHub', 'https://github.com/dmlc/gluon-nlp/', True, 'fab fa-github'),
     ],
 
@@ -209,7 +210,7 @@
 
 intersphinx_mapping = {
     'python': ('https://docs.python.org/{.major}'.format(sys.version_info), None),
-    'mxnet': ('https://beta.mxnet.io/', None),
+    'mxnet': ('https://mxnet.apache.org/api/python/docs/', None),
     'numpy': ('http://docs.scipy.org/doc/numpy/', None),
     'scipy': ('http://docs.scipy.org/doc/scipy/reference', None),
     'matplotlib': ('http://matplotlib.org/', None),

@@ -38,8 +38,9 @@ from mxnet import gluon, nd, init
 from mxnet.gluon import nn, rnn
 from mxnet import autograd, gluon, nd
 
-# iUse sklearn's metric function to evaluate the results of the experiment
-from sklearn.metrics import accuracy_score, f1_score
+# Use MXNet's and sklearn's metric functions to evaluate the results of the experiment
+from mxnet.metric import Accuracy
+from sklearn.metrics import f1_score
 
 # fixed random number seed
 np.random.seed(2018)
@@ -65,7 +66,6 @@ The [Yelp users' review dataset](https://www.kaggle.com/yelp-dataset/yelp-datase
 
 Each sample in the data consists of a user's comment, in English, with each comment marked one through five, each number representing one of five different emotions the user expressed. Here we download, unzip, and reformat the dataset for ease of use further on.
 
-
 ```{.python .input}
 # Download the data from the server
 data_url = 'http://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/yelp_review_subset-167bb781.zip'
@@ -398,10 +398,14 @@ def one_epoch(data_iter, model, loss, trainer, ctx, is_train, epoch,
     total_true = []
     n_batch = 0
 
+    batch_acc = Accuracy()  # Batch Accuracy
+    epoch_acc = Accuracy()  # Epoch Accuracy
+
     for batch_x, batch_y in data_iter:
         batch_x = batch_x.as_in_context(ctx)
         batch_y = batch_y.as_in_context(ctx)
 
+        batch_acc.reset()   # Reset Batch Accuracy
         if is_train:
             with autograd.record():
                 batch_pred, l = calculate_loss(batch_x, batch_y, model, loss, class_weight, penal_coeff)
@@ -429,10 +433,10 @@ def one_epoch(data_iter, model, loss, trainer, ctx, is_train, epoch,
             batch_pred, l = calculate_loss(batch_x, batch_y, model, loss, class_weight, penal_coeff)
 
         # keep result for metric
-        batch_pred = nd.argmax(nd.softmax(batch_pred, axis=1), axis=1).asnumpy()
-        batch_true = np.reshape(batch_y.asnumpy(), (-1, ))
-        total_pred.extend(batch_pred.tolist())
-        total_true.extend(batch_true.tolist())
+        batch_pred = nd.argmax(nd.softmax(batch_pred, axis=1), axis=1)
+        batch_true = batch_y.reshape(-1, )
+        total_pred.extend(batch_pred.asnumpy().tolist())
+        total_true.extend(batch_true.asnumpy().tolist())
 
         batch_loss = l.mean().asscalar()
 
@@ -441,22 +445,24 @@ def one_epoch(data_iter, model, loss, trainer, ctx, is_train, epoch,
 
         # check the result of traing phase
         if is_train and n_batch % 400 == 0:
+            batch_acc.update(batch_true, batch_pred)
             print('epoch %d, batch %d, batch_train_loss %.4f, batch_train_acc %.3f' %
-                  (epoch, n_batch, batch_loss, accuracy_score(batch_true, batch_pred)))
+                  (epoch, n_batch, batch_loss, batch_acc.get()[1]))
+
 
     # metric
     F1 = f1_score(np.array(total_true), np.array(total_pred), average='weighted')
-    acc = accuracy_score(np.array(total_true), np.array(total_pred))
+    epoch_acc.update(nd.array(total_true), nd.array(total_pred))
     loss_val /= n_batch
 
     if is_train:
         print('epoch %d, learning_rate %.5f \n\t train_loss %.4f, acc_train %.3f, F1_train %.3f, ' %
-              (epoch, trainer.learning_rate, loss_val, acc, F1))
+              (epoch, trainer.learning_rate, loss_val, epoch_acc.get()[1], F1))
         # declay lr
         if epoch % 2 == 0:
             trainer.set_learning_rate(trainer.learning_rate * 0.9)
     else:
-        print('\t valid_loss %.4f, acc_valid %.3f, F1_valid %.3f, ' % (loss_val, acc, F1))
+        print('\t valid_loss %.4f, acc_valid %.3f, F1_valid %.3f, ' % (loss_val, epoch_acc.get()[1], F1))
 
 ```
 
@@ -487,7 +493,6 @@ def train_valid(data_iter_train, data_iter_valid, model, loss, trainer, ctx, nep
 
 Now that we are actually training the model, we use `WeightedSoftmaxCE` to alleviate the problem of data categorical imbalance. We perform statistical analysis on the data in advance to retrieve a set of `class_weight`s.
 
-
 ```{.python .input}
 class_weight = None
 loss_name = 'wsce'

@@ -79,5 +79,5 @@ You may find the 60-min Gluon crash course linked from there especially helpful.
    model_zoo/index
    examples/index
    api/index
-   community/index
+   website/index
    genindex
@@ -1,22 +1,26 @@
+import argparse
 import os
-import sys
 import time
-import notedown
+
 import nbformat
+import notedown
 
-assert len(sys.argv) == 2, 'usage: input.md'
+parser = argparse.ArgumentParser(description='Convert md file to ipynb files.')
+parser.add_argument('input', help='input.md', type=str)
+parser.add_argument('-d', '--disable_compute',
+                    help='Disable computing python scripts', action="store_true")
+args = parser.parse_args()
 
 # timeout for each notebook, in sec
-timeout = 40 * 60
+timeout = 90 * 60
 
 # the files will be ignored for execution
 ignore_execution = []
 
-input_path = sys.argv[1]
-
 # Change working directory to directory of input file
-input_dir, input_fn = os.path.split(input_path)
-os.chdir(input_dir)
+input_dir, input_fn = os.path.split(args.input)
+if input_dir:
+    os.chdir(input_dir)
 
 output_fn = '.'.join(input_fn.split('.')[:-1] + ['ipynb'])
 
@@ -28,8 +32,9 @@
 
 if not any([i in input_fn for i in ignore_execution]):
     tic = time.time()
-    notedown.run(notebook, timeout)
-    print('=== Finished evaluation in %f sec'%(time.time()-tic))
+    if not args.disable_compute:  # -d/--disable_compute: skip execution and its timing report
+        notedown.run(notebook, timeout)
+        print('=== Finished evaluation in %f sec' % (time.time() - tic))
 
 # write
 # need to add language info to for syntax highlight