11 changes: 8 additions & 3 deletions .gitignore
@@ -221,7 +221,7 @@ ClientBin/
*.publishsettings
orleans.codegen.cs

# Including strong name files can present a security risk
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
#*.snk

@@ -317,7 +317,7 @@ __pycache__/
# OpenCover UI analysis results
OpenCover/

# Azure Stream Analytics local run output
ASALocalRun/

# MSBuild Binary and Structured Log
@@ -326,5 +326,10 @@ ASALocalRun/
# NVidia Nsight GPU debugger configuration file
*.nvuser

# MFractors (Xamarin productivity tool) working folder
.mfractor/

# Models and Data
saved_model/
data/

7 changes: 5 additions & 2 deletions README.md
@@ -9,9 +9,12 @@ Code for our ACL'19 accepted paper: [Towards Complex Text-to-SQL in Cross-Domain

* `Python3.6`
* `Pytorch 0.4.0` or higher
* `libmysqlclient-dev` installed

Install the Python dependencies via `pip install -r requirements.txt` once the Python and Pytorch environment is set up.

* Download `wordnet` via `python -m nltk.downloader wordnet`
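Taken together, the setup steps above can be sketched as the following shell session (a sketch, assuming Python 3.6 and a compatible Pytorch are already installed, and that `apt-get` is available for the system package):

```shell
# System library needed by the Python MySQL bindings
sudo apt-get update && sudo apt-get install -y libmysqlclient-dev

# Python dependencies from the repository root
pip install -r requirements.txt

# NLTK WordNet data used by the preprocessing scripts
python -m nltk.downloader wordnet
```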

## Running Code

#### Data preparation
@@ -20,11 +23,11 @@ Install Python dependency via `pip install -r requirements.txt` when the environ
* Download [Glove Embedding](https://nlp.stanford.edu/data/wordvecs/glove.42B.300d.zip) and put `glove.42B.300d` under `./data/` directory
* Download [Pretrained IRNet](https://drive.google.com/open?id=1VoV28fneYss8HaZmoThGlvYU3A-aK31q) and put `IRNet_pretrained.model` under `./saved_model/` directory
* Download preprocessed train/dev datasets from [here](https://drive.google.com/open?id=1YFV1GoLivOMlmunKW0nkzefKULO4wtrn) and put `train.json`, `dev.json` and `tables.json` under `./data/` directory

##### Generating train/dev data by yourself
You can process the original [Spider Data](https://drive.google.com/uc?export=download&id=11icoH_EA-NYb0OrPTdehRWm_d7-DIzWX) yourself. Download it, put `train.json`, `dev.json` and `tables.json` under the `./data/` directory, and follow the instructions in `./preprocess/`

#### Training
16 changes: 16 additions & 0 deletions docker/Dockerfile
@@ -0,0 +1,16 @@
FROM nvcr.io/nvidia/pytorch:19.12-py3

# update package lists and install system dependencies
RUN apt-get update && apt-get install -y \
libmysqlclient-dev

# no more need for sudo
#RUN useradd -r -u 12078 mnoukhov
#USER mnoukhov

# pip install
COPY requirements.txt /tmp/
RUN pip install -r /tmp/requirements.txt

# nltk data
RUN python -m nltk.downloader wordnet
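A minimal sketch of how this image might be built and used (the `irnet` tag and the mount layout are illustrative choices, not part of this PR; GPU flags depend on your Docker and NVIDIA runtime versions):

```shell
# Build the image from the repository root
docker build -t irnet -f docker/Dockerfile .

# Start a shell in the container with the repository mounted,
# so ./data and ./saved_model are visible inside it
docker run --rm -it --gpus all -v "$PWD":/workspace -w /workspace irnet bash
```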
8 changes: 8 additions & 0 deletions docker/requirements.txt
@@ -0,0 +1,8 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

nltk>=3.4
pattern
numpy>=1.14.0
pytorch-pretrained-bert>=0.5.1
tqdm>=4.31.1
8 changes: 4 additions & 4 deletions requirements.txt
@@ -1,8 +1,8 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

nltk==3.4
nltk>=3.4
pattern
numpy==1.14.0
pytorch-pretrained-bert==0.5.1
tqdm==4.31.1
numpy>=1.14.0
pytorch-pretrained-bert>=0.5.1
tqdm>=4.31.1