From 963951c658843a0707aaa65d292d2d07eb33b48b Mon Sep 17 00:00:00 2001 From: Michael Noukhovitch Date: Mon, 13 Jan 2020 16:30:05 +0000 Subject: [PATCH 1/3] more complete setup instructions --- README.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2c9a0fb..034be7f 100644 --- a/README.md +++ b/README.md @@ -9,9 +9,12 @@ Code for our ACL'19 accepted paper: [Towards Complex Text-to-SQL in Cross-Domain * `Python3.6` * `Pytorch 0.4.0` or higher +* `libmysqlclient-dev` installed Install Python dependency via `pip install -r requirements.txt` when the environment of Python and Pytorch is setup. +* download `wordnet` via `python -m nltk.downloader wordnet` + ## Running Code #### Data preparation @@ -20,11 +23,11 @@ Install Python dependency via `pip install -r requirements.txt` when the environ * Download [Glove Embedding](https://nlp.stanford.edu/data/wordvecs/glove.42B.300d.zip) and put `glove.42B.300d` under `./data/` directory * Download [Pretrained IRNet](https://drive.google.com/open?id=1VoV28fneYss8HaZmoThGlvYU3A-aK31q) and put ` IRNet_pretrained.model` under `./saved_model/` directory -* Download preprocessed train/dev datasets from [here](https://drive.google.com/open?id=1YFV1GoLivOMlmunKW0nkzefKULO4wtrn) and put `train.json`, `dev.json` and +* Download preprocessed train/dev datasets from [here](https://drive.google.com/open?id=1YFV1GoLivOMlmunKW0nkzefKULO4wtrn) and put `train.json`, `dev.json` and `tables.json` under `./data/` directory ##### Generating train/dev data by yourself -You could process the origin [Spider Data](https://drive.google.com/uc?export=download&id=11icoH_EA-NYb0OrPTdehRWm_d7-DIzWX) by your own. Download and put `train.json`, `dev.json` and +You could process the origin [Spider Data](https://drive.google.com/uc?export=download&id=11icoH_EA-NYb0OrPTdehRWm_d7-DIzWX) by your own. 
Download and put `train.json`, `dev.json` and `tables.json` under `./data/` directory and follow the instruction on `./preprocess/` #### Training From c56954730d764d47813b2c97374f595a221de107 Mon Sep 17 00:00:00 2001 From: Michael Noukhovitch Date: Mon, 13 Jan 2020 16:32:14 +0000 Subject: [PATCH 2/3] newer req libraries --- requirements.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index a0e23ba..f1ecb55 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. -nltk==3.4 +nltk>=3.4 pattern -numpy==1.14.0 -pytorch-pretrained-bert==0.5.1 -tqdm==4.31.1 \ No newline at end of file +numpy>=1.14.0 +pytorch-pretrained-bert>=0.5.1 +tqdm>=4.31.1 From f3d2b9e85f4f5f05f0a9eaeb61f2ef71755e0b7c Mon Sep 17 00:00:00 2001 From: Michael Noukhovitch Date: Mon, 13 Jan 2020 18:01:15 +0000 Subject: [PATCH 3/3] gitignore data and move to docker --- .gitignore | 11 ++++++++--- docker/Dockerfile | 16 ++++++++++++++++ docker/requirements.txt | 8 ++++++++ 3 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 docker/Dockerfile create mode 100644 docker/requirements.txt diff --git a/.gitignore b/.gitignore index 3e759b7..31cc943 100644 --- a/.gitignore +++ b/.gitignore @@ -221,7 +221,7 @@ ClientBin/ *.publishsettings orleans.codegen.cs -# Including strong name files can present a security risk +# Including strong name files can present a security risk # (https://github.com/github/gitignore/pull/2483#issue-259490424) #*.snk @@ -317,7 +317,7 @@ __pycache__/ # OpenCover UI analysis results OpenCover/ -# Azure Stream Analytics local run output +# Azure Stream Analytics local run output ASALocalRun/ # MSBuild Binary and Structured Log @@ -326,5 +326,10 @@ ASALocalRun/ # NVidia Nsight GPU debugger configuration file *.nvuser -# MFractors (Xamarin productivity tool) working folder +# MFractors (Xamarin productivity tool) working 
folder
 .mfractor/
+
+# Models and Data
+saved_model/
+data/
+
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..a670d67
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,16 @@
+FROM nvcr.io/nvidia/pytorch:19.12-py3
+
+# update and install setup (apt lists are removed in the same layer so they never persist)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libmysqlclient-dev && rm -rf /var/lib/apt/lists/*
+
+# no more need for sudo
+#RUN useradd -r -u 12078 mnoukhov
+#USER mnoukhov
+
+# pip install (--no-cache-dir keeps pip's download cache out of the image layer)
+COPY requirements.txt /tmp/
+RUN pip install --no-cache-dir -r /tmp/requirements.txt
+
+# nltk data
+RUN python -m nltk.downloader wordnet
diff --git a/docker/requirements.txt b/docker/requirements.txt
new file mode 100644
index 0000000..f1ecb55
--- /dev/null
+++ b/docker/requirements.txt
@@ -0,0 +1,8 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+nltk>=3.4
+pattern
+numpy>=1.14.0
+pytorch-pretrained-bert>=0.5.1
+tqdm>=4.31.1