diff --git a/.gitignore b/.gitignore index 3e759b7..31cc943 100644 --- a/.gitignore +++ b/.gitignore @@ -221,7 +221,7 @@ ClientBin/ *.publishsettings orleans.codegen.cs -# Including strong name files can present a security risk +# Including strong name files can present a security risk # (https://github.com/github/gitignore/pull/2483#issue-259490424) #*.snk @@ -317,7 +317,7 @@ __pycache__/ # OpenCover UI analysis results OpenCover/ -# Azure Stream Analytics local run output +# Azure Stream Analytics local run output ASALocalRun/ # MSBuild Binary and Structured Log @@ -326,5 +326,10 @@ ASALocalRun/ # NVidia Nsight GPU debugger configuration file *.nvuser -# MFractors (Xamarin productivity tool) working folder +# MFractors (Xamarin productivity tool) working folder .mfractor/ + +# Models and Data +saved_model/ +data/ + diff --git a/README.md b/README.md index 2c9a0fb..034be7f 100644 --- a/README.md +++ b/README.md @@ -9,9 +9,12 @@ Code for our ACL'19 accepted paper: [Towards Complex Text-to-SQL in Cross-Domain * `Python3.6` * `Pytorch 0.4.0` or higher +* `libmysqlclient-dev` installed Install Python dependency via `pip install -r requirements.txt` when the environment of Python and Pytorch is setup. 
+* Download `wordnet` via `python -m nltk.downloader wordnet` + ## Running Code #### Data preparation @@ -20,11 +23,11 @@ Install Python dependency via `pip install -r requirements.txt` when the environ * Download [Glove Embedding](https://nlp.stanford.edu/data/wordvecs/glove.42B.300d.zip) and put `glove.42B.300d` under `./data/` directory * Download [Pretrained IRNet](https://drive.google.com/open?id=1VoV28fneYss8HaZmoThGlvYU3A-aK31q) and put ` IRNet_pretrained.model` under `./saved_model/` directory -* Download preprocessed train/dev datasets from [here](https://drive.google.com/open?id=1YFV1GoLivOMlmunKW0nkzefKULO4wtrn) and put `train.json`, `dev.json` and +* Download preprocessed train/dev datasets from [here](https://drive.google.com/open?id=1YFV1GoLivOMlmunKW0nkzefKULO4wtrn) and put `train.json`, `dev.json` and `tables.json` under `./data/` directory ##### Generating train/dev data by yourself -You could process the origin [Spider Data](https://drive.google.com/uc?export=download&id=11icoH_EA-NYb0OrPTdehRWm_d7-DIzWX) by your own. Download and put `train.json`, `dev.json` and +You could process the original [Spider Data](https://drive.google.com/uc?export=download&id=11icoH_EA-NYb0OrPTdehRWm_d7-DIzWX) on your own. 
Download and put `train.json`, `dev.json` and `tables.json` under `./data/` directory and follow the instruction on `./preprocess/` #### Training diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..a670d67 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,16 @@ +FROM nvcr.io/nvidia/pytorch:19.12-py3 + +# update and install setup +RUN apt-get update && apt-get install -y --no-install-recommends \ + libmysqlclient-dev && rm -rf /var/lib/apt/lists/* + +# no more need for sudo +#RUN useradd -r -u 12078 mnoukhov +#USER mnoukhov + +# pip install +COPY requirements.txt /tmp/ +RUN pip install --no-cache-dir -r /tmp/requirements.txt + +# nltk data +RUN python -m nltk.downloader wordnet diff --git a/docker/requirements.txt b/docker/requirements.txt new file mode 100644 index 0000000..f1ecb55 --- /dev/null +++ b/docker/requirements.txt @@ -0,0 +1,8 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +nltk>=3.4 +pattern +numpy>=1.14.0 +pytorch-pretrained-bert>=0.5.1 +tqdm>=4.31.1 diff --git a/requirements.txt b/requirements.txt index a0e23ba..f1ecb55 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. -nltk==3.4 +nltk>=3.4 pattern -numpy==1.14.0 -pytorch-pretrained-bert==0.5.1 -tqdm==4.31.1 \ No newline at end of file +numpy>=1.14.0 +pytorch-pretrained-bert>=0.5.1 +tqdm>=4.31.1