diff --git a/.gitattributes b/.gitattributes index 191ace7..25b1ef7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,5 +1,10 @@ tests/testdata/eicu_demo/dyn.parquet filter=lfs diff=lfs merge=lfs -text tests/testdata/eicu_demo/sta.parquet filter=lfs diff=lfs merge=lfs -text tests/testdata/eicu_demo/hospital.parquet filter=lfs diff=lfs merge=lfs -text +tests/testdata/eicu_demo/features.parquet filter=lfs diff=lfs merge=lfs -text tests/testdata/mimic_demo/dyn.parquet filter=lfs diff=lfs merge=lfs -text -tests/testdata/mimic_demo/sta.parquet filter=lfs diff=lfs merge=lfs -text \ No newline at end of file +tests/testdata/mimic_demo/sta.parquet filter=lfs diff=lfs merge=lfs -text +tests/testdata/mimic_demo/features.parquet filter=lfs diff=lfs merge=lfs -text +tests/testdata/miiv_demo/dyn.parquet filter=lfs diff=lfs merge=lfs -text +tests/testdata/miiv_demo/sta.parquet filter=lfs diff=lfs merge=lfs -text +tests/testdata/miiv_demo/features.parquet filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f166cf0..d8a1518 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -30,15 +30,29 @@ jobs: run: python -m pip install --no-build-isolation --no-deps --disable-pip-version-check -e . - name: install icd-mappings run: pip install icd-mappings + - name: Move features.parquet out of the way + run: | + mv tests/testdata/eicu_demo/features.parquet tests/testdata/eicu_demo/old_features.parquet + mv tests/testdata/mimic_demo-carevue/features.parquet tests/testdata/mimic_demo-carevue/old_features.parquet + mv tests/testdata/miiv_demo/features.parquet tests/testdata/miiv_demo/old_features.parquet - name: feature engineering for eicu_demo run: | python icu_features/icd_codes.py --data_dir "tests/testdata" --dataset "eicu_demo" python icu_features/feature_engineering.py --dataset "eicu_demo" --data_dir "tests/testdata" - - name: feature engineering for eicu_demo + - name: feature engineering for mimic_demo run: | python icu_features/split_datasets.py --data_dir "tests/testdata" python icu_features/icd_codes.py --data_dir "tests/testdata" --dataset "mimic_demo-carevue" python icu_features/feature_engineering.py --dataset "mimic_demo-carevue" --data_dir "tests/testdata" + - name: feature engineering for miiv_demo + run: | + python icu_features/icd_codes.py --data_dir "tests/testdata" --dataset "miiv_demo" + python icu_features/feature_engineering.py --dataset "miiv_demo" --data_dir "tests/testdata" + # - name: compare new to old features.parquet + # run: | + # python -c "import pyarrow.parquet as pq;old = pq.read_table('tests/testdata/eicu_demo/old_features.parquet');new = pq.read_table('tests/testdata/eicu_demo/features.parquet');assert old.equals(new)" + # python -c "import pyarrow.parquet as pq;old = pq.read_table('tests/testdata/mimic_demo-carevue/old_features.parquet');new = pq.read_table('tests/testdata/mimic_demo-carevue/features.parquet');assert old.equals(new)" + # python -c "import pyarrow.parquet as pq;old = pq.read_table('tests/testdata/miiv_demo/old_features.parquet');new = pq.read_table('tests/testdata/miiv_demo/features.parquet');assert old.equals(new)" - name: Pytest run: pytest tests diff --git a/environment.yml b/environment.yml index 5ed7372..4a5d8ef 100644 --- a/environment.yml +++ b/environment.yml @@ -3,7 +3,7 @@ channels: - conda-forge - nodefaults dependencies: - - python>=3.12 + - python==3.12 - setuptools - pip - click diff --git a/tests/testdata/eicu_demo/.gitignore b/tests/testdata/eicu_demo/.gitignore deleted file mode 100644 index c84daf3..0000000 --- a/tests/testdata/eicu_demo/.gitignore +++ /dev/null @@ -1 +0,0 @@ -features.parquet \ No newline at end of file diff --git a/tests/testdata/eicu_demo/dyn.parquet b/tests/testdata/eicu_demo/dyn.parquet index 0e1beab..b74f634 100644 --- a/tests/testdata/eicu_demo/dyn.parquet +++ b/tests/testdata/eicu_demo/dyn.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1d4cbf01a2c17dc32814fc8df3b119c676d736f4158c47c2ee88b81a2f0e8f77 -size 2252914 +oid sha256:99bf738453bdf0b054ed0e092b80d5b3e142647d095673c51fe305ed7154d22b +size 2264514 diff --git a/tests/testdata/eicu_demo/features.parquet b/tests/testdata/eicu_demo/features.parquet new file mode 100644 index 0000000..8e5937f --- /dev/null +++ b/tests/testdata/eicu_demo/features.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcf722ba960991684c36a79ad78d40f29b22ee0a25efdb6b5c597e51b0768269 +size 68297045 diff --git a/tests/testdata/eicu_demo/hospital.parquet b/tests/testdata/eicu_demo/hospital.parquet index a96c583..b66ba7f 100644 --- a/tests/testdata/eicu_demo/hospital.parquet +++ b/tests/testdata/eicu_demo/hospital.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:203c21ace7b2d8ca0bf9882e1b530eea49b035845dbc86650f61398e76563ce3 -size 2905 +oid sha256:92a328bcfe024e39b8846aafbe30194243a9dcd7a5592461139f9427298eaaa7 +size 3224 diff --git a/tests/testdata/eicu_demo/sta.parquet b/tests/testdata/eicu_demo/sta.parquet index 93511c9..5bb45c0 100644 --- a/tests/testdata/eicu_demo/sta.parquet +++ b/tests/testdata/eicu_demo/sta.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5d86e836864335842b38e24030268bf311625d3dc9e7c8d88b14c7bf5c798cb9 -size 68334 +oid sha256:e09b7218a66eed16184d4b96241ec5991b2a492e279e62b8c4e02277cc8f0f62 +size 70005 diff --git a/tests/testdata/miiv_demo/dyn.parquet b/tests/testdata/miiv_demo/dyn.parquet new file mode 100644 index 0000000..1b9dc4a --- /dev/null +++ b/tests/testdata/miiv_demo/dyn.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80ee7cb1df0397b6f7be42fa33d11e5ba659b223e04344b3fd19cb8308c897a6 +size 385801 diff --git a/tests/testdata/miiv_demo/features.parquet b/tests/testdata/miiv_demo/features.parquet new file mode 100644 index 0000000..7e0eb61 --- /dev/null +++ b/tests/testdata/miiv_demo/features.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b5e144592f37a345a3def9cb5e2a28defeae25b285eeb88c83703de4aa2d82d +size 9520079 diff --git a/tests/testdata/miiv_demo/sta.parquet b/tests/testdata/miiv_demo/sta.parquet new file mode 100644 index 0000000..09a18ae --- /dev/null +++ b/tests/testdata/miiv_demo/sta.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c024b1e3e23ccae52043e2c7a3e7c32d606a4e1b170ca05b32b319acd96c22ab +size 15520 diff --git a/tests/testdata/mimic_demo-carevue/dyn.parquet b/tests/testdata/mimic_demo-carevue/dyn.parquet new file mode 100644 index 0000000..9393a47 Binary files /dev/null and b/tests/testdata/mimic_demo-carevue/dyn.parquet differ diff --git a/tests/testdata/mimic_demo-carevue/features.parquet b/tests/testdata/mimic_demo-carevue/features.parquet new file mode 100644 index 0000000..98e92d5 Binary files /dev/null and b/tests/testdata/mimic_demo-carevue/features.parquet differ diff --git a/tests/testdata/mimic_demo-carevue/sta.parquet b/tests/testdata/mimic_demo-carevue/sta.parquet new file mode 100644 index 0000000..1985177 Binary files /dev/null and b/tests/testdata/mimic_demo-carevue/sta.parquet differ diff --git a/tests/testdata/mimic_demo/dyn.parquet b/tests/testdata/mimic_demo/dyn.parquet index a97f594..6755627 100644 Binary files a/tests/testdata/mimic_demo/dyn.parquet and b/tests/testdata/mimic_demo/dyn.parquet differ diff --git a/tests/testdata/mimic_demo/sta.parquet b/tests/testdata/mimic_demo/sta.parquet index 81997e1..f2a97fe 100644 Binary files a/tests/testdata/mimic_demo/sta.parquet and b/tests/testdata/mimic_demo/sta.parquet differ