diff --git a/development/Xenium_classification/README.md b/development/Xenium_classification/README.md index 93fcbd4..796ccfa 100644 --- a/development/Xenium_classification/README.md +++ b/development/Xenium_classification/README.md @@ -10,8 +10,20 @@ The following data are required for each training sample (sequenced whole slide Additionally, a single-cell reference atlas is needed to generate the cell type labels. ### 0. Installation + +Modules to load: +```commandline +$ module load gcc/12 # Otherwise ICU will fail with OSError: /lib64/libstdc++.so.6: version `GLIBCXX_3.4.30' not found +``` + The `environment.yml` contains the required dependencies for the conda environment. +Example: +```commandline +$ conda env create --prefix /scratch/project_mnt/andrew/conda/hovernet --file /scratch/project_mnt/andrew/STimage/development/Xenium_classification/environment.yml +$ conda activate hovernet +``` + Computing performance metrics relies on some code from https://github.com/meszlili96/PanNukeChallenge. This repository should be cloned into the same folder as this source. @@ -22,20 +34,37 @@ These labels are then processed and stored into an anndata object for the Xenium ``` python load_annotations.py --xenium XENIUM_DIR --annotated ANNOTATED_PATH --out-dir OUT_DIR - ``` Alternatively, the labels can be provided in the form of a geopandas dataframe. See `generate_masks.py` for details. -### 2. Register H&E image -The H&E image must be registered to the Xenium DAPI data, which can be done with SIFT or similar methods. An affine transformation matrix should be saved as a csv file, with the standard 2x3 format. Example: +### 2. Register and align H&E image + +The full-color H&E image must be registered to the Xenium DAPI data to ensure proper alignment between the morphological information and the spatial gene expression data. 
+ +Two ways to perform the registration: +* Using Xenium Explorer: + - You can use Xenium Explorer to produce a transformation matrix and keypoints: +https://www.10xgenomics.com/support/software/xenium-explorer/latest/tutorials/xe-image-alignment +* Using the register.py script: + - Provide a greyscale DAPI TIFF image. + - Using OpenCV, it generates the keypoints and transformation matrix. + +An affine transformation matrix should be saved as a csv file, with the standard 2x3 format. Example: ``` -0.002511365031637, -0.776020225795623, 27762.2568957 0.775820476630406, -0.00237876719423, -1158.81828889 ``` +Registration and alignment require the transformation matrix. To align the RGB H&E image: +``` +python ./src/alignment.py --out-dir OUT_DIR --tif-path RGB_MORPHOLOGY_OME_FILE --transform TRANSFORM_PATH +``` +Here, `RGB_MORPHOLOGY_OME_FILE` is the path to the RGB H&E image file in OME-TIFF format and `TRANSFORM_PATH` is the CSV file containing the transformation matrix. + +Note: Make sure you have the necessary dependencies installed, such as OpenCV and any other required libraries, before running the scripts. ### 3. Generate training data The following command generates the training data (cell masks and H&E tiles) given the Xenium data and H&E image. 
diff --git a/development/Xenium_classification/environment-spatialdata-0.1.2.yml b/development/Xenium_classification/environment-spatialdata-0.1.2.yml new file mode 100644 index 0000000..49130f3 --- /dev/null +++ b/development/Xenium_classification/environment-spatialdata-0.1.2.yml @@ -0,0 +1,475 @@ +name: hovernet +channels: + - pytorch + - nvidia + - conda-forge + - bioconda + - defaults +dependencies: + - _libgcc_mutex=0.1 + - _openmp_mutex=4.5 + - blas=1.0 + - brotlipy=0.7.0 + - bzip2=1.0.8 + - ca-certificates=2023.05.30 + - cairo=1.16.0 + - certifi=2023.7.22 + - cffi=1.15.1 + - charset-normalizer=2.0.4 + - conda-pack=0.6.0 + - cryptography=39.0.1 + - cuda-cudart=11.8.89 + - cuda-cupti=11.8.87 + - cuda-libraries=11.8.0 + - cuda-nvrtc=11.8.89 + - cuda-nvtx=11.8.86 + - cuda-runtime=11.8.0 + - expat=2.5.0 + - ffmpeg=4.3 + - filelock=3.9.0 + - font-ttf-dejavu-sans-mono=2.37 + - font-ttf-inconsolata=3.000 + - font-ttf-source-code-pro=2.038 + - font-ttf-ubuntu=0.83 + - fontconfig=2.14.2 + - fonts-conda-ecosystem=1 + - fonts-conda-forge=1 + - freetype=2.12.1 + - gdal=1.9.2 + - gdk-pixbuf=2.42.10 + - gettext=0.21.1 + - giflib=5.2.1 + - glib=2.76.3 + - glib-tools=2.76.3 + - gmp=6.2.1 + - gmpy2=2.1.2 + - gnutls=3.6.15 + - icu=72.1 + - idna=3.4 + - intel-openmp=2023.1.0 + - jinja2=3.1.2 + - jpeg=9e + - lame=3.100 + - lcms2=2.12 + - ld_impl_linux-64=2.38 + - lerc=3.0 + - libblas=3.9.0 + - libcblas=3.9.0 + - libcublas=11.11.3.6 + - libcufft=10.9.0.58 + - libcufile=1.6.1.9 + - libcurand=10.3.2.106 + - libcusolver=11.4.1.48 + - libcusparse=11.7.5.86 + - libdeflate=1.17 + - libexpat=2.5.0 + - libffi=3.4.4 + - libgcc=7.2.0 + - libgcc-ng=13.1.0 + - libgfortran-ng=13.1.0 + - libgfortran5=13.1.0 + - libglib=2.76.3 + - libhwloc=2.9.0 + - libiconv=1.17 + - libidn2=2.3.4 + - liblapack=3.9.0 + - libnpp=11.8.0.86 + - libnsl=2.0.1 + - libnvjpeg=11.9.0.86 + - libpng=1.6.39 + - libsqlite=3.42.0 + - libstdcxx-ng=13.1.0 + - libtasn1=4.19.0 + - libtiff=4.5.0 + - libunistring=0.9.10 + - 
libuuid=2.38.1 + - libwebp=1.2.4 + - libwebp-base=1.2.4 + - libxcb=1.15 + - libxml2=2.10.4 + - libzlib=1.2.13 + - llvm-openmp=16.0.5 + - lz4-c=1.9.4 + - markupsafe=2.1.1 + - mkl=2023.1.0 + - mkl-service=2.4.0 + - mkl_fft=1.3.6 + - mkl_random=1.2.2 + - mpc=1.1.0 + - mpfr=4.0.2 + - mpmath=1.2.1 + - ncurses=6.4.20240210 + - nettle=3.7.3 + - networkx=2.8.4 + - openh264=2.1.1 + - openjdk=8.0.152 + - openjpeg=2.5.0 + - openslide=3.4.1 + - openssl=3.2.1 + - pcre2=10.40 + - pillow=9.4.0 + - pip=23.0.1 + - pixman=0.40.0 + - pthread-stubs=0.4 + - pycparser=2.21 + - pyopenssl=23.0.0 + - pysocks=1.7.1 + - python=3.9.16 + - python_abi=3.9=4_cp39 + - pytorch=2.0.1 + - pytorch-cuda=11.8 + - pytorch-mutex=1.0=cuda + - readline=8.2 + - requests=2.29.0 + - setuptools=67.8.0 + - sqlite=3.41.2 + - sympy=1.11.1 + - tbb=2021.8.0 + - tk=8.6.12 + - torchaudio=2.0.2 + - torchtriton=2.0.0 + - torchvision=0.15.2 + - urllib3=1.26.15 + - wheel=0.38.4 + - xorg-kbproto=1.0.7 + - xorg-libice=1.1.1 + - xorg-libsm=1.2.4 + - xorg-libx11=1.8.5 + - xorg-libxau=1.0.11 + - xorg-libxdmcp=1.1.3 + - xorg-libxext=1.3.4 + - xorg-libxrender=0.9.10 + - xorg-renderproto=0.11.1 + - xorg-xextproto=7.3.0 + - xorg-xproto=7.0.31 + - xz=5.4.2 + - zlib=1.2.13 + - zstd=1.5.5 + - pip: + - absl-py==2.1.0 + - aiobotocore + - aiohttp==3.9.5 + - aioitertools==0.11.0 + - aiosignal==1.3.1 + - alabaster==0.7.16 + - albumentations==1.3.1 + - anndata + - annotated-types==0.6.0 + - anyio==4.3.0 + - app-model==0.2.6 + - appdirs==1.4.4 + - argon2-cffi==23.1.0 + - argon2-cffi-bindings==21.2.0 + - array-api-compat==1.6 + - arrow==1.3.0 + - asciitree==0.3.3 + - asttokens==2.4.1 + - async-lru==2.0.4 + - async-timeout==4.0.3 + - attrs==23.2.0 + - babel==2.14.0 + - backcall==0.2.0 + - beautifulsoup4==4.12.3 + - bleach==6.1.0 + - blessed==1.20.0 + - boto3==1.34.69 + - botocore + - build==1.2.1 + - cached-property==1.5.2 + - cachey==0.2.1 + - cell2location==0.1.3 + - chex==0.1.7 + - click==8.1.7 + - click-plugins==1.1.1 + - cligj==0.7.2 + 
- cloudpickle==3.0.0 + - colorcet==3.1.0 + - comm==0.2.2 + - contextlib2==21.6.0 + - contourpy==1.2.1 + - croniter==2.0.5 + - cycler==0.12.1 + - cython==3.0.10 + - dask==2024.2.1 + - dask-expr==1.0.12 + - dask-image==2023.8.1 + - datashader==0.16.1 + - datashape==0.5.2 + - dateutils==0.6.12 + - debugpy==1.8.1 + - decorator==5.1.1 + - deepdiff==7.0.1 + - defusedxml==0.7.1 + - distributed + - dm-tree==0.1.8 + - docrep==0.3.2 + - docstring-parser==0.16 + - docutils==0.20.1 + - editor==1.6.6 + - entrypoints==0.4 + - etils==1.5.2 + - exceptiongroup==1.2.1 + - executing==2.0.1 + - fastapi==0.110.2 + - fasteners==0.19 + - fastjsonschema==2.19.1 + - fcsparser==0.2.8 + - fiona==1.9.6 + - flax==0.7.4 + - fonttools==4.51.0 + - fqdn==1.5.1 + - freetype-py==2.4.0 + - frozenlist==1.4.1 + - fsspec==2023.6.0 + - future==1.0.0 + - geopandas==0.14.3 + - greenlet==3.0.3 + - h11==0.14.0 + - h5py==3.11.0 + - heapdict==1.0.1 + - hsluv==5.0.4 + - huggingface-hub==0.22.2 + - igraph==0.11.4 + - imagecodecs==2024.1.1 + - imageio==2.34.1 + - imagesize==1.4.1 + - importlib-metadata==7.1.0 + - importlib-resources==6.4.0 + - in-n-out==0.2.0 + - inflect==7.2.0 + - inquirer==3.2.4 + - ipykernel==6.29.4 + - ipython==8.18.1 + - ipython-genutils==0.2.0 + - isoduration==20.11.0 + - itsdangerous==2.2.0 + - jax==0.4.13 + - jaxlib==0.4.13 + - jedi==0.19.1 + - jmespath==1.0.1 + - joblib==1.4.0 + - json5==0.9.25 + - jsonpointer==2.4 + - jsonschema==4.21.1 + - jsonschema-specifications==2023.12.1 + - jupyter-cache==0.6.1 + - jupyter-client==8.2.0 + - jupyter-core==5.3.0 + - jupyter-events==0.6.3 + - jupyter-lsp==2.2.0 + - jupyter-server==2.6.0 + - jupyter-server-terminals==0.4.4 + - jupyterlab==4.0.1 + - jupyterlab-pygments==0.2.2 + - jupyterlab-server==2.22.1 + - kiwisolver==1.4.5 + - lamin-utils==0.13.2 + - lazy-loader==0.4 + - leidenalg==0.10.2 + - lightning==2.1.4 + - lightning-cloud==0.5.66 + - lightning-utilities==0.11.2 + - llvmlite==0.40.1 + - locket==1.0.0 + - loguru==0.7.2 + - magicgui==0.8.2 + - 
markdown-it-py==3.0.0 + - matplotlib==3.8.4 + - matplotlib-inline==0.1.7 + - matplotlib-scalebar==0.8.1 + - mdit-py-plugins==0.4.0 + - mdurl==0.1.2 + - mistune==3.0.2 + - ml-collections==0.1.1 + - ml-dtypes==0.4.0 + - more-itertools==10.2.0 + - msgpack==1.0.8 + - mudata==0.2.3 + - multidict==6.0.5 + - multipledispatch==1.0.0 + - multiscale-spatial-image==0.11.2 + - mypy-extensions==1.0.0 + - myst-nb==1.1.0 + - myst-parser==2.0.0 + - napari==0.4.19.post1 + - napari-console==0.0.9 + - napari-matplotlib==2.0.1 + - napari-plugin-engine==0.2.0 + - napari-spatialdata==0.4.1 + - napari-svg==0.1.10 + - natsort==8.4.0 + - nbclient==0.7.4 + - nbconvert==7.16.3 + - nbformat==5.10.4 + - nest-asyncio==1.6.0 + - notebook-shim==0.2.4 + - npe2==0.7.5 + - numba==0.57.1 + - numcodecs==0.12.1 + - numpy==1.22.4 + - numpydoc==1.7.0 + - numpyro==0.12.1 + - ome-zarr==0.8.3 + - omnipath==1.0.8 + - opencv-contrib-python==4.9.0.80 + - opencv-python==4.9.0.80 + - opencv-python-headless==4.9.0.80 + - openslide-python==1.3.1 + - opt-einsum==3.3.0 + - optax==0.2.1 + - orbax-checkpoint==0.5.9 + - ordered-set==4.1.0 + - overrides==7.7.0 + - packaging==24.0 + - pandas==2.0.2 + - pandocfilters==1.5.1 + - param==2.1.0 + - parso==0.8.4 + - partd==1.4.1 + - pathml==2.1.1 + - patsy==0.5.6 + - pexpect==4.9.0 + - pickleshare==0.7.5 + - pims==0.6.1 + - pint==0.23 + - platformdirs==4.2.0 + - pooch==1.8.1 + - prometheus-client==0.20.0 + - prompt-toolkit==3.0.43 + - protobuf==5.26.1 + - psutil==5.9.8 + - psygnal==0.11.0 + - ptyprocess==0.7.0 + - pure-eval==0.2.2 + - pyarrow==16.0.0 + - pyconify==0.1.6 + - pyct==0.5.0 + - pydantic==2.7.0 + - pydantic-compat==0.1.2 + - pydantic-core==2.18.1 + - pydicom==2.4.4 + - pygeos==0.14 + - pygments==2.17.2 + - pyjwt==2.8.0 + - pynndescent==0.5.12 + - pyopengl==3.1.7 + - pyparsing==3.1.2 + - pyproj==3.6.1 + - pyproject-hooks==1.0.0 + - pyro-api==0.1.2 + - pyro-ppl==1.9.0 + - pyrsistent==0.20.0 + - python-bioformats==4.0.7 + - python-dateutil==2.9.0.post0 + - 
python-editor==1.0.4 + - python-igraph==0.11.4 + - python-javabridge==4.0.3 + - python-json-logger==2.0.7 + - python-multipart==0.0.9 + - pytomlpp==1.0.13 + - pytorch-lightning==2.2.2 + - pytz==2024.1 + - pywavelets==1.4.1 + - pyyaml==6.0.1 + - pyzmq==26.0.2 + - qtconsole==5.5.1 + - qtpy==2.4.1 + - qudida==0.0.4 + - readchar==4.0.6 + - readfcs==1.1.8 + - referencing==0.34.0 + - regex==2024.4.16 + - rfc3339-validator==0.1.4 + - rfc3986-validator==0.1.1 + - rich==13.7.1 + - rpds-py==0.18.0 + - runs==1.2.2 + - s3fs==2023.5.0 + - s3transfer==0.10.1 + - safetensors==0.4.3 + - scanpy==1.9.3 + - scikit-image==0.20.0 + - scikit-learn==1.4.2 + - scipy==1.9.1 + - scvi-tools==1.1.2 + - seaborn==0.13.2 + - send2trash==1.8.3 + - session-info==1.0.0 + - shapely==2.0.4 + - shellingham==1.5.4 + - simpleitk==2.3.1 + - six==1.16.0 + - slicerator==1.1.0 + - sniffio==1.3.1 + - snowballstemmer==2.2.0 + - sortedcontainers==2.4.0 + - soupsieve==2.5 + - spams==2.6.5.4 + - sparse==0.15.1 + - spatial-image==0.3.0 + - spatialdata==0.1.2 + - spatialdata-io==0.1.2 + - spatialdata-plot==0.2.1 + - sphinx==7.3.7 + - sphinx-copybutton==0.5.2 + - sphinxcontrib-applehelp==1.0.8 + - sphinxcontrib-devhelp==1.0.6 + - sphinxcontrib-htmlhelp==2.0.5 + - sphinxcontrib-jsmath==1.0.1 + - sphinxcontrib-qthelp==1.0.7 + - sphinxcontrib-serializinghtml==1.1.10 + - sqlalchemy==2.0.29 + - squidpy==1.2.2 + - stack-data==0.6.3 + - starlette==0.37.2 + - starsessions==2.1.3 + - statsmodels==0.14.1 + - stdlib-list==0.10.0 + - superqt==0.6.3 + - tabulate==0.9.0 + - tangram-sc==1.0.4 + - tblib==3.0.0 + - tensorstore==0.1.56 + - terminado==0.18.1 + - texttable==1.7.0 + - threadpoolctl==3.4.0 + - tifffile==2024.4.18 + - timm==0.9.16 + - tinycss2==1.2.1 + - tokenizers==0.19.1 + - tomli==2.0.1 + - tomli-w==1.0.0 + - toolz==0.12.1 + - torchinfo==1.8.0 + - torchmetrics==1.3.2 + - torchstain==1.3.0 + - tornado==6.4 + - tqdm==4.66.2 + - traitlets==5.14.3 + - transformers==4.40.0 + - typeguard==4.2.1 + - typer==0.12.3 + - 
types-python-dateutil==2.9.0.20240316 + - typing-extensions==4.11.0 + - tzdata==2024.1 + - umap-learn==0.5.6 + - uri-template==1.3.0 + - uvicorn==0.29.0 + - validators==0.28.1 + - vispy==0.14.2 + - wcwidth==0.2.13 + - webcolors==1.13 + - webencodings==0.5.1 + - websocket-client==1.7.0 + - websockets==12.0 + - wrapt==1.16.0 + - xarray==2023.12.0 + - xarray-dataclasses==1.7.0 + - xarray-datatree==0.0.14 + - xarray-schema==0.0.3 + - xarray-spatial==0.3.7 + - xmod==1.8.1 + - yarl==1.9.4 + - zarr==2.17.2 + - zict==3.0.0 + - zipp==3.18.1 \ No newline at end of file diff --git a/development/Xenium_classification/environment.yml b/development/Xenium_classification/environment.yml index ffd9531..93533e8 100644 --- a/development/Xenium_classification/environment.yml +++ b/development/Xenium_classification/environment.yml @@ -1,152 +1,157 @@ -name: /home/uqjxie6/xenium +name: hovernet channels: - pytorch - nvidia - conda-forge + - bioconda - defaults dependencies: - - _libgcc_mutex=0.1=conda_forge - - _openmp_mutex=4.5=2_kmp_llvm - - blas=1.0=mkl - - brotlipy=0.7.0=py39h27cfd23_1003 - - bzip2=1.0.8=h7b6447c_0 - - ca-certificates=2023.05.30=h06a4308_0 - - cairo=1.16.0=hbbf8b49_1016 - - certifi=2023.7.22=py39h06a4308_0 - - cffi=1.15.1=py39h5eee18b_3 - - charset-normalizer=2.0.4=pyhd3eb1b0_0 - - conda-pack=0.6.0=pyhd3eb1b0_0 - - cryptography=39.0.1=py39h9ce1e76_0 - - cuda-cudart=11.8.89=0 - - cuda-cupti=11.8.87=0 - - cuda-libraries=11.8.0=0 - - cuda-nvrtc=11.8.89=0 - - cuda-nvtx=11.8.86=0 - - cuda-runtime=11.8.0=0 - - expat=2.5.0=hcb278e6_1 - - ffmpeg=4.3=hf484d3e_0 - - filelock=3.9.0=py39h06a4308_0 - - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 - - font-ttf-inconsolata=3.000=h77eed37_0 - - font-ttf-source-code-pro=2.038=h77eed37_0 - - font-ttf-ubuntu=0.83=hab24e00_0 - - fontconfig=2.14.2=h14ed4e7_0 - - fonts-conda-ecosystem=1=0 - - fonts-conda-forge=1=0 - - freetype=2.12.1=h4a9f257_0 - - gdk-pixbuf=2.42.10=h05c8ddd_0 - - gettext=0.21.1=h27087fc_0 - - giflib=5.2.1=h5eee18b_3 - - 
glib=2.76.3=hfc55251_0 - - glib-tools=2.76.3=hfc55251_0 - - gmp=6.2.1=h295c915_3 - - gmpy2=2.1.2=py39heeb90bb_0 - - gnutls=3.6.15=he1e5248_0 - - icu=72.1=hcb278e6_0 - - idna=3.4=py39h06a4308_0 - - intel-openmp=2023.1.0=hdb19cb5_46305 - - jinja2=3.1.2=py39h06a4308_0 - - jpeg=9e=h5eee18b_1 - - lame=3.100=h7b6447c_0 - - lcms2=2.12=h3be6417_0 - - ld_impl_linux-64=2.38=h1181459_1 - - lerc=3.0=h295c915_0 - - libblas=3.9.0=1_h86c2bf4_netlib - - libcublas=11.11.3.6=0 - - libcufft=10.9.0.58=0 - - libcufile=1.6.1.9=0 - - libcurand=10.3.2.106=0 - - libcusolver=11.4.1.48=0 - - libcusparse=11.7.5.86=0 - - libdeflate=1.17=h5eee18b_0 - - libexpat=2.5.0=hcb278e6_1 - - libffi=3.4.4=h6a678d5_0 - - libgcc-ng=13.1.0=he5830b7_0 - - libgfortran-ng=13.1.0=h69a702a_0 - - libgfortran5=13.1.0=h15d22d2_0 - - libglib=2.76.3=hebfc3b9_0 - - libiconv=1.17=h166bdaf_0 - - libidn2=2.3.4=h5eee18b_0 - - liblapack=3.9.0=5_h92ddd45_netlib - - libnpp=11.8.0.86=0 - - libnvjpeg=11.9.0.86=0 - - libpng=1.6.39=h5eee18b_0 - - libsqlite=3.42.0=h2797004_0 - - libstdcxx-ng=13.1.0=hfd8a6a1_0 - - libtasn1=4.19.0=h5eee18b_0 - - libtiff=4.5.0=h6a678d5_2 - - libunistring=0.9.10=h27cfd23_0 - - libuuid=2.38.1=h0b41bf4_0 - - libwebp=1.2.4=h11a3e52_1 - - libwebp-base=1.2.4=h5eee18b_1 - - libxcb=1.15=h0b41bf4_0 - - libxml2=2.10.4=hfdac1af_0 - - libzlib=1.2.13=hd590300_5 - - llvm-openmp=16.0.5=h4dfa4b3_0 - - lz4-c=1.9.4=h6a678d5_0 - - markupsafe=2.1.1=py39h7f8727e_0 - - mkl=2023.1.0=h6d00ec8_46342 - - mkl-service=2.4.0=py39h5eee18b_1 - - mkl_fft=1.3.6=py39h417a72b_1 - - mkl_random=1.2.2=py39h417a72b_1 - - mpc=1.1.0=h10f8cd9_1 - - mpfr=4.0.2=hb69a4c5_1 - - mpmath=1.2.1=py39h06a4308_0 - - ncurses=6.4=h6a678d5_0 - - nettle=3.7.3=hbbd107a_1 - - networkx=2.8.4=py39h06a4308_1 - - numpy-base=1.24.3=py39h060ed82_1 - - openh264=2.1.1=h4ff587b_0 - - openjdk=8.0.152=h7b6447c_3 - - openjpeg=2.5.0=hfec8fc6_2 - - openslide=3.4.1=h7773abc_6 - - openssl=1.1.1v=h7f8727e_0 - - pcre2=10.40=hc3806b6_0 - - pillow=9.4.0=py39h6a678d5_0 - - 
pip=23.0.1=py39h06a4308_0 - - pixman=0.40.0=h36c2ea0_0 - - pthread-stubs=0.4=h36c2ea0_1001 - - pycparser=2.21=pyhd3eb1b0_0 - - pyopenssl=23.0.0=py39h06a4308_0 - - pysocks=1.7.1=py39h06a4308_0 - - python=3.9.16=h7a1cb2a_2 - - pytorch=2.0.1=py3.9_cuda11.8_cudnn8.7.0_0 - - pytorch-cuda=11.8=h7e8668a_5 + - _libgcc_mutex=0.1 + - _openmp_mutex=4.5 + - blas=1.0 + - brotlipy=0.7.0 + - bzip2=1.0.8 + - ca-certificates=2023.05.30 + - cairo=1.16.0 + - certifi=2023.7.22 + - cffi=1.15.1 + - charset-normalizer=2.0.4 + - conda-pack=0.6.0 + - cryptography=39.0.1 + - cuda-cudart=11.8.89 + - cuda-cupti=11.8.87 + - cuda-libraries=11.8.0 + - cuda-nvrtc=11.8.89 + - cuda-nvtx=11.8.86 + - cuda-runtime=11.8.0 + - expat=2.5.0 + - ffmpeg=4.3 + - filelock=3.9.0 + - font-ttf-dejavu-sans-mono=2.37 + - font-ttf-inconsolata=3.000 + - font-ttf-source-code-pro=2.038 + - font-ttf-ubuntu=0.83 + - fontconfig=2.14.2 + - fonts-conda-ecosystem=1 + - fonts-conda-forge=1 + - freetype=2.12.1 + - gdal=1.9.2 + - gdk-pixbuf=2.42.10 + - gettext=0.21.1 + - giflib=5.2.1 + - glib=2.76.3 + - glib-tools=2.76.3 + - gmp=6.2.1 + - gmpy2=2.1.2 + - gnutls=3.6.15 + - icu=72.1 + - idna=3.4 + - intel-openmp=2023.1.0 + - jinja2=3.1.2 + - jpeg=9e + - lame=3.100 + - lcms2=2.12 + - ld_impl_linux-64=2.38 + - lerc=3.0 + - libblas=3.9.0 + - libcblas=3.9.0 + - libcublas=11.11.3.6 + - libcufft=10.9.0.58 + - libcufile=1.6.1.9 + - libcurand=10.3.2.106 + - libcusolver=11.4.1.48 + - libcusparse=11.7.5.86 + - libdeflate=1.17 + - libexpat=2.5.0 + - libffi=3.4.4 + - libgcc=7.2.0 + - libgcc-ng=13.1.0 + - libgfortran-ng=13.1.0 + - libgfortran5=13.1.0 + - libglib=2.76.3 + - libhwloc=2.9.0 + - libiconv=1.17 + - libidn2=2.3.4 + - liblapack=3.9.0 + - libnpp=11.8.0.86 + - libnsl=2.0.1 + - libnvjpeg=11.9.0.86 + - libpng=1.6.39 + - libsqlite=3.42.0 + - libstdcxx-ng=13.1.0 + - libtasn1=4.19.0 + - libtiff=4.5.0 + - libunistring=0.9.10 + - libuuid=2.38.1 + - libwebp=1.2.4 + - libwebp-base=1.2.4 + - libxcb=1.15 + - libxml2=2.10.4 + - libzlib=1.2.13 + - 
llvm-openmp=16.0.5 + - lz4-c=1.9.4 + - markupsafe=2.1.1 + - mkl=2023.1.0 + - mkl-service=2.4.0 + - mkl_fft=1.3.6 + - mkl_random=1.2.2 + - mpc=1.1.0 + - mpfr=4.0.2 + - mpmath=1.2.1 + - ncurses=6.4.20240210 + - nettle=3.7.3 + - networkx=2.8.4 + - openh264=2.1.1 + - openjdk=8.0.152 + - openjpeg=2.5.0 + - openslide=3.4.1 + - openssl=3.2.1 + - pcre2=10.40 + - pillow=9.4.0 + - pip=23.0.1 + - pixman=0.40.0 + - pthread-stubs=0.4 + - pycparser=2.21 + - pyopenssl=23.0.0 + - pysocks=1.7.1 + - python=3.9.16 + - python_abi=3.9=4_cp39 + - pytorch=2.0.1 + - pytorch-cuda=11.8 - pytorch-mutex=1.0=cuda - - readline=8.2=h5eee18b_0 - - requests=2.29.0=py39h06a4308_0 - - setuptools=67.8.0=py39h06a4308_0 - - sqlite=3.41.2=h5eee18b_0 - - sympy=1.11.1=py39h06a4308_0 - - tbb=2021.8.0=hdb19cb5_0 - - tk=8.6.12=h1ccaba5_0 - - torchaudio=2.0.2=py39_cu118 - - torchtriton=2.0.0=py39 - - torchvision=0.15.2=py39_cu118 - - typing_extensions=4.5.0=py39h06a4308_0 - - urllib3=1.26.15=py39h06a4308_0 - - wheel=0.38.4=py39h06a4308_0 - - xorg-kbproto=1.0.7=h7f98852_1002 - - xorg-libice=1.1.1=hd590300_0 - - xorg-libsm=1.2.4=h7391055_0 - - xorg-libx11=1.8.5=h8ee46fc_0 - - xorg-libxau=1.0.11=hd590300_0 - - xorg-libxdmcp=1.1.3=h7f98852_0 - - xorg-libxext=1.3.4=h0b41bf4_2 - - xorg-libxrender=0.9.10=h7f98852_1003 - - xorg-renderproto=0.11.1=h7f98852_1002 - - xorg-xextproto=7.3.0=h0b41bf4_1003 - - xorg-xproto=7.0.31=h7f98852_1007 - - xz=5.4.2=h5eee18b_0 - - zlib=1.2.13=hd590300_5 - - zstd=1.5.5=hc292b87_0 + - readline=8.2 + - requests=2.29.0 + - setuptools=67.8.0 + - sqlite=3.41.2 + - sympy=1.11.1 + - tbb=2021.8.0 + - tk=8.6.12 + - torchaudio=2.0.2 + - torchtriton=2.0.0 + - torchvision=0.15.2 + - urllib3=1.26.15 + - wheel=0.38.4 + - xorg-kbproto=1.0.7 + - xorg-libice=1.1.1 + - xorg-libsm=1.2.4 + - xorg-libx11=1.8.5 + - xorg-libxau=1.0.11 + - xorg-libxdmcp=1.1.3 + - xorg-libxext=1.3.4 + - xorg-libxrender=0.9.10 + - xorg-renderproto=0.11.1 + - xorg-xextproto=7.3.0 + - xorg-xproto=7.0.31 + - xz=5.4.2 + - 
zlib=1.2.13 + - zstd=1.5.5 - pip: - absl-py==1.4.0 - - aiobotocore==2.5.0 - - aiohttp==3.8.4 + - aiobotocore==2.5.4 + - aiohttp==3.9.5 - aioitertools==0.11.0 - aiosignal==1.3.1 - - alabaster==0.7.13 + - alabaster==0.7.16 - albumentations==1.3.1 - anndata==0.9.1 - anyio==3.7.0 @@ -165,8 +170,8 @@ dependencies: - beautifulsoup4==4.12.2 - bleach==6.0.0 - blessed==1.20.0 - - boto3==1.26.152 - - botocore==1.29.152 + - boto3==1.28.17 + - botocore==1.31.17 - build==0.10.0 - cached-property==1.5.2 - cachey==0.2.1 @@ -204,7 +209,8 @@ dependencies: - fastapi==0.88.0 - fasteners==0.18 - fastjsonschema==2.17.1 - - fiona==1.9.4.post1 + - fcsparser==0.2.8 + - fiona==1.9.6 - flax==0.6.11 - fonttools==4.39.4 - fqdn==1.5.1 @@ -212,7 +218,7 @@ dependencies: - frozenlist==1.3.3 - fsspec==2023.5.0 - future==0.18.3 - - geopandas==0.13.0 + - geopandas==0.14.3 - greenlet==2.0.2 - h11==0.14.0 - h5py==3.8.0 @@ -252,6 +258,7 @@ dependencies: - jupyterlab-pygments==0.2.2 - jupyterlab-server==2.22.1 - kiwisolver==1.4.4 + - lamin-utils==0.13.2 - lazy-loader==0.2 - leidenalg==0.9.1 - lightning==2.0.2 @@ -298,9 +305,9 @@ dependencies: - numpyro==0.12.1 - ome-zarr==0.7.1 - omnipath==1.0.7 - - opencv-contrib-python==4.7.0.72 - - opencv-python==4.7.0.72 - - opencv-python-headless==4.7.0.72 + - opencv-contrib-python==4.9.0.80 + - opencv-python==4.9.0.80 + - opencv-python-headless==4.9.0.80 - openslide-python==1.2.0 - opt-einsum==3.3.0 - optax==0.1.5 @@ -358,12 +365,13 @@ dependencies: - qtpy==2.3.1 - qudida==0.0.4 - readchar==4.0.5 + - readfcs==1.1.8 - regex==2023.6.3 - rfc3339-validator==0.1.4 - rfc3986-validator==0.1.1 - rich==13.4.1 - s3fs==2023.5.0 - - s3transfer==0.6.1 + - s3transfer==0.6.2 - safetensors==0.3.1 - scanpy==1.9.3 - scikit-image==0.20.0 @@ -373,7 +381,7 @@ dependencies: - seaborn==0.12.2 - send2trash==1.8.2 - session-info==1.0.0 - - shapely==2.0.1 + - shapely==2.0.4 - simpleitk==2.2.1 - six==1.16.0 - slicerator==1.1.0 @@ -381,11 +389,11 @@ dependencies: - snowballstemmer==2.2.0 - 
sortedcontainers==2.4.0 - soupsieve==2.4.1 - - spams==2.6.2.5 + - spams==2.6.5.4 - sparse==0.14.0 - spatial-image==0.3.0 - spatialdata==0.0.9 - - spatialdata-io==0.0.4 + - spatialdata-io==0.0.9 - spatialdata-plot==0.0.2 - sphinx==4.5.0 - sphinx-copybutton==0.5.2 @@ -396,7 +404,7 @@ dependencies: - sphinxcontrib-qthelp==1.0.3 - sphinxcontrib-serializinghtml==1.1.5 - sqlalchemy==2.0.15 - - squidpy==1.2.3 + - squidpy==1.2.2 - stack-data==0.6.2 - starlette==0.22.0 - starsessions==1.3.0 @@ -424,6 +432,7 @@ dependencies: - traitlets==5.9.0 - transformers==4.30.2 - typer==0.9.0 + - typing-extensions==4.5.0 - tzdata==2023.3 - umap-learn==0.5.3 - uri-template==1.2.0 @@ -436,13 +445,12 @@ dependencies: - websocket-client==1.5.2 - websockets==11.0.3 - wrapt==1.15.0 - - xarray==2022.12.0 - - xarray-dataclasses==1.5.0 - - xarray-datatree==0.0.12 + - xarray==2023.12.0 + - xarray-dataclasses==1.7.0 + - xarray-datatree==0.0.14 - xarray-schema==0.0.3 - - xarray-spatial==0.3.5 - - yarl==1.9.2 - - zarr==2.14.2 + - xarray-spatial==0.3.7 + - yarl==1.9.4 + - zarr==2.17.1 - zict==3.0.0 - - zipp==3.15.0 -prefix: /home/uqjxie6/xenium + - zipp==3.18.1 \ No newline at end of file diff --git a/development/Xenium_classification/src/alignment.py b/development/Xenium_classification/src/alignment.py new file mode 100644 index 0000000..2d932fc --- /dev/null +++ b/development/Xenium_classification/src/alignment.py @@ -0,0 +1,42 @@ +#!/usr/bin/python3 +# coding: utf-8 + +import os as os +import argparse as ag +import warnings as w +import skimage.io as ski +import spatialdata_io as sd_io + +def main(args): + out_dir = args.out_dir + transform_matrix = args.transform + tif_path = args.tif_path + for file in [transform_matrix, tif_path]: + if not os.path.exists(file): + w.warn(f'File doesn\'t exist: {file}') + exit(1) + # Create output directory if it doesn't exist + if not os.path.exists(out_dir): + os.makedirs(out_dir) + w.warn(f"Created output directory: {out_dir}") + + # SpatialData IO to align 
image https://spatialdata.scverse.org/projects/io/en/latest/generated/spatialdata_io.xenium_aligned_image.html#spatialdata_io.xenium_aligned_image + output = sd_io.xenium_aligned_image(tif_path, transform_matrix) + ski.imsave(f'{out_dir}/output_image.tif', output) + + +if __name__ == "__main__": + parser = ag.ArgumentParser() + parser.add_argument( + "--transform", type=str, required=True, default=None, help="Path to transformation matrix (csv)" + ) + parser.add_argument( + "--tif-path", type=str, required=True, default=None, help="Path of TIF image to register" + ) + parser.add_argument( + "--out-dir", type=str, required=True, default=None, help="Output directory" + ) + + args = parser.parse_args() + print(args) + main(args) diff --git a/development/Xenium_classification/src/compute_acc.py b/development/Xenium_classification/src/compute_acc.py index 4b71490..c28cb3b 100644 --- a/development/Xenium_classification/src/compute_acc.py +++ b/development/Xenium_classification/src/compute_acc.py @@ -1,3 +1,6 @@ +#!/usr/bin/python3 +# coding: utf-8 + import argparse import cProfile as profile import glob diff --git a/development/Xenium_classification/src/compute_pq.py b/development/Xenium_classification/src/compute_pq.py index 55f907f..692d127 100644 --- a/development/Xenium_classification/src/compute_pq.py +++ b/development/Xenium_classification/src/compute_pq.py @@ -1,3 +1,6 @@ +#!/usr/bin/python3 +# coding: utf-8 + import os import numpy as np import pandas as pd diff --git a/development/Xenium_classification/src/generate_masks.py b/development/Xenium_classification/src/generate_masks.py index 1686150..cb6d76e 100644 --- a/development/Xenium_classification/src/generate_masks.py +++ b/development/Xenium_classification/src/generate_masks.py @@ -1,3 +1,6 @@ +#!/usr/bin/python3 +# coding: utf-8 + import spatialdata_io import squidpy as sq import spatialdata as sd @@ -43,11 +46,10 @@ def main(args): out_dir = args.out_dir img_dir = f"{out_dir}/images" mask_dir = 
f"{out_dir}/masks" - if not os.path.exists(out_dir): - os.makedirs(out_dir) - os.makedirs(img_dir) - os.makedirs(mask_dir) - warnings.warn("Created output directories") + for dir in [out_dir, img_dir, mask_dir]: + if not os.path.exists(dir): + warnings.warn(f'Creating output directory {dir}') + os.makedirs(dir) # Load data warnings.warn("Loading data...") @@ -75,8 +77,8 @@ def main(args): sdata.table.obs['celltype_major'] = sdata.table.obs['celltype_major'].astype('category') assert len(sdata.table.obs['celltype_major'].cat.categories) == args.num_categories # Drop NA values - # FIXME: See comments in load_annotations - # We should not have any NA values + # FIXME: See comments in load_annotations + # We should not have any NA values # sdata.table.obs = sdata.table.obs.dropna() # sdata.table.write(f"{out_dir}/adata_filtered.h5ad") print(sdata.table.obs['celltype_major'].head(15)) diff --git a/development/Xenium_classification/src/load_annotations.py b/development/Xenium_classification/src/load_annotations.py index e1aff21..7dfc0f1 100644 --- a/development/Xenium_classification/src/load_annotations.py +++ b/development/Xenium_classification/src/load_annotations.py @@ -1,3 +1,6 @@ +#!/usr/bin/python3 +# coding: utf-8 + import sys import argparse import cv2 diff --git a/development/Xenium_classification/src/pipeline.ipynb b/development/Xenium_classification/src/pipeline.ipynb new file mode 100644 index 0000000..fbf2dc0 --- /dev/null +++ b/development/Xenium_classification/src/pipeline.ipynb @@ -0,0 +1,460 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import cv2 as cv2\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import scanpy as sp\n", + "import tqdm as tqdm\n", + "import pathml.ml.hovernet as hn\n", + "import squidpy.im as sp_im\n", + "import spatialdata as sd\n", + "import spatialdata.models as sd_m\n", + "import spatialdata_io as 
sd_io\n", + "import multiscale_spatial_image as msi\n", + "import spatialdata.transformations as sd_t\n", + "import xenium_utils as xu\n", + "from PIL import Image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "image_filename = '/scratch/project_mnt/S0010/Andrew_N/XeniumData/MPS-1-output/images/0_311.png'\n", + "mask_filename = '/scratch/project_mnt/S0010/Andrew_N/XeniumData/MPS-1-output/masks/0_311.npy'\n", + "tif_input = '/scratch/project_mnt/S0010/Andrew_N/XeniumData/MPS-1-output/output_image.tif'\n", + "h5_annotated = '/scratch/project_mnt/S0010/Andrew_N/XeniumData/MPS-1-output/imputed_annotated.h5ad'\n", + "transform_file = '/scratch/project_mnt/S0010/Andrew_N/XeniumData/alignments/MPS-1-matrix.csv'\n", + "xenium_file = '/scratch/project_mnt/S0010/Andrew_N/XeniumData/MPS-1'\n", + "obj_threshold = 10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "full_image_parsed = xu.load_registered_image(tif_input)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata_annotated = sp.read_h5ad(h5_annotated)\n", + "adata_annotated" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sdata = sd_io.xenium(xenium_file, n_jobs=8, cells_as_shapes=True)\n", + "sdata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sdata.table.obs[[\"celltype_major\"]] = adata_annotated.obs.reset_index()[['predicted.id']]\n", + "sdata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "merged = sd.SpatialData(\n", + " images={\n", + " \"he\": full_image_parsed,\n", + " },\n", + " shapes={\n", + " \"cell_circles\": sdata.shapes[\"cell_circles\"], # Required for bbox queries otherwise adata table 
disappears\n", + " \"cell_boundaries\": sdata.shapes[\"cell_boundaries\"],\n", + " \"nucleus_boundaries\": sdata.shapes[\"nucleus_boundaries\"],\n", + " },\n", + " table=sdata.table,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "A = pd.read_csv(transform_file, header=None).to_numpy()\n", + "if A.shape[0] == 2:\n", + " A = np.append(A, [[0,0,1]], axis=0)\n", + "affineT = sd_t.Affine(A, input_axes=(\"x\", \"y\"), output_axes=(\"x\", \"y\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "height, width = full_image_parsed['scale0']['image'].shape[-2:]\n", + "coords = [[[0, 0],[height, width]]]\n", + "coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# img_sep, mask_sep = xu.sdata_load_img_mask(merged, affineT=affineT, img_key='he', expand_px=3, return_sep=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#mask_sep.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "img_key = 'he'\n", + "shape_key = 'nucleus_boundaries'\n", + "label_key = 'celltype_major'\n", + "t_shapes = sd_t.Sequence([\n", + " sd_t.get_transformation(merged.shapes[shape_key]),\n", + " sd_t.get_transformation(merged.images[img_key]).inverse()])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "shapes = sd.transform(merged.shapes[shape_key], t_shapes)\n", + "shapes = sd.transform(shapes, affineT)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "shapes.index = shapes.index.astype(int)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "labels = 
merged.table.obs[label_key].to_frame()\n", + "labels.index = shapes.index.astype(int)\n", + "shapes_df = shapes.merge(labels, how = 'inner', right_index = True, left_index = True)\n", + "shapes_df['label'] = shapes_df[label_key].cat.codes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "shapes_df_dict = {k: v for k, v in shapes_df.groupby(label_key)} \n", + "shapes_df_dict " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "img = merged.images[img_key]\n", + "if isinstance(img, msi.multiscale_spatial_image.MultiscaleSpatialImage):\n", + " # Note that this clears any transformation attribute\n", + " img = sd_m.Image2DModel.parse(img[\"scale0\"].ds.to_array().squeeze(axis=0))\n", + "img = img.values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def new_mask_for_polygons(polygons, im_size, vals):\n", + " print(im_size)\n", + " if not isinstance(vals, (list, tuple, np.ndarray)):\n", + " vals = np.ones_like(polygons)\n", + " img_mask = np.zeros(im_size, np.float64)\n", + " if not polygons:\n", + " print(\"Not polys\")\n", + " return img_mask\n", + " int_coords = lambda x: np.array(x).round().astype(np.int32)\n", + " exteriors = [int_coords(poly.exterior.coords) if poly.geom_type == 'Polygon' \n", + " else int_coords(poly.convex_hull.exterior.coords)\n", + " for poly in polygons]\n", + " interiors = [poly.interiors if poly.geom_type == 'Polygon'\n", + " else poly.convex_hull.interiors\n", + " for poly in polygons]\n", + " interiors = [int_coords(pi.coords) for poly in interiors for pi in poly] # interiors should be [] anyway\n", + " print(f\"Exteriors: {type(exteriors[0])} {exteriors[0]}\")\n", + " print(f\"Interiors: {interiors}\")\n", + " print(f\"Vals {vals}\")\n", + " cv2.fillPoly(img_mask, [exteriors[0]], vals[0])\n", + " for i in range(len(exteriors)):\n", + " 
cv2.fillPoly(img_mask, [exteriors[i]], vals[i])\n", + " for i in range(len(interiors)):\n", + " cv2.fillPoly(img_mask, [interiors[i]], 0)\n", + " print(f\"Mask: {img_mask} {np.all(img_mask == 0),}\")\n", + " return img_mask" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from itertools import islice\n", + "\n", + "first_five = islice(shapes_df_dict.items(), 2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "masks = [\n", + " new_mask_for_polygons(\n", + " v['geometry'].tolist(),\n", + " img.shape[-2:],\n", + " # Add 1 here in case val is 0 (background)\n", + " vals=(v.index.to_numpy() +1).tolist()\n", + " )\n", + " # https://stackoverflow.com/questions/60484383/typeerror-scalar-value-for-argument-color-is-not-numeric-when-using-opencv\n", + " for k, v in first_five\n", + "]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "masks" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\n", + " np.all(masks[0] == 0),\n", + " np.all(masks[1] == 0)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "masks = np.stack(masks)\n", + "mask_bg = (np.sum(masks, axis=0) == 0)*1.\n", + "mask = np.concatenate((masks, np.expand_dims(mask_bg, axis=0)))\n", + "mask" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\n", + " np.all(masks[0] == 0),\n", + " np.all(masks[1] == 0),\n", + " np.all(masks[2] == 0)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "masks.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "img_mask = 
xu.sdata_load_img_mask(merged, affineT=affineT, img_key='he', expand_px=3)\n", + "img_mask" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "imgc = sp_im.ImageContainer(img_mask)\n", + "gen = imgc.generate_equal_crops(size=256, as_array='image', squeeze=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for i, tile in enumerate(tqdm.tqdm(gen)):\n", + " if i == 311 or i == 312:\n", + " mask = tile[:,:,3:-1]\n", + " # mask = np.moveaxis(tile[:,:,3:], 2, 0)\n", + " print(mask)\n", + " image = Image.fromarray(tile[:,:,:3].astype(np.uint8))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mask" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.imshow(plt.imread(image_filename))\n", + "plt.axis('off') # Optional: Remove the axis\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mask = np.load(str(mask_filename))\n", + "np.unique(mask)\n", + "mask\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.imshow(mask.transpose(1,2,0))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mask.transpose(1,2,0).shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nucleus_mask = (mask[-1] == 0).astype(np.uint8) # invert the bg mask\n", + "filter_mask = (hn.remove_small_objs(nucleus_mask, obj_threshold) != 0)\n", + "filter_mask_bg = (filter_mask == 0)\n", + "mask[:-1] = np.multiply(filter_mask, mask[:-1])\n", + "mask[-1] = np.multiply(filter_mask_bg, mask[-1])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "mask.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "hovernet", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/development/Xenium_classification/src/preprocess_images.py b/development/Xenium_classification/src/preprocess_images.py index a28a89c..3973e08 100644 --- a/development/Xenium_classification/src/preprocess_images.py +++ b/development/Xenium_classification/src/preprocess_images.py @@ -1,3 +1,6 @@ +#!/usr/bin/python3 +# coding: utf-8 + import numpy as np import sys import argparse @@ -17,6 +20,8 @@ from loguru import logger from PIL import Image from pathml.ml.hovernet import remove_small_objs + + # from pathml.preprocessing import StainNormalizationHE # from torchvision import transforms # import torchstain @@ -27,7 +32,7 @@ def white_filter(img, white_threshold=200, percent_threshold=0.85, return_mono=F # Grayscale img = img.convert('L') # Threshold - img = img.point( lambda p: 255 if p > white_threshold else 0) + img = img.point(lambda p: 255 if p > white_threshold else 0) # To mono img = img.convert('1') # Percentage of white values @@ -36,106 +41,114 @@ def white_filter(img, white_threshold=200, percent_threshold=0.85, return_mono=F elif np.all(~np.array(img)): percent_white = 1 else: - percent_white = np.unique(img, return_counts=True)[1][1] / (img.size[0]**2) - return (percent_white > percent_threshold) if (not return_mono) else img + percent_white = np.unique(img, return_counts=True)[1][1] / (img.size[0] ** 2) + return (percent_white > percent_threshold) if 
(not return_mono) else img + def main(args): - data_dir = Path(args.data) + data_dir = Path(args.data) - # dirs for images, masks - imdir = data_dir / "images" - maskdir = data_dir / "masks" + # dirs for images, masks + imdir = data_dir / "images" + maskdir = data_dir / "masks" - # stop if the images and masks directories don't already exist - assert imdir.is_dir(), f"Error: 'images' directory not found: {imdir}" - assert maskdir.is_dir(), f"Error: 'masks' directory not found: {maskdir}" + # stop if the images and masks directories don't already exist + assert imdir.is_dir(), f"Error: 'images' directory not found: {imdir}" + assert maskdir.is_dir(), f"Error: 'masks' directory not found: {maskdir}" - paths = list(imdir.glob("*")) - paths = [p.stem for p in paths] + paths = list(imdir.glob("*")) + paths = [p.stem for p in paths] - remove_dir = data_dir / "removed" - if not args.dry_run: - remove_dir.mkdir(parents=True, exist_ok=True) - (remove_dir / "images").mkdir(parents=True, exist_ok=True) - (remove_dir / "masks").mkdir(parents=True, exist_ok=True) + remove_dir = data_dir / "removed" + if not args.dry_run: + remove_dir.mkdir(parents=True, exist_ok=True) + (remove_dir / "images").mkdir(parents=True, exist_ok=True) + (remove_dir / "masks").mkdir(parents=True, exist_ok=True) - filter_list = [] + filter_list = [] - for path in tqdm(paths): - impath = imdir / f"{path}.png" - maskpath = maskdir / f"{path}.npy" + for path in tqdm(paths): + to_remove = None + impath = imdir / f"{path}.png" + maskpath = maskdir / f"{path}.npy" - img = np.array(Image.open(str(impath))) - mask = np.load(str(maskpath)) + img = np.array(Image.open(str(impath))) + mask = np.load(str(maskpath)) - # Filter 0-filled values from cropping + # Filter 0-filled values from cropping # NOTE: I think this is no longer needed - if img.any(axis=-1).mean() < 0.75: - filter_list.append(path) + if img.any(axis=-1).mean() < 0.75: + if args.dry_run: + print(f"** 0 filled value - will move {path}") + to_remove = 
path # White space removal - if white_filter(img): - filter_list.append(path) - - # Filter small objects - # This could remove nuclei so run this first - nucleus_mask = (mask[-1] == 0).astype(np.uint8) # invert the bg mask - filter_mask = (remove_small_objs(nucleus_mask, args.obj_threshold) != 0) - filter_mask_bg = (filter_mask == 0) - mask[:-1] = np.multiply(filter_mask, mask[:-1]) - mask[-1] = np.multiply(filter_mask_bg, mask[-1]) - - # Filter images for nuclei + if white_filter(img): + if args.dry_run: + print(f"** white filter - will move {path}") + to_remove = path + + # Filter small objects + # This could remove nuclei so run this first + nucleus_mask = (mask[-1] == 0).astype(np.uint8) # invert the bg mask + filter_mask = (remove_small_objs(nucleus_mask, args.obj_threshold) != 0) + filter_mask_bg = (filter_mask == 0) + mask[:-1] = np.multiply(filter_mask, mask[:-1]) + mask[-1] = np.multiply(filter_mask_bg, mask[-1]) + + # Filter images for nuclei # 2 is for 0,1 (bg/fg) - if len(np.unique(mask)) - 2 < args.nuclei_threshold: - filter_list.append(path) - - # Stain normalisation - # TBA - # method = args.stain - # It doesn't seem like Pathml's function is good - - # Save modified files - if path not in filter_list: - if not args.dry_run: - image = Image.fromarray(img.astype(np.uint8)) - image.save(impath) - np.save(maskpath, mask) - else: - if not args.dry_run: - impath.rename(remove_dir / "images" / impath.name) - maskpath.rename(remove_dir / "masks" / maskpath.name) - else: - print(f"Will move {impath} to {str(remove_dir / 'images' / impath.name)}") - print(f"Will move {maskpath} to {str(remove_dir / 'masks' / maskpath.name)}") - - print("Tiles removed:") - print(filter_list) - + num_nuclei = len(np.unique(mask)) - 2 + min_nuclei = args.nuclei_threshold + if num_nuclei < min_nuclei: + if args.dry_run: + print(f"** nuclei filter {num_nuclei} < {min_nuclei} - will move {path}") + to_remove = path + + # Stain normalisation + # TBA + # method = args.stain + # It 
doesn't seem like Pathml's function is good + + # If added to remove add to filter list + if to_remove is not None: + filter_list.append(to_remove) + + # Save modified files + if path not in filter_list: + if not args.dry_run: + image = Image.fromarray(img.astype(np.uint8)) + image.save(impath) + np.save(maskpath, mask) + else: + if not args.dry_run: + impath.rename(remove_dir / "images" / impath.name) + maskpath.rename(remove_dir / "masks" / maskpath.name) + + print(f"Tiles removed ({len(filter_list)}/{len(paths)}):") + print(filter_list) if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--data", type=str, default="./tile_data/rep1/", help="Path to tile data directory" - ) - parser.add_argument( - "--nuclei-threshold", type=int, default=2, help="Nuclei threshold (must have at least N)" - ) - parser.add_argument( - "--obj-threshold", type=int, default=10, help="Threshold for small objects" - ) - parser.add_argument( - "--stain", type=str, default=None, help="Stain normalisation method (TBA)" - ) - parser.add_argument( - "--dry-run", - action="store_true", - default=False, - help="Dry run", - ) - - args = parser.parse_args() - print(args) - main(args) - - + parser = argparse.ArgumentParser() + parser.add_argument( + "--data", type=str, default="./tile_data/rep1/", help="Path to tile data directory" + ) + parser.add_argument( + "--nuclei-threshold", type=int, default=2, help="Nuclei threshold (must have at least N)" + ) + parser.add_argument( + "--obj-threshold", type=int, default=10, help="Threshold for small objects" + ) + parser.add_argument( + "--stain", type=str, default=None, help="Stain normalisation method (TBA)" + ) + parser.add_argument( + "--dry-run", + action="store_true", + default=False, + help="Dry run", + ) + + args = parser.parse_args() + print(args) + main(args) diff --git a/development/Xenium_classification/src/register.py b/development/Xenium_classification/src/register.py new file mode 100644 index 
0000000..35573e3 --- /dev/null +++ b/development/Xenium_classification/src/register.py @@ -0,0 +1,34 @@ +#!/usr/bin/python3 +# coding: utf-8 + +import cv2 +import numpy as np +import sys + +if len(sys.argv) != 3: + print("Usage: python register.py ") + sys.exit(1) +he_image_path = sys.argv[1] +dapi_image_path = sys.argv[2] +he_image = cv2.imread(he_image_path, cv2.IMREAD_GRAYSCALE) +dapi_image = cv2.imread(dapi_image_path, cv2.IMREAD_GRAYSCALE) +if he_image is None: + print(f"Failed to load H&E image: {he_image_path}") + sys.exit(1) +if dapi_image is None: + print(f"Failed to load DAPI image: {dapi_image_path}") + sys.exit(1) +sift = cv2.SIFT_create() +keypoints_he, descriptors_he = sift.detectAndCompute(he_image, None) +keypoints_dapi, descriptors_dapi = sift.detectAndCompute(dapi_image, None) +matcher = cv2.BFMatcher() +matches = matcher.knnMatch(descriptors_he, descriptors_dapi, k=2) +good_matches = [] +for m, n in matches: + if m.distance < 0.75 * n.distance: + good_matches.append(m) +src_pts = np.float32([keypoints_he[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2) +dst_pts = np.float32([keypoints_dapi[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2) +M, _ = cv2.estimateAffinePartial2D(src_pts, dst_pts, method=cv2.RANSAC, ransacReprojThreshold=5.0) +csv_output = '\n'.join([','.join(map(str, row)) for row in M]) +print(csv_output) diff --git a/development/Xenium_classification/src/xenium_utils.py b/development/Xenium_classification/src/xenium_utils.py index 21f2181..c8f5973 100644 --- a/development/Xenium_classification/src/xenium_utils.py +++ b/development/Xenium_classification/src/xenium_utils.py @@ -20,6 +20,7 @@ from spatialdata_io._constants._constants import VisiumKeys from spatialdata_io._docs import inject_docs from spatialdata_io.readers._utils._utils import _read_counts +from spatialdata.models._utils import DEFAULT_COORDINATE_SYSTEM # from https://github.com/scverse/spatialdata-io/blob/main/src/spatialdata_io/readers/visium.py#L141 
def load_registered_image(tif_path): @@ -128,10 +129,20 @@ def sdata_load_img_mask(sdata, affineT=None, # If using bbox, transform shapes then translate back to origin t_shapes = Sequence([get_transformation(sdata.shapes[shape_key]), get_transformation(sdata.images[img_key]).inverse()]) + + # set_transformation(sdata.shapes[shape_key], t_shapes) + # spatialdata 0.1.2 version + #shapes = transform(sdata.shapes[shape_key], to_coordinate_system=DEFAULT_COORDINATE_SYSTEM) + # spatialdata 0.0.9 version shapes = transform(sdata.shapes[shape_key], t_shapes) # Affine transform if affineT: + # spatialdata 0.1.2 version + #set_transformation(shapes, affineT) + #shapes = transform(shapes, to_coordinate_system=DEFAULT_COORDINATE_SYSTEM) + # spatialdata 0.0.9 version shapes = transform(shapes, affineT) + # spatialdata 0.0.9 version shapes.index = shapes.index.astype(int) # NOTE: Bounding box query resets (removes) categories for the adata table # so, can use the original labels instead @@ -142,19 +153,24 @@ def sdata_load_img_mask(sdata, affineT=None, shapes_df = shapes.merge(labels, how = 'inner', right_index = True, left_index = True) shapes_df['label'] = shapes_df[label_key].cat.codes # Following lbl dicts are unused for now - int2lbl = dict(enumerate(shapes_df[label_key].cat.categories)) - lbl2int = dict(zip(int2lbl.values(), int2lbl.keys())) + # int2lbl = dict(enumerate(shapes_df[label_key].cat.categories)) + # lbl2int = dict(zip(int2lbl.values(), int2lbl.keys())) shapes_df_dict = {k: v for k, v in shapes_df.groupby(label_key)} img = sdata.images[img_key] if isinstance(img, msi.multiscale_spatial_image.MultiscaleSpatialImage): # Note that this clears any transformation attribute img = Image2DModel.parse(img["scale0"].ds.to_array().squeeze(axis=0)) - img = img.values # NOTE: this errors on int16 dtypes - masks = [mask_for_polygons(v['geometry'].tolist(), - img.shape[-2:], - vals=(v.index.to_numpy() + 1).tolist()) # Add 1 here in case val is 0 (background) - # 
https://stackoverflow.com/questions/60484383/typeerror-scalar-value-for-argument-color-is-not-numeric-when-using-opencv - for k, v in shapes_df_dict.items()] + img = img.values # NOTE: this errors on int16 dtypes + masks = [ + mask_for_polygons( + v['geometry'].tolist(), + img.shape[-2:], + # Add 1 here in case val is 0 (background) + vals=(v.index.to_numpy() + 1).tolist() + ) + # https://stackoverflow.com/questions/60484383/typeerror-scalar-value-for-argument-color-is-not-numeric-when-using-opencv + for k, v in shapes_df_dict.items() + ] if expand_px: masks = [expand_labels(mask, distance=expand_px) for mask in masks] # Ideally expand_labels should be run on the merged mask, but the loop