diff --git a/Instrument resources/resources_metadata.json b/Instrument resources/resources_metadata.json new file mode 100644 index 00000000..35c1e33d --- /dev/null +++ b/Instrument resources/resources_metadata.json @@ -0,0 +1,51 @@ +{ + "books": [ + { + "title": "The Encyclopedia of Musical Instruments of Iran: Percussion, Bowed, and String Instruments", + "volume": 1, + "author": "Mohammad Reza Darvishi", + "publisher": "Mahoor Institute of Culture and Arts", + "year": 2001, + "language": "Persian", + "pages": 598, + "isbn": "9789646409453", + "link": "https://mahoor.com/en/book/806-the-encyclopaedia-of-musical-instruments-of-iran-1", + "scan_location": "https://drive.google.com/file/d/1Qzq-I41Q5zZ-_xclgN34MXaP5xDKDfiU/view?usp=sharing" + }, + { + "title": "The Encyclopedia of Musical Instruments of Iran: Membranophones and Idiophones", + "volume": 2, + "author": "Mohammad Reza Darvishi", + "publisher": "Mahoor Institute of Culture and Arts", + "year": 2013, + "language": "Persian", + "pages": 708, + "isbn": "9789648772036", + "link": "https://www.mahoor.com/en/book/807-the-encyclopaedia-of-musical-instruments-of-iran-2", + "scan_location": "https://drive.google.com/file/d/1IHYjrcqi6lC0GmMdX4J0FVc0cZoVkmN9/view?usp=sharing" + }, + { + "title": "The Encyclopedia of Musical Instruments of Iran: Woodwinds", + "volume": 3, + "author": "Mohammad Reza Darvishi", + "publisher": "Mahoor Institute of Culture and Arts", + "year": 2023, + "language": "Persian", + "pages": 324, + "isbn": "9786229899762", + "link": "https://www.mahoor.com/en/book/20823-The%20Encyclopaedia%20of%20Musical%20Instruments%20of%20Iran%203", + "scan_location": "https://drive.google.com/file/d/1OQ19DuEoS8KHIcgo26knlqNYL5BEZnev/view?usp=sharing" + }, + { + "title": "The World Encyclopedia of Musical Instruments", + "author": "Hassan Zandbaf", + "publisher": "Rawzanah", + "year": 1997, + "language": "Persian", + "pages": 311, + "isbn": "9646176232", + "link": 
"https://web.archive.org/web/20180920195542/http://www.ketab.ir/bookview.aspx?bookid=177683", + "scan_location": "https://drive.google.com/file/d/1v0JXAKZLv7EW56T3WYgZSjFbteAoVr1n/view?usp=sharing" + } + ] +} diff --git a/docker-compose-deployment.yml b/docker-compose-deployment.yml index d614e2fa..96ab99fb 100644 --- a/docker-compose-deployment.yml +++ b/docker-compose-deployment.yml @@ -44,7 +44,9 @@ services: restart: unless-stopped nginx: - build: ./nginx + build: + context: . + dockerfile: ./nginx/Dockerfile.prod container_name: vim-nginx restart: unless-stopped environment: @@ -52,7 +54,6 @@ services: ports: - "8000:80" volumes: - - ./web-app/frontend/assets/:/virtual-instrument-museum/frontend/assets/ - vim-static:/virtual-instrument-museum/static - vim-media:/virtual-instrument-museum/media depends_on: diff --git a/nginx/Dockerfile b/nginx/Dockerfile index fab7a1c7..ed7b64b2 100644 --- a/nginx/Dockerfile +++ b/nginx/Dockerfile @@ -1,3 +1,3 @@ FROM nginx:1.25.2 COPY ./nginx.conf /etc/nginx/nginx.conf -COPY ./vim.conf.template /etc/nginx/templates/vim.conf.template \ No newline at end of file +COPY ./vim.conf.template /etc/nginx/templates/vim.conf.template diff --git a/nginx/Dockerfile.prod b/nginx/Dockerfile.prod new file mode 100644 index 00000000..a1043fc3 --- /dev/null +++ b/nginx/Dockerfile.prod @@ -0,0 +1,14 @@ +# Production nginx: includes compiled frontend assets in the image +# (Dev uses bind mounts instead, see docker-compose.yml) + +FROM node:20-slim AS frontend-builder +COPY ./web-app/frontend/ /frontend/ +WORKDIR /frontend +RUN npm install && npm run sass:build + +FROM nginx:1.25.2 +COPY ./nginx/nginx.conf /etc/nginx/nginx.conf +COPY ./nginx/vim.conf.template /etc/nginx/templates/vim.conf.template +# Copy static assets (images, fonts, etc.) 
and compiled CSS +COPY ./web-app/frontend/assets/ /virtual-instrument-museum/frontend/assets/ +COPY --from=frontend-builder /frontend/assets/css /virtual-instrument-museum/frontend/assets/css diff --git a/poetry.lock b/poetry.lock index fa8c955d..a9ef58c6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -31,45 +31,51 @@ wrapt = {version = ">=1.14,<2", markers = "python_version >= \"3.11\""} [[package]] name = "black" -version = "23.12.1" +version = "26.1.0" description = "The uncompromising code formatter." optional = false -python-versions = ">=3.8" +python-versions = ">=3.10" files = [ - {file = "black-23.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0aaf6041986767a5e0ce663c7a2f0e9eaf21e6ff87a5f95cbf3675bfd4c41d2"}, - {file = "black-23.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c88b3711d12905b74206227109272673edce0cb29f27e1385f33b0163c414bba"}, - {file = "black-23.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a920b569dc6b3472513ba6ddea21f440d4b4c699494d2e972a1753cdc25df7b0"}, - {file = "black-23.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:3fa4be75ef2a6b96ea8d92b1587dd8cb3a35c7e3d51f0738ced0781c3aa3a5a3"}, - {file = "black-23.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8d4df77958a622f9b5a4c96edb4b8c0034f8434032ab11077ec6c56ae9f384ba"}, - {file = "black-23.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:602cfb1196dc692424c70b6507593a2b29aac0547c1be9a1d1365f0d964c353b"}, - {file = "black-23.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c4352800f14be5b4864016882cdba10755bd50805c95f728011bcb47a4afd59"}, - {file = "black-23.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:0808494f2b2df923ffc5723ed3c7b096bd76341f6213989759287611e9837d50"}, - {file = "black-23.12.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:25e57fd232a6d6ff3f4478a6fd0580838e47c93c83eaf1ccc92d4faf27112c4e"}, - {file = "black-23.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:2d9e13db441c509a3763a7a3d9a49ccc1b4e974a47be4e08ade2a228876500ec"}, - {file = "black-23.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d1bd9c210f8b109b1762ec9fd36592fdd528485aadb3f5849b2740ef17e674e"}, - {file = "black-23.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:ae76c22bde5cbb6bfd211ec343ded2163bba7883c7bc77f6b756a1049436fbb9"}, - {file = "black-23.12.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1fa88a0f74e50e4487477bc0bb900c6781dbddfdfa32691e780bf854c3b4a47f"}, - {file = "black-23.12.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a4d6a9668e45ad99d2f8ec70d5c8c04ef4f32f648ef39048d010b0689832ec6d"}, - {file = "black-23.12.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b18fb2ae6c4bb63eebe5be6bd869ba2f14fd0259bda7d18a46b764d8fb86298a"}, - {file = "black-23.12.1-cp38-cp38-win_amd64.whl", hash = "sha256:c04b6d9d20e9c13f43eee8ea87d44156b8505ca8a3c878773f68b4e4812a421e"}, - {file = "black-23.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3e1b38b3135fd4c025c28c55ddfc236b05af657828a8a6abe5deec419a0b7055"}, - {file = "black-23.12.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4f0031eaa7b921db76decd73636ef3a12c942ed367d8c3841a0739412b260a54"}, - {file = "black-23.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97e56155c6b737854e60a9ab1c598ff2533d57e7506d97af5481141671abf3ea"}, - {file = "black-23.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:dd15245c8b68fe2b6bd0f32c1556509d11bb33aec9b5d0866dd8e2ed3dba09c2"}, - {file = "black-23.12.1-py3-none-any.whl", hash = "sha256:78baad24af0f033958cad29731e27363183e140962595def56423e626f4bee3e"}, - {file = "black-23.12.1.tar.gz", hash = "sha256:4ce3ef14ebe8d9509188014d96af1c456a910d5b5cbf434a09fef7e024b3d0d5"}, + {file = "black-26.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ca699710dece84e3ebf6e92ee15f5b8f72870ef984bf944a57a777a48357c168"}, + {file = 
"black-26.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5e8e75dabb6eb83d064b0db46392b25cabb6e784ea624219736e8985a6b3675d"}, + {file = "black-26.1.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eb07665d9a907a1a645ee41a0df8a25ffac8ad9c26cdb557b7b88eeeeec934e0"}, + {file = "black-26.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:7ed300200918147c963c87700ccf9966dceaefbbb7277450a8d646fc5646bf24"}, + {file = "black-26.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:c5b7713daea9bf943f79f8c3b46f361cc5229e0e604dcef6a8bb6d1c37d9df89"}, + {file = "black-26.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3cee1487a9e4c640dc7467aaa543d6c0097c391dc8ac74eb313f2fbf9d7a7cb5"}, + {file = "black-26.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d62d14ca31c92adf561ebb2e5f2741bf8dea28aef6deb400d49cca011d186c68"}, + {file = "black-26.1.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fb1dafbbaa3b1ee8b4550a84425aac8874e5f390200f5502cf3aee4a2acb2f14"}, + {file = "black-26.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:101540cb2a77c680f4f80e628ae98bd2bd8812fb9d72ade4f8995c5ff019e82c"}, + {file = "black-26.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:6f3977a16e347f1b115662be07daa93137259c711e526402aa444d7a88fdc9d4"}, + {file = "black-26.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6eeca41e70b5f5c84f2f913af857cf2ce17410847e1d54642e658e078da6544f"}, + {file = "black-26.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:dd39eef053e58e60204f2cdf059e2442e2eb08f15989eefe259870f89614c8b6"}, + {file = "black-26.1.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9459ad0d6cd483eacad4c6566b0f8e42af5e8b583cee917d90ffaa3778420a0a"}, + {file = "black-26.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:a19915ec61f3a8746e8b10adbac4a577c6ba9851fa4a9e9fbfbcf319887a5791"}, + {file = 
"black-26.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:643d27fb5facc167c0b1b59d0315f2674a6e950341aed0fc05cf307d22bf4954"}, + {file = "black-26.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ba1d768fbfb6930fc93b0ecc32a43d8861ded16f47a40f14afa9bb04ab93d304"}, + {file = "black-26.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2b807c240b64609cb0e80d2200a35b23c7df82259f80bef1b2c96eb422b4aac9"}, + {file = "black-26.1.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1de0f7d01cc894066a1153b738145b194414cc6eeaad8ef4397ac9abacf40f6b"}, + {file = "black-26.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:91a68ae46bf07868963671e4d05611b179c2313301bd756a89ad4e3b3db2325b"}, + {file = "black-26.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:be5e2fe860b9bd9edbf676d5b60a9282994c03fbbd40fe8f5e75d194f96064ca"}, + {file = "black-26.1.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:9dc8c71656a79ca49b8d3e2ce8103210c9481c57798b48deeb3a8bb02db5f115"}, + {file = "black-26.1.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:b22b3810451abe359a964cc88121d57f7bce482b53a066de0f1584988ca36e79"}, + {file = "black-26.1.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:53c62883b3f999f14e5d30b5a79bd437236658ad45b2f853906c7cbe79de00af"}, + {file = "black-26.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:f016baaadc423dc960cdddf9acae679e71ee02c4c341f78f3179d7e4819c095f"}, + {file = "black-26.1.0-cp314-cp314-win_arm64.whl", hash = "sha256:66912475200b67ef5a0ab665011964bf924745103f51977a78b4fb92a9fc1bf0"}, + {file = "black-26.1.0-py3-none-any.whl", hash = "sha256:1054e8e47ebd686e078c0bb0eaf31e6ce69c966058d122f2c0c950311f9f3ede"}, + {file = "black-26.1.0.tar.gz", hash = "sha256:d294ac3340eef9c9eb5d29288e96dc719ff269a88e27b396340459dd85da4c58"}, ] [package.dependencies] click = ">=8.0.0" mypy-extensions = ">=0.4.3" packaging = ">=22.0" -pathspec = ">=0.9.0" +pathspec = ">=1.0.0" 
platformdirs = ">=2" +pytokens = ">=0.3.0" [package.extras] colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] +d = ["aiohttp (>=3.10)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] @@ -1040,6 +1046,60 @@ setuptools = "*" [package.extras] solrcloud = ["kazoo (>=2.5.0)"] +[[package]] +name = "pytokens" +version = "0.4.1" +description = "A Fast, spec compliant Python 3.14+ tokenizer that runs on older Pythons." +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytokens-0.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2a44ed93ea23415c54f3face3b65ef2b844d96aeb3455b8a69b3df6beab6acc5"}, + {file = "pytokens-0.4.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:add8bf86b71a5d9fb5b89f023a80b791e04fba57960aa790cc6125f7f1d39dfe"}, + {file = "pytokens-0.4.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:670d286910b531c7b7e3c0b453fd8156f250adb140146d234a82219459b9640c"}, + {file = "pytokens-0.4.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4e691d7f5186bd2842c14813f79f8884bb03f5995f0575272009982c5ac6c0f7"}, + {file = "pytokens-0.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:27b83ad28825978742beef057bfe406ad6ed524b2d28c252c5de7b4a6dd48fa2"}, + {file = "pytokens-0.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d70e77c55ae8380c91c0c18dea05951482e263982911fc7410b1ffd1dadd3440"}, + {file = "pytokens-0.4.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a58d057208cb9075c144950d789511220b07636dd2e4708d5645d24de666bdc"}, + {file = "pytokens-0.4.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b49750419d300e2b5a3813cf229d4e5a4c728dae470bcc89867a9ad6f25a722d"}, + {file = "pytokens-0.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:d9907d61f15bf7261d7e775bd5d7ee4d2930e04424bab1972591918497623a16"}, + {file = "pytokens-0.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:ee44d0f85b803321710f9239f335aafe16553b39106384cef8e6de40cb4ef2f6"}, + {file = "pytokens-0.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:140709331e846b728475786df8aeb27d24f48cbcf7bcd449f8de75cae7a45083"}, + {file = "pytokens-0.4.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d6c4268598f762bc8e91f5dbf2ab2f61f7b95bdc07953b602db879b3c8c18e1"}, + {file = "pytokens-0.4.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:24afde1f53d95348b5a0eb19488661147285ca4dd7ed752bbc3e1c6242a304d1"}, + {file = "pytokens-0.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5ad948d085ed6c16413eb5fec6b3e02fa00dc29a2534f088d3302c47eb59adf9"}, + {file = "pytokens-0.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:3f901fe783e06e48e8cbdc82d631fca8f118333798193e026a50ce1b3757ea68"}, + {file = "pytokens-0.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8bdb9d0ce90cbf99c525e75a2fa415144fd570a1ba987380190e8b786bc6ef9b"}, + {file = "pytokens-0.4.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5502408cab1cb18e128570f8d598981c68a50d0cbd7c61312a90507cd3a1276f"}, + {file = "pytokens-0.4.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29d1d8fb1030af4d231789959f21821ab6325e463f0503a61d204343c9b355d1"}, + {file = "pytokens-0.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:970b08dd6b86058b6dc07efe9e98414f5102974716232d10f32ff39701e841c4"}, + {file = "pytokens-0.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:9bd7d7f544d362576be74f9d5901a22f317efc20046efe2034dced238cbbfe78"}, + {file = "pytokens-0.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4a14d5f5fc78ce85e426aa159489e2d5961acf0e47575e08f35584009178e321"}, + {file 
= "pytokens-0.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97f50fd18543be72da51dd505e2ed20d2228c74e0464e4262e4899797803d7fa"}, + {file = "pytokens-0.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dc74c035f9bfca0255c1af77ddd2d6ae8419012805453e4b0e7513e17904545d"}, + {file = "pytokens-0.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:f66a6bbe741bd431f6d741e617e0f39ec7257ca1f89089593479347cc4d13324"}, + {file = "pytokens-0.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:b35d7e5ad269804f6697727702da3c517bb8a5228afa450ab0fa787732055fc9"}, + {file = "pytokens-0.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:8fcb9ba3709ff77e77f1c7022ff11d13553f3c30299a9fe246a166903e9091eb"}, + {file = "pytokens-0.4.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79fc6b8699564e1f9b521582c35435f1bd32dd06822322ec44afdeba666d8cb3"}, + {file = "pytokens-0.4.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d31b97b3de0f61571a124a00ffe9a81fb9939146c122c11060725bd5aea79975"}, + {file = "pytokens-0.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:967cf6e3fd4adf7de8fc73cd3043754ae79c36475c1c11d514fc72cf5490094a"}, + {file = "pytokens-0.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:584c80c24b078eec1e227079d56dc22ff755e0ba8654d8383b2c549107528918"}, + {file = "pytokens-0.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:da5baeaf7116dced9c6bb76dc31ba04a2dc3695f3d9f74741d7910122b456edc"}, + {file = "pytokens-0.4.1-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:11edda0942da80ff58c4408407616a310adecae1ddd22eef8c692fe266fa5009"}, + {file = "pytokens-0.4.1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:0fc71786e629cef478cbf29d7ea1923299181d0699dbe7c3c0f4a583811d9fc1"}, + {file = "pytokens-0.4.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:dcafc12c30dbaf1e2af0490978352e0c4041a7cde31f4f81435c2a5e8b9cabb6"}, + {file = "pytokens-0.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:42f144f3aafa5d92bad964d471a581651e28b24434d184871bd02e3a0d956037"}, + {file = "pytokens-0.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:34bcc734bd2f2d5fe3b34e7b3c0116bfb2397f2d9666139988e7a3eb5f7400e3"}, + {file = "pytokens-0.4.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:941d4343bf27b605e9213b26bfa1c4bf197c9c599a9627eb7305b0defcfe40c1"}, + {file = "pytokens-0.4.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3ad72b851e781478366288743198101e5eb34a414f1d5627cdd585ca3b25f1db"}, + {file = "pytokens-0.4.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:682fa37ff4d8e95f7df6fe6fe6a431e8ed8e788023c6bcc0f0880a12eab80ad1"}, + {file = "pytokens-0.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:30f51edd9bb7f85c748979384165601d028b84f7bd13fe14d3e065304093916a"}, + {file = "pytokens-0.4.1-py3-none-any.whl", hash = "sha256:26cef14744a8385f35d0e095dc8b3a7583f6c953c2e3d269c7f82484bf5ad2de"}, + {file = "pytokens-0.4.1.tar.gz", hash = "sha256:292052fe80923aae2260c073f822ceba21f3872ced9a68bb7953b348e561179a"}, +] + +[package.extras] +dev = ["black", "build", "mypy", "pytest", "pytest-cov", "setuptools", "tox", "twine", "wheel"] + [[package]] name = "pyyaml" version = "6.0.3" @@ -1535,4 +1595,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "da888e26b13f44d8d84d1473c6570fe17b4bf633b921d084e702ac3cf259e292" +content-hash = "52fca2fb4da33dac5fc0efcece2e6c24de59097c61170ef0bf728252990ecb01" diff --git a/pyproject.toml b/pyproject.toml index 8f5868b9..25e13f55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ django-ratelimit = "^4.1.0" 
[tool.poetry.group.dev.dependencies] -black = "^23.9.1" +black = "26.1.0" mypy = "^1.5.1" pylint = "^2.17.6" types-requests = "^2.32.0.20240712" diff --git a/web-app/django/VIM/apps/instruments/constants.py b/web-app/django/VIM/apps/instruments/constants.py new file mode 100644 index 00000000..0ff8a38c --- /dev/null +++ b/web-app/django/VIM/apps/instruments/constants.py @@ -0,0 +1,78 @@ +""" +Single source of truth for image format constants. + +All format-related mappings are derived from STORABLE_FORMATS below. +To add a new format, add an entry there — all consumers pick it up automatically. + +Download-specific constants (CONVERT_FORMATS, SKIP_FORMATS, DOWNLOAD_CONTENT_TYPE_MAP) +are also defined here since they reference the same format domain. +""" + +# ── Core registry ── +# (extension, PIL_format_name_or_None, mime_type, human_label) +STORABLE_FORMATS = [ + ("jpg", "JPEG", "image/jpeg", "JPEG"), + ("png", "PNG", "image/png", "PNG"), + ("gif", "GIF", "image/gif", "GIF"), + ("webp", "WEBP", "image/webp", "WebP"), + ("svg", None, "image/svg+xml", "SVG (Scalable Vector Graphics)"), +] + +# ── Download-only formats (recognized but not stored as-is) ── + +# Formats converted to jpg/png during download +CONVERT_FORMATS = {"tiff", "tif", "bmp"} + +# Formats rejected during download +SKIP_FORMATS = {"xcf"} + +# MIME types for download-only formats +DOWNLOAD_CONTENT_TYPE_MAP = { + "image/tiff": "tiff", + "image/bmp": "bmp", + "image/x-xcf": "xcf", +} + +# ── Derived from STORABLE_FORMATS ── + +# MIME type → extension mapping for format detection during download. 
+# Includes storable formats and download-only formats: +# {"image/jpeg": "jpg", "image/png": "png", "image/gif": "gif", +# "image/webp": "webp", "image/svg+xml": "svg", +# "image/tiff": "tiff", "image/bmp": "bmp", "image/x-xcf": "xcf"} +CONTENT_TYPE_MAP = {mime: ext for ext, _, mime, _ in STORABLE_FORMATS} +CONTENT_TYPE_MAP.update(DOWNLOAD_CONTENT_TYPE_MAP) + +# Extensions preserved as-is during download (no conversion needed). +# {"jpg", "png", "gif", "webp", "svg", "jpeg"} +PRESERVE_FORMATS = {ext for ext, _, _, _ in STORABLE_FORMATS} | {"jpeg"} + +# Extension → PIL format name for saving images and thumbnails. +# {"jpg": "JPEG", "png": "PNG", "gif": "GIF", "webp": "WEBP", "jpeg": "JPEG"} +# Note: SVG is excluded (PIL cannot process SVG files). +EXT_TO_PIL_FORMAT = {ext: pil for ext, pil, _, _ in STORABLE_FORMATS if pil} +EXT_TO_PIL_FORMAT["jpeg"] = "JPEG" + +# PIL format name → (PIL save format, file extension) for upload processing. +# {"JPEG": ("JPEG", "jpg"), "PNG": ("PNG", "png"), +# "GIF": ("GIF", "gif"), "WEBP": ("WEBP", "webp")} +# Note: SVG is excluded (user uploads don't support SVG). +PIL_FORMAT_TO_EXTENSION = { + pil: (pil, ext) for ext, pil, _, _ in STORABLE_FORMATS if pil +} + +# Django model choices for AVResource.format field. +# [("jpg", "JPEG"), ("jpeg", "JPEG (alternative extension)"), +# ("png", "PNG"), ("gif", "GIF"), ("webp", "WebP"), +# ("svg", "SVG (Scalable Vector Graphics)")] +IMAGE_FORMAT_CHOICES = [(ext, label) for ext, _, _, label in STORABLE_FORMATS] +IMAGE_FORMAT_CHOICES.insert(1, ("jpeg", "JPEG (alternative extension)")) + +# Allowed file extensions for import/download validation. +# {"jpg", "jpeg", "png", "gif", "webp", "svg"} +ALLOWED_IMAGE_EXTENSIONS = {ext for ext, _, _, _ in STORABLE_FORMATS} | {"jpeg"} + +# Allowed MIME types for user uploads. +# ["image/jpeg", "image/png", "image/gif", "image/webp"] +# Note: SVG is excluded for security (script injection risk). 
+ALLOWED_IMAGE_TYPES = [mime for ext, _, mime, _ in STORABLE_FORMATS if ext != "svg"] diff --git a/web-app/django/VIM/apps/instruments/management/commands/download_imgs.py b/web-app/django/VIM/apps/instruments/management/commands/download_imgs.py index 5d00bce0..7ad2fe56 100644 --- a/web-app/django/VIM/apps/instruments/management/commands/download_imgs.py +++ b/web-app/django/VIM/apps/instruments/management/commands/download_imgs.py @@ -1,83 +1,199 @@ -"""This module downloads images from the web and creates thumbnails for the VIM instruments.""" - -import csv -import os -from io import BytesIO -import requests -from PIL import Image -from django.conf import settings -from django.core.management.base import BaseCommand -from VIM.apps.instruments.utils.image_processor import create_thumbnail_image - - -class Command(BaseCommand): - """Django management command to download images and create thumbnails for instruments.""" - - USER_AGENT = "UMIL/0.1.0 (https://vim.simssa.ca/; https://ddmal.music.mcgill.ca/)" - OUTPUT_DIR = os.path.join( - settings.STATIC_ROOT, "instruments", "images", "instrument_imgs" - ) - CSV_PATH = "startup_data/umil_instruments_15July_2025.csv" - - help = "Download images and create thumbnails for instruments" - - def __init__(self): - super().__init__() - self.headers = {"User-Agent": self.USER_AGENT} - self.original_img_dir = os.path.join(self.OUTPUT_DIR, "original") - self.thumbnail_dir = os.path.join(self.OUTPUT_DIR, "thumbnail") - os.makedirs(self.original_img_dir, exist_ok=True) - os.makedirs(self.thumbnail_dir, exist_ok=True) - - def download_image_as_png(self, url, save_path): - """Download an image from a URL and save it as a PNG file.""" - try: - response = requests.get(url, stream=True, headers=self.headers, timeout=10) - response.raise_for_status() # Raise an HTTPError for bad responses - self._save_image_as_png(response.content, url, save_path) - except requests.RequestException as e: - self.stderr.write(f"Failed to download image 
from {url}: {e}") - - def _save_image_as_png(self, img_content, url, save_path): - """Save image content as a PNG file.""" - try: - img = Image.open(BytesIO(img_content)) - img.save(save_path, "PNG") - self.stdout.write(f"Saved image at {save_path}") - except IOError as e: - self.stderr.write(f"Failed to save image from {url}: {e}") - - def create_thumbnail(self, image_path, thumbnail_path): - """Create a thumbnail of an image using shared utility.""" - try: - with Image.open(image_path) as original_img: - thumbnail = create_thumbnail_image(original_img) - thumbnail.save(thumbnail_path, "PNG") - self.stdout.write(f"Created thumbnail at {thumbnail_path}") - except IOError as e: - self.stderr.write(f"Failed to create thumbnail for {image_path}: {e}") - - def process_images(self, csv_file_path): - """Process images from a CSV file.""" - with open(csv_file_path, encoding="utf-8-sig") as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - image_url = row["image"] - instrument_wikidata_id = row["instrument"].split("/")[-1] - save_path_png = os.path.join( - self.original_img_dir, f"{instrument_wikidata_id}.png" - ) - thumbnail_path = os.path.join( - self.thumbnail_dir, f"{instrument_wikidata_id}.png" - ) - - if not os.path.exists(save_path_png): - self.download_image_as_png(image_url, save_path_png) - - if not os.path.exists(thumbnail_path) and os.path.exists(save_path_png): - self.create_thumbnail(save_path_png, thumbnail_path) - - def handle(self, *args, **options): - """Handle the command.""" - self.process_images(self.CSV_PATH) - self.stdout.write("Images downloaded and thumbnails created") +"""This module downloads images from the web and creates thumbnails for the VIM instruments.""" + +import csv +import glob +import os +import shutil +import time +from io import BytesIO +from urllib.parse import urlparse + +import requests +from PIL import Image +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry +from django.conf import 
settings +from django.core.exceptions import ValidationError +from django.core.management.base import BaseCommand +from VIM.apps.instruments.constants import ( + CONTENT_TYPE_MAP, + CONVERT_FORMATS, + EXT_TO_PIL_FORMAT, + PRESERVE_FORMATS, + SKIP_FORMATS, +) +from VIM.apps.instruments.utils.image_processor import create_thumbnail_image +from VIM.apps.instruments.utils.validators import validate_image_extension + + +class Command(BaseCommand): + """Django management command to download images and create thumbnails for instruments.""" + + USER_AGENT = "UMIL/0.1.0 (https://vim.simssa.ca/; https://ddmal.music.mcgill.ca/)" + OUTPUT_DIR = os.path.join(settings.MEDIA_ROOT, "downloads") + CSV_PATH = "startup_data/umil_instruments_15July_2025.csv" + + help = "Download images and create thumbnails for instruments" + + def __init__(self): + super().__init__() + self.session = requests.Session() + retry_strategy = Retry( + total=3, + status_forcelist=[429, 500, 502, 503, 504], + backoff_factor=2, + respect_retry_after_header=True, + ) + adapter = HTTPAdapter(max_retries=retry_strategy) + self.session.mount("https://", adapter) + self.session.mount("http://", adapter) + self.session.headers.update({"User-Agent": self.USER_AGENT}) + self.original_img_dir = os.path.join(self.OUTPUT_DIR, "original") + self.thumbnail_dir = os.path.join(self.OUTPUT_DIR, "thumbnail") + os.makedirs(self.original_img_dir, exist_ok=True) + os.makedirs(self.thumbnail_dir, exist_ok=True) + + def detect_format(self, response, url): + """Detect image format from Content-Type header, falling back to URL extension.""" + content_type = response.headers.get("Content-Type", "").split(";")[0].strip() + ext = CONTENT_TYPE_MAP.get(content_type) + if ext: + return ext + + # Fall back to URL file extension + path = urlparse(url).path + url_ext = os.path.splitext(path)[1].lstrip(".").lower() + if url_ext in PRESERVE_FORMATS | CONVERT_FORMATS | SKIP_FORMATS: + return url_ext + + return None + + def download_image(self, 
url, ins_id): + """Download an image, preserving its original format when possible. + + Returns the file extension used, or None if the download was skipped/failed. + """ + try: + response = self.session.get(url, stream=True, timeout=10) + response.raise_for_status() + except requests.RequestException as e: + self.stderr.write(f"Failed to download image from {url}: {e}") + return None + + fmt = self.detect_format(response, url) + if fmt is None: + self.stderr.write(f"Unknown image format for {url}, skipping") + return None + + if fmt in SKIP_FORMATS: + self.stderr.write(f"Skipping unsupported format '{fmt}' for {url}") + return None + + content = response.content + + if fmt == "svg": + save_path = os.path.join(self.original_img_dir, f"{ins_id}.svg") + with open(save_path, "wb") as f: + f.write(content) + self.stdout.write(f"Saved SVG image at {save_path}") + return "svg" + + if fmt in PRESERVE_FORMATS: + ext = fmt if fmt != "jpeg" else "jpg" + save_path = os.path.join(self.original_img_dir, f"{ins_id}.{ext}") + with open(save_path, "wb") as f: + f.write(content) + self.stdout.write(f"Saved image at {save_path}") + return ext + + # CONVERT_FORMATS (tiff, bmp): convert via PIL + try: + img = Image.open(BytesIO(content)) + has_transparency = img.mode in ("RGBA", "LA", "PA") or ( + img.mode == "P" and "transparency" in img.info + ) + if has_transparency: + ext = "png" + save_path = os.path.join(self.original_img_dir, f"{ins_id}.png") + img.save(save_path, "PNG") + else: + ext = "jpg" + save_path = os.path.join(self.original_img_dir, f"{ins_id}.jpg") + img.convert("RGB").save(save_path, "JPEG", quality=90) + self.stdout.write(f"Converted {fmt} to {ext} at {save_path}") + return ext + except IOError as e: + self.stderr.write(f"Failed to convert image from {url}: {e}") + return None + + def create_thumbnail(self, image_path, ext): + """Create a thumbnail preserving the original format. + + For SVG files, the original is copied as the thumbnail since SVG scales natively. 
+ """ + ins_filename = os.path.basename(image_path) + thumbnail_path = os.path.join(self.thumbnail_dir, ins_filename) + + if ext == "svg": + # SVG scales natively; copy as thumbnail + shutil.copy2(image_path, thumbnail_path) + self.stdout.write(f"Copied SVG as thumbnail at {thumbnail_path}") + return + + try: + with Image.open(image_path) as original_img: + thumbnail = create_thumbnail_image(original_img) + pil_format = EXT_TO_PIL_FORMAT.get(ext, "PNG") + save_kwargs = {} + if pil_format == "JPEG": + save_kwargs["quality"] = 90 + thumbnail.save(thumbnail_path, pil_format, **save_kwargs) + self.stdout.write(f"Created thumbnail at {thumbnail_path}") + except IOError as e: + self.stderr.write(f"Failed to create thumbnail for {image_path}: {e}") + + def find_existing_image(self, directory, ins_id): + """Find an existing image file for the given instrument ID, regardless of extension.""" + matches = glob.glob(os.path.join(directory, f"{ins_id}.*")) + valid_exts = PRESERVE_FORMATS | CONVERT_FORMATS + return next( + (m for m in matches if os.path.splitext(m)[1].lstrip(".") in valid_exts), + None, + ) + + def process_images(self, csv_file_path): + """Process images from a CSV file.""" + with open(csv_file_path, encoding="utf-8-sig") as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + image_url = row["image"] + ins_id = row["instrument"].split("/")[-1] + + existing = self.find_existing_image(self.original_img_dir, ins_id) + if existing: + # Validate extension of existing file + try: + ext = validate_image_extension(existing) + except ValidationError as e: + self.stderr.write( + self.style.ERROR( + f"Skipping {ins_id} (invalid existing file): {e}" + ) + ) + continue + else: + ext = self.download_image(image_url, ins_id) + time.sleep(1) + + if ext is None: + continue + + original_path = os.path.join(self.original_img_dir, f"{ins_id}.{ext}") + existing_thumb = self.find_existing_image(self.thumbnail_dir, ins_id) + if not existing_thumb and 
os.path.exists(original_path): + self.create_thumbnail(original_path, ext) + + def handle(self, *args, **options): + """Handle the command.""" + self.process_images(self.CSV_PATH) + self.stdout.write("Images downloaded and thumbnails created") diff --git a/web-app/django/VIM/apps/instruments/management/commands/dump_instrument_to_csv.py b/web-app/django/VIM/apps/instruments/management/commands/dump_instrument_to_csv.py new file mode 100644 index 00000000..3d165ae8 --- /dev/null +++ b/web-app/django/VIM/apps/instruments/management/commands/dump_instrument_to_csv.py @@ -0,0 +1,128 @@ +"""Management command that exports instrument names to a CSV file.""" + +import csv +from pathlib import Path + +from django.core.management.base import BaseCommand, CommandError +from django.utils import timezone + +from VIM.apps.instruments.models import InstrumentName + + +class Command(BaseCommand): + """Dump every instrument name record to a CSV file.""" + + help = "Exports instrument names from the database to a CSV file." + + def add_arguments(self, parser) -> None: + parser.add_argument( + "-o", + "--output", + help="Destination CSV path. 
Defaults to dumped_csv/instrument_dump_.csv.", + ) + parser.add_argument( + "--delimiter", + default=",", + help="Single-character delimiter to use (default: %(default)s).", + ) + parser.add_argument( + "--force", + action="store_true", + help="Overwrite the output file if it already exists.", + ) + parser.add_argument( + "--batch-size", + type=int, + default=1000, + help="Number of rows to stream per batch while querying the database.", + ) + + def handle(self, *args, **options) -> None: + delimiter: str = options["delimiter"] + if len(delimiter) != 1: + raise CommandError("Delimiter must be a single character.") + + batch_size: int = options["batch_size"] + if batch_size < 1: + raise CommandError("Batch size must be a positive integer.") + + timestamp = timezone.now().strftime("%Y-%m-%d_%H-%M-%S") + default_dir = Path("dumped_csv") + default_filename = f"instrument_dump_{timestamp}.csv" + + if options["output"]: + output_path = Path(options["output"]).expanduser().resolve() + else: + output_path = (default_dir / default_filename).expanduser().resolve() + + if output_path.exists() and not options["force"]: + raise CommandError( + f"{output_path} already exists. Use --force to overwrite the file." 
+ ) + output_path.parent.mkdir(parents=True, exist_ok=True) + + fieldnames = [ + "instrument_name_id", + "instrument_umil_id", + "instrument_wikidata_id", + "instrument_source", + "language_wikidata_code", + "language_en_label", + "language_autonym", + "name", + "source_name", + "verification_status", + "umil_label", + "contributor_username", + "on_wikidata", + ] + + queryset = ( + InstrumentName.objects.select_related( + "instrument", "language", "contributor" + ) + .order_by("instrument__umil_id", "language__wikidata_code", "name") + .iterator(chunk_size=batch_size) + ) + + self.stdout.write(f"Writing instrument names to {output_path} ...") + + rows_written = 0 + with output_path.open("w", newline="", encoding="utf-8") as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter=delimiter) + writer.writeheader() + for instrument_name in queryset: + instrument = instrument_name.instrument + language = instrument_name.language + contributor = instrument_name.contributor + writer.writerow( + { + "instrument_name_id": instrument_name.id, + "instrument_umil_id": instrument.umil_id or "", + "instrument_wikidata_id": instrument.wikidata_id or "", + "instrument_source": instrument.source, + "language_wikidata_code": language.wikidata_code, + "language_en_label": language.en_label, + "language_autonym": language.autonym, + "name": instrument_name.name, + "source_name": instrument_name.source_name, + "verification_status": instrument_name.verification_status, + "umil_label": instrument_name.umil_label, + "contributor_username": contributor.username, + "on_wikidata": instrument_name.on_wikidata, + } + ) + rows_written += 1 + + if rows_written == 0: + self.stdout.write( + self.style.WARNING( + "No instrument names found. The CSV only has headers." + ) + ) + else: + self.stdout.write( + self.style.SUCCESS( + f"Export complete. {rows_written} instrument names written to {output_path}." 
+ ) + ) diff --git a/web-app/django/VIM/apps/instruments/management/commands/import_instruments.py b/web-app/django/VIM/apps/instruments/management/commands/import_instruments.py index 9bf51cc2..7dc85eab 100644 --- a/web-app/django/VIM/apps/instruments/management/commands/import_instruments.py +++ b/web-app/django/VIM/apps/instruments/management/commands/import_instruments.py @@ -1,14 +1,17 @@ """This module imports instrument objects from Wikidata for the VIM project.""" import csv +import glob import os from typing import Optional import requests from django.conf import settings from django.contrib.auth import get_user_model from django.core.management.base import BaseCommand +from django.core.exceptions import ValidationError from django.db import transaction from VIM.apps.instruments.models import Instrument, InstrumentName, Language, AVResource +from VIM.apps.instruments.utils.validators import validate_image_extension class Command(BaseCommand): @@ -106,7 +109,10 @@ def get_instrument_data(self, instrument_ids: list[str]) -> list[dict]: return instrument_data def create_database_objects( - self, instrument_attrs: dict, original_img_path: str, thumbnail_img_path: str + self, + instrument_attrs: dict, + original_img_path: Optional[str], + thumbnail_img_path: Optional[str], ) -> None: """ Given a dictionary of instrument attributes and a url to an instrument image, @@ -190,23 +196,60 @@ def create_database_objects( }, ) - img_obj = AVResource.objects.create( - instrument=instrument, - type="image", - format=original_img_path.split(".")[-1], - url=original_img_path, - source_name="Wikidata", - ) - instrument.default_image = img_obj - thumbnail_obj = AVResource.objects.create( - instrument=instrument, - type="image", - format=thumbnail_img_path.split(".")[-1], - url=thumbnail_img_path, - source_name="Wikidata", - ) - instrument.thumbnail = thumbnail_obj - instrument.save() + # Create AVResource objects only when both image paths are available + if 
original_img_path and thumbnail_img_path: + # Validate extensions before creating AVResource objects + try: + original_format = validate_image_extension(original_img_path) + thumbnail_format = validate_image_extension(thumbnail_img_path) + except ValidationError as e: + self.stderr.write( + self.style.ERROR( + f"Skipping images for {instrument.umil_id} (invalid format): {e}" + ) + ) + return + + img_obj, _ = AVResource.objects.update_or_create( + instrument=instrument, + url=original_img_path, + defaults={ + "type": "image", + "format": original_format, + "source_name": "Wikidata", + }, + ) + instrument.default_image = img_obj + thumbnail_obj, _ = AVResource.objects.update_or_create( + instrument=instrument, + url=thumbnail_img_path, + defaults={ + "type": "image", + "format": thumbnail_format, + "source_name": "Wikidata", + }, + ) + instrument.thumbnail = thumbnail_obj + instrument.save() + + @staticmethod + def find_image_file(directory, ins_id): + """Find an image file for the given instrument ID, regardless of extension. + + Returns the relative path (matching the directory format used for AVResource.url) + or None if no file is found. 
+ """ + # directory is a relative path like "downloads/original" + # The actual files are under MEDIA_ROOT + abs_dir = os.path.join(settings.MEDIA_ROOT, directory) + matches = glob.glob(os.path.join(abs_dir, f"{ins_id}.*")) + if not matches: + return None + # Return relative path (for AVResource.url storage) + filename = os.path.basename( + matches[0] + ) # each instrument is guaranteed to have at most one image + return os.path.join(directory, filename) def handle(self, *args, **options) -> None: # Use smaller test dataset when in test mode @@ -220,7 +263,7 @@ def handle(self, *args, **options) -> None: reader = csv.DictReader(csvfile) instrument_list: list[dict] = list(reader) self.language_map = Language.objects.in_bulk(field_name="wikidata_code") - img_dir = "instruments/images/instrument_imgs" + img_dir = "downloads" with transaction.atomic(): for ins_i in range(0, len(instrument_list), 50): ins_ids_subset: list[str] = [ @@ -229,12 +272,18 @@ def handle(self, *args, **options) -> None: ] ins_data: list[dict] = self.get_instrument_data(ins_ids_subset) for instrument_attrs, ins_id in zip(ins_data, ins_ids_subset): - original_img_path = os.path.join( - img_dir, "original", f"{ins_id}.png" + original_img_path = self.find_image_file( + os.path.join(img_dir, "original"), ins_id ) - thumbnail_img_path = os.path.join( - img_dir, "thumbnail", f"{ins_id}.png" + thumbnail_img_path = self.find_image_file( + os.path.join(img_dir, "thumbnail"), ins_id ) + if not original_img_path or not thumbnail_img_path: + self.stderr.write( + self.style.WARNING( + f"Missing image files for {ins_id}, creating instrument without images" + ) + ) self.create_database_objects( instrument_attrs, original_img_path, thumbnail_img_path ) diff --git a/web-app/django/VIM/apps/instruments/migrations/0013_alter_avresource_format_and_more.py b/web-app/django/VIM/apps/instruments/migrations/0013_alter_avresource_format_and_more.py new file mode 100644 index 00000000..353ac03c --- /dev/null +++
b/web-app/django/VIM/apps/instruments/migrations/0013_alter_avresource_format_and_more.py @@ -0,0 +1,36 @@ +# Generated by Django 4.2.5 on 2026-02-16 23:09 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("instruments", "0012_create_instrument_feature"), + ] + + operations = [ + migrations.AlterField( + model_name="avresource", + name="format", + field=models.CharField( + choices=[ + ("jpg", "JPEG"), + ("jpeg", "JPEG (alternative extension)"), + ("png", "PNG"), + ("gif", "GIF"), + ("webp", "WebP"), + ("svg", "SVG (Scalable Vector Graphics)"), + ], + help_text="Image file format extension", + max_length=10, + ), + ), + migrations.AddConstraint( + model_name="avresource", + constraint=models.UniqueConstraint( + condition=models.Q(("url__gt", "")), + fields=("instrument", "url"), + name="unique_instrument_url", + ), + ), + ] diff --git a/web-app/django/VIM/apps/instruments/models/avresource.py b/web-app/django/VIM/apps/instruments/models/avresource.py index 57059994..55af82be 100644 --- a/web-app/django/VIM/apps/instruments/models/avresource.py +++ b/web-app/django/VIM/apps/instruments/models/avresource.py @@ -1,6 +1,7 @@ import os from django.core.exceptions import ValidationError from django.db import models +from VIM.apps.instruments.constants import IMAGE_FORMAT_CHOICES def avresource_upload_path(instance, filename): @@ -9,11 +10,10 @@ def avresource_upload_path(instance, filename): Format: uploads/instrument_imgs/{original|thumbnail}/{umil_id}.{ext} Uses instance.is_thumbnail flag to determine subdirectory. - Thumbnails always use .png extension. 
Examples: - Original: uploads/instrument_imgs/original/UMIL-00001.jpg - - Thumbnail: uploads/instrument_imgs/thumbnail/UMIL-00001.png + - Thumbnail: uploads/instrument_imgs/thumbnail/UMIL-00001.jpg """ # Get file extension from original filename ext = os.path.splitext(filename)[1].lower() @@ -29,10 +29,6 @@ def avresource_upload_path(instance, filename): # Determine subdirectory based on is_thumbnail flag subdir = "thumbnail" if getattr(instance, "is_thumbnail", False) else "original" - # For thumbnails, always use .png extension - if subdir == "thumbnail": - ext = ".png" - filename = f"{umil_id}{ext}" return os.path.join("uploads", "instrument_imgs", subdir, filename) @@ -62,8 +58,11 @@ def __init__(self, *args, **kwargs): help_text="What type of audiovisual resource is this?", ) format = models.CharField( - blank=False - ) # This should eventually be a choice field with supported formats + max_length=10, + choices=IMAGE_FORMAT_CHOICES, + blank=False, + help_text="Image file format extension", + ) url = models.CharField(blank=True, max_length=1000) file = models.ImageField( upload_to=avresource_upload_path, @@ -102,6 +101,15 @@ def __init__(self, *args, **kwargs): help_text="User who uploaded this resource (null for imports)", ) + class Meta: + constraints = [ + models.UniqueConstraint( + fields=["instrument", "url"], + condition=models.Q(url__gt=""), + name="unique_instrument_url", + ) + ] + def clean(self): super().clean() if not self.url and not self.file: diff --git a/web-app/django/VIM/apps/instruments/utils/image_processor.py b/web-app/django/VIM/apps/instruments/utils/image_processor.py index ed49b4cc..8b2839c7 100644 --- a/web-app/django/VIM/apps/instruments/utils/image_processor.py +++ b/web-app/django/VIM/apps/instruments/utils/image_processor.py @@ -1,6 +1,7 @@ """Utility functions for processing instrument images.""" from PIL import Image +from VIM.apps.instruments.constants import PIL_FORMAT_TO_EXTENSION def calculate_compression_ratio(width: int, 
height: int) -> float: @@ -63,10 +64,10 @@ def process_uploaded_image(uploaded_file, umil_id: str) -> tuple: umil_id: UMIL identifier (e.g., "UMIL-00001") Returns: - tuple: (original_content, thumbnail_content, file_extension) + tuple: (original_content, thumbnail_content, file_extension) - original_content: ContentFile ready for av_resource.file.save() - thumbnail_content: ContentFile ready for thumbnail_av.file.save() - - file_extension: Original file extension (e.g., 'jpg', 'png') + - file_extension: Original and thumbnail file extension (e.g., 'jpg', 'png') Raises: IOError: If image processing fails @@ -77,8 +78,7 @@ def process_uploaded_image(uploaded_file, umil_id: str) -> tuple: # Determine format from actual image format (not just content type) with Image.open(uploaded_file) as img: - format_map = {"JPEG": "jpg", "PNG": "png", "GIF": "gif", "WEBP": "webp"} - ext = format_map.get(img.format, "jpg") + pil_format, ext = PIL_FORMAT_TO_EXTENSION.get(img.format, ("JPEG", "jpg")) # Reset file pointer after reading uploaded_file.seek(0) @@ -86,15 +86,17 @@ def process_uploaded_image(uploaded_file, umil_id: str) -> tuple: # Read original content into memory original_content = ContentFile(uploaded_file.read(), name=f"{umil_id}.{ext}") - # Generate thumbnail + # Generate thumbnail in the same format as original uploaded_file.seek(0) with Image.open(uploaded_file) as img: thumbnail = create_thumbnail_image(img) - # Convert thumbnail to PNG bytes thumb_buffer = BytesIO() - thumbnail.save(thumb_buffer, "PNG") + save_kwargs = {} + if pil_format == "JPEG": + save_kwargs["quality"] = 90 + thumbnail.save(thumb_buffer, pil_format, **save_kwargs) thumb_buffer.seek(0) - thumbnail_content = ContentFile(thumb_buffer.read(), name=f"{umil_id}.png") + thumbnail_content = ContentFile(thumb_buffer.read(), name=f"{umil_id}.{ext}") return original_content, thumbnail_content, ext diff --git a/web-app/django/VIM/apps/instruments/utils/image_urls.py
b/web-app/django/VIM/apps/instruments/utils/image_urls.py index f632a3c3..45a27a01 100644 --- a/web-app/django/VIM/apps/instruments/utils/image_urls.py +++ b/web-app/django/VIM/apps/instruments/utils/image_urls.py @@ -5,7 +5,8 @@ places: uploads/… → MEDIA_URL + path (user-uploaded files) - anything else → STATIC_URL + path (Wikidata / bundled static files) + downloads/… → MEDIA_URL + path (downloaded Wikidata images) + anything else → STATIC_URL + path (bundled static files) This module owns that rule once. Callers that already have an absolute URL (starts with "/" or "http") or an empty string can skip the call entirely. @@ -19,7 +20,8 @@ def resolve_image_url(path: str) -> str: Args: path: Relative path as stored in the database or returned by Solr - (e.g. ``"uploads/instrument_imgs/thumbnail/UMIL-00001.png"`` + (e.g. ``"uploads/instrument_imgs/thumbnail/UMIL-00001.png"``, + ``"downloads/original/Q12345.jpg"``, or ``"instruments/images/…"``). An empty string or an already-absolute URL (``/…`` / ``http…``) is returned unchanged. @@ -27,7 +29,7 @@ def resolve_image_url(path: str) -> str: if not path or path.startswith(("/", "http")): return path - if path.startswith("uploads/"): + if path.startswith(("uploads/", "downloads/")): return f"{settings.MEDIA_URL}{path}" return f"{settings.STATIC_URL}{path}" diff --git a/web-app/django/VIM/apps/instruments/utils/validators.py b/web-app/django/VIM/apps/instruments/utils/validators.py index 8d07bbf4..130e4685 100644 --- a/web-app/django/VIM/apps/instruments/utils/validators.py +++ b/web-app/django/VIM/apps/instruments/utils/validators.py @@ -20,10 +20,12 @@ 4. 
Enable reuse across views and management commands """ +import os import re from django.core.exceptions import ValidationError from django.conf import settings from typing import List, Tuple +from VIM.apps.instruments.constants import ALLOWED_IMAGE_EXTENSIONS, ALLOWED_IMAGE_TYPES def validate_instrument_names(instrument_names: List["InstrumentName"]) -> None: @@ -133,9 +135,9 @@ def validate_hbs_classification(hbs_class: str) -> bool: Validate Hornbostel-Sachs classification format. Valid formats: - - Two digits minimum (e.g., "11", "21") - - With optional sub-classifications (e.g., "21.2", "311.121") - - First digit must be 1-5, second digit 0-9 + - At least 1 character, only digits (1-9), dot, dash, and plus permitted + - First character must be 1-5 + - If there is a second character, it must be 1-5 Args: hbs_class: Hornbostel-Sachs classification string to validate @@ -145,16 +147,25 @@ def validate_hbs_classification(hbs_class: str) -> bool: Example: >>> validate_hbs_classification("11") # True - >>> validate_hbs_classification("21.2") # True - >>> validate_hbs_classification("311.121") # True - >>> validate_hbs_classification("6") # False (needs 2 digits) - >>> validate_hbs_classification("11x") # False (invalid format) + >>> validate_hbs_classification("21.2+2") # True + >>> validate_hbs_classification("6") # False (first char not 1-5) + >>> validate_hbs_classification("11x") # False (invalid char) """ if not hbs_class: return False - # Pattern: one digit (1-5), followed by another digit, optionally followed by more .digits - pattern = r"^[1-5][0-9](\.[0-9]+)*$" - return bool(re.match(pattern, hbs_class)) and len(hbs_class) >= 2 + # Only digits (1-9), dot, dash, plus permitted + if not re.match(r"^[1-9.\-+]+$", hbs_class): + return False + # First character must be 1-5 + first_char = hbs_class[0] + if not re.match(r"[1-5]", first_char): + return False + # If there is a second character, it must be 1-5 + if len(hbs_class) > 1: + second_char = hbs_class[1] + if 
not re.match(r"[1-5]", second_char): + return False + return True def validate_image_file(image_file) -> Tuple[bool, str]: @@ -188,10 +199,59 @@ def validate_image_file(image_file) -> Tuple[bool, str]: # Check content type content_type = image_file.content_type - if content_type not in settings.ALLOWED_IMAGE_TYPES: + if content_type not in ALLOWED_IMAGE_TYPES: return ( False, f"Invalid image type. Allowed types: JPEG, PNG, GIF, WebP", ) return True, "" + + +def validate_image_extension(file_path: str) -> str: + """ + Validate image file extension for import/download operations. + + This validator is used by management commands (import_instruments, download_imgs) + that process files from external sources where MIME type validation is not available. + + NOTE: User uploads do NOT use this function - they are validated via validate_image_file() + which checks MIME types. This is specifically for import/download paths. + + Args: + file_path: Path to image file (URL or local path) + + Returns: + str: Clean extension without leading dot (e.g., "jpg", "png", "svg") + + Raises: + ValidationError: If extension is missing or not in ALLOWED_IMAGE_EXTENSIONS + + Example: + >>> validate_image_extension("https://example.com/image.jpg") + 'jpg' + >>> validate_image_extension("/path/to/file.png") + 'png' + >>> validate_image_extension("malicious.exe") + ValidationError: Invalid file extension '.exe'... + """ + # Extract extension from path/URL + ext = os.path.splitext(file_path)[1].lower() + + if not ext: + raise ValidationError( + f"File has no extension: {file_path}. " + f"Valid image files must have an extension." + ) + + # Remove leading dot for comparison + ext_clean = ext.lstrip(".") + + # Validate against allowed extensions + if ext_clean not in ALLOWED_IMAGE_EXTENSIONS: + raise ValidationError( + f"Invalid file extension '.{ext_clean}' in {file_path}. 
" + f"Allowed formats: {', '.join(ALLOWED_IMAGE_EXTENSIONS)}" + ) + + return ext_clean diff --git a/web-app/django/VIM/apps/instruments/views/create_instrument.py b/web-app/django/VIM/apps/instruments/views/create_instrument.py index 9784d96c..d53be56a 100644 --- a/web-app/django/VIM/apps/instruments/views/create_instrument.py +++ b/web-app/django/VIM/apps/instruments/views/create_instrument.py @@ -277,9 +277,11 @@ def create_instrument(request: HttpRequest) -> JsonResponse: # rollback (e.g. bulk_create failure) cannot leave orphaned files on disk. if image_file: # Process image to get ContentFile objects - original_content, thumbnail_content, img_format = ( - process_uploaded_image(image_file, umil_id) - ) + ( + original_content, + thumbnail_content, + img_format, + ) = process_uploaded_image(image_file, umil_id) # Create AVResource for original image (file saved in on_commit) av_resource = AVResource( @@ -297,7 +299,7 @@ def create_instrument(request: HttpRequest) -> JsonResponse: thumbnail_av = AVResource( instrument=instrument, type="image", - format="png", + format=img_format, source_name=image_source, created_by=request.user, is_thumbnail=True, @@ -316,7 +318,7 @@ def save_image_files(): f"{umil_id}.{img_format}", original_content, save=True ) thumbnail_av.file.save( - f"{umil_id}.png", thumbnail_content, save=True + f"{umil_id}.{img_format}", thumbnail_content, save=True ) transaction.on_commit(save_image_files) diff --git a/web-app/django/VIM/settings.py b/web-app/django/VIM/settings.py index e4d3a2bb..fdddcf27 100644 --- a/web-app/django/VIM/settings.py +++ b/web-app/django/VIM/settings.py @@ -179,10 +179,9 @@ } } -# Media files (user uploads) +# Media files (user uploads & wikidata images) MEDIA_ROOT = ROOT_DIR / "media" MEDIA_URL = "/media/" -ALLOWED_IMAGE_TYPES = ["image/jpeg", "image/png", "image/gif", "image/webp"] MAX_IMAGE_SIZE = 5 * 1024 * 1024 # 5MB # Default primary key field type diff --git 
a/web-app/django/VIM/templates/instruments/includes/addName.html b/web-app/django/VIM/templates/instruments/includes/addName.html index 739359a6..a00162e6 100644 --- a/web-app/django/VIM/templates/instruments/includes/addName.html +++ b/web-app/django/VIM/templates/instruments/includes/addName.html @@ -34,6 +34,14 @@