From 42defa224900ad2e4aa8007419c729eb8ea78fe5 Mon Sep 17 00:00:00 2001 From: nleanba Date: Thu, 1 Sep 2022 19:12:21 +0200 Subject: [PATCH 01/90] move old stuff out of the way --- .gitignore | 65 -- .vscode/settings.json | 4 +- 3doc.config.js | 0 LICENSE | 2 +- find-draft.txt | 2 - .dockerignore => old/.dockerignore | 0 old/.gitignore | 9 + DEV-README.md => old/DEV-README.md | 0 Dockerfile => old/Dockerfile | 0 config-tdb.ttl => old/config-tdb.ttl | 0 .../dev-docker-compose.yml | 0 docker-cmd.sh => old/docker-cmd.sh | 0 docker-compose.yml => old/docker-compose.yml | 0 {lib => old/lib}/datastore.js | 0 {lib => old/lib}/metadeleter.js | 0 {lib => old/lib}/metafinder.js | 0 {lib => old/lib}/metastorer.js | 0 {lib => old/lib}/pdfprocessor.js | 0 {lib => old/lib}/server.js | 0 package.json => old/package.json | 0 tdt.fish => old/tdt.fish | 0 yarn.lock | 624 ------------------ 22 files changed, 13 insertions(+), 693 deletions(-) delete mode 100644 .gitignore delete mode 100644 3doc.config.js delete mode 100644 find-draft.txt rename .dockerignore => old/.dockerignore (100%) create mode 100644 old/.gitignore rename DEV-README.md => old/DEV-README.md (100%) rename Dockerfile => old/Dockerfile (100%) rename config-tdb.ttl => old/config-tdb.ttl (100%) rename dev-docker-compose.yml => old/dev-docker-compose.yml (100%) rename docker-cmd.sh => old/docker-cmd.sh (100%) rename docker-compose.yml => old/docker-compose.yml (100%) rename {lib => old/lib}/datastore.js (100%) rename {lib => old/lib}/metadeleter.js (100%) rename {lib => old/lib}/metafinder.js (100%) rename {lib => old/lib}/metastorer.js (100%) rename {lib => old/lib}/pdfprocessor.js (100%) rename {lib => old/lib}/server.js (100%) rename package.json => old/package.json (100%) rename tdt.fish => old/tdt.fish (100%) delete mode 100644 yarn.lock diff --git a/.gitignore b/.gitignore deleted file mode 100644 index c797f99..0000000 --- a/.gitignore +++ /dev/null @@ -1,65 +0,0 @@ -# Logs -logs -*.log -npm-debug.log* -yarn-debug.log* -yarn-error.log* - -# Runtime data -pids -*.pid -*.seed -*.pid.lock - -# Directory for instrumented libs generated by jscoverage/JSCover -lib-cov - -# Coverage directory used by tools like istanbul -coverage - -# nyc test coverage -.nyc_output - -# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) -.grunt - -# Bower dependency directory (https://bower.io/) -bower_components - -# node-waf configuration -.lock-wscript - -# Compiled binary addons (https://nodejs.org/api/addons.html) -build/Release - -# Dependency directories -node_modules/ -jspm_packages/ - -# TypeScript v1 declaration files -typings/ - -# Optional npm cache directory -.npm - -# Optional eslint cache -.eslintcache - -# Optional REPL history -.node_repl_history - -# Output of 'npm pack' -*.tgz - -# Yarn Integrity file -.yarn-integrity - -# dotenv environment variables file -.env - -# next.js build output -.next - -blobs - -fuseki-base \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 3d41712..85ece8f 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,5 @@ { - "npm.packageManager": "yarn" + "npm.packageManager": "yarn", + "deno.enable": true, + "deno.unstable": true } \ No newline at end of file diff --git a/3doc.config.js b/3doc.config.js deleted file mode 100644 index e69de29..0000000 diff --git a/LICENSE b/LICENSE index e2b8549..2ec5ef9 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2018 Reto Gmür +Copyright (c) 2022 Noam Bachmann 
& Reto Gmür Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/find-draft.txt b/find-draft.txt deleted file mode 100644 index df2b38d..0000000 --- a/find-draft.txt +++ /dev/null @@ -1,2 +0,0 @@ -Encode where necessary: GET /doc?tag=pay&tag=prority(>3)&tag=not(personal)&search=helsinki - /doc?tag=pay&tag=prority(>3)¬tag=personal&text=helsinki diff --git a/.dockerignore b/old/.dockerignore similarity index 100% rename from .dockerignore rename to old/.dockerignore diff --git a/old/.gitignore b/old/.gitignore new file mode 100644 index 0000000..ccf4377 --- /dev/null +++ b/old/.gitignore @@ -0,0 +1,9 @@ +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +node_modules +blobs +fuseki-base \ No newline at end of file diff --git a/DEV-README.md b/old/DEV-README.md similarity index 100% rename from DEV-README.md rename to old/DEV-README.md diff --git a/Dockerfile b/old/Dockerfile similarity index 100% rename from Dockerfile rename to old/Dockerfile diff --git a/config-tdb.ttl b/old/config-tdb.ttl similarity index 100% rename from config-tdb.ttl rename to old/config-tdb.ttl diff --git a/dev-docker-compose.yml b/old/dev-docker-compose.yml similarity index 100% rename from dev-docker-compose.yml rename to old/dev-docker-compose.yml diff --git a/docker-cmd.sh b/old/docker-cmd.sh similarity index 100% rename from docker-cmd.sh rename to old/docker-cmd.sh diff --git a/docker-compose.yml b/old/docker-compose.yml similarity index 100% rename from docker-compose.yml rename to old/docker-compose.yml diff --git a/lib/datastore.js b/old/lib/datastore.js similarity index 100% rename from lib/datastore.js rename to old/lib/datastore.js diff --git a/lib/metadeleter.js b/old/lib/metadeleter.js similarity index 100% rename from lib/metadeleter.js rename to old/lib/metadeleter.js diff --git a/lib/metafinder.js b/old/lib/metafinder.js similarity index 100% rename from lib/metafinder.js rename to old/lib/metafinder.js diff --git a/lib/metastorer.js b/old/lib/metastorer.js similarity index 100% rename from lib/metastorer.js rename to old/lib/metastorer.js diff --git a/lib/pdfprocessor.js b/old/lib/pdfprocessor.js similarity index 100% rename from lib/pdfprocessor.js rename to old/lib/pdfprocessor.js diff --git a/lib/server.js b/old/lib/server.js similarity index 100% rename from lib/server.js rename to old/lib/server.js diff --git a/package.json b/old/package.json similarity index 100% rename from package.json rename to old/package.json diff --git a/tdt.fish b/old/tdt.fish similarity index 100% rename from tdt.fish rename to old/tdt.fish diff --git a/yarn.lock b/yarn.lock deleted file mode 100644 index 1ad8e52..0000000 --- a/yarn.lock +++ /dev/null @@ -1,624 +0,0 @@ -# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. 
-# yarn lockfile v1 - - -accept@3.x.x: - version "3.0.2" - resolved "https://registry.yarnpkg.com/accept/-/accept-3.0.2.tgz#83e41cec7e1149f3fd474880423873db6c6cc9ac" - dependencies: - boom "7.x.x" - hoek "5.x.x" - -adm-zip@^0.4.16: - version "0.4.16" - resolved "https://registry.yarnpkg.com/adm-zip/-/adm-zip-0.4.16.tgz#cf4c508fdffab02c269cbc7f471a875f05570365" - integrity sha512-TFi4HBKSGfIKsK5YCkKaaFG2m4PEDyViZmEwof3MTIgzimHLto6muaHVpbrljdIvIrFZzEq/p4nafOeLcYegrg== - -ajv-keywords@^3.1.0: - version "3.2.0" - resolved "https://registry.yarnpkg.com/ajv-keywords/-/ajv-keywords-3.2.0.tgz#e86b819c602cf8821ad637413698f1dec021847a" - -ajv@^6.1.0: - version "6.5.2" - resolved "https://registry.yarnpkg.com/ajv/-/ajv-6.5.2.tgz#678495f9b82f7cca6be248dd92f59bff5e1f4360" - dependencies: - fast-deep-equal "^2.0.1" - fast-json-stable-stringify "^2.0.0" - json-schema-traverse "^0.4.1" - uri-js "^4.2.1" - -ammo@3.x.x: - version "3.0.1" - resolved "https://registry.yarnpkg.com/ammo/-/ammo-3.0.1.tgz#c79ceeac36fb4e55085ea3fe0c2f42bfa5f7c914" - dependencies: - hoek "5.x.x" - -archiver-utils@^2.1.0: - version "2.1.0" - resolved "https://registry.yarnpkg.com/archiver-utils/-/archiver-utils-2.1.0.tgz#e8a460e94b693c3e3da182a098ca6285ba9249e2" - integrity sha512-bEL/yUb/fNNiNTuUz979Z0Yg5L+LzLxGJz8x79lYmR54fmTIb6ob/hNQgkQnIUDWIFjZVQwl9Xs356I6BAMHfw== - dependencies: - glob "^7.1.4" - graceful-fs "^4.2.0" - lazystream "^1.0.0" - lodash.defaults "^4.2.0" - lodash.difference "^4.5.0" - lodash.flatten "^4.4.0" - lodash.isplainobject "^4.0.6" - lodash.union "^4.6.0" - normalize-path "^3.0.0" - readable-stream "^2.0.0" - -archiver@^3.1.1: - version "3.1.1" - resolved "https://registry.yarnpkg.com/archiver/-/archiver-3.1.1.tgz#9db7819d4daf60aec10fe86b16cb9258ced66ea0" - integrity sha512-5Hxxcig7gw5Jod/8Gq0OneVgLYET+oNHcxgWItq4TbhOzRLKNAFUb9edAftiMKXvXfCB0vbGrJdZDNq0dWMsxg== - dependencies: - archiver-utils "^2.1.0" - async "^2.6.3" - buffer-crc32 "^0.2.1" - glob "^7.1.4" - readable-stream "^3.4.0" - tar-stream "^2.1.0" - zip-stream "^2.1.2" - -async@^2.6.3: - version "2.6.3" - resolved "https://registry.yarnpkg.com/async/-/async-2.6.3.tgz#d72625e2344a3656e3a3ad4fa749fa83299d82ff" - integrity sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg== - dependencies: - lodash "^4.17.14" - -b64@4.x.x: - version "4.0.0" - resolved "https://registry.yarnpkg.com/b64/-/b64-4.0.0.tgz#c37f587f0a383c7019e821120e8c3f58f0d22772" - -balanced-match@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.0.tgz#89b4d199ab2bee49de164ea02b89ce462d71b767" - integrity sha1-ibTRmasr7kneFk6gK4nORi1xt2c= - -base64-js@^1.0.2: - version "1.3.1" - resolved "https://registry.yarnpkg.com/base64-js/-/base64-js-1.3.1.tgz#58ece8cb75dd07e71ed08c736abc5fac4dbf8df1" - integrity sha512-mLQ4i2QO1ytvGWFWmcngKO//JXAQueZvwEKtjgQFM4jIK0kU+ytMfplL8j+n5mspOfjHwoAg+9yhb7BwAHm36g== - -big-time@2.x.x: - version "2.0.1" - resolved "https://registry.yarnpkg.com/big-time/-/big-time-2.0.1.tgz#68c7df8dc30f97e953f25a67a76ac9713c16c9de" - -big.js@^3.1.3: - version "3.2.0" - resolved "https://registry.yarnpkg.com/big.js/-/big.js-3.2.0.tgz#a5fc298b81b9e0dca2e458824784b65c52ba588e" - -bl@^3.0.0: - version "3.0.0" - resolved "https://registry.yarnpkg.com/bl/-/bl-3.0.0.tgz#3611ec00579fd18561754360b21e9f784500ff88" - integrity sha512-EUAyP5UHU5hxF8BPT0LKW8gjYLhq1DQIcneOX/pL/m2Alo+OYDQAJlHq+yseMP50Os2nHXOSic6Ss3vSQeyf4A== - dependencies: - readable-stream "^3.0.1" - -boom@7.x.x: - version "7.2.0" 
- resolved "https://registry.yarnpkg.com/boom/-/boom-7.2.0.tgz#2bff24a55565767fde869ec808317eb10c48e966" - dependencies: - hoek "5.x.x" - -bounce@1.x.x: - version "1.2.0" - resolved "https://registry.yarnpkg.com/bounce/-/bounce-1.2.0.tgz#e3bac68c73fd256e38096551efc09f504873c8c8" - dependencies: - boom "7.x.x" - hoek "5.x.x" - -brace-expansion@^1.1.7: - version "1.1.11" - resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd" - integrity sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA== - dependencies: - balanced-match "^1.0.0" - concat-map "0.0.1" - -buffer-crc32@^0.2.1, buffer-crc32@^0.2.13: - version "0.2.13" - resolved "https://registry.yarnpkg.com/buffer-crc32/-/buffer-crc32-0.2.13.tgz#0d333e3f00eac50aa1454abd30ef8c2a5d9a7242" - integrity sha1-DTM+PwDqxQqhRUq9MO+MKl2ackI= - -buffer@^5.1.0: - version "5.4.3" - resolved "https://registry.yarnpkg.com/buffer/-/buffer-5.4.3.tgz#3fbc9c69eb713d323e3fc1a895eee0710c072115" - integrity sha512-zvj65TkFeIt3i6aj5bIvJDzjjQQGs4o/sNoezg1F1kYap9Nu2jcUdpwzRSJTHMMzG0H7bZkn4rNQpImhuxWX2A== - dependencies: - base64-js "^1.0.2" - ieee754 "^1.1.4" - -call@5.x.x: - version "5.0.1" - resolved "https://registry.yarnpkg.com/call/-/call-5.0.1.tgz#ac1b5c106d9edc2a17af2a4a4f74dd4f0c06e910" - dependencies: - boom "7.x.x" - hoek "5.x.x" - -catbox-memory@3.x.x: - version "3.1.2" - resolved "https://registry.yarnpkg.com/catbox-memory/-/catbox-memory-3.1.2.tgz#4aeec1bc994419c0f7e60087f172aaedd9b4911c" - dependencies: - big-time "2.x.x" - boom "7.x.x" - hoek "5.x.x" - -catbox@10.x.x: - version "10.0.2" - resolved "https://registry.yarnpkg.com/catbox/-/catbox-10.0.2.tgz#e6ac1f35102d1a9bd07915b82e508d12b50a8bfa" - dependencies: - boom "7.x.x" - bounce "1.x.x" - hoek "5.x.x" - joi "13.x.x" - -compress-commons@^2.1.1: - version "2.1.1" - resolved "https://registry.yarnpkg.com/compress-commons/-/compress-commons-2.1.1.tgz#9410d9a534cf8435e3fbbb7c6ce48de2dc2f0610" - integrity sha512-eVw6n7CnEMFzc3duyFVrQEuY1BlHR3rYsSztyG32ibGMW722i3C6IizEGMFmfMU+A+fALvBIwxN3czffTcdA+Q== - dependencies: - buffer-crc32 "^0.2.13" - crc32-stream "^3.0.1" - normalize-path "^3.0.0" - readable-stream "^2.3.6" - -concat-map@0.0.1: - version "0.0.1" - resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b" - integrity sha1-2Klr13/Wjfd5OnMDajug1UBdR3s= - -content@4.x.x: - version "4.0.5" - resolved "https://registry.yarnpkg.com/content/-/content-4.0.5.tgz#bc547deabc889ab69bce17faf3585c29f4c41bf2" - dependencies: - boom "7.x.x" - -core-util-is@~1.0.0: - version "1.0.2" - resolved "https://registry.yarnpkg.com/core-util-is/-/core-util-is-1.0.2.tgz#b5fd54220aa2bc5ab57aab7140c940754503c1a7" - integrity sha1-tf1UIgqivFq1eqtxQMlAdUUDwac= - -crc32-stream@^3.0.1: - version "3.0.1" - resolved "https://registry.yarnpkg.com/crc32-stream/-/crc32-stream-3.0.1.tgz#cae6eeed003b0e44d739d279de5ae63b171b4e85" - integrity sha512-mctvpXlbzsvK+6z8kJwSJ5crm7yBwrQMTybJzMw1O4lLGJqjlDCXY2Zw7KheiA6XBEcBmfLx1D88mjRGVJtY9w== - dependencies: - crc "^3.4.4" - readable-stream "^3.4.0" - -crc@^3.4.4: - version "3.8.0" - resolved "https://registry.yarnpkg.com/crc/-/crc-3.8.0.tgz#ad60269c2c856f8c299e2c4cc0de4556914056c6" - integrity sha512-iX3mfgcTMIq3ZKLIsVFAbv7+Mc10kxabAGQb8HvjA1o3T1PIYprbakQ65d3I+2HGHt6nSKkM9PYjgoJO2KcFBQ== - dependencies: - buffer "^5.1.0" - -cryptiles@4.x.x: - version "4.1.2" - resolved 
"https://registry.yarnpkg.com/cryptiles/-/cryptiles-4.1.2.tgz#363c9ab5c859da9d2d6fb901b64d980966181184" - dependencies: - boom "7.x.x" - -emojis-list@^2.0.0: - version "2.1.0" - resolved "https://registry.yarnpkg.com/emojis-list/-/emojis-list-2.1.0.tgz#4daa4d9db00f9819880c79fa457ae5b09a1fd389" - -end-of-stream@^1.4.1: - version "1.4.1" - resolved "https://registry.yarnpkg.com/end-of-stream/-/end-of-stream-1.4.1.tgz#ed29634d19baba463b6ce6b80a37213eab71ec43" - integrity sha512-1MkrZNvWTKCaigbn+W15elq2BB/L22nqrSY5DKlo3X6+vclJm8Bb5djXJBmEX6fS3+zCh/F4VBK5Z2KxJt4s2Q== - dependencies: - once "^1.4.0" - -fast-deep-equal@^2.0.1: - version "2.0.1" - resolved "https://registry.yarnpkg.com/fast-deep-equal/-/fast-deep-equal-2.0.1.tgz#7b05218ddf9667bf7f370bf7fdb2cb15fdd0aa49" - -fast-json-stable-stringify@^2.0.0: - version "2.0.0" - resolved "https://registry.yarnpkg.com/fast-json-stable-stringify/-/fast-json-stable-stringify-2.0.0.tgz#d5142c0caee6b1189f87d3a76111064f86c8bbf2" - -fs-constants@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/fs-constants/-/fs-constants-1.0.0.tgz#6be0de9be998ce16af8afc24497b9ee9b7ccd9ad" - integrity sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow== - -fs.realpath@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f" - integrity sha1-FQStJSMVjKpA20onh8sBQRmU6k8= - -glob@^7.1.4: - version "7.1.4" - resolved "https://registry.yarnpkg.com/glob/-/glob-7.1.4.tgz#aa608a2f6c577ad357e1ae5a5c26d9a8d1969255" - integrity sha512-hkLPepehmnKk41pUGm3sYxoFs/umurYfYJCerbXEyFIWcAzvpipAgVkBqqT9RBKMGjnq6kMuyYwha6csxbiM1A== - dependencies: - fs.realpath "^1.0.0" - inflight "^1.0.4" - inherits "2" - minimatch "^3.0.4" - once "^1.3.0" - path-is-absolute "^1.0.0" - -graceful-fs@^4.2.0: - version "4.2.2" - resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.2.tgz#6f0952605d0140c1cfdb138ed005775b92d67b02" - integrity sha512-IItsdsea19BoLC7ELy13q1iJFNmd7ofZH5+X/pJr90/nRoPEX0DJo1dHDbgtYWOhJhcCgMDTOw84RZ72q6lB+Q== - -hapi-auth-basic@^5.0.0: - version "5.0.0" - resolved "https://registry.yarnpkg.com/hapi-auth-basic/-/hapi-auth-basic-5.0.0.tgz#0438b00225e4f7baccd7f29e04b4fc5037c012b0" - integrity sha1-BDiwAiXk97rM1/KeBLT8UDfAErA= - dependencies: - boom "7.x.x" - hoek "5.x.x" - -hapi@^17.5.2: - version "17.5.2" - resolved "https://registry.yarnpkg.com/hapi/-/hapi-17.5.2.tgz#9c5823cdcdd17e5621ebc8928aefb144d033caac" - dependencies: - accept "3.x.x" - ammo "3.x.x" - boom "7.x.x" - bounce "1.x.x" - call "5.x.x" - catbox "10.x.x" - catbox-memory "3.x.x" - heavy "6.x.x" - hoek "5.x.x" - joi "13.x.x" - mimos "4.x.x" - podium "3.x.x" - shot "4.x.x" - statehood "6.x.x" - subtext "6.x.x" - teamwork "3.x.x" - topo "3.x.x" - -heavy@6.x.x: - version "6.1.0" - resolved "https://registry.yarnpkg.com/heavy/-/heavy-6.1.0.tgz#1bbfa43dc61dd4b543ede3ff87db8306b7967274" - dependencies: - boom "7.x.x" - hoek "5.x.x" - joi "13.x.x" - -hoek@5.x.x: - version "5.0.3" - resolved "https://registry.yarnpkg.com/hoek/-/hoek-5.0.3.tgz#b71d40d943d0a95da01956b547f83c4a5b4a34ac" - -ieee754@^1.1.4: - version "1.1.13" - resolved "https://registry.yarnpkg.com/ieee754/-/ieee754-1.1.13.tgz#ec168558e95aa181fd87d37f55c32bbcb6708b84" - integrity sha512-4vf7I2LYV/HaWerSo3XmlMkp5eZ83i+/CDluXi/IGTs/O1sejBNhTtnxzmRZfvOUqj7lZjqHkeTvpgSFDlWZTg== - -inflight@^1.0.4: - version "1.0.6" - resolved 
"https://registry.yarnpkg.com/inflight/-/inflight-1.0.6.tgz#49bd6331d7d02d0c09bc910a1075ba8165b56df9" - integrity sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk= - dependencies: - once "^1.3.0" - wrappy "1" - -inherits@2, inherits@^2.0.3, inherits@~2.0.3: - version "2.0.4" - resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c" - integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ== - -iron@5.x.x: - version "5.0.4" - resolved "https://registry.yarnpkg.com/iron/-/iron-5.0.4.tgz#003ed822f656f07c2b62762815f5de3947326867" - dependencies: - boom "7.x.x" - cryptiles "4.x.x" - hoek "5.x.x" - -isarray@~1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/isarray/-/isarray-1.0.0.tgz#bb935d48582cba168c06834957a54a3e07124f11" - integrity sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE= - -isemail@3.x.x: - version "3.1.3" - resolved "https://registry.yarnpkg.com/isemail/-/isemail-3.1.3.tgz#64f37fc113579ea12523165c3ebe3a71a56ce571" - dependencies: - punycode "2.x.x" - -joi@13.x.x: - version "13.4.0" - resolved "https://registry.yarnpkg.com/joi/-/joi-13.4.0.tgz#afc359ee3d8bc5f9b9ba6cdc31b46d44af14cecc" - dependencies: - hoek "5.x.x" - isemail "3.x.x" - topo "3.x.x" - -json-schema-traverse@^0.4.1: - version "0.4.1" - resolved "https://registry.yarnpkg.com/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz#69f6a87d9513ab8bb8fe63bdb0979c448e684660" - -json5@^0.5.0: - version "0.5.1" - resolved "https://registry.yarnpkg.com/json5/-/json5-0.5.1.tgz#1eade7acc012034ad84e2396767ead9fa5495821" - -lazystream@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/lazystream/-/lazystream-1.0.0.tgz#f6995fe0f820392f61396be89462407bb77168e4" - integrity sha1-9plf4PggOS9hOWvolGJAe7dxaOQ= - dependencies: - readable-stream "^2.0.5" - -loader-utils@^1.0.0: - version "1.1.0" - resolved "https://registry.yarnpkg.com/loader-utils/-/loader-utils-1.1.0.tgz#c98aef488bcceda2ffb5e2de646d6a754429f5cd" - dependencies: - big.js "^3.1.3" - emojis-list "^2.0.0" - json5 "^0.5.0" - -lodash.defaults@^4.2.0: - version "4.2.0" - resolved "https://registry.yarnpkg.com/lodash.defaults/-/lodash.defaults-4.2.0.tgz#d09178716ffea4dde9e5fb7b37f6f0802274580c" - integrity sha1-0JF4cW/+pN3p5ft7N/bwgCJ0WAw= - -lodash.difference@^4.5.0: - version "4.5.0" - resolved "https://registry.yarnpkg.com/lodash.difference/-/lodash.difference-4.5.0.tgz#9ccb4e505d486b91651345772885a2df27fd017c" - integrity sha1-nMtOUF1Ia5FlE0V3KIWi3yf9AXw= - -lodash.flatten@^4.4.0: - version "4.4.0" - resolved "https://registry.yarnpkg.com/lodash.flatten/-/lodash.flatten-4.4.0.tgz#f31c22225a9632d2bbf8e4addbef240aa765a61f" - integrity sha1-8xwiIlqWMtK7+OSt2+8kCqdlph8= - -lodash.isplainobject@^4.0.6: - version "4.0.6" - resolved "https://registry.yarnpkg.com/lodash.isplainobject/-/lodash.isplainobject-4.0.6.tgz#7c526a52d89b45c45cc690b88163be0497f550cb" - integrity sha1-fFJqUtibRcRcxpC4gWO+BJf1UMs= - -lodash.union@^4.6.0: - version "4.6.0" - resolved "https://registry.yarnpkg.com/lodash.union/-/lodash.union-4.6.0.tgz#48bb5088409f16f1821666641c44dd1aaae3cd88" - integrity sha1-SLtQiECfFvGCFmZkHETdGqrjzYg= - -lodash@^4.17.14: - version "4.17.15" - resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.15.tgz#b447f6670a0455bbfeedd11392eff330ea097548" - integrity sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A== - -mime-db@1.x.x: - version "1.35.0" - resolved 
"https://registry.yarnpkg.com/mime-db/-/mime-db-1.35.0.tgz#0569d657466491283709663ad379a99b90d9ab47" - -mimos@4.x.x: - version "4.0.0" - resolved "https://registry.yarnpkg.com/mimos/-/mimos-4.0.0.tgz#76e3d27128431cb6482fd15b20475719ad626a5a" - dependencies: - hoek "5.x.x" - mime-db "1.x.x" - -minimatch@^3.0.4: - version "3.0.4" - resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.0.4.tgz#5166e286457f03306064be5497e8dbb0c3d32083" - integrity sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA== - dependencies: - brace-expansion "^1.1.7" - -nanoid@^1.1.0: - version "1.1.0" - resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-1.1.0.tgz#b18e806e1cdbfdbe030374d5cf08a48cbc80b474" - -nigel@3.x.x: - version "3.0.1" - resolved "https://registry.yarnpkg.com/nigel/-/nigel-3.0.1.tgz#48a08859d65177312f1c25af7252c1e07bb07c2a" - dependencies: - hoek "5.x.x" - vise "3.x.x" - -node-ensure@^0.0.0: - version "0.0.0" - resolved "https://registry.yarnpkg.com/node-ensure/-/node-ensure-0.0.0.tgz#ecae764150de99861ec5c810fd5d096b183932a7" - -node-fetch@^2.2.0: - version "2.2.0" - resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.2.0.tgz#4ee79bde909262f9775f731e3656d0db55ced5b5" - -normalize-path@^3.0.0: - version "3.0.0" - resolved "https://registry.yarnpkg.com/normalize-path/-/normalize-path-3.0.0.tgz#0dcd69ff23a1c9b11fd0978316644a0388216a65" - integrity sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA== - -once@^1.3.0, once@^1.4.0: - version "1.4.0" - resolved "https://registry.yarnpkg.com/once/-/once-1.4.0.tgz#583b1aa775961d4b113ac17d9c50baef9dd76bd1" - integrity sha1-WDsap3WWHUsROsF9nFC6753Xa9E= - dependencies: - wrappy "1" - -path-is-absolute@^1.0.0: - version "1.0.1" - resolved "https://registry.yarnpkg.com/path-is-absolute/-/path-is-absolute-1.0.1.tgz#174b9268735534ffbc7ace6bf53a5a9e1b5c5f5f" - integrity sha1-F0uSaHNVNP+8es5r9TpanhtcX18= - -pdfjs-dist@^2.0.489: - version "2.0.489" - resolved "https://registry.yarnpkg.com/pdfjs-dist/-/pdfjs-dist-2.0.489.tgz#63e54b292a86790a454697eb44d4347b8fbfad27" - dependencies: - node-ensure "^0.0.0" - worker-loader "^1.1.1" - -pez@4.x.x: - version "4.0.2" - resolved "https://registry.yarnpkg.com/pez/-/pez-4.0.2.tgz#0a7c81b64968e90b0e9562b398f390939e9c4b53" - dependencies: - b64 "4.x.x" - boom "7.x.x" - content "4.x.x" - hoek "5.x.x" - nigel "3.x.x" - -podium@3.x.x: - version "3.1.2" - resolved "https://registry.yarnpkg.com/podium/-/podium-3.1.2.tgz#b701429739cf6bdde6b3015ae6b48d400817ce9e" - dependencies: - hoek "5.x.x" - joi "13.x.x" - -process-nextick-args@~2.0.0: - version "2.0.1" - resolved "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz#7820d9b16120cc55ca9ae7792680ae7dba6d7fe2" - integrity sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag== - -punycode@2.x.x, punycode@^2.1.0: - version "2.1.1" - resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.1.1.tgz#b58b010ac40c22c5657616c8d2c2c02c7bf479ec" - -readable-stream@^2.0.0, readable-stream@^2.0.5, readable-stream@^2.3.6: - version "2.3.6" - resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-2.3.6.tgz#b11c27d88b8ff1fbe070643cf94b0c79ae1b0aaf" - integrity sha512-tQtKA9WIAhBF3+VLAseyMqZeBjW0AHJoxOtYqSUZNJxauErmLbVm2FW1y+J/YA9dUrAC39ITejlZWhVIwawkKw== - dependencies: - core-util-is "~1.0.0" - inherits "~2.0.3" - isarray "~1.0.0" - process-nextick-args "~2.0.0" - safe-buffer "~5.1.1" - 
string_decoder "~1.1.1" - util-deprecate "~1.0.1" - -readable-stream@^3.0.1, readable-stream@^3.1.1, readable-stream@^3.4.0: - version "3.4.0" - resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-3.4.0.tgz#a51c26754658e0a3c21dbf59163bd45ba6f447fc" - integrity sha512-jItXPLmrSR8jmTRmRWJXCnGJsfy85mB3Wd/uINMXA65yrnFo0cPClFIUWzo2najVNSl+mx7/4W8ttlLWJe99pQ== - dependencies: - inherits "^2.0.3" - string_decoder "^1.1.1" - util-deprecate "^1.0.1" - -safe-buffer@~5.1.0, safe-buffer@~5.1.1: - version "5.1.2" - resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.1.2.tgz#991ec69d296e0313747d59bdfd2b745c35f8828d" - integrity sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g== - -safe-buffer@~5.2.0: - version "5.2.0" - resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.0.tgz#b74daec49b1148f88c64b68d49b1e815c1f2f519" - integrity sha512-fZEwUGbVl7kouZs1jCdMLdt95hdIv0ZeHg6L7qPeciMZhZ+/gdesW4wgTARkrFWEpspjEATAzUGPG8N2jJiwbg== - -schema-utils@^0.4.0: - version "0.4.5" - resolved "https://registry.yarnpkg.com/schema-utils/-/schema-utils-0.4.5.tgz#21836f0608aac17b78f9e3e24daff14a5ca13a3e" - dependencies: - ajv "^6.1.0" - ajv-keywords "^3.1.0" - -shot@4.x.x: - version "4.0.5" - resolved "https://registry.yarnpkg.com/shot/-/shot-4.0.5.tgz#c7e7455d11d60f6b6cd3c43e15a3b431c17e5566" - dependencies: - hoek "5.x.x" - joi "13.x.x" - -statehood@6.x.x: - version "6.0.6" - resolved "https://registry.yarnpkg.com/statehood/-/statehood-6.0.6.tgz#0dbd7c50774d3f61a24e42b0673093bbc81fa5f0" - dependencies: - boom "7.x.x" - bounce "1.x.x" - cryptiles "4.x.x" - hoek "5.x.x" - iron "5.x.x" - joi "13.x.x" - -string_decoder@^1.1.1: - version "1.3.0" - resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.3.0.tgz#42f114594a46cf1a8e30b0a84f56c78c3edac21e" - integrity sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA== - dependencies: - safe-buffer "~5.2.0" - -string_decoder@~1.1.1: - version "1.1.1" - resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.1.1.tgz#9cf1611ba62685d7030ae9e4ba34149c3af03fc8" - integrity sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg== - dependencies: - safe-buffer "~5.1.0" - -subtext@6.x.x: - version "6.0.7" - resolved "https://registry.yarnpkg.com/subtext/-/subtext-6.0.7.tgz#8e40a67901a734d598142665c90e398369b885f9" - dependencies: - boom "7.x.x" - content "4.x.x" - hoek "5.x.x" - pez "4.x.x" - wreck "14.x.x" - -tar-stream@^2.1.0: - version "2.1.0" - resolved "https://registry.yarnpkg.com/tar-stream/-/tar-stream-2.1.0.tgz#d1aaa3661f05b38b5acc9b7020efdca5179a2cc3" - integrity sha512-+DAn4Nb4+gz6WZigRzKEZl1QuJVOLtAwwF+WUxy1fJ6X63CaGaUAxJRD2KEn1OMfcbCjySTYpNC6WmfQoIEOdw== - dependencies: - bl "^3.0.0" - end-of-stream "^1.4.1" - fs-constants "^1.0.0" - inherits "^2.0.3" - readable-stream "^3.1.1" - -teamwork@3.x.x: - version "3.0.1" - resolved "https://registry.yarnpkg.com/teamwork/-/teamwork-3.0.1.tgz#ff38c7161f41f8070b7813716eb6154036ece196" - -topo@3.x.x: - version "3.0.0" - resolved "https://registry.yarnpkg.com/topo/-/topo-3.0.0.tgz#37e48c330efeac784538e0acd3e62ca5e231fe7a" - dependencies: - hoek "5.x.x" - -uri-js@^4.2.1: - version "4.2.2" - resolved "https://registry.yarnpkg.com/uri-js/-/uri-js-4.2.2.tgz#94c540e1ff772956e2299507c010aea6c8838eb0" - dependencies: - punycode "^2.1.0" - -util-deprecate@^1.0.1, util-deprecate@~1.0.1: - version "1.0.2" - resolved 
"https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf" - integrity sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8= - -vise@3.x.x: - version "3.0.0" - resolved "https://registry.yarnpkg.com/vise/-/vise-3.0.0.tgz#76ad14ab31669c50fbb0817bc0e72fedcbb3bf4c" - dependencies: - hoek "5.x.x" - -worker-loader@^1.1.1: - version "1.1.1" - resolved "https://registry.yarnpkg.com/worker-loader/-/worker-loader-1.1.1.tgz#920d74ddac6816fc635392653ed8b4af1929fd92" - dependencies: - loader-utils "^1.0.0" - schema-utils "^0.4.0" - -wrappy@1: - version "1.0.2" - resolved "https://registry.yarnpkg.com/wrappy/-/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f" - integrity sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8= - -wreck@14.x.x: - version "14.0.2" - resolved "https://registry.yarnpkg.com/wreck/-/wreck-14.0.2.tgz#89c17a9061c745ed1c3aebcb66ea181dbaab454c" - dependencies: - boom "7.x.x" - hoek "5.x.x" - -zip-stream@^2.1.2: - version "2.1.2" - resolved "https://registry.yarnpkg.com/zip-stream/-/zip-stream-2.1.2.tgz#841efd23214b602ff49c497cba1a85d8b5fbc39c" - integrity sha512-ykebHGa2+uzth/R4HZLkZh3XFJzivhVsjJt8bN3GvBzLaqqrUdRacu+c4QtnUgjkkQfsOuNE1JgLKMCPNmkKgg== - dependencies: - archiver-utils "^2.1.0" - compress-commons "^2.1.1" - readable-stream "^3.4.0" From 3c7dcd8a7156531ee4120ed74075801dd48b6af7 Mon Sep 17 00:00:00 2001 From: nleanba Date: Sat, 22 Oct 2022 15:14:13 +0200 Subject: [PATCH 02/90] defined all routes --- deno.jsonc | 11 +++++ src/deps.ts | 2 + src/handlers/count.ts | 3 ++ src/handlers/notImplemented.ts | 3 ++ src/helpers/processParams.ts | 88 ++++++++++++++++++++++++++++++++++ src/main.ts | 12 +++++ src/meta/finder.ts | 20 ++++++++ src/server/routes.ts | 85 ++++++++++++++++++++++++++++++++ src/server/server.ts | 52 ++++++++++++++++++++ 9 files changed, 276 insertions(+) create mode 100644 deno.jsonc create mode 100644 src/deps.ts create mode 100644 src/handlers/count.ts create mode 100644 src/handlers/notImplemented.ts create mode 100644 src/helpers/processParams.ts create mode 100644 src/main.ts create mode 100644 src/meta/finder.ts create mode 100644 src/server/routes.ts create mode 100644 src/server/server.ts diff --git a/deno.jsonc b/deno.jsonc new file mode 100644 index 0000000..86149a3 --- /dev/null +++ b/deno.jsonc @@ -0,0 +1,11 @@ +{ + "fmt": { + "files": { + "include": ["src/"] + } + }, + "tasks": { + "run": "deno run --allow-net --allow-read=blobs --allow-write=blobs --allow-run=convert,pdfsandwich --allow-env=TRIDOC_PWD src/main.ts", + "run-watch": "deno run --watch --allow-net --allow-read=blobs --allow-write=blobs --allow-run=convert,pdfsandwich --allow-env=TRIDOC_PWD src/main.ts" + } +} diff --git a/src/deps.ts b/src/deps.ts new file mode 100644 index 0000000..1ad18b0 --- /dev/null +++ b/src/deps.ts @@ -0,0 +1,2 @@ +export { serve } from "https://deno.land/std@0.153.0/http/mod.ts"; +export { encode } from "https://deno.land/std@0.153.0/encoding/base64.ts" \ No newline at end of file diff --git a/src/handlers/count.ts b/src/handlers/count.ts new file mode 100644 index 0000000..4274ece --- /dev/null +++ b/src/handlers/count.ts @@ -0,0 +1,3 @@ +export function count (_: URLPatternResult): Response { + throw new Error("not implemented"); +} \ No newline at end of file diff --git a/src/handlers/notImplemented.ts b/src/handlers/notImplemented.ts new file mode 100644 index 0000000..d9b1d1c --- /dev/null +++ b/src/handlers/notImplemented.ts @@ -0,0 +1,3 @@ +export function notImplemented (_request: Request, _match: URLPatternResult): Response { + 
throw new Error("not implemented"); +} diff --git a/src/helpers/processParams.ts b/src/helpers/processParams.ts new file mode 100644 index 0000000..a2566a5 --- /dev/null +++ b/src/helpers/processParams.ts @@ -0,0 +1,88 @@ +import { getTagTypes } from "../meta/finder.ts"; + +function extractQuery(request: Request) { + const url = new URL(request.url); + const query: Record = {}; + for (const param of url.searchParams) { + if (query[param[0]]) { + query[param[0]].push(param[1]); + } else query[param[0]] = new Array(param[1]); + } + return query; +} + +type ParamTag = { + label: string; + min?: string; + max?: string; + type?: string; + maxIsExclusive?: boolean; +}; + +type Params = { + tags?: ParamTag[]; + nottags?: ParamTag[]; + text?: string; + limit?: number; + offset?: number; +}; + +export async function processParams(request: Request): Promise { + const query = extractQuery(request); + const result: Params = {}; + const tags = query.tag?.map((t) => t.split(";")) ?? []; + const nottags = query.nottag?.map((t) => t.split(";")) ?? []; + result.text = query.text?.[0]; + result.limit = parseInt(query.limit?.[0], 10) > 0 + ? parseInt(query.limit[0]) + : undefined; + result.offset = parseInt(query.offset?.[0], 10) >= 0 + ? parseInt(query.offset[0]) + : undefined; + return await getTagTypes( + tags.map((e) => e[0]).concat(nottags.map((e) => e[0])), + ).then((types) => { + function tagMap(t: string[]): ParamTag { + const label = t[0]; + const type = types.find((e) => e[0] === t[0])?.[1]; + let min = t[1]; + let max = t[2]; + let maxIsExclusive; + if (type === "http://www.w3.org/2001/XMLSchema#date") { + if (min) { + switch (min.length) { + case 4: + min += "-01-01"; + break; + case 7: + min += "-01"; + break; + } + } + if (max) { + switch (max.length) { + case 4: + max += "-12-31"; + break; + case 7: { + const month = parseInt(max.substring(5), 10) + 1; + if (month < 13) { + max = max.substring(0, 5) + "-" + + month.toString().padStart(2, "0") + "-01"; + maxIsExclusive = true; + } else { + max += "-31"; + } + break; + } + } + } + } + return { label, min, max, type, maxIsExclusive }; + } + result.tags = tags.map(tagMap); + result.nottags = nottags.map(tagMap); + console.log("eh??", result); + return result; + }); +} diff --git a/src/main.ts b/src/main.ts new file mode 100644 index 0000000..e75b254 --- /dev/null +++ b/src/main.ts @@ -0,0 +1,12 @@ +import { serve } from "./server/server.ts"; + +console.log("Starting tridoc backend server"); + +// TODO Check external dependencies + +if (!Deno.env.get("TRIDOC_PWD")) { + throw new Error("No password set"); +} + +serve(); +console.log("Tridoc backend server is listening on port 8000"); diff --git a/src/meta/finder.ts b/src/meta/finder.ts new file mode 100644 index 0000000..2c1b13d --- /dev/null +++ b/src/meta/finder.ts @@ -0,0 +1,20 @@ +export async function getTagTypes(labels: string[]): Promise { + const response=await fetch("http://fuseki:3030/3DOC/query",{ + method: "POST", + headers: { +"Authorization": "Basic "+btoa("admin:pw123"), +"Content-Type": "application/sparql-query", + }, + body: `PREFIX tridoc: +SELECT DISTINCT ?l ?t WHERE { VALUES ?l { "${labels.join('" "')}" } ?s tridoc:label ?l . OPTIONAL { ?s tridoc:valueType ?t . 
} }`,
+});
+const json=await response.json();
+return json.results.bindings.map((binding: Record<string, { value: string }>) => {
+  const result_1=[];
+  result_1[0]=binding.l.value;
+  if(binding.t) {
+result_1[1]=binding.t.value;
+  }
+  return result_1;
+});
+}
diff --git a/src/server/routes.ts b/src/server/routes.ts
new file mode 100644
index 0000000..63e0866
--- /dev/null
+++ b/src/server/routes.ts
@@ -0,0 +1,85 @@
+import { notImplemented } from "../handlers/notImplemented.ts";
+
+export const routes: {
+  [method: string]: {
+    pattern: URLPattern;
+    handler: (request: Request, match: URLPatternResult) => Response;
+  }[];
+} = {
+  "GET": [{
+    pattern: new URLPattern({ pathname: "/count" }),
+    handler: notImplemented,
+  }, {
+    pattern: new URLPattern({ pathname: "/doc" }),
+    handler: notImplemented,
+  }, {
+    pattern: new URLPattern({ pathname: "/doc/:id" }),
+    handler: notImplemented,
+  }, {
+    pattern: new URLPattern({ pathname: "/doc/:id/comment" }),
+    handler: notImplemented,
+  }, {
+    pattern: new URLPattern({ pathname: "/doc/:id/tag" }),
+    handler: notImplemented,
+  }, {
+    pattern: new URLPattern({ pathname: "/doc/:id/thumb" }),
+    handler: notImplemented,
+  }, {
+    pattern: new URLPattern({ pathname: "/doc/:id/title" }),
+    handler: notImplemented,
+  }, {
+    pattern: new URLPattern({ pathname: "/doc/:id/meta" }),
+    handler: notImplemented,
+  }, {
+    pattern: new URLPattern({ pathname: "/raw/rdf" }),
+    handler: notImplemented,
+  }, {
+    pattern: new URLPattern({ pathname: "/raw/zip" }),
+    handler: notImplemented,
+  }, {
+    pattern: new URLPattern({ pathname: "/raw/tgz" }),
+    handler: notImplemented,
+  }, {
+    pattern: new URLPattern({ pathname: "/tag" }),
+    handler: notImplemented,
+  }, {
+    pattern: new URLPattern({ pathname: "/tag/:tagLabel" }),
+    handler: notImplemented,
+  }, {
+    pattern: new URLPattern({ pathname: "/version" }),
+    handler: notImplemented,
+  }],
+  "POST": [{
+    pattern: new URLPattern({ pathname: "/doc" }),
+    handler: notImplemented,
+  }, {
+    pattern: new URLPattern({ pathname: "/doc/:id/comment" }),
+    handler: notImplemented,
+  }, {
+    pattern: new URLPattern({ pathname: "/doc/:id/tag" }),
+    handler: notImplemented,
+  }, {
+    pattern: new URLPattern({ pathname: "/tag" }),
+    handler: notImplemented,
+  }],
+  "PUT": [{
+    pattern: new URLPattern({ pathname: "/doc/:id/title" }),
+    handler: notImplemented,
+  }, {
+    pattern: new URLPattern({ pathname: "/raw/zip" }),
+    handler: notImplemented,
+  }],
+  "DELETE": [{
+    pattern: new URLPattern({ pathname: "/doc/:id" }),
+    handler: notImplemented,
+  }, {
+    pattern: new URLPattern({ pathname: "/doc/:id/tag/:tagLabel" }),
+    handler: notImplemented,
+  }, {
+    pattern: new URLPattern({ pathname: "/doc/:id/title" }),
+    handler: notImplemented,
+  }, {
+    pattern: new URLPattern({ pathname: "/tag/:tagLabel" }),
+    handler: notImplemented,
+  }],
+};
diff --git a/src/server/server.ts b/src/server/server.ts
new file mode 100644
index 0000000..02833b8
--- /dev/null
+++ b/src/server/server.ts
@@ -0,0 +1,52 @@
+import { encode, serve as stdServe } from "../deps.ts";
+import { routes } from "./routes.ts";
+
+const isAuthenticated = (request: Request) => {
+  return request.headers.get("Authorization") ===
+    "Basic " + encode("tridoc:" + Deno.env.get("TRIDOC_PWD"));
+};
+
+const handler = (request: Request): Response => {
+  const path = request.url.slice(request.url.indexOf("/", "https://".length));
+  console.log((new Date()).toISOString(), request.method, path);
+  try {
+    if (!isAuthenticated(request)) {
+      console.log(
+        (new Date()).toISOString(),
+        request.method,
+        path,
+        "→ 401: 
Not Authenticated", + ); + return new Response("401: Not Authenticated", { + status: 401, + headers: { "WWW-Authenticate": "Basic" }, + }); + } + + const route = routes[request.method]?.find(({ pattern }) => + pattern.test(request.url) + ); + if (route) { + return route.handler(request, route.pattern.exec(request.url)!); + } + + console.log( + (new Date()).toISOString(), + request.method, + path, + "→ 404: Path not foun", + ); + return new Response("404: Path not found", { status: 404 }); + } catch (error) { + console.log( + (new Date()).toISOString(), + request.method, + path, + "→ 500:", + error, + ); + return new Response("500: " + error, { status: 500 }); + } +}; + +export const serve = () => stdServe(handler, { onListen: undefined }); From 5d136365c2dde7e980ccf1af077d68525878b0df Mon Sep 17 00:00:00 2001 From: nleanba Date: Sat, 22 Oct 2022 15:14:58 +0200 Subject: [PATCH 03/90] fmt --- src/deps.ts | 2 +- src/handlers/count.ts | 4 ++-- src/handlers/notImplemented.ts | 5 ++++- src/meta/finder.ts | 32 ++++++++++++++++++-------------- 4 files changed, 25 insertions(+), 18 deletions(-) diff --git a/src/deps.ts b/src/deps.ts index 1ad18b0..b75190d 100644 --- a/src/deps.ts +++ b/src/deps.ts @@ -1,2 +1,2 @@ export { serve } from "https://deno.land/std@0.153.0/http/mod.ts"; -export { encode } from "https://deno.land/std@0.153.0/encoding/base64.ts" \ No newline at end of file +export { encode } from "https://deno.land/std@0.153.0/encoding/base64.ts"; diff --git a/src/handlers/count.ts b/src/handlers/count.ts index 4274ece..586ddf0 100644 --- a/src/handlers/count.ts +++ b/src/handlers/count.ts @@ -1,3 +1,3 @@ -export function count (_: URLPatternResult): Response { +export function count(_: URLPatternResult): Response { throw new Error("not implemented"); -} \ No newline at end of file +} diff --git a/src/handlers/notImplemented.ts b/src/handlers/notImplemented.ts index d9b1d1c..65e2a35 100644 --- a/src/handlers/notImplemented.ts +++ b/src/handlers/notImplemented.ts @@ -1,3 +1,6 @@ -export function notImplemented (_request: Request, _match: URLPatternResult): Response { +export function notImplemented( + _request: Request, + _match: URLPatternResult, +): Response { throw new Error("not implemented"); } diff --git a/src/meta/finder.ts b/src/meta/finder.ts index 2c1b13d..b271513 100644 --- a/src/meta/finder.ts +++ b/src/meta/finder.ts @@ -1,20 +1,24 @@ export async function getTagTypes(labels: string[]): Promise { - const response=await fetch("http://fuseki:3030/3DOC/query",{ + const response = await fetch("http://fuseki:3030/3DOC/query", { method: "POST", headers: { -"Authorization": "Basic "+btoa("admin:pw123"), -"Content-Type": "application/sparql-query", + "Authorization": "Basic " + btoa("admin:pw123"), + "Content-Type": "application/sparql-query", }, body: `PREFIX tridoc: -SELECT DISTINCT ?l ?t WHERE { VALUES ?l { "${labels.join('" "')}" } ?s tridoc:label ?l . OPTIONAL { ?s tridoc:valueType ?t . } }`, -}); -const json=await response.json(); -return json.results.bindings.map((binding: Record) => { - const result_1=[]; - result_1[0]=binding.l.value; - if(binding.t) { -result_1[1]=binding.t.value; - } - return result_1; -}); +SELECT DISTINCT ?l ?t WHERE { VALUES ?l { "${ + labels.join('" "') + }" } ?s tridoc:label ?l . OPTIONAL { ?s tridoc:valueType ?t . 
} }`, + }); + const json = await response.json(); + return json.results.bindings.map( + (binding: Record) => { + const result_1 = []; + result_1[0] = binding.l.value; + if (binding.t) { + result_1[1] = binding.t.value; + } + return result_1; + }, + ); } From 315819c92020b85fa89095d642ebb3832e587b22 Mon Sep 17 00:00:00 2001 From: nleanba Date: Sat, 22 Oct 2022 15:27:23 +0200 Subject: [PATCH 04/90] added GET /version --- src/deps.ts | 2 ++ src/handlers/version.ts | 8 ++++++++ src/server/routes.ts | 3 ++- 3 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 src/handlers/version.ts diff --git a/src/deps.ts b/src/deps.ts index b75190d..e422717 100644 --- a/src/deps.ts +++ b/src/deps.ts @@ -1,2 +1,4 @@ +export const VERSION = "1.6.0-alpha.deno"; + export { serve } from "https://deno.land/std@0.153.0/http/mod.ts"; export { encode } from "https://deno.land/std@0.153.0/encoding/base64.ts"; diff --git a/src/handlers/version.ts b/src/handlers/version.ts new file mode 100644 index 0000000..edf10bf --- /dev/null +++ b/src/handlers/version.ts @@ -0,0 +1,8 @@ +import { VERSION } from "../deps.ts"; + +export function version( + _request: Request, + _match: URLPatternResult, +): Response { + return new Response(VERSION); +} diff --git a/src/server/routes.ts b/src/server/routes.ts index 63e0866..3afd997 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -1,4 +1,5 @@ import { notImplemented } from "../handlers/notImplemented.ts"; +import { version } from "../handlers/version.ts"; export const routes: { [method: string]: { @@ -47,7 +48,7 @@ export const routes: { handler: notImplemented, }, { pattern: new URLPattern({ pathname: "/version" }), - handler: notImplemented, + handler: version, }], "POST": [{ pattern: new URLPattern({ pathname: "/doc" }), From b11dee5536b6661fe772371e0e7862a976b85425 Mon Sep 17 00:00:00 2001 From: nleanba Date: Sat, 22 Oct 2022 16:10:16 +0200 Subject: [PATCH 05/90] docker framework --- old/.dockerignore => .dockerignore | 1 + old/.gitignore => .gitignore | 0 old/DEV-README.md => DEV-README.md | 0 Dockerfile | 18 +++++++++++++++ old/config-tdb.ttl => config-tdb.ttl | 0 ...cker-compose.yml => dev-docker-compose.yml | 0 old/docker-cmd.sh => docker-cmd.sh | 3 +-- old/docker-compose.yml => docker-compose.yml | 0 old/Dockerfile | 11 ---------- old/package.json | 22 ------------------- 10 files changed, 20 insertions(+), 35 deletions(-) rename old/.dockerignore => .dockerignore (88%) rename old/.gitignore => .gitignore (100%) rename old/DEV-README.md => DEV-README.md (100%) create mode 100644 Dockerfile rename old/config-tdb.ttl => config-tdb.ttl (100%) rename old/dev-docker-compose.yml => dev-docker-compose.yml (100%) rename old/docker-cmd.sh => docker-cmd.sh (80%) rename old/docker-compose.yml => docker-compose.yml (100%) delete mode 100644 old/Dockerfile delete mode 100644 old/package.json diff --git a/old/.dockerignore b/.dockerignore similarity index 88% rename from old/.dockerignore rename to .dockerignore index f145ba1..e911418 100644 --- a/old/.dockerignore +++ b/.dockerignore @@ -1,3 +1,4 @@ +old blobs fuseki-base node_modules \ No newline at end of file diff --git a/old/.gitignore b/.gitignore similarity index 100% rename from old/.gitignore rename to .gitignore diff --git a/old/DEV-README.md b/DEV-README.md similarity index 100% rename from old/DEV-README.md rename to DEV-README.md diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..b983ae9 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,18 @@ +FROM denoland/deno:1.26.2 + +EXPOSE 8000 
+ +RUN mkdir -p /usr/src/app/src +WORKDIR /usr/src/app + +RUN apt update \ + && apt -y install pdfsandwich tesseract-ocr-deu tesseract-ocr-fra curl +RUN rm /etc/ImageMagick-6/policy.xml + +USER deno +COPY src/deps.ts src/deps.ts +RUN deno cache src/deps.ts + +COPY . . + +CMD [ "/bin/bash", "/usr/src/app/docker-cmd.sh" ] \ No newline at end of file diff --git a/old/config-tdb.ttl b/config-tdb.ttl similarity index 100% rename from old/config-tdb.ttl rename to config-tdb.ttl diff --git a/old/dev-docker-compose.yml b/dev-docker-compose.yml similarity index 100% rename from old/dev-docker-compose.yml rename to dev-docker-compose.yml diff --git a/old/docker-cmd.sh b/docker-cmd.sh similarity index 80% rename from old/docker-cmd.sh rename to docker-cmd.sh index 56e8c9d..4ef74cc 100644 --- a/old/docker-cmd.sh +++ b/docker-cmd.sh @@ -1,10 +1,9 @@ #!/bin/bash -sleep 5 echo 'Attempting to create Dataset "3DOC"' curl 'http://fuseki:3030/$/datasets' -H "Authorization: Basic $(echo -n admin:pw123 | base64)" \ -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' --data 'dbName=3DOC&dbType=tdb' set -m -yarn start & +deno run --allow-net --allow-read=blobs --allow-write=blobs --allow-run=convert,pdfsandwich --allow-env=TRIDOC_PWD src/main.ts & sleep 5 echo 'Attempting to create Dataset "3DOC"' curl 'http://fuseki:3030/$/datasets' -H "Authorization: Basic $(echo -n admin:pw123 | base64)" \ diff --git a/old/docker-compose.yml b/docker-compose.yml similarity index 100% rename from old/docker-compose.yml rename to docker-compose.yml diff --git a/old/Dockerfile b/old/Dockerfile deleted file mode 100644 index 3fbabeb..0000000 --- a/old/Dockerfile +++ /dev/null @@ -1,11 +0,0 @@ -FROM node:lts-buster -EXPOSE 8000 -RUN apt update \ - && apt -y install pdfsandwich tesseract-ocr-deu tesseract-ocr-fra -RUN rm /etc/ImageMagick-6/policy.xml -RUN mkdir -p /usr/src/app -WORKDIR /usr/src/app -COPY . 
/usr/src/app -RUN yarn install -RUN chmod +x /usr/src/app/docker-cmd.sh -CMD [ "/usr/src/app/docker-cmd.sh" ] \ No newline at end of file diff --git a/old/package.json b/old/package.json deleted file mode 100644 index 5a603ea..0000000 --- a/old/package.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "name": "tridoc-backend", - "version": "1.5.2", - "description": "Simple RDF-Based Document Management System", - "main": "lib/server", - "repository": "git@github.com:tridoc/tridoc-backend.git", - "author": "Noam Bachmann ", - "license": "MIT", - "dependencies": { - "adm-zip": "^0.4.16", - "archiver": "^3.1.1", - "hapi": "^17.5.2", - "hapi-auth-basic": "^5.0.0", - "nanoid": "^1.1.0", - "node-fetch": "^2.2.0", - "pdfjs-dist": "^2.0.489" - }, - "scripts": { - "start": "node lib/server.js", - "start-with-pwd": "TRIDOC_PWD='tridoc' node lib/server.js" - } -} From 08ff7a3e53b7af341efcd27e0eb3ba82335f7bef Mon Sep 17 00:00:00 2001 From: nleanba Date: Sat, 22 Oct 2022 15:16:37 +0000 Subject: [PATCH 06/90] setup devcontainer --- .devcontainer/Dockerfile | 20 +++++++++++ .devcontainer/devcontainer.json | 47 ++++++++++++++++++++++++ .devcontainer/docker-cmd.sh | 11 ++++++ .devcontainer/docker-compose.yml | 37 +++++++++++++++++++ .vscode/settings.json | 2 +- DEV-README.md | 62 +++----------------------------- 6 files changed, 120 insertions(+), 59 deletions(-) create mode 100644 .devcontainer/Dockerfile create mode 100644 .devcontainer/devcontainer.json create mode 100644 .devcontainer/docker-cmd.sh create mode 100644 .devcontainer/docker-compose.yml diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 0000000..7a9c183 --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,20 @@ +FROM denoland/deno:1.26.2 + +EXPOSE 8000 + +RUN mkdir -p /home/deno +RUN chown -R deno /home/deno +RUN mkdir -p /usr/src/app/src +WORKDIR /usr/src/app + +RUN apt update \ + && apt -y install pdfsandwich tesseract-ocr-deu tesseract-ocr-fra curl git +RUN rm /etc/ImageMagick-6/policy.xml + +USER deno +COPY src/deps.ts src/deps.ts +RUN deno cache src/deps.ts + +COPY . . + +CMD [ "/bin/bash", "/usr/src/app/.devcontainer/docker-cmd.sh" ] \ No newline at end of file diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..03ce50f --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,47 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: +// https://github.com/microsoft/vscode-dev-containers/tree/v0.245.2/containers/docker-existing-docker-compose +// If you want to run as a non-root user in the container, see .devcontainer/docker-compose.yml. +{ + "name": "Existing Docker Compose (Extend)", + + // Update the 'dockerComposeFile' list if you have more compose files or use different names. + // The .devcontainer/docker-compose.yml file contains any overrides you need/want to make. + "dockerComposeFile": [ + "../dev-docker-compose.yml", + "docker-compose.yml" + ], + + "containerEnv": { + "TRIDOC_PWD": "pw123", + }, + + // The 'service' property is the name of the service for the container that VS Code should + // use. Update this value and .devcontainer/docker-compose.yml to the real service name. + "service": "tridoc", + + // The optional 'workspaceFolder' property is the path VS Code should open by default when + // connected. 
This is typically a file mount in .devcontainer/docker-compose.yml + "workspaceFolder": "/usr/src/app", + + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [], + + // Uncomment the next line if you want start specific services in your Docker Compose config. + "runServices": [ "fuseki" ], + + // Uncomment the next line if you want to keep your containers running after VS Code shuts down. + // "shutdownAction": "none", + + // Uncomment the next line to run commands after the container is created - for example installing curl. + // "postCreateCommand": "apt-get update && apt-get install -y curl", + + // Uncomment to connect as a non-root user if you've added one. See https://aka.ms/vscode-remote/containers/non-root. + "remoteUser": "deno", + "customizations": { + "vscode": { + "extensions": [ + "denoland.vscode-deno" + ] + } + } +} diff --git a/.devcontainer/docker-cmd.sh b/.devcontainer/docker-cmd.sh new file mode 100644 index 0000000..a1d8c86 --- /dev/null +++ b/.devcontainer/docker-cmd.sh @@ -0,0 +1,11 @@ +#!/bin/bash +echo 'Attempting to create Dataset "3DOC"' +curl 'http://fuseki:3030/$/datasets' -H "Authorization: Basic $(echo -n admin:pw123 | base64)" \ + -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' --data 'dbName=3DOC&dbType=tdb' +set -m +deno run --watch --allow-net --allow-read=blobs --allow-write=blobs --allow-run=convert,pdfsandwich --allow-env=TRIDOC_PWD src/main.ts & +sleep 5 +echo 'Attempting to create Dataset "3DOC"' +curl 'http://fuseki:3030/$/datasets' -H "Authorization: Basic $(echo -n admin:pw123 | base64)" \ + -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' --data 'dbName=3DOC&dbType=tdb' +fg 1 diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml new file mode 100644 index 0000000..0638fa6 --- /dev/null +++ b/.devcontainer/docker-compose.yml @@ -0,0 +1,37 @@ +version: '3' +services: + # Update this to the name of the service you want to work with in your docker-compose.yml file + tridoc: + # If you want add a non-root user to your Dockerfile, you can use the "remoteUser" + # property in devcontainer.json to cause VS Code its sub-processes (terminals, tasks, + # debugging) to execute as the user. Uncomment the next line if you want the entire + # container to run as this user instead. Note that, on Linux, you may need to + # ensure the UID and GID of the container user you create matches your local user. + # See https://aka.ms/vscode-remote/containers/non-root for details. + # + user: deno + + # Uncomment if you want to override the service's Dockerfile to one in the .devcontainer + # folder. Note that the path of the Dockerfile and context is relative to the *primary* + # docker-compose.yml file (the first in the devcontainer.json "dockerComposeFile" + # array). The sample below assumes your primary file is in the root of your project. + # + build: + context: . + dockerfile: .devcontainer/Dockerfile + + volumes: + # Update this to wherever you want VS Code to mount the folder of your project + - .:/usr/src/app:cached + + # Uncomment the next line to use Docker from inside the container. See https://aka.ms/vscode-remote/samples/docker-from-docker-compose for details. + # - /var/run/docker.sock:/var/run/docker.sock + + # Uncomment the next four lines if you will use a ptrace-based debugger like C++, Go, and Rust. 
+ # cap_add: + # - SYS_PTRACE + # security_opt: + # - seccomp:unconfined + + # Overrides default command so things don't shut down after the process ends. + # command: "/bin/bash -c \"TRIDOC_PWD=\\\"pw123\\\" deno run --allow-net --allow-read=blobs --allow-write=blobs --allow-run=convert,pdfsandwich --allow-env=TRIDOC_PWD src/main.ts &\\\n sleep 5\\\n echo 'Attempting to create Dataset \\\"3DOC\\\"'\\\n curl 'http://fuseki:3030/$/datasets' -H \\\"Authorization: Basic $(echo -n admin:pw123 | base64)\\\" -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' --data 'dbName=3DOC&dbType=tdb'\\\n fg 1\\\n /bin/sh -c \\\"while sleep 1000; do :; done\\\"\"" diff --git a/.vscode/settings.json b/.vscode/settings.json index 85ece8f..1535e13 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,5 @@ { - "npm.packageManager": "yarn", "deno.enable": true, + "deno.lint": true, "deno.unstable": true } \ No newline at end of file diff --git a/DEV-README.md b/DEV-README.md index 8ddc74e..f809fd1 100644 --- a/DEV-README.md +++ b/DEV-README.md @@ -1,65 +1,11 @@ # tridoc -## Table Of Contents - * [Easy Setup with Docker-Compose](#easy-setup-with-docker-compose) - * [Dev Build](#dev-build) - * [Production Build](#production-build) - * [Setup with Persistent Fuseki](#setup-with-persistent-fuseki) - * [Docker](#docker) - * [Manual](#manual) +## Run "live" -## Developer Guide +Use the vscode-devcontainer: this will start tridoc and fuseki. -This assumes a Unix/Linux/wsl system with bash - -### Easy Setup with Docker-Compose - -This will setup tridoc on port 8000 and fuseki avaliable on port 8001. - -Replace `YOUR PASSWORD HERE` in the first command with your choice of password. - -#### Dev Build: - -``` -export TRIDOC_PWD="YOUR PASSWORD HERE" -docker-compose -f dev-docker-compose.yml build -docker-compose -f dev-docker-compose.yml up -``` - -#### Production Build: - -``` -export TRIDOC_PWD="YOUR PASSWORD HERE" -docker-compose build -docker-compose up -``` - -### Setup with Persistent Fuseki - -The following method expect an instance of Fuseki running on http://fuseki:3030/ with user `admin` and password `pw123`. This fuseki instance must have lucene indexing enabled and configured as in [config-tdb.ttl](config-tdb.ttl). - -#### Docker: - -``` -docker build -t tridoc . -docker run -p 8000:8000 -e TRIDOC_PWD="YOUR PASSWORD HERE" tridoc -``` - -#### Manual: - -Install the following dependencies: - -``` -node:12.18 yarn pdfsandwich tesseract-ocr-deu tesseract-ocr-fra -``` - -And run the following commands - -``` -rm /etc/ImageMagick-6/policy.xml -yarn install -bash docker-cmd.sh -``` +It will use TRIDOC_PWD = "pw123". 
+Access tridoc from http://localhost:8000 and fuseki from http://localhost:8001 ## Tips & Tricks From f08e7f1d0975fcf539c3598ac1ee6ae88933b979 Mon Sep 17 00:00:00 2001 From: nleanba Date: Sat, 22 Oct 2022 18:34:20 +0000 Subject: [PATCH 07/90] added zip restore --- .devcontainer/Dockerfile | 2 +- .devcontainer/docker-cmd.sh | 2 +- Dockerfile | 2 +- README.md | 9 +++---- deno.jsonc | 4 ++-- docker-cmd.sh | 2 +- src/deps.ts | 6 +++-- src/handlers/notImplemented.ts | 2 +- src/handlers/raw.ts | 44 ++++++++++++++++++++++++++++++++++ src/handlers/version.ts | 4 ++-- src/meta/store.ts | 14 +++++++++++ src/server/routes.ts | 8 +++++-- src/server/server.ts | 6 ++--- 13 files changed, 85 insertions(+), 20 deletions(-) create mode 100644 src/handlers/raw.ts create mode 100644 src/meta/store.ts diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 7a9c183..39c4d3a 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -8,7 +8,7 @@ RUN mkdir -p /usr/src/app/src WORKDIR /usr/src/app RUN apt update \ - && apt -y install pdfsandwich tesseract-ocr-deu tesseract-ocr-fra curl git + && apt -y install pdfsandwich tesseract-ocr-deu tesseract-ocr-fra curl git zip unzip RUN rm /etc/ImageMagick-6/policy.xml USER deno diff --git a/.devcontainer/docker-cmd.sh b/.devcontainer/docker-cmd.sh index a1d8c86..d48095a 100644 --- a/.devcontainer/docker-cmd.sh +++ b/.devcontainer/docker-cmd.sh @@ -3,7 +3,7 @@ echo 'Attempting to create Dataset "3DOC"' curl 'http://fuseki:3030/$/datasets' -H "Authorization: Basic $(echo -n admin:pw123 | base64)" \ -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' --data 'dbName=3DOC&dbType=tdb' set -m -deno run --watch --allow-net --allow-read=blobs --allow-write=blobs --allow-run=convert,pdfsandwich --allow-env=TRIDOC_PWD src/main.ts & +deno run --watch --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run=convert,pdfsandwich,zip,unzip --allow-env=TRIDOC_PWD src/main.ts & sleep 5 echo 'Attempting to create Dataset "3DOC"' curl 'http://fuseki:3030/$/datasets' -H "Authorization: Basic $(echo -n admin:pw123 | base64)" \ diff --git a/Dockerfile b/Dockerfile index b983ae9..680ee6a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,7 @@ RUN mkdir -p /usr/src/app/src WORKDIR /usr/src/app RUN apt update \ - && apt -y install pdfsandwich tesseract-ocr-deu tesseract-ocr-fra curl + && apt -y install pdfsandwich tesseract-ocr-deu tesseract-ocr-fra curl zip unzip RUN rm /etc/ImageMagick-6/policy.xml USER deno diff --git a/README.md b/README.md index 0965e93..ebff991 100644 --- a/README.md +++ b/README.md @@ -105,8 +105,8 @@ When getting a comment, a JSON array with objects of the following structure is ## API -| Address | Method | Description | Request / Payload | Response | Implemented in Version | -| - | - | - | - | - | - | +| Address | Method | Description | Request / Payload | Response | Implemented in Version | deno? 
| +| - | - | - | - | - | - | - | | `/count` | GET | Count (matching) documents | [1](#f1) [3](#f3) | Number | 1.1.0 | | `/doc` | POST | Add / Store Document | PDF[5](#f5) | - | 1.1.0 | | `/doc` | GET | Get List of all (matching) documents | [1](#f1) [2](#f2) [3](#f3) | Array of objects with document identifiers and titles (where available) | 1.1.0 | @@ -123,13 +123,14 @@ When getting a comment, a JSON array with objects of the following structure is | `/doc/{id}/title` | DELETE | Reset document title | - | - | 1.1.0 | | `/doc/{id}/meta` | GET | Get various metadata | - | `{"title": "the_Title", "tags":[...], "comments": [...] ... }` | 1.1.0 \| .comments & .created in 1.2.1 | | `/raw/rdf` | GET | Get all metadata as RDF. Useful for Backups | [4](#f4) | RDF, Content-Type defined over request Headers or ?accept. Fallback to text/turtle. | 1.1.0 | +| `/raw/rdf` | DELETE | "Cancel" failed zip upload—use only if certain it’s done & failed | | | (deno only) | ✅ | | `/raw/zip` or `/raw/tgz` | GET | Get all data. Useful for backups | - | ZIP / TGZ containing blobs/ directory with all pdfs as stored within tridoc and a rdf.ttl file with all metadata. | 1.3.0 | -| `/raw/zip` | PUT | Replace all data with backup zip | ZIP | Replaces the metadata and adds the blobs from the zip | 1.3.0 | +| `/raw/zip` | PUT | Replace all data with backup zip | ZIP | Replaces the metadata and adds the blobs from the zip | 1.3.0 | ✅ | | `/tag` | POST | Create new tag | See above | - | 1.1.0 | | `/tag` | GET | Get (list of) all tags | - | - | 1.1.0 | | `/tag/{tagLabel}` | GET | Get Documents with this tag. Same as `/doc?tag={tagLabel}` | [1](#f1) [2](#f2) | Array of objects with document identifiers and titles (where available) | 1.1.0 | | `/tag/{tagLabel}` | DELETE | Delete this tag | - | - | 1.1.0 | -| `/version` | GET | Get tridoc version | - | semver version number | 1.1.0 | +| `/version` | GET | Get tridoc version | - | semver version number | 1.1.0 | ✅ | #### URL-Parameters supported: diff --git a/deno.jsonc b/deno.jsonc index 86149a3..085b9a3 100644 --- a/deno.jsonc +++ b/deno.jsonc @@ -5,7 +5,7 @@ } }, "tasks": { - "run": "deno run --allow-net --allow-read=blobs --allow-write=blobs --allow-run=convert,pdfsandwich --allow-env=TRIDOC_PWD src/main.ts", - "run-watch": "deno run --watch --allow-net --allow-read=blobs --allow-write=blobs --allow-run=convert,pdfsandwich --allow-env=TRIDOC_PWD src/main.ts" + "run": "deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttls --allow-run=convert,pdfsandwich,zip,unzip --allow-env=TRIDOC_PWD src/main.ts", + "run-watch": "deno run --watch --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run=convert,pdfsandwich,zip,unzip --allow-env=TRIDOC_PWD src/main.ts" } } diff --git a/docker-cmd.sh b/docker-cmd.sh index 4ef74cc..4bb7aa7 100644 --- a/docker-cmd.sh +++ b/docker-cmd.sh @@ -3,7 +3,7 @@ echo 'Attempting to create Dataset "3DOC"' curl 'http://fuseki:3030/$/datasets' -H "Authorization: Basic $(echo -n admin:pw123 | base64)" \ -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' --data 'dbName=3DOC&dbType=tdb' set -m -deno run --allow-net --allow-read=blobs --allow-write=blobs --allow-run=convert,pdfsandwich --allow-env=TRIDOC_PWD src/main.ts & +deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run=convert,pdfsandwich,zip,unzip --allow-env=TRIDOC_PWD src/main.ts & sleep 5 echo 'Attempting to create Dataset "3DOC"' curl 'http://fuseki:3030/$/datasets' -H "Authorization: Basic $(echo -n 
admin:pw123 | base64)" \ diff --git a/src/deps.ts b/src/deps.ts index e422717..209487c 100644 --- a/src/deps.ts +++ b/src/deps.ts @@ -1,4 +1,6 @@ export const VERSION = "1.6.0-alpha.deno"; -export { serve } from "https://deno.land/std@0.153.0/http/mod.ts"; -export { encode } from "https://deno.land/std@0.153.0/encoding/base64.ts"; +export { encode } from "https://deno.land/std@0.160.0/encoding/base64.ts"; +export { emptyDir } from "https://deno.land/std@0.160.0/fs/mod.ts"; +export { serve } from "https://deno.land/std@0.160.0/http/mod.ts"; +export { writableStreamFromWriter } from "https://deno.land/std@0.160.0/streams/mod.ts"; diff --git a/src/handlers/notImplemented.ts b/src/handlers/notImplemented.ts index 65e2a35..10da909 100644 --- a/src/handlers/notImplemented.ts +++ b/src/handlers/notImplemented.ts @@ -1,6 +1,6 @@ export function notImplemented( _request: Request, _match: URLPatternResult, -): Response { +): Promise { throw new Error("not implemented"); } diff --git a/src/handlers/raw.ts b/src/handlers/raw.ts new file mode 100644 index 0000000..a69dfe7 --- /dev/null +++ b/src/handlers/raw.ts @@ -0,0 +1,44 @@ +import { emptyDir, writableStreamFromWriter } from "../deps.ts"; +import { restore } from "../meta/store.ts"; + +const decoder = new TextDecoder("utf-8"); + +export async function deleteRdfFile( + _request: Request, + _match: URLPatternResult, +): Promise { + await Deno.remove("rdf.ttl"); + return new Response("200: OK"); +} + +export async function putZip( + request: Request, + _match: URLPatternResult, +): Promise { + try { + await Deno.stat("rdf.ttl"); + throw new Error( + "Can't unzip concurrently: rdf.ttl already exists. If you know what you are doing, clear this message with HTTP DELETE /raw/rdf", + ); + } catch (error) { + if (!(error instanceof Deno.errors.NotFound)) { + throw error; + } + } + await emptyDir("blobs"); + const zipPath = "blobs/zip-" + Date.now(); + const zip = await Deno.open(zipPath, { write: true, create: true }); + const writableStream = writableStreamFromWriter(zip); + await request.body?.pipeTo(writableStream); + const p = Deno.run({ cmd: ["unzip", zipPath] }); + const { success, code } = await p.status(); + if (success) { + await Deno.remove(zipPath); + const turtleData = decoder.decode(await Deno.readFile("rdf.ttl")); + await Deno.remove("rdf.ttl"); + await restore(turtleData); + return new Response("200: OK"); + } else { + throw new Error("unzip failed with code " + code); + } +} diff --git a/src/handlers/version.ts b/src/handlers/version.ts index edf10bf..a092472 100644 --- a/src/handlers/version.ts +++ b/src/handlers/version.ts @@ -3,6 +3,6 @@ import { VERSION } from "../deps.ts"; export function version( _request: Request, _match: URLPatternResult, -): Response { - return new Response(VERSION); +): Promise { + return new Promise((resolve) => resolve(new Response(VERSION))); } diff --git a/src/meta/store.ts b/src/meta/store.ts new file mode 100644 index 0000000..808c6c7 --- /dev/null +++ b/src/meta/store.ts @@ -0,0 +1,14 @@ +export function restore(turtleData: string) { + const statement = `CLEAR GRAPH ; + INSERT DATA { + GRAPH { ${turtleData} } + }`; + return fetch("http://fuseki:3030/3DOC/update", { + method: "POST", + headers: { + "Authorization": "Basic " + btoa("admin:pw123"), + "Content-Type": "application/sparql-update", + }, + body: statement, + }); +} diff --git a/src/server/routes.ts b/src/server/routes.ts index 3afd997..45169cb 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -1,10 +1,11 @@ import { notImplemented } 
from "../handlers/notImplemented.ts"; +import { deleteRdfFile, putZip } from "../handlers/raw.ts"; import { version } from "../handlers/version.ts"; export const routes: { [method: string]: { pattern: URLPattern; - handler: (request: Request, match: URLPatternResult) => Response; + handler: (request: Request, match: URLPatternResult) => Promise; }[]; } = { "GET": [{ @@ -68,7 +69,7 @@ export const routes: { handler: notImplemented, }, { pattern: new URLPattern({ pathname: "/raw/zip" }), - handler: notImplemented, + handler: putZip, }], "DELETE": [{ pattern: new URLPattern({ pathname: "/doc/:id" }), @@ -82,5 +83,8 @@ export const routes: { }, { pattern: new URLPattern({ pathname: "/tag/:tagLabel" }), handler: notImplemented, + }, { + pattern: new URLPattern({ pathname: "/raw/rdf" }), + handler: deleteRdfFile, }], }; diff --git a/src/server/server.ts b/src/server/server.ts index 02833b8..876303e 100644 --- a/src/server/server.ts +++ b/src/server/server.ts @@ -6,7 +6,7 @@ const isAuthenticated = (request: Request) => { "Basic " + encode("tridoc:" + Deno.env.get("TRIDOC_PWD")); }; -const handler = (request: Request): Response => { +const handler = async (request: Request): Promise => { const path = request.url.slice(request.url.indexOf("/", "https://".length)); console.log((new Date()).toISOString(), request.method, path); try { @@ -27,14 +27,14 @@ const handler = (request: Request): Response => { pattern.test(request.url) ); if (route) { - return route.handler(request, route.pattern.exec(request.url)!); + return await route.handler(request, route.pattern.exec(request.url)!); } console.log( (new Date()).toISOString(), request.method, path, - "→ 404: Path not foun", + "→ 404: Path not found", ); return new Response("404: Path not found", { status: 404 }); } catch (error) { From 1ed70b6d9c096b9791037b107e2f5282bc18795f Mon Sep 17 00:00:00 2001 From: nleanba Date: Sat, 22 Oct 2022 19:08:55 +0000 Subject: [PATCH 08/90] added cors headers --- src/handlers/cors.ts | 12 ++++++++++++ src/handlers/raw.ts | 5 +++-- src/helpers/cors.ts | 11 +++++++++++ src/server/routes.ts | 5 +++++ src/server/server.ts | 11 ++++++----- 5 files changed, 37 insertions(+), 7 deletions(-) create mode 100644 src/handlers/cors.ts create mode 100644 src/helpers/cors.ts diff --git a/src/handlers/cors.ts b/src/handlers/cors.ts new file mode 100644 index 0000000..cf3d819 --- /dev/null +++ b/src/handlers/cors.ts @@ -0,0 +1,12 @@ +import { respond } from "../helpers/cors.ts"; + +export function options( + _request: Request, + _match: URLPatternResult, +): Promise { + return new Promise((resolve) => + resolve( + respond(undefined, { status: 204 }) + ) + ); +} \ No newline at end of file diff --git a/src/handlers/raw.ts b/src/handlers/raw.ts index a69dfe7..1845781 100644 --- a/src/handlers/raw.ts +++ b/src/handlers/raw.ts @@ -1,4 +1,5 @@ import { emptyDir, writableStreamFromWriter } from "../deps.ts"; +import { respond } from "../helpers/cors.ts"; import { restore } from "../meta/store.ts"; const decoder = new TextDecoder("utf-8"); @@ -8,7 +9,7 @@ export async function deleteRdfFile( _match: URLPatternResult, ): Promise { await Deno.remove("rdf.ttl"); - return new Response("200: OK"); + return respond("200: OK"); } export async function putZip( @@ -37,7 +38,7 @@ export async function putZip( const turtleData = decoder.decode(await Deno.readFile("rdf.ttl")); await Deno.remove("rdf.ttl"); await restore(turtleData); - return new Response("200: OK"); + return respond("200: OK"); } else { throw new Error("unzip failed with code " + code); 
} diff --git a/src/helpers/cors.ts b/src/helpers/cors.ts new file mode 100644 index 0000000..ac7b85e --- /dev/null +++ b/src/helpers/cors.ts @@ -0,0 +1,11 @@ +export function respond(body?: BodyInit, init?: ResponseInit) { + return new Response(body, { + ...init, + headers: { + ...init?.headers, + "Access-Control-Allow-Origin": "*", + "Access-Control-Allow-Methods": "POST, PUT, DELETE, GET, OPTIONS", + "Access-Control-Allow-Headers": "Authorization", + }, + }); +} diff --git a/src/server/routes.ts b/src/server/routes.ts index 45169cb..82a123b 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -1,3 +1,4 @@ +import { options } from "../handlers/cors.ts"; import { notImplemented } from "../handlers/notImplemented.ts"; import { deleteRdfFile, putZip } from "../handlers/raw.ts"; import { version } from "../handlers/version.ts"; @@ -8,6 +9,10 @@ export const routes: { handler: (request: Request, match: URLPatternResult) => Promise; }[]; } = { + "OPTIONS": [{ + pattern: new URLPattern({ pathname: "*" }), + handler: options, + }], "GET": [{ pattern: new URLPattern({ pathname: "/count" }), handler: notImplemented, diff --git a/src/server/server.ts b/src/server/server.ts index 876303e..03f31e8 100644 --- a/src/server/server.ts +++ b/src/server/server.ts @@ -1,8 +1,9 @@ import { encode, serve as stdServe } from "../deps.ts"; +import { respond } from "../helpers/cors.ts"; import { routes } from "./routes.ts"; const isAuthenticated = (request: Request) => { - return request.headers.get("Authorization") === + return (request.method === "OPTIONS") || request.headers.get("Authorization") === "Basic " + encode("tridoc:" + Deno.env.get("TRIDOC_PWD")); }; @@ -17,7 +18,7 @@ const handler = async (request: Request): Promise => { path, "→ 401: Not Authenticated", ); - return new Response("401: Not Authenticated", { + return respond("401: Not Authenticated", { status: 401, headers: { "WWW-Authenticate": "Basic" }, }); @@ -36,16 +37,16 @@ const handler = async (request: Request): Promise => { path, "→ 404: Path not found", ); - return new Response("404: Path not found", { status: 404 }); + return respond("404: Path not found", { status: 404 }); } catch (error) { console.log( (new Date()).toISOString(), request.method, path, - "→ 500:", + "→ 500: ", error, ); - return new Response("500: " + error, { status: 500 }); + return respond("500: " + error, { status: 500 }); } }; From d208e9e5d1ba167098f6d6968da97d327bd7f694 Mon Sep 17 00:00:00 2001 From: nleanba Date: Sat, 22 Oct 2022 19:08:55 +0000 Subject: [PATCH 09/90] added cors headers --- src/handlers/cors.ts | 12 ++++++++++++ src/handlers/raw.ts | 5 +++-- src/handlers/version.ts | 3 ++- src/helpers/cors.ts | 11 +++++++++++ src/server/routes.ts | 5 +++++ src/server/server.ts | 11 ++++++----- 6 files changed, 39 insertions(+), 8 deletions(-) create mode 100644 src/handlers/cors.ts create mode 100644 src/helpers/cors.ts diff --git a/src/handlers/cors.ts b/src/handlers/cors.ts new file mode 100644 index 0000000..cf3d819 --- /dev/null +++ b/src/handlers/cors.ts @@ -0,0 +1,12 @@ +import { respond } from "../helpers/cors.ts"; + +export function options( + _request: Request, + _match: URLPatternResult, +): Promise { + return new Promise((resolve) => + resolve( + respond(undefined, { status: 204 }) + ) + ); +} \ No newline at end of file diff --git a/src/handlers/raw.ts b/src/handlers/raw.ts index a69dfe7..1845781 100644 --- a/src/handlers/raw.ts +++ b/src/handlers/raw.ts @@ -1,4 +1,5 @@ import { emptyDir, writableStreamFromWriter } from "../deps.ts"; +import { 
respond } from "../helpers/cors.ts"; import { restore } from "../meta/store.ts"; const decoder = new TextDecoder("utf-8"); @@ -8,7 +9,7 @@ export async function deleteRdfFile( _match: URLPatternResult, ): Promise { await Deno.remove("rdf.ttl"); - return new Response("200: OK"); + return respond("200: OK"); } export async function putZip( @@ -37,7 +38,7 @@ export async function putZip( const turtleData = decoder.decode(await Deno.readFile("rdf.ttl")); await Deno.remove("rdf.ttl"); await restore(turtleData); - return new Response("200: OK"); + return respond("200: OK"); } else { throw new Error("unzip failed with code " + code); } diff --git a/src/handlers/version.ts b/src/handlers/version.ts index a092472..2864385 100644 --- a/src/handlers/version.ts +++ b/src/handlers/version.ts @@ -1,8 +1,9 @@ import { VERSION } from "../deps.ts"; +import { respond } from "../helpers/cors.ts"; export function version( _request: Request, _match: URLPatternResult, ): Promise { - return new Promise((resolve) => resolve(new Response(VERSION))); + return new Promise((resolve) => resolve(respond(VERSION))); } diff --git a/src/helpers/cors.ts b/src/helpers/cors.ts new file mode 100644 index 0000000..ac7b85e --- /dev/null +++ b/src/helpers/cors.ts @@ -0,0 +1,11 @@ +export function respond(body?: BodyInit, init?: ResponseInit) { + return new Response(body, { + ...init, + headers: { + ...init?.headers, + "Access-Control-Allow-Origin": "*", + "Access-Control-Allow-Methods": "POST, PUT, DELETE, GET, OPTIONS", + "Access-Control-Allow-Headers": "Authorization", + }, + }); +} diff --git a/src/server/routes.ts b/src/server/routes.ts index 45169cb..82a123b 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -1,3 +1,4 @@ +import { options } from "../handlers/cors.ts"; import { notImplemented } from "../handlers/notImplemented.ts"; import { deleteRdfFile, putZip } from "../handlers/raw.ts"; import { version } from "../handlers/version.ts"; @@ -8,6 +9,10 @@ export const routes: { handler: (request: Request, match: URLPatternResult) => Promise; }[]; } = { + "OPTIONS": [{ + pattern: new URLPattern({ pathname: "*" }), + handler: options, + }], "GET": [{ pattern: new URLPattern({ pathname: "/count" }), handler: notImplemented, diff --git a/src/server/server.ts b/src/server/server.ts index 876303e..03f31e8 100644 --- a/src/server/server.ts +++ b/src/server/server.ts @@ -1,8 +1,9 @@ import { encode, serve as stdServe } from "../deps.ts"; +import { respond } from "../helpers/cors.ts"; import { routes } from "./routes.ts"; const isAuthenticated = (request: Request) => { - return request.headers.get("Authorization") === + return (request.method === "OPTIONS") || request.headers.get("Authorization") === "Basic " + encode("tridoc:" + Deno.env.get("TRIDOC_PWD")); }; @@ -17,7 +18,7 @@ const handler = async (request: Request): Promise => { path, "→ 401: Not Authenticated", ); - return new Response("401: Not Authenticated", { + return respond("401: Not Authenticated", { status: 401, headers: { "WWW-Authenticate": "Basic" }, }); @@ -36,16 +37,16 @@ const handler = async (request: Request): Promise => { path, "→ 404: Path not found", ); - return new Response("404: Path not found", { status: 404 }); + return respond("404: Path not found", { status: 404 }); } catch (error) { console.log( (new Date()).toISOString(), request.method, path, - "→ 500:", + "→ 500: ", error, ); - return new Response("500: " + error, { status: 500 }); + return respond("500: " + error, { status: 500 }); } }; From ffce65fc0141ce9adbd5741eff6ecc81e5bae20c Mon 
Sep 17 00:00:00 2001 From: nleanba Date: Sat, 22 Oct 2022 19:30:13 +0000 Subject: [PATCH 10/90] added count --- src/handlers/cors.ts | 4 +- src/handlers/count.ts | 13 +++++- src/helpers/processParams.ts | 12 +++--- src/meta/finder.ts | 76 ++++++++++++++++++++++++++++++++++++ src/server/routes.ts | 3 +- src/server/server.ts | 5 ++- 6 files changed, 100 insertions(+), 13 deletions(-) diff --git a/src/handlers/cors.ts b/src/handlers/cors.ts index cf3d819..d98c74a 100644 --- a/src/handlers/cors.ts +++ b/src/handlers/cors.ts @@ -6,7 +6,7 @@ export function options( ): Promise { return new Promise((resolve) => resolve( - respond(undefined, { status: 204 }) + respond(undefined, { status: 204 }), ) ); -} \ No newline at end of file +} diff --git a/src/handlers/count.ts b/src/handlers/count.ts index 586ddf0..06d0f26 100644 --- a/src/handlers/count.ts +++ b/src/handlers/count.ts @@ -1,3 +1,12 @@ -export function count(_: URLPatternResult): Response { - throw new Error("not implemented"); +import { respond } from "../helpers/cors.ts"; +import { processParams } from "../helpers/processParams.ts"; +import { getDocumentNumber } from "../meta/finder.ts"; + +export async function count( + request: Request, + _match: URLPatternResult, +): Promise { + const params = await processParams(request); + const count = await getDocumentNumber(params); + return respond("" + count); } diff --git a/src/helpers/processParams.ts b/src/helpers/processParams.ts index a2566a5..d02de40 100644 --- a/src/helpers/processParams.ts +++ b/src/helpers/processParams.ts @@ -12,14 +12,14 @@ function extractQuery(request: Request) { } type ParamTag = { - label: string; - min?: string; - max?: string; - type?: string; - maxIsExclusive?: boolean; + label: string; // [0] + min?: string; // [1] + max?: string; // [2] + type?: string; // [3] + maxIsExclusive?: boolean; //[5] }; -type Params = { +export type Params = { tags?: ParamTag[]; nottags?: ParamTag[]; text?: string; diff --git a/src/meta/finder.ts b/src/meta/finder.ts index b271513..a7cb6d5 100644 --- a/src/meta/finder.ts +++ b/src/meta/finder.ts @@ -1,3 +1,79 @@ +import { Params } from "../helpers/processParams.ts"; + +export async function getDocumentNumber( + { tags = [], nottags = [], text = "" }: Params, +) { + let tagQuery = ""; + for (let i = 0; i < tags.length; i++) { + if (tags[i].type) { + tagQuery += `{ ?s tridoc:tag ?ptag${i} . + ?ptag${i} tridoc:parameterizableTag ?atag${i} . + ?ptag${i} tridoc:value ?v${i} . + ?atag${i} tridoc:label "${tags[i].label}" . + ${ + tags[i].min + ? `FILTER (?v${i} >= "${tags[i].min}"^^<${tags[i].type}> )` + : "" + } + ${ + tags[i].max + ? `FILTER (?v${i} ${tags[i].maxIsExclusive ? "<" : "<="} "${ + tags[i].max + }"^^<${tags[i].type}> )` + : "" + } }`; + } else { + tagQuery += `{ ?s tridoc:tag ?tag${i} . + ?tag${i} tridoc:label "${tags[i].label}" . }`; + } + } + for (let i = 0; i < nottags.length; i++) { + if (nottags[i].type) { + tagQuery += `FILTER NOT EXISTS { ?s tridoc:tag ?ptag${i} . + ?ptag${i} tridoc:parameterizableTag ?atag${i} . + ?ptag${i} tridoc:value ?v${i} . + ?atag${i} tridoc:label "${nottags[i].label}" . + ${ + nottags[i].min + ? `FILTER (?v${i} >= "${nottags[i].min}"^^<${nottags[i].type}> )` + : "" + } + ${ + nottags[i].max + ? `FILTER (?v${i} ${nottags[i].maxIsExclusive ? "<" : "<="} "${ + nottags[i].max + }"^^<${nottags[i].type}> )` + : "" + } }`; + } else { + tagQuery += `FILTER NOT EXISTS { ?s tridoc:tag ?tag${i} . + ?tag${i} tridoc:label "${nottags[i].label}" . 
}`; + } + } + return await fetch("http://fuseki:3030/3DOC/query", { + method: "POST", + headers: { + "Authorization": "Basic " + btoa("admin:pw123"), + "Content-Type": "application/sparql-query", + }, + body: "PREFIX rdf: \n" + + "PREFIX s: \n" + + "PREFIX tridoc: \n" + + "PREFIX text: \n" + + "SELECT (COUNT(DISTINCT ?s) as ?count)\n" + + "WHERE {\n" + + " ?s s:identifier ?identifier .\n" + + tagQuery + + (text + ? '{ { ?s text:query (s:name "' + text + + '") } UNION { ?s text:query (s:text "' + text + '")} } .\n' + : "") + + "}", + }).then((response) => response.json()).then((json) => + json.results.bindings[0].count.value as number + ); +} + export async function getTagTypes(labels: string[]): Promise { const response = await fetch("http://fuseki:3030/3DOC/query", { method: "POST", diff --git a/src/server/routes.ts b/src/server/routes.ts index 82a123b..41f046d 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -1,4 +1,5 @@ import { options } from "../handlers/cors.ts"; +import { count } from "../handlers/count.ts"; import { notImplemented } from "../handlers/notImplemented.ts"; import { deleteRdfFile, putZip } from "../handlers/raw.ts"; import { version } from "../handlers/version.ts"; @@ -15,7 +16,7 @@ export const routes: { }], "GET": [{ pattern: new URLPattern({ pathname: "/count" }), - handler: notImplemented, + handler: count, }, { pattern: new URLPattern({ pathname: "/doc" }), handler: notImplemented, diff --git a/src/server/server.ts b/src/server/server.ts index 03f31e8..d49f105 100644 --- a/src/server/server.ts +++ b/src/server/server.ts @@ -3,8 +3,9 @@ import { respond } from "../helpers/cors.ts"; import { routes } from "./routes.ts"; const isAuthenticated = (request: Request) => { - return (request.method === "OPTIONS") || request.headers.get("Authorization") === - "Basic " + encode("tridoc:" + Deno.env.get("TRIDOC_PWD")); + return (request.method === "OPTIONS") || + request.headers.get("Authorization") === + "Basic " + encode("tridoc:" + Deno.env.get("TRIDOC_PWD")); }; const handler = async (request: Request): Promise => { From 4c4586667af9b56038d248f722d7003e116f694e Mon Sep 17 00:00:00 2001 From: nleanba Date: Sat, 22 Oct 2022 19:41:33 +0000 Subject: [PATCH 11/90] added count --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ebff991..94c9412 100644 --- a/README.md +++ b/README.md @@ -107,7 +107,7 @@ When getting a comment, a JSON array with objects of the following structure is | Address | Method | Description | Request / Payload | Response | Implemented in Version | deno? 
| | - | - | - | - | - | - | -| `/count` | GET | Count (matching) documents | [1](#f1) [3](#f3) | Number | 1.1.0 | +| `/count` | GET | Count (matching) documents | [1](#f1) [3](#f3) | Number | 1.1.0 | ✅ | | `/doc` | POST | Add / Store Document | PDF[5](#f5) | - | 1.1.0 | | `/doc` | GET | Get List of all (matching) documents | [1](#f1) [2](#f2) [3](#f3) | Array of objects with document identifiers and titles (where available) | 1.1.0 | From 9a72686c9d5d1561b9d988b9e79a3315db5e3a61 Mon Sep 17 00:00:00 2001 From: nleanba Date: Sat, 22 Oct 2022 20:20:49 +0000 Subject: [PATCH 12/90] added GET /doc/:id --- README.md | 2 +- src/handlers/doc.ts | 36 +++++++++++++++ src/meta/finder.ts | 102 ++++++++++++++++++++++++++++++++++++++++++- src/server/routes.ts | 11 ++--- 4 files changed, 144 insertions(+), 7 deletions(-) create mode 100644 src/handlers/doc.ts diff --git a/README.md b/README.md index 94c9412..94486d5 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,7 @@ When getting a comment, a JSON array with objects of the following structure is | `/count` | GET | Count (matching) documents | [1](#f1) [3](#f3) | Number | 1.1.0 | ✅ | | `/doc` | POST | Add / Store Document | PDF[5](#f5) | - | 1.1.0 | | `/doc` | GET | Get List of all (matching) documents | [1](#f1) [2](#f2) [3](#f3) | Array of objects with document identifiers and titles (where available) | 1.1.0 | -| `/doc/{id}` | GET | Get this document | - | PDF | 1.1.0 | +| `/doc/{id}` | GET | Get this document | - | PDF | 1.1.0 | ✅ | | `/doc/{id}` | DELETE | Deletes all metadata associated with the document. Document will not be deleted and is stays accessible over /doc/{id}. | - | - | 1.1.0 | | `/doc/{id}/comment` | POST | Add comment to document | Comment object / See above | - | 1.2.0 | | `/doc/{id}/comment` | GET | Get comments | - | Array of comment objects | 1.2.0 | | `/doc/{id}/tag` | POST | Add a tag to document | Tag object / See above | - | 1.1.0 | | `/doc/{id}/tag` | GET | Get tags of document | - | Array of tag objects | 1.1.0 | diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts new file mode 100644 index 0000000..7a273e2 --- /dev/null +++ b/src/handlers/doc.ts @@ -0,0 +1,36 @@ +import { respond } from "../helpers/cors.ts"; +import { processParams } from "../helpers/processParams.ts"; +import { getDocumentList } from "../meta/finder.ts"; + +function getPath(id: string) { + return "./blobs/" + id.slice(0, 2) + "/" + id.slice(2, 6) + "/" + + id.slice(6, 14) + "/" + id; +} + +export async function getPDF( + _request: Request, + match: URLPatternResult, +): Promise { + const id = match.pathname.groups.id; + const path = getPath(id); + try { + const file = await Deno.open(path, { read: true }); + // Build a readable stream so the file doesn't have to be fully loaded into memory while we send it + const readableStream = file.readable; + return respond(readableStream); + } catch (error) { + if (error instanceof Deno.errors.NotFound) { + return respond("404 Not Found", { status: 404 }); + } + throw error; + } +} + +export async function list( + request: Request, + _match: URLPatternResult, +): Promise { + const params = await processParams(request); + const response = await getDocumentList(params); + return respond(JSON.stringify(response)); +} diff --git a/src/meta/finder.ts b/src/meta/finder.ts index a7cb6d5..46c70c6 100644 --- a/src/meta/finder.ts +++ b/src/meta/finder.ts @@ -1,7 +1,107 @@ import { Params } from "../helpers/processParams.ts"; +/** takes: { tags: [string, string, string][], nottags: [string, string, string][], text: string, limit: number, offset: number } */ +export async function getDocumentList( + { tags = [], nottags = 
[], text, limit, offset }: Params, +) { + let tagQuery = ""; + for (let i = 0; i < tags.length; i++) { + if (tags[i].type) { + tagQuery += `{ ?s tridoc:tag ?ptag${i} . + ?ptag${i} tridoc:parameterizableTag ?atag${i} . + ?ptag${i} tridoc:value ?v${i} . + ?atag${i} tridoc:label "${tags[i].label}" . + ${ + tags[i].min + ? `FILTER (?v${i} >= "${tags[i].min}"^^<${tags[i].type}> )` + : "" + } + ${ + tags[i].max + ? `FILTER (?v${i} ${tags[i].maxIsExclusive ? "<" : "<="} "${ + tags[i].max + }"^^<${tags[i].type}> )` + : "" + } }`; + } else { + tagQuery += `{ ?s tridoc:tag ?tag${i} . + ?tag${i} tridoc:label "${tags[i].label}" . }`; + } + } + for (let i = 0; i < nottags.length; i++) { + if (nottags[i].type) { + tagQuery += `FILTER NOT EXISTS { ?s tridoc:tag ?ptag${i} . + ?ptag${i} tridoc:parameterizableTag ?atag${i} . + ?ptag${i} tridoc:value ?v${i} . + ?atag${i} tridoc:label "${nottags[i].label}" . + ${ + nottags[i].min + ? `FILTER (?v${i} >= "${nottags[i].min}"^^<${nottags[i].type}> )` + : "" + } + ${ + nottags[i].max + ? `FILTER (?v${i} ${nottags[i].maxIsExclusive ? "<" : "<="} "${ + nottags[i].max + }"^^<${nottags[i].type}> )` + : "" + } }`; + } else { + tagQuery += `FILTER NOT EXISTS { ?s tridoc:tag ?tag${i} . + ?tag${i} tridoc:label "${nottags[i].label}" . }`; + } + } + const body = "PREFIX rdf: \n" + + "PREFIX s: \n" + + "PREFIX tridoc: \n" + + "PREFIX text: \n" + + "SELECT DISTINCT ?s ?identifier ?title ?date\n" + + "WHERE {\n" + + " ?s s:identifier ?identifier .\n" + + " ?s s:dateCreated ?date .\n" + + tagQuery + + " OPTIONAL { ?s s:name ?title . }\n" + + (text + ? '{ { ?s text:query (s:name "' + text + + '") } UNION { ?s text:query (s:text "' + text + '")} } .\n' + : "") + + "}\n" + + "ORDER BY desc(?date)\n" + + (limit ? "LIMIT " + limit + "\n" : "") + + (offset ? 
"OFFSET " + offset : ""); + return await fetch("http://fuseki:3030/3DOC/query", { + method: "POST", + headers: { + "Authorization": "Basic " + btoa("admin:pw123"), + "Content-Type": "application/sparql-query", + }, + body: body, + }).then((response) => response.json()).then((json) => + json.results.bindings.map( + ( + binding: { + s: { value: string }; + identifier: { value: string }; + title?: { value: string }; + date?: { value: string }; + }, + ) => { + const result: Record = {}; + result.identifier = binding.identifier.value; + if (binding.title) { + result.title = binding.title.value; + } + if (binding.date) { + result.created = binding.date.value; + } + return result; + }, + ) + ); +} + export async function getDocumentNumber( - { tags = [], nottags = [], text = "" }: Params, + { tags = [], nottags = [], text }: Params, ) { let tagQuery = ""; for (let i = 0; i < tags.length; i++) { diff --git a/src/server/routes.ts b/src/server/routes.ts index 41f046d..107bd22 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -1,7 +1,8 @@ import { options } from "../handlers/cors.ts"; import { count } from "../handlers/count.ts"; +import * as doc from "../handlers/doc.ts"; import { notImplemented } from "../handlers/notImplemented.ts"; -import { deleteRdfFile, putZip } from "../handlers/raw.ts"; +import * as raw from "../handlers/raw.ts"; import { version } from "../handlers/version.ts"; export const routes: { @@ -19,10 +20,10 @@ export const routes: { handler: count, }, { pattern: new URLPattern({ pathname: "/doc" }), - handler: notImplemented, + handler: doc.list, }, { pattern: new URLPattern({ pathname: "/doc/:id" }), - handler: notImplemented, + handler: doc.getPDF, }, { pattern: new URLPattern({ pathname: "/doc/:id/comment" }), handler: notImplemented, @@ -75,7 +76,7 @@ export const routes: { handler: notImplemented, }, { pattern: new URLPattern({ pathname: "/raw/zip" }), - handler: putZip, + handler: raw.putZip, }], "DELETE": [{ pattern: new URLPattern({ pathname: "/doc/:id" }), @@ -91,6 +92,6 @@ export const routes: { handler: notImplemented, }, { pattern: new URLPattern({ pathname: "/raw/rdf" }), - handler: deleteRdfFile, + handler: raw.deleteRdfFile, }], }; From c1c0b30584177844e1a13a5133e027d1c579a419 Mon Sep 17 00:00:00 2001 From: nleanba Date: Sat, 22 Oct 2022 20:50:34 +0000 Subject: [PATCH 13/90] added GET /doc/:id/comment --- src/handlers/doc.ts | 13 +++++++++++-- src/meta/finder.ts | 42 +++++++++++++++++++++++++++++++++++------- src/server/routes.ts | 2 +- 3 files changed, 47 insertions(+), 10 deletions(-) diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index 7a273e2..eb28665 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -1,12 +1,21 @@ import { respond } from "../helpers/cors.ts"; import { processParams } from "../helpers/processParams.ts"; -import { getDocumentList } from "../meta/finder.ts"; +import * as metafinder from "../meta/finder.ts"; function getPath(id: string) { return "./blobs/" + id.slice(0, 2) + "/" + id.slice(2, 6) + "/" + id.slice(6, 14) + "/" + id; } +export async function getComments( + _request: Request, + match: URLPatternResult, +): Promise { + const id = match.pathname.groups.id; + const response = await metafinder.getComments(id); + return respond(JSON.stringify(response)); +} + export async function getPDF( _request: Request, match: URLPatternResult, @@ -31,6 +40,6 @@ export async function list( _match: URLPatternResult, ): Promise { const params = await processParams(request); - const response = await 
getDocumentList(params); + const response = await metafinder.getDocumentList(params); return respond(JSON.stringify(response)); } diff --git a/src/meta/finder.ts b/src/meta/finder.ts index 46c70c6..8a7dab2 100644 --- a/src/meta/finder.ts +++ b/src/meta/finder.ts @@ -1,6 +1,39 @@ import { Params } from "../helpers/processParams.ts"; -/** takes: { tags: [string, string, string][], nottags: [string, string, string][], text: string, limit: number, offset: number } */ +export async function getComments(id: string) { + const query = `PREFIX rdf: +PREFIX xsd: +PREFIX tridoc: +PREFIX s: +SELECT DISTINCT ?d ?t WHERE { + GRAPH { + s:comment [ + a s:Comment ; + s:dateCreated ?d ; + s:text ?t + ] . + } +}`; + return await fetch("http://fuseki:3030/3DOC/query", { + method: "POST", + headers: { + "Authorization": "Basic " + btoa("admin:pw123"), + "Content-Type": "application/sparql-query", + }, + body: query, + }).then((response) => { + if (response.ok) { + return response.json(); + } else { + throw new Error("" + response); + } + }).then((json) => + json.results.bindings.map((binding: Record) => { + return { text: binding.t.value, created: binding.d.value }; + }) + ); +} + export async function getDocumentList( { tags = [], nottags = [], text, limit, offset }: Params, ) { @@ -79,12 +112,7 @@ export async function getDocumentList( }).then((response) => response.json()).then((json) => json.results.bindings.map( ( - binding: { - s: { value: string }; - identifier: { value: string }; - title?: { value: string }; - date?: { value: string }; - }, + binding: Record, ) => { const result: Record = {}; result.identifier = binding.identifier.value; diff --git a/src/server/routes.ts b/src/server/routes.ts index 107bd22..8f40997 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -26,7 +26,7 @@ export const routes: { handler: doc.getPDF, }, { pattern: new URLPattern({ pathname: "/doc/:id/comment" }), - handler: notImplemented, + handler: doc.getComments, }, { pattern: new URLPattern({ pathname: "/doc/:id/tag" }), handler: notImplemented, From 1c5795ce9e2014feae0d68da9da1a23a6e7fb958 Mon Sep 17 00:00:00 2001 From: nleanba Date: Sun, 23 Oct 2022 09:37:10 +0000 Subject: [PATCH 14/90] added POST /doc --- .devcontainer/docker-cmd.sh | 2 +- .devcontainer/docker-compose.yml | 2 +- DEV-README.md | 9 +++++ README.md | 6 ++-- deno.jsonc | 4 +-- docker-cmd.sh | 2 +- src/deps.ts | 4 ++- src/handlers/doc.ts | 62 ++++++++++++++++++++++++++++++++ src/handlers/raw.ts | 15 ++++---- src/helpers/cors.ts | 2 +- src/helpers/pdfprocessor.ts | 8 +++++ src/meta/store.ts | 40 +++++++++++++++++++++ src/server/routes.ts | 2 +- 13 files changed, 138 insertions(+), 20 deletions(-) create mode 100644 src/helpers/pdfprocessor.ts diff --git a/.devcontainer/docker-cmd.sh b/.devcontainer/docker-cmd.sh index d48095a..cfe9532 100644 --- a/.devcontainer/docker-cmd.sh +++ b/.devcontainer/docker-cmd.sh @@ -3,7 +3,7 @@ echo 'Attempting to create Dataset "3DOC"' curl 'http://fuseki:3030/$/datasets' -H "Authorization: Basic $(echo -n admin:pw123 | base64)" \ -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' --data 'dbName=3DOC&dbType=tdb' set -m -deno run --watch --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run=convert,pdfsandwich,zip,unzip --allow-env=TRIDOC_PWD src/main.ts & +deno run --watch --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run=convert,pdfsandwich,pdftotext,zip,unzip --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts & sleep 5 echo 'Attempting to create 
Dataset "3DOC"' curl 'http://fuseki:3030/$/datasets' -H "Authorization: Basic $(echo -n admin:pw123 | base64)" \ diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index 0638fa6..3b042ba 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -34,4 +34,4 @@ services: # - seccomp:unconfined # Overrides default command so things don't shut down after the process ends. - # command: "/bin/bash -c \"TRIDOC_PWD=\\\"pw123\\\" deno run --allow-net --allow-read=blobs --allow-write=blobs --allow-run=convert,pdfsandwich --allow-env=TRIDOC_PWD src/main.ts &\\\n sleep 5\\\n echo 'Attempting to create Dataset \\\"3DOC\\\"'\\\n curl 'http://fuseki:3030/$/datasets' -H \\\"Authorization: Basic $(echo -n admin:pw123 | base64)\\\" -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' --data 'dbName=3DOC&dbType=tdb'\\\n fg 1\\\n /bin/sh -c \\\"while sleep 1000; do :; done\\\"\"" + # command: "/bin/bash -c \"TRIDOC_PWD=\\\"pw123\\\" deno run --allow-net --allow-read=blobs --allow-write=blobs --allow-run=convert,pdfsandwich --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts &\\\n sleep 5\\\n echo 'Attempting to create Dataset \\\"3DOC\\\"'\\\n curl 'http://fuseki:3030/$/datasets' -H \\\"Authorization: Basic $(echo -n admin:pw123 | base64)\\\" -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' --data 'dbName=3DOC&dbType=tdb'\\\n fg 1\\\n /bin/sh -c \\\"while sleep 1000; do :; done\\\"\"" diff --git a/DEV-README.md b/DEV-README.md index f809fd1..cc6a6a2 100644 --- a/DEV-README.md +++ b/DEV-README.md @@ -7,6 +7,15 @@ Use the vscode-devcontainer: this will start tridoc and fuseki. It will use TRIDOC_PWD = "pw123". Access tridoc from http://localhost:8000 and fuseki from http://localhost:8001 +You might need to `chown deno:deno` blobs/ and fuseki-base (attach bash to docker as root from outside) + +Watch the logs from outside of vscode with + +```sh +docker logs -f tridoc-backend_tridoc_1 +``` + + ## Tips & Tricks - Upload Backups with diff --git a/README.md b/README.md index 94486d5..621a8ec 100644 --- a/README.md +++ b/README.md @@ -108,12 +108,12 @@ When getting a comment, a JSON array with objects of the following structure is | Address | Method | Description | Request / Payload | Response | Implemented in Version | deno? | | - | - | - | - | - | - | - | | `/count` | GET | Count (matching) documents | [1](#f1) [3](#f3) | Number | 1.1.0 | ✅ | -| `/doc` | POST | Add / Store Document | PDF[5](#f5) | - | 1.1.0 | -| `/doc` | GET | Get List of all (matching) documents | [1](#f1) [2](#f2) [3](#f3) | Array of objects with document identifiers and titles (where available) | 1.1.0 | +| `/doc` | POST | Add / Store Document | PDF[5](#f5) | - | 1.1.0 | ✅ | +| `/doc` | GET | Get List of all (matching) documents | [1](#f1) [2](#f2) [3](#f3) | Array of objects with document identifiers and titles (where available) | 1.1.0 | ✅ | | `/doc/{id}` | GET | Get this document | - | PDF | 1.1.0 | ✅ | | `/doc/{id}` | DELETE | Deletes all metadata associated with the document. Document will not be deleted and is stays accessible over /doc/{id}. 
| - | - | 1.1.0 | | `/doc/{id}/comment` | POST | Add comment to document | Comment object / See above | - | 1.2.0 | -| `/doc/{id}/comment` | GET | Get comments | - | Array of comment objects | 1.2.0 | +| `/doc/{id}/comment` | GET | Get comments | - | Array of comment objects | 1.2.0 | ✅ | | `/doc/{id}/tag` | POST | Add a tag to document | Tag object / See above | - | 1.1.0 | | `/doc/{id}/tag` | GET | Get tags of document | - | Array of tag objects | 1.1.0 | | `/doc/{id}/tag/{tagLabel}` | DELETE | Remove tag from document | - | - | 1.1.0 | diff --git a/deno.jsonc b/deno.jsonc index 085b9a3..5413c78 100644 --- a/deno.jsonc +++ b/deno.jsonc @@ -5,7 +5,7 @@ } }, "tasks": { - "run": "deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttls --allow-run=convert,pdfsandwich,zip,unzip --allow-env=TRIDOC_PWD src/main.ts", - "run-watch": "deno run --watch --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run=convert,pdfsandwich,zip,unzip --allow-env=TRIDOC_PWD src/main.ts" + "run": "deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run=convert,pdfsandwich,pdftotext,zip,unzip --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts", + "run-watch": "deno run --watch --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run=convert,pdfsandwich,pdftotext,zip,unzip --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts" } } diff --git a/docker-cmd.sh b/docker-cmd.sh index 4bb7aa7..0780872 100644 --- a/docker-cmd.sh +++ b/docker-cmd.sh @@ -3,7 +3,7 @@ echo 'Attempting to create Dataset "3DOC"' curl 'http://fuseki:3030/$/datasets' -H "Authorization: Basic $(echo -n admin:pw123 | base64)" \ -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' --data 'dbName=3DOC&dbType=tdb' set -m -deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run=convert,pdfsandwich,zip,unzip --allow-env=TRIDOC_PWD src/main.ts & +deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run=convert,pdfsandwich,pdftotext,zip,unzip --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts & sleep 5 echo 'Attempting to create Dataset "3DOC"' curl 'http://fuseki:3030/$/datasets' -H "Authorization: Basic $(echo -n admin:pw123 | base64)" \ diff --git a/src/deps.ts b/src/deps.ts index 209487c..c2af758 100644 --- a/src/deps.ts +++ b/src/deps.ts @@ -1,6 +1,8 @@ export const VERSION = "1.6.0-alpha.deno"; export { encode } from "https://deno.land/std@0.160.0/encoding/base64.ts"; -export { emptyDir } from "https://deno.land/std@0.160.0/fs/mod.ts"; +export { emptyDir, ensureDir } from "https://deno.land/std@0.160.0/fs/mod.ts"; export { serve } from "https://deno.land/std@0.160.0/http/mod.ts"; export { writableStreamFromWriter } from "https://deno.land/std@0.160.0/streams/mod.ts"; + +export { nanoid } from "https://deno.land/x/nanoid@v3.0.0/mod.ts"; diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index eb28665..cf54c0e 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -1,12 +1,29 @@ +import { ensureDir } from "https://deno.land/std@0.160.0/fs/ensure_dir.ts"; +import { nanoid, writableStreamFromWriter } from "../deps.ts"; import { respond } from "../helpers/cors.ts"; +import { getText } from "../helpers/pdfprocessor.ts"; import { processParams } from "../helpers/processParams.ts"; import * as metafinder from "../meta/finder.ts"; +import * as metastore from "../meta/store.ts"; + +function getDir(id: string) { + return "./blobs/" + id.slice(0, 2) + "/" + id.slice(2, 6) + "/" + + id.slice(6, 14); +} 
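+ +// Blob paths are sharded by id prefix (2, then 4, then 8 characters) so that no single directory has to hold every stored PDF.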
function getPath(id: string) { return "./blobs/" + id.slice(0, 2) + "/" + id.slice(2, 6) + "/" + id.slice(6, 14) + "/" + id; } +function datecheck(request: Request) { + const url = new URL(request.url); + const regex = + /^(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-6]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-6]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-6]\d([+-][0-2]\d:[0-5]\d|Z))$/; + const date = url.searchParams.get("date"); + return date ? (regex.test(date) ? date : undefined) : undefined; +} + export async function getComments( _request: Request, match: URLPatternResult, @@ -43,3 +60,48 @@ export async function list( const response = await metafinder.getDocumentList(params); return respond(JSON.stringify(response)); } + +export async function postPDF( + request: Request, + _match: URLPatternResult, +): Promise { + const id = nanoid(); + const path = getPath(id); + await ensureDir(getDir(id)); + const pdf = await Deno.open(path, { write: true, create: true }); + const writableStream = writableStreamFromWriter(pdf); + await request.body?.pipeTo(writableStream); + console.log((new Date()).toISOString(), "Document created with id", id); + let text = await getText(path); + if (text.length < 4) { + // run OCR + const lang = Deno.env.get("OCR_LANG") || "fra+deu+eng"; + const p = Deno.run({ cmd: ["pdfsandwich", "-rgb", "-lang", lang, path] }); + const { success, code } = await p.status(); + if (!success) throw new Error("pdfsandwich failed with code " + code); + // pdfsandwich generates a file with the same name + _ocr + await Deno.rename(path + "_ocr", path); + text = await getText(path); + console.log((new Date()).toISOString(), id, ": OCR finished"); + } + // no await as we don’t care for the result - if it fails, the thumbnail will be created upon request. 
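+ // ImageMagick's convert renders page [0] of the PDF as a 300px-wide PNG ("-thumbnail 300x"), flattens transparency ("-alpha remove"), and writes the thumbnail next to the blob.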
+ Deno.run({ + cmd: [ + "convert", + "-thumbnail", + "300x", + "-alpha", + "remove", + `${path}[0]`, + `${path}.png`, + ], + }); + const date = datecheck(request); + await metastore.storeDocument({ id, text, date }); + return respond(undefined, { + headers: { + "Location": "/doc/" + id, + "Access-Control-Expose-Headers": "Location", + }, + }); +} diff --git a/src/handlers/raw.ts b/src/handlers/raw.ts index 1845781..c819489 100644 --- a/src/handlers/raw.ts +++ b/src/handlers/raw.ts @@ -33,13 +33,10 @@ export async function putZip( await request.body?.pipeTo(writableStream); const p = Deno.run({ cmd: ["unzip", zipPath] }); const { success, code } = await p.status(); - if (success) { - await Deno.remove(zipPath); - const turtleData = decoder.decode(await Deno.readFile("rdf.ttl")); - await Deno.remove("rdf.ttl"); - await restore(turtleData); - return respond("200: OK"); - } else { - throw new Error("unzip failed with code " + code); - } + if (!success) throw new Error("unzip failed with code " + code); + await Deno.remove(zipPath); + const turtleData = decoder.decode(await Deno.readFile("rdf.ttl")); + await Deno.remove("rdf.ttl"); + await restore(turtleData); + return respond("200: OK"); } diff --git a/src/helpers/cors.ts b/src/helpers/cors.ts index ac7b85e..907f2ad 100644 --- a/src/helpers/cors.ts +++ b/src/helpers/cors.ts @@ -5,7 +5,7 @@ export function respond(body?: BodyInit, init?: ResponseInit) { ...init?.headers, "Access-Control-Allow-Origin": "*", "Access-Control-Allow-Methods": "POST, PUT, DELETE, GET, OPTIONS", - "Access-Control-Allow-Headers": "Authorization", + "Access-Control-Allow-Headers": "Authorization, Content-Type", }, }); } diff --git a/src/helpers/pdfprocessor.ts b/src/helpers/pdfprocessor.ts new file mode 100644 index 0000000..585b3db --- /dev/null +++ b/src/helpers/pdfprocessor.ts @@ -0,0 +1,8 @@ +const decoder = new TextDecoder("utf-8"); + +export async function getText(path: string) { + const p = Deno.run({ cmd: ["pdftotext", path, "-"], stdout: "piped" }); + const { success, code } = await p.status(); + if (!success) throw new Error("pdftotext failed with code " + code); + return decoder.decode(await p.output()); +} diff --git a/src/meta/store.ts b/src/meta/store.ts index 808c6c7..9b23424 100644 --- a/src/meta/store.ts +++ b/src/meta/store.ts @@ -1,3 +1,10 @@ +function escapeLiteral(string: string) { + return string.replace(/\\/g, "\\\\").replace(/\n/g, "\\n").replace( + /\r/g, + "\\r", + ).replace(/'/g, "\\'").replace(/"/g, '\\"'); +} + export function restore(turtleData: string) { const statement = `CLEAR GRAPH ; INSERT DATA { @@ -12,3 +19,36 @@ export function restore(turtleData: string) { body: statement, }); } + +export async function storeDocument( + { id, text, date }: { id: string; text: string; date?: string }, +) { + const created = (date ? new Date(date) : new Date()).toISOString(); + const query = ` +PREFIX rdf: +PREFIX xsd: +PREFIX s: +INSERT DATA { + GRAPH { + rdf:type s:DigitalDocument ; + s:dateCreated "${created}"^^xsd:dateTime ; + s:identifier "${id}" ; + s:text "${escapeLiteral(text)}" . 
+ } +}`; + return await fetch("http://fuseki:3030/3DOC/update", { + method: "POST", + headers: { + "Authorization": "Basic " + btoa("admin:pw123"), + "Content-Type": "application/sparql-update", + }, + body: query, + }).then((response) => { + //console.log("Fuseki returned: "+response.status); + if (response.ok) { + return response; + } else { + throw new Error("Error from Fuseki: " + response.statusText); + } + }); +} diff --git a/src/server/routes.ts b/src/server/routes.ts index 8f40997..aeafbb0 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -60,7 +60,7 @@ export const routes: { }], "POST": [{ pattern: new URLPattern({ pathname: "/doc" }), - handler: notImplemented, + handler: doc.postPDF, }, { pattern: new URLPattern({ pathname: "/doc/:id/comment" }), handler: notImplemented, From 36ec78cfdf0926cb8b995950a78e594c0bf8a3af Mon Sep 17 00:00:00 2001 From: nleanba Date: Sun, 23 Oct 2022 09:57:48 +0000 Subject: [PATCH 15/90] added filename to GET /doc --- src/handlers/doc.ts | 10 ++++++++ src/meta/finder.ts | 68 ++++++++++++++++++++++++++++++------------- 2 files changed, 57 insertions(+), 21 deletions(-) diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index cf54c0e..660bb1f 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -40,10 +40,18 @@ export async function getPDF( const id = match.pathname.groups.id; const path = getPath(id); try { + const fileName = await metafinder.getBasicMeta(id).then(( + { title, created }, + ) => title || created || "document"); const file = await Deno.open(path, { read: true }); // Build a readable stream so the file doesn't have to be fully loaded into memory while we send it const readableStream = file.readable; - return respond(readableStream); + return respond(readableStream, { + headers: { + "content-disposition": `inline; filename="${encodeURI(fileName)}.pdf"`, + "content-type": "application/pdf", + }, + }); } catch (error) { if (error instanceof Deno.errors.NotFound) { return respond("404 Not Found", { status: 404 }); diff --git a/src/meta/finder.ts b/src/meta/finder.ts index 8a7dab2..fba26e8 100644 --- a/src/meta/finder.ts +++ b/src/meta/finder.ts @@ -1,5 +1,14 @@ import { Params } from "../helpers/processParams.ts"; +type SparqlJson = { + head: { + vars: string[]; + }; + results: { + bindings: { [key: string]: { type: string; value: string } }[]; + }; +}; + export async function getComments(id: string) { const query = `PREFIX rdf: PREFIX xsd: PREFIX tridoc: PREFIX s: SELECT DISTINCT ?d ?t WHERE { GRAPH { s:comment [ a s:Comment ; s:dateCreated ?d ; s:text ?t ] . } }`; @@ -27,8 +36,8 @@ SELECT DISTINCT ?d ?t WHERE { } else { throw new Error("" + response); } - }).then((json) => - json.results.bindings.map((binding: Record) => { + }).then((json: SparqlJson) => + json.results.bindings.map((binding) => { return { text: binding.t.value, created: binding.d.value }; }) ); @@ -109,22 +118,18 @@ export async function getDocumentList( "Content-Type": "application/sparql-query", }, body: body, - }).then((response) => response.json()).then((json) => - json.results.bindings.map( - ( - binding: Record, - ) => { - const result: Record = {}; - result.identifier = binding.identifier.value; - if (binding.title) { - result.title = binding.title.value; - } - if (binding.date) { - result.created = binding.date.value; - } - return result; - }, - ) + }).then((response) => response.json()).then((json: SparqlJson) => + json.results.bindings.map((binding) => { + const result: Record = {}; + result.identifier = binding.identifier.value; + if (binding.title) { + result.title = binding.title.value; + } + if (binding.date) { + result.created = binding.date.value; + } + 
return result; + }) ); } @@ -197,11 +202,34 @@ export async function getDocumentNumber( '") } UNION { ?s text:query (s:text "' + text + '")} } .\n' : "") + "}", - }).then((response) => response.json()).then((json) => - json.results.bindings[0].count.value as number + }).then((response) => response.json()).then((json: SparqlJson) => + parseInt(json.results.bindings[0].count.value, 10) ); } +export async function getBasicMeta(id: string) { + return await fetch("http://fuseki:3030/3DOC/query", { + method: "POST", + headers: { + "Authorization": "Basic " + btoa("admin:pw123"), + "Content-Type": "application/sparql-query", + }, + body: "PREFIX rdf: \n" + + "PREFIX s: \n" + + "SELECT ?title ?date\n" + + "WHERE {\n" + + ' ?s s:identifier "' + id + '" .\n' + + " ?s s:dateCreated ?date .\n" + + " OPTIONAL { ?s s:name ?title . }\n" + + "}", + }).then((response) => response.json()).then((json: SparqlJson) => { + return { + title: json.results.bindings[0].title?.value, + created: json.results.bindings[0].date?.value, + }; + }); +} + export async function getTagTypes(labels: string[]): Promise { const response = await fetch("http://fuseki:3030/3DOC/query", { method: "POST", From 1ef5b307a38b04ec5efe4fd6e5a870bb1c0502ac Mon Sep 17 00:00:00 2001 From: nleanba Date: Sun, 23 Oct 2022 10:18:44 +0000 Subject: [PATCH 16/90] added delete --- README.md | 2 +- src/handlers/doc.ts | 10 ++++++++ src/meta/delete.ts | 8 ++++++++ src/meta/finder.ts | 4 ++-- src/meta/fusekiFetch.ts | 26 ++++++++++++++++++++++ src/server/routes.ts | 2 +- 6 files changed, 48 insertions(+), 4 deletions(-) create mode 100644 src/meta/delete.ts create mode 100644 src/meta/fusekiFetch.ts diff --git a/README.md b/README.md index 94486d5..7664b31 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,7 @@ When getting a comment, a JSON array with objects of the following structure is | `/doc` | POST | Add / Store Document | PDF[5](#f5) | - | 1.1.0 | ✅ | | `/doc` | GET | Get List of all (matching) documents | [1](#f1) [2](#f2) [3](#f3) | Array of objects with document identifiers and titles (where available) | 1.1.0 | ✅ | | `/doc/{id}` | GET | Get this document | - | PDF | 1.1.0 | ✅ | -| `/doc/{id}` | DELETE | Deletes all metadata associated with the document. Document will not be deleted and is stays accessible over /doc/{id}. | - | - | 1.1.0 | +| `/doc/{id}` | DELETE | Deletes all metadata associated with the document. Document will not be deleted and stays accessible over /doc/{id}. | - | - | 1.1.0 | ✅ | | `/doc/{id}/comment` | POST | Add comment to document | Comment object / See above | - | 1.2.0 | | `/doc/{id}/comment` | GET | Get comments | - | Array of comment objects | 1.2.0 | ✅ | | `/doc/{id}/tag` | POST | Add a tag to document | Tag object / See above | - | 1.1.0 | diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index 660bb1f..9319fa6 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -3,6 +3,7 @@ import { nanoid, writableStreamFromWriter } from "../deps.ts"; import { respond } from "../helpers/cors.ts"; import { getText } from "../helpers/pdfprocessor.ts"; import { processParams } from "../helpers/processParams.ts"; +import * as metadelete from "../meta/delete.ts"; import * as metafinder from "../meta/finder.ts"; import * as metastore from "../meta/store.ts"; @@ -24,6 +25,15 @@ function datecheck(request: Request) { return date ? (regex.test(date) ? 
date : undefined) : undefined; } +export async function deleteDoc( + _request: Request, + match: URLPatternResult, +): Promise { + const id = match.pathname.groups.id; + await metadelete.deleteFile(id); + return respond(undefined, { status: 204 }); +} + export async function getComments( _request: Request, match: URLPatternResult, diff --git a/src/meta/delete.ts b/src/meta/delete.ts new file mode 100644 index 0000000..46cd3aa --- /dev/null +++ b/src/meta/delete.ts @@ -0,0 +1,8 @@ +import { fusekiFetch } from "./fusekiFetch.ts"; + +export function deleteFile(id: string) { + return fusekiFetch(` +WITH +DELETE { ?p ?o } +WHERE { ?p ?o }`); +} diff --git a/src/meta/finder.ts b/src/meta/finder.ts index fba26e8..f8ac358 100644 --- a/src/meta/finder.ts +++ b/src/meta/finder.ts @@ -224,8 +224,8 @@ export async function getBasicMeta(id: string) { "}", }).then((response) => response.json()).then((json: SparqlJson) => { return { - title: json.results.bindings[0].title?.value, - created: json.results.bindings[0].date?.value, + title: json.results.bindings[0]?.title?.value, + created: json.results.bindings[0]?.date?.value, }; }); } diff --git a/src/meta/fusekiFetch.ts b/src/meta/fusekiFetch.ts new file mode 100644 index 0000000..bc607a8 --- /dev/null +++ b/src/meta/fusekiFetch.ts @@ -0,0 +1,26 @@ +type SparqlJson = { + head: { + vars: string[]; + }; + results: { + bindings: { [key: string]: { type: string; value: string } }[]; + }; +}; + +export async function fusekiFetch(query: string): Promise { + console.log((new Date()).toISOString(), "→ FUSEKI", query); + return await fetch("http://fuseki:3030/3DOC/query", { + method: "POST", + headers: { + "Authorization": "Basic " + btoa("admin:pw123"), + "Content-Type": "application/sparql-query", + }, + body: query, + }).then((response) => { + if (response.ok) { + return response.json(); + } else { + throw new Error("Fuseki Error: " + response); + } + }); +} diff --git a/src/server/routes.ts b/src/server/routes.ts index aeafbb0..5fed9e9 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -80,7 +80,7 @@ export const routes: { }], "DELETE": [{ pattern: new URLPattern({ pathname: "/doc/:id" }), - handler: notImplemented, + handler: doc.deleteDoc, }, { pattern: new URLPattern({ pathname: "/doc/:id/tag/:tagLabel" }), handler: notImplemented, From 47285319a656bbdfcfb811302f39a3e148f37d87 Mon Sep 17 00:00:00 2001 From: nleanba Date: Sun, 23 Oct 2022 10:41:12 +0000 Subject: [PATCH 17/90] refactored to fusekiFetch/Update instead of fetch --- src/meta/delete.ts | 4 +- src/meta/finder.ts | 113 ++++++++++++---------------------------- src/meta/fusekiFetch.ts | 22 ++++++-- src/meta/store.ts | 35 +++---------- 4 files changed, 61 insertions(+), 113 deletions(-) diff --git a/src/meta/delete.ts b/src/meta/delete.ts index 46cd3aa..9398486 100644 --- a/src/meta/delete.ts +++ b/src/meta/delete.ts @@ -1,7 +1,7 @@ -import { fusekiFetch } from "./fusekiFetch.ts"; +import { fusekiUpdate } from "./fusekiFetch.ts"; export function deleteFile(id: string) { - return fusekiFetch(` + return fusekiUpdate(` WITH DELETE { ?p ?o } WHERE { ?p ?o }`); } diff --git a/src/meta/finder.ts b/src/meta/finder.ts index f8ac358..15bdf63 100644 --- a/src/meta/finder.ts +++ b/src/meta/finder.ts @@ -1,13 +1,5 @@ import { Params } from "../helpers/processParams.ts"; - -type SparqlJson = { - head: { - vars: string[]; - }; - results: { - bindings: { [key: string]: { type: string; value: string } }[]; - }; -}; +import { fusekiFetch } from "./fusekiFetch.ts"; export async function getComments(id: 
string) { const query = `PREFIX rdf: @@ -23,20 +15,7 @@ SELECT DISTINCT ?d ?t WHERE { ] . } }`; - return await fetch("http://fuseki:3030/3DOC/query", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-query", - }, - body: query, - }).then((response) => { - if (response.ok) { - return response.json(); - } else { - throw new Error("" + response); - } - }).then((json: SparqlJson) => + return await fusekiFetch(query).then((json) => json.results.bindings.map((binding) => { return { text: binding.t.value, created: binding.d.value }; }) @@ -111,14 +90,7 @@ export async function getDocumentList( "ORDER BY desc(?date)\n" + (limit ? "LIMIT " + limit + "\n" : "") + (offset ? "OFFSET " + offset : ""); - return await fetch("http://fuseki:3030/3DOC/query", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-query", - }, - body: body, - }).then((response) => response.json()).then((json: SparqlJson) => + return await fusekiFetch(body).then((json) => json.results.bindings.map((binding) => { const result: Record = {}; result.identifier = binding.identifier.value; @@ -183,46 +155,32 @@ export async function getDocumentNumber( ?tag${i} tridoc:label "${nottags[i].label}" . }`; } } - return await fetch("http://fuseki:3030/3DOC/query", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-query", - }, - body: "PREFIX rdf: \n" + - "PREFIX s: \n" + - "PREFIX tridoc: \n" + - "PREFIX text: \n" + - "SELECT (COUNT(DISTINCT ?s) as ?count)\n" + - "WHERE {\n" + - " ?s s:identifier ?identifier .\n" + - tagQuery + - (text - ? '{ { ?s text:query (s:name "' + text + - '") } UNION { ?s text:query (s:text "' + text + '")} } .\n' - : "") + - "}", - }).then((response) => response.json()).then((json: SparqlJson) => - parseInt(json.results.bindings[0].count.value, 10) - ); + return await fusekiFetch(` +PREFIX rdf: +PREFIX s: +PREFIX tridoc: +PREFIX text: +SELECT (COUNT(DISTINCT ?s) as ?count) +WHERE { + ?s s:identifier ?identifier . + ${tagQuery} + ${ + text + ? `{ { ?s text:query (s:name "${text}") } UNION { ?s text:query (s:text "${text}")} } .\n` + : "" + }}`).then((json) => parseInt(json.results.bindings[0].count.value, 10)); } export async function getBasicMeta(id: string) { - return await fetch("http://fuseki:3030/3DOC/query", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-query", - }, - body: "PREFIX rdf: \n" + - "PREFIX s: \n" + - "SELECT ?title ?date\n" + - "WHERE {\n" + - ' ?s s:identifier "' + id + '" .\n' + - " ?s s:dateCreated ?date .\n" + - " OPTIONAL { ?s s:name ?title . }\n" + - "}", - }).then((response) => response.json()).then((json: SparqlJson) => { + return await fusekiFetch(` +PREFIX rdf: +PREFIX s: +SELECT ?title ?date +WHERE { + ?s s:identifier "${id}" . + ?s s:dateCreated ?date . + OPTIONAL { ?s s:name ?title . 
} +}`).then((json) => { return { title: json.results.bindings[0]?.title?.value, created: json.results.bindings[0]?.date?.value, @@ -230,21 +188,14 @@ export async function getBasicMeta(id: string) { }); } -export async function getTagTypes(labels: string[]): Promise { - const response = await fetch("http://fuseki:3030/3DOC/query", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-query", - }, - body: `PREFIX tridoc: +export async function getTagTypes(labels: string[]) { + const json = await fusekiFetch(` +PREFIX tridoc: SELECT DISTINCT ?l ?t WHERE { VALUES ?l { "${ - labels.join('" "') - }" } ?s tridoc:label ?l . OPTIONAL { ?s tridoc:valueType ?t . } }`, - }); - const json = await response.json(); + labels.join('" "') + }" } ?s tridoc:label ?l . OPTIONAL { ?s tridoc:valueType ?t . } }`); return json.results.bindings.map( - (binding: Record) => { + (binding) => { const result_1 = []; result_1[0] = binding.l.value; if (binding.t) { diff --git a/src/meta/fusekiFetch.ts b/src/meta/fusekiFetch.ts index bc607a8..84b69d7 100644 --- a/src/meta/fusekiFetch.ts +++ b/src/meta/fusekiFetch.ts @@ -8,7 +8,7 @@ type SparqlJson = { }; export async function fusekiFetch(query: string): Promise { - console.log((new Date()).toISOString(), "→ FUSEKI", query); + console.log((new Date()).toISOString(), "→ FUSEKI QUERY", query, "\n"); return await fetch("http://fuseki:3030/3DOC/query", { method: "POST", headers: { @@ -16,11 +16,27 @@ export async function fusekiFetch(query: string): Promise { "Content-Type": "application/sparql-query", }, body: query, - }).then((response) => { + }).then(async (response) => { if (response.ok) { return response.json(); } else { - throw new Error("Fuseki Error: " + response); + throw new Error("Fuseki Error: " + await response.text()); + } + }); +} + +export async function fusekiUpdate(query: string): Promise { + console.log((new Date()).toISOString(), "→ FUSEKI UPDATE", query, "\n"); + return await fetch("http://fuseki:3030/3DOC/update", { + method: "POST", + headers: { + "Authorization": "Basic " + btoa("admin:pw123"), + "Content-Type": "application/sparql-query", + }, + body: query, + }).then(async (response) => { + if (!response.ok) { + throw new Error("Fuseki Error: " + await response.text()); } }); } diff --git a/src/meta/store.ts b/src/meta/store.ts index 9b23424..d7116ca 100644 --- a/src/meta/store.ts +++ b/src/meta/store.ts @@ -1,3 +1,5 @@ +import { fusekiUpdate } from "./fusekiFetch.ts"; + function escapeLiteral(string: string) { return string.replace(/\\/g, "\\\\").replace(/\n/g, "\\n").replace( /\r/g, @@ -6,18 +8,11 @@ function escapeLiteral(string: string) { } export function restore(turtleData: string) { - const statement = `CLEAR GRAPH ; - INSERT DATA { - GRAPH { ${turtleData} } - }`; - return fetch("http://fuseki:3030/3DOC/update", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-update", - }, - body: statement, - }); + return fusekiUpdate(` +CLEAR GRAPH ; +INSERT DATA { + GRAPH { ${turtleData} } +}`); } export async function storeDocument( @@ -36,19 +31,5 @@ INSERT DATA { s:text "${escapeLiteral(text)}" . 
} }`; - return await fetch("http://fuseki:3030/3DOC/update", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-update", - }, - body: query, - }).then((response) => { - //console.log("Fuseki returned: "+response.status); - if (response.ok) { - return response; - } else { - throw new Error("Error from Fuseki: " + response.statusText); - } - }); + return await fusekiUpdate(query); } From 1d4159873fbd76d306881e3d24e053e3a09d5b8c Mon Sep 17 00:00:00 2001 From: nleanba Date: Sun, 23 Oct 2022 12:02:18 +0000 Subject: [PATCH 18/90] added POST /doc/:id/comment --- src/handlers/doc.ts | 9 +++++++++ src/meta/store.ts | 19 +++++++++++++++++++ src/server/routes.ts | 2 +- 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index 9319fa6..919bfb6 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -79,6 +79,15 @@ export async function list( return respond(JSON.stringify(response)); } +export async function postComment( + request: Request, + match: URLPatternResult, +): Promise { + const id = match.pathname.groups.id; + await metastore.addComment(id, await request.text()); + return respond(undefined, { status: 204 }); +} + export async function postPDF( request: Request, _match: URLPatternResult, diff --git a/src/meta/store.ts b/src/meta/store.ts index d7116ca..2a590c5 100644 --- a/src/meta/store.ts +++ b/src/meta/store.ts @@ -7,6 +7,25 @@ function escapeLiteral(string: string) { ).replace(/'/g, "\\'").replace(/"/g, '\\"'); } +export async function addComment(id: string, text: string) { + const now = new Date(); + const query = ` +PREFIX rdf: +PREFIX xsd: +PREFIX tridoc: +PREFIX s: +INSERT DATA { + GRAPH { + s:comment [ + a s:Comment ; + s:dateCreated "${now.toISOString()}"^^xsd:dateTime ; + s:text "${escapeLiteral(text)}" + ] . 
+ } +}`; + return await fusekiUpdate(query); +} + export function restore(turtleData: string) { return fusekiUpdate(` CLEAR GRAPH ; diff --git a/src/server/routes.ts b/src/server/routes.ts index 5fed9e9..32149d4 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -63,7 +63,7 @@ export const routes: { handler: doc.postPDF, }, { pattern: new URLPattern({ pathname: "/doc/:id/comment" }), - handler: notImplemented, + handler: doc.postComment, }, { pattern: new URLPattern({ pathname: "/doc/:id/tag" }), handler: notImplemented, From a9828f30de8f54a42c9443896147a6a80c8ce33c Mon Sep 17 00:00:00 2001 From: nleanba Date: Sun, 23 Oct 2022 12:22:08 +0000 Subject: [PATCH 19/90] added GET /doc/:id/thumb --- src/handlers/doc.ts | 52 ++++++++++++++++++++++++++++++++++++++++- src/handlers/raw.ts | 4 ++-- src/meta/fusekiFetch.ts | 2 +- src/server/routes.ts | 2 +- src/server/server.ts | 6 ++--- 5 files changed, 58 insertions(+), 8 deletions(-) diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index 919bfb6..d5ba44d 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -63,13 +63,63 @@ export async function getPDF( }, }); } catch (error) { - if (!(error instanceof Deno.errors.NotFound)) { + if (error instanceof Deno.errors.NotFound) { return respond("404 Not Found", { status: 404 }); } throw error; } } +export async function getThumb( + _request: Request, + match: URLPatternResult, +): Promise { + const id = match.pathname.groups.id; + const path = getPath(id); + const fileName = await metafinder.getBasicMeta(id).then(( + { title, created }, + ) => title || created || "document"); + let thumb: Deno.FsFile; + try { + thumb = await Deno.open(path + ".png", { read: true }); + } catch (error) { + if (error instanceof Deno.errors.NotFound) { + try { + await Deno.stat(path); // Check if PDF exists → 404 otherwise + const p = Deno.run({ + cmd: [ + "convert", + "-thumbnail", + "300x", + "-alpha", + "remove", + `${path}[0]`, + `${path}.png`, + ], + }); + const { success, code } = await p.status(); + if (!success) throw new Error("convert failed with code " + code); + thumb = await Deno.open(path + ".png", { read: true }); + } catch (error) { + if (error instanceof Deno.errors.NotFound) { + return respond("404 Not Found", { status: 404 }); + } + throw error; + } + } else { + throw error; + } + } + // Build a readable stream so the file doesn't have to be fully loaded into memory while we send it + const readableStream = thumb.readable; + return respond(readableStream, { + headers: { + "content-disposition": `inline; filename="${encodeURI(fileName)}.png"`, + "content-type": "image/png", + }, + }); +} + export async function list( request: Request, _match: URLPatternResult, diff --git a/src/handlers/raw.ts b/src/handlers/raw.ts index c819489..22ab9f5 100644 --- a/src/handlers/raw.ts +++ b/src/handlers/raw.ts @@ -9,7 +9,7 @@ export async function deleteRdfFile( _match: URLPatternResult, ): Promise { await Deno.remove("rdf.ttl"); - return respond("200: OK"); + return respond(undefined, { status: 204 }); } export async function putZip( @@ -38,5 +38,5 @@ export async function putZip( const turtleData = decoder.decode(await Deno.readFile("rdf.ttl")); await Deno.remove("rdf.ttl"); await restore(turtleData); - return respond("200: OK"); + return respond(undefined, { status: 204 }); } diff --git a/src/meta/fusekiFetch.ts b/src/meta/fusekiFetch.ts index 84b69d7..f28f551 100644 --- a/src/meta/fusekiFetch.ts +++ b/src/meta/fusekiFetch.ts @@ -31,7 +31,7 @@ export async function fusekiUpdate(query: string): 
Promise { method: "POST", headers: { "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-query", + "Content-Type": "application/sparql-update", }, body: query, }).then(async (response) => { diff --git a/src/server/routes.ts b/src/server/routes.ts index 32149d4..7e06bff 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -32,7 +32,7 @@ export const routes: { handler: notImplemented, }, { pattern: new URLPattern({ pathname: "/doc/:id/thumb" }), - handler: notImplemented, + handler: doc.getThumb, }, { pattern: new URLPattern({ pathname: "/doc/:id/title" }), handler: notImplemented, diff --git a/src/server/server.ts b/src/server/server.ts index d49f105..4479ff1 100644 --- a/src/server/server.ts +++ b/src/server/server.ts @@ -19,7 +19,7 @@ const handler = async (request: Request): Promise => { path, "→ 401: Not Authenticated", ); - return respond("401: Not Authenticated", { + return respond("401 Not Authenticated", { status: 401, headers: { "WWW-Authenticate": "Basic" }, }); @@ -38,7 +38,7 @@ const handler = async (request: Request): Promise => { path, "→ 404: Path not found", ); - return respond("404: Path not found", { status: 404 }); + return respond("404 Path not found", { status: 404 }); } catch (error) { console.log( (new Date()).toISOString(), @@ -47,7 +47,7 @@ const handler = async (request: Request): Promise => { "→ 500: ", error, ); - return respond("500: " + error, { status: 500 }); + return respond("500 " + error, { status: 500 }); } }; From 6b67780b58f2fdef424d4c507c2026c6d8fb6ffa Mon Sep 17 00:00:00 2001 From: nleanba Date: Sun, 23 Oct 2022 12:32:24 +0000 Subject: [PATCH 20/90] added GET /doc/:id/title --- src/handlers/doc.ts | 10 +++++++++- src/server/routes.ts | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index d5ba44d..50ae584 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -78,7 +78,7 @@ export async function getThumb( const path = getPath(id); const fileName = await metafinder.getBasicMeta(id).then(( { title, created }, - ) => title || created || "document"); + ) => title || created || "thumbnail"); let thumb: Deno.FsFile; try { thumb = await Deno.open(path + ".png", { read: true }); @@ -120,6 +120,14 @@ export async function getThumb( }); } +export async function getTitle( + _request: Request, + match: URLPatternResult, +): Promise { + const id = match.pathname.groups.id; + return respond((await metafinder.getBasicMeta(id)).title); +} + export async function list( request: Request, _match: URLPatternResult, diff --git a/src/server/routes.ts b/src/server/routes.ts index 7e06bff..63fc577 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -35,7 +35,7 @@ export const routes: { handler: doc.getThumb, }, { pattern: new URLPattern({ pathname: "/doc/:id/title" }), - handler: notImplemented, + handler: doc.getTitle, }, { pattern: new URLPattern({ pathname: "/doc/:id/meta" }), handler: notImplemented, From c5576efc5e398735fb241ded323175178642e6aa Mon Sep 17 00:00:00 2001 From: nleanba Date: Sun, 23 Oct 2022 12:47:33 +0000 Subject: [PATCH 21/90] added GET /doc/:id/tag, /doc/:id/meta --- README.md | 10 +++++----- src/handlers/doc.ts | 24 +++++++++++++++++++++++- src/meta/finder.ts | 34 ++++++++++++++++++++++++++++++++++ src/server/routes.ts | 4 ++-- 4 files changed, 64 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 7664b31..16d8a3b 100644 --- a/README.md +++ b/README.md @@ -112,16 +112,16 @@ When getting a comment, a JSON 
array with objects of the following structure is | `/doc` | GET | Get List of all (matching) documents | [1](#f1) [2](#f2) [3](#f3) | Array of objects with document identifiers and titles (where available) | 1.1.0 | ✅ | | `/doc/{id}` | GET | Get this document | - | PDF | 1.1.0 | ✅ | | `/doc/{id}` | DELETE | Deletes all metadata associated with the document. Document will not be deleted and is stays accessible over /doc/{id}. | - | - | 1.1.0 | ✅ | -| `/doc/{id}/comment` | POST | Add comment to document | Comment object / See above | - | 1.2.0 | +| `/doc/{id}/comment` | POST | Add comment to document | Comment object / See above | - | 1.2.0 | ✅ | | `/doc/{id}/comment` | GET | Get comments | - | Array of comment objects | 1.2.0 | ✅ | | `/doc/{id}/tag` | POST | Add a tag to document | Tag object / See above | - | 1.1.0 | -| `/doc/{id}/tag` | GET | Get tags of document | - | Array of tag objects | 1.1.0 | +| `/doc/{id}/tag` | GET | Get tags of document | - | Array of tag objects | 1.1.0 | ✅ | | `/doc/{id}/tag/{tagLabel}` | DELETE | Remove tag from document | - | - | 1.1.0 | -| `/doc/{id}/thumb` | GET | Get document thumbnail | - | PNG (300px wide) | 1.5.0 | +| `/doc/{id}/thumb` | GET | Get document thumbnail | - | PNG (300px wide) | 1.5.0 | ✅ | | `/doc/{id}/title` | PUT | Set document title | `{"title": "the_Title"}` | - | 1.1.0 | -| `/doc/{id}/title` | GET | Get document title | - | `{"title": "the_Title"}` | 1.1.0 | +| `/doc/{id}/title` | GET | Get document title | - | `{"title": "the_Title"}` | 1.1.0 | ✅ | | `/doc/{id}/title` | DELETE | Reset document title | - | - | 1.1.0 | -| `/doc/{id}/meta` | GET | Get various metadata | - | `{"title": "the_Title", "tags":[...], "comments": [...] ... }` | 1.1.0 \| .comments & .created in 1.2.1 | +| `/doc/{id}/meta` | GET | Get various metadata | - | `{"title": "the_Title", "tags":[...], "comments": [...] ... }` | 1.1.0 \| .comments & .created in 1.2.1 | ✅ | | `/raw/rdf` | GET | Get all metadata as RDF. Useful for Backups | [4](#f4) | RDF, Content-Type defined over request Headers or ?accept. Fallback to text/turtle. | 1.1.0 | | `/raw/rdf` | DELETE | "Cancel" failed zip upload—use only if certain it’s done & failed | | | (deno only) | ✅ | | `/raw/zip` or `/raw/tgz` | GET | Get all data. Useful for backups | - | ZIP / TGZ containing blobs/ directory with all pdfs as stored within tridoc and a rdf.ttl file with all metadata. 
| 1.3.0 | diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index 50ae584..b82650c 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -70,6 +70,28 @@ export async function getPDF( } } +export async function getMeta( + _request: Request, + match: URLPatternResult, +): Promise { + const id = match.pathname.groups.id; + return respond( + JSON.stringify({ + ...(await metafinder.getBasicMeta(id)), + comments: await metafinder.getComments(id), + tags: await metafinder.getTags(id), + }), + ); +} + +export async function getTags( + _request: Request, + match: URLPatternResult, +): Promise { + const id = match.pathname.groups.id; + return respond(JSON.stringify(await metafinder.getTags(id))); +} + export async function getThumb( _request: Request, match: URLPatternResult, @@ -142,7 +164,7 @@ export async function postComment( match: URLPatternResult, ): Promise { const id = match.pathname.groups.id; - await metastore.addComment(id, await request.text()); + await metastore.addComment(id, (await request.json()).text); return respond(undefined, { status: 204 }); } diff --git a/src/meta/finder.ts b/src/meta/finder.ts index 15bdf63..f894d35 100644 --- a/src/meta/finder.ts +++ b/src/meta/finder.ts @@ -188,6 +188,40 @@ WHERE { }); } +export async function getTags(id: string) { + const query = ` +PREFIX rdf: +PREFIX xsd: +PREFIX tridoc: +PREFIX s: +SELECT DISTINCT ?label ?type ?v + WHERE { + GRAPH { + tridoc:tag ?tag . + { + ?tag tridoc:label ?label . + } + UNION + { + ?tag tridoc:value ?v ; + tridoc:parameterizableTag ?ptag . + ?ptag tridoc:label ?label ; + tridoc:valueType ?type . + } + } +}`; + return await fusekiFetch(query).then((json) => + json.results.bindings.map((binding) => { + return { + label: binding.label.value, + parameter: binding.type + ? { type: binding.type.value, value: binding.v.value } + : undefined, + }; + }) + ); +} + export async function getTagTypes(labels: string[]) { const json = await fusekiFetch(` PREFIX tridoc: diff --git a/src/server/routes.ts b/src/server/routes.ts index 63fc577..f4435d6 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -29,7 +29,7 @@ export const routes: { handler: doc.getComments, }, { pattern: new URLPattern({ pathname: "/doc/:id/tag" }), - handler: notImplemented, + handler: doc.getTags, }, { pattern: new URLPattern({ pathname: "/doc/:id/thumb" }), handler: doc.getThumb, @@ -38,7 +38,7 @@ export const routes: { handler: doc.getTitle, }, { pattern: new URLPattern({ pathname: "/doc/:id/meta" }), - handler: notImplemented, + handler: doc.getMeta, }, { pattern: new URLPattern({ pathname: "/raw/rdf" }), handler: notImplemented, From 4187119303be81b321e1bbe4c4e5c3b06b76eaed Mon Sep 17 00:00:00 2001 From: nleanba Date: Sun, 23 Oct 2022 13:07:15 +0000 Subject: [PATCH 22/90] added GET /raw/rdf --- README.md | 2 +- src/handlers/raw.ts | 12 ++++++++++++ src/meta/fusekiFetch.ts | 14 ++++++++++++++ src/server/routes.ts | 2 +- 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 16d8a3b..7942afe 100644 --- a/README.md +++ b/README.md @@ -122,7 +122,7 @@ When getting a comment, a JSON array with objects of the following structure is | `/doc/{id}/title` | GET | Get document title | - | `{"title": "the_Title"}` | 1.1.0 | ✅ | | `/doc/{id}/title` | DELETE | Reset document title | - | - | 1.1.0 | | `/doc/{id}/meta` | GET | Get various metadata | - | `{"title": "the_Title", "tags":[...], "comments": [...] ... }` | 1.1.0 \| .comments & .created in 1.2.1 | ✅ | -| `/raw/rdf` | GET | Get all metadata as RDF. 
Useful for Backups | [4](#f4) | RDF, Content-Type defined over request Headers or ?accept. Fallback to text/turtle. | 1.1.0 | +| `/raw/rdf` | GET | Get all metadata as RDF. Useful for Backups | [4](#f4) | RDF, Content-Type defined over request Headers or ?accept. Fallback to text/turtle. | 1.1.0 | ✅ | | `/raw/rdf` | DELETE | "Cancel" failed zip upload—use only if certain it’s done & failed | | | (deno only) | ✅ | | `/raw/zip` or `/raw/tgz` | GET | Get all data. Useful for backups | - | ZIP / TGZ containing blobs/ directory with all pdfs as stored within tridoc and a rdf.ttl file with all metadata. | 1.3.0 | | `/raw/zip` | PUT | Replace all data with backup zip | ZIP | Replaces the metadata and adds the blobs from the zip | 1.3.0 | ✅ | diff --git a/src/handlers/raw.ts b/src/handlers/raw.ts index 22ab9f5..e63a12c 100644 --- a/src/handlers/raw.ts +++ b/src/handlers/raw.ts @@ -1,5 +1,6 @@ import { emptyDir, writableStreamFromWriter } from "../deps.ts"; import { respond } from "../helpers/cors.ts"; +import { dump } from "../meta/fusekiFetch.ts"; import { restore } from "../meta/store.ts"; const decoder = new TextDecoder("utf-8"); @@ -12,6 +13,17 @@ export async function deleteRdfFile( return respond(undefined, { status: 204 }); } +export async function getRdf( + request: Request, + _match: URLPatternResult, +): Promise { + const url = new URL(request.url); + const accept = url.searchParams.has("accept") + ? decodeURIComponent(url.searchParams.get("accept")!) + : request.headers.get("Accept") || "text/turtle"; + return await dump(accept); +} + export async function putZip( request: Request, _match: URLPatternResult, diff --git a/src/meta/fusekiFetch.ts b/src/meta/fusekiFetch.ts index f28f551..afbaf5a 100644 --- a/src/meta/fusekiFetch.ts +++ b/src/meta/fusekiFetch.ts @@ -7,6 +7,20 @@ type SparqlJson = { }; }; +export function dump(accept = "text/turtle") { + const query = "CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }"; + console.log((new Date()).toISOString(), "→ FUSEKI QUERY", query, "\n"); + return fetch("http://fuseki:3030/3DOC/query", { + method: "POST", + headers: { + "Authorization": "Basic " + btoa("admin:pw123"), + "Content-Type": "application/sparql-query", + "Accept": accept, + }, + body: query, + }); +} + export async function fusekiFetch(query: string): Promise { console.log((new Date()).toISOString(), "→ FUSEKI QUERY", query, "\n"); return await fetch("http://fuseki:3030/3DOC/query", { diff --git a/src/server/routes.ts b/src/server/routes.ts index f4435d6..5fb3cf9 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -41,7 +41,7 @@ export const routes: { handler: doc.getMeta, }, { pattern: new URLPattern({ pathname: "/raw/rdf" }), - handler: notImplemented, + handler: raw.getRdf, }, { pattern: new URLPattern({ pathname: "/raw/zip" }), handler: notImplemented, From 520a4d006cab15fbd66224cbe5eac877229cc7ee Mon Sep 17 00:00:00 2001 From: nleanba Date: Sun, 23 Oct 2022 14:29:26 +0000 Subject: [PATCH 23/90] added GET /raw/tgz --- .devcontainer/docker-cmd.sh | 2 +- deno.jsonc | 5 +- docker-cmd.sh | 2 +- src/handlers/raw.ts | 105 ++++++++++++++++++++++++++++++++++-- src/server/routes.ts | 8 +-- 5 files changed, 111 insertions(+), 11 deletions(-) diff --git a/.devcontainer/docker-cmd.sh b/.devcontainer/docker-cmd.sh index cfe9532..6791b5b 100644 --- a/.devcontainer/docker-cmd.sh +++ b/.devcontainer/docker-cmd.sh @@ -3,7 +3,7 @@ echo 'Attempting to create Dataset "3DOC"' curl 'http://fuseki:3030/$/datasets' -H "Authorization: Basic $(echo -n admin:pw123 | base64)" \ -H 'Content-Type: 
application/x-www-form-urlencoded; charset=UTF-8' --data 'dbName=3DOC&dbType=tdb' set -m -deno run --watch --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run=convert,pdfsandwich,pdftotext,zip,unzip --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts & +deno run --watch --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts & sleep 5 echo 'Attempting to create Dataset "3DOC"' curl 'http://fuseki:3030/$/datasets' -H "Authorization: Basic $(echo -n admin:pw123 | base64)" \ diff --git a/deno.jsonc b/deno.jsonc index 5413c78..03da2df 100644 --- a/deno.jsonc +++ b/deno.jsonc @@ -5,7 +5,8 @@ } }, "tasks": { - "run": "deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttls --allow-run=convert,pdfsandwich,pdftotext,zip,unzip --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts", - "run-watch": "deno run --watch --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run=convert,pdfsandwich,pdftotext,zip,unzip --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts" + // --allow-run=convert,pdfsandwich,pdftotext,tar,zip,unzip,bash + "run": "deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttls --allow-run --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts", + "run-watch": "deno run --watch --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts" } } diff --git a/docker-cmd.sh b/docker-cmd.sh index 0780872..c707f9c 100644 --- a/docker-cmd.sh +++ b/docker-cmd.sh @@ -3,7 +3,7 @@ echo 'Attempting to create Dataset "3DOC"' curl 'http://fuseki:3030/$/datasets' -H "Authorization: Basic $(echo -n admin:pw123 | base64)" \ -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' --data 'dbName=3DOC&dbType=tdb' set -m -deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run=convert,pdfsandwich,pdftotext,zip,unzip --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts & +deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts & sleep 5 echo 'Attempting to create Dataset "3DOC"' curl 'http://fuseki:3030/$/datasets' -H "Authorization: Basic $(echo -n admin:pw123 | base64)" \ diff --git a/src/handlers/raw.ts b/src/handlers/raw.ts index e63a12c..b233700 100644 --- a/src/handlers/raw.ts +++ b/src/handlers/raw.ts @@ -1,3 +1,4 @@ +import { ensureDir } from "https://deno.land/std@0.160.0/fs/ensure_dir.ts"; import { emptyDir, writableStreamFromWriter } from "../deps.ts"; import { respond } from "../helpers/cors.ts"; import { dump } from "../meta/fusekiFetch.ts"; @@ -5,7 +6,7 @@ import { restore } from "../meta/store.ts"; const decoder = new TextDecoder("utf-8"); -export async function deleteRdfFile( +export async function deleteRDFFile( _request: Request, _match: URLPatternResult, ): Promise { @@ -13,7 +14,7 @@ export async function deleteRdfFile( return respond(undefined, { status: 204 }); } -export async function getRdf( +export async function getRDF( request: Request, _match: URLPatternResult, ): Promise { @@ -24,7 +25,105 @@ export async function getRdf( return await dump(accept); } -export async function putZip( +export async function getTGZ( + _request: Request, + _match: URLPatternResult, +): Promise { + const timestamp = "" + Date.now(); + const tarPath = "blobs/tgz-" + timestamp; + const rdfName = "rdf-" + timestamp; + const rdfPath = "blobs/rdf/" + rdfName; + await ensureDir("blobs/rdf"); + const rdf = await 
Deno.open(rdfPath, { + create: true, + write: true, + truncate: true, + }); + const writableStream = writableStreamFromWriter(rdf); + await (await dump()).body?.pipeTo(writableStream); + /* const p = Deno.run({ + cmd: [ + "tar", + `--transform=s|${rdfPath}|rdf.ttl|`, + `--exclude-tag=${rdfName}`, + "-czvf", + tarPath, + "blobs/* /", // no space! + ], + }); */ + const p = Deno.run({ + cmd: [ + "bash", + "-c", + `tar --transform="s|${rdfPath}|rdf.ttl|" --exclude-tag="${rdfName}" -czvf ${tarPath} blobs/*/`, + ], + }); + const { success, code } = await p.status(); + if (!success) throw new Error("tar -czf failed with code " + code); + await Deno.remove(rdfPath); + const tar = await Deno.open(tarPath); + // Build a readable stream so the file doesn't have to be fully loaded into memory while we send it + const readableStream = tar.readable; + return respond(readableStream, { + headers: { + "content-disposition": + `inline; filename="tridoc_backup_${timestamp}.tar.gz"`, + "content-type": "application/gzip", + }, + }); + // TODO: Figure out how to delete these files +} + +export async function getZIP( + _request: Request, + _match: URLPatternResult, +): Promise { + const timestamp = "" + Date.now(); + const tarPath = "blobs/tgz-" + timestamp; + const rdfName = "rdf-" + timestamp; + const rdfPath = "blobs/rdf/" + rdfName; + await ensureDir("blobs/rdf"); + const rdf = await Deno.open(rdfPath, { + create: true, + write: true, + truncate: true, + }); + const writableStream = writableStreamFromWriter(rdf); + await (await dump()).body?.pipeTo(writableStream); + /* const p = Deno.run({ + cmd: [ + "tar", + `--transform=s|${rdfPath}|rdf.ttl|`, + `--exclude-tag=${rdfName}`, + "-czvf", + tarPath, + "blobs/* /", // no space! + ], + }); */ + const p = Deno.run({ + cmd: [ + "bash", + "-c", + `tar --transform="s|${rdfPath}|rdf.ttl|" --exclude-tag="${rdfName}" -czvf ${tarPath} blobs/*/`, + ], + }); + const { success, code } = await p.status(); + if (!success) throw new Error("tar -czf failed with code " + code); + await Deno.remove(rdfPath); + const tar = await Deno.open(tarPath); + // Build a readable stream so the file doesn't have to be fully loaded into memory while we send it + const readableStream = tar.readable; + return respond(readableStream, { + headers: { + "content-disposition": + `inline; filename="tridoc_backup_${timestamp}.tar.gz"`, + "content-type": "application/gzip", + }, + }); + // TODO: Figure out how to delete these files +} + +export async function putZIP( request: Request, _match: URLPatternResult, ): Promise { diff --git a/src/server/routes.ts b/src/server/routes.ts index 5fb3cf9..dc1940b 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -41,13 +41,13 @@ export const routes: { handler: doc.getMeta, }, { pattern: new URLPattern({ pathname: "/raw/rdf" }), - handler: raw.getRdf, + handler: raw.getRDF, }, { pattern: new URLPattern({ pathname: "/raw/zip" }), handler: notImplemented, }, { pattern: new URLPattern({ pathname: "/raw/tgz" }), - handler: notImplemented, + handler: raw.getTGZ, }, { pattern: new URLPattern({ pathname: "/tag" }), handler: notImplemented, @@ -76,7 +76,7 @@ export const routes: { handler: notImplemented, }, { pattern: new URLPattern({ pathname: "/raw/zip" }), - handler: raw.putZip, + handler: raw.putZIP, }], "DELETE": [{ pattern: new URLPattern({ pathname: "/doc/:id" }), @@ -92,6 +92,6 @@ export const routes: { handler: notImplemented, }, { pattern: new URLPattern({ pathname: "/raw/rdf" }), - handler: raw.deleteRdfFile, + handler: raw.deleteRDFFile, }], }; 
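A quick way to exercise the new backup endpoint from patch 23 is a small client script. The sketch below is illustrative only: the `localhost:8000` origin and the `tridoc` basic-auth username are assumptions, not something these patches define; only the `/raw/tgz` route and the `TRIDOC_PWD` variable come from the code above.

```ts
// backup.ts: pull a backup archive from GET /raw/tgz and stream it to disk.
// Assumed: server origin and basic-auth username; adjust for your deployment.
const pwd = Deno.env.get("TRIDOC_PWD") ?? "tridoc";
const response = await fetch("http://localhost:8000/raw/tgz", {
  headers: { "Authorization": "Basic " + btoa("tridoc:" + pwd) },
});
if (!response.ok) throw new Error("Backup failed: " + response.status);
const file = await Deno.open("tridoc_backup.tar.gz", {
  create: true,
  write: true,
  truncate: true,
});
// Piping the body keeps the archive out of memory, mirroring how the server
// streams it out via the tar file's readable stream on its side.
await response.body?.pipeTo(file.writable);
```

Run it with `deno run --allow-net --allow-env --allow-write backup.ts`.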
From 8151fe3e0f30bd01524a09cf6789e05725c24e1d Mon Sep 17 00:00:00 2001 From: nleanba Date: Sun, 23 Oct 2022 15:17:19 +0000 Subject: [PATCH 24/90] added GET /raw/zip --- src/handlers/raw.ts | 49 +++++++++++++++++--------------------------- src/server/routes.ts | 2 +- 2 files changed, 20 insertions(+), 31 deletions(-) diff --git a/src/handlers/raw.ts b/src/handlers/raw.ts index b233700..a680a83 100644 --- a/src/handlers/raw.ts +++ b/src/handlers/raw.ts @@ -41,16 +41,6 @@ export async function getTGZ( }); const writableStream = writableStreamFromWriter(rdf); await (await dump()).body?.pipeTo(writableStream); - /* const p = Deno.run({ - cmd: [ - "tar", - `--transform=s|${rdfPath}|rdf.ttl|`, - `--exclude-tag=${rdfName}`, - "-czvf", - tarPath, - "blobs/* /", // no space! - ], - }); */ const p = Deno.run({ cmd: [ "bash", @@ -79,10 +69,8 @@ export async function getZIP( _match: URLPatternResult, ): Promise { const timestamp = "" + Date.now(); - const tarPath = "blobs/tgz-" + timestamp; - const rdfName = "rdf-" + timestamp; - const rdfPath = "blobs/rdf/" + rdfName; - await ensureDir("blobs/rdf"); + const zipPath = `blobs/zip-${timestamp}.zip`; + const rdfPath = "blobs/rdf-" + timestamp; const rdf = await Deno.open(rdfPath, { create: true, write: true, @@ -90,34 +78,35 @@ export async function getZIP( }); const writableStream = writableStreamFromWriter(rdf); await (await dump()).body?.pipeTo(writableStream); - /* const p = Deno.run({ + // Create zip + const p_1 = Deno.run({ cmd: [ - "tar", - `--transform=s|${rdfPath}|rdf.ttl|`, - `--exclude-tag=${rdfName}`, - "-czvf", - tarPath, - "blobs/* /", // no space! + "bash", + "-c", + `zip -r ${zipPath} blobs/*/ ${rdfPath} -x "blobs/rdf/*"`, ], - }); */ - const p = Deno.run({ + }); + const r_1 = await p_1.status(); + if (!r_1.success) throw new Error("zip failed with code " + r_1.code); + // move rdf-??? 
to rdf.zip + const p_2 = Deno.run({ cmd: [ "bash", "-c", - `tar --transform="s|${rdfPath}|rdf.ttl|" --exclude-tag="${rdfName}" -czvf ${tarPath} blobs/*/`, + `printf "@ ${rdfPath}\\n@=rdf.ttl\\n" | zipnote -w ${zipPath}`, ], }); - const { success, code } = await p.status(); - if (!success) throw new Error("tar -czf failed with code " + code); + const r_2 = await p_2.status(); + if (!r_2.success) throw new Error("zipnote failed with code " + r_2.code); await Deno.remove(rdfPath); - const tar = await Deno.open(tarPath); + const zip = await Deno.open(zipPath); // Build a readable stream so the file doesn't have to be fully loaded into memory while we send it - const readableStream = tar.readable; + const readableStream = zip.readable; return respond(readableStream, { headers: { "content-disposition": - `inline; filename="tridoc_backup_${timestamp}.tar.gz"`, - "content-type": "application/gzip", + `inline; filename="tridoc_backup_${timestamp}.zip"`, + "content-type": "application/zip", }, }); // TODO: Figure out how to delete these files diff --git a/src/server/routes.ts b/src/server/routes.ts index dc1940b..1898af9 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -44,7 +44,7 @@ export const routes: { handler: raw.getRDF, }, { pattern: new URLPattern({ pathname: "/raw/zip" }), - handler: notImplemented, + handler: raw.getZIP, }, { pattern: new URLPattern({ pathname: "/raw/tgz" }), handler: raw.getTGZ, From 6050e11ef2e57e997cdd13c894923b76b8a2bab7 Mon Sep 17 00:00:00 2001 From: nleanba Date: Sun, 23 Oct 2022 15:55:52 +0000 Subject: [PATCH 25/90] rm broken --- .vscode/launch.json | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 .vscode/launch.json diff --git a/.vscode/launch.json b/.vscode/launch.json deleted file mode 100644 index 01e694c..0000000 --- a/.vscode/launch.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. - // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", - "configurations": [ - { - "type": "node", - "request": "launch", - "name": "Launch Program", - "program": "${workspaceFolder}/lib/server", - "env": {"TRIDOC_PWD": "tridoc"} - } - ] -} \ No newline at end of file From 93bfaa63b1d3a8038803c50f14c9509d6dbc1fd0 Mon Sep 17 00:00:00 2001 From: nleanba Date: Sun, 23 Oct 2022 17:11:18 +0000 Subject: [PATCH 26/90] added GET /tag/:tagLabel --- README.md | 6 ++--- src/handlers/tag.ts | 39 ++++++++++++++++++++++++++++ src/helpers/processParams.ts | 12 ++++++++- src/meta/finder.ts | 49 +++++++++++++++++++++++------------- src/server/routes.ts | 5 ++-- 5 files changed, 88 insertions(+), 23 deletions(-) create mode 100644 src/handlers/tag.ts diff --git a/README.md b/README.md index 7942afe..b966510 100644 --- a/README.md +++ b/README.md @@ -124,11 +124,11 @@ When getting a comment, a JSON array with objects of the following structure is | `/doc/{id}/meta` | GET | Get various metadata | - | `{"title": "the_Title", "tags":[...], "comments": [...] ... }` | 1.1.0 \| .comments & .created in 1.2.1 | ✅ | | `/raw/rdf` | GET | Get all metadata as RDF. Useful for Backups | [4](#f4) | RDF, Content-Type defined over request Headers or ?accept. Fallback to text/turtle. | 1.1.0 | ✅ | | `/raw/rdf` | DELETE | "Cancel" failed zip upload—use only if certain it’s done & failed | | | (deno only) | ✅ | -| `/raw/zip` or `/raw/tgz` | GET | Get all data. 
Useful for backups | - | ZIP / TGZ containing blobs/ directory with all pdfs as stored within tridoc and a rdf.ttl file with all metadata. | 1.3.0 | ✅ |
 | `/raw/zip` | PUT | Replace all data with backup zip | ZIP | Replaces the metadata and adds the blobs from the zip | 1.3.0 | ✅ |
 | `/tag` | POST | Create new tag | See above | - | 1.1.0 |
-| `/tag` | GET | Get (list of) all tags | - | - | 1.1.0 |
-| `/tag/{tagLabel}` | GET | Get Documents with this tag. Same as `/doc?tag={tagLabel}` | [1](#f1) [2](#f2) | Array of objects with document identifiers and titles (where available) | 1.1.0 |
+| `/tag` | GET | Get (list of) all tags | - | - | 1.1.0 | ✅ |
+| `/tag/{tagLabel}` | GET | Get Documents with this tag. Same as `/doc?tag={tagLabel}` | [1](#f1) [2](#f2) | Array of objects with document identifiers and titles (where available) | 1.1.0 | ✅ |
 | `/tag/{tagLabel}` | DELETE | Delete this tag | - | - | 1.1.0 |
 | `/version` | GET | Get tridoc version | - | semver version number | 1.1.0 | ✅ |
 
diff --git a/src/handlers/tag.ts b/src/handlers/tag.ts
new file mode 100644
index 0000000..7d5ed81
--- /dev/null
+++ b/src/handlers/tag.ts
@@ -0,0 +1,39 @@
+import { respond } from "../helpers/cors.ts";
+import { processParams } from "../helpers/processParams.ts";
+import * as metafinder from "../meta/finder.ts";
+
+type TagCreate = {
+  label: string;
+  parameter?: {
+    type:
+      | "http://www.w3.org/2001/XMLSchema#decimal"
+      | "http://www.w3.org/2001/XMLSchema#date";
+  }; // only for parameterizable tags
+};
+type TagAdd = {
+  label: string;
+  parameter?: {
+    type:
+      | "http://www.w3.org/2001/XMLSchema#decimal"
+      | "http://www.w3.org/2001/XMLSchema#date";
+    value: string; // must be valid xsd:decimal or xsd:date, as specified in property type.
+  }; // only for parameterizable tags
+};
+
+export async function getDocs(
+  request: Request,
+  match: URLPatternResult,
+): Promise {
+  const params = await processParams(request, {
+    tags: [[match.pathname.groups.tagLabel]],
+  });
+  const response = await metafinder.getDocumentList(params);
+  return respond(JSON.stringify(response));
+}
+
+export async function getTagList(
+  _request: Request,
+  _match: URLPatternResult,
+): Promise {
+  return respond(JSON.stringify(await metafinder.getTagList()));
+}
diff --git a/src/helpers/processParams.ts b/src/helpers/processParams.ts
index d02de40..d03525a 100644
--- a/src/helpers/processParams.ts
+++ b/src/helpers/processParams.ts
@@ -19,6 +19,11 @@ type ParamTag = {
   maxIsExclusive?: boolean; //[5]
 };
 
+export type queryOverrides = {
+  tags?: string[][];
+  nottags?: string[][];
+};
+
 export type Params = {
   tags?: ParamTag[];
   nottags?: ParamTag[];
@@ -27,11 +32,16 @@ export type Params = {
   offset?: number;
 };
 
-export async function processParams(request: Request): Promise {
+export async function processParams(
+  request: Request,
+  queryOverrides?: queryOverrides,
+): Promise {
   const query = extractQuery(request);
   const result: Params = {};
   const tags = query.tag?.map((t) => t.split(";")) ?? [];
+  if (queryOverrides?.tags) tags.push(...queryOverrides.tags);
   const nottags = query.nottag?.map((t) => t.split(";")) ?? [];
+  if (queryOverrides?.nottags) nottags.push(...queryOverrides.nottags);
   result.text = query.text?.[0];
   result.limit = parseInt(query.limit?.[0], 10) > 0
    ? 
parseInt(query.limit[0]) diff --git a/src/meta/finder.ts b/src/meta/finder.ts index f894d35..06fb9b1 100644 --- a/src/meta/finder.ts +++ b/src/meta/finder.ts @@ -188,26 +188,41 @@ WHERE { }); } +export async function getTagList() { + const query = ` +PREFIX tridoc: +SELECT DISTINCT ?s ?label ?type +WHERE { + ?s tridoc:label ?label . + OPTIONAL { ?s tridoc:valueType ?type . } +}`; + return await fusekiFetch(query).then((json) => + json.results.bindings.map((binding) => { + return { + label: binding.label.value, + parameter: binding.type ? { type: binding.type.value } : undefined, + }; + }) + ); +} + export async function getTags(id: string) { const query = ` -PREFIX rdf: -PREFIX xsd: -PREFIX tridoc: -PREFIX s: -SELECT DISTINCT ?label ?type ?v - WHERE { - GRAPH { - tridoc:tag ?tag . +PREFIX tridoc: +SELECT DISTINCT ?label ?type ?v + WHERE { + GRAPH { + tridoc:tag ?tag . { - ?tag tridoc:label ?label . - } - UNION - { - ?tag tridoc:value ?v ; - tridoc:parameterizableTag ?ptag . - ?ptag tridoc:label ?label ; - tridoc:valueType ?type . - } + ?tag tridoc:label ?label . + } + UNION + { + ?tag tridoc:value ?v ; + tridoc:parameterizableTag ?ptag . + ?ptag tridoc:label ?label ; + tridoc:valueType ?type . + } } }`; return await fusekiFetch(query).then((json) => diff --git a/src/server/routes.ts b/src/server/routes.ts index 1898af9..3ec9967 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -3,6 +3,7 @@ import { count } from "../handlers/count.ts"; import * as doc from "../handlers/doc.ts"; import { notImplemented } from "../handlers/notImplemented.ts"; import * as raw from "../handlers/raw.ts"; +import * as tag from "../handlers/tag.ts"; import { version } from "../handlers/version.ts"; export const routes: { @@ -50,10 +51,10 @@ export const routes: { handler: raw.getTGZ, }, { pattern: new URLPattern({ pathname: "/tag" }), - handler: notImplemented, + handler: tag.getTagList, }, { pattern: new URLPattern({ pathname: "/tag/:tagLabel" }), - handler: notImplemented, + handler: tag.getDocs, }, { pattern: new URLPattern({ pathname: "/version" }), handler: version, From b0384de2a9ef7621938eea66322a51da63fcf5e3 Mon Sep 17 00:00:00 2001 From: nleanba Date: Sun, 23 Oct 2022 17:41:48 +0000 Subject: [PATCH 27/90] added POST /doc/:id/tag --- README.md | 2 +- src/handlers/doc.ts | 34 ++++++++++++++++++++++++++++++++++ src/handlers/tag.ts | 9 --------- src/meta/finder.ts | 1 + src/meta/store.ts | 29 +++++++++++++++++++++++++++++ src/server/routes.ts | 2 +- 6 files changed, 66 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index b966510..fa419c0 100644 --- a/README.md +++ b/README.md @@ -114,7 +114,7 @@ When getting a comment, a JSON array with objects of the following structure is | `/doc/{id}` | DELETE | Deletes all metadata associated with the document. Document will not be deleted and is stays accessible over /doc/{id}. 
| - | - | 1.1.0 | ✅ | | `/doc/{id}/comment` | POST | Add comment to document | Comment object / See above | - | 1.2.0 | ✅ | | `/doc/{id}/comment` | GET | Get comments | - | Array of comment objects | 1.2.0 | ✅ | -| `/doc/{id}/tag` | POST | Add a tag to document | Tag object / See above | - | 1.1.0 | +| `/doc/{id}/tag` | POST | Add a tag to document | Tag object / See above | - | 1.1.0 | ✅ | | `/doc/{id}/tag` | GET | Get tags of document | - | Array of tag objects | 1.1.0 | ✅ | | `/doc/{id}/tag/{tagLabel}` | DELETE | Remove tag from document | - | - | 1.1.0 | | `/doc/{id}/thumb` | GET | Get document thumbnail | - | PNG (300px wide) | 1.5.0 | ✅ | diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index b82650c..a0537f8 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -7,6 +7,16 @@ import * as metadelete from "../meta/delete.ts"; import * as metafinder from "../meta/finder.ts"; import * as metastore from "../meta/store.ts"; +type TagAdd = { + label: string; + parameter?: { + type: + | "http://www.w3.org/2001/XMLSchema#decimal" + | "http://www.w3.org/2001/XMLSchema#date"; + value: string; // must be valid xsd:decimal or xsd:date, as specified in property type. + }; // only for parameterizable tags +}; + function getDir(id: string) { return "./blobs/" + id.slice(0, 2) + "/" + id.slice(2, 6) + "/" + id.slice(6, 14); @@ -212,3 +222,27 @@ export async function postPDF( }, }); } + +export async function postTag( + request: Request, + match: URLPatternResult, +): Promise { + const id = match.pathname.groups.id; + const tagObject: TagAdd = await request.json(); + const [label, type] = + (await metafinder.getTagTypes([tagObject.label]))?.[0] ?? + [undefined, undefined]; + if (!label) { + return respond("Tag must exist before adding to a document", { + status: 400, + }); + } + if (tagObject.parameter?.type !== type) { + return respond("Type provided does not match", { status: 400 }); + } + if (tagObject.parameter?.type && !tagObject.parameter?.value) { + return respond("No value provided", { status: 400 }); + } + await metastore.addTag(id, tagObject.label, tagObject.parameter?.value, type); + return respond(undefined, { status: 204 }); +} diff --git a/src/handlers/tag.ts b/src/handlers/tag.ts index 7d5ed81..562ac12 100644 --- a/src/handlers/tag.ts +++ b/src/handlers/tag.ts @@ -10,15 +10,6 @@ type TagCreate = { | "http://www.w3.org/2001/XMLSchema#date"; }; // only for parameterizable tags }; -type TagAdd = { - label: string; - parameter?: { - type: - | "http://www.w3.org/2001/XMLSchema#decimal" - | "http://www.w3.org/2001/XMLSchema#date"; - value: string; // must be valid xsd:decimal or xsd:date, as specified in property type. - }; // only for parameterizable tags -}; export async function getDocs( request: Request, diff --git a/src/meta/finder.ts b/src/meta/finder.ts index 06fb9b1..ad3ed14 100644 --- a/src/meta/finder.ts +++ b/src/meta/finder.ts @@ -237,6 +237,7 @@ SELECT DISTINCT ?label ?type ?v ); } +// => [label, type?][] export async function getTagTypes(labels: string[]) { const json = await fusekiFetch(` PREFIX tridoc: diff --git a/src/meta/store.ts b/src/meta/store.ts index 2a590c5..c4af271 100644 --- a/src/meta/store.ts +++ b/src/meta/store.ts @@ -26,6 +26,35 @@ INSERT DATA { return await fusekiUpdate(query); } +export async function addTag( + id: string, + label: string, + value: string, + type: string, +) { + const tag = value + ? 
encodeURIComponent(label) + "/" + value + : encodeURIComponent(label); + const query = ` +PREFIX rdf: +PREFIX xsd: +PREFIX tridoc: +PREFIX s: +INSERT DATA { + GRAPH { + tridoc:tag .${ + value + ? ` + a tridoc:ParameterizedTag ; + tridoc:parameterizableTag ; + tridoc:value "${value}"^^<${type}> .` + : "" + } + } +}`; + return await fusekiUpdate(query); +} + export function restore(turtleData: string) { return fusekiUpdate(` CLEAR GRAPH ; diff --git a/src/server/routes.ts b/src/server/routes.ts index 3ec9967..3b6edba 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -67,7 +67,7 @@ export const routes: { handler: doc.postComment, }, { pattern: new URLPattern({ pathname: "/doc/:id/tag" }), - handler: notImplemented, + handler: doc.postTag, }, { pattern: new URLPattern({ pathname: "/tag" }), handler: notImplemented, From 690595b0bafd3dbfd52854a33a65531dab19123d Mon Sep 17 00:00:00 2001 From: nleanba Date: Mon, 24 Oct 2022 17:45:26 +0000 Subject: [PATCH 28/90] added PUT /doc/:id/title and fixed GET /doc/:id/title --- README.md | 2 +- src/handlers/doc.ts | 14 +++++++++++++- src/meta/store.ts | 11 +++++++++++ src/server/routes.ts | 2 +- 4 files changed, 26 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index fa419c0..1ab809f 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ When getting a comment, a JSON array with objects of the following structure is | `/doc/{id}/tag` | GET | Get tags of document | - | Array of tag objects | 1.1.0 | ✅ | | `/doc/{id}/tag/{tagLabel}` | DELETE | Remove tag from document | - | - | 1.1.0 | | `/doc/{id}/thumb` | GET | Get document thumbnail | - | PNG (300px wide) | 1.5.0 | ✅ | -| `/doc/{id}/title` | PUT | Set document title | `{"title": "the_Title"}` | - | 1.1.0 | +| `/doc/{id}/title` | PUT | Set document title | `{"title": "the_Title"}` | - | 1.1.0 | ✅ | | `/doc/{id}/title` | GET | Get document title | - | `{"title": "the_Title"}` | 1.1.0 | ✅ | | `/doc/{id}/title` | DELETE | Reset document title | - | - | 1.1.0 | | `/doc/{id}/meta` | GET | Get various metadata | - | `{"title": "the_Title", "tags":[...], "comments": [...] ... 
}` | 1.1.0 \| .comments & .created in 1.2.1 | ✅ | diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index a0537f8..e359bc3 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -157,7 +157,9 @@ export async function getTitle( match: URLPatternResult, ): Promise { const id = match.pathname.groups.id; - return respond((await metafinder.getBasicMeta(id)).title); + return respond( + JSON.stringify({ title: (await metafinder.getBasicMeta(id)).title }), + ); } export async function list( @@ -246,3 +248,13 @@ export async function postTag( await metastore.addTag(id, tagObject.label, tagObject.parameter?.value, type); return respond(undefined, { status: 204 }); } + +export async function putTitle( + request: Request, + match: URLPatternResult, +): Promise { + const id = match.pathname.groups.id; + const title: string = (await request.json())?.title; + await metastore.addTitle(id, title); + return respond(undefined, { status: 204 }); +} diff --git a/src/meta/store.ts b/src/meta/store.ts index c4af271..f2ac2ff 100644 --- a/src/meta/store.ts +++ b/src/meta/store.ts @@ -55,6 +55,17 @@ INSERT DATA { return await fusekiUpdate(query); } +export async function addTitle(id: string, title: string) { + const query = ` +PREFIX rdf: +PREFIX s: +WITH +DELETE { s:name ?o } +INSERT { s:name "${escapeLiteral(title)}" } +WHERE { OPTIONAL { s:name ?o } }`; + return await fusekiUpdate(query); +} + export function restore(turtleData: string) { return fusekiUpdate(` CLEAR GRAPH ; diff --git a/src/server/routes.ts b/src/server/routes.ts index 3b6edba..033a089 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -74,7 +74,7 @@ export const routes: { }], "PUT": [{ pattern: new URLPattern({ pathname: "/doc/:id/title" }), - handler: notImplemented, + handler: doc.putTitle, }, { pattern: new URLPattern({ pathname: "/raw/zip" }), handler: raw.putZIP, From 0479d8d6011b6847129da98e54777a3f9075cc2e Mon Sep 17 00:00:00 2001 From: nleanba Date: Mon, 24 Oct 2022 18:25:42 +0000 Subject: [PATCH 29/90] added POST /tag --- README.md | 2 +- src/handlers/tag.ts | 24 ++++++++++++++++++++++++ src/meta/store.ts | 22 ++++++++++++++++++++++ src/server/routes.ts | 2 +- 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1ab809f..e1ea603 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,7 @@ When getting a comment, a JSON array with objects of the following structure is | `/raw/rdf` | DELETE | "Cancel" failed zip upload—use only if certain it’s done & failed | | | (deno only) | ✅ | | `/raw/zip` or `/raw/tgz` | GET | Get all data. Useful for backups | - | ZIP / TGZ containing blobs/ directory with all pdfs as stored within tridoc and a rdf.ttl file with all metadata. | 1.3.0 | ✅ | | `/raw/zip` | PUT | Replace all data with backup zip | ZIP | Replaces the metadata and adds the blobs from the zip | 1.3.0 | ✅ | -| `/tag` | POST | Create new tag | See above | - | 1.1.0 | +| `/tag` | POST | Create new tag | See above | - | 1.1.0 | ✅ | | `/tag` | GET | Get (list of) all tags | - | - | 1.1.0 | ✅ | | `/tag/{tagLabel}` | GET | Get Documents with this tag. 
Same as `/doc?tag={tagLabel}` | [1](#f1) [2](#f2) | Array of objects with document identifiers and titles (where available) | 1.1.0 | ✅ | | `/tag/{tagLabel}` | DELETE | Delete this tag | - | - | 1.1.0 | diff --git a/src/handlers/tag.ts b/src/handlers/tag.ts index 562ac12..dc47706 100644 --- a/src/handlers/tag.ts +++ b/src/handlers/tag.ts @@ -1,6 +1,7 @@ import { respond } from "../helpers/cors.ts"; import { processParams } from "../helpers/processParams.ts"; import * as metafinder from "../meta/finder.ts"; +import * as metastore from "../meta/store.ts"; type TagCreate = { label: string; @@ -11,6 +12,29 @@ type TagCreate = { }; // only for parameterizable tags }; +export async function createTag( + request: Request, + _match: URLPatternResult, +): Promise { + const tagObject: TagCreate = await request.json(); + if (!tagObject?.label) return respond("No label provided", { status: 400 }); + if ( + tagObject?.parameter && + tagObject.parameter.type !== "http://www.w3.org/2001/XMLSchema#decimal" && + tagObject.parameter.type !== "http://www.w3.org/2001/XMLSchema#date" + ) return respond("Invalid type", { status: 400 }); + const tagList = await metafinder.getTagList(); + if (tagList.some((e) => e.label === tagObject.label)) { + return respond("Tag already exists", { status: 400 }); + } + const regex = /\s|^[.]{1,2}$|\/|\\|#|"|'|,|;|:|\?/; + if (regex.test(tagObject.label)) { + return respond("Label contains forbidden characters", { status: 400 }); + } + await metastore.createTag(tagObject.label, tagObject.parameter?.type); + return respond(undefined, { status: 204 }); +} + export async function getDocs( request: Request, match: URLPatternResult, diff --git a/src/meta/store.ts b/src/meta/store.ts index f2ac2ff..ed5826c 100644 --- a/src/meta/store.ts +++ b/src/meta/store.ts @@ -66,6 +66,28 @@ WHERE { OPTIONAL { s:name ?o } }`; return await fusekiUpdate(query); } +export async function createTag( + label: string, + type?: + | "http://www.w3.org/2001/XMLSchema#decimal" + | "http://www.w3.org/2001/XMLSchema#date", +) { + const tagType = type ? "ParameterizableTag" : "Tag"; + const valueType = type ? "tridoc:valueType <" + type + ">;\n" : ""; + const query = ` +PREFIX rdf: +PREFIX xsd: +PREFIX tridoc: +PREFIX s: +INSERT DATA { + GRAPH { + rdf:type tridoc:${tagType} ; + ${valueType} tridoc:label "${escapeLiteral(label)}" . 
+ } +}`; + return await fusekiUpdate(query); +} + export function restore(turtleData: string) { return fusekiUpdate(` CLEAR GRAPH ; diff --git a/src/server/routes.ts b/src/server/routes.ts index 033a089..e00379a 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -70,7 +70,7 @@ export const routes: { handler: doc.postTag, }, { pattern: new URLPattern({ pathname: "/tag" }), - handler: notImplemented, + handler: tag.createTag, }], "PUT": [{ pattern: new URLPattern({ pathname: "/doc/:id/title" }), From 876f0ae5486086587a0561974ac752e9d5829afd Mon Sep 17 00:00:00 2001 From: nleanba Date: Mon, 24 Oct 2022 18:47:55 +0000 Subject: [PATCH 30/90] added DELETE /tag/label and /doc/:id/tag/:label --- README.md | 4 ++-- src/handlers/doc.ts | 11 +++++++++++ src/handlers/tag.ts | 11 +++++++++++ src/meta/delete.ts | 40 ++++++++++++++++++++++++++++++++++++++++ src/server/routes.ts | 4 ++-- 5 files changed, 66 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index e1ea603..7c66da2 100644 --- a/README.md +++ b/README.md @@ -116,7 +116,7 @@ When getting a comment, a JSON array with objects of the following structure is | `/doc/{id}/comment` | GET | Get comments | - | Array of comment objects | 1.2.0 | ✅ | | `/doc/{id}/tag` | POST | Add a tag to document | Tag object / See above | - | 1.1.0 | ✅ | | `/doc/{id}/tag` | GET | Get tags of document | - | Array of tag objects | 1.1.0 | ✅ | -| `/doc/{id}/tag/{tagLabel}` | DELETE | Remove tag from document | - | - | 1.1.0 | +| `/doc/{id}/tag/{tagLabel}` | DELETE | Remove tag from document | - | - | 1.1.0 |✅ | | `/doc/{id}/thumb` | GET | Get document thumbnail | - | PNG (300px wide) | 1.5.0 | ✅ | | `/doc/{id}/title` | PUT | Set document title | `{"title": "the_Title"}` | - | 1.1.0 | ✅ | | `/doc/{id}/title` | GET | Get document title | - | `{"title": "the_Title"}` | 1.1.0 | ✅ | @@ -129,7 +129,7 @@ When getting a comment, a JSON array with objects of the following structure is | `/tag` | POST | Create new tag | See above | - | 1.1.0 | ✅ | | `/tag` | GET | Get (list of) all tags | - | - | 1.1.0 | ✅ | | `/tag/{tagLabel}` | GET | Get Documents with this tag. 
Same as `/doc?tag={tagLabel}` | [1](#f1) [2](#f2) | Array of objects with document identifiers and titles (where available) | 1.1.0 | ✅ | -| `/tag/{tagLabel}` | DELETE | Delete this tag | - | - | 1.1.0 | +| `/tag/{tagLabel}` | DELETE | Delete this tag | - | - | 1.1.0 | ✅ | | `/version` | GET | Get tridoc version | - | semver version number | 1.1.0 | ✅ | #### URL-Parameters supported: diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index e359bc3..401b4ba 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -44,6 +44,17 @@ export async function deleteDoc( return respond(undefined, { status: 204 }); } +export async function deleteTag( + _request: Request, + match: URLPatternResult, +) { + await metadelete.deleteTag( + decodeURIComponent(match.pathname.groups.tagLabel), + match.pathname.groups.id, + ); + return respond(undefined, { status: 204 }); +} + export async function getComments( _request: Request, match: URLPatternResult, diff --git a/src/handlers/tag.ts b/src/handlers/tag.ts index dc47706..8c376bb 100644 --- a/src/handlers/tag.ts +++ b/src/handlers/tag.ts @@ -1,5 +1,6 @@ import { respond } from "../helpers/cors.ts"; import { processParams } from "../helpers/processParams.ts"; +import * as metadelete from "../meta/delete.ts"; import * as metafinder from "../meta/finder.ts"; import * as metastore from "../meta/store.ts"; @@ -35,6 +36,16 @@ export async function createTag( return respond(undefined, { status: 204 }); } +export async function deleteTag( + _request: Request, + match: URLPatternResult, +) { + await metadelete.deleteTag( + decodeURIComponent(match.pathname.groups.tagLabel), + ); + return respond(undefined, { status: 204 }); +} + export async function getDocs( request: Request, match: URLPatternResult, diff --git a/src/meta/delete.ts b/src/meta/delete.ts index 9398486..ba33012 100644 --- a/src/meta/delete.ts +++ b/src/meta/delete.ts @@ -1,5 +1,45 @@ import { fusekiUpdate } from "./fusekiFetch.ts"; +export async function deleteTag(label: string, id?: string) { + await fusekiUpdate(` +PREFIX rdf: +PREFIX s: +PREFIX tridoc: +WITH +DELETE { + ${ + id ? ` tridoc:tag ?ptag + ` : `?ptag ?p ?o . + ?s ?p1 ?ptag` + } +} +WHERE { + ?ptag tridoc:parameterizableTag ?tag. + ?tag tridoc:label "${label}" . + OPTIONAL { ?ptag ?p ?o } + OPTIONAL { + ${id ? ` tridoc:tag ?ptag` : "?s ?p1 ?ptag"} + } +}`); + await fusekiUpdate(` +PREFIX rdf: +PREFIX s: +PREFIX tridoc: +WITH +DELETE { + ${ + id ? ` tridoc:tag ?tag` : `?tag ?p ?o . + ?s ?p1 ?tag` + } +} +WHERE { + ?tag tridoc:label "${label}" . + OPTIONAL { ?tag ?p ?o } + OPTIONAL { + ${id ? 
` ?p1 ?tag` : "?s ?p1 ?tag"} + } +}`); +} + export function deleteFile(id: string) { return fusekiUpdate(` WITH diff --git a/src/server/routes.ts b/src/server/routes.ts index e00379a..d784651 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -84,13 +84,13 @@ export const routes: { handler: doc.deleteDoc, }, { pattern: new URLPattern({ pathname: "/doc/:id/tag/:tagLabel" }), - handler: notImplemented, + handler: doc.deleteTag, }, { pattern: new URLPattern({ pathname: "/doc/:id/title" }), handler: notImplemented, }, { pattern: new URLPattern({ pathname: "/tag/:tagLabel" }), - handler: notImplemented, + handler: tag.deleteTag, }, { pattern: new URLPattern({ pathname: "/raw/rdf" }), handler: raw.deleteRDFFile, From aa92beb52e3d2dec8f0762eaca09cbd4973b94a4 Mon Sep 17 00:00:00 2001 From: nleanba Date: Mon, 24 Oct 2022 19:01:23 +0000 Subject: [PATCH 31/90] refined HTTP response codes --- src/handlers/doc.ts | 6 +++--- src/handlers/tag.ts | 2 +- src/meta/delete.ts | 18 ++++++++++-------- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index 401b4ba..007f36f 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -188,7 +188,7 @@ export async function postComment( ): Promise { const id = match.pathname.groups.id; await metastore.addComment(id, (await request.json()).text); - return respond(undefined, { status: 204 }); + return respond(undefined, { status: 201 }); } export async function postPDF( @@ -257,7 +257,7 @@ export async function postTag( return respond("No value provided", { status: 400 }); } await metastore.addTag(id, tagObject.label, tagObject.parameter?.value, type); - return respond(undefined, { status: 204 }); + return respond(undefined, { status: 201 }); } export async function putTitle( @@ -267,5 +267,5 @@ export async function putTitle( const id = match.pathname.groups.id; const title: string = (await request.json())?.title; await metastore.addTitle(id, title); - return respond(undefined, { status: 204 }); + return respond(undefined, { status: 201 }); } diff --git a/src/handlers/tag.ts b/src/handlers/tag.ts index 8c376bb..656547e 100644 --- a/src/handlers/tag.ts +++ b/src/handlers/tag.ts @@ -33,7 +33,7 @@ export async function createTag( return respond("Label contains forbidden characters", { status: 400 }); } await metastore.createTag(tagObject.label, tagObject.parameter?.type); - return respond(undefined, { status: 204 }); + return respond(undefined, { status: 201 }); } export async function deleteTag( diff --git a/src/meta/delete.ts b/src/meta/delete.ts index ba33012..f7a6ddb 100644 --- a/src/meta/delete.ts +++ b/src/meta/delete.ts @@ -1,16 +1,17 @@ import { fusekiUpdate } from "./fusekiFetch.ts"; export async function deleteTag(label: string, id?: string) { - await fusekiUpdate(` + await Promise.allSettled([ + fusekiUpdate(` PREFIX rdf: PREFIX s: PREFIX tridoc: WITH DELETE { ${ - id ? ` tridoc:tag ?ptag + ` : `?ptag ?p ?o . + id ? ` tridoc:tag ?ptag + ` : `?ptag ?p ?o . ?s ?p1 ?ptag` - } + } } WHERE { ?ptag tridoc:parameterizableTag ?tag. @@ -19,17 +20,17 @@ WHERE { OPTIONAL { ${id ? ` tridoc:tag ?ptag` : "?s ?p1 ?ptag"} } -}`); - await fusekiUpdate(` +}`), + fusekiUpdate(` PREFIX rdf: PREFIX s: PREFIX tridoc: WITH DELETE { ${ - id ? ` tridoc:tag ?tag` : `?tag ?p ?o . + id ? ` tridoc:tag ?tag` : `?tag ?p ?o . ?s ?p1 ?tag` - } + } } WHERE { ?tag tridoc:label "${label}" . @@ -37,7 +38,8 @@ WHERE { OPTIONAL { ${id ? 
` ?p1 ?tag` : "?s ?p1 ?tag"}
   }
-}`);
+}`),
+  ]);
 }
 
 export function deleteFile(id: string) {

From 8c4d889eb5c99f348a1e9445d8b0619f0d0d157a Mon Sep 17 00:00:00 2001
From: nleanba
Date: Mon, 24 Oct 2022 19:06:22 +0000
Subject: [PATCH 32/90] implemented DELETE /doc/:id/title

---
 README.md            | 48 ++++++++++++++++++++++----------------------
 src/handlers/doc.ts  |  8 ++++++++
 src/meta/delete.ts   | 14 ++++++++++---
 src/server/routes.ts |  3 +--
 4 files changed, 44 insertions(+), 29 deletions(-)

diff --git a/README.md b/README.md
index 7c66da2..58948b9 100644
--- a/README.md
+++ b/README.md
@@ -105,32 +105,32 @@ When getting a comment, a JSON array with objects of the following structure is
 
 ## API
 
-| Address | Method | Description | Request / Payload | Response | Implemented in Version | deno? |
+| Address | Method | Description | Request / Payload | Response | Implemented in Version |
 | - | - | - | - | - | - | - |
-| `/count` | GET | Count (matching) documents | [1](#f1) [3](#f3) | Number | 1.1.0 | ✅ |
-| `/doc` | POST | Add / Store Document | PDF[5](#f5) | - | 1.1.0 | ✅ |
-| `/doc` | GET | Get List of all (matching) documents | [1](#f1) [2](#f2) [3](#f3) | Array of objects with document identifiers and titles (where available) | 1.1.0 | ✅ |
-| `/doc/{id}` | GET | Get this document | - | PDF | 1.1.0 | ✅ |
-| `/doc/{id}` | DELETE | Deletes all metadata associated with the document. Document will not be deleted and is stays accessible over /doc/{id}. | - | - | 1.1.0 | ✅ |
-| `/doc/{id}/comment` | POST | Add comment to document | Comment object / See above | - | 1.2.0 | ✅ |
-| `/doc/{id}/comment` | GET | Get comments | - | Array of comment objects | 1.2.0 | ✅ |
-| `/doc/{id}/tag` | POST | Add a tag to document | Tag object / See above | - | 1.1.0 | ✅ |
-| `/doc/{id}/tag` | GET | Get tags of document | - | Array of tag objects | 1.1.0 | ✅ |
-| `/doc/{id}/tag/{tagLabel}` | DELETE | Remove tag from document | - | - | 1.1.0 | ✅ |
-| `/doc/{id}/thumb` | GET | Get document thumbnail | - | PNG (300px wide) | 1.5.0 | ✅ |
-| `/doc/{id}/title` | PUT | Set document title | `{"title": "the_Title"}` | - | 1.1.0 | ✅ |
-| `/doc/{id}/title` | GET | Get document title | - | `{"title": "the_Title"}` | 1.1.0 | ✅ |
+| `/count` | GET | Count (matching) documents | [1](#f1) [3](#f3) | Number | 1.1.0 |
+| `/doc` | POST | Add / Store Document | PDF[5](#f5) | - | 1.1.0 |
+| `/doc` | GET | Get List of all (matching) documents | [1](#f1) [2](#f2) [3](#f3) | Array of objects with document identifiers and titles (where available) | 1.1.0 |
+| `/doc/{id}` | GET | Get this document | - | PDF | 1.1.0 |
+| `/doc/{id}` | DELETE | Deletes all metadata associated with the document. Document will not be deleted and stays accessible over /doc/{id}. 
| - | - | 1.1.0 |
+| `/doc/{id}/comment` | POST | Add comment to document | Comment object / See above | - | 1.2.0 |
+| `/doc/{id}/comment` | GET | Get comments | - | Array of comment objects | 1.2.0 |
+| `/doc/{id}/tag` | POST | Add a tag to document | Tag object / See above | - | 1.1.0 |
+| `/doc/{id}/tag` | GET | Get tags of document | - | Array of tag objects | 1.1.0 |
+| `/doc/{id}/tag/{tagLabel}` | DELETE | Remove tag from document | - | - | 1.1.0 |
+| `/doc/{id}/thumb` | GET | Get document thumbnail | - | PNG (300px wide) | 1.5.0 |
+| `/doc/{id}/title` | PUT | Set document title | `{"title": "the_Title"}` | - | 1.1.0 |
+| `/doc/{id}/title` | GET | Get document title | - | `{"title": "the_Title"}` | 1.1.0 |
 | `/doc/{id}/title` | DELETE | Reset document title | - | - | 1.1.0 |
+| `/doc/{id}/meta` | GET | Get various metadata | - | `{"title": "the_Title", "tags":[...], "comments": [...] ... }` | 1.1.0 \| .comments & .created in 1.2.1 |
+| `/raw/rdf` | GET | Get all metadata as RDF. Useful for Backups | [4](#f4) | RDF, Content-Type defined over request Headers or ?accept. Fallback to text/turtle. | 1.1.0 |
+| `/raw/rdf` | DELETE | "Cancel" a failed zip upload. Use only if you are certain the upload has finished and failed | - | - | (deno only) |
+| `/raw/zip` or `/raw/tgz` | GET | Get all data. Useful for backups | - | ZIP / TGZ containing blobs/ directory with all pdfs as stored within tridoc and a rdf.ttl file with all metadata. | 1.3.0 |
+| `/raw/zip` | PUT | Replace all data with backup zip | ZIP | Replaces the metadata and adds the blobs from the zip | 1.3.0 |
+| `/tag` | POST | Create new tag | See above | - | 1.1.0 |
+| `/tag` | GET | Get (list of) all tags | - | - | 1.1.0 |
+| `/tag/{tagLabel}` | GET | Get Documents with this tag. 
Same as `/doc?tag={tagLabel}` | [1](#f1) [2](#f2) | Array of objects with document identifiers and titles (where available) | 1.1.0 |
+| `/tag/{tagLabel}` | DELETE | Delete this tag | - | - | 1.1.0 |
+| `/version` | GET | Get tridoc version | - | semver version number | 1.1.0 |
 
 #### URL-Parameters supported:
 
diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts
index 007f36f..471d0cb 100644
--- a/src/handlers/doc.ts
+++ b/src/handlers/doc.ts
@@ -54,6 +54,14 @@ export async function deleteTag(
   );
   return respond(undefined, { status: 204 });
 }
+export async function deleteTitle(
+  _request: Request,
+  match: URLPatternResult,
+): Promise<Response> {
+  const id = match.pathname.groups.id;
+  await metadelete.deleteTitle(id);
+  return respond(undefined, { status: 204 });
+}
 
 export async function getComments(
   _request: Request,
diff --git a/src/meta/delete.ts b/src/meta/delete.ts
index f7a6ddb..be9e4bc 100644
--- a/src/meta/delete.ts
+++ b/src/meta/delete.ts
@@ -1,5 +1,12 @@
 import { fusekiUpdate } from "./fusekiFetch.ts";
 
+export function deleteFile(id: string) {
+  return fusekiUpdate(`
+WITH 
+DELETE { ?p ?o }
+WHERE { ?p ?o }`);
+}
+
 export async function deleteTag(label: string, id?: string) {
   await Promise.allSettled([
     fusekiUpdate(`
@@ -42,9 +49,10 @@ WHERE {
   ]);
 }
 
-export function deleteFile(id: string) {
+export function deleteTitle(id: string) {
   return fusekiUpdate(`
+PREFIX s: 
 WITH 
-DELETE { ?p ?o }
-WHERE { ?p ?o }`);
+DELETE { s:name ?o }
+WHERE { s:name ?o }`);
 }
diff --git a/src/server/routes.ts b/src/server/routes.ts
index d784651..231fd6c 100644
--- a/src/server/routes.ts
+++ b/src/server/routes.ts
@@ -1,7 +1,6 @@
 import { options } from "../handlers/cors.ts";
 import { count } from "../handlers/count.ts";
 import * as doc from "../handlers/doc.ts";
-import { notImplemented } from "../handlers/notImplemented.ts";
 import * as raw from "../handlers/raw.ts";
 import * as tag from "../handlers/tag.ts";
 import { version } from "../handlers/version.ts";
@@ -87,7 +86,7 @@ export const routes: {
     handler: doc.deleteTag,
   }, {
     pattern: new URLPattern({ pathname: "/doc/:id/title" }),
-    handler: notImplemented,
+    handler: doc.deleteTitle,
   }, {
     pattern: new URLPattern({ pathname: "/tag/:tagLabel" }),
     handler: tag.deleteTag,

From 30e234ed326cc89c8c5e38c875432cdb1ee9a569 Mon Sep 17 00:00:00 2001
From: nleanba <25827850+nleanba@users.noreply.github.com>
Date: Fri, 13 Jan 2023 19:49:53 +0100
Subject: [PATCH 33/90] added message about file permissions as temp fix

---
 src/server/server.ts | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/server/server.ts b/src/server/server.ts
index 4479ff1..e151cbc 100644
--- a/src/server/server.ts
+++ b/src/server/server.ts
@@ -40,6 +40,12 @@ const handler = async (request: Request): Promise => {
     );
     return respond("404 Path not found", { status: 404 });
   } catch (error) {
+    let message;
+    if (error instanceof Deno.errors.PermissionDenied) {
+      message = "Got “Permission Denied” trying to access the file on disk.\n\nPlease run ```docker exec -u 0 [name of backend-container] chmod -R a+r ./blobs/ rdf.ttl``` on the host server to fix this and prevent similar issues in the future.";
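+      // Temp fix (see commit message): the server process cannot repair the
+      // file permissions itself, so point the operator at a manual chmod.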
+ } console.log( (new Date()).toISOString(), request.method, @@ -47,7 +51,7 @@ const handler = async (request: Request): Promise => { "→ 500: ", error, ); - return respond("500 " + error, { status: 500 }); + return respond("500 " + (message || error), { status: 500 }); } }; From b8752064301e3f807c9d28778bc7fabf89c881b2 Mon Sep 17 00:00:00 2001 From: nleanba <25827850+nleanba@users.noreply.github.com> Date: Sun, 15 Jan 2023 15:28:34 +0100 Subject: [PATCH 34/90] =?UTF-8?q?Added=20correct=20content-type=20for=20al?= =?UTF-8?q?l=20JSON=20responses=20=E2=86=92=20v1.6.0-alpha.deno.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/deps.ts | 2 +- src/handlers/doc.ts | 28 +++++++++++++++++++++++++--- src/handlers/tag.ts | 16 +++++++++++++--- src/server/server.ts | 2 +- 4 files changed, 40 insertions(+), 8 deletions(-) diff --git a/src/deps.ts b/src/deps.ts index c2af758..85736e4 100644 --- a/src/deps.ts +++ b/src/deps.ts @@ -1,4 +1,4 @@ -export const VERSION = "1.6.0-alpha.deno"; +export const VERSION = "1.6.0-alpha.deno.1"; export { encode } from "https://deno.land/std@0.160.0/encoding/base64.ts"; export { emptyDir, ensureDir } from "https://deno.land/std@0.160.0/fs/mod.ts"; diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index 471d0cb..db7b454 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -69,7 +69,11 @@ export async function getComments( ): Promise { const id = match.pathname.groups.id; const response = await metafinder.getComments(id); - return respond(JSON.stringify(response)); + return respond(JSON.stringify(response), { + headers: { + "content-type": "application/json; charset=utf-8", + }, + }); } export async function getPDF( @@ -110,6 +114,11 @@ export async function getMeta( comments: await metafinder.getComments(id), tags: await metafinder.getTags(id), }), + { + headers: { + "content-type": "application/json; charset=utf-8", + }, + }, ); } @@ -118,7 +127,11 @@ export async function getTags( match: URLPatternResult, ): Promise { const id = match.pathname.groups.id; - return respond(JSON.stringify(await metafinder.getTags(id))); + return respond(JSON.stringify(await metafinder.getTags(id)), { + headers: { + "content-type": "application/json; charset=utf-8", + }, + }); } export async function getThumb( @@ -178,6 +191,11 @@ export async function getTitle( const id = match.pathname.groups.id; return respond( JSON.stringify({ title: (await metafinder.getBasicMeta(id)).title }), + { + headers: { + "content-type": "application/json; charset=utf-8", + }, + }, ); } @@ -187,7 +205,11 @@ export async function list( ): Promise { const params = await processParams(request); const response = await metafinder.getDocumentList(params); - return respond(JSON.stringify(response)); + return respond(JSON.stringify(response), { + headers: { + "content-type": "application/json; charset=utf-8", + }, + }); } export async function postComment( diff --git a/src/handlers/tag.ts b/src/handlers/tag.ts index 656547e..27cf821 100644 --- a/src/handlers/tag.ts +++ b/src/handlers/tag.ts @@ -23,7 +23,9 @@ export async function createTag( tagObject?.parameter && tagObject.parameter.type !== "http://www.w3.org/2001/XMLSchema#decimal" && tagObject.parameter.type !== "http://www.w3.org/2001/XMLSchema#date" - ) return respond("Invalid type", { status: 400 }); + ) { + return respond("Invalid type", { status: 400 }); + } const tagList = await metafinder.getTagList(); if (tagList.some((e) => e.label === tagObject.label)) { return respond("Tag already 
exists", { status: 400 }); @@ -54,12 +56,20 @@ export async function getDocs( tags: [[match.pathname.groups.tagLabel]], }); const response = await metafinder.getDocumentList(params); - return respond(JSON.stringify(response)); + return respond(JSON.stringify(response), { + headers: { + "content-type": "application/json; charset=utf-8", + }, + }); } export async function getTagList( _request: Request, _match: URLPatternResult, ): Promise { - return respond(JSON.stringify(await metafinder.getTagList())); + return respond(JSON.stringify(await metafinder.getTagList()), { + headers: { + "content-type": "application/json; charset=utf-8", + }, + }); } diff --git a/src/server/server.ts b/src/server/server.ts index e151cbc..f810f0a 100644 --- a/src/server/server.ts +++ b/src/server/server.ts @@ -48,7 +48,7 @@ const handler = async (request: Request): Promise => { (new Date()).toISOString(), request.method, path, - "→ 500: ", + "→ 500:", error, ); return respond("500 " + (message || error), { status: 500 }); From 9d17a038e920a6e9186e5bb63f5768e305939748 Mon Sep 17 00:00:00 2001 From: nleanba <25827850+nleanba@users.noreply.github.com> Date: Thu, 3 Aug 2023 18:31:58 +0200 Subject: [PATCH 35/90] Removed deadlock Apparently, waiting for status before waiting for output will wait forever --- src/helpers/pdfprocessor.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/helpers/pdfprocessor.ts b/src/helpers/pdfprocessor.ts index 585b3db..1e267d2 100644 --- a/src/helpers/pdfprocessor.ts +++ b/src/helpers/pdfprocessor.ts @@ -2,7 +2,8 @@ const decoder = new TextDecoder("utf-8"); export async function getText(path: string) { const p = Deno.run({ cmd: ["pdftotext", path, "-"], stdout: "piped" }); + const output = decoder.decode(await p.output()); const { success, code } = await p.status(); - if (!success) throw new Error("pdfsandwich failed with code " + code); - return decoder.decode(await p.output()); + if (!success) throw new Error("pdftotext failed with code " + code); + return output; } From 7aa1e21167ff13d8dc210329e95d1815bebfb54e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Tue, 19 Aug 2025 04:07:43 +0000 Subject: [PATCH 36/90] tidying up --- Dockerfile | 3 +- old/lib/datastore.js | 155 ---------- old/lib/metadeleter.js | 91 ------ old/lib/metafinder.js | 300 ------------------ old/lib/metastorer.js | 197 ------------ old/lib/pdfprocessor.js | 29 -- old/lib/server.js | 661 ---------------------------------------- old/tdt.fish | 38 --- 8 files changed, 1 insertion(+), 1473 deletions(-) delete mode 100644 old/lib/datastore.js delete mode 100644 old/lib/metadeleter.js delete mode 100644 old/lib/metafinder.js delete mode 100644 old/lib/metastorer.js delete mode 100644 old/lib/pdfprocessor.js delete mode 100644 old/lib/server.js delete mode 100644 old/tdt.fish diff --git a/Dockerfile b/Dockerfile index 680ee6a..4fd1ebc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM denoland/deno:1.26.2 +FROM denoland/deno:2.4.4 EXPOSE 8000 @@ -7,7 +7,6 @@ WORKDIR /usr/src/app RUN apt update \ && apt -y install pdfsandwich tesseract-ocr-deu tesseract-ocr-fra curl zip unzip -RUN rm /etc/ImageMagick-6/policy.xml USER deno COPY src/deps.ts src/deps.ts diff --git a/old/lib/datastore.js b/old/lib/datastore.js deleted file mode 100644 index 1e20441..0000000 --- a/old/lib/datastore.js +++ /dev/null @@ -1,155 +0,0 @@ -const fs = require('fs'); -const archiver = require('archiver'); -const AdmZip = require('adm-zip'); -const path = require('path'); - -const metaFinder = 
require('./metafinder.js'); -const metaStorer = require('./metastorer.js'); -const { spawn, spawnSync } = require( 'child_process' ); - -function mkdir(dir, mode){ - console.log(dir); - try{ - fs.mkdirSync(dir, mode); - } - catch(e){ - //console.log(e); - if (e.code === 'EEXIST') { - return - } - if (e.code === 'EACCES') { - throw(e); - } - console.error("mkdir ERROR: " + e.errno + ": " + e.code); - //if(e.errno === 34){ //found this code on https://gist.github.com/progrape/bbccda9adc8845c94a6f, but getting -4058 on windows - mkdir(path.dirname(dir), mode); - mkdir(dir, mode); - //} - } -} - -function getPath(id) { - return "./blobs/"+id.slice(0,2)+"/"+id.slice(2,6)+"/"+id.slice(6,14)+"/"+id; -} - -function storeDocument(id,oldpath) { - return new Promise((accept, reject) => { - let newPath = getPath(id) - mkdir(path.dirname(newPath)); - fs.copyFile(oldpath, newPath, (error, result) => { - if (error) { - reject(error); - } else { - spawn('convert', ['-thumbnail', '300x', '-alpha', 'remove', `${newPath}[0]`, `${newPath}.png`]) - accept(result); - } - }); - }); -} - -function getDocument(id) { - return new Promise((accept, reject) => { - fs.readFile(getPath(id), (err, data) => { - if (err) { - reject(err); - } else { - accept(data); - } - }); - }); -} - -function getThumbnail(id) { - const path = getPath(id) - return new Promise((accept, reject) => { - fs.readFile(path + '.png', (err, data) => { - if (err) { - if (err.code === 'ENOENT') { - console.log(spawnSync('convert', ['-thumbnail', '300x', '-alpha', 'remove', `${path}[0]`, `${path}.png`]).output[2].toString()) - fs.readFile(path + '.png', (err, data) => { - if (err) { - reject(err); - } else { - accept(data); - } - }); - } else { - reject(err); - } - } else { - accept(data); - } - }); - }); -} - -function createArchive() { - const archive = new archiver('tar', { gzip: true }); - - // good practice to catch warnings (ie stat failures and other non-blocking errors) - archive.on('warning', function (err) { - if (err.code === 'ENOENT') { - // log warning - console.log(err); - } else { - // throw error - throw err; - } - }); - - // good practice to catch this error explicitly - archive.on('error', function (err) { - throw err; - }); - - return metaFinder.dump("text/turtle").then((response) => response.text()) - .then(data => { - archive.append(data, { name: "rdf.ttl" }); - archive.directory('./blobs/', 'blobs'); - archive.finalize(); - console.log("archived") - return archive; - }) -} - -function createZipArchive() { - const zip = new AdmZip(); - - - return metaFinder.dump("text/turtle").then((response) => response.text()) - .then(data => { - zip.addFile('rdf.ttl', Buffer.from(data)); - zip.addLocalFolder('./blobs/', 'blobs'); - console.log("zipped") - return zip; - }) -} - -function putData(file) { - const zip = new AdmZip(file); - var zipEntries = zip.getEntries(); - - zipEntries.forEach(function(zipEntry) { - if (zipEntry.entryName === 'rdf.ttl') { - metaStorer.restore(zipEntry.getData().toString('utf8')) - } - if (zipEntry.entryName.startsWith('blobs')) { - zip.extractEntryTo(zipEntry.entryName,'./', true, true) - } - }); - -} - -/* -return metaFinder.dump("text/turtle").then((response) => response.text()) - .then(data => h.response(dataStore.archive([{ data: data, name: "rdf.ttl" }])) - .type('application/gzip') - .header("content-disposition", `attachment; filename="tridoc_backup_${Date.now()}.tar.gz"`)); -*/ - -exports.storeDocument = storeDocument; -exports.getDocument = getDocument; -exports.getThumbnail = getThumbnail; 
-exports.createArchive = createArchive; -exports.createZipArchive = createZipArchive; -exports.putData = putData; \ No newline at end of file diff --git a/old/lib/metadeleter.js b/old/lib/metadeleter.js deleted file mode 100644 index d3b5608..0000000 --- a/old/lib/metadeleter.js +++ /dev/null @@ -1,91 +0,0 @@ -const fetch = require("node-fetch"); - -function deleteTitle(id) { - var now = new Date(); - return fetch("http://fuseki:3030/3DOC/update", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-update" - }, - body: 'PREFIX rdf: \n' + - 'PREFIX s: \n' + - 'WITH \n' + - 'DELETE { s:name ?o }\n' + - 'WHERE { s:name ?o }' - }) -} - -function deleteTag(label,id) { - return fetch("http://fuseki:3030/3DOC/update", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-update" - }, - body: 'PREFIX rdf: \n' + - 'PREFIX s: \n' + - 'PREFIX tridoc: \n' + - 'WITH \n' + - 'DELETE {\n' + - (id ? - ' tridoc:tag ?ptag \n' - : ' ?ptag ?p ?o .\n' + - ' ?s ?p1 ?ptag \n' - ) + - '}\n' + - 'WHERE {\n' + - ' ?ptag tridoc:parameterizableTag ?tag.\n' + - ' ?tag tridoc:label "' + label + '" .\n' + - ' OPTIONAL { ?ptag ?p ?o } \n' + - ' OPTIONAL { \n' + - (id ? ' tridoc:tag ?ptag \n' : ' ?s ?p1 ?ptag \n') + - ' } \n' + - '}' - }).catch(e => console.log(e)).then(() => { - return fetch("http://fuseki:3030/3DOC/update", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-update" - }, - body: 'PREFIX rdf: \n' + - 'PREFIX s: \n' + - 'PREFIX tridoc: \n' + - 'WITH \n' + - 'DELETE {\n' + - (id ? - ' tridoc:tag ?tag\n' - : ' ?tag ?p ?o .\n' + - ' ?s ?p1 ?tag\n' - ) + - '}\n' + - 'WHERE {\n' + - ' ?tag tridoc:label "' + label + '" .\n' + - ' OPTIONAL { ?tag ?p ?o } \n' + - ' OPTIONAL { \n' + - (id ? ' ?p1 ?tag\n' : ' ?s ?p1 ?tag\n') + - ' } \n' + - '}' - }) - }) -} - -function deleteFile(id) { - return fetch("http://fuseki:3030/3DOC/update", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-update" - }, - body: 'PREFIX rdf: \n' + - 'PREFIX s: \n' + - 'WITH \n' + - 'DELETE { ?p ?o }\n' + - 'WHERE { ?p ?o }' - }) -} - -exports.deleteTitle = deleteTitle; -exports.deleteFile = deleteFile; -exports.deleteTag = deleteTag; diff --git a/old/lib/metafinder.js b/old/lib/metafinder.js deleted file mode 100644 index 0dfdfc7..0000000 --- a/old/lib/metafinder.js +++ /dev/null @@ -1,300 +0,0 @@ -const fetch = require("node-fetch"); - -/** takes: { tags: [string, string, string][], nottags: [string, string, string][], text: string, limit: number, offset: number } */ -function getDocumentList({ tags, nottags, text, limit, offset }) { - let tagQuery = ""; - for (let i = 0 ; i < tags.length ; i++) { - if (tags[i][3]) { - tagQuery += -`{ ?s tridoc:tag ?ptag${i} . - ?ptag${i} tridoc:parameterizableTag ?atag${i} . - ?ptag${i} tridoc:value ?v${i} . - ?atag${i} tridoc:label "${tags[i][0]}" . - ${ tags[i][1] ? `FILTER (?v${i} >= "${tags[i][1]}"^^<${tags[i][3]}> )` : '' } - ${ tags[i][2] ? `FILTER (?v${i} ${ tags[i][5] ? '<' :'<='} "${tags[i][2]}"^^<${tags[i][3]}> )` : '' } }` - } else { - tagQuery += -`{ ?s tridoc:tag ?tag${i} . - ?tag${i} tridoc:label "${tags[i][0]}" . }` - } - } - let notTagQuery = ""; - for (let i = 0 ; i < nottags.length ; i++) { - if (nottags[i][3]) { - tagQuery += -`FILTER NOT EXISTS { ?s tridoc:tag ?ptag${i} . 
- ?ptag${i} tridoc:parameterizableTag ?atag${i} . - ?ptag${i} tridoc:value ?v${i} . - ?atag${i} tridoc:label "${nottags[i][0]}" . - ${ nottags[i][1] ? `FILTER (?v${i} >= "${nottags[i][1]}"^^<${nottags[i][3]}> )` : '' } - ${ nottags[i][2] ? `FILTER (?v${i} ${ nottags[i][5] ? '<' :'<='} "${nottags[i][2]}"^^<${nottags[i][3]}> )` : '' } }` - } else { - tagQuery += -`FILTER NOT EXISTS { ?s tridoc:tag ?tag${i} . - ?tag${i} tridoc:label "${nottags[i][0]}" . }` - } - } - let body = 'PREFIX rdf: \n' + - 'PREFIX s: \n' + - 'PREFIX tridoc: \n' + - 'PREFIX text: \n' + - 'SELECT DISTINCT ?s ?identifier ?title ?date\n' + - 'WHERE {\n' + - ' ?s s:identifier ?identifier .\n' + - ' ?s s:dateCreated ?date .\n' + - tagQuery + - notTagQuery + - ' OPTIONAL { ?s s:name ?title . }\n' + - (text ? '{ { ?s text:query (s:name \"' + text + '\") } UNION { ?s text:query (s:text \"' + text + '\")} } .\n' : '') + - '}\n' + - 'ORDER BY desc(?date)\n' + - (limit ? 'LIMIT ' + limit + '\n' : '') + - (offset ? 'OFFSET ' + offset : ''); - return fetch("http://fuseki:3030/3DOC/query", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-query" - }, - body: body - }).then((response) => response.json()).then((json) => - json.results.bindings.map((binding) => { - let result = {}; - result.identifier = binding.identifier.value; - if (binding.title) { - result.title = binding.title.value; - } - if (binding.date) { - result.created = binding.date.value; - } - return result; - }) - ); -} - -function getDocumentNumber({ tags, nottags, text }) { - let tagQuery = ""; - for (let i = 0 ; i < tags.length ; i++) { - if (tags[i][3]) { - tagQuery += -`{ ?s tridoc:tag ?ptag${i} . - ?ptag${i} tridoc:parameterizableTag ?atag${i} . - ?ptag${i} tridoc:value ?v${i} . - ?atag${i} tridoc:label "${tags[i][0]}" . - ${ tags[i][1] ? `FILTER (?v${i} >= "${tags[i][1]}"^^<${tags[i][3]}> )` : '' } - ${ tags[i][2] ? `FILTER (?v${i} ${ tags[i][5] ? '<' :'<='} "${tags[i][2]}"^^<${tags[i][3]}> )` : '' } }` - } else { - tagQuery += -`{ ?s tridoc:tag ?tag${i} . - ?tag${i} tridoc:label "${tags[i][0]}" . }` - } - } - let notTagQuery = ""; - for (let i = 0 ; i < nottags.length ; i++) { - if (nottags[i][3]) { - tagQuery += -`FILTER NOT EXISTS { ?s tridoc:tag ?ptag${i} . - ?ptag${i} tridoc:parameterizableTag ?atag${i} . - ?ptag${i} tridoc:value ?v${i} . - ?atag${i} tridoc:label "${nottags[i][0]}" . - ${ nottags[i][1] ? `FILTER (?v${i} >= "${nottags[i][1]}"^^<${nottags[i][3]}> )` : '' } - ${ nottags[i][2] ? `FILTER (?v${i} ${ nottags[i][5] ? '<' :'<='} "${nottags[i][2]}"^^<${nottags[i][3]}> )` : '' } }` - } else { - tagQuery += -`FILTER NOT EXISTS { ?s tridoc:tag ?tag${i} . - ?tag${i} tridoc:label "${nottags[i][0]}" . }` - } - } - return fetch("http://fuseki:3030/3DOC/query", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-query" - }, - body: 'PREFIX rdf: \n' + - 'PREFIX s: \n' + - 'PREFIX tridoc: \n' + - 'PREFIX text: \n' + - 'SELECT (COUNT(DISTINCT ?s) as ?count)\n' + - 'WHERE {\n' + - ' ?s s:identifier ?identifier .\n' + - tagQuery + notTagQuery + - (text ? 
'{ { ?s text:query (s:name \"'+text+'\") } UNION { ?s text:query (s:text \"'+text+'\")} } .\n':'')+ - '}' - }).then((response) => response.json()).then((json) => json.results.bindings[0].count.value); -} - -function getTagList() { - return fetch("http://fuseki:3030/3DOC/query", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-query" - }, - body: 'PREFIX rdf: \n' + - 'PREFIX s: \n' + - 'PREFIX tridoc: \n' + - 'SELECT DISTINCT ?s ?label ?type\n' + - 'WHERE {\n' + - ' ?s tridoc:label ?label .\n' + - ' OPTIONAL { ?s tridoc:valueType ?type . }\n' + - '}' - }).then((response) => response.json()).then((json) => - json.results.bindings.map((binding) => { - let result = {}; - result.label = binding.label.value; - if (binding.type) { - result.parameter = {type: binding.type.value}; - } - return result; - }) - ); -} - -function getTagTypes(labels) { - return fetch("http://fuseki:3030/3DOC/query", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-query" - }, - body: `PREFIX tridoc: -SELECT DISTINCT ?l ?t WHERE { VALUES ?l { "${labels.join('" "')}" } ?s tridoc:label ?l . OPTIONAL { ?s tridoc:valueType ?t . } }` - }).then((response) => response.json()).then((json) => - json.results.bindings.map((binding) => { - let result = []; - result[0] = binding.l.value; - if (binding.t) { - result[1] = binding.t.value; - } - return result; - }) - ); -} - -function getMeta(id) { - return fetch("http://fuseki:3030/3DOC/query", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-query" - }, - body: 'PREFIX rdf: \n' + - 'PREFIX s: \n' + - 'SELECT ?title ?date\n' + - 'WHERE {\n' + - ' ?s s:identifier "' + id + '" .\n' + - ' ?s s:dateCreated ?date .\n' + - ' OPTIONAL { ?s s:name ?title . }\n' + - '}' - }).then((response) => response.json()).then((json) => { - const result = {} - if (json.results.bindings[0].title) result.title = json.results.bindings[0].title.value - if (json.results.bindings[0].date) result.created = json.results.bindings[0].date.value - return result - }); -} - -function getTags(id) { - let query = 'PREFIX rdf: \n' + - 'PREFIX xsd: \n' + - 'PREFIX tridoc: \n' + - 'PREFIX s: \n' + - 'SELECT DISTINCT ?label ?type ?v \n' + - ' WHERE { \n' + - ' GRAPH { \n' + - ' tridoc:tag ?tag . \n' + - ' {\n' + - ' ?tag tridoc:label ?label . \n' + - ' } \n' + - ' UNION \n' + - ' { \n' + - ' ?tag tridoc:value ?v ; \n' + - ' tridoc:parameterizableTag ?ptag . \n' + - ' ?ptag tridoc:label ?label ; \n' + - ' tridoc:valueType ?type . \n' + - ' } \n' + - ' }\n' + - '}'; - return fetch("http://fuseki:3030/3DOC/query", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-query" - }, - body: query - }).then((response) => response.json()).then((json) => - json.results.bindings.map((binding) => { - let result = {}; - result.label = binding.label.value; - if (binding.type) { - result.parameter = { - "type": binding.type.value, - "value": binding.v.value - }; - } - return result; - }) - ); -} - -function getComments(id) { - let query = -`PREFIX rdf: -PREFIX xsd: -PREFIX tridoc: -PREFIX s: -SELECT DISTINCT ?d ?t WHERE { - GRAPH { - s:comment [ - a s:Comment ; - s:dateCreated ?d ; - s:text ?t - ] . 
- } -}`; - return fetch("http://fuseki:3030/3DOC/query", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-query" - }, - body: query - }).then((response) => { - if (response.ok) { - return response.json(); - } else { - throw new Error(response); - } - }).then((json) => - json.results.bindings.map((binding) => { - let result = {}; - result.text = binding.t.value; - result.created = binding.d.value; - return result; - }) - ); -} - -function dump(accept = "text/turtle") { - let query = 'CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }'; - return fetch("http://fuseki:3030/3DOC/query", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-query", - "Accept": accept - }, - body: query - }) -} - - -exports.getDocumentList = getDocumentList; -exports.getDocumentNumber = getDocumentNumber; -exports.getTagList = getTagList; -exports.getTagTypes = getTagTypes; -exports.getTags = getTags; -exports.getComments = getComments; -exports.getMeta = getMeta; -exports.dump = dump; \ No newline at end of file diff --git a/old/lib/metastorer.js b/old/lib/metastorer.js deleted file mode 100644 index b7af211..0000000 --- a/old/lib/metastorer.js +++ /dev/null @@ -1,197 +0,0 @@ -const fetch = require("node-fetch"); - -function createTag(label, type) { - if (label.length < 1) { - return Promise.reject("Name must be specified") - } - let tagType = "Tag"; - let valueType = ""; - if (type) { - tagType = "ParameterizableTag"; - if ((type == "http://www.w3.org/2001/XMLSchema#decimal")||(type == "http://www.w3.org/2001/XMLSchema#date")) { - valueType = " tridoc:valueType <" + type + ">;\n"; - } else { - return Promise.reject("Invalid type"); - } - } - let query = 'PREFIX rdf: \n' + - 'PREFIX xsd: \n' + - 'PREFIX tridoc: \n' + - 'PREFIX s: \n' + - 'INSERT DATA {\n' + - ' GRAPH {\n' + - ' rdf:type tridoc:' + tagType + ' ;\n' + - valueType + - ' tridoc:label "' + escapeLiteral(label) + '" .\n' + - ' }\n' + - '}'; - //console.log(query); - return fetch("http://fuseki:3030/3DOC/update", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-update" - }, - body: query - }) -} - -function addTag(id, label, value, type) { - let tag = value ? encodeURIComponent(label) + "/" + value : encodeURIComponent(label) ; - let query = 'PREFIX rdf: \n' + - 'PREFIX xsd: \n' + - 'PREFIX tridoc: \n' + - 'PREFIX s: \n' + - 'INSERT DATA {\n' + - ' GRAPH {\n' + - ' tridoc:tag . \n' + - (value ? ' a tridoc:ParameterizedTag ;\n' + - ' tridoc:parameterizableTag ;\n' + - ' tridoc:value "' + value + '"^^<' + type + '> .\n' : '') + - ' }\n' + - '}'; - //console.log(query); - return fetch("http://fuseki:3030/3DOC/update", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-update" - }, - body: query - }) -} - -function addComment(id, text) { - const now = new Date() - const query = -`PREFIX rdf: -PREFIX xsd: -PREFIX tridoc: -PREFIX s: -INSERT DATA { - GRAPH { - s:comment [ - a s:Comment ; - s:dateCreated "${now.toISOString()}"^^xsd:dateTime ; - s:text "${escapeLiteral(text)}" - ] . 
- } -}`; - return fetch("http://fuseki:3030/3DOC/update", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-update" - }, - body: query - }).then(response => { - if (response.ok) { - return response; - } else { - throw new Error(response.statusText); - } - }) -} - -function storeDocument(id, text, created) { - var now = created ? new Date(created) : new Date(); - let query = 'PREFIX rdf: \n' + - 'PREFIX xsd: \n' + - 'PREFIX s: \n' + - 'INSERT DATA {\n' + - ' GRAPH {\n' + - ' rdf:type s:DigitalDocument ;\n' + - ' s:dateCreated "' + now.toISOString() + '"^^xsd:dateTime ;\n' + - ' s:identifier "' + id + '" ;\n' + - ' s:text "' + - escapeLiteral(text) + '" .\n' + - ' }\n' + - '}'; - //console.log(query); - return fetch("http://fuseki:3030/3DOC/update", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-update" - }, - body: query - }).then(response => { - //console.log("Fuseki returned: "+response.status); - if (response.ok) { - return response; - } else { - throw new Error("Error from Fuseki: " + response.statusText); - } - }) -} - -function escapeLiteral(string) { - return string.replace(/\\/g,"\\\\").replace(/\n/g,"\\n").replace(/\r/g,"\\r").replace(/'/g,"\\'").replace(/"/g,"\\\""); -} - -function setTitle(id, title) { - return fetch("http://fuseki:3030/3DOC/update", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-update" - }, - body: 'PREFIX rdf: \n' + - 'PREFIX s: \n' + - 'WITH \n' + - 'DELETE { s:name ?o }\n' + - 'INSERT { s:name "' + escapeLiteral(title) + '" }\n' + - 'WHERE { OPTIONAL { s:name ?o } }' - }).then(response => { - if (response.ok) { - return response; - } else { - throw new Error("Error from Fuseki: " + response.statusText); - } - }) -} - -function uploadBackupMetaData(file, type = 'text/turtle') { - return fetch("http://fuseki:3030/3DOC/data?graph=http://3doc/meta", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": type, - "Accept": "application/json, */*;q=0.01" - }, - body: file - }).then(response => { - if (response.ok) { - return response; - } else { - throw new Error(response.statusText); - } - }) -} - -function restore(turtleData) { - let statement = `CLEAR GRAPH ; - INSERT DATA { - GRAPH { ${turtleData} } - }`; - return fetch("http://fuseki:3030/3DOC/update", { - method: "POST", - headers: { - "Authorization": "Basic " + btoa("admin:pw123"), - "Content-Type": "application/sparql-update" - }, - body: statement - }) -} - -exports.storeDocument = storeDocument; -exports.setTitle = setTitle; -exports.addTag = addTag; -exports.addComment = addComment; -exports.createTag = createTag; -exports.uploadBackupMetaData = uploadBackupMetaData; -exports.restore = restore; - - //' s:author < ???? > ;\n' + // To be decided whether to use s:author or s:creator - //' s:comment " ???? " ;\n' + - //' s:creator < ???? 
> ;\n' + // To be decided whether to use s:author or s:creator diff --git a/old/lib/pdfprocessor.js b/old/lib/pdfprocessor.js deleted file mode 100644 index 31fccf2..0000000 --- a/old/lib/pdfprocessor.js +++ /dev/null @@ -1,29 +0,0 @@ -const PDFJS = require('pdfjs-dist'); - -function getText(pdfUrl) { - var pdf = PDFJS.getDocument(pdfUrl); - return pdf.then(function (pdf) { // get all pages text - var maxPages = pdf.pdfInfo.numPages; - var countPromises = []; // collecting all page promises - for (var j = 1; j <= maxPages; j++) { - var page = pdf.getPage(j); - - var txt = ""; - countPromises.push(page.then(function (page) { // add page promise - var textContent = page.getTextContent(); - return textContent.then(function (text) { // return content promise - return text.items.map(function (s) { - return s.str; - }).join(' '); // value page text - - }); - })); - } - // Wait for all pages and join text - return Promise.all(countPromises).then(function (texts) { - return texts.join(' '); - }); - }); -} - -exports.getText = getText; diff --git a/old/lib/server.js b/old/lib/server.js deleted file mode 100644 index 082c009..0000000 --- a/old/lib/server.js +++ /dev/null @@ -1,661 +0,0 @@ -'use strict'; - -const Hapi = require('hapi'); -const util = require("util") -const { spawnSync } = require( 'child_process' ); -const pdfProcessor = require('./pdfprocessor.js'); -const metaStorer = require('./metastorer.js'); -const dataStore = require('./datastore.js'); -const metaFinder = require('./metafinder.js'); -const metaDeleter = require('./metadeleter.js'); -var nanoid = require('nanoid'); - -const log_info = (request) => console.log(request.method.toUpperCase() + " " + request.path); - -// HELPER FUNCTIONS - -function makeArray(maybeArray) { - if (Array.isArray(maybeArray)) { - return maybeArray; - } else { - let array = new Array(maybeArray); - return array; - } -} - -/** => { tags: [string, string, string][], nottags: [string, string, string][], text: string, limit: number, offset: number } */ -function processParams (query) { - let result = {} - result.tags = makeArray(query.tag ? makeArray(query.tag) : []).map(t => t.split(';')) - result.nottags = makeArray(query.nottag ? makeArray(query.nottag) : []).map(t => t.split(';')) - result.text = query.text - result.limit = (parseInt(query.limit, 10) > 0 ? parseInt(query.limit) : undefined) - result.offset = (parseInt(query.offset, 10) >= 0 ? parseInt(query.offset) : undefined) - return metaFinder.getTagTypes(result.tags.map(e => e[0]).concat(result.nottags.map(e => e[0]))).then(types => { - /** => [label, min, max, -, maxIsExclusive][] */ - function tagMap (t) { - const typ = types.find(e => e[0] === t[0]) - t[3] = typ ? 
typ[1] : undefined - if (typ[1] === 'http://www.w3.org/2001/XMLSchema#date') { - if (t[1]) { - switch (t[1].length) { - case 4: - t[1] += '-01-01' - break - case 7: - t[1] += '-01' - break - } - } - if (t[2]) { - switch (t[2].length) { - case 4: - t[2] += '-12-31' - break - case 7: - const month = parseInt(t[2].substring(5),10) + 1 - if (month < 13) { - t[2] = t[2].substring(0,5) + '-' + month.toString().padStart(2, '0') + '-01' - t[5] = true - } else { - t[2] += '-31' - } - break - } - } - } - return t - } - result.tags.map(tagMap) - result.nottags.map(tagMap) - console.log('eh??', util.inspect(result)) - return result - }) -} - -// SERVER - -const VERSION = process.env.npm_package_version || require('../package.json').version; - -// Create a server with a host and port -const server = Hapi.server({ - debug: { request: ['error'] }, - port: 8000, - routes: { - cors: { - additionalHeaders: ['Access-Control-Allow-Origin'], - origin: ['*'] - }, - auth: 'simple' - } -}); - -const validate = async (request, username, password) => { - - console.log('Authenticating ' + username + " " + password); - - if (username !== "tridoc") { - return { credentials: null, isValid: false }; - } - - const isValid = password === process.env.TRIDOC_PWD; - const credentials = { id: "0001", name: username }; - - return { isValid, credentials }; -}; - - - - - -// Start the server -async function start() { - - - try { - await server.start(); - } catch (err) { - console.log(err); - process.exit(1); - } - - - await server.register(require('hapi-auth-basic')); - - server.auth.strategy('simple', 'basic', { validate }); - - server.route({ - method: 'GET', - path: '/count', - handler: function (request, h) { - log_info(request); - return processParams(request.query).then(p => metaFinder.getDocumentNumber(p)) - } - }); - - server.route({ - method: 'POST', - path: '/doc', - config: { - handler: (request, h) => { - log_info(request); - var id = nanoid(); - return pdfProcessor.getText(request.payload.path).then(text => { - const lang = process.env.OCR_LANG ? process.env.OCR_LANG : 'fra+deu+eng' - if (text.length < 4) { - const sandwich = spawnSync( 'pdfsandwich', [ '-rgb', '-lang', lang, request.payload.path ] ); - if(sandwich.error) { - console.log( `error attempting to execute pdfsandwich: ${sandwich.error}` ); - return [text, request.payload.path]; - } else { - console.log( `pdfsandwich stderr: ${sandwich.stderr.toString()}` ); - console.log( `pdfsandwich stdout: ${sandwich.stdout.toString()}` ); - const ocrPath = request.payload.path+'_ocr' - return pdfProcessor.getText(ocrPath).then(text => [text, ocrPath]); - } - } else { - return [text, request.payload.path]; - } - }).then(([text,path]) => { - console.log("Document created with id " + id); - const datecheck = /^(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-6]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-6]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-6]\d([+-][0-2]\d:[0-5]\d|Z))$/; - return metaStorer.storeDocument(id, text, (request.query.date && request.query.date.match(datecheck)) ? 
request.query.date : undefined).then(() => { - return dataStore.storeDocument(id, path) - .then(() => - h.response() - .code(201) - .header("Location", "/doc/" + id) - .header("Access-Control-Expose-Headers", "Location") - ); - }); - }); - }, - payload: { - allow: 'application/pdf', - maxBytes: 209715200, - output: 'file', - parse: false - } - } - }); - - server.route({ - method: 'GET', - path: '/doc', - handler: function (request, h) { - log_info(request); - return processParams(request.query).then(p => metaFinder.getDocumentList(p)) - } - }); - - server.route({ - method: 'GET', - path: '/doc/{id}', - handler: function (request, h) { - log_info(request); - var id = request.params.id; - return dataStore.getDocument(id).then(data => { - return metaFinder.getMeta(id).then(titleObject => titleObject.title || titleObject.created).catch(e => "document").then(fileName => { - return h.response(data) - .header("content-disposition", "inline; filename=\"" + encodeURI(fileName) + ".pdf\"") - .header("content-type", "application/pdf"); - }); - }); - } - }); - - server.route({ - method: 'DELETE', - path: '/doc/{id}', - handler: function (request, h) { - log_info(request); - var id = request.params.id; - return metaDeleter.deleteFile(id); - } - }); - - server.route({ - method: 'POST', - path: '/doc/{id}/comment', - config: { - handler: (request, h) => { - log_info(request) - return metaStorer.addComment(request.params.id, request.payload.text).catch(e => - h.response({ "statusCode": 404, "error": e + " | (Document) Not Found", "message": "Not Found" }) - .code(404) - ); - }, - payload: { - allow: ['application/json'], - maxBytes: 209715200, - output: 'data', - parse: true - } - } - }); - - server.route({ - method: 'GET', - path: '/doc/{id}/comment', - config: { - handler: (request, h) => { - log_info(request); - return metaFinder.getComments(request.params.id).catch(e => - h.response({ "statusCode": 404, "error": e + "(Document) Not Found", "message": "Not Found" }) - .code(404) - ); - } - } - }); - - server.route({ - method: 'POST', - path: '/doc/{id}/tag', - config: { - handler: (request, h) => { - log_info(request); - let id = request.params.id; - let label = request.payload.label; - let value; - let type; - console.log(request.payload); - return metaFinder.getTagList().then(r => { - if (request.payload.parameter) { - value = request.payload.parameter.value; - type = request.payload.parameter.type; - } - let exists = r.find((element) => (element.label === label)); - if (exists) { - if (request.payload.parameter) { - if (exists.parameter.type === type) { - console.log("Adding tag \"" + label + "\" of type \"" + type + "\" to " + id) - return metaStorer.addTag(id, label, value, type) - } else { - return h.response({ - "statusCode": 400, - "error": "Wrong type", - "message": "Type provided does not match" - }).code(400) - } - } else { - if (exists.parameter) { - return h.response({ - "statusCode": 400, - "error": "Wrong type", - "message": "You need to specify a value" - }).code(400) - } - console.log("Adding tag \"" + label + "\" to " + id) - return metaStorer.addTag(id, label); - } - } else { - return h.response({ - "statusCode": 400, - "error": "Cannot find tag", - "message": "Tag must exist before adding to a document" - }) - .code(400) - } - }); - }, - payload: { - allow: ['application/json'], - maxBytes: 209715200, - output: 'data', - parse: true - } - } - }); - - server.route({ - method: 'GET', - path: '/doc/{id}/tag', - config: { - handler: (request, h) => { - log_info(request); - return 
metaFinder.getTags(request.params.id).catch(e => - h.response({ "statusCode": 404, "error": "(Document) Not Found", "message": "Not Found" }) - .code(404) - ); - } - } - }); - - server.route({ - method: 'DELETE', - path: '/doc/{id}/tag/{label}', - config: { - handler: (request, h) => { - log_info(request); - var label = decodeURIComponent(request.params.label); - var id = decodeURIComponent(request.params.id); - return metaDeleter.deleteTag(label, id); - } - } - }); - - server.route({ - method: 'GET', - path: '/doc/{id}/thumb', - handler: function (request, h) { - log_info(request); - var id = request.params.id; - return dataStore.getThumbnail(id).then(data => { - return metaFinder.getMeta(id).then(titleObject => titleObject.title || titleObject.created).catch(e => "document").then(fileName => { - return h.response(data) - .header("content-disposition", "inline; filename=\"" + encodeURI(fileName) + ".png\"") - .header("content-type", "image/png"); - }); - }); - } - }); - - server.route({ - method: 'PUT', - path: '/doc/{id}/title', - config: { - handler: (request, h) => { - log_info(request); - var id = request.params.id; - console.log(request.payload); - return metaStorer.setTitle(id, request.payload.title).then(() => { - return 'Title updated. Document-ID = ' + id; - }); - }, - payload: { - allow: ['application/json'], - maxBytes: 209715200, - output: 'data', - parse: true - } - } - }); - - server.route({ - method: 'GET', - path: '/doc/{id}/title', - config: { - handler: (request, h) => { - log_info(request); - var id = request.params.id; - return metaFinder.getMeta(id) - .then(r => ({"title": r.title})) - .catch(e => - h.response({ "statusCode": 500, "error": e, "message": "Not Found" }) - .code(404) - ); - } - } - }); - - server.route({ - method: 'DELETE', - path: '/doc/{id}/title', - config: { - handler: (request, h) => { - var id = request.params.id; - console.log("DELETE /doc/" + id + "/title"); - return metaDeleter.deleteTitle(id); - } - } - }); - - server.route({ - method: 'GET', - path: '/doc/{id}/meta', - config: { - handler: (request, h) => { - log_info(request); - var id = request.params.id; - return metaFinder.getMeta(id) - .then(response => { - return metaFinder.getTags(id) - .then(tags => { - response.tags = tags; - return metaFinder.getComments(id) - .then(comments => { - response.comments = comments - return response - }) - }) - }).catch(e => { - console.log("\x1b[31m ERROR:" + util.inspect(e)); - return h.response({ "statusCode": 404, "error": "(Document) Not Found", "message": "Not Found" }) - .code(404) - }); - } - } - }); - - server.route({ - method: 'POST', - path: '/raw/rdf', - handler: function (request, h) { - log_info(request); - let type = request.headers["content-type"]; - type = type || 'text/turtle'; - return metaStorer.uploadBackupMetaData(request.payload,type).then((response) => { - type = response.headers.get('Content-Type'); - console.log(type); - return response.text(); - }).then(data => { - console.log(type); - const response = h.response(data); - response.type(type); - return response; - }); - } - }); - - server.route({ - method: 'GET', - path: '/raw/rdf', - handler: function (request, h) { - log_info(request); - let accept = request.query.accept ? 
decodeURIComponent(request.query.accept) : request.headers.accept; - let type = 'text/turtle'; - return metaFinder.dump(accept).then((response) => { - type = response.headers.get('Content-Type'); - console.log(type); - return response.text(); - }).then(data => { - console.log(type); - const response = h.response(data); - response.type(type); - return response; - }); - } - }); - - server.route({ - method: 'GET', - path: '/raw/tgz', - handler: function (request, h) { - log_info(request); - return dataStore.createArchive() - .then(archive => h.response(archive) - .type('application/gzip') - .header("content-disposition", `attachment; filename="tridoc_backup_${Date.now()}.tar.gz"`)); - } - }); - - - server.route({ - method: 'GET', - path: '/raw/zip', - handler: function (request, h) { - log_info(request); - return dataStore.createZipArchive() - .then(archive => h.response(archive.toBuffer()) - .type('application/zip') - .header("content-disposition", `attachment; filename="tridoc_backup_${Date.now()}.zip"`)); - } - }); - - server.route({ - method: 'PUT', - path: '/raw/zip', - config: { - handler: (request, h) => { - log_info(request); - var id = request.params.id; - console.log(request.payload); - dataStore.putData(request.payload) - return 'data replaced' - }, - payload: { - allow: ['application/zip'], - defaultContentType: 'application/zip', - maxBytes: 10*1024*1024*1024, - output: 'data', - parse: false, - timeout: false - } - } - }); - - server.route({ - method: 'POST', - path: '/tag', - config: { - handler: (request, h) => { - console.log("POST /tag"); - console.log(request.payload); - return metaFinder.getTagList().then(r => { - let exists = r.find(function (element) { - return element.label == request.payload.label; - }); - if (exists) { - return h.response({ - "statusCode": 400, - "error": "Tag exists already", - "message": "Cannot create existing tag" - }) - .code(400) - } else { - let regex = /\s|^[.]{1,2}$|\/|\\|#|"|'|,|;|:|\?/; - if (!regex.test(request.payload.label)) { - if (request.payload.parameter) { - return metaStorer.createTag(request.payload.label, request.payload.parameter.type).catch(e => { - console.log(e); - return h.response({ - "statusCode": 500, - "error": "Could not add Tag", - "message": e - }).code(500) - }); - } else { - return metaStorer.createTag(request.payload.label).catch(e => { - console.log(e); - return h.response({ - "statusCode": 400, - "error": "Could not add Tag", - "message": e, - }).code(500) - }); - } - } else { - return h.response({ - "statusCode": 400, - "error": "Label contains forbidden characters", - "message": regex + " matches the Label" - }) - .code(400) - } - } - }); - - }, - payload: { - allow: ['application/json'], - output: 'data', - parse: true - } - } - }); - - /* - CREATE TAG JSON SYNTAX - -- - { - label : "tagname" , - parameter : { - type : "http://www.w3.org/2001/XMLSchema#decimal" or "http://www.w3.org/2001/XMLSchema#date" - } // only for parameterizable tags - } - ADD TAG JSON SYNTAX - -- - { - label : "tagname" , - parameter : { - type : "http://www.w3.org/2001/XMLSchema#decimal" or "http://www.w3.org/2001/XMLSchema#date", - value : "20.7" or "2018-08-12" // must be valid xsd:decimal or xsd:date, as specified in property type. 
- } // only for parameterizable tags - } - */ - - server.route({ - method: 'GET', - path: '/tag', - config: { - handler: (request, h) => { - log_info(request); - return metaFinder.getTagList().catch(e => - h.response({ "statusCode": 404, "error": "(Title) Not Found", "message": "Not Found" }) - .code(404) - ); - } - } - }); - - server.route({ - method: 'GET', - path: '/tag/{label}', - config: { - handler: (request, h) => { - console.log(request); - let arg = {} - arg.text = request.query.text - arg.limit = (parseInt(request.query.limit, 10) > 0 ? parseInt(request.query.limit) : undefined) - arg.offset = (parseInt(request.query.offset, 10) >= 0 ? parseInt(request.query.offset) : undefined) - arg.nottags = [] - arg.tags = [ decodeURIComponent(request.params.label) ] - return metaFinder.getDocumentList(arg).catch(e => - h.response({ - "statusCode": 404, - "error": "(Title) Not Found", - "message": util.inspect(e) - }) - .code(404) - ); - } - } - }); - - server.route({ - method: 'DELETE', - path: '/tag/{label}', - config: { - handler: (request, h) => { - console.log(request); - var label = decodeURIComponent(request.params.label); - return metaDeleter.deleteTag(label); - } - } - }); - - server.route({ - method: 'GET', - path: '/version', - config: { - handler: (request, h) => { - log_info(request); - return VERSION; - } - } - }); - - console.log('Server running at:', server.info.uri); -}; - -start(); \ No newline at end of file diff --git a/old/tdt.fish b/old/tdt.fish deleted file mode 100644 index a8f0eb8..0000000 --- a/old/tdt.fish +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env fish - -# Usage: `source ./tdt.fish` then `tdt ` -# are: `start`, `stop`, -# or a request type followed by any number of paths, each followed by request body (if method != `GET`) -# (eg.: `GET doc tag`, `POST tag '{"label": "Inbox"}'`) - -function tdt - if test (count $argv) -lt 1 - echo -e "\e[31mNo command specified\e[0m" - else - if test $argv[1] = 'start' - set -lx TRIDOC_PWD "pw123" - docker-compose down - docker-compose build - docker-compose up -d - else if test $argv[1] = 'stop' - docker-compose down - else if test $argv[1] = 'GET' - for path in $argv[2..-1] - echo -e "\e[36mGET /"$path":\e[0m" - curl -s "http://localhost:8000/$path" -H 'Connection: keep-alive' -H 'Authorization: Basic dHJpZG9jOnB3MTIz' \ - | node -e "s=process.openStdin();d=[];s.on('data',c=>d.push(c));s.on('end',()=>{console.log(require('util').inspect((JSON.parse(d.join(''))),{colors:true,depth:4,sorted:true}), '\n')})" - end - else - set -l args $argv[2..-1] - set -l i 1 - while test "$i" -lt (count $args) - set p $args[$i] - set load $args[(math "$i+1")] - echo -e "\e[36m$argv[1] /$p: $load\e[0m" - curl -s "http://localhost:8000/$p" -X $argv[1] -d "$load" -H "Content-Type: application/json" -H 'Connection: keep-alive' -H 'Authorization: Basic dHJpZG9jOnB3MTIz' \ - | node -e "s=process.openStdin();d=[];s.on('data',c=>d.push(c));s.on('end',()=>{console.log(require('util').inspect((JSON.parse(d.join(''))),{colors:true,depth:4,sorted:true}), '\n')})" - set i (math "$i+2") - end - end - end -end \ No newline at end of file From 70168fb1d3af825d6015fb593b990c07a38e520a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Tue, 19 Aug 2025 04:25:56 +0000 Subject: [PATCH 37/90] version --- .devcontainer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 39c4d3a..55c16f0 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -1,4 +1,4 
@@ -FROM denoland/deno:1.26.2 +FROM denoland/deno:2.4.4 EXPOSE 8000 From 354d2377fda3a624ac5ffe2f27d82bd60f763eea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Tue, 19 Aug 2025 04:26:07 +0000 Subject: [PATCH 38/90] typo --- deno.jsonc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deno.jsonc b/deno.jsonc index 03da2df..7699961 100644 --- a/deno.jsonc +++ b/deno.jsonc @@ -6,7 +6,7 @@ }, "tasks": { // --allow-run=convert,pdfsandwich,pdftotext,tar,zip,unzip,bash - "run": "deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttls --allow-run --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts", + "run": "deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts", "run-watch": "deno run --watch --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts" } } From 74215d82fafd3d120d479e9271a0609eeb662529 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Tue, 19 Aug 2025 15:25:00 +0000 Subject: [PATCH 39/90] sonnet 4 --- .devcontainer/Dockerfile | 19 ++++---- .devcontainer/devcontainer.json | 34 +++++++------- .devcontainer/docker-compose.yml | 65 ++++++++++++++------------- .devcontainer/setup-dev.sh | 36 +++++++++++++++ .vscode/launch.json | 27 ++++++++++++ .vscode/tasks.json | 76 ++++++++++++++++++++++++++++++++ 6 files changed, 203 insertions(+), 54 deletions(-) create mode 100755 .devcontainer/setup-dev.sh create mode 100644 .vscode/launch.json create mode 100644 .vscode/tasks.json diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 55c16f0..f72f717 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -2,19 +2,22 @@ FROM denoland/deno:2.4.4 EXPOSE 8000 -RUN mkdir -p /home/deno -RUN chown -R deno /home/deno RUN mkdir -p /usr/src/app/src WORKDIR /usr/src/app +# Install required packages for development environment RUN apt update \ - && apt -y install pdfsandwich tesseract-ocr-deu tesseract-ocr-fra curl git zip unzip -RUN rm /etc/ImageMagick-6/policy.xml + && apt -y install pdfsandwich tesseract-ocr-deu tesseract-ocr-fra curl git zip unzip \ + && rm -rf /var/lib/apt/lists/* + +# Remove ImageMagick policy restrictions +RUN rm -f /etc/ImageMagick-6/policy.xml USER deno -COPY src/deps.ts src/deps.ts -RUN deno cache src/deps.ts -COPY . . +# Pre-cache dependencies (will be overridden by volume mount in dev) +# This is just to prepare the environment +RUN mkdir -p src && echo 'export {};' > src/deps.ts -CMD [ "/bin/bash", "/usr/src/app/.devcontainer/docker-cmd.sh" ] \ No newline at end of file +# Keep container running for development +CMD ["sleep", "infinity"] \ No newline at end of file diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 03ce50f..c7e04b3 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -2,17 +2,14 @@ // https://github.com/microsoft/vscode-dev-containers/tree/v0.245.2/containers/docker-existing-docker-compose // If you want to run as a non-root user in the container, see .devcontainer/docker-compose.yml. { - "name": "Existing Docker Compose (Extend)", + "name": "Tridoc Backend Development", - // Update the 'dockerComposeFile' list if you have more compose files or use different names. - // The .devcontainer/docker-compose.yml file contains any overrides you need/want to make. 
- "dockerComposeFile": [ - "../dev-docker-compose.yml", - "docker-compose.yml" - ], + // Use the independent dev container docker-compose configuration + "dockerComposeFile": "docker-compose.yml", "containerEnv": { - "TRIDOC_PWD": "pw123", + "TRIDOC_PWD": "pw123", + "OCR_LANG": "deu" }, // The 'service' property is the name of the service for the container that VS Code should @@ -24,24 +21,29 @@ "workspaceFolder": "/usr/src/app", // Use 'forwardPorts' to make a list of ports inside the container available locally. - // "forwardPorts": [], + "forwardPorts": [8000, 8001], - // Uncomment the next line if you want start specific services in your Docker Compose config. - "runServices": [ "fuseki" ], + // Start the fuseki service when the dev container starts + "runServices": ["fuseki"], // Uncomment the next line if you want to keep your containers running after VS Code shuts down. - // "shutdownAction": "none", + "shutdownAction": "stopCompose", - // Uncomment the next line to run commands after the container is created - for example installing curl. - // "postCreateCommand": "apt-get update && apt-get install -y curl", + // Post-create command to set up the development environment + "postCreateCommand": "bash .devcontainer/setup-dev.sh", - // Uncomment to connect as a non-root user if you've added one. See https://aka.ms/vscode-remote/containers/non-root. + // Connect as the deno user "remoteUser": "deno", + "customizations": { "vscode": { "extensions": [ "denoland.vscode-deno" - ] + ], + "settings": { + "deno.enable": true, + "deno.lint": true + } } } } diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index 3b042ba..26fb93f 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -1,37 +1,42 @@ -version: '3' +version: '3.8' + services: - # Update this to the name of the service you want to work with in your docker-compose.yml file + # Development environment for tridoc-backend tridoc: - # If you want add a non-root user to your Dockerfile, you can use the "remoteUser" - # property in devcontainer.json to cause VS Code its sub-processes (terminals, tasks, - # debugging) to execute as the user. Uncomment the next line if you want the entire - # container to run as this user instead. Note that, on Linux, you may need to - # ensure the UID and GID of the container user you create matches your local user. - # See https://aka.ms/vscode-remote/containers/non-root for details. - # - user: deno - - # Uncomment if you want to override the service's Dockerfile to one in the .devcontainer - # folder. Note that the path of the Dockerfile and context is relative to the *primary* - # docker-compose.yml file (the first in the devcontainer.json "dockerComposeFile" - # array). The sample below assumes your primary file is in the root of your project. - # build: - context: . + context: .. dockerfile: .devcontainer/Dockerfile - + user: deno volumes: - # Update this to wherever you want VS Code to mount the folder of your project - - .:/usr/src/app:cached - - # Uncomment the next line to use Docker from inside the container. See https://aka.ms/vscode-remote/samples/docker-from-docker-compose for details. 
- # - /var/run/docker.sock:/var/run/docker.sock + # Mount the entire workspace for development + - ..:/usr/src/app:cached + # Mount blobs directory separately if needed + - ../blobs:/usr/src/app/blobs + ports: + - "8000:8000" + depends_on: + - fuseki + environment: + TRIDOC_PWD: "${TRIDOC_PWD:-pw123}" + OCR_LANG: "${OCR_LANG:-deu}" + # Keep container running for development + command: ["sleep", "infinity"] + networks: + - tridoc-dev - # Uncomment the next four lines if you will use a ptrace-based debugger like C++, Go, and Rust. - # cap_add: - # - SYS_PTRACE - # security_opt: - # - seccomp:unconfined + # Fuseki service accessible as 'fuseki' hostname + fuseki: + image: "linkedsolutions/fuseki" + environment: + ADMIN_PASSWORD: "${TRIDOC_PWD:-pw123}" + ports: + - "8001:3030" # Expose for development access + volumes: + - ../fuseki-base:/fuseki/base + - ../config-tdb.ttl:/fuseki/set-up-resources/config-tdb + networks: + - tridoc-dev - # Overrides default command so things don't shut down after the process ends. - # command: "/bin/bash -c \"TRIDOC_PWD=\\\"pw123\\\" deno run --allow-net --allow-read=blobs --allow-write=blobs --allow-run=convert,pdfsandwich --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts &\\\n sleep 5\\\n echo 'Attempting to create Dataset \\\"3DOC\\\"'\\\n curl 'http://fuseki:3030/$/datasets' -H \\\"Authorization: Basic $(echo -n admin:pw123 | base64)\\\" -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' --data 'dbName=3DOC&dbType=tdb'\\\n fg 1\\\n /bin/sh -c \\\"while sleep 1000; do :; done\\\"\"" +networks: + tridoc-dev: + driver: bridge diff --git a/.devcontainer/setup-dev.sh b/.devcontainer/setup-dev.sh new file mode 100755 index 0000000..951e490 --- /dev/null +++ b/.devcontainer/setup-dev.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +echo "Setting up Tridoc Backend development environment..." + +# Wait for Fuseki to be ready +echo "Waiting for Fuseki to start..." +until curl -s http://fuseki:3030/$/ping > /dev/null; do + echo "Waiting for Fuseki..." + sleep 2 +done + +echo "Fuseki is ready!" + +# Cache Deno dependencies if deps.ts exists +if [ -f "src/deps.ts" ]; then + echo "Caching Deno dependencies..." + deno cache src/deps.ts +fi + +# Create the 3DOC dataset in Fuseki +echo "Creating Dataset '3DOC' in Fuseki..." +curl 'http://fuseki:3030/$/datasets' \ + -H "Authorization: Basic $(echo -n admin:${TRIDOC_PWD:-pw123} | base64)" \ + -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' \ + --data 'dbName=3DOC&dbType=tdb' \ + --max-time 10 \ + --retry 3 + +echo "Development environment setup complete!" 
+echo "" +echo "You can now run the Tridoc backend with:" +echo "deno run --watch --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts" +echo "" +echo "Fuseki is available at:" +echo "- Internal: http://fuseki:3030" +echo "- External: http://localhost:8001" diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..581641e --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,27 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Launch Tridoc Backend", + "type": "node", + "request": "launch", + "program": "${workspaceFolder}/src/main.ts", + "runtimeExecutable": "deno", + "runtimeArgs": [ + "run", + "--watch", + "--allow-net", + "--allow-read=blobs,rdf.ttl", + "--allow-write=blobs,rdf.ttl", + "--allow-run", + "--allow-env=TRIDOC_PWD,OCR_LANG" + ], + "attachSimplePort": 9229, + "env": { + "TRIDOC_PWD": "pw123", + "OCR_LANG": "deu" + }, + "console": "integratedTerminal" + } + ] +} diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..36a8930 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,76 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "Start Tridoc Backend", + "type": "shell", + "command": "deno", + "args": [ + "run", + "--watch", + "--allow-net", + "--allow-read=blobs,rdf.ttl", + "--allow-write=blobs,rdf.ttl", + "--allow-run", + "--allow-env=TRIDOC_PWD,OCR_LANG", + "src/main.ts" + ], + "group": { + "kind": "build", + "isDefault": true + }, + "presentation": { + "echo": true, + "reveal": "always", + "focus": false, + "panel": "new" + }, + "problemMatcher": [], + "options": { + "env": { + "TRIDOC_PWD": "pw123", + "OCR_LANG": "deu" + } + } + }, + { + "label": "Cache Dependencies", + "type": "shell", + "command": "deno", + "args": ["cache", "src/deps.ts"], + "group": "build", + "presentation": { + "echo": true, + "reveal": "always", + "focus": false, + "panel": "shared" + } + }, + { + "label": "Format Code", + "type": "shell", + "command": "deno", + "args": ["fmt"], + "group": "build", + "presentation": { + "echo": true, + "reveal": "silent", + "focus": false, + "panel": "shared" + } + }, + { + "label": "Lint Code", + "type": "shell", + "command": "deno", + "args": ["lint"], + "group": "test", + "presentation": { + "echo": true, + "reveal": "always", + "focus": false, + "panel": "shared" + } + } + ] +} From 070df3da39f0f40b7e6faa9435fd43c1478d6fcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Tue, 19 Aug 2025 16:19:03 +0000 Subject: [PATCH 40/90] Pending changes exported from your codespace --- .devcontainer/Dockerfile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index f72f717..d432711 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -10,9 +10,13 @@ RUN apt update \ && apt -y install pdfsandwich tesseract-ocr-deu tesseract-ocr-fra curl git zip unzip \ && rm -rf /var/lib/apt/lists/* -# Remove ImageMagick policy restrictions +# Remove ImageMagick policy restrictions (only if file exists) RUN rm -f /etc/ImageMagick-6/policy.xml +# Change ownership of the working directory to deno user +RUN chown -R deno:deno /usr/src/app + +# Switch to deno user before creating files USER deno # Pre-cache dependencies (will be overridden by volume mount in dev) From 0d22487f2f1c9b53f0124ff544d9851464705df8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Tue, 19 Aug 2025 19:58:47 +0000 Subject: [PATCH 41/90] 
builds/starts --- .devcontainer/Dockerfile | 4 ++- .devcontainer/docker-compose.yml | 9 +++++- .devcontainer/setup-dev.sh | 48 ++++++++++++++++++++++---------- 3 files changed, 45 insertions(+), 16 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index d432711..0e27bf0 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -14,7 +14,9 @@ RUN apt update \ RUN rm -f /etc/ImageMagick-6/policy.xml # Change ownership of the working directory to deno user -RUN chown -R deno:deno /usr/src/app +RUN chown -R deno:deno /usr/src/app \ + && mkdir -p /home/deno \ + && chown -R deno:deno /home/deno # Switch to deno user before creating files USER deno diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index 26fb93f..c82bc2f 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -15,7 +15,8 @@ services: ports: - "8000:8000" depends_on: - - fuseki + fuseki: + condition: service_healthy environment: TRIDOC_PWD: "${TRIDOC_PWD:-pw123}" OCR_LANG: "${OCR_LANG:-deu}" @@ -34,6 +35,12 @@ services: volumes: - ../fuseki-base:/fuseki/base - ../config-tdb.ttl:/fuseki/set-up-resources/config-tdb + healthcheck: + test: ["CMD", "curl", "-fsS", "http://localhost:3030/$/ping"] + interval: 5s + timeout: 3s + retries: 30 + start_period: 10s networks: - tridoc-dev diff --git a/.devcontainer/setup-dev.sh b/.devcontainer/setup-dev.sh index 951e490..5e86012 100755 --- a/.devcontainer/setup-dev.sh +++ b/.devcontainer/setup-dev.sh @@ -3,28 +3,48 @@ echo "Setting up Tridoc Backend development environment..." # Wait for Fuseki to be ready -echo "Waiting for Fuseki to start..." -until curl -s http://fuseki:3030/$/ping > /dev/null; do - echo "Waiting for Fuseki..." - sleep 2 +echo "Waiting for Fuseki to start (timeout 180s)..." +FUSEKI_TIMEOUT=180 +FUSEKI_START=$(date +%s) +while true; do + if curl -fsS http://fuseki:3030/$/ping > /dev/null 2>&1; then + echo "Fuseki is ready!" + break + fi + NOW=$(date +%s) + ELAPSED=$((NOW - FUSEKI_START)) + if [ "$ELAPSED" -ge "$FUSEKI_TIMEOUT" ]; then + echo "ERROR: Fuseki did not become ready within ${FUSEKI_TIMEOUT}s. Skipping dataset bootstrap. Check 'fuseki' service logs." >&2 + break + fi + echo "Waiting for Fuseki (${ELAPSED}s elapsed)..." + sleep 3 done -echo "Fuseki is ready!" - # Cache Deno dependencies if deps.ts exists if [ -f "src/deps.ts" ]; then echo "Caching Deno dependencies..." deno cache src/deps.ts fi -# Create the 3DOC dataset in Fuseki -echo "Creating Dataset '3DOC' in Fuseki..." -curl 'http://fuseki:3030/$/datasets' \ - -H "Authorization: Basic $(echo -n admin:${TRIDOC_PWD:-pw123} | base64)" \ - -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' \ - --data 'dbName=3DOC&dbType=tdb' \ - --max-time 10 \ - --retry 3 +if curl -fsS http://fuseki:3030/$/ping > /dev/null 2>&1; then + AUTH_HEADER="Authorization: Basic $(echo -n admin:${TRIDOC_PWD:-pw123} | base64)" + echo "Ensuring Dataset '3DOC' exists..." + if curl -fsS -H "$AUTH_HEADER" http://fuseki:3030/$/datasets | grep -q '"3DOC"'; then + echo "Dataset '3DOC' already exists." + else + if curl -fsS 'http://fuseki:3030/$/datasets' \ + -H "$AUTH_HEADER" \ + -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' \ + --data 'dbName=3DOC&dbType=tdb' ; then + echo "Dataset '3DOC' created." + else + echo "WARNING: Failed to create dataset '3DOC'. It may already exist or Fuseki refused the request." >&2 + fi + fi +else + echo "Skipping dataset creation because Fuseki is not reachable." 
+fi
 
 echo "Development environment setup complete!"
 echo ""

From be1d662cafa79ca15d4aa8bb7151083e773ebb8a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reto=20Gm=C3=BCr?=
Date: Tue, 19 Aug 2025 20:15:16 +0000
Subject: [PATCH 42/90] allow pings

---
 .devcontainer/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 0e27bf0..63cece1 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -7,7 +7,7 @@ WORKDIR /usr/src/app
 
 # Install required packages for development environment
 RUN apt update \
-  && apt -y install pdfsandwich tesseract-ocr-deu tesseract-ocr-fra curl git zip unzip \
+  && apt -y install pdfsandwich tesseract-ocr-deu tesseract-ocr-fra curl git zip unzip iputils-ping \
   && rm -rf /var/lib/apt/lists/*
 
 # Remove ImageMagick policy restrictions (only if file exists)

From 451013574b12021f754779e4741a87d2962648c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reto=20Gm=C3=BCr?=
Date: Tue, 19 Aug 2025 20:24:48 +0000
Subject: [PATCH 43/90] bash

---
 .devcontainer/devcontainer.json | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index c7e04b3..e9cfbfd 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -42,7 +42,13 @@
       ],
       "settings": {
         "deno.enable": true,
-        "deno.lint": true
+        "deno.lint": true,
+        "terminal.integrated.defaultProfile.linux": "bash",
+        "terminal.integrated.profiles.linux": {
+          "bash": {
+            "path": "/bin/bash"
+          }
+        }
       }
     }
   }

From 00880708e08ac1fbc3ab1460ce7e6f413ccdb4af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reto=20Gm=C3=BCr?=
Date: Tue, 19 Aug 2025 20:29:53 +0000
Subject: [PATCH 44/90] permission on deno-dir

---
 .devcontainer/Dockerfile | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 63cece1..9a53ba3 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -10,6 +10,9 @@ RUN apt update \
   && apt -y install pdfsandwich tesseract-ocr-deu tesseract-ocr-fra curl git zip unzip iputils-ping \
   && rm -rf /var/lib/apt/lists/*
 
+# Ensure Deno cache dir exists and is writable
+RUN mkdir -p /deno-dir && chown -R deno:deno /deno-dir
+
 # Remove ImageMagick policy restrictions (only if file exists)
 RUN rm -f /etc/ImageMagick-6/policy.xml
 
@@ -21,8 +24,10 @@ RUN chown -R deno:deno /usr/src/app \
 # Switch to deno user before creating files
 USER deno
 
+# (Optional) set DENO_DIR explicitly
+ENV DENO_DIR=/deno-dir
+
 # Pre-cache dependencies (will be overridden by volume mount in dev)
-# This is just to prepare the environment
 RUN mkdir -p src && echo 'export {};' > src/deps.ts
 
 # Keep container running for development

From c302591c558e689550a19cf3ecb486464909d34f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reto=20Gm=C3=BCr?=
Date: Tue, 19 Aug 2025 20:46:05 +0000
Subject: [PATCH 45/90] persist bash history on host

---
 .devcontainer/Dockerfile | 8 ++++++++
 .gitignore               | 4 +++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 9a53ba3..2a8ecde 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -30,5 +30,13 @@ ENV DENO_DIR=/deno-dir
 # Pre-cache dependencies (will be overridden by volume mount in dev)
 RUN mkdir -p src && echo 'export {};' > src/deps.ts
 
+# Persist bash history in the mounted workspace so it survives container rebuilds
+# Use a file inside .devcontainer to keep project root clean
+ENV HISTFILE=/usr/src/app/.devcontainer/.bash_history \
+    HISTSIZE=5000 \
+    HISTFILESIZE=10000 \
+    PROMPT_COMMAND='history -a; history -n; $PROMPT_COMMAND'
+RUN touch /usr/src/app/.devcontainer/.bash_history && chmod 600 /usr/src/app/.devcontainer/.bash_history
+
 # Keep container running for development
 CMD ["sleep", "infinity"]
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index ccf4377..8523b6c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,6 @@ yarn-error.log*
 node_modules
 
 blobs
-fuseki-base
\ No newline at end of file
+fuseki-base
+.devcontainer/.bash_history
+.bash_history
\ No newline at end of file

From 3e7cbc48fca1f8e35e4174d48f8861a7576f6e9f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reto=20Gm=C3=BCr?=
Date: Tue, 19 Aug 2025 20:58:29 +0000
Subject: [PATCH 46/90] avoiding ownership issues

---
 .devcontainer/Dockerfile | 8 ++++----
 .gitignore               | 3 ++-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 2a8ecde..6d38125 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -24,11 +24,11 @@ RUN chown -R deno:deno /usr/src/app \
 # Switch to deno user before creating files
 USER deno
 
-# (Optional) set DENO_DIR explicitly
-ENV DENO_DIR=/deno-dir
+# Use a workspace-local Deno cache to avoid UID mismatch issues with /deno-dir
+ENV DENO_DIR=/usr/src/app/.deno-dir
 
-# Pre-cache dependencies (will be overridden by volume mount in dev)
-RUN mkdir -p src && echo 'export {};' > src/deps.ts
+# Pre-cache dependencies (will be overridden by volume mount in dev) and ensure cache dir exists
+RUN mkdir -p "$DENO_DIR" src && echo 'export {};' > src/deps.ts
 
 # Persist bash history in the mounted workspace so it survives container rebuilds
 # Use a file inside .devcontainer to keep project root clean
diff --git a/.gitignore b/.gitignore
index 8523b6c..f0754c0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,4 +8,5 @@ node_modules
 blobs
 fuseki-base
 .devcontainer/.bash_history
-.bash_history
\ No newline at end of file
+.bash_history
+.deno-dir
\ No newline at end of file

From 813c55a52953aee17185376836ea9cc2b8168efc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reto=20Gm=C3=BCr?=
Date: Tue, 19 Aug 2025 21:18:19 +0000
Subject: [PATCH 47/90] obsolete

---
 .devcontainer/Dockerfile | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 6d38125..e750d96 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -10,9 +10,6 @@ RUN apt update \
   && apt -y install pdfsandwich tesseract-ocr-deu tesseract-ocr-fra curl git zip unzip iputils-ping \
   && rm -rf /var/lib/apt/lists/*
 
-# Ensure Deno cache dir exists and is writable
-RUN mkdir -p /deno-dir && chown -R deno:deno /deno-dir
-
 # Remove ImageMagick policy restrictions (only if file exists)
 RUN rm -f /etc/ImageMagick-6/policy.xml
 

From 87572366bb4586e10c79180880bd4b2042f8697e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reto=20Gm=C3=BCr?=
Date: Tue, 19 Aug 2025 23:24:17 +0200
Subject: [PATCH 48/90] ensure dir exists

---
 .devcontainer/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index e750d96..1d0536d 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -2,7 +2,7 @@ FROM denoland/deno:2.4.4
 
 EXPOSE 8000
 
-RUN mkdir -p /usr/src/app/src
+RUN mkdir -p /usr/src/app/src /usr/src/app/.devcontainer
 WORKDIR /usr/src/app
 
 # Install required packages for development environment

From de2ffc59f19527874792172450e92e80376e52cf Mon
Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Wed, 20 Aug 2025 06:03:08 +0000 Subject: [PATCH 49/90] deno lock --- deno.lock | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 deno.lock diff --git a/deno.lock b/deno.lock new file mode 100644 index 0000000..ba80b48 --- /dev/null +++ b/deno.lock @@ -0,0 +1,68 @@ +{ + "version": "5", + "remote": { + "https://deno.land/std@0.160.0/_util/assert.ts": "e94f2eb37cebd7f199952e242c77654e43333c1ac4c5c700e929ea3aa5489f74", + "https://deno.land/std@0.160.0/_util/os.ts": "8a33345f74990e627b9dfe2de9b040004b08ea5146c7c9e8fe9a29070d193934", + "https://deno.land/std@0.160.0/async/abortable.ts": "87aa7230be8360c24ad437212311c9e8d4328854baec27b4c7abb26e85515c06", + "https://deno.land/std@0.160.0/async/deadline.ts": "48ac998d7564969f3e6ec6b6f9bf0217ebd00239b1b2292feba61272d5dd58d0", + "https://deno.land/std@0.160.0/async/debounce.ts": "dc8b92d4a4fe7eac32c924f2b8d3e62112530db70cadce27042689d82970b350", + "https://deno.land/std@0.160.0/async/deferred.ts": "d8fb253ffde2a056e4889ef7e90f3928f28be9f9294b6505773d33f136aab4e6", + "https://deno.land/std@0.160.0/async/delay.ts": "0419dfc993752849692d1f9647edf13407c7facc3509b099381be99ffbc9d699", + "https://deno.land/std@0.160.0/async/mod.ts": "dd0a8ed4f3984ffabe2fcca7c9f466b7932d57b1864ffee148a5d5388316db6b", + "https://deno.land/std@0.160.0/async/mux_async_iterator.ts": "3447b28a2a582224a3d4d3596bccbba6e85040da3b97ed64012f7decce98d093", + "https://deno.land/std@0.160.0/async/pool.ts": "ef9eb97b388543acbf0ac32647121e4dbe629236899586c4d4311a8770fbb239", + "https://deno.land/std@0.160.0/async/tee.ts": "9af3a3e7612af75861308b52249e167f5ebc3dcfc8a1a4d45462d96606ee2b70", + "https://deno.land/std@0.160.0/bytes/bytes_list.ts": "aba5e2369e77d426b10af1de0dcc4531acecec27f9b9056f4f7bfbf8ac147ab4", + "https://deno.land/std@0.160.0/bytes/equals.ts": "3c3558c3ae85526f84510aa2b48ab2ad7bdd899e2e0f5b7a8ffc85acb3a6043a", + "https://deno.land/std@0.160.0/bytes/mod.ts": "b2e342fd3669176a27a4e15061e9d588b89c1aaf5008ab71766e23669565d179", + "https://deno.land/std@0.160.0/datetime/formatter.ts": "7c8e6d16a0950f400aef41b9f1eb9168249869776ec520265dfda785d746589e", + "https://deno.land/std@0.160.0/datetime/mod.ts": "ea927ca96dfb28c7b9a5eed5bdc7ac46bb9db38038c4922631895cea342fea87", + "https://deno.land/std@0.160.0/datetime/tokenizer.ts": "7381e28f6ab51cb504c7e132be31773d73ef2f3e1e50a812736962b9df1e8c47", + "https://deno.land/std@0.160.0/encoding/base64.ts": "c57868ca7fa2fbe919f57f88a623ad34e3d970d675bdc1ff3a9d02bba7409db2", + "https://deno.land/std@0.160.0/fs/_util.ts": "fdc156f897197f261a1c096dcf8ff9267ed0ff42bd5b31f55053a4763a4bae3b", + "https://deno.land/std@0.160.0/fs/copy.ts": "73bdf24f4322648d9bc38ef983b818637ba368351d17aa03644209d3ce3eac31", + "https://deno.land/std@0.160.0/fs/empty_dir.ts": "c15a0aaaf40f8c21cca902aa1e01a789ad0c2fd1b7e2eecf4957053c5dbf707f", + "https://deno.land/std@0.160.0/fs/ensure_dir.ts": "76395fc1c989ca8d2de3aedfa8240eb8f5225cde20f926de957995b063135b80", + "https://deno.land/std@0.160.0/fs/ensure_file.ts": "b8e32ea63aa21221d0219760ba3f741f682d7f7d68d0d24a3ec067c338568152", + "https://deno.land/std@0.160.0/fs/ensure_link.ts": "5cc1c04f18487d7d1edf4c5469705f30b61390ffd24ad7db6df85e7209b32bb2", + "https://deno.land/std@0.160.0/fs/ensure_symlink.ts": "5273557b8c50be69477aa9cb003b54ff2240a336db52a40851c97abce76b96ab", + "https://deno.land/std@0.160.0/fs/eol.ts": "65b1e27320c3eec6fb653b27e20056ee3d015d3e91db388cfefa41616ebc7cb3", + 
"https://deno.land/std@0.160.0/fs/exists.ts": "6a447912e49eb79cc640adacfbf4b0baf8e17ede6d5bed057062ce33c4fa0d68", + "https://deno.land/std@0.160.0/fs/expand_glob.ts": "6b6a58413f2e82118d12f981e033818e68b567f90969156c59a86820e5e4c584", + "https://deno.land/std@0.160.0/fs/mod.ts": "354a6f972ef4e00c4dd1f1339a8828ef0764c1c23d3c0010af3fcc025d8655b0", + "https://deno.land/std@0.160.0/fs/move.ts": "6d7fa9da60dbc7a32dd7fdbc2ff812b745861213c8e92ba96dace0669b0c378c", + "https://deno.land/std@0.160.0/fs/walk.ts": "d96d4e5b6a3552e8304f28a0fd0b317b812298298449044f8de4932c869388a5", + "https://deno.land/std@0.160.0/http/_negotiation/common.ts": "410e902f01cdd324e4746e8017595be4fc357d6fc4cd6044f2f808a943d7eaf7", + "https://deno.land/std@0.160.0/http/_negotiation/encoding.ts": "f749c1d539d139af783e8a7741de5a47a98a5e3c9af82b8af512567ccf5fe632", + "https://deno.land/std@0.160.0/http/_negotiation/language.ts": "53c306186904d2dace4c624a8822542866ad332a7f40ac90e0af1504f95c63d0", + "https://deno.land/std@0.160.0/http/_negotiation/media_type.ts": "ecdda87286495f7ff25116858f5088856953e2f1585e593d314e0c71b826a137", + "https://deno.land/std@0.160.0/http/cookie.ts": "7a61e920f19c9c3ee8e07befe5fe5a530114d6babefd9ba2c50594cab724a822", + "https://deno.land/std@0.160.0/http/cookie_map.ts": "6b623a8476340685a9aa11a2944c79d225d0380cd1bb9b94a2a07f90d47f3068", + "https://deno.land/std@0.160.0/http/http_errors.ts": "fe9b7f95f7ee0592c3306f8c7aed03ba53d55d1ef81e00041c1171b9588f46d9", + "https://deno.land/std@0.160.0/http/http_status.ts": "897575a7d6bc2b9123f6a38ecbc0f03d95a532c5d92029315dc9f508e12526b8", + "https://deno.land/std@0.160.0/http/mod.ts": "329d40fe0113f24d878749d1b8e0afe037179906230dfb86247e7d140877d262", + "https://deno.land/std@0.160.0/http/negotiation.ts": "f35b1ff2ad4ff9feaa00ac234960b398172768205c8eceaef7f2eafe34716ba2", + "https://deno.land/std@0.160.0/http/server.ts": "e99c1bee8a3f6571ee4cdeb2966efad465b8f6fe62bec1bdb59c1f007cc4d155", + "https://deno.land/std@0.160.0/io/buffer.ts": "fae02290f52301c4e0188670e730cd902f9307fb732d79c4aa14ebdc82497289", + "https://deno.land/std@0.160.0/path/_constants.ts": "df1db3ffa6dd6d1252cc9617e5d72165cd2483df90e93833e13580687b6083c3", + "https://deno.land/std@0.160.0/path/_interface.ts": "ee3b431a336b80cf445441109d089b70d87d5e248f4f90ff906820889ecf8d09", + "https://deno.land/std@0.160.0/path/_util.ts": "d16be2a16e1204b65f9d0dfc54a9bc472cafe5f4a190b3c8471ec2016ccd1677", + "https://deno.land/std@0.160.0/path/common.ts": "bee563630abd2d97f99d83c96c2fa0cca7cee103e8cb4e7699ec4d5db7bd2633", + "https://deno.land/std@0.160.0/path/glob.ts": "cb5255638de1048973c3e69e420c77dc04f75755524cb3b2e160fe9277d939ee", + "https://deno.land/std@0.160.0/path/mod.ts": "56fec03ad0ebd61b6ab39ddb9b0ddb4c4a5c9f2f4f632e09dd37ec9ebfd722ac", + "https://deno.land/std@0.160.0/path/posix.ts": "6b63de7097e68c8663c84ccedc0fd977656eb134432d818ecd3a4e122638ac24", + "https://deno.land/std@0.160.0/path/separator.ts": "fe1816cb765a8068afb3e8f13ad272351c85cbc739af56dacfc7d93d710fe0f9", + "https://deno.land/std@0.160.0/path/win32.ts": "ee8826dce087d31c5c81cd414714e677eb68febc40308de87a2ce4b40e10fb8d", + "https://deno.land/std@0.160.0/streams/buffer.ts": "f3f1bd7e6bd2d29125aae7d3a8c7fe9f2394275b5466fe6341177e6458c6da94", + "https://deno.land/std@0.160.0/streams/conversion.ts": "328afbedee0a7e0c330ac4c7b4c1af569ee53974f970230f6a78f545b93abb9b", + "https://deno.land/std@0.160.0/streams/delimiter.ts": "e18febbded53df275a897fac9249a6d0a6a5efc943256ad0f6cb23bf4d757668", + "https://deno.land/std@0.160.0/streams/merge.ts": 
"88ed3dfa030ae076802688e4cadd762a251a41d81ed1776dfd9a2a9a0e970195", + "https://deno.land/std@0.160.0/streams/mod.ts": "f402791689d74bd091ecf8f4015bc5b7d5c18132a55c7c72fe0576d1fb254cf9", + "https://deno.land/x/nanoid@v3.0.0/customAlphabet.ts": "1cfd7cfd2f07ca8d78a7e7855fcc9f59abf01ef2a127484ef94328fadf940ead", + "https://deno.land/x/nanoid@v3.0.0/customRandom.ts": "af56e19038c891a4b4ef2be931554c27579bd407ee5bbea5cb64f6ee1347cbe3", + "https://deno.land/x/nanoid@v3.0.0/mod.ts": "3ead610e40c58d8fdca21d5da9ec661445a2b82526e19c34d05de5f90be8a1be", + "https://deno.land/x/nanoid@v3.0.0/nanoid.ts": "8d119bc89a0f34e7bbe0c2dbdc280d01753e431af553d189663492310a31085d", + "https://deno.land/x/nanoid@v3.0.0/random.ts": "4da71d5f72f2bfcc6a4ee79b5d4e72f48dcf4fe4c3835fd5ebab08b9f33cd598", + "https://deno.land/x/nanoid@v3.0.0/urlAlphabet.ts": "8b1511deb1ecb23c66202b6000dc10fb68f9a96b5550c6c8cef5009324793431" + } +} From 7764dd3a6420b5ba5c5e61a948780d965e07ba79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Wed, 20 Aug 2025 06:14:19 +0000 Subject: [PATCH 50/90] removed obsolete --- .devcontainer/docker-cmd.sh | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 .devcontainer/docker-cmd.sh diff --git a/.devcontainer/docker-cmd.sh b/.devcontainer/docker-cmd.sh deleted file mode 100644 index 6791b5b..0000000 --- a/.devcontainer/docker-cmd.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash -echo 'Attempting to create Dataset "3DOC"' -curl 'http://fuseki:3030/$/datasets' -H "Authorization: Basic $(echo -n admin:pw123 | base64)" \ - -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' --data 'dbName=3DOC&dbType=tdb' -set -m -deno run --watch --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts & -sleep 5 -echo 'Attempting to create Dataset "3DOC"' -curl 'http://fuseki:3030/$/datasets' -H "Authorization: Basic $(echo -n admin:pw123 | base64)" \ - -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' --data 'dbName=3DOC&dbType=tdb' -fg 1 From 767f2a8b6b004396aeef527ebd6996aa60573ac4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Wed, 20 Aug 2025 06:18:14 +0000 Subject: [PATCH 51/90] dev one in .devcontaIner --- dev-docker-compose.yml | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 dev-docker-compose.yml diff --git a/dev-docker-compose.yml b/dev-docker-compose.yml deleted file mode 100644 index 89798bb..0000000 --- a/dev-docker-compose.yml +++ /dev/null @@ -1,21 +0,0 @@ -version: '3' -services: - tridoc: - build: . 
- ports: - - "8000:8000" - depends_on: - - "fuseki" - volumes: - - ./blobs:/usr/src/app/blobs - environment: - TRIDOC_PWD: "${TRIDOC_PWD}" - fuseki: - image: "linkedsolutions/fuseki" - environment: - ADMIN_PASSWORD: "pw123" - ports: - - "8001:3030" # handy for development, remove in production - volumes: - - ./fuseki-base:/fuseki/base - - ./config-tdb.ttl:/fuseki/set-up-resources/config-tdb \ No newline at end of file From 20cec7f85b1a39179a52e10be24bff8d25c97f8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Wed, 20 Aug 2025 06:43:35 +0000 Subject: [PATCH 52/90] removed duplication --- .devcontainer/docker-compose.yml | 4 +- .devcontainer/setup-dev.sh | 43 ++++------------------ Dockerfile | 26 ++++++++++++- database-create.sh | 63 ++++++++++++++++++++++++++++++++ docker-cmd.sh | 23 +++++++----- docker-compose.yml | 2 +- 6 files changed, 110 insertions(+), 51 deletions(-) create mode 100644 database-create.sh diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index c82bc2f..ab24918 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -5,7 +5,7 @@ services: tridoc: build: context: .. - dockerfile: .devcontainer/Dockerfile + dockerfile: Dockerfile user: deno volumes: # Mount the entire workspace for development @@ -29,7 +29,7 @@ services: fuseki: image: "linkedsolutions/fuseki" environment: - ADMIN_PASSWORD: "${TRIDOC_PWD:-pw123}" + ADMIN_PASSWORD: "${FUSEKI_PWD:-pw123}" ports: - "8001:3030" # Expose for development access volumes: diff --git a/.devcontainer/setup-dev.sh b/.devcontainer/setup-dev.sh index 5e86012..ac118a2 100755 --- a/.devcontainer/setup-dev.sh +++ b/.devcontainer/setup-dev.sh @@ -2,24 +2,12 @@ echo "Setting up Tridoc Backend development environment..." -# Wait for Fuseki to be ready -echo "Waiting for Fuseki to start (timeout 180s)..." -FUSEKI_TIMEOUT=180 -FUSEKI_START=$(date +%s) -while true; do - if curl -fsS http://fuseki:3030/$/ping > /dev/null 2>&1; then - echo "Fuseki is ready!" - break - fi - NOW=$(date +%s) - ELAPSED=$((NOW - FUSEKI_START)) - if [ "$ELAPSED" -ge "$FUSEKI_TIMEOUT" ]; then - echo "ERROR: Fuseki did not become ready within ${FUSEKI_TIMEOUT}s. Skipping dataset bootstrap. Check 'fuseki' service logs." >&2 - break - fi - echo "Waiting for Fuseki (${ELAPSED}s elapsed)..." - sleep 3 -done +# Ensure dataset exists using shared script (waits for Fuseki internally) +if [ -f "./database-create.sh" ]; then + bash ./database-create.sh 3DOC || echo "(setup-dev) Dataset ensure script exited with non-zero status; continuing." +else + echo "(setup-dev) WARNING: database-create.sh not found; skipping dataset ensure." +fi # Cache Deno dependencies if deps.ts exists if [ -f "src/deps.ts" ]; then @@ -27,24 +15,7 @@ if [ -f "src/deps.ts" ]; then deno cache src/deps.ts fi -if curl -fsS http://fuseki:3030/$/ping > /dev/null 2>&1; then - AUTH_HEADER="Authorization: Basic $(echo -n admin:${TRIDOC_PWD:-pw123} | base64)" - echo "Ensuring Dataset '3DOC' exists..." - if curl -fsS -H "$AUTH_HEADER" http://fuseki:3030/$/datasets | grep -q '"3DOC"'; then - echo "Dataset '3DOC' already exists." - else - if curl -fsS 'http://fuseki:3030/$/datasets' \ - -H "$AUTH_HEADER" \ - -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' \ - --data 'dbName=3DOC&dbType=tdb' ; then - echo "Dataset '3DOC' created." - else - echo "WARNING: Failed to create dataset '3DOC'. It may already exist or Fuseki refused the request." 
>&2 - fi - fi -else - echo "Skipping dataset creation because Fuseki is not reachable." -fi +echo "Dataset bootstrap (if needed) complete." echo "Development environment setup complete!" echo "" diff --git a/Dockerfile b/Dockerfile index 4fd1ebc..76a8bf3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,16 +2,38 @@ FROM denoland/deno:2.4.4 EXPOSE 8000 -RUN mkdir -p /usr/src/app/src +RUN mkdir -p /usr/src/app/src /usr/src/app/.devcontainer WORKDIR /usr/src/app +# Install required packages (union of prod + dev wants) RUN apt update \ - && apt -y install pdfsandwich tesseract-ocr-deu tesseract-ocr-fra curl zip unzip + && apt -y install pdfsandwich tesseract-ocr-deu tesseract-ocr-fra curl git zip unzip iputils-ping \ + && rm -rf /var/lib/apt/lists/* + +# Remove restrictive ImageMagick policy if present (non-fatal if absent) +RUN rm -f /etc/ImageMagick-6/policy.xml || true + +# Adjust ownership for non-root usage +RUN chown -R deno:deno /usr/src/app \ + && mkdir -p /home/deno \ + && chown -R deno:deno /home/deno USER deno + +# Local Deno cache + persistent bash history (handy even outside devcontainer) +ENV DENO_DIR=/usr/src/app/.deno-dir \ + HISTFILE=/usr/src/app/.devcontainer/.bash_history \ + HISTSIZE=5000 \ + HISTFILESIZE=10000 \ + PROMPT_COMMAND='history -a; history -n; $PROMPT_COMMAND' +RUN mkdir -p "$DENO_DIR" src && touch /usr/src/app/.devcontainer/.bash_history && chmod 600 /usr/src/app/.devcontainer/.bash_history + +# Pre-cache dependencies (will speed up builds; safe if later bind-mounted) COPY src/deps.ts src/deps.ts RUN deno cache src/deps.ts +# Copy application source COPY . . +# Default container command (can be overridden in dev to `sleep infinity`) CMD [ "/bin/bash", "/usr/src/app/docker-cmd.sh" ] \ No newline at end of file diff --git a/database-create.sh b/database-create.sh new file mode 100644 index 0000000..9f20a65 --- /dev/null +++ b/database-create.sh @@ -0,0 +1,63 @@ +#!/bin/bash +set -euo pipefail + +# database-create.sh +# Idempotently ensure a Fuseki dataset (default: 3DOC) exists. +# Waits for Fuseki to be reachable before attempting creation. +# +# Environment: +# FUSEKI_PWD Admin password for Fuseki (default: pw123) +# (Deprecated fallback: TRIDOC_PWD if FUSEKI_PWD unset) +# FUSEKI_HOST Hostname of fuseki service (default: fuseki) +# FUSEKI_PORT Port of fuseki service (default: 3030) +# FUSEKI_TIMEOUT Seconds to wait for readiness (default: 180) +# +# Usage: +# ./database-create.sh [DATASET_NAME] + +DATASET_NAME="${1:-3DOC}" +FUSEKI_PWD="${FUSEKI_PWD:-${TRIDOC_PWD:-pw123}}" +FUSEKI_HOST="${FUSEKI_HOST:-fuseki}" +FUSEKI_PORT="${FUSEKI_PORT:-3030}" +FUSEKI_TIMEOUT="${FUSEKI_TIMEOUT:-180}" + +BASE_URL="http://${FUSEKI_HOST}:${FUSEKI_PORT}" +PING_URL="${BASE_URL}/$/ping" +DATASETS_URL="${BASE_URL}/$/datasets" + +echo "[database-create] Ensuring Fuseki dataset '${DATASET_NAME}' exists (timeout ${FUSEKI_TIMEOUT}s)..." + +start_ts=$(date +%s) +while true; do + if curl -fsS "${PING_URL}" > /dev/null 2>&1; then + echo "[database-create] Fuseki is reachable." + break + fi + now_ts=$(date +%s) + elapsed=$((now_ts - start_ts)) + if [ "$elapsed" -ge "$FUSEKI_TIMEOUT" ]; then + echo "[database-create] ERROR: Fuseki not reachable after ${FUSEKI_TIMEOUT}s." >&2 + exit 1 + fi + echo "[database-create] Waiting for Fuseki (${elapsed}s elapsed)..." 
+ sleep 3 +done + +AUTH_HEADER="Authorization: Basic $(echo -n admin:${FUSEKI_PWD} | base64)" + +if curl -fsS -H "${AUTH_HEADER}" "${DATASETS_URL}" | grep -q '"'"${DATASET_NAME}"'"'; then + echo "[database-create] Dataset '${DATASET_NAME}' already present." + exit 0 +fi + +echo "[database-create] Creating dataset '${DATASET_NAME}'..." +if curl -fsS "${DATASETS_URL}" \ + -H "${AUTH_HEADER}" \ + -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' \ + --data "dbName=${DATASET_NAME}&dbType=tdb" > /dev/null; then + echo "[database-create] Dataset '${DATASET_NAME}' created." +else + echo "[database-create] WARNING: Failed to create dataset '${DATASET_NAME}'. It may already exist or the server refused the request." >&2 +fi + +exit 0 diff --git a/docker-cmd.sh b/docker-cmd.sh index c707f9c..c880972 100644 --- a/docker-cmd.sh +++ b/docker-cmd.sh @@ -1,11 +1,14 @@ #!/bin/bash -echo 'Attempting to create Dataset "3DOC"' -curl 'http://fuseki:3030/$/datasets' -H "Authorization: Basic $(echo -n admin:pw123 | base64)" \ - -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' --data 'dbName=3DOC&dbType=tdb' -set -m -deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts & -sleep 5 -echo 'Attempting to create Dataset "3DOC"' -curl 'http://fuseki:3030/$/datasets' -H "Authorization: Basic $(echo -n admin:pw123 | base64)" \ - -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' --data 'dbName=3DOC&dbType=tdb' -fg 1 +set -euo pipefail + +echo "[docker-cmd] Starting Tridoc backend..." + +# Ensure dataset exists (idempotent) +if [ -f "./database-create.sh" ]; then + bash ./database-create.sh 3DOC || echo "[docker-cmd] Dataset ensure script failed (continuing)." >&2 +else + echo "[docker-cmd] database-create.sh missing; proceeding without dataset bootstrap." >&2 +fi + +echo "[docker-cmd] Launching Deno application..." 
+exec deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts diff --git a/docker-compose.yml b/docker-compose.yml index 78d044c..30d6a21 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,7 +13,7 @@ services: fuseki: image: "linkedsolutions/fuseki" environment: - ADMIN_PASSWORD: "pw123" + ADMIN_PASSWORD: "${FUSEKI_PWD:-pw123}" volumes: - ./fuseki-base:/fuseki/base - ./config-tdb.ttl:/fuseki/set-up-resources/config-tdb \ No newline at end of file From 668f805e2f2c57d64136f1e2386c6057e2a12db8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Wed, 20 Aug 2025 06:52:28 +0000 Subject: [PATCH 53/90] fixed hist conf --- Dockerfile | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 76a8bf3..1d31180 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,14 +20,20 @@ RUN chown -R deno:deno /usr/src/app \ USER deno + # Local Deno cache + persistent bash history (handy even outside devcontainer) ENV DENO_DIR=/usr/src/app/.deno-dir \ HISTFILE=/usr/src/app/.devcontainer/.bash_history \ HISTSIZE=5000 \ - HISTFILESIZE=10000 \ - PROMPT_COMMAND='history -a; history -n; $PROMPT_COMMAND' + HISTFILESIZE=10000 RUN mkdir -p "$DENO_DIR" src && touch /usr/src/app/.devcontainer/.bash_history && chmod 600 /usr/src/app/.devcontainer/.bash_history +# Configure history persistence only for interactive shells by appending to the deno user's .bashrc +# This avoids PROMPT_COMMAND being executed in non-interactive shells where 'history' may not accept +# the supplied arguments and would emit errors. +RUN mkdir -p /home/deno && \ + printf '\n# Persist bash history across sessions (interactive shells only)\nif [[ $- == *i* ]]; then\n # append new history lines and read new lines from history file\n PROMPT_COMMAND="history -a; history -n; ${PROMPT_COMMAND:-}"\nfi\n' >> /home/deno/.bashrc || true + # Pre-cache dependencies (will speed up builds; safe if later bind-mounted) COPY src/deps.ts src/deps.ts RUN deno cache src/deps.ts From 4c6cc3da211a2be4c270b62568d51a462305a759 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Wed, 20 Aug 2025 07:07:21 +0000 Subject: [PATCH 54/90] responding as expected by cockpit --- src/handlers/doc.ts | 19 ++++++++++++++++--- src/meta/store.ts | 8 +++++--- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index db7b454..f73c764 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -217,8 +217,17 @@ export async function postComment( match: URLPatternResult, ): Promise { const id = match.pathname.groups.id; - await metastore.addComment(id, (await request.json()).text); - return respond(undefined, { status: 201 }); + const body = await request.json(); + if (!body || typeof body.text !== "string" || body.text.trim() === "") { + return respond("Missing or invalid 'text' in request body", { status: 400 }); + } + const text: string = body.text; + const created = await metastore.addComment(id, text); + const respBody = JSON.stringify({ text, created }); + return respond(respBody, { + status: 200, + headers: { "content-type": "application/json; charset=utf-8" }, + }); } export async function postPDF( @@ -295,7 +304,11 @@ export async function putTitle( match: URLPatternResult, ): Promise { const id = match.pathname.groups.id; - const title: string = (await request.json())?.title; + const body = await request.json(); + if (!body || typeof body.title !== "string" || 
body.title.trim() === "") { + return respond("Missing or invalid 'title' in request body", { status: 400 }); + } + const title: string = body.title; await metastore.addTitle(id, title); return respond(undefined, { status: 201 }); } diff --git a/src/meta/store.ts b/src/meta/store.ts index ed5826c..84bef15 100644 --- a/src/meta/store.ts +++ b/src/meta/store.ts @@ -9,6 +9,7 @@ function escapeLiteral(string: string) { export async function addComment(id: string, text: string) { const now = new Date(); + const created = now.toISOString(); const query = ` PREFIX rdf: PREFIX xsd: @@ -18,18 +19,19 @@ INSERT DATA { GRAPH { s:comment [ a s:Comment ; - s:dateCreated "${now.toISOString()}"^^xsd:dateTime ; + s:dateCreated "${created}"^^xsd:dateTime ; s:text "${escapeLiteral(text)}" ] . } }`; - return await fusekiUpdate(query); + await fusekiUpdate(query); + return created; } export async function addTag( id: string, label: string, - value: string, + value: string | undefined, type: string, ) { const tag = value From 1647370fc8f350054c6a72f87930cb5176b0bc0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Wed, 20 Aug 2025 07:21:08 +0000 Subject: [PATCH 55/90] responses expected by cockpit --- src/handlers/doc.ts | 15 +++++++++++++-- src/handlers/tag.ts | 11 ++++++++++- src/meta/store.ts | 7 ++++++- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index f73c764..62c893f 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -217,6 +217,7 @@ export async function postComment( match: URLPatternResult, ): Promise { const id = match.pathname.groups.id; + if (!id) return respond("Missing document id in path", { status: 400 }); const body = await request.json(); if (!body || typeof body.text !== "string" || body.text.trim() === "") { return respond("Missing or invalid 'text' in request body", { status: 400 }); @@ -280,6 +281,7 @@ export async function postTag( match: URLPatternResult, ): Promise { const id = match.pathname.groups.id; + if (!id) return respond("Missing document id in path", { status: 400 }); const tagObject: TagAdd = await request.json(); const [label, type] = (await metafinder.getTagTypes([tagObject.label]))?.[0] ?? 
@@ -295,8 +297,16 @@ export async function postTag( if (tagObject.parameter?.type && !tagObject.parameter?.value) { return respond("No value provided", { status: 400 }); } - await metastore.addTag(id, tagObject.label, tagObject.parameter?.value, type); - return respond(undefined, { status: 201 }); + const created = await metastore.addTag( + id, + tagObject.label, + tagObject.parameter?.value, + type, + ); + return respond(JSON.stringify(created), { + status: 200, + headers: { "content-type": "application/json; charset=utf-8" }, + }); } export async function putTitle( @@ -304,6 +314,7 @@ export async function putTitle( match: URLPatternResult, ): Promise { const id = match.pathname.groups.id; + if (!id) return respond("Missing document id in path", { status: 400 }); const body = await request.json(); if (!body || typeof body.title !== "string" || body.title.trim() === "") { return respond("Missing or invalid 'title' in request body", { status: 400 }); diff --git a/src/handlers/tag.ts b/src/handlers/tag.ts index 27cf821..e572b25 100644 --- a/src/handlers/tag.ts +++ b/src/handlers/tag.ts @@ -35,7 +35,16 @@ export async function createTag( return respond("Label contains forbidden characters", { status: 400 }); } await metastore.createTag(tagObject.label, tagObject.parameter?.type); - return respond(undefined, { status: 201 }); + const created = { + label: tagObject.label, + parameter: tagObject.parameter?.type + ? { type: tagObject.parameter.type } + : undefined, + }; + return respond(JSON.stringify(created), { + status: 200, + headers: { "content-type": "application/json; charset=utf-8" }, + }); } export async function deleteTag( diff --git a/src/meta/store.ts b/src/meta/store.ts index 84bef15..010e86d 100644 --- a/src/meta/store.ts +++ b/src/meta/store.ts @@ -54,7 +54,12 @@ INSERT DATA { } } }`; - return await fusekiUpdate(query); + await fusekiUpdate(query); + // Return a representation matching getTags output for the created tag + return { + label, + parameter: value ? 
{ type, value } : undefined, + }; } export async function addTitle(id: string, title: string) { From b1b4d61f521c0a1ae9a92181b68ccc5204caf83f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Wed, 20 Aug 2025 14:32:41 +0000 Subject: [PATCH 56/90] addressed lint errors --- src/handlers/doc.ts | 78 ++++++++++++++++--------------------- src/handlers/raw.ts | 42 +++++++------------- src/helpers/pdfprocessor.ts | 10 +++-- 3 files changed, 55 insertions(+), 75 deletions(-) diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index 62c893f..3e3ba31 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -55,7 +55,7 @@ export async function deleteTag( return respond(undefined, { status: 204 }); } export async function deleteTitle( - request: Request, + _request: Request, match: URLPatternResult, ): Promise { const id = match.pathname.groups.id; @@ -67,7 +67,7 @@ export async function getComments( _request: Request, match: URLPatternResult, ): Promise { - const id = match.pathname.groups.id; + const id = match.pathname.groups.id!; const response = await metafinder.getComments(id); return respond(JSON.stringify(response), { headers: { @@ -80,7 +80,7 @@ export async function getPDF( _request: Request, match: URLPatternResult, ): Promise { - const id = match.pathname.groups.id; + const id = match.pathname.groups.id!; const path = getPath(id); try { const fileName = await metafinder.getBasicMeta(id).then(( @@ -107,7 +107,7 @@ export async function getMeta( _request: Request, match: URLPatternResult, ): Promise { - const id = match.pathname.groups.id; + const id = match.pathname.groups.id!; return respond( JSON.stringify({ ...(await metafinder.getBasicMeta(id)), @@ -126,7 +126,7 @@ export async function getTags( _request: Request, match: URLPatternResult, ): Promise { - const id = match.pathname.groups.id; + const id = match.pathname.groups.id!; return respond(JSON.stringify(await metafinder.getTags(id)), { headers: { "content-type": "application/json; charset=utf-8", @@ -138,7 +138,7 @@ export async function getThumb( _request: Request, match: URLPatternResult, ): Promise { - const id = match.pathname.groups.id; + const id = match.pathname.groups.id!; const path = getPath(id); const fileName = await metafinder.getBasicMeta(id).then(( { title, created }, @@ -148,23 +148,16 @@ export async function getThumb( thumb = await Deno.open(path + ".png", { read: true }); } catch (error) { if (error instanceof Deno.errors.NotFound) { - try { - await Deno.stat(path); // Check if PDF exists → 404 otherwise - const p = Deno.run({ - cmd: [ - "convert", - "-thumbnail", - "300x", - "-alpha", - "remove", - `${path}[0]`, - `${path}.png`, - ], - }); - const { success, code } = await p.status(); - if (!success) throw new Error("convert failed with code " + code); - thumb = await Deno.open(path + ".png", { read: true }); - } catch (error) { + try { + await Deno.stat(path); // Check if PDF exists → 404 otherwise + const cmd = new Deno.Command("convert", { + args: ["-thumbnail", "300x", "-alpha", "remove", `${path}[0]`, `${path}.png`], + }); + const p = cmd.spawn(); + const status = await p.status; + if (!status.success) throw new Error("convert failed with code " + status.code); + thumb = await Deno.open(path + ".png", { read: true }); + } catch (error) { if (error instanceof Deno.errors.NotFound) { return respond("404 Not Found", { status: 404 }); } @@ -189,14 +182,12 @@ export async function getTitle( match: URLPatternResult, ): Promise { const id = match.pathname.groups.id; - return respond( - 
JSON.stringify({ title: (await metafinder.getBasicMeta(id)).title }), - { - headers: { - "content-type": "application/json; charset=utf-8", - }, + const meta = await metafinder.getBasicMeta(id); + return respond(JSON.stringify({ title: meta.title ?? null }), { + headers: { + "content-type": "application/json; charset=utf-8", }, - ); + }); } export async function list( @@ -246,26 +237,25 @@ export async function postPDF( if (text.length < 4) { // run OCR const lang = Deno.env.get("OCR_LANG") || "fra+deu+eng"; - const p = Deno.run({ cmd: ["pdfsandwich", "-rgb", "-lang", lang, path] }); - const { success, code } = await p.status(); - if (!success) throw new Error("pdfsandwich failed with code " + code); + const cmd = new Deno.Command("pdfsandwich", { args: ["-rgb", "-lang", lang, path] }); + const p = cmd.spawn(); + const status = await p.status; + if (!status.success) throw new Error("pdfsandwich failed with code " + status.code); // pdfsandwich generates a file with the same name + _ocr await Deno.rename(path + "_ocr", path); text = await getText(path); console.log((new Date()).toISOString(), id, ": OCR finished"); } // no await as we don’t care for the result - if it fails, the thumbnail will be created upon request. - Deno.run({ - cmd: [ - "convert", - "-thumbnail", - "300x", - "-alpha", - "remove", - `${path}[0]`, - `${path}.png`, - ], - }); + // Fire-and-forget thumbnail generation (non-blocking) + try { + const cmd = new Deno.Command("convert", { + args: ["-thumbnail", "300x", "-alpha", "remove", `${path}[0]`, `${path}.png`], + }); + cmd.spawn(); + } catch (_) { + // ignore spawn errors for background thumbnail creation + } const date = datecheck(request); await metastore.storeDocument({ id, text, date }); return respond(undefined, { diff --git a/src/handlers/raw.ts b/src/handlers/raw.ts index a680a83..7dcd10b 100644 --- a/src/handlers/raw.ts +++ b/src/handlers/raw.ts @@ -41,15 +41,12 @@ export async function getTGZ( }); const writableStream = writableStreamFromWriter(rdf); await (await dump()).body?.pipeTo(writableStream); - const p = Deno.run({ - cmd: [ - "bash", - "-c", - `tar --transform="s|${rdfPath}|rdf.ttl|" --exclude-tag="${rdfName}" -czvf ${tarPath} blobs/*/`, - ], + const cmd = new Deno.Command("bash", { + args: ["-c", `tar --transform="s|${rdfPath}|rdf.ttl|" --exclude-tag="${rdfName}" -czvf ${tarPath} blobs/*/`], }); - const { success, code } = await p.status(); - if (!success) throw new Error("tar -czf failed with code " + code); + const p = cmd.spawn(); + const status = await p.status; + if (!status.success) throw new Error("tar -czf failed with code " + status.code); await Deno.remove(rdfPath); const tar = await Deno.open(tarPath); // Build a readable stream so the file doesn't have to be fully loaded into memory while we send it @@ -79,24 +76,14 @@ export async function getZIP( const writableStream = writableStreamFromWriter(rdf); await (await dump()).body?.pipeTo(writableStream); // Create zip - const p_1 = Deno.run({ - cmd: [ - "bash", - "-c", - `zip -r ${zipPath} blobs/*/ ${rdfPath} -x "blobs/rdf/*"`, - ], - }); - const r_1 = await p_1.status(); + const cmd1 = new Deno.Command("bash", { args: ["-c", `zip -r ${zipPath} blobs/*/ ${rdfPath} -x "blobs/rdf/*"`] }); + const p_1 = cmd1.spawn(); + const r_1 = await p_1.status; if (!r_1.success) throw new Error("zip failed with code " + r_1.code); // move rdf-??? 
to rdf.zip - const p_2 = Deno.run({ - cmd: [ - "bash", - "-c", - `printf "@ ${rdfPath}\\n@=rdf.ttl\\n" | zipnote -w ${zipPath}`, - ], - }); - const r_2 = await p_2.status(); + const cmd2 = new Deno.Command("bash", { args: ["-c", `printf "@ ${rdfPath}\n@=rdf.ttl\n" | zipnote -w ${zipPath}`] }); + const p_2 = cmd2.spawn(); + const r_2 = await p_2.status; if (!r_2.success) throw new Error("zipnote failed with code " + r_2.code); await Deno.remove(rdfPath); const zip = await Deno.open(zipPath); @@ -131,9 +118,10 @@ export async function putZIP( const zip = await Deno.open(zipPath, { write: true, create: true }); const writableStream = writableStreamFromWriter(zip); await request.body?.pipeTo(writableStream); - const p = Deno.run({ cmd: ["unzip", zipPath] }); - const { success, code } = await p.status(); - if (!success) throw new Error("unzip failed with code " + code); + const cmd = new Deno.Command("unzip", { args: [zipPath] }); + const p = cmd.spawn(); + const status = await p.status; + if (!status.success) throw new Error("unzip failed with code " + status.code); await Deno.remove(zipPath); const turtleData = decoder.decode(await Deno.readFile("rdf.ttl")); await Deno.remove("rdf.ttl"); diff --git a/src/helpers/pdfprocessor.ts b/src/helpers/pdfprocessor.ts index 1e267d2..d5d04ac 100644 --- a/src/helpers/pdfprocessor.ts +++ b/src/helpers/pdfprocessor.ts @@ -1,9 +1,11 @@ const decoder = new TextDecoder("utf-8"); export async function getText(path: string) { - const p = Deno.run({ cmd: ["pdftotext", path, "-"], stdout: "piped" }); - const output = decoder.decode(await p.output()); - const { success, code } = await p.status(); - if (!success) throw new Error("pdftotext failed with code " + code); + const cmd = new Deno.Command("pdftotext", { args: [path, "-"], stdout: "piped" as const }); + const p = cmd.spawn(); + const result = await p.output(); + const output = decoder.decode(result.stdout); + const status = await p.status; + if (!status.success) throw new Error("pdftotext failed with code " + status.code); return output; } From c84a413bced00aa67b0802dbb3a4f8ee83107aca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Wed, 20 Aug 2025 14:55:56 +0000 Subject: [PATCH 57/90] fixed table --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 58948b9..79edb09 100644 --- a/README.md +++ b/README.md @@ -106,7 +106,7 @@ When getting a comment, a JSON array with objects of the following structure is ## API | Address | Method | Description | Request / Payload | Response | Implemented in Version | -| - | - | - | - | - | - | - | +| - | - | - | - | - | - | | `/count` | GET | Count (matching) documents | [1](#f1) [3](#f3) | Number | 1.1.0 | | `/doc` | POST | Add / Store Document | PDF[5](#f5) | - | 1.1.0 | | `/doc` | GET | Get List of all (matching) documents | [1](#f1) [2](#f2) [3](#f3) | Array of objects with document identifiers and titles (where available) | 1.1.0 | From 7e0d2c04200bb140977cfd173ff5d49084841ad7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Wed, 20 Aug 2025 15:04:35 +0000 Subject: [PATCH 58/90] clarified --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 79edb09..c1373f8 100644 --- a/README.md +++ b/README.md @@ -123,7 +123,7 @@ When getting a comment, a JSON array with objects of the following structure is | `/doc/{id}/title` | DELETE | Reset document title | - | - | 1.1.0 | | `/doc/{id}/meta` | GET | Get various metadata | - | `{"title": 
"the_Title", "tags":[...], "comments": [...] ... }` | 1.1.0 \| .comments & .created in 1.2.1 | | `/raw/rdf` | GET | Get all metadata as RDF. Useful for Backups | [4](#f4) | RDF, Content-Type defined over request Headers or ?accept. Fallback to text/turtle. | 1.1.0 | -| `/raw/rdf` | DELETE | "Cancel" failed zip upload—use only if certain it’s done & failed | | | (deno only) | +| `/raw/rdf` | DELETE | Remove the temporary `rdf.ttl` file created during a backup upload (cancels a failed zip upload). Note: this does NOT delete stored metadata — `GET /raw/rdf` will continue to return the RDF data; use only if you are sure no upload is in progress. | - | 204 No Content | WIP | | `/raw/zip` or `/raw/tgz` | GET | Get all data. Useful for backups | - | ZIP / TGZ containing blobs/ directory with all pdfs as stored within tridoc and a rdf.ttl file with all metadata. | 1.3.0 | | `/raw/zip` | PUT | Replace all data with backup zip | ZIP | Replaces the metadata and adds the blobs from the zip | 1.3.0 | | `/tag` | POST | Create new tag | See above | - | 1.1.0 | From b1ec0c692e499ae68627a293f4c786d6549f566c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Wed, 20 Aug 2025 20:53:45 +0000 Subject: [PATCH 59/90] Allow to PUT graph --- .devcontainer/setup-dev.sh | 2 +- .env | 1 + .vscode/launch.json | 2 +- README.md | 1 + database-create.sh | 10 ++++++++-- deno.jsonc | 4 ++-- docker-cmd.sh | 2 +- src/config.ts | 1 + src/deps.ts | 2 +- src/handlers/raw.ts | 26 ++++++++++++++++++++++++-- src/meta/fusekiFetch.ts | 14 +++++++++++--- src/meta/store.ts | 26 +++++++++++++++++++------- src/server/routes.ts | 3 +++ 13 files changed, 74 insertions(+), 20 deletions(-) create mode 100644 .env create mode 100644 src/config.ts diff --git a/.devcontainer/setup-dev.sh b/.devcontainer/setup-dev.sh index ac118a2..9053fb4 100755 --- a/.devcontainer/setup-dev.sh +++ b/.devcontainer/setup-dev.sh @@ -20,7 +20,7 @@ echo "Dataset bootstrap (if needed) complete." echo "Development environment setup complete!" echo "" echo "You can now run the Tridoc backend with:" -echo "deno run --watch --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts" +echo "deno run --watch --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,FUSEKI_PWD,OCR_LANG src/main.ts" echo "" echo "Fuseki is available at:" echo "- Internal: http://fuseki:3030" diff --git a/.env b/.env new file mode 100644 index 0000000..4462ce7 --- /dev/null +++ b/.env @@ -0,0 +1 @@ +FUSEKI_PWD=pw123 diff --git a/.vscode/launch.json b/.vscode/launch.json index 581641e..17bcca6 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -14,7 +14,7 @@ "--allow-read=blobs,rdf.ttl", "--allow-write=blobs,rdf.ttl", "--allow-run", - "--allow-env=TRIDOC_PWD,OCR_LANG" + "--allow-env=FUSEKI_PWD,TRIDOC_PWD,OCR_LANG" ], "attachSimplePort": 9229, "env": { diff --git a/README.md b/README.md index c1373f8..7d6aa2a 100644 --- a/README.md +++ b/README.md @@ -124,6 +124,7 @@ When getting a comment, a JSON array with objects of the following structure is | `/doc/{id}/meta` | GET | Get various metadata | - | `{"title": "the_Title", "tags":[...], "comments": [...] ... }` | 1.1.0 \| .comments & .created in 1.2.1 | | `/raw/rdf` | GET | Get all metadata as RDF. Useful for Backups | [4](#f4) | RDF, Content-Type defined over request Headers or ?accept. Fallback to text/turtle. 
| 1.1.0 |
 | `/raw/rdf` | DELETE | Remove the temporary `rdf.ttl` file created during a backup upload (cancels a failed zip upload). Note: this does NOT delete stored metadata — `GET /raw/rdf` will continue to return the RDF data; use only if you are sure no upload is in progress. | - | 204 No Content | WIP |
+| `/raw/rdf` | PUT | Replace the `http://3doc/meta` metadata graph in the backend with the provided RDF payload. | Any RDF serialization (Content-Type) | 204 No Content | WIP |
 | `/raw/zip` or `/raw/tgz` | GET | Get all data. Useful for backups | - | ZIP / TGZ containing blobs/ directory with all pdfs as stored within tridoc and a rdf.ttl file with all metadata. | 1.3.0 |
 | `/raw/zip` | PUT | Replace all data with backup zip | ZIP | Replaces the metadata and adds the blobs from the zip | 1.3.0 |
 | `/tag` | POST | Create new tag | See above | - | 1.1.0 |
diff --git a/database-create.sh b/database-create.sh
index 9f20a65..6c4fd87 100644
--- a/database-create.sh
+++ b/database-create.sh
@@ -7,7 +7,6 @@ set -euo pipefail
 #
 # Environment:
 #   FUSEKI_PWD      Admin password for Fuseki (default: pw123)
-#                   (Deprecated fallback: TRIDOC_PWD if FUSEKI_PWD unset)
 #   FUSEKI_HOST     Hostname of fuseki service (default: fuseki)
 #   FUSEKI_PORT     Port of fuseki service (default: 3030)
 #   FUSEKI_TIMEOUT  Seconds to wait for readiness (default: 180)
@@ -16,7 +15,14 @@ set -euo pipefail
 #   ./database-create.sh [DATASET_NAME]
 
 DATASET_NAME="${1:-3DOC}"
-FUSEKI_PWD="${FUSEKI_PWD:-${TRIDOC_PWD:-pw123}}"
+# Load defaults from .env if present. This allows a single place for the default pw123
+if [ -f ".env" ]; then
+  # shellcheck disable=SC1091
+  source .env
+fi
+
+# If FUSEKI_PWD is not set in the environment, fall back to the value from .env or the default pw123
+FUSEKI_PWD="${FUSEKI_PWD:-pw123}"
 FUSEKI_HOST="${FUSEKI_HOST:-fuseki}"
 FUSEKI_PORT="${FUSEKI_PORT:-3030}"
 FUSEKI_TIMEOUT="${FUSEKI_TIMEOUT:-180}"
diff --git a/deno.jsonc b/deno.jsonc
index 7699961..67645b3 100644
--- a/deno.jsonc
+++ b/deno.jsonc
@@ -6,7 +6,7 @@
   },
   "tasks": {
     // --allow-run=convert,pdfsandwich,pdftotext,tar,zip,unzip,bash
-    "run": "deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts",
-    "run-watch": "deno run --watch --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts"
+    "run": "deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,FUSEKI_PWD,OCR_LANG src/main.ts",
+    "run-watch": "deno run --watch --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,FUSEKI_PWD,OCR_LANG src/main.ts"
   }
 }
diff --git a/docker-cmd.sh b/docker-cmd.sh
index c880972..3347264 100644
--- a/docker-cmd.sh
+++ b/docker-cmd.sh
@@ -11,4 +11,4 @@ else
 fi
 
 echo "[docker-cmd] Launching Deno application..."
-exec deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts
+exec deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,FUSEKI_PWD,OCR_LANG src/main.ts
diff --git a/src/config.ts b/src/config.ts
new file mode 100644
index 0000000..8bc3048
--- /dev/null
+++ b/src/config.ts
@@ -0,0 +1 @@
+export const DEFAULT_FUSEKI_PWD = "pw123";
diff --git a/src/deps.ts b/src/deps.ts
index 85736e4..4f4cecc 100644
--- a/src/deps.ts
+++ b/src/deps.ts
@@ -1,4 +1,4 @@
-export const VERSION = "1.6.0-alpha.deno.1";
+export const VERSION = "1.6.0-alpha.deno.2";
 
 export { encode } from "https://deno.land/std@0.160.0/encoding/base64.ts";
 export { emptyDir, ensureDir } from "https://deno.land/std@0.160.0/fs/mod.ts";
diff --git a/src/handlers/raw.ts b/src/handlers/raw.ts
index 7dcd10b..886aff8 100644
--- a/src/handlers/raw.ts
+++ b/src/handlers/raw.ts
@@ -2,7 +2,7 @@ import { ensureDir } from "https://deno.land/std@0.160.0/fs/ensure_dir.ts";
 import { emptyDir, writableStreamFromWriter } from "../deps.ts";
 import { respond } from "../helpers/cors.ts";
 import { dump } from "../meta/fusekiFetch.ts";
-import { restore } from "../meta/store.ts";
+import { setGraph } from "../meta/store.ts";
 
 const decoder = new TextDecoder("utf-8");
 
@@ -125,6 +125,28 @@ export async function putZIP(
   await Deno.remove(zipPath);
   const turtleData = decoder.decode(await Deno.readFile("rdf.ttl"));
   await Deno.remove("rdf.ttl");
-  await restore(turtleData);
+  await setGraph(turtleData, "text/turtle");
+  return respond(undefined, { status: 204 });
+}
+
+export async function putRDF(
+  request: Request,
+  _match: URLPatternResult,
+): Promise<Response> {
+  // Replace the entire metadata graph with the provided RDF payload.
+  // Supported serializations: Turtle, TriG, RDF/XML, N-Triples, N-Quads, JSON-LD, etc.
+  // For Turtle/TriG we reuse the local `restore()` (which embeds Turtle into a SPARQL INSERT).
+  // For other serializations we forward the payload to Fuseki's dataset data endpoint
+  // using an HTTP PUT to replace the graph (SPARQL 1.1 Graph Store HTTP Protocol).
+  const contentType = (request.headers.get("content-type") || "").toLowerCase();
+  const body = await request.text();
+  if (!body || body.trim() === "") {
+    return respond("Empty request body", { status: 400 });
+  }
+
+  // If content is Turtle or TriG, use the existing restore helper which expects Turtle.
+  // Use setGraph for all content-types; it will decide whether to use SPARQL INSERT
+  // (for Turtle/TriG) or forward the payload to Fuseki (for other serializations).
+  await setGraph(body, contentType || "application/octet-stream");
+  return respond(undefined, { status: 204 });
+}
diff --git a/src/meta/fusekiFetch.ts b/src/meta/fusekiFetch.ts
index afbaf5a..d978944 100644
--- a/src/meta/fusekiFetch.ts
+++ b/src/meta/fusekiFetch.ts
@@ -7,13 +7,15 @@ type SparqlJson = {
   };
 };
 
+import { DEFAULT_FUSEKI_PWD } from "../config.ts";
+
 export function dump(accept = "text/turtle") {
   const query = "CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }";
   console.log((new Date()).toISOString(), "→ FUSEKI QUERY", query, "\n");
   return fetch("http://fuseki:3030/3DOC/query", {
     method: "POST",
     headers: {
-      "Authorization": "Basic " + btoa("admin:pw123"),
+      "Authorization": getAuthHeader(),
       "Content-Type": "application/sparql-query",
       "Accept": accept,
     },
@@ -26,7 +28,7 @@ export async function fusekiFetch(query: string): Promise<SparqlJson> {
   return await fetch("http://fuseki:3030/3DOC/query", {
     method: "POST",
     headers: {
-      "Authorization": "Basic " + btoa("admin:pw123"),
+      "Authorization": getAuthHeader(),
       "Content-Type": "application/sparql-query",
     },
     body: query,
@@ -44,7 +46,7 @@ export async function fusekiUpdate(query: string): Promise<void> {
   return await fetch("http://fuseki:3030/3DOC/update", {
     method: "POST",
     headers: {
-      "Authorization": "Basic " + btoa("admin:pw123"),
+      "Authorization": getAuthHeader(),
       "Content-Type": "application/sparql-update",
     },
     body: query,
@@ -54,3 +56,9 @@ export async function fusekiUpdate(query: string): Promise<void> {
     }
   });
 }
+
+export function getAuthHeader() {
+  const pwd = Deno.env.get("FUSEKI_PWD") || DEFAULT_FUSEKI_PWD;
+  return "Basic " + btoa("admin:" + pwd);
+}
+
diff --git a/src/meta/store.ts b/src/meta/store.ts
index 010e86d..e771e50 100644
--- a/src/meta/store.ts
+++ b/src/meta/store.ts
@@ -1,4 +1,4 @@
-import { fusekiUpdate } from "./fusekiFetch.ts";
+import { fusekiUpdate, getAuthHeader } from "./fusekiFetch.ts";
 
 function escapeLiteral(string: string) {
   return string.replace(/\\/g, "\\\\").replace(/\n/g, "\\n").replace(
@@ -95,12 +95,24 @@ INSERT DATA {
   return await fusekiUpdate(query);
 }
 
-export function restore(turtleData: string) {
-  return fusekiUpdate(`
-CLEAR GRAPH <http://3doc/meta>;
-INSERT DATA {
-  GRAPH <http://3doc/meta> { ${turtleData} }
-}`);
+export function setGraph(data: string, contentType = "text/turtle") {
+  // Forward all payloads to Fuseki's data endpoint and let Fuseki parse the provided
+  // serialization according to the Content-Type. This keeps a single code path
+  // and supports every Fuseki-supported RDF serialization uniformly.
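+  // The equivalent request, as a sketch (assuming the default admin:pw123
+  // credentials from src/config.ts; the graph name is URL-encoded, as below):
+  //   curl -X PUT -u admin:pw123 -H "Content-Type: text/turtle" \
+  //     --data-binary @rdf.ttl "http://fuseki:3030/3DOC/data?graph=http%3A%2F%2F3doc%2Fmeta"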
+ const url = `http://fuseki:3030/3DOC/data?graph=${encodeURIComponent("http://3doc/meta")}`; + return fetch(url, { + method: "PUT", + headers: { + "Authorization": getAuthHeader(), + "Content-Type": contentType, + }, + body: data, + }).then(async (res) => { + if (!res.ok) { + const text = await res.text().catch(() => "(no response body)"); + throw new Error(`Fuseki Error while replacing graph: ${res.status} ${text}`); + } + }); } export async function storeDocument( diff --git a/src/server/routes.ts b/src/server/routes.ts index 231fd6c..941db4c 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -77,6 +77,9 @@ export const routes: { }, { pattern: new URLPattern({ pathname: "/raw/zip" }), handler: raw.putZIP, + }, { + pattern: new URLPattern({ pathname: "/raw/rdf" }), + handler: raw.putRDF, }], "DELETE": [{ pattern: new URLPattern({ pathname: "/doc/:id" }), From 73ee2ea089928a840b7ca78fa3bbc96249ca832b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Thu, 21 Aug 2025 07:39:41 +0000 Subject: [PATCH 60/90] get orphaned --- README.md | 2 + src/handlers/orphaned.ts | 98 ++++++++++++++++++++++++++++++++++++++++ src/server/routes.ts | 7 +++ 3 files changed, 107 insertions(+) create mode 100644 src/handlers/orphaned.ts diff --git a/README.md b/README.md index 7d6aa2a..6901e28 100644 --- a/README.md +++ b/README.md @@ -126,6 +126,8 @@ When getting a comment, a JSON array with objects of the following structure is | `/raw/rdf` | DELETE | Remove the temporary `rdf.ttl` file created during a backup upload (cancels a failed zip upload). Note: this does NOT delete stored metadata — `GET /raw/rdf` will continue to return the RDF data; use only if you are sure no upload is in progress. | - | 204 No Content | WIP | | `/raw/rdf` | PUT | Replace the `http://3doc/meta` metadata graph in the backend with the provided RDF payload. | Any RDF serialization (Content-Type) | 204 No Content | WIP | | `/raw/zip` or `/raw/tgz` | GET | Get all data. Useful for backups | - | ZIP / TGZ containing blobs/ directory with all pdfs as stored within tridoc and a rdf.ttl file with all metadata. 
| 1.3.0 |
+| `/orphaned/tgz` | GET | Get a tar.gz archive of orphaned blob files (files in `blobs/` not referenced in the metadata graph) | - | TGZ containing orphaned blobs | 1.6.0 |
+| `/orphaned/zip` | GET | Get a zip archive of orphaned blob files (files in `blobs/` not referenced in the metadata graph) | - | ZIP containing orphaned blobs | 1.6.0 |
 | `/raw/zip` | PUT | Replace all data with backup zip | ZIP | Replaces the metadata and adds the blobs from the zip | 1.3.0 |
 | `/tag` | POST | Create new tag | See above | - | 1.1.0 |
 | `/tag` | GET | Get (list of) all tags | - | - | 1.1.0 |
diff --git a/src/handlers/orphaned.ts b/src/handlers/orphaned.ts
new file mode 100644
index 0000000..c97dcd5
--- /dev/null
+++ b/src/handlers/orphaned.ts
@@ -0,0 +1,98 @@
+import { respond } from "../helpers/cors.ts";
+import * as metafinder from "../meta/finder.ts";
+
+function basename(path: string) {
+  return path.replace(/^.*\//, "");
+}
+
+async function listAllBlobFiles(): Promise<string[]> {
+  const result: string[] = [];
+  async function walk(dir: string) {
+    for await (const entry of Deno.readDir(dir)) {
+      const p = dir + "/" + entry.name;
+      if (entry.isDirectory) {
+        // skip the rdf metadata folder
+        if (p.endsWith("/rdf")) continue;
+        await walk(p);
+      } else if (entry.isFile) {
+        result.push(p);
+      }
+    }
+  }
+  try {
+    await walk("blobs");
+  } catch (err) {
+    if (err instanceof Deno.errors.NotFound) return [];
+    throw err;
+  }
+  return result;
+}
+
+async function writeFileList(paths: string[]) {
+  const tmp = await Deno.makeTempFile({ prefix: "orphaned-filelist-" });
+  const content = paths.map((p) => p.replace(/^blobs\//, "")).join("\n") + "\n";
+  await Deno.writeTextFile(tmp, content);
+  return tmp;
+}
+
+export async function getOrphanedTGZ(
+  _request: Request,
+  _match: URLPatternResult,
+): Promise<Response> {
+  const allFiles = await listAllBlobFiles();
+  const docs = await metafinder.getDocumentList({});
+  const referenced = new Set(docs.map((d: Record<string, string>) => d.identifier));
+  const orphaned = allFiles.filter((p) => !referenced.has(basename(p)));
+  if (orphaned.length === 0) return respond(undefined, { status: 204 });
+
+  const ts = Date.now();
+  const fileList = await writeFileList(orphaned);
+  const tarPath = `blobs/orphaned-tgz-${ts}.tar.gz`;
+  // Use tar -T to read file list and preserve file metadata
+  const cmd = new Deno.Command("bash", {
+    args: ["-c", `tar -C blobs -czf ${tarPath} -T ${fileList}`],
+  });
+  const p = cmd.spawn();
+  const status = await p.status;
+  await Deno.remove(fileList);
+  if (!status.success) throw new Error("tar failed with code " + status.code);
+  const f = await Deno.open(tarPath, { read: true });
+  const readableStream = f.readable;
+  return respond(readableStream, {
+    headers: {
+      "content-disposition": `inline; filename="tridoc_orphaned_${ts}.tar.gz"`,
+      "content-type": "application/gzip",
+    },
+  });
+}
+
+export async function getOrphanedZIP(
+  _request: Request,
+  _match: URLPatternResult,
+): Promise<Response> {
+  const allFiles = await listAllBlobFiles();
+  const docs = await metafinder.getDocumentList({});
+  const referenced = new Set(docs.map((d: Record<string, string>) => d.identifier));
+  const orphaned = allFiles.filter((p) => !referenced.has(basename(p)));
+  if (orphaned.length === 0) return respond(undefined, { status: 204 });
+
+  const ts = Date.now();
+  const fileList = await writeFileList(orphaned);
+  const zipPath = `blobs/orphaned-zip-${ts}.zip`;
+  // Use zip reading file list from stdin to avoid copying and preserve metadata where possible
+  const cmd = new Deno.Command("bash", {
+ args: ["-c", `cd blobs && xargs -a ${fileList} zip -@ ${zipPath}`], + }); + const p = cmd.spawn(); + const status = await p.status; + await Deno.remove(fileList); + if (!status.success) throw new Error("zip failed with code " + status.code); + const f = await Deno.open(zipPath, { read: true }); + const readableStream = f.readable; + return respond(readableStream, { + headers: { + "content-disposition": `inline; filename="tridoc_orphaned_${ts}.zip"`, + "content-type": "application/zip", + }, + }); +} diff --git a/src/server/routes.ts b/src/server/routes.ts index 941db4c..675eb7e 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -2,6 +2,7 @@ import { options } from "../handlers/cors.ts"; import { count } from "../handlers/count.ts"; import * as doc from "../handlers/doc.ts"; import * as raw from "../handlers/raw.ts"; +import * as orphaned from "../handlers/orphaned.ts"; import * as tag from "../handlers/tag.ts"; import { version } from "../handlers/version.ts"; @@ -48,6 +49,12 @@ export const routes: { }, { pattern: new URLPattern({ pathname: "/raw/tgz" }), handler: raw.getTGZ, + }, { + pattern: new URLPattern({ pathname: "/orphaned/tgz" }), + handler: orphaned.getOrphanedTGZ, + }, { + pattern: new URLPattern({ pathname: "/orphaned/zip" }), + handler: orphaned.getOrphanedZIP, }, { pattern: new URLPattern({ pathname: "/tag" }), handler: tag.getTagList, From 5b4f612b836c78fb9ca5887af1485f348d118a90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Thu, 21 Aug 2025 20:24:08 +0000 Subject: [PATCH 61/90] comment obsolete --- src/handlers/raw.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/handlers/raw.ts b/src/handlers/raw.ts index 886aff8..7c9b673 100644 --- a/src/handlers/raw.ts +++ b/src/handlers/raw.ts @@ -144,9 +144,6 @@ export async function putRDF( return respond("Empty request body", { status: 400 }); } - // If content is Turtle or TriG, use the existing restore helper which expects Turtle. - // Use setGraph for all content-types; it will decide whether to use SPARQL INSERT - // (for Turtle/TriG) or forward the payload to Fuseki (for other serializations). await setGraph(body, contentType || "application/octet-stream"); return respond(undefined, { status: 204 }); } From 19b10902aa5fc6796033aa5bbcb011d9cb7c3ed8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Thu, 21 Aug 2025 20:35:34 +0000 Subject: [PATCH 62/90] turtle default + cleanup --- src/handlers/raw.ts | 8 ++------ src/meta/store.ts | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/handlers/raw.ts b/src/handlers/raw.ts index 7c9b673..32bcca3 100644 --- a/src/handlers/raw.ts +++ b/src/handlers/raw.ts @@ -134,16 +134,12 @@ export async function putRDF( _match: URLPatternResult, ): Promise { // Replace the entire metadata graph with the provided RDF payload. - // Supported serializations: Turtle, TriG, RDF/XML, N-Triples, N-Quads, JSON-LD, etc. - // For Turtle/TriG we reuse the local `restore()` (which embeds Turtle into a SPARQL INSERT). - // For other serializations we forward the payload to Fuseki's dataset data endpoint - // using an HTTP PUT to replace the graph (). 
- const contentType = (request.headers.get("content-type") || "").toLowerCase(); + const contentType = request.headers.get("content-type")?.toLowerCase(); const body = await request.text(); if (!body || body.trim() === "") { return respond("Empty request body", { status: 400 }); } - await setGraph(body, contentType || "application/octet-stream"); + await setGraph(body, contentType); return respond(undefined, { status: 204 }); } diff --git a/src/meta/store.ts b/src/meta/store.ts index e771e50..243bea4 100644 --- a/src/meta/store.ts +++ b/src/meta/store.ts @@ -110,7 +110,7 @@ export function setGraph(data: string, contentType = "text/turtle") { }).then(async (res) => { if (!res.ok) { const text = await res.text().catch(() => "(no response body)"); - throw new Error(`Fuseki Error while replacing graph: ${res.status} ${text}`); + throw new Error(`Fuseki Error replacing ${contentType} graph: ${res.status} ${text}`); } }); } From ee464b1f87ee31205de8d4be212ae5994fdb0b7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Thu, 21 Aug 2025 20:58:20 +0000 Subject: [PATCH 63/90] tmp - permission --- .vscode/launch.json | 2 +- .vscode/tasks.json | 2 +- Dockerfile | 4 ++++ docker-compose.yml | 3 +++ 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 17bcca6..204e29c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -12,7 +12,7 @@ "--watch", "--allow-net", "--allow-read=blobs,rdf.ttl", - "--allow-write=blobs,rdf.ttl", + "--allow-write=blobs,rdf.ttl,/tmp", "--allow-run", "--allow-env=FUSEKI_PWD,TRIDOC_PWD,OCR_LANG" ], diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 36a8930..266dd4d 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -10,7 +10,7 @@ "--watch", "--allow-net", "--allow-read=blobs,rdf.ttl", - "--allow-write=blobs,rdf.ttl", + "--allow-write=blobs,rdf.ttl,/tmp", "--allow-run", "--allow-env=TRIDOC_PWD,OCR_LANG", "src/main.ts" diff --git a/Dockerfile b/Dockerfile index 1d31180..409bd53 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,6 +38,10 @@ RUN mkdir -p /home/deno && \ COPY src/deps.ts src/deps.ts RUN deno cache src/deps.ts +# Entrypoint: If you add a CMD or ENTRYPOINT for deno run, make sure all required Deno permissions are present (e.g., --allow-write for all needed directories, --allow-read, --allow-net, etc.) +# Example: +# CMD ["run", "--allow-net", "--allow-read=blobs,rdf.ttl", "--allow-write=blobs,rdf.ttl,/tmp", "--allow-run", "--allow-env=TRIDOC_PWD,OCR_LANG", "src/main.ts"] + # Copy application source COPY . . diff --git a/docker-compose.yml b/docker-compose.yml index 30d6a21..d3d7aac 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,6 +10,9 @@ services: - ./blobs:/usr/src/app/blobs environment: TRIDOC_PWD: "${TRIDOC_PWD}" + # If you override the command, make sure all required Deno permissions are present (e.g., --allow-write for all needed directories, --allow-read, --allow-net, etc.) 
+ # Example: + # command: deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl,/tmp --allow-run --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts fuseki: image: "linkedsolutions/fuseki" environment: From 7eefc42770f067d63fba45bf0430cb4c45cd61d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Fri, 22 Aug 2025 07:36:44 +0000 Subject: [PATCH 64/90] create orphaned archive in tmp --- src/handlers/orphaned.ts | 44 +++++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/src/handlers/orphaned.ts b/src/handlers/orphaned.ts index c97dcd5..84a35ab 100644 --- a/src/handlers/orphaned.ts +++ b/src/handlers/orphaned.ts @@ -47,16 +47,34 @@ export async function getOrphanedTGZ( const ts = Date.now(); const fileList = await writeFileList(orphaned); - const tarPath = `blobs/orphaned-tgz-${ts}.tar.gz`; - // Use tar -T to read file list and preserve file metadata + const tmpDir = await Deno.makeTempDir({ prefix: "orphaned-" }); + const tarPath = `${tmpDir}/orphaned-tgz-${ts}.tar.gz`; + // Use tar -T to read file list and preserve file metadata. Create archive in tmp dir const cmd = new Deno.Command("bash", { args: ["-c", `tar -C blobs -czf ${tarPath} -T ${fileList}`], }); const p = cmd.spawn(); const status = await p.status; + // Remove the temporary file list regardless of tar success await Deno.remove(fileList); - if (!status.success) throw new Error("tar failed with code " + status.code); + if (!status.success) { + // cleanup tmp dir if tar failed + try { + await Deno.remove(tmpDir, { recursive: true }); + } catch (_e) { + // ignore + } + throw new Error("tar failed with code " + status.code); + } const f = await Deno.open(tarPath, { read: true }); + // unlink the archive so it doesn't linger on disk; fd remains readable on POSIX systems + try { + await Deno.remove(tarPath); + // remove the temporary directory now that the file is unlinked + await Deno.remove(tmpDir, { recursive: true }); + } catch (_e) { + // ignore cleanup errors + } const readableStream = f.readable; return respond(readableStream, { headers: { @@ -78,16 +96,32 @@ export async function getOrphanedZIP( const ts = Date.now(); const fileList = await writeFileList(orphaned); - const zipPath = `blobs/orphaned-zip-${ts}.zip`; + const tmpDir = await Deno.makeTempDir({ prefix: "orphaned-" }); + const zipPath = `${tmpDir}/orphaned-zip-${ts}.zip`; // Use zip reading file list from stdin to avoid copying and preserve metadata where possible const cmd = new Deno.Command("bash", { args: ["-c", `cd blobs && xargs -a ${fileList} zip -@ ${zipPath}`], }); const p = cmd.spawn(); const status = await p.status; + // Remove the temporary file list regardless of zip success await Deno.remove(fileList); - if (!status.success) throw new Error("zip failed with code " + status.code); + if (!status.success) { + try { + await Deno.remove(tmpDir, { recursive: true }); + } catch (_e) { + // ignore + } + throw new Error("zip failed with code " + status.code); + } const f = await Deno.open(zipPath, { read: true }); + // unlink the archive so it doesn't linger on disk; fd remains readable on POSIX systems + try { + await Deno.remove(zipPath); + await Deno.remove(tmpDir, { recursive: true }); + } catch (_e) { + // ignore cleanup errors + } const readableStream = f.readable; return respond(readableStream, { headers: { From 25b8afaa50c50fe119191671eb71dcfc4c9663cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Fri, 22 Aug 2025 20:38:02 +0000 Subject: [PATCH 65/90] hash 
based blob storage #33

---
 README.md                |  10 +++
 deno.lock                |  15 ++++
 src/deps.ts              |   4 +
 src/handlers/doc.ts      | 125 ++++++++++++++++++--------
 src/handlers/migrate.ts  | 189 +++++++++++++++++++++++++++++++++++++++
 src/handlers/orphaned.ts |  13 ++-
 src/helpers/blobStore.ts |  81 +++++++++++++++++
 src/helpers/ipfsHash.ts  |  59 ++++++++++++
 src/meta/finder.ts       |  21 ++++-
 src/meta/store.ts        |  21 +++++
 src/server/routes.ts     |   4 +
 11 files changed, 500 insertions(+), 42 deletions(-)
 create mode 100644 src/handlers/migrate.ts
 create mode 100644 src/helpers/blobStore.ts
 create mode 100644 src/helpers/ipfsHash.ts

diff --git a/README.md b/README.md
index 6901e28..038986f 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,7 @@ Server-side infrastructure for tridoc: easy document management for individuals
 ## Table Of Contents
 
 * [Setup](#setup)
+* [Blob Storage](#blob-storage)
 * [Tag System](#tag-system)
   * [Simple Tags](#simple-tags)
   * [Parameterizable & Parameterized Tags](#parameterizable--parameterized-tags)
@@ -31,6 +32,14 @@ $env:TRIDOC_PWD = "YOUR PASSWORD HERE"
 
 _For more Setup options see the DEV-README.md_
 
+## Blob Storage
+
+Tridoc uses hash-based blob storage for content deduplication and integrity verification. File content is hashed using an IPFS-compatible SHA-256 multihash and stored in a content-addressable file system.
+
+**Document vs Blob Separation**: Documents (logical entities with metadata like title, tags, comments) are separate from blobs (file content). Multiple documents can reference the same blob if they contain identical content.
+
+**Migration**: Use the `/migrate` endpoint to migrate existing installations from nanoid-based to hash-based storage.
+
 ## Tag System
 
 There are two types of tags: simple tags and parameterizable tags. Parameterizable tags need a parameter to become a parameterized tag which can be added to a document.
@@ -133,6 +142,7 @@ When getting a comment, a JSON array with objects of the following structure is
 | `/tag` | GET | Get (list of) all tags | - | - | 1.1.0 |
 | `/tag/{tagLabel}` | GET | Get Documents with this tag. Same as `/doc?tag={tagLabel}` | [1](#f1) [2](#f2) | Array of objects with document identifiers and titles (where available) | 1.1.0 |
 | `/tag/{tagLabel}` | DELETE | Delete this tag | - | - | 1.1.0 |
+| `/migrate` | GET | Migrate existing nanoid-based blob storage to hash-based storage. Separates documents from blobs in metadata.
| - | Migration status JSON with counts and errors | 1.6.0 | | `/version` | GET | Get tridoc version | - | semver version number | 1.1.0 | #### URL-Parameters supported: diff --git a/deno.lock b/deno.lock index ba80b48..f03926d 100644 --- a/deno.lock +++ b/deno.lock @@ -15,10 +15,22 @@ "https://deno.land/std@0.160.0/bytes/bytes_list.ts": "aba5e2369e77d426b10af1de0dcc4531acecec27f9b9056f4f7bfbf8ac147ab4", "https://deno.land/std@0.160.0/bytes/equals.ts": "3c3558c3ae85526f84510aa2b48ab2ad7bdd899e2e0f5b7a8ffc85acb3a6043a", "https://deno.land/std@0.160.0/bytes/mod.ts": "b2e342fd3669176a27a4e15061e9d588b89c1aaf5008ab71766e23669565d179", + "https://deno.land/std@0.160.0/crypto/_fnv/fnv32.ts": "aa9bddead8c6345087d3abd4ef35fb9655622afc333fc41fff382b36e64280b5", + "https://deno.land/std@0.160.0/crypto/_fnv/fnv64.ts": "625d7e7505b6cb2e9801b5fd6ed0a89256bac12b2bbb3e4664b85a88b0ec5bef", + "https://deno.land/std@0.160.0/crypto/_fnv/index.ts": "a8f6a361b4c6d54e5e89c16098f99b6962a1dd6ad1307dbc97fa1ecac5d7060a", + "https://deno.land/std@0.160.0/crypto/_fnv/util.ts": "4848313bed7f00f55be3cb080aa0583fc007812ba965b03e4009665bde614ce3", + "https://deno.land/std@0.160.0/crypto/_wasm_crypto/lib/deno_std_wasm_crypto.generated.mjs": "258b484c2da27578bec61c01d4b62c21f72268d928d03c968c4eb590cb3bd830", + "https://deno.land/std@0.160.0/crypto/_wasm_crypto/mod.ts": "6c60d332716147ded0eece0861780678d51b560f533b27db2e15c64a4ef83665", + "https://deno.land/std@0.160.0/crypto/keystack.ts": "e481eed28007395e554a435e880fee83a5c73b9259ed8a135a75e4b1e4f381f7", + "https://deno.land/std@0.160.0/crypto/mod.ts": "fadedc013b4a86fda6305f1adc6d1c02225834d53cff5d95cc05f62b25127517", + "https://deno.land/std@0.160.0/crypto/timing_safe_equal.ts": "82a29b737bc8932d75d7a20c404136089d5d23629e94ba14efa98a8cc066c73e", "https://deno.land/std@0.160.0/datetime/formatter.ts": "7c8e6d16a0950f400aef41b9f1eb9168249869776ec520265dfda785d746589e", "https://deno.land/std@0.160.0/datetime/mod.ts": "ea927ca96dfb28c7b9a5eed5bdc7ac46bb9db38038c4922631895cea342fea87", "https://deno.land/std@0.160.0/datetime/tokenizer.ts": "7381e28f6ab51cb504c7e132be31773d73ef2f3e1e50a812736962b9df1e8c47", + "https://deno.land/std@0.160.0/encoding/base58.ts": "c8f8caf8d05af8ff7cac6cb9f7726ec1d7ac2d888829ecaf1d27f976deb18ad9", "https://deno.land/std@0.160.0/encoding/base64.ts": "c57868ca7fa2fbe919f57f88a623ad34e3d970d675bdc1ff3a9d02bba7409db2", + "https://deno.land/std@0.160.0/encoding/base64url.ts": "a5f82a9fa703bd85a5eb8e7c1296bc6529e601ebd9642cc2b5eaa6b38fa9e05a", + "https://deno.land/std@0.160.0/fmt/colors.ts": "9e36a716611dcd2e4865adea9c4bec916b5c60caad4cdcdc630d4974e6bb8bd4", "https://deno.land/std@0.160.0/fs/_util.ts": "fdc156f897197f261a1c096dcf8ff9267ed0ff42bd5b31f55053a4763a4bae3b", "https://deno.land/std@0.160.0/fs/copy.ts": "73bdf24f4322648d9bc38ef983b818637ba368351d17aa03644209d3ce3eac31", "https://deno.land/std@0.160.0/fs/empty_dir.ts": "c15a0aaaf40f8c21cca902aa1e01a789ad0c2fd1b7e2eecf4957053c5dbf707f", @@ -58,6 +70,9 @@ "https://deno.land/std@0.160.0/streams/delimiter.ts": "e18febbded53df275a897fac9249a6d0a6a5efc943256ad0f6cb23bf4d757668", "https://deno.land/std@0.160.0/streams/merge.ts": "88ed3dfa030ae076802688e4cadd762a251a41d81ed1776dfd9a2a9a0e970195", "https://deno.land/std@0.160.0/streams/mod.ts": "f402791689d74bd091ecf8f4015bc5b7d5c18132a55c7c72fe0576d1fb254cf9", + "https://deno.land/std@0.160.0/testing/_diff.ts": "a23e7fc2b4d8daa3e158fa06856bedf5334ce2a2831e8bf9e509717f455adb2c", + "https://deno.land/std@0.160.0/testing/_format.ts": 
"cd11136e1797791045e639e9f0f4640d5b4166148796cad37e6ef75f7d7f3832", + "https://deno.land/std@0.160.0/testing/asserts.ts": "1e340c589853e82e0807629ba31a43c84ebdcdeca910c4a9705715dfdb0f5ce8", "https://deno.land/x/nanoid@v3.0.0/customAlphabet.ts": "1cfd7cfd2f07ca8d78a7e7855fcc9f59abf01ef2a127484ef94328fadf940ead", "https://deno.land/x/nanoid@v3.0.0/customRandom.ts": "af56e19038c891a4b4ef2be931554c27579bd407ee5bbea5cb64f6ee1347cbe3", "https://deno.land/x/nanoid@v3.0.0/mod.ts": "3ead610e40c58d8fdca21d5da9ec661445a2b82526e19c34d05de5f90be8a1be", diff --git a/src/deps.ts b/src/deps.ts index 4f4cecc..9255bfb 100644 --- a/src/deps.ts +++ b/src/deps.ts @@ -6,3 +6,7 @@ export { serve } from "https://deno.land/std@0.160.0/http/mod.ts"; export { writableStreamFromWriter } from "https://deno.land/std@0.160.0/streams/mod.ts"; export { nanoid } from "https://deno.land/x/nanoid@v3.0.0/mod.ts"; + +// IPFS-compatible hashing utilities +export { crypto } from "https://deno.land/std@0.160.0/crypto/mod.ts"; +export { encode as encodeBase58 } from "https://deno.land/std@0.160.0/encoding/base58.ts"; diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index 3e3ba31..1b6a51d 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -1,8 +1,8 @@ -import { ensureDir } from "https://deno.land/std@0.160.0/fs/ensure_dir.ts"; -import { nanoid, writableStreamFromWriter } from "../deps.ts"; +import { nanoid } from "../deps.ts"; import { respond } from "../helpers/cors.ts"; import { getText } from "../helpers/pdfprocessor.ts"; import { processParams } from "../helpers/processParams.ts"; +import { storeBlob, getBlobPath, getThumbnailPath } from "../helpers/blobStore.ts"; import * as metadelete from "../meta/delete.ts"; import * as metafinder from "../meta/finder.ts"; import * as metastore from "../meta/store.ts"; @@ -17,7 +17,7 @@ type TagAdd = { }; // only for parameterizable tags }; -function getDir(id: string) { +function _getDir(id: string) { return "./blobs/" + id.slice(0, 2) + "/" + id.slice(2, 6) + "/" + id.slice(6, 14); } @@ -39,7 +39,7 @@ export async function deleteDoc( _request: Request, match: URLPatternResult, ): Promise { - const id = match.pathname.groups.id; + const id = match.pathname.groups.id!; await metadelete.deleteFile(id); return respond(undefined, { status: 204 }); } @@ -49,8 +49,8 @@ export async function deleteTag( match: URLPatternResult, ) { await metadelete.deleteTag( - decodeURIComponent(match.pathname.groups.tagLabel), - match.pathname.groups.id, + decodeURIComponent(match.pathname.groups.tagLabel!), + match.pathname.groups.id!, ); return respond(undefined, { status: 204 }); } @@ -58,7 +58,7 @@ export async function deleteTitle( _request: Request, match: URLPatternResult, ): Promise { - const id = match.pathname.groups.id; + const id = match.pathname.groups.id!; await metadelete.deleteTitle(id); return respond(undefined, { status: 201 }); } @@ -81,11 +81,20 @@ export async function getPDF( match: URLPatternResult, ): Promise { const id = match.pathname.groups.id!; - const path = getPath(id); + const meta = await metafinder.getBasicMeta(id); + + // Determine the file path based on whether we have a blob hash or legacy ID + let path: string; + if (meta.blob) { + // New hash-based storage + path = getBlobPath(meta.blob); + } else { + // Legacy nanoid-based storage + path = getPath(id); + } + try { - const fileName = await metafinder.getBasicMeta(id).then(( - { title, created }, - ) => title || created || "document"); + const fileName = meta.title || meta.created || "document"; const file = 
    // Build a readable stream so the file doesn't have to be fully loaded into memory while we send it
     const readableStream = file.readable;
@@ -139,24 +148,41 @@ export async function getThumb(
   match: URLPatternResult,
 ): Promise<Response> {
   const id = match.pathname.groups.id!;
-  const path = getPath(id);
-  const fileName = await metafinder.getBasicMeta(id).then((
-    { title, created },
-  ) => title || created || "thumbnail");
+  const meta = await metafinder.getBasicMeta(id);
+
+  // Determine the file path based on whether we have a blob hash or legacy ID
+  let thumbPath: string;
+  if (meta.blob) {
+    // New hash-based storage
+    thumbPath = getThumbnailPath(meta.blob);
+  } else {
+    // Legacy nanoid-based storage
+    thumbPath = getPath(id) + ".png";
+  }
+
+  const fileName = meta.title || meta.created || "thumbnail";
   let thumb: Deno.FsFile;
   try {
-    thumb = await Deno.open(path + ".png", { read: true });
+    thumb = await Deno.open(thumbPath, { read: true });
   } catch (error) {
     if (error instanceof Deno.errors.NotFound) {
       try {
-        await Deno.stat(path); // Check if PDF exists → 404 otherwise
+        // Get the blob path for thumbnail generation
+        let blobPath: string;
+        if (meta.blob) {
+          blobPath = getBlobPath(meta.blob);
+        } else {
+          blobPath = getPath(id);
+        }
+
+        await Deno.stat(blobPath); // Check if PDF exists → 404 otherwise
         const cmd = new Deno.Command("convert", {
-          args: ["-thumbnail", "300x", "-alpha", "remove", `${path}[0]`, `${path}.png`],
+          args: ["-thumbnail", "300x", "-alpha", "remove", `${blobPath}[0]`, thumbPath],
         });
         const p = cmd.spawn();
         const status = await p.status;
         if (!status.success) throw new Error("convert failed with code " + status.code);
-        thumb = await Deno.open(path + ".png", { read: true });
+        thumb = await Deno.open(thumbPath, { read: true });
       } catch (error) {
         if (error instanceof Deno.errors.NotFound) {
           return respond("404 Not Found", { status: 404 });
@@ -181,7 +207,7 @@ export async function getTitle(
   _request: Request,
   match: URLPatternResult,
 ): Promise<Response> {
-  const id = match.pathname.groups.id;
+  const id = match.pathname.groups.id!;
   const meta = await metafinder.getBasicMeta(id);
   return respond(JSON.stringify({ title: meta.title ?? null }), {
    headers: {
       "content-type": "application/json; charset=utf-8",
     },
   });
 }
@@ -207,7 +233,7 @@ export async function postComment(
   request: Request,
   match: URLPatternResult,
 ): Promise<Response> {
-  const id = match.pathname.groups.id;
+  const id = match.pathname.groups.id!;
   if (!id) return respond("Missing document id in path", { status: 400 });
   const body = await request.json();
   if (!body || typeof body.text !== "string" || body.text.trim() === "") {
@@ -226,38 +252,65 @@ export async function postPDF(
   request: Request,
   _match: URLPatternResult,
 ): Promise<Response> {
-  const id = nanoid();
-  const path = getPath(id);
-  await ensureDir(getDir(id));
-  const pdf = await Deno.open(path, { write: true, create: true });
-  const writableStream = writableStreamFromWriter(pdf);
-  await request.body?.pipeTo(writableStream);
-  console.log((new Date()).toISOString(), "Document created with id", id);
-  let text = await getText(path);
+  const id = nanoid(); // Document ID (separate from blob hash)
+
+  // Read the content into memory to compute hash and store blob
+  const chunks: Uint8Array[] = [];
+  const reader = request.body?.getReader();
+  if (!reader) {
+    return respond("Missing request body", { status: 400 });
+  }
+
+  try {
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+      chunks.push(value);
+    }
+  } finally {
+    reader.releaseLock();
+  }
+
+  // Combine chunks into a single Uint8Array
+  const totalLength = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
+  const content = new Uint8Array(totalLength);
+  let offset = 0;
+  for (const chunk of chunks) {
+    content.set(chunk, offset);
+    offset += chunk.length;
+  }
+
+  // Store blob using content hash
+  const blobHash = await storeBlob(content);
+  const blobPath = getBlobPath(blobHash);
+
+  console.log((new Date()).toISOString(), "Document created with id", id, "blob hash", blobHash);
+  let text = await getText(blobPath);
   if (text.length < 4) {
     // run OCR
     const lang = Deno.env.get("OCR_LANG") || "fra+deu+eng";
-    const cmd = new Deno.Command("pdfsandwich", { args: ["-rgb", "-lang", lang, path] });
+    const cmd = new Deno.Command("pdfsandwich", { args: ["-rgb", "-lang", lang, blobPath] });
     const p = cmd.spawn();
     const status = await p.status;
     if (!status.success) throw new Error("pdfsandwich failed with code " + status.code);
     // pdfsandwich generates a file with the same name + _ocr
-    await Deno.rename(path + "_ocr", path);
-    text = await getText(path);
+    await Deno.rename(blobPath + "_ocr", blobPath);
+    text = await getText(blobPath);
     console.log((new Date()).toISOString(), id, ": OCR finished");
   }
   // no await as we don’t care for the result - if it fails, the thumbnail will be created upon request.
  // Fire-and-forget thumbnail generation (non-blocking)
   try {
+    const thumbPath = getThumbnailPath(blobHash);
     const cmd = new Deno.Command("convert", {
-      args: ["-thumbnail", "300x", "-alpha", "remove", `${path}[0]`, `${path}.png`],
+      args: ["-thumbnail", "300x", "-alpha", "remove", `${blobPath}[0]`, thumbPath],
     });
     cmd.spawn();
   } catch (_) {
     // ignore spawn errors for background thumbnail creation
   }
   const date = datecheck(request);
-  await metastore.storeDocument({ id, text, date });
+  await metastore.storeDocumentWithBlob({ id, text, date, blobHash });
   return respond(undefined, {
     headers: {
       "Location": "/doc/" + id,
@@ -270,7 +323,7 @@ export async function postTag(
   request: Request,
   match: URLPatternResult,
 ): Promise<Response> {
-  const id = match.pathname.groups.id;
+  const id = match.pathname.groups.id!;
   if (!id) return respond("Missing document id in path", { status: 400 });
   const tagObject: TagAdd = await request.json();
   const [label, type] =
@@ -303,7 +356,7 @@ export async function putTitle(
   request: Request,
   match: URLPatternResult,
 ): Promise<Response> {
-  const id = match.pathname.groups.id;
+  const id = match.pathname.groups.id!;
   if (!id) return respond("Missing document id in path", { status: 400 });
   const body = await request.json();
   if (!body || typeof body.title !== "string" || body.title.trim() === "") {
diff --git a/src/handlers/migrate.ts b/src/handlers/migrate.ts
new file mode 100644
index 0000000..66cf1bb
--- /dev/null
+++ b/src/handlers/migrate.ts
@@ -0,0 +1,189 @@
+import { respond } from "../helpers/cors.ts";
+import { computeFileIPFSHash, hashToPath, hashToThumbnailPath } from "../helpers/ipfsHash.ts";
+import { fusekiFetch, fusekiUpdate } from "../meta/fusekiFetch.ts";
+import { ensureDir } from "../deps.ts";
+
+interface MigrationStatus {
+  processed: number;
+  migrated: number;
+  skipped: number;
+  errors: string[];
+  duplicatesFound: number;
+}
+
+/**
+ * Migrate existing blob storage from nanoid-based to hash-based IDs
+ * Uses filesystem-driven approach: everything not in blobs/ipfs/ needs migration
+ */
+export async function migrateBlobs(
+  _request: Request,
+  _match: URLPatternResult,
+): Promise<Response> {
+  const status: MigrationStatus = {
+    processed: 0,
+    migrated: 0,
+    skipped: 0,
+    errors: [],
+    duplicatesFound: 0
+  };
+
+  try {
+    // Get all legacy blob files (filesystem-driven approach)
+    const legacyBlobs = await getLegacyBlobFiles();
+    console.log(`Found ${legacyBlobs.length} legacy blob files to migrate`);
+
+    for (const { identifier, legacyPath } of legacyBlobs) {
+      status.processed++;
+      try {
+        // Compute hash for the existing blob
+        const blobHash = await computeFileIPFSHash(legacyPath);
+
+        // Check if hash-based blob already exists
+        const { dir: newDir, fullPath: newPath } = hashToPath(blobHash);
+        const { dir: thumbDir, fullPath: thumbPath } = hashToThumbnailPath(blobHash);
+
+        let blobExists = false;
+        try {
+          await Deno.stat(newPath);
+          blobExists = true;
+          status.duplicatesFound++;
+        } catch (error) {
          if (!(error instanceof Deno.errors.NotFound)) {
+            throw error;
+          }
+        }
+
+        // Copy to hash-based location if it doesn't exist
+        if (!blobExists) {
+          await ensureDir(newDir);
+          await Deno.copyFile(legacyPath, newPath);
+          console.log(`Copied blob: ${legacyPath} -> ${newPath}`);
+        }
+
+        // Handle thumbnail migration
+        const legacyThumbPath = legacyPath + ".png";
+        try {
+          await Deno.stat(legacyThumbPath);
+
+          // Copy thumbnail to new thumbs directory
+          let thumbExists = false;
+          try {
+            await Deno.stat(thumbPath);
+            thumbExists = true;
+          } catch (error) {
+            if (!(error instanceof Deno.errors.NotFound)) {
+              throw error;
+            }
+          }
+
+          if (!thumbExists) {
+            await ensureDir(thumbDir);
+            await Deno.copyFile(legacyThumbPath, thumbPath);
+            console.log(`Copied thumbnail: ${legacyThumbPath} -> ${thumbPath}`);
+          }
+        } catch (error) {
+          if (!(error instanceof Deno.errors.NotFound)) {
+            throw error;
+          }
+          // Thumbnail doesn't exist, that's fine
+        }
+
+        // Update metadata to include blob reference (if document exists in metadata)
+        await addBlobReferenceToDocument(identifier, blobHash);
+
+        status.migrated++;
+        console.log(`Migrated document ${identifier} -> blob ${blobHash}`);
+
+      } catch (error) {
+        const errorMessage = error instanceof Error ? error.message : String(error);
+        status.errors.push(`Failed to migrate ${identifier}: ${errorMessage}`);
+        console.error(`Migration error for ${identifier}:`, error);
+      }
+    }
+
+    console.log(`Migration completed: ${status.migrated}/${status.processed} files migrated`);
+    console.log(`Found ${status.duplicatesFound} duplicate files (content deduplication)`);
+
+    return respond(JSON.stringify(status), {
+      headers: {
+        "content-type": "application/json; charset=utf-8",
+      },
+    });
+
+  } catch (error) {
+    const errorMessage = error instanceof Error ? error.message : String(error);
+    status.errors.push(`Migration failed: ${errorMessage}`);
+    return respond(JSON.stringify(status), {
+      status: 500,
+      headers: {
+        "content-type": "application/json; charset=utf-8",
+      },
+    });
+  }
+}
+
+/**
+ * Get all legacy blob files (filesystem-driven approach)
+ * Returns everything in blobs/ that's not in blobs/ipfs/ or blobs/thumbs/
+ */
+async function getLegacyBlobFiles(): Promise<Array<{ identifier: string; legacyPath: string }>> {
+  const results: Array<{ identifier: string; legacyPath: string }> = [];
+
+  async function walkLegacyBlobs(dir: string, depth = 0) {
+    try {
+      for await (const entry of Deno.readDir(dir)) {
+        const path = `${dir}/${entry.name}`;
+
+        // Skip the new ipfs and thumbs directories
+        if (depth === 0 && (entry.name === "ipfs" || entry.name === "thumbs")) {
+          continue;
+        }
+
+        if (entry.isDirectory && depth < 3) {
+          // Continue walking directory structure (blobs/xx/xxxx/xxxxxxxx/)
+          await walkLegacyBlobs(path, depth + 1);
+        } else if (entry.isFile && depth === 3 && !entry.name.endsWith('.png')) {
+          // This is a legacy blob file (not a thumbnail)
+          const identifier = entry.name;
+          results.push({ identifier, legacyPath: path });
+        }
+      }
+    } catch (error) {
+      if (!(error instanceof Deno.errors.NotFound)) {
+        throw error;
+      }
+    }
+  }
+
+  await walkLegacyBlobs("./blobs");
+  return results;
+}
+
+async function addBlobReferenceToDocument(docId: string, blobHash: string) {
+  // First check if document exists in metadata
+  const json = await fusekiFetch(`
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX s: <http://schema.org/>
+SELECT ?s WHERE {
+  GRAPH <http://3doc/meta> {
+    ?s s:identifier "${docId}" .
+  }
+} LIMIT 1`);
+
+  if (json.results.bindings.length === 0) {
+    console.log(`Document ${docId} not found in metadata, skipping blob reference update`);
+    return;
+  }
+
+  // Add blob reference to existing document
+  const query = `
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX s: <http://schema.org/>
+PREFIX tridoc:
+INSERT DATA {
+  GRAPH <http://3doc/meta> {
+    tridoc:blob "${blobHash}" .
+  }
+}`;
+  return await fusekiUpdate(query);
+}
diff --git a/src/handlers/orphaned.ts b/src/handlers/orphaned.ts
index 84a35ab..acf116f 100644
--- a/src/handlers/orphaned.ts
+++ b/src/handlers/orphaned.ts
@@ -14,7 +14,8 @@ async function listAllBlobFiles(): Promise<string[]> {
         // skip the rdf metadata folder
         if (p.endsWith("/rdf")) continue;
         await walk(p);
-      } else if (entry.isFile) {
+      } else if (entry.isFile && !entry.name.endsWith('.png')) {
+        // Only include non-thumbnail files
         result.push(p);
       }
     }
@@ -40,8 +41,11 @@ export async function getOrphanedTGZ(
   _match: URLPatternResult,
 ): Promise<Response> {
   const allFiles = await listAllBlobFiles();
+  const referenced = await metafinder.getReferencedBlobs();
+  // Also include legacy document IDs that might still be referenced
   const docs = await metafinder.getDocumentList({});
-  const referenced = new Set(docs.map((d: Record<string, string>) => d.identifier));
+  docs.forEach((d: Record<string, string>) => referenced.add(d.identifier));
+
   const orphaned = allFiles.filter((p) => !referenced.has(basename(p)));
   if (orphaned.length === 0) return respond(undefined, { status: 204 });
 
@@ -89,8 +93,11 @@ export async function getOrphanedZIP(
   _match: URLPatternResult,
 ): Promise<Response> {
   const allFiles = await listAllBlobFiles();
+  const referenced = await metafinder.getReferencedBlobs();
+  // Also include legacy document IDs that might still be referenced
   const docs = await metafinder.getDocumentList({});
-  const referenced = new Set(docs.map((d: Record<string, string>) => d.identifier));
+  docs.forEach((d: Record<string, string>) => referenced.add(d.identifier));
+
   const orphaned = allFiles.filter((p) => !referenced.has(basename(p)));
   if (orphaned.length === 0) return respond(undefined, { status: 204 });
 
diff --git a/src/helpers/blobStore.ts b/src/helpers/blobStore.ts
new file mode 100644
index 0000000..f83dc17
--- /dev/null
+++ b/src/helpers/blobStore.ts
@@ -0,0 +1,81 @@
+import { ensureDir } from "../deps.ts";
+import { computeIPFSHash, hashToPath, hashToThumbnailPath } from "./ipfsHash.ts";
+
+/**
+ * Store a blob using content-based IPFS hash as identifier.
+ * Returns the hash-based ID.
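+ *
+ * Minimal usage sketch (the file name is hypothetical; callers such as
+ * postPDF normally already hold the bytes in memory):
+ *
+ *   const bytes = await Deno.readFile("./example.pdf");
+ *   const hash = await storeBlob(bytes);  // "Qm…", a CIDv0-style multihash
+ *   const path = getBlobPath(hash);       // e.g. "./blobs/ipfs/QmAb/cdef/…/QmAb….pdf"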
+ */
+export async function storeBlob(content: Uint8Array): Promise<string> {
+  // Compute content hash
+  const hash = await computeIPFSHash(content);
+
+  // Get storage path in ipfs subdirectory
+  const { dir, fullPath } = hashToPath(hash);
+
+  // Check if blob already exists (deduplication)
+  try {
+    await Deno.stat(fullPath);
+    console.log(`Blob ${hash} already exists, skipping storage`);
+    return hash;
+  } catch (error) {
+    if (!(error instanceof Deno.errors.NotFound)) {
+      throw error;
+    }
+  }
+
+  // Create directory and store blob
+  await ensureDir(dir);
+  await Deno.writeFile(fullPath, content);
+
+  console.log(`Stored new blob: ${hash}`);
+  return hash;
+}
+
+/**
+ * Check if a blob exists by hash
+ */
+export async function blobExists(hash: string): Promise<boolean> {
+  const { fullPath } = hashToPath(hash);
+  try {
+    await Deno.stat(fullPath);
+    return true;
+  } catch (error) {
+    if (error instanceof Deno.errors.NotFound) {
+      return false;
+    }
+    throw error;
+  }
+}
+
+/**
+ * Get the file path for a blob hash
+ */
+export function getBlobPath(hash: string): string {
+  return hashToPath(hash).fullPath;
+}
+
+/**
+ * Get the directory path for a blob hash
+ */
+export function getBlobDir(hash: string): string {
+  return hashToPath(hash).dir;
+}
+
+/**
+ * Store thumbnail for a blob
+ */
+export async function storeThumbnail(hash: string, thumbnailContent: Uint8Array): Promise<void> {
+  const { dir, fullPath } = hashToThumbnailPath(hash);
+
+  await ensureDir(dir);
+  await Deno.writeFile(fullPath, thumbnailContent);
+
+  console.log(`Stored thumbnail for blob: ${hash}`);
+}
+
+/**
+ * Get thumbnail path for a blob hash
+ */
+export function getThumbnailPath(hash: string): string {
+  return hashToThumbnailPath(hash).fullPath;
+}
diff --git a/src/helpers/ipfsHash.ts b/src/helpers/ipfsHash.ts
new file mode 100644
index 0000000..2fb4566
--- /dev/null
+++ b/src/helpers/ipfsHash.ts
@@ -0,0 +1,59 @@
+import { crypto, encodeBase58 } from "../deps.ts";
+
+/**
+ * Compute IPFS-compatible hash for content using SHA-256.
+ *
+ * IPFS uses multihash format:
+ * - 1 byte: hash function code (0x12 for SHA-256)
+ * - 1 byte: digest length (0x20 for 32 bytes)
+ * - N bytes: actual hash digest
+ *
+ * Then encoded with base58btc for content addressing.
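+ *
+ * Worked example (a sketch, not computed by this function at runtime): for
+ * empty input, SHA-256 is e3b0c442…7852b855 (32 bytes), so the multihash is
+ * the 34 bytes 0x12 0x20 e3 b0 c4 42 … 78 52 b8 55; base58btc-encoding such
+ * a multihash always yields a string starting with "Qm", as for every CIDv0.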
+ */
+export async function computeIPFSHash(content: Uint8Array): Promise<string> {
+  // Compute SHA-256 hash
+  const hashBuffer = await crypto.subtle.digest("SHA-256", content);
+  const hashBytes = new Uint8Array(hashBuffer);
+
+  // Create multihash: [fn_code, digest_size, ...digest]
+  const multihash = new Uint8Array(34); // 1 + 1 + 32 bytes
+  multihash[0] = 0x12; // SHA-256 function code
+  multihash[1] = 0x20; // 32 bytes digest length
+  multihash.set(hashBytes, 2);
+
+  // Encode with base58btc (CIDv0 format already includes the "Qm" prefix)
+  const base58Hash = encodeBase58(multihash.buffer);
+
+  return base58Hash;
+}
+
+/**
+ * Compute hash for a file at the given path
+ */
+export async function computeFileIPFSHash(filePath: string): Promise<string> {
+  const content = await Deno.readFile(filePath);
+  return computeIPFSHash(content);
+}
+
+/**
+ * Convert hash to directory structure for IPFS blob storage
+ * Uses first 4 chars (e.g., QmAb) as top level to ensure meaningful distribution
+ */
+export function hashToPath(hash: string): { dir: string; fullPath: string } {
+  // Store in blobs/ipfs/ subdirectory using first 4 chars as top level
+  const dir = `./blobs/ipfs/${hash.slice(0, 4)}/${hash.slice(4, 8)}/${hash.slice(8, 16)}`;
+  const fullPath = `${dir}/${hash}.pdf`;
+
+  return { dir, fullPath };
+}
+
+/**
+ * Convert hash to thumbnail path
+ */
+export function hashToThumbnailPath(hash: string): { dir: string; fullPath: string } {
+  // Store in blobs/thumbs/ subdirectory using same structure as blobs
+  const dir = `./blobs/thumbs/${hash.slice(0, 4)}/${hash.slice(4, 8)}/${hash.slice(8, 16)}`;
+  const fullPath = `${dir}/${hash}.png`;
+
+  return { dir, fullPath };
+}
diff --git a/src/meta/finder.ts b/src/meta/finder.ts
index ad3ed14..d9485c0 100644
--- a/src/meta/finder.ts
+++ b/src/meta/finder.ts
@@ -175,15 +175,19 @@ export async function getBasicMeta(id: string) {
   return await fusekiFetch(`
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX s: <http://schema.org/>
-SELECT ?title ?date
+PREFIX tridoc:
+SELECT ?title ?date ?blob
 WHERE {
   ?s s:identifier "${id}" .
   ?s s:dateCreated ?date .
   OPTIONAL { ?s s:name ?title . }
+  OPTIONAL { ?s tridoc:blob ?blob . }
 }`).then((json) => {
+    const binding = json.results.bindings[0];
     return {
-      title: json.results.bindings[0]?.title?.value,
-      created: json.results.bindings[0]?.date?.value,
+      title: binding?.title?.value,
+      created: binding?.date?.value,
+      blob: binding?.blob?.value,
     };
   });
 }
@@ -255,3 +259,14 @@ SELECT DISTINCT ?l ?t WHERE { VALUES ?l { "${
     },
   );
 }
+
+export async function getReferencedBlobs(): Promise<Set<string>> {
+  const json = await fusekiFetch(`
+PREFIX tridoc:
+SELECT DISTINCT ?blob WHERE {
+  GRAPH <http://3doc/meta> {
+    ?s tridoc:blob ?blob .
+  }
+}`);
+  return new Set(json.results.bindings.map((binding) => binding.blob.value));
+}
diff --git a/src/meta/store.ts b/src/meta/store.ts
index 243bea4..a57aa64 100644
--- a/src/meta/store.ts
+++ b/src/meta/store.ts
@@ -133,3 +133,24 @@ INSERT DATA {
 }`;
   return await fusekiUpdate(query);
 }
+
+export async function storeDocumentWithBlob(
+  { id, text, date, blobHash }: { id: string; text: string; date?: string; blobHash: string },
+) {
+  const created = (date ? new Date(date) : new Date()).toISOString();
+  const query = `
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
+PREFIX s: <http://schema.org/>
+PREFIX tridoc:
+INSERT DATA {
+  GRAPH <http://3doc/meta> {
+    rdf:type s:DigitalDocument ;
+      s:dateCreated "${created}"^^xsd:dateTime ;
+      s:identifier "${id}" ;
+      s:text "${escapeLiteral(text)}" ;
+      tridoc:blob "${blobHash}" .
+ } +}`; + return await fusekiUpdate(query); +} diff --git a/src/server/routes.ts b/src/server/routes.ts index 675eb7e..81f649e 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -4,6 +4,7 @@ import * as doc from "../handlers/doc.ts"; import * as raw from "../handlers/raw.ts"; import * as orphaned from "../handlers/orphaned.ts"; import * as tag from "../handlers/tag.ts"; +import * as migrate from "../handlers/migrate.ts"; import { version } from "../handlers/version.ts"; export const routes: { @@ -64,6 +65,9 @@ export const routes: { }, { pattern: new URLPattern({ pathname: "/version" }), handler: version, + }, { + pattern: new URLPattern({ pathname: "/migrate" }), + handler: migrate.migrateBlobs, }], "POST": [{ pattern: new URLPattern({ pathname: "/doc" }), From 40b9075fbf4356288ff4e9a5fe19a1bd3f876a74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Sat, 23 Aug 2025 03:50:40 +0000 Subject: [PATCH 66/90] removing extension before comparing with hash #33 --- src/handlers/orphaned.ts | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/src/handlers/orphaned.ts b/src/handlers/orphaned.ts index acf116f..d4647c7 100644 --- a/src/handlers/orphaned.ts +++ b/src/handlers/orphaned.ts @@ -2,9 +2,17 @@ import { respond } from "../helpers/cors.ts"; import * as metafinder from "../meta/finder.ts"; function basename(path: string) { - return path.replace(/^.*\//, ""); + // Return the filename without any directory prefix and without extension. + // RDF stores the bare hash (no path, no extension), so strip extensions + // from filesystem names before comparing. + return path.replace(/^.*\//, "").replace(/\.[^/.]+$/, ""); } +function stripExtension(name: string) { + return name.replace(/\.[^/.]+$/, ""); +} + + async function listAllBlobFiles(): Promise { const result: string[] = []; async function walk(dir: string) { @@ -45,8 +53,13 @@ export async function getOrphanedTGZ( // Also include legacy document IDs that might still be referenced const docs = await metafinder.getDocumentList({}); docs.forEach((d: Record) => referenced.add(d.identifier)); - - const orphaned = allFiles.filter((p) => !referenced.has(basename(p))); + + // RDF stores the bare hash (no path, no extension). Strip extensions from + // filesystem names and compare directly against the referenced set. + const orphaned = allFiles.filter((p) => { + const nameNoExt = stripExtension(basename(p)); + return !referenced.has(nameNoExt); + }); if (orphaned.length === 0) return respond(undefined, { status: 204 }); const ts = Date.now(); @@ -97,8 +110,13 @@ export async function getOrphanedZIP( // Also include legacy document IDs that might still be referenced const docs = await metafinder.getDocumentList({}); docs.forEach((d: Record) => referenced.add(d.identifier)); - - const orphaned = allFiles.filter((p) => !referenced.has(basename(p))); + + // RDF stores the bare hash (no path, no extension). Strip extensions from + // filesystem names and compare directly against the referenced set. 
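+  // For example (hypothetical hash): "blobs/ipfs/QmAb/cdef/ghijklmn/QmAbcdef….pdf"
+  // reduces to "QmAbcdef…", which is then looked up in `referenced`.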
+  const orphaned = allFiles.filter((p) => {
+    const nameNoExt = stripExtension(basename(p));
+    return !referenced.has(nameNoExt);
+  });
 
   if (orphaned.length === 0) return respond(undefined, { status: 204 });
 
   const ts = Date.now();

From 854f0b2e3f1a8135dc126b225fff768eb2dcaf16 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reto=20Gm=C3=BCr?=
Date: Sat, 23 Aug 2025 04:42:56 +0000
Subject: [PATCH 67/90] procps and deno 2.4.5

---
 Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 409bd53..f3116d9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM denoland/deno:2.4.4
+FROM denoland/deno:2.4.5
 
 EXPOSE 8000
 
 WORKDIR /usr/src/app
 
 # Install required packages (union of prod + dev wants)
 RUN apt update \
-    && apt -y install pdfsandwich tesseract-ocr-deu tesseract-ocr-fra curl git zip unzip iputils-ping \
+    && apt -y install pdfsandwich tesseract-ocr-deu tesseract-ocr-fra curl git zip unzip iputils-ping procps \
     && rm -rf /var/lib/apt/lists/*
 
 # Remove restrictive ImageMagick policy if present (non-fatal if absent)

From 6a5e9d42f5c28adbc5e4d78985961ccc3893ce85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reto=20Gm=C3=BCr?=
Date: Sat, 23 Aug 2025 06:11:43 +0000
Subject: [PATCH 68/90] code duplication / fixes

---
 src/handlers/orphaned.ts | 131 ++++++++++++++++++---------------------
 1 file changed, 62 insertions(+), 69 deletions(-)

diff --git a/src/handlers/orphaned.ts b/src/handlers/orphaned.ts
index d4647c7..5d1e1a4 100644
--- a/src/handlers/orphaned.ts
+++ b/src/handlers/orphaned.ts
@@ -12,7 +12,6 @@ function stripExtension(name: string) {
   return name.replace(/\.[^/.]+$/, "");
 }
 
-
 async function listAllBlobFiles(): Promise<string[]> {
   const result: string[] = [];
   async function walk(dir: string) {
@@ -44,10 +43,7 @@ async function writeFileList(paths: string[]) {
   return tmp;
 }
 
-export async function getOrphanedTGZ(
-  _request: Request,
-  _match: URLPatternResult,
-): Promise<Response> {
+async function getOrphanedFiles(): Promise<string[]> {
   const allFiles = await listAllBlobFiles();
   const referenced = await metafinder.getReferencedBlobs();
   // Also include legacy document IDs that might still be referenced
@@ -60,98 +56,95 @@ export async function getOrphanedTGZ(
     const nameNoExt = stripExtension(basename(p));
     return !referenced.has(nameNoExt);
   });
-  if (orphaned.length === 0) return respond(undefined, { status: 204 });
+
+  return orphaned;
+}
 
+async function createArchive(
+  orphaned: string[],
+  format: "zip" | "tgz"
+): Promise<{ path: string; tmpDir: string; fileList: string }> {
   const ts = Date.now();
   const fileList = await writeFileList(orphaned);
   const tmpDir = await Deno.makeTempDir({ prefix: "orphaned-" });
-  const tarPath = `${tmpDir}/orphaned-tgz-${ts}.tar.gz`;
-  // Use tar -T to read file list and preserve file metadata. Create archive in tmp dir
-  const cmd = new Deno.Command("bash", {
-    args: ["-c", `tar -C blobs -czf ${tarPath} -T ${fileList}`],
-  });
+  const archivePath = `${tmpDir}/orphaned-${format}-${ts}.${format === "zip" ? "zip" : "tar.gz"}`;
"zip" : "tar.gz"}`; + + let cmd: Deno.Command; + + if (format === "zip") { + // Use zip with file list - need to use xargs to read from file properly + cmd = new Deno.Command("bash", { + args: ["-c", `cd blobs && cat ${fileList} | xargs zip ${archivePath}`], + }); + } else { + // Use tar with file list + cmd = new Deno.Command("bash", { + args: ["-c", `tar -C blobs -czf ${archivePath} -T ${fileList}`], + }); + } + const p = cmd.spawn(); const status = await p.status; - // Remove the temporary file list regardless of tar success - await Deno.remove(fileList); + if (!status.success) { - // cleanup tmp dir if tar failed + // Clean up on failure try { + await Deno.remove(fileList); await Deno.remove(tmpDir, { recursive: true }); } catch (_e) { - // ignore + // ignore cleanup errors } - throw new Error("tar failed with code " + status.code); + throw new Error(`${format} creation failed with code ${status.code}`); } - const f = await Deno.open(tarPath, { read: true }); + + return { path: archivePath, tmpDir, fileList }; +} + +async function createArchiveResponse( + format: "zip" | "tgz" +): Promise { + const orphaned = await getOrphanedFiles(); + if (orphaned.length === 0) return respond(undefined, { status: 204 }); + + const { path: archivePath, tmpDir, fileList } = await createArchive(orphaned, format); + + // Remove the temporary file list + await Deno.remove(fileList); + + const f = await Deno.open(archivePath, { read: true }); + // unlink the archive so it doesn't linger on disk; fd remains readable on POSIX systems try { - await Deno.remove(tarPath); + await Deno.remove(archivePath); // remove the temporary directory now that the file is unlinked await Deno.remove(tmpDir, { recursive: true }); } catch (_e) { // ignore cleanup errors } + const readableStream = f.readable; + const ts = Date.now(); + const extension = format === "zip" ? "zip" : "tar.gz"; + const contentType = format === "zip" ? "application/zip" : "application/gzip"; + return respond(readableStream, { headers: { - "content-disposition": `inline; filename="tridoc_orphaned_${ts}.tar.gz"`, - "content-type": "application/gzip", + "content-disposition": `inline; filename="tridoc_orphaned_${ts}.${extension}"`, + "content-type": contentType, }, }); } -export async function getOrphanedZIP( +export async function getOrphanedTGZ( _request: Request, _match: URLPatternResult, ): Promise { - const allFiles = await listAllBlobFiles(); - const referenced = await metafinder.getReferencedBlobs(); - // Also include legacy document IDs that might still be referenced - const docs = await metafinder.getDocumentList({}); - docs.forEach((d: Record) => referenced.add(d.identifier)); - - // RDF stores the bare hash (no path, no extension). Strip extensions from - // filesystem names and compare directly against the referenced set. 
-  const orphaned = allFiles.filter((p) => {
-    const nameNoExt = stripExtension(basename(p));
-    return !referenced.has(nameNoExt);
-  });
-  if (orphaned.length === 0) return respond(undefined, { status: 204 });
+  return await createArchiveResponse("tgz");
+}
 
-  const ts = Date.now();
-  const fileList = await writeFileList(orphaned);
-  const tmpDir = await Deno.makeTempDir({ prefix: "orphaned-" });
-  const zipPath = `${tmpDir}/orphaned-zip-${ts}.zip`;
-  // Use zip reading file list from stdin to avoid copying and preserve metadata where possible
-  const cmd = new Deno.Command("bash", {
-    args: ["-c", `cd blobs && xargs -a ${fileList} zip -@ ${zipPath}`],
-  });
-  const p = cmd.spawn();
-  const status = await p.status;
-  // Remove the temporary file list regardless of zip success
-  await Deno.remove(fileList);
-  if (!status.success) {
-    try {
-      await Deno.remove(tmpDir, { recursive: true });
-    } catch (_e) {
-      // ignore
-    }
-    throw new Error("zip failed with code " + status.code);
-  }
-  const f = await Deno.open(zipPath, { read: true });
-  // unlink the archive so it doesn't linger on disk; fd remains readable on POSIX systems
-  try {
-    await Deno.remove(zipPath);
-    await Deno.remove(tmpDir, { recursive: true });
-  } catch (_e) {
-    // ignore cleanup errors
-  }
-  const readableStream = f.readable;
-  return respond(readableStream, {
-    headers: {
-      "content-disposition": `inline; filename="tridoc_orphaned_${ts}.zip"`,
-      "content-type": "application/zip",
-    },
-  });
+export async function getOrphanedZIP(
+  _request: Request,
+  _match: URLPatternResult,
+): Promise<Response> {
+  return await createArchiveResponse("zip");
 }

From 56752b010628766bc9fbc079e46582a4e2f192df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reto=20Gm=C3=BCr?=
Date: Sat, 23 Aug 2025 06:12:49 +0000
Subject: [PATCH 69/90] permissions / no-prompt

---
 .vscode/launch.json | 3 ++-
 .vscode/tasks.json  | 3 ++-
 docker-cmd.sh       | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index 204e29c..0a2c506 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -10,8 +10,9 @@
       "runtimeArgs": [
         "run",
         "--watch",
+        "--no-prompt",
         "--allow-net",
-        "--allow-read=blobs,rdf.ttl",
+        "--allow-read=blobs,rdf.ttl,/tmp",
         "--allow-write=blobs,rdf.ttl,/tmp",
         "--allow-run",
         "--allow-env=FUSEKI_PWD,TRIDOC_PWD,OCR_LANG"
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
index 266dd4d..57dfecd 100644
--- a/.vscode/tasks.json
+++ b/.vscode/tasks.json
@@ -8,8 +8,9 @@
       "args": [
         "run",
         "--watch",
+        "--no-prompt",
         "--allow-net",
-        "--allow-read=blobs,rdf.ttl",
+        "--allow-read=blobs,rdf.ttl,/tmp",
         "--allow-write=blobs,rdf.ttl,/tmp",
         "--allow-run",
         "--allow-env=TRIDOC_PWD,OCR_LANG",
         "src/main.ts"
diff --git a/docker-cmd.sh b/docker-cmd.sh
index 3347264..f8b4e9f 100644
--- a/docker-cmd.sh
+++ b/docker-cmd.sh
@@ -11,4 +11,4 @@ else
 fi
 
 echo "[docker-cmd] Launching Deno application..."
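+# Note (assumption): --no-prompt below makes the app fail fast on missing
+# permissions instead of prompting, and /tmp read/write is needed for the
+# upload staging and archive creation done in the handlers.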
-exec deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,FUSEKI_PWD,OCR_LANG src/main.ts +exec deno run --no-prompt --allow-net --allow-read=blobs,rdf.ttl,/tmp --allow-write=blobs,rdf.ttl,/tmp --allow-run --allow-env=TRIDOC_PWD,FUSEKI_PWD,OCR_LANG src/main.ts From de5834f70abe8d7ae8ba4eeedc13a9457c4038ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Sat, 23 Aug 2025 07:43:39 +0000 Subject: [PATCH 70/90] flat file structure --- src/handlers/orphaned.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/handlers/orphaned.ts b/src/handlers/orphaned.ts index 5d1e1a4..77ba496 100644 --- a/src/handlers/orphaned.ts +++ b/src/handlers/orphaned.ts @@ -72,14 +72,14 @@ async function createArchive( let cmd: Deno.Command; if (format === "zip") { - // Use zip with file list - need to use xargs to read from file properly + // Create flat zip - use -j flag to junk (ignore) paths, storing files flat cmd = new Deno.Command("bash", { - args: ["-c", `cd blobs && cat ${fileList} | xargs zip ${archivePath}`], + args: ["-c", `cd blobs && cat ${fileList} | xargs zip -j ${archivePath}`], }); } else { - // Use tar with file list + // Create flat tar - use --transform to strip directory paths cmd = new Deno.Command("bash", { - args: ["-c", `tar -C blobs -czf ${archivePath} -T ${fileList}`], + args: ["-c", `tar -C blobs -czf ${archivePath} --transform 's|.*/||' -T ${fileList}`], }); } From 28442c5d55b1a08ea283a542c80118dbdcb3d42e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Sat, 23 Aug 2025 18:37:43 +0000 Subject: [PATCH 71/90] obsolete --- .devcontainer/Dockerfile | 39 --------------------------------------- 1 file changed, 39 deletions(-) delete mode 100644 .devcontainer/Dockerfile diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile deleted file mode 100644 index 1d0536d..0000000 --- a/.devcontainer/Dockerfile +++ /dev/null @@ -1,39 +0,0 @@ -FROM denoland/deno:2.4.4 - -EXPOSE 8000 - -RUN mkdir -p /usr/src/app/src /usr/src/app/.devcontainer -WORKDIR /usr/src/app - -# Install required packages for development environment -RUN apt update \ - && apt -y install pdfsandwich tesseract-ocr-deu tesseract-ocr-fra curl git zip unzip iputils-ping \ - && rm -rf /var/lib/apt/lists/* - -# Remove ImageMagick policy restrictions (only if file exists) -RUN rm -f /etc/ImageMagick-6/policy.xml - -# Change ownership of the working directory to deno user -RUN chown -R deno:deno /usr/src/app \ - && mkdir -p /home/deno \ - && chown -R deno:deno /home/deno - -# Switch to deno user before creating files -USER deno - -# Use a workspace-local Deno cache to avoid UID mismatch issues with /deno-dir -ENV DENO_DIR=/usr/src/app/.deno-dir - -# Pre-cache dependencies (will be overridden by volume mount in dev) and ensure cache dir exists -RUN mkdir -p "$DENO_DIR" src && echo 'export {};' > src/deps.ts - -# Persist bash history in the mounted workspace so it survives container rebuilds -# Use a file inside .devcontainer to keep project root clean -ENV HISTFILE=/usr/src/app/.devcontainer/.bash_history \ - HISTSIZE=5000 \ - HISTFILESIZE=10000 \ - PROMPT_COMMAND='history -a; history -n; $PROMPT_COMMAND' -RUN touch /usr/src/app/.devcontainer/.bash_history && chmod 600 /usr/src/app/.devcontainer/.bash_history - -# Keep container running for development -CMD ["sleep", "infinity"] \ No newline at end of file From f2db2a3b249e8e90827bdb6d90458a428bd24b91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: 
Sat, 23 Aug 2025 18:43:43 +0000 Subject: [PATCH 72/90] fixed thumbnails --- src/handlers/doc.ts | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index 1b6a51d..7843528 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -6,6 +6,8 @@ import { storeBlob, getBlobPath, getThumbnailPath } from "../helpers/blobStore.t import * as metadelete from "../meta/delete.ts"; import * as metafinder from "../meta/finder.ts"; import * as metastore from "../meta/store.ts"; +import { ensureDir } from "../deps.ts"; +import { hashToThumbnailPath } from "../helpers/ipfsHash.ts"; type TagAdd = { label: string; @@ -171,23 +173,37 @@ export async function getThumb( let blobPath: string; if (meta.blob) { blobPath = getBlobPath(meta.blob); + // Ensure the thumbnail directory exists for hash-based storage + const { dir: thumbDir } = hashToThumbnailPath(meta.blob); + await ensureDir(thumbDir); } else { blobPath = getPath(id); + // For legacy storage the directory should already exist with the PDF } await Deno.stat(blobPath); // Check if PDF exists → 404 otherwise const cmd = new Deno.Command("convert", { args: ["-thumbnail", "300x", "-alpha", "remove", `${blobPath}[0]`, thumbPath], + stdout: "piped", + stderr: "piped", }); - const p = cmd.spawn(); - const status = await p.status; - if (!status.success) throw new Error("convert failed with code " + status.code); + const { success, code, stdout, stderr } = await cmd.output(); + if (!success) { + const td = new TextDecoder(); + const err = td.decode(stderr) || td.decode(stdout); + console.error("ImageMagick convert error (on-demand):", err.trim()); + throw new Error("convert failed with code " + code + (err ? ": " + err : "")); + } thumb = await Deno.open(thumbPath, { read: true }); } catch (error) { if (error instanceof Deno.errors.NotFound) { return respond("404 Not Found", { status: 404 }); } - throw error; + // Surface ImageMagick error to client for easier debugging + if (error instanceof Error) { + return respond("Thumbnail generation failed: " + error.message, { status: 500 }); + } + return respond("Thumbnail generation failed", { status: 500 }); } } else { throw error; @@ -301,9 +317,14 @@ export async function postPDF( // no await as we don’t care for the result - if it fails, the thumbnail will be created upon request. 
// Fire-and-forget thumbnail generation (non-blocking) try { + const { dir: thumbDir } = hashToThumbnailPath(blobHash); + await ensureDir(thumbDir); const thumbPath = getThumbnailPath(blobHash); const cmd = new Deno.Command("convert", { args: ["-thumbnail", "300x", "-alpha", "remove", `${blobPath}[0]`, thumbPath], + // Inherit stdio so any ImageMagick errors are visible in server logs + stdout: "inherit", + stderr: "inherit", }); cmd.spawn(); } catch (_) { From 89362197beb7688cc7a6f8f8cb8c0e9ba1f64ba6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Sat, 23 Aug 2025 19:08:23 +0000 Subject: [PATCH 73/90] adapted container paths --- .devcontainer/docker-compose.yml | 2 +- .gitignore | 2 +- docker-compose.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index ab24918..41f0788 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -33,7 +33,7 @@ services: ports: - "8001:3030" # Expose for development access volumes: - - ../fuseki-base:/fuseki/base + - ../fuseki-db:/DB - ../config-tdb.ttl:/fuseki/set-up-resources/config-tdb healthcheck: test: ["CMD", "curl", "-fsS", "http://localhost:3030/$/ping"] diff --git a/.gitignore b/.gitignore index f0754c0..7bb40d6 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,7 @@ yarn-error.log* node_modules blobs -fuseki-base +fuseki-db .devcontainer/.bash_history .bash_history .deno-dir \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index d3d7aac..56ebddc 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,5 +18,5 @@ services: environment: ADMIN_PASSWORD: "${FUSEKI_PWD:-pw123}" volumes: - - ./fuseki-base:/fuseki/base + - ./fuseki-db:/DB - ./config-tdb.ttl:/fuseki/set-up-resources/config-tdb \ No newline at end of file From ebe62aeef09e949f482a88b34d55e24dea228f94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Wed, 27 Aug 2025 04:58:40 +0000 Subject: [PATCH 74/90] fixed indent --- src/handlers/doc.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index 7843528..d0f93c1 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -305,10 +305,10 @@ export async function postPDF( if (text.length < 4) { // run OCR const lang = Deno.env.get("OCR_LANG") || "fra+deu+eng"; - const cmd = new Deno.Command("pdfsandwich", { args: ["-rgb", "-lang", lang, blobPath] }); - const p = cmd.spawn(); - const status = await p.status; - if (!status.success) throw new Error("pdfsandwich failed with code " + status.code); + const cmd = new Deno.Command("pdfsandwich", { args: ["-rgb", "-lang", lang, blobPath] }); + const p = cmd.spawn(); + const status = await p.status; + if (!status.success) throw new Error("pdfsandwich failed with code " + status.code); // pdfsandwich generates a file with the same name + _ocr await Deno.rename(blobPath + "_ocr", blobPath); text = await getText(blobPath); From 008c5f3bc74969e79125e01afeac13e69e96e944 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Wed, 3 Sep 2025 18:20:17 +0000 Subject: [PATCH 75/90] removing legacy files after migration --- src/handlers/migrate.ts | 114 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 106 insertions(+), 8 deletions(-) diff --git a/src/handlers/migrate.ts b/src/handlers/migrate.ts index 66cf1bb..f1ec707 100644 --- a/src/handlers/migrate.ts +++ b/src/handlers/migrate.ts @@ -9,6 +9,8 @@ interface MigrationStatus { 
skipped: number; errors: string[]; duplicatesFound: number; + filesRemoved: number; + directoriesRemoved: number; } /** @@ -24,9 +26,13 @@ export async function migrateBlobs( migrated: 0, skipped: 0, errors: [], - duplicatesFound: 0 + duplicatesFound: 0, + filesRemoved: 0, + directoriesRemoved: 0 }; + const successfullyMigrated: Array<{ identifier: string; legacyPath: string }> = []; + try { // Get all legacy blob files (filesystem-driven approach) const legacyBlobs = await getLegacyBlobFiles(); @@ -91,6 +97,9 @@ export async function migrateBlobs( // Update metadata to include blob reference (if document exists in metadata) await addBlobReferenceToDocument(identifier, blobHash); + // Track successful migration for cleanup + successfullyMigrated.push({ identifier, legacyPath }); + status.migrated++; console.log(`Migrated document ${identifier} -> blob ${blobHash}`); @@ -101,8 +110,12 @@ export async function migrateBlobs( } } + // Clean up obsolete files after successful migration + await cleanupObsoleteFiles(successfullyMigrated, status); + console.log(`Migration completed: ${status.migrated}/${status.processed} files migrated`); console.log(`Found ${status.duplicatesFound} duplicate files (content deduplication)`); + console.log(`Removed ${status.filesRemoved} obsolete files and ${status.directoriesRemoved} empty directories`); return respond(JSON.stringify(status), { headers: { @@ -134,18 +147,20 @@ async function getLegacyBlobFiles(): Promise, + status: MigrationStatus +) { + const directoriesToCheck = new Set(); + + for (const { legacyPath } of migratedFiles) { + try { + // Remove the legacy blob file + await Deno.remove(legacyPath); + status.filesRemoved++; + console.log(`Removed obsolete blob: ${legacyPath}`); + + // Remove the legacy thumbnail if it exists + const legacyThumbPath = legacyPath + ".png"; + try { + await Deno.stat(legacyThumbPath); + await Deno.remove(legacyThumbPath); + status.filesRemoved++; + console.log(`Removed obsolete thumbnail: ${legacyThumbPath}`); + } catch (error) { + if (!(error instanceof Deno.errors.NotFound)) { + throw error; + } + // Thumbnail doesn't exist, that's fine + } + + // Track directory for potential cleanup + const dir = legacyPath.substring(0, legacyPath.lastIndexOf('/')); + directoriesToCheck.add(dir); + + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + console.error(`Failed to remove obsolete file ${legacyPath}: ${errorMessage}`); + // Don't add to status.errors since this is cleanup, not core migration + } + } + + // Clean up empty directories + await cleanupEmptyDirectories(directoriesToCheck, status); +} + +/** + * Remove empty directories from the legacy blob structure + */ +async function cleanupEmptyDirectories( + directoriesToCheck: Set, + status: MigrationStatus +) { + // Sort directories by depth (deepest first) to ensure proper cleanup order + const sortedDirs = Array.from(directoriesToCheck).sort((a, b) => b.split('/').length - a.split('/').length); + + for (const dir of sortedDirs) { + try { + // Check if directory is empty + const entries = []; + for await (const entry of Deno.readDir(dir)) { + entries.push(entry); + break; // We only need to know if there's at least one entry + } + + if (entries.length === 0) { + await Deno.remove(dir); + status.directoriesRemoved++; + console.log(`Removed empty directory: ${dir}`); + + // Check parent directory too + const parentDir = dir.substring(0, dir.lastIndexOf('/')); + if (parentDir && parentDir !== './blobs' && !directoriesToCheck.has(parentDir)) { + directoriesToCheck.add(parentDir); + } + } + } catch (error) { + if (!(error instanceof Deno.errors.NotFound)) { + console.error(`Failed to check/remove directory ${dir}:`, error); + } + } + } +} From ec31e3925196963023da4ea41ca59e3bb8bf7ec7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Wed, 3 Sep 2025 20:07:37 +0000 Subject: [PATCH 76/90] fixed OCR --- .vscode/launch.json | 2 +- src/handlers/doc.ts | 162 ++++++++++++++++++++++++++++++++------------ 2 files changed, 121 insertions(+), 43 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 0a2c506..24ba25c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -15,7 +15,7 @@ "--allow-read=blobs,rdf.ttl,/tmp", "--allow-write=blobs,rdf.ttl,/tmp", "--allow-run", - "--allow-env=FUSEKI_PWD,TRIDOC_PWD,OCR_LANG" + "--allow-env=FUSEKI_PWD,TRIDOC_PWD,OCR_LANG, TMPDIR" ], "attachSimplePort": 9229, "env": { diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index d0f93c1..e601f34 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -268,15 +268,12 @@ export async function postPDF( request: Request, _match: URLPatternResult, ): Promise { - const id = nanoid(); // Document ID (separate from blob hash) - - // Read the content into memory to compute hash and store blob + // Read request body into memory (unchanged approach) const chunks: Uint8Array[] = []; const reader = request.body?.getReader(); if (!reader) { return respond("Missing request body", { status: 400 }); } - try { while (true) { const { done, value } = await reader.read(); @@ -286,8 +283,6 @@ export async function postPDF( } finally { reader.releaseLock(); } - - // Combine chunks into a single Uint8Array const totalLength = chunks.reduce((sum, chunk) => sum + chunk.length, 0); const content = new Uint8Array(totalLength); let offset = 0; @@ -295,49 +290,132 @@ export async function postPDF( content.set(chunk, offset); offset += chunk.length; } - - // Store blob using content hash - const blobHash = await storeBlob(content); - const blobPath = getBlobPath(blobHash); - - console.log((new Date()).toISOString(), "Document created with id", id, "blob hash", blobHash); - let text = await getText(blobPath); - if (text.length < 4) { - // run OCR - const lang = Deno.env.get("OCR_LANG") || "fra+deu+eng"; - const cmd = new Deno.Command("pdfsandwich", { 
args: ["-rgb", "-lang", lang, blobPath] }); - const p = cmd.spawn(); - const status = await p.status; - if (!status.success) throw new Error("pdfsandwich failed with code " + status.code); - // pdfsandwich generates a file with the same name + _ocr - await Deno.rename(blobPath + "_ocr", blobPath); - text = await getText(blobPath); - console.log((new Date()).toISOString(), id, ": OCR finished"); + + // Put upload into its own temp directory so pdfsandwich writes are predictable + const tmpDir = await Deno.makeTempDir({ prefix: "upload_" }); + const tmpUploadPath = `${tmpDir}/upload.pdf`; + await Deno.writeFile(tmpUploadPath, content); + + try { + const { id, ocrMissing } = await processPDF(tmpUploadPath); + + if (ocrMissing) { + return respond("OCR not produced; stored original PDF without embedded text", { + headers: { + "Location": "/doc/" + id, + "Access-Control-Expose-Headers": "Location", + }, + }); + } + return respond(undefined, { + headers: { + "Location": "/doc/" + id, + "Access-Control-Expose-Headers": "Location", + }, + }); + } finally { + try { await Deno.remove(tmpDir, { recursive: true }); } catch (_) { /* ignore cleanup errors */ } } - // no await as we don’t care for the result - if it fails, the thumbnail will be created upon request. - // Fire-and-forget thumbnail generation (non-blocking) +} + +// Process a PDF file path: if it already contains text => storePDF; otherwise run pdfsandwich +// and store OCR output if present. Returns the generated id and whether OCR output was missing. +async function processPDF(pdfPath: string): Promise<{ id: string; ocrMissing: boolean }> { + let text = ""; try { - const { dir: thumbDir } = hashToThumbnailPath(blobHash); - await ensureDir(thumbDir); - const thumbPath = getThumbnailPath(blobHash); - const cmd = new Deno.Command("convert", { - args: ["-thumbnail", "300x", "-alpha", "remove", `${blobPath}[0]`, thumbPath], - // Inherit stdio so any ImageMagick errors are visible in server logs + text = await getText(pdfPath); + } catch (_) { + text = ""; + } + + if (text.length >= 4) { + const id = await storePDF(pdfPath); + return { id, ocrMissing: false }; + } + + // run pdfsandwich in same directory as pdfPath so output lands predictably + const dir = pdfPath.substring(0, Math.max(0, pdfPath.lastIndexOf("/"))) || "."; + const base = pdfPath.substring(pdfPath.lastIndexOf("/") + 1).replace(/\.pdf$/i, ""); + const lang = Deno.env.get("OCR_LANG") || "fra+deu+eng"; + try { + const cmd = new Deno.Command("pdfsandwich", { + args: ["-rgb", "-lang", lang, pdfPath], + cwd: dir, stdout: "inherit", stderr: "inherit", }); - cmd.spawn(); - } catch (_) { - // ignore spawn errors for background thumbnail creation + const child = cmd.spawn(); + const status = await child.status; + if (!status.success) { + console.error("pdfsandwich failed with code", status.code); + const id = await storePDF(pdfPath); + return { id, ocrMissing: true }; + } + + // Expect pdfsandwich to write _ocr.pdf next to the input file + const ocrCandidate = `${dir}/${base}_ocr.pdf`; + try { + await Deno.stat(ocrCandidate); + const id = await storePDF(ocrCandidate); + return { id, ocrMissing: false }; + } catch (err) { + if (err instanceof Deno.errors.NotFound) { + console.error("OCR output not found at expected location:", ocrCandidate); + const id = await storePDF(pdfPath); + return { id, ocrMissing: true }; + } + throw err; + } + } catch (err) { + console.error("pdfsandwich execution failed:", String(err)); + const id = await storePDF(pdfPath); + return { id, ocrMissing: true }; } - 
const date = datecheck(request); +} + +// storePDF: read pdfPath bytes, extract text (if any), store blob, ensure thumbnail (only if missing), +// create an ID and persist metadata (id,text,date,blobHash). Returns the generated id. +async function storePDF(pdfPath: string): Promise { + // Extract text (best effort) + let text = ""; + try { + text = await getText(pdfPath); + } catch (err) { + console.warn("getText failed when storing PDF:", String(err)); + text = ""; + } + + // Read file bytes and store as blob so hash matches delivered content + const finalBytes = await Deno.readFile(pdfPath); + const blobHash = await storeBlob(finalBytes); + + // Ensure thumbnail directory and generate thumbnail only if missing + try { + const { dir: thumbDir, fullPath: thumbPath } = hashToThumbnailPath(blobHash); + await ensureDir(thumbDir); + let thumbExists = false; + try { + await Deno.stat(thumbPath); + thumbExists = true; + } catch (e) { + if (!(e instanceof Deno.errors.NotFound)) throw e; + } + if (!thumbExists) { + const cmd = new Deno.Command("convert", { + args: ["-thumbnail", "300x", "-alpha", "remove", `${getBlobPath(blobHash)}[0]`, thumbPath], + stdout: "inherit", + stderr: "inherit", + }); + cmd.spawn(); + } + } catch (err) { + console.warn("Thumbnail generation skipped/failed:", String(err)); + } + + const date = new Date().toISOString(); + const id = nanoid(); await metastore.storeDocumentWithBlob({ id, text, date, blobHash }); - return respond(undefined, { - headers: { - "Location": "/doc/" + id, - "Access-Control-Expose-Headers": "Location", - }, - }); + return id; } export async function postTag( From 5e8825ed83bad44a3f62a2c102f6a182557396c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Thu, 4 Sep 2025 17:53:40 +0000 Subject: [PATCH 77/90] cleanupAllEmptyLegacyDirectories --- src/handlers/migrate.ts | 197 +++++++++++++++++++++++++++++----------- 1 file changed, 146 insertions(+), 51 deletions(-) diff --git a/src/handlers/migrate.ts b/src/handlers/migrate.ts index f1ec707..5286e09 100644 --- a/src/handlers/migrate.ts +++ b/src/handlers/migrate.ts @@ -1,5 +1,9 @@ import { respond } from "../helpers/cors.ts"; -import { computeFileIPFSHash, hashToPath, hashToThumbnailPath } from "../helpers/ipfsHash.ts"; +import { + computeFileIPFSHash, + hashToPath, + hashToThumbnailPath, +} from "../helpers/ipfsHash.ts"; import { fusekiFetch, fusekiUpdate } from "../meta/fusekiFetch.ts"; import { ensureDir } from "../deps.ts"; @@ -28,10 +32,12 @@ export async function migrateBlobs( errors: [], duplicatesFound: 0, filesRemoved: 0, - directoriesRemoved: 0 + directoriesRemoved: 0, }; - const successfullyMigrated: Array<{ identifier: string; legacyPath: string }> = []; + const successfullyMigrated: Array< + { identifier: string; legacyPath: string } + > = []; try { // Get all legacy blob files (filesystem-driven approach) @@ -43,11 +49,13 @@ export async function migrateBlobs( try { // Compute hash for the existing blob const blobHash = await computeFileIPFSHash(legacyPath); - + // Check if hash-based blob already exists const { dir: newDir, fullPath: newPath } = hashToPath(blobHash); - const { dir: thumbDir, fullPath: thumbPath } = hashToThumbnailPath(blobHash); - + const { dir: thumbDir, fullPath: thumbPath } = hashToThumbnailPath( + blobHash, + ); + let blobExists = false; try { await Deno.stat(newPath); @@ -70,7 +78,7 @@ export async function migrateBlobs( const legacyThumbPath = legacyPath + ".png"; try { await Deno.stat(legacyThumbPath); - + // Copy thumbnail to new thumbs directory 
let thumbExists = false; try { @@ -81,7 +89,7 @@ export async function migrateBlobs( throw error; } } - + if (!thumbExists) { await ensureDir(thumbDir); await Deno.copyFile(legacyThumbPath, thumbPath); @@ -96,15 +104,16 @@ export async function migrateBlobs( // Update metadata to include blob reference (if document exists in metadata) await addBlobReferenceToDocument(identifier, blobHash); - + // Track successful migration for cleanup successfullyMigrated.push({ identifier, legacyPath }); - + status.migrated++; console.log(`Migrated document ${identifier} -> blob ${blobHash}`); - } catch (error) { - const errorMessage = error instanceof Error ? error.message : String(error); + const errorMessage = error instanceof Error + ? error.message + : String(error); status.errors.push(`Failed to migrate ${identifier}: ${errorMessage}`); console.error(`Migration error for ${identifier}:`, error); } @@ -113,16 +122,24 @@ export async function migrateBlobs( // Clean up obsolete files after successful migration await cleanupObsoleteFiles(successfullyMigrated, status); - console.log(`Migration completed: ${status.migrated}/${status.processed} files migrated`); - console.log(`Found ${status.duplicatesFound} duplicate files (content deduplication)`); - console.log(`Removed ${status.filesRemoved} obsolete files and ${status.directoriesRemoved} empty directories`); - + // Clean up any remaining empty legacy directories + await cleanupAllEmptyLegacyDirectories(status); + + console.log( + `Migration completed: ${status.migrated}/${status.processed} files migrated`, + ); + console.log( + `Found ${status.duplicatesFound} duplicate files (content deduplication)`, + ); + console.log( + `Removed ${status.filesRemoved} obsolete files and ${status.directoriesRemoved} empty directories`, + ); + return respond(JSON.stringify(status), { headers: { "content-type": "application/json; charset=utf-8", }, }); - } catch (error) { const errorMessage = error instanceof Error ? 
error.message : String(error); status.errors.push(`Migration failed: ${errorMessage}`); @@ -139,25 +156,27 @@ export async function migrateBlobs( * Get all legacy blob files (filesystem-driven approach) * Returns everything in blobs/ that's not in blobs/ipfs/ or blobs/thumbs/ */ -async function getLegacyBlobFiles(): Promise> { +async function getLegacyBlobFiles(): Promise< + Array<{ identifier: string; legacyPath: string }> +> { const results: Array<{ identifier: string; legacyPath: string }> = []; - + async function walkLegacyBlobs(dir: string, depth = 0) { try { for await (const entry of Deno.readDir(dir)) { const path = `${dir}/${entry.name}`; - + // Skip the new ipfs and thumbs directories at the top level if (depth === 0 && (entry.name === "ipfs" || entry.name === "thumbs")) { continue; } - + if (entry.isDirectory) { // Recurse into any subdirectory (no fixed depth limit) await walkLegacyBlobs(path, depth + 1); } else if (entry.isFile) { // Treat any file (except thumbnails) as a legacy blob leaf - if (!entry.name.endsWith('.png')) { + if (!entry.name.endsWith(".png")) { const identifier = entry.name; results.push({ identifier, legacyPath: path }); } @@ -169,7 +188,7 @@ async function getLegacyBlobFiles(): Promise, - status: MigrationStatus + status: MigrationStatus, ) { const directoriesToCheck = new Set(); @@ -234,12 +255,23 @@ async function cleanupObsoleteFiles( } // Track directory for potential cleanup - const dir = legacyPath.substring(0, legacyPath.lastIndexOf('/')); + let dir = legacyPath.substring(0, legacyPath.lastIndexOf("/")); directoriesToCheck.add(dir); + // Add all parent directories to the check list as well + while (dir !== "./blobs" && dir.includes("/")) { + dir = dir.substring(0, dir.lastIndexOf("/")); + if (dir) { + directoriesToCheck.add(dir); + } + } } catch (error) { - const errorMessage = error instanceof Error ? error.message : String(error); - console.error(`Failed to remove obsolete file ${legacyPath}: ${errorMessage}`); + const errorMessage = error instanceof Error + ? 
error.message + : String(error); + console.error( + `Failed to remove obsolete file ${legacyPath}: ${errorMessage}`, + ); // Don't add to status.errors since this is cleanup, not core migration } } @@ -248,39 +280,102 @@ async function cleanupObsoleteFiles( await cleanupEmptyDirectories(directoriesToCheck, status); } +/** + * Find and clean up all empty legacy directories in the blobs folder + * excluding the ipfs and thumbs directories + */ +async function cleanupAllEmptyLegacyDirectories(status: MigrationStatus) { + const legacyDirs = new Set(); + + // Function to collect all directories + async function collectDirectories(dir: string) { + try { + // Skip special directories at the top level + if (dir === "./blobs/ipfs" || dir === "./blobs/thumbs") { + return; + } + + for await (const entry of Deno.readDir(dir)) { + if (entry.isDirectory) { + const path = `${dir}/${entry.name}`; + legacyDirs.add(path); + await collectDirectories(path); + } + } + } catch (error) { + if (!(error instanceof Deno.errors.NotFound)) { + console.error(`Error collecting directories in ${dir}:`, error); + } + } + } + + // Start by collecting all directories under blobs/ except ipfs/ and thumbs/ + await collectDirectories("./blobs"); + + // Clean up the collected directories + if (legacyDirs.size > 0) { + console.log( + `Found ${legacyDirs.size} potential legacy directories to check`, + ); + await cleanupEmptyDirectories(legacyDirs, status); + } +} + /** * Remove empty directories from the legacy blob structure + * Recursively checks and removes empty parent directories */ async function cleanupEmptyDirectories( directoriesToCheck: Set, - status: MigrationStatus + status: MigrationStatus, ) { // Sort directories by depth (deepest first) to ensure proper cleanup order - const sortedDirs = Array.from(directoriesToCheck).sort((a, b) => b.split('/').length - a.split('/').length); + const sortedDirs = Array.from(directoriesToCheck).sort((a, b) => + b.split("/").length - a.split("/").length + ); - for (const dir of sortedDirs) { - try { - // Check if directory is empty - const entries = []; - for await (const entry of Deno.readDir(dir)) { - entries.push(entry); - break; // We only need to know if there's at least one entry - } + // Set to track all directories that need to be checked, including parent directories + const allDirectoriesToCheck = new Set(sortedDirs); + + // Process directories until no more are added to the set + while (allDirectoriesToCheck.size > 0) { + // Get the deepest directories first + const currentDirs = Array.from(allDirectoriesToCheck).sort((a, b) => + b.split("/").length - a.split("/").length + ); - if (entries.length === 0) { - await Deno.remove(dir); - status.directoriesRemoved++; - console.log(`Removed empty directory: ${dir}`); + // Clear the set to start fresh + allDirectoriesToCheck.clear(); - // Check parent directory too - const parentDir = dir.substring(0, dir.lastIndexOf('/')); - if (parentDir && parentDir !== './blobs' && !directoriesToCheck.has(parentDir)) { - directoriesToCheck.add(parentDir); + for (const dir of currentDirs) { + try { + // Skip if this is the root blobs directory + if (dir === "./blobs") { + continue; + } + + // Check if directory is empty + const entries = []; + for await (const entry of Deno.readDir(dir)) { + entries.push(entry); + break; // We only need to know if there's at least one entry + } + + if (entries.length === 0) { + await Deno.remove(dir); + status.directoriesRemoved++; + console.log(`Removed empty directory: ${dir}`); + + // Add parent directory 
to check in the next iteration
+          const parentDir = dir.substring(0, dir.lastIndexOf("/"));
+          if (parentDir && parentDir !== "./blobs") {
+            allDirectoriesToCheck.add(parentDir);
+          }
+        }
+      } catch (error) {
+        if (!(error instanceof Deno.errors.NotFound)) {
+          console.error(`Failed to check/remove directory ${dir}:`, error);
+        }
+      }
+    }
+  }
+}

From 4bbdcd59a934a36ff9fa0b01918aca90c5666e58 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reto=20Gm=C3=BCr?=
Date: Sat, 20 Sep 2025 19:09:33 +0000
Subject: [PATCH 78/90] updated fuseki, uniondefaultgraph not working

---
 .devcontainer/docker-compose.yml |  8 +++---
 .gitignore                       |  4 +--
 config-tdb.ttl                   | 46 ++++++++------------------------
 3 files changed, 16 insertions(+), 42 deletions(-)

diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml
index 41f0788..4784162 100644
--- a/.devcontainer/docker-compose.yml
+++ b/.devcontainer/docker-compose.yml
@@ -1,5 +1,3 @@
-version: '3.8'
-
 services:
   # Development environment for tridoc-backend
   tridoc:
@@ -27,14 +25,14 @@ services:
 
   # Fuseki service accessible as 'fuseki' hostname
   fuseki:
-    image: "linkedsolutions/fuseki"
+    image: "linkedsolutions/fuseki-base:5.4.0"
     environment:
       ADMIN_PASSWORD: "${FUSEKI_PWD:-pw123}"
     ports:
      - "8001:3030" # Expose for development access
     volumes:
-      - ../fuseki-db:/DB
-      - ../config-tdb.ttl:/fuseki/set-up-resources/config-tdb
+      - ./fuseki-data:/fuseki/base
+      - ../config-tdb.ttl:/config.ttl
     healthcheck:
       test: ["CMD", "curl", "-fsS", "http://localhost:3030/$/ping"]
       interval: 5s
diff --git a/.gitignore b/.gitignore
index 7bb40d6..8b67441 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,7 +6,7 @@ yarn-error.log*
 node_modules
 
 blobs
-fuseki-db
+fuseki-data
 .devcontainer/.bash_history
 .bash_history
-.deno-dir
\ No newline at end of file
+.deno-dir
diff --git a/config-tdb.ttl b/config-tdb.ttl
index 518e420..dddc3e8 100644
--- a/config-tdb.ttl
+++ b/config-tdb.ttl
@@ -1,8 +1,8 @@
-@prefix : .
+@prefix : <#> .
 @prefix fuseki: <http://jena.apache.org/fuseki#> .
 @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
 @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
-@prefix tdb: <http://jena.hpl.hp.com/2008/tdb#> .
+@prefix tdb2: <http://jena.apache.org/2016/tdb#> .
 @prefix ja: <http://jena.hpl.hp.com/2005/11/Assembler#> .
 @prefix text: <http://jena.apache.org/text#> .
 @prefix schema: <http://schema.org/> .
@@ -18,48 +18,24 @@ fuseki:serviceReadWriteGraphStore "data" ;
 # A separate read-only graph store endpoint:
 fuseki:serviceReadGraphStore "get" ;
-    fuseki:dataset :text_dataset ;
+    fuseki:dataset <#dataset> ;
 .
 
-## Example of a TDB dataset and text index
-## Initialize TDB
-[] ja:loadClass "com.hp.hpl.jena.tdb.TDB" .
-tdb:DatasetTDB rdfs:subClassOf ja:RDFDataset .
-tdb:GraphTDB rdfs:subClassOf ja:Model .
-
-## Initialize text query
-[] ja:loadClass "org.apache.jena.query.text.TextQuery" .
-# A TextDataset is a regular dataset with a text index.
-text:TextDataset rdfs:subClassOf ja:RDFDataset .
-# Lucene index
-text:TextIndexLucene rdfs:subClassOf text:TextIndex .
-# Solr index
-text:TextIndexSolrne rdfs:subClassOf text:TextIndex .
-
-## ---------------------------------------------------------------
-## This URI must be fixed - it's used to assemble the text dataset.
-
-:text_dataset rdf:type text:TextDataset ;
-    text:dataset <#dataset> ;
-    text:index <#indexLucene> ;
+<#dataset> rdf:type text:TextDataset ;
+    text:dataset <#tdb_dataset> ;
+    text:index <#indexLucene> ;
 .
 
-# A TDB datset used for RDF storage
-<#dataset> rdf:type tdb:DatasetTDB ;
-    tdb:location "DB" ;
-    tdb:unionDefaultGraph true ; # Optional
-    .
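+# Note: tdb2:DatasetTDB2 is the TDB2 assembler type (Fuseki 5.x) that replaces
+# the removed TDB1 (tdb:DatasetTDB) declaration above.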
+<#tdb_dataset> rdf:type tdb2:DatasetTDB2 ; + tdb2:location "/fuseki/base/databases/3DOC" ; + tdb2:unionDefaultGraph true ; +. -# Text index description <#indexLucene> a text:TextIndexLucene ; - text:directory ; - ##text:directory "mem" ; + text:directory ; text:entityMap <#entMap> ; . -# Mapping in the index -# URI stored in field "uri" -# rdfs:label is mapped to field "text" <#entMap> a text:EntityMap ; text:entityField "uri" ; text:defaultField "text" ; From c7184521efc1758efe95834ab26de23d794a6563 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Sat, 20 Sep 2025 20:19:53 +0000 Subject: [PATCH 79/90] specifying graph rather than using uniondefault --- .vscode/tasks.json | 2 +- config-tdb.ttl | 1 - src/meta/delete.ts | 59 ++++++++++++++++++++++++++--------------- src/meta/finder.ts | 59 ++++++++++++++++++++++++++--------------- src/meta/fusekiFetch.ts | 2 +- src/meta/store.ts | 13 ++++++--- 6 files changed, 86 insertions(+), 50 deletions(-) diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 57dfecd..2736acd 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -13,7 +13,7 @@ "--allow-read=blobs,rdf.ttl,/tmp", "--allow-write=blobs,rdf.ttl,/tmp", "--allow-run", - "--allow-env=TRIDOC_PWD,OCR_LANG", + "--allow-env=TRIDOC_PWD,OCR_LANG,FUSEKI_PWD", "src/main.ts" ], "group": { diff --git a/config-tdb.ttl b/config-tdb.ttl index dddc3e8..4b133c1 100644 --- a/config-tdb.ttl +++ b/config-tdb.ttl @@ -28,7 +28,6 @@ <#tdb_dataset> rdf:type tdb2:DatasetTDB2 ; tdb2:location "/fuseki/base/databases/3DOC" ; - tdb2:unionDefaultGraph true ; . <#indexLucene> a text:TextIndexLucene ; diff --git a/src/meta/delete.ts b/src/meta/delete.ts index be9e4bc..df720aa 100644 --- a/src/meta/delete.ts +++ b/src/meta/delete.ts @@ -2,9 +2,15 @@ import { fusekiUpdate } from "./fusekiFetch.ts"; export function deleteFile(id: string) { return fusekiUpdate(` -WITH -DELETE { ?p ?o } -WHERE { ?p ?o }`); +PREFIX rdf: +PREFIX s: +PREFIX tridoc: +DELETE { + GRAPH { ?p ?o } +} +WHERE { + GRAPH { ?p ?o } +}`); } export async function deleteTag(label: string, id?: string) { @@ -13,37 +19,43 @@ export async function deleteTag(label: string, id?: string) { PREFIX rdf: PREFIX s: PREFIX tridoc: -WITH DELETE { - ${ - id ? ` tridoc:tag ?ptag + ` : `?ptag ?p ?o . - ?s ?p1 ?ptag` + GRAPH { + ${ + id ? ` tridoc:tag ?ptag` : `?ptag ?p ?o . + ?s ?p1 ?ptag` } + } } WHERE { - ?ptag tridoc:parameterizableTag ?tag. - ?tag tridoc:label "${label}" . - OPTIONAL { ?ptag ?p ?o } - OPTIONAL { - ${id ? ` tridoc:tag ?ptag` : "?s ?p1 ?ptag"} + GRAPH { + ?ptag tridoc:parameterizableTag ?tag. + ?tag tridoc:label "${label}" . + OPTIONAL { ?ptag ?p ?o } + OPTIONAL { + ${id ? ` tridoc:tag ?ptag` : "?s ?p1 ?ptag"} + } } }`), fusekiUpdate(` PREFIX rdf: PREFIX s: PREFIX tridoc: -WITH DELETE { - ${ + GRAPH { + ${ id ? ` tridoc:tag ?tag` : `?tag ?p ?o . - ?s ?p1 ?tag` + ?s ?p1 ?tag` } + } } WHERE { - ?tag tridoc:label "${label}" . - OPTIONAL { ?tag ?p ?o } - OPTIONAL { - ${id ? ` ?p1 ?tag` : "?s ?p1 ?tag"} + GRAPH { + ?tag tridoc:label "${label}" . + OPTIONAL { ?tag ?p ?o } + OPTIONAL { + ${id ? 
` ?p1 ?tag` : "?s ?p1 ?tag"} + } } }`), ]); @@ -52,7 +64,10 @@ WHERE { export function deleteTitle(id: string) { return fusekiUpdate(` PREFIX s: -WITH -DELETE { s:name ?o } -WHERE { s:name ?o }`); +DELETE { + GRAPH { s:name ?o } +} +WHERE { + GRAPH { s:name ?o } +}`); } diff --git a/src/meta/finder.ts b/src/meta/finder.ts index d9485c0..a8a4967 100644 --- a/src/meta/finder.ts +++ b/src/meta/finder.ts @@ -78,14 +78,17 @@ export async function getDocumentList( "PREFIX text: \n" + "SELECT DISTINCT ?s ?identifier ?title ?date\n" + "WHERE {\n" + - " ?s s:identifier ?identifier .\n" + - " ?s s:dateCreated ?date .\n" + + " GRAPH {\n" + + " ?s s:identifier ?identifier .\n" + + " ?s s:dateCreated ?date .\n" + tagQuery + - " OPTIONAL { ?s s:name ?title . }\n" + + " OPTIONAL { ?s s:name ?title . }\n" + (text - ? '{ { ?s text:query (s:name "' + text + - '") } UNION { ?s text:query (s:text "' + text + '")} } .\n' + ? ' OPTIONAL { ?s s:text ?fulltext . }\n' + + ' FILTER (CONTAINS(LCASE(COALESCE(?title, "")), LCASE("' + text + '")) || ' + + ' CONTAINS(LCASE(COALESCE(?fulltext, "")), LCASE("' + text + '")))\n' : "") + + " }\n" + "}\n" + "ORDER BY desc(?date)\n" + (limit ? "LIMIT " + limit + "\n" : "") + @@ -162,13 +165,19 @@ PREFIX tridoc: PREFIX text: SELECT (COUNT(DISTINCT ?s) as ?count) WHERE { - ?s s:identifier ?identifier . - ${tagQuery} - ${ - text - ? `{ { ?s text:query (s:name "${text}") } UNION { ?s text:query (s:text "${text}")} } .\n` - : "" - }}`).then((json) => parseInt(json.results.bindings[0].count.value, 10)); + GRAPH { + ?s s:identifier ?identifier . + ${tagQuery} + ${ + text + ? `OPTIONAL { ?s s:name ?title . } + OPTIONAL { ?s s:text ?fulltext . } + FILTER (CONTAINS(LCASE(COALESCE(?title, "")), LCASE("${text}")) || + CONTAINS(LCASE(COALESCE(?fulltext, "")), LCASE("${text}")))\n` + : "" + } + } +}`).then((json) => parseInt(json.results.bindings[0].count.value, 10)); } export async function getBasicMeta(id: string) { @@ -178,10 +187,12 @@ PREFIX s: PREFIX tridoc: SELECT ?title ?date ?blob WHERE { - ?s s:identifier "${id}" . - ?s s:dateCreated ?date . - OPTIONAL { ?s s:name ?title . } - OPTIONAL { ?s tridoc:blob ?blob . } + GRAPH { + ?s s:identifier "${id}" . + ?s s:dateCreated ?date . + OPTIONAL { ?s s:name ?title . } + OPTIONAL { ?s tridoc:blob ?blob . } + } }`).then((json) => { const binding = json.results.bindings[0]; return { @@ -197,8 +208,10 @@ export async function getTagList() { PREFIX tridoc: SELECT DISTINCT ?s ?label ?type WHERE { - ?s tridoc:label ?label . - OPTIONAL { ?s tridoc:valueType ?type . } + GRAPH { + ?s tridoc:label ?label . + OPTIONAL { ?s tridoc:valueType ?type . } + } }`; return await fusekiFetch(query).then((json) => json.results.bindings.map((binding) => { @@ -245,9 +258,13 @@ SELECT DISTINCT ?label ?type ?v export async function getTagTypes(labels: string[]) { const json = await fusekiFetch(` PREFIX tridoc: -SELECT DISTINCT ?l ?t WHERE { VALUES ?l { "${ - labels.join('" "') - }" } ?s tridoc:label ?l . OPTIONAL { ?s tridoc:valueType ?t . } }`); +SELECT DISTINCT ?l ?t WHERE { + GRAPH { + VALUES ?l { "${labels.join('" "')}" } + ?s tridoc:label ?l . + OPTIONAL { ?s tridoc:valueType ?t . 
} + } +}`); return json.results.bindings.map( (binding) => { const result_1 = []; diff --git a/src/meta/fusekiFetch.ts b/src/meta/fusekiFetch.ts index d978944..8b71765 100644 --- a/src/meta/fusekiFetch.ts +++ b/src/meta/fusekiFetch.ts @@ -10,7 +10,7 @@ type SparqlJson = { import { DEFAULT_FUSEKI_PWD } from "../config.ts"; export function dump(accept = "text/turtle") { - const query = "CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }"; + const query = "CONSTRUCT { ?s ?p ?o } WHERE { GRAPH { ?s ?p ?o } }"; console.log((new Date()).toISOString(), "→ FUSEKI QUERY", query, "\n"); return fetch("http://fuseki:3030/3DOC/query", { method: "POST", diff --git a/src/meta/store.ts b/src/meta/store.ts index a57aa64..c3c27d3 100644 --- a/src/meta/store.ts +++ b/src/meta/store.ts @@ -66,10 +66,15 @@ export async function addTitle(id: string, title: string) { const query = ` PREFIX rdf: PREFIX s: -WITH -DELETE { s:name ?o } -INSERT { s:name "${escapeLiteral(title)}" } -WHERE { OPTIONAL { s:name ?o } }`; +DELETE { + GRAPH { s:name ?o } +} +INSERT { + GRAPH { s:name "${escapeLiteral(title)}" } +} +WHERE { + GRAPH { OPTIONAL { s:name ?o } } +}`; return await fusekiUpdate(query); } From 64eff5a24e8237589bcab65e7ba8aff6d6803268 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Sat, 20 Sep 2025 22:57:45 +0200 Subject: [PATCH 80/90] using fuseki-base:5.4.0 --- docker-compose.yml | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 56ebddc..2850b27 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,22 +1,28 @@ -version: '3' services: tridoc: build: . ports: - "8000:8000" depends_on: - - "fuseki" + fuseki: + condition: service_healthy volumes: - ./blobs:/usr/src/app/blobs environment: - TRIDOC_PWD: "${TRIDOC_PWD}" + TRIDOC_PWD: "${TRIDOC_PWD:-pw123}" # If you override the command, make sure all required Deno permissions are present (e.g., --allow-write for all needed directories, --allow-read, --allow-net, etc.) 
# Example: # command: deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl,/tmp --allow-run --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts fuseki: - image: "linkedsolutions/fuseki" + image: "linkedsolutions/fuseki-base:5.4.0" environment: ADMIN_PASSWORD: "${FUSEKI_PWD:-pw123}" volumes: - - ./fuseki-db:/DB - - ./config-tdb.ttl:/fuseki/set-up-resources/config-tdb \ No newline at end of file + - ./fuseki-data:/fuseki/base + - ./config-tdb.ttl:/config.ttl + healthcheck: + test: ["CMD", "curl", "-fsS", "http://localhost:3030/$/ping"] + interval: 5s + timeout: 3s + retries: 30 + start_period: 10s \ No newline at end of file From ef799e004833a21bb116c85b0611d3dbefeaa279 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Fri, 7 Nov 2025 17:48:33 +0000 Subject: [PATCH 81/90] removed `.env` --- .env | 1 - 1 file changed, 1 deletion(-) delete mode 100644 .env diff --git a/.env b/.env deleted file mode 100644 index 4462ce7..0000000 --- a/.env +++ /dev/null @@ -1 +0,0 @@ -FUSEKI_PWD=pw123 From c56ee554493b0f203d29fca99832c4cb820e9e3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Fri, 7 Nov 2025 18:04:51 +0000 Subject: [PATCH 82/90] /migrate changed to POST as the operation changes the state of the server --- README.md | 2 +- src/server/routes.ts | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 038986f..feea891 100644 --- a/README.md +++ b/README.md @@ -142,7 +142,7 @@ When getting a comment, a JSON array with objects of the following structure is | `/tag` | GET | Get (list of) all tags | - | - | 1.1.0 | | `/tag/{tagLabel}` | GET | Get Documents with this tag. Same as `/doc?tag={tagLabel}` | [1](#f1) [2](#f2) | Array of objects with document identifiers and titles (where available) | 1.1.0 | | `/tag/{tagLabel}` | DELETE | Delete this tag | - | - | 1.1.0 | -| `/migrate` | GET | Migrate existing nanoid-based blob storage to hash-based storage. Separates documents from blobs in metadata. | - | Migration status JSON with counts and errors | 1.6.0 | +| `/migrate` | POST | Migrate existing nanoid-based blob storage to hash-based storage. Separates documents from blobs in metadata. 
| - | Migration status JSON with counts and errors | 1.6.0 | | `/version` | GET | Get tridoc version | - | semver version number | 1.1.0 | #### URL-Parameters supported: diff --git a/src/server/routes.ts b/src/server/routes.ts index 81f649e..ad8aa37 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -65,9 +65,6 @@ export const routes: { }, { pattern: new URLPattern({ pathname: "/version" }), handler: version, - }, { - pattern: new URLPattern({ pathname: "/migrate" }), - handler: migrate.migrateBlobs, }], "POST": [{ pattern: new URLPattern({ pathname: "/doc" }), @@ -81,6 +78,9 @@ export const routes: { }, { pattern: new URLPattern({ pathname: "/tag" }), handler: tag.createTag, + }, { + pattern: new URLPattern({ pathname: "/migrate" }), + handler: migrate.migrateBlobs, }], "PUT": [{ pattern: new URLPattern({ pathname: "/doc/:id/title" }), From d1bcbe7e4e86e7a19e3eb012e7d5db999183f244 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Sat, 8 Nov 2025 09:16:00 +0000 Subject: [PATCH 83/90] remove unused datecheck function from doc.ts --- src/handlers/doc.ts | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index e601f34..c730609 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -29,13 +29,6 @@ function getPath(id: string) { id.slice(6, 14) + "/" + id; } -function datecheck(request: Request) { - const url = new URL(request.url); - const regex = - /^(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-6]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-6]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-6]\d([+-][0-2]\d:[0-5]\d|Z))$/; - const date = url.searchParams.get("date"); - return date ? (regex.test(date) ? date : undefined) : undefined; -} export async function deleteDoc( _request: Request, From 91878bec7867cc7b61a16a9188f5ff7aad3589f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Sat, 8 Nov 2025 09:17:26 +0000 Subject: [PATCH 84/90] formated (deno fmt) --- .devcontainer/devcontainer.json | 100 +++++++++--------- .vscode/launch.json | 52 +++++----- .vscode/settings.json | 2 +- .vscode/tasks.json | 148 +++++++++++++-------------- DEV-README.md | 9 +- README.md | 173 ++++++++++++++++++-------------- deno.jsonc | 4 +- docker-compose.yml | 2 +- src/handlers/doc.ts | 139 ++++++++++++++++--------- src/handlers/orphaned.ts | 45 +++++---- src/handlers/raw.ts | 17 +++- src/helpers/blobStore.ts | 23 +++-- src/helpers/ipfsHash.ts | 26 +++-- src/helpers/pdfprocessor.ts | 9 +- src/meta/finder.ts | 16 +-- src/meta/fusekiFetch.ts | 8 +- src/meta/store.ts | 19 +++- src/server/server.ts | 3 +- 18 files changed, 456 insertions(+), 339 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index e9cfbfd..7df722c 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -2,54 +2,54 @@ // https://github.com/microsoft/vscode-dev-containers/tree/v0.245.2/containers/docker-existing-docker-compose // If you want to run as a non-root user in the container, see .devcontainer/docker-compose.yml. { - "name": "Tridoc Backend Development", - - // Use the independent dev container docker-compose configuration - "dockerComposeFile": "docker-compose.yml", - - "containerEnv": { - "TRIDOC_PWD": "pw123", - "OCR_LANG": "deu" - }, - - // The 'service' property is the name of the service for the container that VS Code should - // use. Update this value and .devcontainer/docker-compose.yml to the real service name. 
- "service": "tridoc", - - // The optional 'workspaceFolder' property is the path VS Code should open by default when - // connected. This is typically a file mount in .devcontainer/docker-compose.yml - "workspaceFolder": "/usr/src/app", - - // Use 'forwardPorts' to make a list of ports inside the container available locally. - "forwardPorts": [8000, 8001], - - // Start the fuseki service when the dev container starts - "runServices": ["fuseki"], - - // Uncomment the next line if you want to keep your containers running after VS Code shuts down. - "shutdownAction": "stopCompose", - - // Post-create command to set up the development environment - "postCreateCommand": "bash .devcontainer/setup-dev.sh", - - // Connect as the deno user - "remoteUser": "deno", - - "customizations": { - "vscode": { - "extensions": [ - "denoland.vscode-deno" - ], - "settings": { - "deno.enable": true, - "deno.lint": true, - "terminal.integrated.defaultProfile.linux": "bash", - "terminal.integrated.profiles.linux": { - "bash": { - "path": "/bin/bash" - } - } - } - } - } + "name": "Tridoc Backend Development", + + // Use the independent dev container docker-compose configuration + "dockerComposeFile": "docker-compose.yml", + + "containerEnv": { + "TRIDOC_PWD": "pw123", + "OCR_LANG": "deu" + }, + + // The 'service' property is the name of the service for the container that VS Code should + // use. Update this value and .devcontainer/docker-compose.yml to the real service name. + "service": "tridoc", + + // The optional 'workspaceFolder' property is the path VS Code should open by default when + // connected. This is typically a file mount in .devcontainer/docker-compose.yml + "workspaceFolder": "/usr/src/app", + + // Use 'forwardPorts' to make a list of ports inside the container available locally. + "forwardPorts": [8000, 8001], + + // Start the fuseki service when the dev container starts + "runServices": ["fuseki"], + + // Uncomment the next line if you want to keep your containers running after VS Code shuts down. 
+ "shutdownAction": "stopCompose", + + // Post-create command to set up the development environment + "postCreateCommand": "bash .devcontainer/setup-dev.sh", + + // Connect as the deno user + "remoteUser": "deno", + + "customizations": { + "vscode": { + "extensions": [ + "denoland.vscode-deno" + ], + "settings": { + "deno.enable": true, + "deno.lint": true, + "terminal.integrated.defaultProfile.linux": "bash", + "terminal.integrated.profiles.linux": { + "bash": { + "path": "/bin/bash" + } + } + } + } + } } diff --git a/.vscode/launch.json b/.vscode/launch.json index 24ba25c..b9ed418 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -1,28 +1,28 @@ { - "version": "0.2.0", - "configurations": [ - { - "name": "Launch Tridoc Backend", - "type": "node", - "request": "launch", - "program": "${workspaceFolder}/src/main.ts", - "runtimeExecutable": "deno", - "runtimeArgs": [ - "run", - "--watch", - "--no-prompt", - "--allow-net", - "--allow-read=blobs,rdf.ttl,/tmp", - "--allow-write=blobs,rdf.ttl,/tmp", - "--allow-run", - "--allow-env=FUSEKI_PWD,TRIDOC_PWD,OCR_LANG, TMPDIR" - ], - "attachSimplePort": 9229, - "env": { - "TRIDOC_PWD": "pw123", - "OCR_LANG": "deu" - }, - "console": "integratedTerminal" - } - ] + "version": "0.2.0", + "configurations": [ + { + "name": "Launch Tridoc Backend", + "type": "node", + "request": "launch", + "program": "${workspaceFolder}/src/main.ts", + "runtimeExecutable": "deno", + "runtimeArgs": [ + "run", + "--watch", + "--no-prompt", + "--allow-net", + "--allow-read=blobs,rdf.ttl,/tmp", + "--allow-write=blobs,rdf.ttl,/tmp", + "--allow-run", + "--allow-env=FUSEKI_PWD,TRIDOC_PWD,OCR_LANG, TMPDIR" + ], + "attachSimplePort": 9229, + "env": { + "TRIDOC_PWD": "pw123", + "OCR_LANG": "deu" + }, + "console": "integratedTerminal" + } + ] } diff --git a/.vscode/settings.json b/.vscode/settings.json index 1535e13..e40716f 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,4 +2,4 @@ "deno.enable": true, "deno.lint": true, "deno.unstable": true -} \ No newline at end of file +} diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 2736acd..3b115d5 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -1,77 +1,77 @@ { - "version": "2.0.0", - "tasks": [ - { - "label": "Start Tridoc Backend", - "type": "shell", - "command": "deno", - "args": [ - "run", - "--watch", - "--no-prompt", - "--allow-net", - "--allow-read=blobs,rdf.ttl,/tmp", - "--allow-write=blobs,rdf.ttl,/tmp", - "--allow-run", - "--allow-env=TRIDOC_PWD,OCR_LANG,FUSEKI_PWD", - "src/main.ts" - ], - "group": { - "kind": "build", - "isDefault": true - }, - "presentation": { - "echo": true, - "reveal": "always", - "focus": false, - "panel": "new" - }, - "problemMatcher": [], - "options": { - "env": { - "TRIDOC_PWD": "pw123", - "OCR_LANG": "deu" - } - } - }, - { - "label": "Cache Dependencies", - "type": "shell", - "command": "deno", - "args": ["cache", "src/deps.ts"], - "group": "build", - "presentation": { - "echo": true, - "reveal": "always", - "focus": false, - "panel": "shared" - } - }, - { - "label": "Format Code", - "type": "shell", - "command": "deno", - "args": ["fmt"], - "group": "build", - "presentation": { - "echo": true, - "reveal": "silent", - "focus": false, - "panel": "shared" - } - }, - { - "label": "Lint Code", - "type": "shell", - "command": "deno", - "args": ["lint"], - "group": "test", - "presentation": { - "echo": true, - "reveal": "always", - "focus": false, - "panel": "shared" - } + "version": "2.0.0", + "tasks": [ + { + "label": "Start Tridoc Backend", + "type": 
"shell", + "command": "deno", + "args": [ + "run", + "--watch", + "--no-prompt", + "--allow-net", + "--allow-read=blobs,rdf.ttl,/tmp", + "--allow-write=blobs,rdf.ttl,/tmp", + "--allow-run", + "--allow-env=TRIDOC_PWD,OCR_LANG,FUSEKI_PWD", + "src/main.ts" + ], + "group": { + "kind": "build", + "isDefault": true + }, + "presentation": { + "echo": true, + "reveal": "always", + "focus": false, + "panel": "new" + }, + "problemMatcher": [], + "options": { + "env": { + "TRIDOC_PWD": "pw123", + "OCR_LANG": "deu" } - ] + } + }, + { + "label": "Cache Dependencies", + "type": "shell", + "command": "deno", + "args": ["cache", "src/deps.ts"], + "group": "build", + "presentation": { + "echo": true, + "reveal": "always", + "focus": false, + "panel": "shared" + } + }, + { + "label": "Format Code", + "type": "shell", + "command": "deno", + "args": ["fmt"], + "group": "build", + "presentation": { + "echo": true, + "reveal": "silent", + "focus": false, + "panel": "shared" + } + }, + { + "label": "Lint Code", + "type": "shell", + "command": "deno", + "args": ["lint"], + "group": "test", + "presentation": { + "echo": true, + "reveal": "always", + "focus": false, + "panel": "shared" + } + } + ] } diff --git a/DEV-README.md b/DEV-README.md index cc6a6a2..59a5642 100644 --- a/DEV-README.md +++ b/DEV-README.md @@ -4,10 +4,11 @@ Use the vscode-devcontainer: this will start tridoc and fuseki. -It will use TRIDOC_PWD = "pw123". -Access tridoc from http://localhost:8000 and fuseki from http://localhost:8001 +It will use TRIDOC_PWD = "pw123". Access tridoc from http://localhost:8000 and +fuseki from http://localhost:8001 -You might need to `chown deno:deno` blobs/ and fuseki-base (attach bash to docker as root from outside) +You might need to `chown deno:deno` blobs/ and fuseki-base (attach bash to +docker as root from outside) Watch the logs from outside of vscode with @@ -15,10 +16,10 @@ Watch the logs from outside of vscode with docker logs -f tridoc-backend_tridoc_1 ``` - ## Tips & Tricks - Upload Backups with + ```sh curl -D - -X PUT --data-binary @tridoc_backup_sumthing.zip -H "content-Type: application/zip" -u tridoc:pw123 http://localhost:8000/raw/zip ``` diff --git a/README.md b/README.md index feea891..dff9030 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,27 @@ # tridoc -Server-side infrastructure for tridoc: easy document management for individuals and small teams. +Server-side infrastructure for tridoc: easy document management for individuals +and small teams. ## Table Of Contents -* [Setup](#setup) -* [Blob Storage](#blob-storage) -* [Tag System](#tag-system) - * [Simple Tags](#simple-tags) - * [Parameterizable & Parameterized Tags](#parameterizable--parameterized-tags) -* [Comments](#comments) -* [API](#api) + +- [Setup](#setup) +- [Blob Storage](#blob-storage) +- [Tag System](#tag-system) + - [Simple Tags](#simple-tags) + - [Parameterizable & Parameterized Tags](#parameterizable--parameterized-tags) +- [Comments](#comments) +- [API](#api) ## Setup -This will setup tridoc on port 8000 and fuseki avaliable on port 8001. -Make sure you have `docker-compose` installed. +This will setup tridoc on port 8000 and fuseki avaliable on port 8001. Make sure +you have `docker-compose` installed. Replace `YOUR PASSWORD HERE` in the first command with your choice of password. 
Unix/Linux/wsl:
+
 ```bash
 export TRIDOC_PWD="YOUR PASSWORD HERE"
 docker-compose build
@@ -26,6 +29,7 @@ docker-compose up
 ```
 
 On windows, relpace the first line with:
+
 ```powershell
 $env:TRIDOC_PWD = "YOUR PASSWORD HERE"
 ```
@@ -34,62 +38,77 @@ _For more Setup options see the DEV-README.md_
 
 ## Blob Storage
 
-Tridoc uses hash-based blob storage for content deduplication and integrity verification. File content is hashed using IPFS-compatible SHA-256 multihash, and stored in a content-addressable file system.
+Tridoc uses hash-based blob storage for content deduplication and integrity
+verification. File content is hashed using an IPFS-compatible SHA-256 multihash
+and stored in a content-addressable file system.
 
-**Document vs Blob Separation**: Documents (logical entities with metadata like title, tags, comments) are separate from blobs (file content). Multiple documents can reference the same blob if they contain identical content.
+**Document vs Blob Separation**: Documents (logical entities with metadata like
+title, tags, comments) are separate from blobs (file content). Multiple
+documents can reference the same blob if they contain identical content.
 
-**Migration**: Use the `/migrate` endpoint to migrate existing installations from nanoid-based to hash-based storage.
+**Migration**: Use the `/migrate` endpoint to migrate existing installations
+from nanoid-based to hash-based storage.
 
 ## Tag System
 
-There are two types of tags: simple tags and parameterizable tags. Parameterizable tags need a parameter to become a parameterized tag wich can be added to a document.
+There are two types of tags: simple tags and parameterizable tags.
+Parameterizable tags need a parameter to become a parameterized tag which can be
+added to a document.
 
 ### Simple Tags
 
-Simple tags can be created by `POST` to `/tag`. You need to send an JSON object like this:
+Simple tags can be created by `POST` to `/tag`. You need to send a JSON object
+like this:
 
 ```json
-{"label": "Inbox"}
+{ "label": "Inbox" }
 ```
 
 > Note: `label` must be unique.
-> The label must not contain any of the following: whitespace, `/`, `\`, `#`, `"`, `'`, `,`, `;`, `:`, `?`;\
+> The label must not contain any of the following: whitespace, `/`, `\`, `#`,
+> `"`, `'`, `,`, `;`, `:`, `?`;\
 > The label must not equal `.` (single dot) or `..` (double dot).
 
-Tags can be added to a document by `POST` to `/doc/{id}/tag`. You need to send an JSON object like the one above.
+Tags can be added to a document by `POST` to `/doc/{id}/tag`. You need to send
+a JSON object like the one above.
 
 > Tags must be created before adding them to a document.
 
 ### Parameterizable & Parameterized Tags
 
-Parameterizable tags can be created by `POST` to `/tag` too. You need to send an JSON object like this:
+Parameterizable tags can be created by `POST` to `/tag` too. You need to send a
+JSON object like this:
 
 ```json
 {
-    "label": "Amount",
-    "parameter": {
-        "type":"http://www.w3.org/2001/XMLSchema#decimal"
-    }
+  "label": "Amount",
+  "parameter": {
+    "type": "http://www.w3.org/2001/XMLSchema#decimal"
+  }
 }
-``` 
+```
 
-> Again, `label` must be unique. \
-> `parameter.type` can either be http://www.w3.org/2001/XMLSchema#decimal or http://www.w3.org/2001/XMLSchema#date .
+> Again, `label` must be unique.\
+> `parameter.type` can either be http://www.w3.org/2001/XMLSchema#decimal or
+> http://www.w3.org/2001/XMLSchema#date .
 
-Parameterizable tags can only be added to a document with a value assigned. 
By `POST`ing a JSON object like the following to `/doc/{id}/tag`, a parameterized tag is created and added to the document. +Parameterizable tags can only be added to a document with a value assigned. By +`POST`ing a JSON object like the following to `/doc/{id}/tag`, a parameterized +tag is created and added to the document. ```json { - "label": "Amount", - "parameter": { - "type":"http://www.w3.org/2001/XMLSchema#decimal", - "value":"12.50" - } + "label": "Amount", + "parameter": { + "type": "http://www.w3.org/2001/XMLSchema#decimal", + "value": "12.50" + } } -``` +``` -> A parameterizable tag with this `label` and `parameter.type` has to be created before. +> A parameterizable tag with this `label` and `parameter.type` has to be created +> before. ## Comments @@ -99,67 +118,73 @@ You can either send an JSON document like the following ```json { - "text": "This is a comment" + "text": "This is a comment" } ``` -When getting a comment, a JSON array with objects of the following structure is provided: +When getting a comment, a JSON array with objects of the following structure is +provided: ```json { - "text": "This is a comment", - "created": "2020-03-12T10:07:20.493Z" + "text": "This is a comment", + "created": "2020-03-12T10:07:20.493Z" } ``` ## API -| Address | Method | Description | Request / Payload | Response | Implemented in Version | -| - | - | - | - | - | - | -| `/count` | GET | Count (matching) documents | [1](#f1) [3](#f3) | Number | 1.1.0 | -| `/doc` | POST | Add / Store Document | PDF[5](#f5) | - | 1.1.0 | -| `/doc` | GET | Get List of all (matching) documents | [1](#f1) [2](#f2) [3](#f3) | Array of objects with document identifiers and titles (where available) | 1.1.0 | -| `/doc/{id}` | GET | Get this document | - | PDF | 1.1.0 | -| `/doc/{id}` | DELETE | Deletes all metadata associated with the document. Document will not be deleted and is stays accessible over /doc/{id}. | - | - | 1.1.0 | -| `/doc/{id}/comment` | POST | Add comment to document | Comment object / See above | - | 1.2.0 | -| `/doc/{id}/comment` | GET | Get comments | - | Array of comment objects | 1.2.0 | -| `/doc/{id}/tag` | POST | Add a tag to document | Tag object / See above | - | 1.1.0 | -| `/doc/{id}/tag` | GET | Get tags of document | - | Array of tag objects | 1.1.0 | -| `/doc/{id}/tag/{tagLabel}` | DELETE | Remove tag from document | - | - | 1.1.0 | -| `/doc/{id}/thumb` | GET | Get document thumbnail | - | PNG (300px wide) | 1.5.0 | -| `/doc/{id}/title` | PUT | Set document title | `{"title": "the_Title"}` | - | 1.1.0 | -| `/doc/{id}/title` | GET | Get document title | - | `{"title": "the_Title"}` | 1.1.0 | -| `/doc/{id}/title` | DELETE | Reset document title | - | - | 1.1.0 | -| `/doc/{id}/meta` | GET | Get various metadata | - | `{"title": "the_Title", "tags":[...], "comments": [...] ... }` | 1.1.0 \| .comments & .created in 1.2.1 | -| `/raw/rdf` | GET | Get all metadata as RDF. Useful for Backups | [4](#f4) | RDF, Content-Type defined over request Headers or ?accept. Fallback to text/turtle. | 1.1.0 | -| `/raw/rdf` | DELETE | Remove the temporary `rdf.ttl` file created during a backup upload (cancels a failed zip upload). Note: this does NOT delete stored metadata — `GET /raw/rdf` will continue to return the RDF data; use only if you are sure no upload is in progress. | - | 204 No Content | WIP | -| `/raw/rdf` | PUT | Replace the `http://3doc/meta` metadata graph in the backend with the provided RDF payload. 
| Any RDF serialization (Content-Type) | 204 No Content | WIP | -| `/raw/zip` or `/raw/tgz` | GET | Get all data. Useful for backups | - | ZIP / TGZ containing blobs/ directory with all pdfs as stored within tridoc and a rdf.ttl file with all metadata. | 1.3.0 | -| `/orphaned/tgz` | GET | Get a tar.gz archive of orphaned blob files (files in `blobs/` not referenced in the metadata graph) | - | TGZ containing orphaned blobs | 1.6.0 | -| `/orphaned/zip` | GET | Get a zip archive of orphaned blob files (files in `blobs/` not referenced in the metadata graph) | - | ZIP containing orphaned blobs | 1.6.0 | -| `/raw/zip` | PUT | Replace all data with backup zip | ZIP | Replaces the metadata and adds the blobs from the zip | 1.3.0 | -| `/tag` | POST | Create new tag | See above | - | 1.1.0 | -| `/tag` | GET | Get (list of) all tags | - | - | 1.1.0 | -| `/tag/{tagLabel}` | GET | Get Documents with this tag. Same as `/doc?tag={tagLabel}` | [1](#f1) [2](#f2) | Array of objects with document identifiers and titles (where available) | 1.1.0 | -| `/tag/{tagLabel}` | DELETE | Delete this tag | - | - | 1.1.0 | -| `/migrate` | POST | Migrate existing nanoid-based blob storage to hash-based storage. Separates documents from blobs in metadata. | - | Migration status JSON with counts and errors | 1.6.0 | -| `/version` | GET | Get tridoc version | - | semver version number | 1.1.0 | +| Address | Method | Description | Request / Payload | Response | Implemented in Version | +| -------------------------- | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | -------------------------------------- | +| `/count` | GET | Count (matching) documents | [1](#f1) [3](#f3) | Number | 1.1.0 | +| `/doc` | POST | Add / Store Document | PDF[5](#f5) | - | 1.1.0 | +| `/doc` | GET | Get List of all (matching) documents | [1](#f1) [2](#f2) [3](#f3) | Array of objects with document identifiers and titles (where available) | 1.1.0 | +| `/doc/{id}` | GET | Get this document | - | PDF | 1.1.0 | +| `/doc/{id}` | DELETE | Deletes all metadata associated with the document. Document will not be deleted and is stays accessible over /doc/{id}. | - | - | 1.1.0 | +| `/doc/{id}/comment` | POST | Add comment to document | Comment object / See above | - | 1.2.0 | +| `/doc/{id}/comment` | GET | Get comments | - | Array of comment objects | 1.2.0 | +| `/doc/{id}/tag` | POST | Add a tag to document | Tag object / See above | - | 1.1.0 | +| `/doc/{id}/tag` | GET | Get tags of document | - | Array of tag objects | 1.1.0 | +| `/doc/{id}/tag/{tagLabel}` | DELETE | Remove tag from document | - | - | 1.1.0 | +| `/doc/{id}/thumb` | GET | Get document thumbnail | - | PNG (300px wide) | 1.5.0 | +| `/doc/{id}/title` | PUT | Set document title | `{"title": "the_Title"}` | - | 1.1.0 | +| `/doc/{id}/title` | GET | Get document title | - | `{"title": "the_Title"}` | 1.1.0 | +| `/doc/{id}/title` | DELETE | Reset document title | - | - | 1.1.0 | +| `/doc/{id}/meta` | GET | Get various metadata | - | `{"title": "the_Title", "tags":[...], "comments": [...] ... }` | 1.1.0 \| .comments & .created in 1.2.1 | +| `/raw/rdf` | GET | Get all metadata as RDF. 
Useful for Backups | [4](#f4) | RDF, Content-Type defined over request Headers or ?accept. Fallback to text/turtle. | 1.1.0 | +| `/raw/rdf` | DELETE | Remove the temporary `rdf.ttl` file created during a backup upload (cancels a failed zip upload). Note: this does NOT delete stored metadata — `GET /raw/rdf` will continue to return the RDF data; use only if you are sure no upload is in progress. | - | 204 No Content | WIP | +| `/raw/rdf` | PUT | Replace the `http://3doc/meta` metadata graph in the backend with the provided RDF payload. | Any RDF serialization (Content-Type) | 204 No Content | WIP | +| `/raw/zip` or `/raw/tgz` | GET | Get all data. Useful for backups | - | ZIP / TGZ containing blobs/ directory with all pdfs as stored within tridoc and a rdf.ttl file with all metadata. | 1.3.0 | +| `/orphaned/tgz` | GET | Get a tar.gz archive of orphaned blob files (files in `blobs/` not referenced in the metadata graph) | - | TGZ containing orphaned blobs | 1.6.0 | +| `/orphaned/zip` | GET | Get a zip archive of orphaned blob files (files in `blobs/` not referenced in the metadata graph) | - | ZIP containing orphaned blobs | 1.6.0 | +| `/raw/zip` | PUT | Replace all data with backup zip | ZIP | Replaces the metadata and adds the blobs from the zip | 1.3.0 | +| `/tag` | POST | Create new tag | See above | - | 1.1.0 | +| `/tag` | GET | Get (list of) all tags | - | - | 1.1.0 | +| `/tag/{tagLabel}` | GET | Get Documents with this tag. Same as `/doc?tag={tagLabel}` | [1](#f1) [2](#f2) | Array of objects with document identifiers and titles (where available) | 1.1.0 | +| `/tag/{tagLabel}` | DELETE | Delete this tag | - | - | 1.1.0 | +| `/migrate` | POST | Migrate existing nanoid-based blob storage to hash-based storage. Separates documents from blobs in metadata. | - | Migration status JSON with counts and errors | 1.6.0 | +| `/version` | GET | Get tridoc version | - | semver version number | 1.1.0 | #### URL-Parameters supported: -[1](#f1) : ?text \ +[1](#f1) : ?text\ [2](#f2) : ?limit and ?offset -[3](#f3) : ?tag and ?nottag \ -Since 1.4.4, filtering for Tag Ranges is possible with the following syntax: `…={label};{min};{max}`. `min` or `max` may be ommitted for unbounded search. Trailing semocolons may be omitted. -Example: +[3](#f3) : ?tag and ?nottag\ +Since 1.4.4, filtering for Tag Ranges is possible with the following syntax: +`…={label};{min};{max}`. `min` or `max` may be ommitted for unbounded search. +Trailing semocolons may be omitted. Example: + ``` …?tag=foo;;30&tag=bar;2020-01-01;2020-12-31 ``` + gives all that have tag foo with a value <= 30, and bar values within 2020. + > Be aware that this may need replacing of the caracter `;` by `%3B`. 
-[4](#f4) : ?accept \ -[5](#f5) : ?date followed by an ISO 8601 date string including time and timezone, seconds optional, sets creation date +[4](#f4) : ?accept\ +[5](#f5) : ?date followed by an ISO 8601 date string +including time and timezone, seconds optional, sets creation date > Deleting / editing comments might be supported in the future diff --git a/deno.jsonc b/deno.jsonc index 67645b3..d7dfd2f 100644 --- a/deno.jsonc +++ b/deno.jsonc @@ -6,7 +6,7 @@ }, "tasks": { // --allow-run=convert,pdfsandwich,pdftotext,tar,zip,unzip,bash - "run": "deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,FUSEKI_PWD,OCR_LANG src/main.ts", - "run-watch": "deno run --watch --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,FUSEKI_PWD,OCR_LANG src/main.ts" + "run": "deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,FUSEKI_PWD,OCR_LANG src/main.ts", + "run-watch": "deno run --watch --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl --allow-run --allow-env=TRIDOC_PWD,FUSEKI_PWD,OCR_LANG src/main.ts" } } diff --git a/docker-compose.yml b/docker-compose.yml index 2850b27..cac7a60 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -25,4 +25,4 @@ services: interval: 5s timeout: 3s retries: 30 - start_period: 10s \ No newline at end of file + start_period: 10s diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index c730609..130f846 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -2,7 +2,11 @@ import { nanoid } from "../deps.ts"; import { respond } from "../helpers/cors.ts"; import { getText } from "../helpers/pdfprocessor.ts"; import { processParams } from "../helpers/processParams.ts"; -import { storeBlob, getBlobPath, getThumbnailPath } from "../helpers/blobStore.ts"; +import { + getBlobPath, + getThumbnailPath, + storeBlob, +} from "../helpers/blobStore.ts"; import * as metadelete from "../meta/delete.ts"; import * as metafinder from "../meta/finder.ts"; import * as metastore from "../meta/store.ts"; @@ -29,7 +33,6 @@ function getPath(id: string) { id.slice(6, 14) + "/" + id; } - export async function deleteDoc( _request: Request, match: URLPatternResult, @@ -77,7 +80,7 @@ export async function getPDF( ): Promise { const id = match.pathname.groups.id!; const meta = await metafinder.getBasicMeta(id); - + // Determine the file path based on whether we have a blob hash or legacy ID let path: string; if (meta.blob) { @@ -87,7 +90,7 @@ export async function getPDF( // Legacy nanoid-based storage path = getPath(id); } - + try { const fileName = meta.title || meta.created || "document"; const file = await Deno.open(path, { read: true }); @@ -144,7 +147,7 @@ export async function getThumb( ): Promise { const id = match.pathname.groups.id!; const meta = await metafinder.getBasicMeta(id); - + // Determine the file path based on whether we have a blob hash or legacy ID let thumbPath: string; if (meta.blob) { @@ -154,47 +157,58 @@ export async function getThumb( // Legacy nanoid-based storage thumbPath = getPath(id) + ".png"; } - + const fileName = meta.title || meta.created || "thumbnail"; let thumb: Deno.FsFile; try { thumb = await Deno.open(thumbPath, { read: true }); } catch (error) { if (error instanceof Deno.errors.NotFound) { - try { - // Get the blob path for thumbnail generation - let blobPath: string; - if (meta.blob) { - blobPath = getBlobPath(meta.blob); - // Ensure the thumbnail directory 
exists for hash-based storage - const { dir: thumbDir } = hashToThumbnailPath(meta.blob); - await ensureDir(thumbDir); - } else { - blobPath = getPath(id); - // For legacy storage the directory should already exist with the PDF - } - - await Deno.stat(blobPath); // Check if PDF exists → 404 otherwise - const cmd = new Deno.Command("convert", { - args: ["-thumbnail", "300x", "-alpha", "remove", `${blobPath}[0]`, thumbPath], - stdout: "piped", - stderr: "piped", - }); - const { success, code, stdout, stderr } = await cmd.output(); - if (!success) { - const td = new TextDecoder(); - const err = td.decode(stderr) || td.decode(stdout); - console.error("ImageMagick convert error (on-demand):", err.trim()); - throw new Error("convert failed with code " + code + (err ? ": " + err : "")); - } - thumb = await Deno.open(thumbPath, { read: true }); - } catch (error) { + try { + // Get the blob path for thumbnail generation + let blobPath: string; + if (meta.blob) { + blobPath = getBlobPath(meta.blob); + // Ensure the thumbnail directory exists for hash-based storage + const { dir: thumbDir } = hashToThumbnailPath(meta.blob); + await ensureDir(thumbDir); + } else { + blobPath = getPath(id); + // For legacy storage the directory should already exist with the PDF + } + + await Deno.stat(blobPath); // Check if PDF exists → 404 otherwise + const cmd = new Deno.Command("convert", { + args: [ + "-thumbnail", + "300x", + "-alpha", + "remove", + `${blobPath}[0]`, + thumbPath, + ], + stdout: "piped", + stderr: "piped", + }); + const { success, code, stdout, stderr } = await cmd.output(); + if (!success) { + const td = new TextDecoder(); + const err = td.decode(stderr) || td.decode(stdout); + console.error("ImageMagick convert error (on-demand):", err.trim()); + throw new Error( + "convert failed with code " + code + (err ? 
": " + err : ""), + ); + } + thumb = await Deno.open(thumbPath, { read: true }); + } catch (error) { if (error instanceof Deno.errors.NotFound) { return respond("404 Not Found", { status: 404 }); } // Surface ImageMagick error to client for easier debugging if (error instanceof Error) { - return respond("Thumbnail generation failed: " + error.message, { status: 500 }); + return respond("Thumbnail generation failed: " + error.message, { + status: 500, + }); } return respond("Thumbnail generation failed", { status: 500 }); } @@ -246,7 +260,9 @@ export async function postComment( if (!id) return respond("Missing document id in path", { status: 400 }); const body = await request.json(); if (!body || typeof body.text !== "string" || body.text.trim() === "") { - return respond("Missing or invalid 'text' in request body", { status: 400 }); + return respond("Missing or invalid 'text' in request body", { + status: 400, + }); } const text: string = body.text; const created = await metastore.addComment(id, text); @@ -293,12 +309,15 @@ export async function postPDF( const { id, ocrMissing } = await processPDF(tmpUploadPath); if (ocrMissing) { - return respond("OCR not produced; stored original PDF without embedded text", { - headers: { - "Location": "/doc/" + id, - "Access-Control-Expose-Headers": "Location", + return respond( + "OCR not produced; stored original PDF without embedded text", + { + headers: { + "Location": "/doc/" + id, + "Access-Control-Expose-Headers": "Location", + }, }, - }); + ); } return respond(undefined, { headers: { @@ -307,13 +326,17 @@ export async function postPDF( }, }); } finally { - try { await Deno.remove(tmpDir, { recursive: true }); } catch (_) { /* ignore cleanup errors */ } + try { + await Deno.remove(tmpDir, { recursive: true }); + } catch (_) { /* ignore cleanup errors */ } } } // Process a PDF file path: if it already contains text => storePDF; otherwise run pdfsandwich // and store OCR output if present. Returns the generated id and whether OCR output was missing. 
-async function processPDF(pdfPath: string): Promise<{ id: string; ocrMissing: boolean }> { +async function processPDF( + pdfPath: string, +): Promise<{ id: string; ocrMissing: boolean }> { let text = ""; try { text = await getText(pdfPath); @@ -327,8 +350,12 @@ async function processPDF(pdfPath: string): Promise<{ id: string; ocrMissing: bo } // run pdfsandwich in same directory as pdfPath so output lands predictably - const dir = pdfPath.substring(0, Math.max(0, pdfPath.lastIndexOf("/"))) || "."; - const base = pdfPath.substring(pdfPath.lastIndexOf("/") + 1).replace(/\.pdf$/i, ""); + const dir = pdfPath.substring(0, Math.max(0, pdfPath.lastIndexOf("/"))) || + "."; + const base = pdfPath.substring(pdfPath.lastIndexOf("/") + 1).replace( + /\.pdf$/i, + "", + ); const lang = Deno.env.get("OCR_LANG") || "fra+deu+eng"; try { const cmd = new Deno.Command("pdfsandwich", { @@ -353,7 +380,10 @@ async function processPDF(pdfPath: string): Promise<{ id: string; ocrMissing: bo return { id, ocrMissing: false }; } catch (err) { if (err instanceof Deno.errors.NotFound) { - console.error("OCR output not found at expected location:", ocrCandidate); + console.error( + "OCR output not found at expected location:", + ocrCandidate, + ); const id = await storePDF(pdfPath); return { id, ocrMissing: true }; } @@ -384,7 +414,9 @@ async function storePDF(pdfPath: string): Promise { // Ensure thumbnail directory and generate thumbnail only if missing try { - const { dir: thumbDir, fullPath: thumbPath } = hashToThumbnailPath(blobHash); + const { dir: thumbDir, fullPath: thumbPath } = hashToThumbnailPath( + blobHash, + ); await ensureDir(thumbDir); let thumbExists = false; try { @@ -395,7 +427,14 @@ async function storePDF(pdfPath: string): Promise { } if (!thumbExists) { const cmd = new Deno.Command("convert", { - args: ["-thumbnail", "300x", "-alpha", "remove", `${getBlobPath(blobHash)}[0]`, thumbPath], + args: [ + "-thumbnail", + "300x", + "-alpha", + "remove", + `${getBlobPath(blobHash)}[0]`, + thumbPath, + ], stdout: "inherit", stderr: "inherit", }); @@ -452,7 +491,9 @@ export async function putTitle( if (!id) return respond("Missing document id in path", { status: 400 }); const body = await request.json(); if (!body || typeof body.title !== "string" || body.title.trim() === "") { - return respond("Missing or invalid 'title' in request body", { status: 400 }); + return respond("Missing or invalid 'title' in request body", { + status: 400, + }); } const title: string = body.title; await metastore.addTitle(id, title); diff --git a/src/handlers/orphaned.ts b/src/handlers/orphaned.ts index 77ba496..7e813d5 100644 --- a/src/handlers/orphaned.ts +++ b/src/handlers/orphaned.ts @@ -21,7 +21,7 @@ async function listAllBlobFiles(): Promise { // skip the rdf metadata folder if (p.endsWith("/rdf")) continue; await walk(p); - } else if (entry.isFile && !entry.name.endsWith('.png')) { + } else if (entry.isFile && !entry.name.endsWith(".png")) { // Only include non-thumbnail files result.push(p); } @@ -56,21 +56,23 @@ async function getOrphanedFiles(): Promise { const nameNoExt = stripExtension(basename(p)); return !referenced.has(nameNoExt); }); - + return orphaned; } async function createArchive( orphaned: string[], - format: "zip" | "tgz" + format: "zip" | "tgz", ): Promise<{ path: string; tmpDir: string; fileList: string }> { const ts = Date.now(); const fileList = await writeFileList(orphaned); const tmpDir = await Deno.makeTempDir({ prefix: "orphaned-" }); - const archivePath = 
`${tmpDir}/orphaned-${format}-${ts}.${format === "zip" ? "zip" : "tar.gz"}`; - + const archivePath = `${tmpDir}/orphaned-${format}-${ts}.${ + format === "zip" ? "zip" : "tar.gz" + }`; + let cmd: Deno.Command; - + if (format === "zip") { // Create flat zip - use -j flag to junk (ignore) paths, storing files flat cmd = new Deno.Command("bash", { @@ -79,13 +81,16 @@ async function createArchive( } else { // Create flat tar - use --transform to strip directory paths cmd = new Deno.Command("bash", { - args: ["-c", `tar -C blobs -czf ${archivePath} --transform 's|.*/||' -T ${fileList}`], + args: [ + "-c", + `tar -C blobs -czf ${archivePath} --transform 's|.*/||' -T ${fileList}`, + ], }); } - + const p = cmd.spawn(); const status = await p.status; - + if (!status.success) { // Clean up on failure try { @@ -96,23 +101,26 @@ async function createArchive( } throw new Error(`${format} creation failed with code ${status.code}`); } - + return { path: archivePath, tmpDir, fileList }; } async function createArchiveResponse( - format: "zip" | "tgz" + format: "zip" | "tgz", ): Promise { const orphaned = await getOrphanedFiles(); if (orphaned.length === 0) return respond(undefined, { status: 204 }); - const { path: archivePath, tmpDir, fileList } = await createArchive(orphaned, format); - + const { path: archivePath, tmpDir, fileList } = await createArchive( + orphaned, + format, + ); + // Remove the temporary file list await Deno.remove(fileList); - + const f = await Deno.open(archivePath, { read: true }); - + // unlink the archive so it doesn't linger on disk; fd remains readable on POSIX systems try { await Deno.remove(archivePath); @@ -121,15 +129,16 @@ async function createArchiveResponse( } catch (_e) { // ignore cleanup errors } - + const readableStream = f.readable; const ts = Date.now(); const extension = format === "zip" ? "zip" : "tar.gz"; const contentType = format === "zip" ? 
"application/zip" : "application/gzip"; - + return respond(readableStream, { headers: { - "content-disposition": `inline; filename="tridoc_orphaned_${ts}.${extension}"`, + "content-disposition": + `inline; filename="tridoc_orphaned_${ts}.${extension}"`, "content-type": contentType, }, }); diff --git a/src/handlers/raw.ts b/src/handlers/raw.ts index 32bcca3..7ac4b7d 100644 --- a/src/handlers/raw.ts +++ b/src/handlers/raw.ts @@ -42,11 +42,16 @@ export async function getTGZ( const writableStream = writableStreamFromWriter(rdf); await (await dump()).body?.pipeTo(writableStream); const cmd = new Deno.Command("bash", { - args: ["-c", `tar --transform="s|${rdfPath}|rdf.ttl|" --exclude-tag="${rdfName}" -czvf ${tarPath} blobs/*/`], + args: [ + "-c", + `tar --transform="s|${rdfPath}|rdf.ttl|" --exclude-tag="${rdfName}" -czvf ${tarPath} blobs/*/`, + ], }); const p = cmd.spawn(); const status = await p.status; - if (!status.success) throw new Error("tar -czf failed with code " + status.code); + if (!status.success) { + throw new Error("tar -czf failed with code " + status.code); + } await Deno.remove(rdfPath); const tar = await Deno.open(tarPath); // Build a readable stream so the file doesn't have to be fully loaded into memory while we send it @@ -76,12 +81,16 @@ export async function getZIP( const writableStream = writableStreamFromWriter(rdf); await (await dump()).body?.pipeTo(writableStream); // Create zip - const cmd1 = new Deno.Command("bash", { args: ["-c", `zip -r ${zipPath} blobs/*/ ${rdfPath} -x "blobs/rdf/*"`] }); + const cmd1 = new Deno.Command("bash", { + args: ["-c", `zip -r ${zipPath} blobs/*/ ${rdfPath} -x "blobs/rdf/*"`], + }); const p_1 = cmd1.spawn(); const r_1 = await p_1.status; if (!r_1.success) throw new Error("zip failed with code " + r_1.code); // move rdf-??? to rdf.zip - const cmd2 = new Deno.Command("bash", { args: ["-c", `printf "@ ${rdfPath}\n@=rdf.ttl\n" | zipnote -w ${zipPath}`] }); + const cmd2 = new Deno.Command("bash", { + args: ["-c", `printf "@ ${rdfPath}\n@=rdf.ttl\n" | zipnote -w ${zipPath}`], + }); const p_2 = cmd2.spawn(); const r_2 = await p_2.status; if (!r_2.success) throw new Error("zipnote failed with code " + r_2.code); diff --git a/src/helpers/blobStore.ts b/src/helpers/blobStore.ts index f83dc17..0a9f053 100644 --- a/src/helpers/blobStore.ts +++ b/src/helpers/blobStore.ts @@ -1,5 +1,9 @@ import { ensureDir } from "../deps.ts"; -import { computeIPFSHash, hashToPath, hashToThumbnailPath } from "./ipfsHash.ts"; +import { + computeIPFSHash, + hashToPath, + hashToThumbnailPath, +} from "./ipfsHash.ts"; /** * Store a blob using content-based IPFS hash as identifier. 
@@ -8,10 +12,10 @@ import { computeIPFSHash, hashToPath, hashToThumbnailPath } from "./ipfsHash.ts" export async function storeBlob(content: Uint8Array): Promise { // Compute content hash const hash = await computeIPFSHash(content); - + // Get storage path in ipfs subdirectory const { dir, fullPath } = hashToPath(hash); - + // Check if blob already exists (deduplication) try { await Deno.stat(fullPath); @@ -22,11 +26,11 @@ export async function storeBlob(content: Uint8Array): Promise { throw error; } } - + // Create directory and store blob await ensureDir(dir); await Deno.writeFile(fullPath, content); - + console.log(`Stored new blob: ${hash}`); return hash; } @@ -64,12 +68,15 @@ export function getBlobDir(hash: string): string { /** * Store thumbnail for a blob */ -export async function storeThumbnail(hash: string, thumbnailContent: Uint8Array): Promise { +export async function storeThumbnail( + hash: string, + thumbnailContent: Uint8Array, +): Promise { const { dir, fullPath } = hashToThumbnailPath(hash); - + await ensureDir(dir); await Deno.writeFile(fullPath, thumbnailContent); - + console.log(`Stored thumbnail for blob: ${hash}`); } diff --git a/src/helpers/ipfsHash.ts b/src/helpers/ipfsHash.ts index 2fb4566..37515ee 100644 --- a/src/helpers/ipfsHash.ts +++ b/src/helpers/ipfsHash.ts @@ -2,28 +2,28 @@ import { crypto, encodeBase58 } from "../deps.ts"; /** * Compute IPFS-compatible hash for content using SHA-256. - * + * * IPFS uses multihash format: * - 1 byte: hash function code (0x12 for SHA-256) * - 1 byte: digest length (0x20 for 32 bytes) * - N bytes: actual hash digest - * + * * Then encoded with base58btc for content addressing. */ export async function computeIPFSHash(content: Uint8Array): Promise { // Compute SHA-256 hash const hashBuffer = await crypto.subtle.digest("SHA-256", content); const hashBytes = new Uint8Array(hashBuffer); - + // Create multihash: [fn_code, digest_size, ...digest] const multihash = new Uint8Array(34); // 1 + 1 + 32 bytes multihash[0] = 0x12; // SHA-256 function code multihash[1] = 0x20; // 32 bytes digest length multihash.set(hashBytes, 2); - + // Encode with base58btc (CIDv0 format already includes the "Qm" prefix) const base58Hash = encodeBase58(multihash.buffer); - + return base58Hash; } @@ -41,19 +41,25 @@ export async function computeFileIPFSHash(filePath: string): Promise { */ export function hashToPath(hash: string): { dir: string; fullPath: string } { // Store in blobs/ipfs/ subdirectory using first 4 chars as top level - const dir = `./blobs/ipfs/${hash.slice(0, 4)}/${hash.slice(4, 8)}/${hash.slice(8, 16)}`; + const dir = `./blobs/ipfs/${hash.slice(0, 4)}/${hash.slice(4, 8)}/${ + hash.slice(8, 16) + }`; const fullPath = `${dir}/${hash}.pdf`; - + return { dir, fullPath }; } /** * Convert hash to thumbnail path */ -export function hashToThumbnailPath(hash: string): { dir: string; fullPath: string } { +export function hashToThumbnailPath( + hash: string, +): { dir: string; fullPath: string } { // Store in blobs/thumbs/ subdirectory using same structure as blobs - const dir = `./blobs/thumbs/${hash.slice(0, 4)}/${hash.slice(4, 8)}/${hash.slice(8, 16)}`; + const dir = `./blobs/thumbs/${hash.slice(0, 4)}/${hash.slice(4, 8)}/${ + hash.slice(8, 16) + }`; const fullPath = `${dir}/${hash}.png`; - + return { dir, fullPath }; } diff --git a/src/helpers/pdfprocessor.ts b/src/helpers/pdfprocessor.ts index d5d04ac..b00f584 100644 --- a/src/helpers/pdfprocessor.ts +++ b/src/helpers/pdfprocessor.ts @@ -1,11 +1,16 @@ const decoder = new TextDecoder("utf-8"); 
export async function getText(path: string) { - const cmd = new Deno.Command("pdftotext", { args: [path, "-"], stdout: "piped" as const }); + const cmd = new Deno.Command("pdftotext", { + args: [path, "-"], + stdout: "piped" as const, + }); const p = cmd.spawn(); const result = await p.output(); const output = decoder.decode(result.stdout); const status = await p.status; - if (!status.success) throw new Error("pdftotext failed with code " + status.code); + if (!status.success) { + throw new Error("pdftotext failed with code " + status.code); + } return output; } diff --git a/src/meta/finder.ts b/src/meta/finder.ts index a8a4967..a8d9942 100644 --- a/src/meta/finder.ts +++ b/src/meta/finder.ts @@ -84,9 +84,11 @@ export async function getDocumentList( tagQuery + " OPTIONAL { ?s s:name ?title . }\n" + (text - ? ' OPTIONAL { ?s s:text ?fulltext . }\n' + - ' FILTER (CONTAINS(LCASE(COALESCE(?title, "")), LCASE("' + text + '")) || ' + - ' CONTAINS(LCASE(COALESCE(?fulltext, "")), LCASE("' + text + '")))\n' + ? " OPTIONAL { ?s s:text ?fulltext . }\n" + + ' FILTER (CONTAINS(LCASE(COALESCE(?title, "")), LCASE("' + text + + '")) || ' + + ' CONTAINS(LCASE(COALESCE(?fulltext, "")), LCASE("' + text + + '")))\n' : "") + " }\n" + "}\n" + @@ -169,13 +171,13 @@ WHERE { ?s s:identifier ?identifier . ${tagQuery} ${ - text - ? `OPTIONAL { ?s s:name ?title . } + text + ? `OPTIONAL { ?s s:name ?title . } OPTIONAL { ?s s:text ?fulltext . } FILTER (CONTAINS(LCASE(COALESCE(?title, "")), LCASE("${text}")) || CONTAINS(LCASE(COALESCE(?fulltext, "")), LCASE("${text}")))\n` - : "" - } + : "" + } } }`).then((json) => parseInt(json.results.bindings[0].count.value, 10)); } diff --git a/src/meta/fusekiFetch.ts b/src/meta/fusekiFetch.ts index 8b71765..208643d 100644 --- a/src/meta/fusekiFetch.ts +++ b/src/meta/fusekiFetch.ts @@ -10,12 +10,13 @@ type SparqlJson = { import { DEFAULT_FUSEKI_PWD } from "../config.ts"; export function dump(accept = "text/turtle") { - const query = "CONSTRUCT { ?s ?p ?o } WHERE { GRAPH { ?s ?p ?o } }"; + const query = + "CONSTRUCT { ?s ?p ?o } WHERE { GRAPH { ?s ?p ?o } }"; console.log((new Date()).toISOString(), "→ FUSEKI QUERY", query, "\n"); return fetch("http://fuseki:3030/3DOC/query", { method: "POST", headers: { - "Authorization": getAuthHeader(), + "Authorization": getAuthHeader(), "Content-Type": "application/sparql-query", "Accept": accept, }, @@ -28,7 +29,7 @@ export async function fusekiFetch(query: string): Promise { return await fetch("http://fuseki:3030/3DOC/query", { method: "POST", headers: { - "Authorization": getAuthHeader(), + "Authorization": getAuthHeader(), "Content-Type": "application/sparql-query", }, body: query, @@ -61,4 +62,3 @@ export function getAuthHeader() { const pwd = Deno.env.get("FUSEKI_PWD") || DEFAULT_FUSEKI_PWD; return "Basic " + btoa("admin:" + pwd); } - diff --git a/src/meta/store.ts b/src/meta/store.ts index c3c27d3..9b28f3f 100644 --- a/src/meta/store.ts +++ b/src/meta/store.ts @@ -70,7 +70,9 @@ DELETE { GRAPH { s:name ?o } } INSERT { - GRAPH { s:name "${escapeLiteral(title)}" } + GRAPH { s:name "${ + escapeLiteral(title) + }" } } WHERE { GRAPH { OPTIONAL { s:name ?o } } @@ -104,7 +106,9 @@ export function setGraph(data: string, contentType = "text/turtle") { // Forward all payloads to Fuseki's data endpoint and let Fuseki parse the provided // serialization according to the Content-Type. This keeps a single code path // and supports every Fuseki-supported RDF serialization uniformly. 
- const url = `http://fuseki:3030/3DOC/data?graph=${encodeURIComponent("http://3doc/meta")}`; + const url = `http://fuseki:3030/3DOC/data?graph=${ + encodeURIComponent("http://3doc/meta") + }`; return fetch(url, { method: "PUT", headers: { @@ -115,7 +119,9 @@ export function setGraph(data: string, contentType = "text/turtle") { }).then(async (res) => { if (!res.ok) { const text = await res.text().catch(() => "(no response body)"); - throw new Error(`Fuseki Error replacing ${contentType} graph: ${res.status} ${text}`); + throw new Error( + `Fuseki Error replacing ${contentType} graph: ${res.status} ${text}`, + ); } }); } @@ -140,7 +146,12 @@ INSERT DATA { } export async function storeDocumentWithBlob( - { id, text, date, blobHash }: { id: string; text: string; date?: string; blobHash: string }, + { id, text, date, blobHash }: { + id: string; + text: string; + date?: string; + blobHash: string; + }, ) { const created = (date ? new Date(date) : new Date()).toISOString(); const query = ` diff --git a/src/server/server.ts b/src/server/server.ts index f810f0a..c186db0 100644 --- a/src/server/server.ts +++ b/src/server/server.ts @@ -42,7 +42,8 @@ const handler = async (request: Request): Promise => { } catch (error) { let message; if (error instanceof Deno.errors.PermissionDenied) { - message = "Got “Permission Denied” trying to access the file on disk.\n\n Please run ```docker exec -u 0 [name of backend-container] chmod -R a+r ./blobs/ rdf.ttl``` on the host server to fix this and similar issues for the future." + message = + "Got “Permission Denied” trying to access the file on disk.\n\n Please run ```docker exec -u 0 [name of backend-container] chmod -R a+r ./blobs/ rdf.ttl``` on the host server to fix this and similar issues for the future."; } console.log( (new Date()).toISOString(), From cf814e2f0830bca47b73803c6997bfe1685d1720 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Sat, 8 Nov 2025 09:28:59 +0000 Subject: [PATCH 85/90] validate tag label presence in deleteTag and getDocs functions --- src/handlers/tag.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/handlers/tag.ts b/src/handlers/tag.ts index e572b25..80b21b7 100644 --- a/src/handlers/tag.ts +++ b/src/handlers/tag.ts @@ -51,8 +51,10 @@ export async function deleteTag( _request: Request, match: URLPatternResult, ) { + const tagLabel = match.pathname.groups.tagLabel; + if (!tagLabel) return respond("Tag label missing", { status: 400 }); await metadelete.deleteTag( - decodeURIComponent(match.pathname.groups.tagLabel), + decodeURIComponent(tagLabel), ); return respond(undefined, { status: 204 }); } @@ -61,8 +63,10 @@ export async function getDocs( request: Request, match: URLPatternResult, ): Promise { + const tagLabel = match.pathname.groups.tagLabel; + if (!tagLabel) return respond("Tag label missing", { status: 400 }); const params = await processParams(request, { - tags: [[match.pathname.groups.tagLabel]], + tags: [[tagLabel]], }); const response = await metafinder.getDocumentList(params); return respond(JSON.stringify(response), { From fd59d51119dc24074f11513ca204fc36620061d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Sat, 8 Nov 2025 09:39:03 +0000 Subject: [PATCH 86/90] Refactor: extract pdfsandwich OCR to helper runPdfsandwich --- src/handlers/doc.ts | 50 +++++++-------------------------------------- src/helpers/ocr.ts | 49 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 43 deletions(-) create mode 100644 src/helpers/ocr.ts 
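
A quick reviewer-oriented sketch of how the extracted helper can be exercised on its own, assuming the `runPdfsandwich(pdfPath, lang)` signature introduced below (it resolves to the `_ocr.pdf` path on success and `null` otherwise); the import path, sample file and language string are illustrative only, not part of the patch:

```ts
// Hypothetical standalone check of the OCR helper (not part of the diff below).
import { runPdfsandwich } from "./src/helpers/ocr.ts";

const ocrPath = await runPdfsandwich("/tmp/upload.pdf", "deu+eng");
if (ocrPath) {
  console.log("OCR output written to", ocrPath); // e.g. /tmp/upload_ocr.pdf
} else {
  console.log("no usable OCR output; the caller stores the original PDF");
}
```
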
diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts index 130f846..9ce9b92 100644 --- a/src/handlers/doc.ts +++ b/src/handlers/doc.ts @@ -1,6 +1,7 @@ import { nanoid } from "../deps.ts"; import { respond } from "../helpers/cors.ts"; import { getText } from "../helpers/pdfprocessor.ts"; +import { runPdfsandwich } from "../helpers/ocr.ts"; import { processParams } from "../helpers/processParams.ts"; import { getBlobPath, @@ -349,51 +350,14 @@ async function processPDF( return { id, ocrMissing: false }; } - // run pdfsandwich in same directory as pdfPath so output lands predictably - const dir = pdfPath.substring(0, Math.max(0, pdfPath.lastIndexOf("/"))) || - "."; - const base = pdfPath.substring(pdfPath.lastIndexOf("/") + 1).replace( - /\.pdf$/i, - "", - ); const lang = Deno.env.get("OCR_LANG") || "fra+deu+eng"; - try { - const cmd = new Deno.Command("pdfsandwich", { - args: ["-rgb", "-lang", lang, pdfPath], - cwd: dir, - stdout: "inherit", - stderr: "inherit", - }); - const child = cmd.spawn(); - const status = await child.status; - if (!status.success) { - console.error("pdfsandwich failed with code", status.code); - const id = await storePDF(pdfPath); - return { id, ocrMissing: true }; - } - - // Expect pdfsandwich to write _ocr.pdf next to the input file - const ocrCandidate = `${dir}/${base}_ocr.pdf`; - try { - await Deno.stat(ocrCandidate); - const id = await storePDF(ocrCandidate); - return { id, ocrMissing: false }; - } catch (err) { - if (err instanceof Deno.errors.NotFound) { - console.error( - "OCR output not found at expected location:", - ocrCandidate, - ); - const id = await storePDF(pdfPath); - return { id, ocrMissing: true }; - } - throw err; - } - } catch (err) { - console.error("pdfsandwich execution failed:", String(err)); - const id = await storePDF(pdfPath); - return { id, ocrMissing: true }; + const ocrPath = await runPdfsandwich(pdfPath, lang); + if (ocrPath) { + const id = await storePDF(ocrPath); + return { id, ocrMissing: false }; } + const id = await storePDF(pdfPath); + return { id, ocrMissing: true }; } // storePDF: read pdfPath bytes, extract text (if any), store blob, ensure thumbnail (only if missing), diff --git a/src/helpers/ocr.ts b/src/helpers/ocr.ts new file mode 100644 index 0000000..c27c265 --- /dev/null +++ b/src/helpers/ocr.ts @@ -0,0 +1,49 @@ +// Helper for running pdfsandwich OCR on a PDF lacking embedded text. +// Returns the path to the generated OCR PDF ("_ocr.pdf") if successful, otherwise null. +// Keeps implementation minimal so handlers own flow decisions. 
+
+export async function runPdfsandwich(
+  pdfPath: string,
+  lang: string,
+): Promise<string | null> {
+  // Determine working directory and expected output file name
+  const dir = pdfPath.substring(0, Math.max(0, pdfPath.lastIndexOf("/"))) ||
+    ".";
+  const base = pdfPath.substring(pdfPath.lastIndexOf("/") + 1).replace(
+    /\.pdf$/i,
+    "",
+  );
+  const ocrCandidate = `${dir}/${base}_ocr.pdf`;
+
+  try {
+    const cmd = new Deno.Command("pdfsandwich", {
+      args: ["-rgb", "-lang", lang, pdfPath],
+      cwd: dir,
+      stdout: "inherit",
+      stderr: "inherit",
+    });
+    const child = cmd.spawn();
+    const status = await child.status;
+    if (!status.success) {
+      console.error("pdfsandwich failed with code", status.code);
+      return null;
+    }
+    // Expect pdfsandwich to write _ocr.pdf next to input
+    try {
+      await Deno.stat(ocrCandidate);
+      return ocrCandidate;
+    } catch (err) {
+      if (err instanceof Deno.errors.NotFound) {
+        console.error(
+          "OCR output not found at expected location:",
+          ocrCandidate,
+        );
+        return null;
+      }
+      throw err;
+    }
+  } catch (err) {
+    console.error("pdfsandwich execution failed:", String(err));
+    return null;
+  }
+}

From a53fca44d582fdf7f1b1970ab0741ef2233abcf3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reto=20Gm=C3=BCr?=
Date: Sat, 8 Nov 2025 09:51:29 +0000
Subject: [PATCH 87/90] Remove unstable flag from Deno settings in VSCode
 configuration

---
 .vscode/settings.json | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index e40716f..e1533c2 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,5 +1,4 @@
 {
   "deno.enable": true,
-  "deno.lint": true,
-  "deno.unstable": true
+  "deno.lint": true
 }

From 356a082ca3a610e8ca8a71535fe18ba5005fb621 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reto=20Gm=C3=BCr?=
Date: Sat, 8 Nov 2025 10:04:33 +0000
Subject: [PATCH 88/90] removed variable as this wrongly suggests
 configurability while in fact the fallback is hardcoded in the shell script

---
 src/config.ts           | 1 -
 src/meta/fusekiFetch.ts | 4 +---
 2 files changed, 1 insertion(+), 4 deletions(-)
 delete mode 100644 src/config.ts

diff --git a/src/config.ts b/src/config.ts
deleted file mode 100644
index 8bc3048..0000000
--- a/src/config.ts
+++ /dev/null
@@ -1 +0,0 @@
-export const DEFAULT_FUSEKI_PWD = "pw123";

diff --git a/src/meta/fusekiFetch.ts b/src/meta/fusekiFetch.ts
index 208643d..63fb9d5 100644
--- a/src/meta/fusekiFetch.ts
+++ b/src/meta/fusekiFetch.ts
@@ -7,8 +7,6 @@ type SparqlJson = {
   };
 };
 
-import { DEFAULT_FUSEKI_PWD } from "../config.ts";
-
 export function dump(accept = "text/turtle") {
   const query =
     "CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <http://3doc/meta> { ?s ?p ?o } }";
@@ -59,6 +57,6 @@ export async function fusekiUpdate(query: string): Promise<void> {
 }
 
 export function getAuthHeader() {
-  const pwd = Deno.env.get("FUSEKI_PWD") || DEFAULT_FUSEKI_PWD;
+  const pwd = Deno.env.get("FUSEKI_PWD") || "pw123";
   return "Basic " + btoa("admin:" + pwd);
 }

From fa8edb125ad07234dcd7258eef2b2c5e8f25ae0e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reto=20Gm=C3=BCr?=
Date: Sat, 8 Nov 2025 10:19:33 +0000
Subject: [PATCH 89/90] Refactor: remove unused _getDir function and update
 getPath for legacy storage

---
 src/handlers/doc.ts | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/handlers/doc.ts b/src/handlers/doc.ts
index 9ce9b92..b4d2303 100644
--- a/src/handlers/doc.ts
+++ b/src/handlers/doc.ts
@@ -24,11 +24,9 @@ type TagAdd = {
   }; // only for parameterizable tags
 };
 
-function _getDir(id: string) {
-  return "./blobs/" + id.slice(0, 2) + "/" + id.slice(2, 
6) + "/" + - id.slice(6, 14); -} - +/** + * Used for legacy nanoid-based storage + */ function getPath(id: string) { return "./blobs/" + id.slice(0, 2) + "/" + id.slice(2, 6) + "/" + id.slice(6, 14) + "/" + id; From 4134f2c5147d7fda14570978ccbc97cc4d4389fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Gm=C3=BCr?= Date: Sat, 8 Nov 2025 10:59:26 +0000 Subject: [PATCH 90/90] setting OCR_LANG --- docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.yml b/docker-compose.yml index cac7a60..30c44ef 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,6 +10,7 @@ services: - ./blobs:/usr/src/app/blobs environment: TRIDOC_PWD: "${TRIDOC_PWD:-pw123}" + OCR_LANG: "${OCR_LANG:-fra+deu+eng}" # If you override the command, make sure all required Deno permissions are present (e.g., --allow-write for all needed directories, --allow-read, --allow-net, etc.) # Example: # command: deno run --allow-net --allow-read=blobs,rdf.ttl --allow-write=blobs,rdf.ttl,/tmp --allow-run --allow-env=TRIDOC_PWD,OCR_LANG src/main.ts
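
With this last change, the OCR language can be picked per deployment the same way as `TRIDOC_PWD` in the README's Setup section. A minimal sketch, assuming the compose file above and Tesseract language codes such as `eng` or `deu` (when `OCR_LANG` is unset, the stack falls back to `fra+deu+eng`):

```bash
# Choose the OCR language(s) before starting the stack (illustrative values).
export OCR_LANG="eng"
docker-compose build
docker-compose up
```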