diff --git a/.github/workflows/check_property_files.yml b/.github/workflows/check_property_files.yml index 4ae2b8aee5e..842efc70d1a 100644 --- a/.github/workflows/check_property_files.yml +++ b/.github/workflows/check_property_files.yml @@ -9,7 +9,7 @@ jobs: name: Duplicate Keys runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Run duplicates detection script shell: bash run: tests/check_duplicate_properties.sh @@ -18,7 +18,7 @@ jobs: name: Metadata Blocks Properties runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup GraalVM + Native Image uses: graalvm/setup-graalvm@v1 with: diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 907452f4614..97273f06844 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -61,7 +61,7 @@ jobs: # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages steps: - name: Checkout repository - uses: actions/checkout@v5 + uses: actions/checkout@v6 # Add any setup steps before running the `github/codeql-action/init` action. 
# This includes steps like installing compilers or runtimes (`actions/setup-node` diff --git a/.github/workflows/container_app_pr.yml b/.github/workflows/container_app_pr.yml index 898b46c2652..a2c134070b0 100644 --- a/.github/workflows/container_app_pr.yml +++ b/.github/workflows/container_app_pr.yml @@ -20,14 +20,14 @@ jobs: if: ${{ github.repository_owner == 'IQSS' }} steps: # Checkout the pull request code as when merged - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 with: ref: 'refs/pull/${{ github.event.client_payload.pull_request.number }}/merge' - uses: actions/setup-java@v5 with: - java-version: "17" + java-version: "21" distribution: 'adopt' - - uses: actions/cache@v4 + - uses: actions/cache@v5 with: path: ~/.m2 key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} @@ -35,14 +35,14 @@ jobs: # Note: Accessing, pushing tags etc. to GHCR will only succeed in upstream because secrets. - name: Login to Github Container Registry - uses: docker/login-action@v3 + uses: docker/login-action@v4 with: registry: ghcr.io username: ${{ secrets.GHCR_USERNAME }} password: ${{ secrets.GHCR_TOKEN }} - name: Set up QEMU for multi-arch builds - uses: docker/setup-qemu-action@v3 + uses: docker/setup-qemu-action@v4 # Get the image tag from either the command or default to branch name (Not used for now) #- name: Get the target tag name @@ -69,7 +69,7 @@ jobs: -Dapp.image.tag=${{ env.IMAGE_TAG }} -Ddocker.registry=ghcr.io -Ddocker.platforms=${{ env.PLATFORMS }} - - uses: marocchino/sticky-pull-request-comment@v2 + - uses: marocchino/sticky-pull-request-comment@v3 with: header: registry-push hide_and_recreate: true diff --git a/.github/workflows/container_app_push.yml b/.github/workflows/container_app_push.yml index 0472ab97dee..fb98a926dbe 100644 --- a/.github/workflows/container_app_push.yml +++ b/.github/workflows/container_app_push.yml @@ -101,20 +101,20 @@ jobs: # Depending on context, we push to different targets. Login accordingly. 
- if: github.event_name != 'pull_request' name: Log in to Docker Hub registry - uses: docker/login-action@v3 + uses: docker/login-action@v4 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - if: ${{ github.event_name == 'pull_request' }} name: Login to Github Container Registry - uses: docker/login-action@v3 + uses: docker/login-action@v4 with: registry: ghcr.io username: ${{ secrets.GHCR_USERNAME }} password: ${{ secrets.GHCR_TOKEN }} - name: Set up QEMU for multi-arch builds - uses: docker/setup-qemu-action@v3 + uses: docker/setup-qemu-action@v4 - name: Add rolling image tag when pushing to develop if: ${{ github.event_name == 'push' && github.ref_name == 'develop' }} @@ -141,7 +141,7 @@ jobs: ${{ env.REGISTRY }} -Ddocker.platforms=${{ env.PLATFORMS }} -P ct deploy - - uses: marocchino/sticky-pull-request-comment@v2 + - uses: marocchino/sticky-pull-request-comment@v3 if: ${{ github.event_name == 'pull_request' }} with: header: registry-push diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 3b375e13864..731a00fa7f5 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -42,7 +42,7 @@ jobs: # Note: Accessing, pushing tags etc. to DockerHub will only succeed in upstream and # on events in context of upstream because secrets. PRs run in context of forks by default! - name: Log in to the Container registry - uses: docker/login-action@v3 + uses: docker/login-action@v4 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} @@ -50,7 +50,7 @@ jobs: # In case this is a push to develop, we care about buildtime. # Configure a remote ARM64 build host in addition to the local AMD64 in two steps. 
- name: Setup SSH agent - uses: webfactory/ssh-agent@v0.9.1 + uses: webfactory/ssh-agent@v0.10.0 with: ssh-private-key: ${{ secrets.BUILDER_ARM64_SSH_PRIVATE_KEY }} - name: Provide the known hosts key and the builder config diff --git a/.github/workflows/container_maintenance.yml b/.github/workflows/container_maintenance.yml index d863f838881..981cef106a8 100644 --- a/.github/workflows/container_maintenance.yml +++ b/.github/workflows/container_maintenance.yml @@ -79,12 +79,12 @@ jobs: # Note: Accessing, pushing tags etc. to DockerHub will only succeed in upstream and # on events in context of upstream because secrets. PRs run in context of forks by default! - name: Log in to the Container registry - uses: docker/login-action@v3 + uses: docker/login-action@v4 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Set up QEMU for multi-arch builds - uses: docker/setup-qemu-action@v3 + uses: docker/setup-qemu-action@v4 with: platforms: ${{ env.PLATFORMS }} @@ -122,12 +122,12 @@ jobs: # Note: Accessing, pushing tags etc. to DockerHub will only succeed in upstream and # on events in context of upstream because secrets. PRs run in context of forks by default! - name: Log in to the Container registry - uses: docker/login-action@v3 + uses: docker/login-action@v4 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Set up QEMU for multi-arch builds - uses: docker/setup-qemu-action@v3 + uses: docker/setup-qemu-action@v4 with: platforms: ${{ env.PLATFORMS }} @@ -164,16 +164,16 @@ jobs: # Note: Accessing, pushing tags etc. to DockerHub will only succeed in upstream and # on events in context of upstream because secrets. PRs run in context of forks by default! 
- name: Log in to the Container registry - uses: docker/login-action@v3 + uses: docker/login-action@v4 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Set up QEMU for multi-arch builds - uses: docker/setup-qemu-action@v3 + uses: docker/setup-qemu-action@v4 with: platforms: ${{ env.PLATFORMS }} - name: Setup Trivy binary for vulnerability scanning - uses: aquasecurity/setup-trivy@v0.2.4 + uses: aquasecurity/setup-trivy@v0.2.5 with: version: v0.63.0 @@ -199,7 +199,7 @@ jobs: - configbaker-image steps: - name: Checkout repository - uses: actions/checkout@v5 + uses: actions/checkout@v6 ### BASE IMAGE - name: Render README for base image diff --git a/.github/workflows/deploy_beta_testing.yml b/.github/workflows/deploy_beta_testing.yml index 48fd5c80d3b..9325a5ed734 100644 --- a/.github/workflows/deploy_beta_testing.yml +++ b/.github/workflows/deploy_beta_testing.yml @@ -14,12 +14,12 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: actions/setup-java@v5 with: distribution: 'zulu' - java-version: '17' + java-version: '21' - name: Enable API Session Auth feature flag working-directory: src/main/resources/META-INF @@ -36,7 +36,7 @@ jobs: run: echo "war_file=$(ls *.war | head -1)">> $GITHUB_ENV - name: Upload war artifact - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@v7 with: name: built-app path: ./target/${{ env.war_file }} @@ -47,10 +47,10 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Download war artifact - uses: actions/download-artifact@v6 + uses: actions/download-artifact@v8 with: name: built-app path: ./ @@ -69,7 +69,7 @@ jobs: overwrite: true - name: Execute payara war deployment remotely - uses: appleboy/ssh-action@v1.2.3 + uses: appleboy/ssh-action@v1.2.5 env: INPUT_WAR_FILE: ${{ env.war_file }} with: @@ -79,11 +79,11 @@ jobs: envs: INPUT_WAR_FILE script: | 
APPLICATION_NAME=dataverse-backend - ASADMIN='/usr/local/payara6/bin/asadmin --user admin' + ASADMIN='/usr/local/payara7/bin/asadmin --user admin' $ASADMIN undeploy $APPLICATION_NAME #$ASADMIN stop-domain - #rm -rf /usr/local/payara6/glassfish/domains/domain1/generated - #rm -rf /usr/local/payara6/glassfish/domains/domain1/osgi-cache + #rm -rf /usr/local/payara7/glassfish/domains/domain1/generated + #rm -rf /usr/local/payara7/glassfish/domains/domain1/osgi-cache #$ASADMIN start-domain $ASADMIN deploy --name $APPLICATION_NAME $INPUT_WAR_FILE #$ASADMIN stop-domain diff --git a/.github/workflows/deploy_to_internal.yml b/.github/workflows/deploy_to_internal.yml new file mode 100644 index 00000000000..9502b51e9a8 --- /dev/null +++ b/.github/workflows/deploy_to_internal.yml @@ -0,0 +1,90 @@ +name: 'Deploy to dataverse-internal.iq.harvard.edu' + +on: + workflow_dispatch: + inputs: + buildlabel: + description: 'Custom label that will appear after the version number (the equivalent of the old "build number" entry). Leaving it empty will default to the legacy behavior, i.e. " build -".' 
+ type: string + required: false + +permissions: + contents: read + +concurrency: + group: deploy-to-internal + cancel-in-progress: false + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-java@v5 + with: + distribution: 'zulu' + java-version: '21' + + - name: Set build number + run: scripts/installer/custom-build-number "${{ github.event.inputs.buildlabel }}" + + - name: Build application war + run: mvn package + + - name: Get war file name + working-directory: target + run: echo "war_file=$(ls *.war | head -1)">> $GITHUB_ENV + + - name: Upload war artifact + uses: actions/upload-artifact@v7 + with: + name: built-app + path: ./target/${{ env.war_file }} + + deploy-to-payara: + needs: build + if: ${{ github.repository_owner == 'IQSS' }} + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v6 + + - name: Download war artifact + uses: actions/download-artifact@v8 + with: + name: built-app + path: ./ + + - name: Get war file name + run: echo "war_file=$(ls *.war | head -1)">> $GITHUB_ENV + + - name: Copy war file to remote instance + uses: appleboy/scp-action@master + with: + host: ${{ secrets.INTERNAL_PAYARA_INSTANCE_HOST }} + username: ${{ secrets.INTERNAL_PAYARA_INSTANCE_USERNAME }} + key: ${{ secrets.INTERNAL_PAYARA_INSTANCE_SSH_PRIVATE_KEY }} + source: './${{ env.war_file }}' + target: '/home/${{ secrets.INTERNAL_PAYARA_INSTANCE_USERNAME }}' + overwrite: true + + - name: Execute payara war deployment remotely + uses: appleboy/ssh-action@v1.2.5 + env: + INPUT_WAR_FILE: ${{ env.war_file }} + with: + host: ${{ secrets.INTERNAL_PAYARA_INSTANCE_HOST }} + username: ${{ secrets.INTERNAL_PAYARA_INSTANCE_USERNAME }} + key: ${{ secrets.INTERNAL_PAYARA_INSTANCE_SSH_PRIVATE_KEY }} + envs: INPUT_WAR_FILE + script: | + APPLICATION_NAME=dataverse-backend + ASADMIN='/usr/local/payara7/bin/asadmin --user admin' + $ASADMIN undeploy $APPLICATION_NAME + #$ASADMIN stop-domain + #$ASADMIN start-domain + $ASADMIN 
deploy --name $APPLICATION_NAME $INPUT_WAR_FILE + #$ASADMIN stop-domain + #$ASADMIN start-domain diff --git a/.github/workflows/generate_war_file.yml b/.github/workflows/generate_war_file.yml new file mode 100644 index 00000000000..9e0fee2ae1e --- /dev/null +++ b/.github/workflows/generate_war_file.yml @@ -0,0 +1,37 @@ +name: 'Generate dataverse war file' + +on: + workflow_dispatch: + inputs: + buildlabel: + description: 'Custom label that will appear after the version number (the equivalent of the old "build number" entry). Leaving it empty will default to the legacy behavior, i.e. " build -".' + type: string + required: false + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-java@v5 + with: + distribution: 'zulu' + java-version: '21' + + - name: Set build number + run: scripts/installer/custom-build-number "${{ github.event.inputs.buildlabel }}" + + - name: Build application war + run: mvn package + + - name: Get war file name + working-directory: target + run: echo "war_file=$(ls *.war | head -1)">> $GITHUB_ENV + + - name: Upload war artifact + uses: actions/upload-artifact@v7 + with: + name: built-app + path: ./target/${{ env.war_file }} diff --git a/.github/workflows/guides_build_sphinx.yml b/.github/workflows/guides_build_sphinx.yml index b41606d0c99..fd93172b671 100644 --- a/.github/workflows/guides_build_sphinx.yml +++ b/.github/workflows/guides_build_sphinx.yml @@ -10,7 +10,7 @@ jobs: docs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - id: lookup run: | echo "sphinx_version=$(grep Sphinx== ./doc/sphinx-guides/requirements.txt | tr -s "=" | cut -f 2 -d=)" | tee -a "${GITHUB_OUTPUT}" diff --git a/.github/workflows/maven_cache_management.yml b/.github/workflows/maven_cache_management.yml index 6bfb567c90b..b84c09ba01a 100644 --- a/.github/workflows/maven_cache_management.yml +++ b/.github/workflows/maven_cache_management.yml @@ -32,7 +32,7 @@ jobs: 
contents: read steps: - name: Checkout repository - uses: actions/checkout@v5 + uses: actions/checkout@v6 - name: Determine Java version from Parent POM run: echo "JAVA_VERSION=$(grep '' modules/dataverse-parent/pom.xml | cut -f2 -d'>' | cut -f1 -d'<')" >> ${GITHUB_ENV} - name: Set up JDK ${{ env.JAVA_VERSION }} @@ -62,7 +62,7 @@ jobs: env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Save the common cache - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ${{ env.COMMON_CACHE_PATH }} key: ${{ env.COMMON_CACHE_KEY }} @@ -80,7 +80,7 @@ jobs: contents: read steps: - name: Checkout repository - uses: actions/checkout@v5 + uses: actions/checkout@v6 - name: Cleanup caches run: | gh extension install actions/gh-actions-cache diff --git a/.github/workflows/maven_unit_test.yml b/.github/workflows/maven_unit_test.yml index 4de4a953a70..76ccb5a87ab 100644 --- a/.github/workflows/maven_unit_test.yml +++ b/.github/workflows/maven_unit_test.yml @@ -29,7 +29,7 @@ jobs: strategy: fail-fast: false matrix: - jdk: [ '17' ] + jdk: [ '21' ] experimental: [false] status: ["Stable"] continue-on-error: ${{ matrix.experimental }} @@ -37,7 +37,7 @@ jobs: steps: # TODO: As part of #10618 change to setup-maven custom action # Basic setup chores - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Set up JDK ${{ matrix.jdk }} uses: actions/setup-java@v5 with: @@ -62,7 +62,7 @@ jobs: # Upload the built war file. For download, it will be wrapped in a ZIP by GitHub. 
# See also https://github.com/actions/upload-artifact#zipped-artifact-downloads - - uses: actions/upload-artifact@v5 + - uses: actions/upload-artifact@v7 with: name: dataverse-java${{ matrix.jdk }}.war path: target/dataverse*.war @@ -72,7 +72,7 @@ jobs: - run: | tar -cvf java-builddir.tar target tar -cvf java-m2-selection.tar ~/.m2/repository/io/gdcc/dataverse-* - - uses: actions/upload-artifact@v5 + - uses: actions/upload-artifact@v7 with: name: java-artifacts path: | @@ -87,7 +87,7 @@ jobs: strategy: fail-fast: false matrix: - jdk: [ '17' ] + jdk: [ '21' ] experimental: [ false ] status: [ "Stable" ] # @@ -103,7 +103,7 @@ jobs: steps: # TODO: As part of #10618 change to setup-maven custom action # Basic setup chores - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Set up JDK ${{ matrix.jdk }} uses: actions/setup-java@v5 with: @@ -112,7 +112,7 @@ jobs: cache: maven # Get the build output from the unit test job - - uses: actions/download-artifact@v6 + - uses: actions/download-artifact@v8 with: name: java-artifacts - run: | @@ -124,7 +124,7 @@ jobs: # Wrap up and send to coverage job - run: tar -cvf java-reportdir.tar target/site - - uses: actions/upload-artifact@v5 + - uses: actions/upload-artifact@v7 with: name: java-reportdir path: java-reportdir.tar @@ -137,15 +137,15 @@ jobs: steps: # TODO: As part of #10618 change to setup-maven custom action # Basic setup chores - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: actions/setup-java@v5 with: - java-version: '17' + java-version: '21' distribution: temurin cache: maven # Get the build output from the integration test job - - uses: actions/download-artifact@v6 + - uses: actions/download-artifact@v8 with: name: java-reportdir - run: tar -xvf java-reportdir.tar diff --git a/.github/workflows/pr_comment_commands.yml b/.github/workflows/pr_comment_commands.yml index 06b11b1ac5b..638d9e6c35e 100644 --- a/.github/workflows/pr_comment_commands.yml +++ 
b/.github/workflows/pr_comment_commands.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Dispatch - uses: peter-evans/slash-command-dispatch@v4 + uses: peter-evans/slash-command-dispatch@v5 with: # This token belongs to @dataversebot and has sufficient scope. token: ${{ secrets.GHCR_TOKEN }} diff --git a/.github/workflows/reviewdog_checkstyle.yml b/.github/workflows/reviewdog_checkstyle.yml index fb91f2c718e..ad16445ddce 100644 --- a/.github/workflows/reviewdog_checkstyle.yml +++ b/.github/workflows/reviewdog_checkstyle.yml @@ -10,7 +10,7 @@ jobs: name: Checkstyle job steps: - name: Checkout - uses: actions/checkout@v5 + uses: actions/checkout@v6 - name: Run check style uses: nikitasavinov/checkstyle-action@master with: diff --git a/.github/workflows/shellcheck.yml b/.github/workflows/shellcheck.yml index 55a760bad21..9769737f307 100644 --- a/.github/workflows/shellcheck.yml +++ b/.github/workflows/shellcheck.yml @@ -21,7 +21,7 @@ jobs: permissions: pull-requests: write steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: shellcheck uses: reviewdog/action-shellcheck@v1 with: diff --git a/.github/workflows/shellspec.yml b/.github/workflows/shellspec.yml index d49d399b792..c7b937c642a 100644 --- a/.github/workflows/shellspec.yml +++ b/.github/workflows/shellspec.yml @@ -19,7 +19,7 @@ jobs: steps: - name: Install shellspec run: curl -fsSL https://git.io/shellspec | sh -s ${{ env.SHELLSPEC_VERSION }} --yes - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Run Shellspec run: | cd tests/shell @@ -30,7 +30,7 @@ jobs: container: image: rockylinux/rockylinux:9 steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Install shellspec run: | curl -fsSL https://github.com/shellspec/shellspec/releases/download/${{ env.SHELLSPEC_VERSION }}/shellspec-dist.tar.gz | tar -xz -C /usr/share @@ -47,7 +47,7 @@ jobs: steps: - name: Install shellspec run: curl -fsSL https://git.io/shellspec | sh -s 0.28.1 --yes - - 
uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Run Shellspec run: | cd tests/shell diff --git a/.github/workflows/spi_release.yml b/.github/workflows/spi_release.yml index 378e6ff9b67..feda533c856 100644 --- a/.github/workflows/spi_release.yml +++ b/.github/workflows/spi_release.yml @@ -37,15 +37,15 @@ jobs: runs-on: ubuntu-latest if: github.event_name == 'pull_request' && needs.check-secrets.outputs.available == 'true' steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: actions/setup-java@v5 with: - java-version: '17' + java-version: '21' distribution: 'adopt' server-id: central server-username: MAVEN_USERNAME server-password: MAVEN_PASSWORD - - uses: actions/cache@v4 + - uses: actions/cache@v5 with: path: ~/.m2 key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} @@ -63,12 +63,12 @@ jobs: runs-on: ubuntu-latest if: github.event_name == 'push' && needs.check-secrets.outputs.available == 'true' steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: actions/setup-java@v5 with: - java-version: '17' + java-version: '21' distribution: 'adopt' - - uses: actions/cache@v4 + - uses: actions/cache@v5 with: path: ~/.m2 key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} @@ -78,7 +78,7 @@ jobs: - name: Set up Maven Central Repository uses: actions/setup-java@v5 with: - java-version: '17' + java-version: '21' distribution: 'adopt' server-id: central server-username: MAVEN_USERNAME diff --git a/conf/jhove/jhove.conf b/conf/jhove/jhove.conf index 971c60acfaa..11fbb7bf737 100644 --- a/conf/jhove/jhove.conf +++ b/conf/jhove/jhove.conf @@ -3,7 +3,7 @@ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://hul.harvard.edu/ois/xml/ns/jhove/jhoveConfig" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/jhove/jhoveConfig - file:///usr/local/payara6/glassfish/domains/domain1/config/jhoveConfig.xsd"> + file:///usr/local/payara7/glassfish/domains/domain1/config/jhoveConfig.xsd"> /usr/local/src/jhove utf-8 
/tmp diff --git a/conf/keycloak/builtin-users-spi/pom.xml b/conf/keycloak/builtin-users-spi/pom.xml index 2a730621f85..4f096a15738 100644 --- a/conf/keycloak/builtin-users-spi/pom.xml +++ b/conf/keycloak/builtin-users-spi/pom.xml @@ -100,7 +100,7 @@ - 26.3.4 + 26.5.5 17 3.2.0 0.4 diff --git a/doc/release-notes/6.10-release-notes.md b/doc/release-notes/6.10-release-notes.md new file mode 100644 index 00000000000..a82741997d7 --- /dev/null +++ b/doc/release-notes/6.10-release-notes.md @@ -0,0 +1,552 @@ +# Dataverse 6.10 + +Please note: To read these instructions in full, please go to https://github.com/IQSS/dataverse/releases/tag/v6.10 rather than the [list of releases](https://github.com/IQSS/dataverse/releases), which will cut them off. + +This release brings new features, enhancements, and bug fixes to Dataverse. Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project! + +## Release Highlights + +This release contains major upgrades to core components. Detailed upgrade instructions can be found below. + +Highlights for Dataverse 6.10 include: + +- Optionally require acknowledgment of a disclaimer when publishing +- Optionally require embargo reason +- Harvesting improvements +- Croissant support now built in +- Archiving, OAI-ORE, and BagIt export improvements +- Support for REFI-QDA Codebook and Project files +- Review datasets +- Infrastructure: Payara has been upgraded from version 6 to 7 +- Infrastructure: Java has been upgraded from version 17 to 21 +- New and improved APIs, including filling in a guestbook when downloading files +- Bug fixes + +### Optionally Require Acknowledgment of a Disclaimer When Publishing + +When users click "Publish" on a dataset they have always seen a popup displaying various information to read before clicking "Continue" to proceed with publication. 
+ +Now you can optionally require users to check a box in this popup to acknowledge a disclaimer that you specify through a new setting called `:PublishDatasetDisclaimerText`. + +For backward compatibility, APIs will continue to publish without the acknowledgment for now. An [API endpoint](https://guides.dataverse.org/en/6.10/api/native-api.html#show-disclaimer-for-publishing-datasets) was added for anyone to retrieve the disclaimer text anonymously. + +See [the guides](https://guides.dataverse.org/en/6.10/installation/config.html#publishdatasetdisclaimertext) and #12051. + +### Optionally Require Embargo Reason + +It is now possible to configure Dataverse to require an embargo reason when a user creates an embargo on one or more files. By default, the embargo reason is optional. `dataverse.feature.require-embargo-reason` can be set to true to enable this feature. + +In addition, with this release, if an embargo reason is supplied, it must not be blank. + +See [the guides](https://guides.dataverse.org/en/6.10/installation/config.html#dataverse-feature-require-embargo-reason), #8692, #11956, #12067. + +### Harvesting Improvements + +A setting has been added for configuring sleep intervals in between OAI-PMH calls on a per-server basis. This can help when some of the servers you want to harvest from have rate limiting policies. You can set a default sleep time and custom sleep times for servers that need more. + +Additionally, this release fixes a problem with harvesting from DataCite OAI-PMH where initial, long-running harvests were failing on sets with large numbers of records. + +See [:HarvestingClientCallRateLimit](https://guides.dataverse.org/en/6.10/installation/config.html#harvestingclientcallratelimit) in the guides, #11473, and #11486. + +### Croissant Support Is Now Built In, Slim Version Added + +Croissant is a metadata export format for machine learning datasets that (until this release) was optional and implemented as external exporter. 
The code has been merged into the main Dataverse code base which means the Croissant format is automatically available in your installation of Dataverse, alongside older formats like Dublin Core and DDI. If you were using the external Croissant exporter, the merged code is equivalent to version 0.1.6. Croissant bugs and feature requests should now be filed against the main Dataverse repo (https://github.com/IQSS/dataverse) and the old repo (https://github.com/gdcc/exporter-croissant) should be considered retired. + +As described in the [Discoverability](https://guides.dataverse.org/en/6.10/admin/discoverability.html#id6) section of the Admin Guide, Croissant is inserted into the "head" of the HTML of dataset landing pages, as requested by the [Google Dataset Search](https://datasetsearch.research.google.com) team so that their tool can filter by datasets that support Croissant. In previous versions of Dataverse, when Croissant was optional and hadn't been enabled, we used the older "Schema.org JSON-LD" format in the "head". If you'd like to keep this behavior, you can use the feature flag [dataverse.legacy.schemaorg-in-html-head](https://guides.dataverse.org/en/6.10/installation/config.html#dataverse.legacy.schemaorg-in-html-head). + +Both Croissant and Schema.org JSON-LD formats can become quite large when the dataset has many files or (for Croissant) when the files have many variables. As of this release, the "head" of the HTML contains a "slim" version of Croissant that doesn't contain information about files or variables. The original, full version of Croissant is still available via the "Export Metadata" dropdown. Both "croissant" and "croissantSlim" formats are available via API. + +See also #11254, #12123, #12130, and #12191. 
+ +### Archiving, OAI-ORE, and BagIt Export Improvements + +This release includes multiple updates to the OAI-ORE metadata export and the process of creating archival bags, improving performance, fixing bugs, and adding significant new functionality. See #12144, #12129, #12122, #12104, #12103, #12101, and #12213. + +#### General Archiving Improvements + +- Multiple performance and scaling improvements have been made for creating archival bags for large datasets, including: + - The duration of archiving tasks triggered from the version table or API are no longer limited by the transaction time limit. + - Temporary storage space requirements have been increased by `1/:BagGeneratorThreads` of the zipped bag size. (Often this is by half because the default value for `:BagGeneratorThreads` is 2.) This is a consequence of changes to avoid timeout errors on larger files/datasets. + - The size of individual data files and the total dataset size that will be included in an archival bag can now be limited. Admins can choose whether files above these limits are transferred along with, but outside, the zipped bag (creating a complete archival copy) or are just referenced (using the concept of a "holey" bag and just listing the oversized files and the Dataverse URLs from which they can be retrieved in a `fetch.txt` file). In the holey bag case, an active service on the archiving platform must retrieve the oversized files (using appropriate credentials as needed) to make a complete copy. + - Superusers can now see a pending status in the dataset version table while archiving is active. + - Workflows are now triggered outside the transactions related to publication, assuring that workflow locks and status updates are always recorded. + - Potential conflicts between archiving/workflows, indexing, and metadata exports after publication have been resolved, avoiding cases where the status/last update times for these actions were not recorded. 
+- A bug has been fixed where superusers would incorrectly see the "Submit" button to launch archiving from the dataset page version table. +- The local, S3, and Google archivers have been updated to support deleting existing archival files for a version to allow re-creating the bag for a given version. +- For archivers that support file deletion, it is now possible to recreate an archival bag after "Update Current Version" has been used (replacing the original bag). By default, Dataverse will mark the current version's archive as out-of-date, but will not automatically re-archive it. + - A new "obsolete" status has been added to indicate when an archival bag exists for a version but it was created prior to an "Update Current Version" change. +- Improvements have been made to file retrieval for bagging, including retries on errors and when download requests are being throttled. + - A bug causing `:BagGeneratorThreads` to be ignored has been fixed, and the default has been reduced to 2. +- Retrieval of files for inclusion in an archival bag is no longer counted as a download. +- It is now possible to require that all previous versions have been successfully archived before archiving of a newly published version can succeed. This is intended to support use cases where de-duplication of files between dataset versions will be done and is a step towards supporting the Oxford Common File Layout (OCFL). +- The pending status has changed to use the same JSON format as other statuses. + +#### OAI-ORE Export Updates + +- The export now uses URIs for checksum algorithms, conforming with JSON-LD requirements. +- A bug causing failures with deaccessioned versions has been fixed. This occurred when the deaccession note ("Deaccession Reason" in the UI) was null, which is permissible via the API. +- The `https://schema.org/additionalType` has been updated to "Dataverse OREMap Format v1.0.2" to reflect format changes. 
+ +#### Archival Bag (BagIt) Updates + +- The `bag-info.txt` file now correctly includes information for dataset contacts, fixing a bug where nothing was included when multiple contacts were defined. (Multiple contacts were always included in the OAI-ORE file in the bag; only the baginfo file was affected). +- Values used in the `bag-info.txt` file that may be multi-line (i.e. with embedded CR or LF characters) are now properly indented and wrapped per the BagIt specification (`Internal-Sender-Identifier`, `External-Description`, `Source-Organization`, `Organization-Address`). +- The dataset name is no longer used as a subdirectory within the `data/` directory to reduce issues with unzipping long paths on some filesystems. +- For dataset versions with no files, the empty `manifest-.txt` file will now use the algorithm from the `:FileFixityChecksumAlgorithm` setting instead of defaulting to MD5. +- A new key, `Dataverse-Bag-Version`, has been added to `bag-info.txt` with the value "1.0" to allow for tracking changes to Dataverse's archival bag generation over time. +- When using the `holey` bag option discussed above, the required `fetch.txt` file will be included. + +### Support for REFI-QDA Codebook and Project Files + +.qdc and .qdpx files are now detected as [REFI-QDA standard](https://www.qdasoftware.org) Codebook and Project files, respectively, for qualitative data analysis, which allows them to be used with the new REFI QDA Previewers. See https://github.com/gdcc/dataverse-previewers/pull/137 for screenshots. + +To enable existing .qdc and .qdpx files to be used with the previewers, their content type (MIME type) will need to be [redetected](https://guides.dataverse.org/en/6.10/api/native-api.html#redetect-file-type). See #12163. + +### Review Datasets + +Dataverse now supports review datasets, a type of dataset that can be used to review resources such as other datasets in the Dataverse installation itself or various resources in external data repositories. 
APIs and a new "review" metadata block (with an "Item Reviewed" field) are in place but the UI for this feature will only be available in a future version of the new React-based [Dataverse Frontend](https://github.com/IQSS/dataverse-frontend) (see [#876](https://github.com/IQSS/dataverse-frontend/pull/876)). See the [guides](https://guides.dataverse.org/en/6.10/user/dataset-management.html#review-datasets), #11747, #12015, #11887, #12115, and #11753. This feature is experimental. + +## Features Added + +These are features that weren't already mentioned under "highlights" above. + +- A new "DATASETMOVED" notification type was added for when datasets are moved from one collection (dataverse) to another. This requires the :SendNotificationOnDatasetMove setting to be enabled. See #11670 and #11805. +- Performance has been improved for the Solr search index. Changes in v6.9 that significantly improved re-indexing performance and lowered memory use (in situations such as when a user's role on the root collection was changed) also slowed reindexing of individual datasets after editing and publication. This release restores/improves the individual dataset reindexing performance while retaining the benefits of the earlier update. This release also avoids creating unused Solr entries for files in drafts of new versions of published datasets (decreasing the Solr database size and thereby improving performance). See #12082, #12093, and #12094. +- In prior versions of Dataverse, configuring a proxy to forward to Dataverse over an HTTP connection could result in failure of signed URLs (e.g. for external tools). This version of Dataverse supports having a proxy send an `X-Forwarded-Proto` header set to HTTPS to avoid this issue. See [the guides](https://guides.dataverse.org/en/6.10/installation/config.html#using-x-forwarded-proto-for-signed-urls) and #11787. +- Citation Style Language (CSL) output now includes "type:software" or "type:review" when those dataset types are used. 
See the [guides](https://guides.dataverse.org/en/6.10/api/native-api.html#get-citation-in-other-formats) and #11753. + +## External Tool Updates + +### New Globus Features in rdm-integration 2.0.1 + +[rdm-integration](https://github.com/libis/rdm-integration) is a Dataverse external tool for synchronizing files from various source repositories into Dataverse, with support for background processing, DDI-CDI metadata generation, and high-performance Globus transfers. You can find it on the [Integrations](https://guides.dataverse.org/en/6.10/admin/integrations.html#integrations-dashboard) section of the Dataverse Admin Guide. + +Release 2.0.1 brings several new Globus capabilities: + +- **Guest downloads** — public datasets can be downloaded via Globus without a Dataverse account +- **Preview URL support** — reviewers can download draft dataset files via Globus using general preview URLs +- **Scoped institutional login** — `session_required_single_domain` support enables access to institutional Globus endpoints (e.g., HPC clusters); scopes are automatically removed for guest and preview access +- **Real-time transfer progress** — polling-based progress monitoring with percentage display and status updates (ACTIVE/SUCCEEDED/FAILED) +- **Download filtering** — only datasets where the user can download all files are shown, avoiding failed transfers for restricted or embargoed content +- **Hierarchical file tree** — recursive folder selection and color-coded file status + +For full details, see the [README](https://github.com/libis/rdm-integration#readme) and [GLOBUS_INTEGRATION.md](https://github.com/libis/rdm-integration/blob/main/GLOBUS_INTEGRATION.md). + +### New Previewer for REFI-QDA Codebook and Project Files + +See the note above about support for REFI-QDA files. 
Screenshots of the previewer can be found at https://github.com/gdcc/dataverse-previewers/pull/137 + +## Bugs Fixed + +- Hidden fields on a dataset creation form remained visible and setting a field to "hidden" was not working. See #11992 and #12017. +- The names of host collections were visible when using anonymized preview URLs. See #11085 and #12111. +- As of Dataverse 6.8, the "replace file" feature was not working. See #11976, #12107, and #12157. +- A dataset or collection (dataverse) was still visible in browse/search results immediately after deleting it if you didn't refresh the page. See #11206 and #12072. +- The text in "assign role" notifications now only shows the role that was just assigned. Previously, the notification showed all the roles associated with the dataset. See #11773 and #11915. +- Handles from hdl.handle.net with urls of `/citation` instead of `/dataset.xhtml` were not properly redirecting. This fix adds a lookup for alternate PID so `/citation` endpoint will redirect to `/dataset.xhtml`. See #11943. +- Dataverse no longer sends duplicate [COAR Notify Relationship Announcement Workflow](https://coar-notify.net/catalogue/workflows/repository-relationship-repository/) messages when new dataset versions are published (and the relationship metadata has not been changed). See #11983. +- 500 error when deleting dataset type by name. See #11833 and #11753. +- Dataset Type facet works in JSF but not the SPA. See #11758 and #11753. +- PIDs could not be generated when the `identifier-generation-style` was set to `storedProcGenerated`. See #12126 and #12127. +- It came to our attention that the [Dataverse Uploader GitHub Action](https://guides.dataverse.org/en/6.10/admin/integrations.html#github) was [failing](https://github.com/IQSS/dataverse-uploader/issues/28) with an "unhashable type" error. This has been fixed in a new release, [v1.7](https://github.com/IQSS/dataverse-uploader/releases/tag/v1.7). 
+ +## API Updates + +- A guestbook response can be passed to file access (file download) APIs when required. This is explained in greater detail under "backward incompatible changes" below. See the [guides](https://guides.dataverse.org/en/6.10/api/dataaccess.html#basic-file-access), #12001, and #12110. +- CRUD endpoints for guestbooks were added. (Note: There is no update or delete at this time. You can disable a guestbook and create a new one.) See the [guides](https://guides.dataverse.org/en/6.10/api/native-api.html#guestbooks), #12001, and #12110. + - Create a guestbook + - Get a guestbook + - Get a list of guestbooks linked to a collection (dataverse) + - Enable/disable a guestbook +- The MyData API now supports the `metadata_fields`, `sort`, `order`, `show_collections` and `fq` parameters, which enhances its functionality and brings it in line with the Search API. See [the guides](https://guides.dataverse.org/en/6.10/api/native-api.html#mydata) and #12009. +- This release removes an undocumented restriction on the API calls to get, set, and delete [archival status](https://guides.dataverse.org/en/6.10/api/native-api.html#get-the-archival-status-of-a-dataset-by-version). They did not work on deaccessioned dataset versions and now do. See #12065. +- Dataset templates can be listed and deleted for a given collection (dataverse). See [the guides](https://guides.dataverse.org/en/6.10/api/native-api.html#list-single-template-by-its-identifier), #11918, and #11969. The default template can also be set. See [the guides](https://guides.dataverse.org/en/6.10/api/native-api.html#set-a-default-template-for-a-collection), #11914 and #11989. +- Because some clients (such as the [new frontend](https://github.com/IQSS/dataverse-frontend)) need to retrieve contact email addresses along with the rest of the dataset metadata, a new query parameter called `ignoreSettingExcludeEmailFromExport` has been introduced. It requires "EditDataset" permission. 
See [the guides](https://guides.dataverse.org/en/6.10/api/native-api.html#get-json-representation-of-a-dataset), #11714, and #11819. +- The Change Collection Attributes API now supports `allowedDatasetTypes`. See the [guides](https://guides.dataverse.org/en/6.10/api/native-api.html#change-collection-attributes), #12115, and #11753. +- The API returning information about datasets (`/api/datasets/{id}`) now includes a `locks` field containing a list of the types of all existing locks, e.g. `"locks": ["InReview"]`. See #12008. +- Cleaned up Access APIs to localize getting user from session for JSF backward compatibility. This bug requires a frontend fix to send the Bearer Token in the API call. See #11740 and #11844. + +## Security Updates + +This release contains important security updates. If you are not receiving security notices, please sign up by following [the steps](https://guides.dataverse.org/en/latest/installation/config.html#ongoing-security-of-your-installation) in the guides. + +## Backward Incompatible Changes + +Generally speaking, see the [API Changelog](https://guides.dataverse.org/en/latest/api/changelog.html) for a list of backward-incompatible API changes. + +### A Guestbook Response Can Be Required for File Access (File Download) APIs + +The following File Access (File Download) APIs will now return `400` (`Bad Request`) if a required guestbook response is not supplied. + +GET APIs + + - **/api/access/datafile/{fileId:.+}** + - **/api/access/datafiles/{fileIds}** + - **/api/access/dataset/{id}** + - **/api/access/dataset/{id}/versions/{versionId}** + +POST APIs + + - **/api/access/datafiles** + - **/api/access/datafile/bundle/{fileId}** + +PUT APIs (when `dataverse.files.guestbook-at-request=true`) + + - **/api/access/datafile/{id}/requestAccess** + +To construct the JSON to send as a guestbook response you must first retrieve the guestbook for the dataset whose files you are trying to access. The guestbook may include custom questions. 
Call `GET /api/guestbooks/{id}` with the `guestbookId` from the dataset (the `guestbookId` should be included in the `400` error mentioned above). + +Using the guestbook and custom questions, build a guestbook response in JSON with `"guestbookResponse":{}` and send it in the body of a POST call. (You can find a JSON example in the [guides](https://guides.dataverse.org/en/6.10/api/dataaccess.html#basic-file-access) for the general format.) + +This procedure can also be found in the API Guide under [Data Access API](https://guides.dataverse.org/en/6.10/api/dataaccess.html#basic-file-access) and #12110. + +### Archival Zip Filename Change + +The filename of the archival zipped bag produced by the `LocalSubmitToArchiveCommand` archiver now has a "." character before the "v" (for version number) to mirror the filename used by other archivers. For example, the filename will look like + +`doi-10-5072-fk2-fosg5q.v1.0.zip` + +rather than + +`doi-10-5072-fk2-fosg5qv1.0.zip`. + +### Dataset Types Must Be Allowed, Per-Collection, Before Use + +In previous releases of Dataverse, as soon as additional dataset types were added (such as "software", "workflow", etc.), they could be used by all users when creating datasets (via API only). As of this release, on a per-collection basis, superusers must allow these dataset types to be used. See #12115 and #11753. + +## End-Of-Life (EOL) Announcements + +### PostgreSQL 13 Reached EOL on 13 November 2025 + +We mentioned this in the Dataverse [6.6](https://github.com/IQSS/dataverse/releases/tag/v6.6), [6.8](https://github.com/IQSS/dataverse/releases/tag/v6.8), [6.9](https://github.com/IQSS/dataverse/releases/tag/v6.9) release notes, but as a reminder, according to https://www.postgresql.org/support/versioning/ PostgreSQL 13 reached EOL on 13 November 2025. 
As stated in the [Installation Guide](https://guides.dataverse.org/en/6.10/installation/prerequisites.html#postgresql), we recommend running PostgreSQL 16 since it is the version we test with in our continuous integration ([since](https://github.com/gdcc/dataverse-ansible/commit/8ebbd84ad2cf3903b8f995f0d34578250f4223ff) February 2025). The [Dataverse 5.4 release notes](https://github.com/IQSS/dataverse/releases/tag/v5.4) explained the upgrade process from 9 to 13 (e.g. pg_dumpall, etc.) and the steps will be similar. If you have any problems, please feel free to reach out (see "getting help" in these release notes). + +## Notes for Dataverse Installation Administrators + +### Hiding OIDC Provider from JSF UI + +This release fixes a bug where the value of the [dataverse.auth.oidc.enabled](https://guides.dataverse.org/en/6.10/installation/oidc.html#provision-via-jvm-options) setting (available when provisioning an authentication provider via JVM options) was not being propagated to the current Dataverse user interface (JSF, where `enabled=false` providers are not displayed for login/registration) or represented in the GET /api/admin/authenticationProviders API call. + +A new JVM setting (`dataverse.auth.oidc.hidden-jsf`) was added to hide an enabled OIDC Provider from the JSF UI. + +For Dataverse instances deploying both the current JSF UI and the [new SPA UI](https://github.com/IQSS/dataverse-frontend), this fix allows the OIDC Keycloak provider configured for the SPA to be hidden in the JSF UI. This is useful in cases where it would duplicate other configured providers. + +Note: The API to create a new Auth Provider can only be used to create a provider for both JSF and SPA. Use JVM / MicroProfile config setting to create SPA-only providers. + +See [dataverse.auth.oidc.hidden-jsf](https://guides.dataverse.org/en/6.10/installation/oidc.html#provision-via-jvm-options) in the guides, #11606, and #11922. 
+ +## New Settings + +### New JVM Options (MicroProfile Config Settings) + +- dataverse.auth.oidc.hidden-jsf +- dataverse.bagit.archive-on-version-update +- dataverse.bagit.zip.holey +- dataverse.bagit.zip.max-data-size +- dataverse.bagit.zip.max-file-size +- dataverse.feature.require-embargo-reason +- dataverse.legacy.schemaorg-in-html-head + +### New Database Settings + +- :ArchiveOnlyIfEarlierVersionsAreArchived +- :HarvestingClientCallRateLimit +- :PublishDatasetDisclaimerText +- :SendNotificationOnDatasetMove + +## Complete List of Changes + +For the complete list of code changes in this release, see the [6.10 milestone](https://github.com/IQSS/dataverse/issues?q=milestone%3A6.10+is%3Aclosed) in GitHub. + +## Getting Help + +For help with upgrading, installing, or general questions please see [getting help](https://guides.dataverse.org/en/latest/installation/intro.html#getting-help) in the Installation Guide. + +## Installation + +If this is a new installation, please follow our [Installation Guide](https://guides.dataverse.org/en/latest/installation/). Please don't be shy about [asking for help](https://guides.dataverse.org/en/latest/installation/intro.html#getting-help) if you need it! + +Once you are in production, we would be delighted to update our [map of Dataverse installations around the world](https://dataverse.org/installations) to include yours! Please [create an issue](https://github.com/IQSS/dataverse-installations/issues) or email us at support@dataverse.org to join the club! + +You are also very welcome to join the [Global Dataverse Community Consortium](https://www.gdcc.io/) (GDCC). + +## Upgrade Instructions + +Upgrading requires a maintenance window and downtime. Please plan accordingly, create backups of your database, etc. + +Note: These instructions assume that you are upgrading from the immediate previous version. That is to say, you've already upgraded through all the 6.x releases and are now running Dataverse 6.9. 
See [tags on GitHub](https://github.com/IQSS/dataverse/tags) for a list of versions. If you are running an earlier version, the only supported way to upgrade is to progress through the upgrades to all the releases in between before attempting the upgrade to this version. + +If you are running Payara as a non-root user (and you should be!), **remember not to execute the commands below as root**. By default, Payara runs as the `dataverse` user. In the commands below, we use sudo to run the commands as a non-root user. + +Also, we assume that Payara 6 is installed in `/usr/local/payara6`. If not, adjust as needed. + +### Upgrade from Java 17 to Java 21 + +Java 21 is now required to run Dataverse. Solr can run under Java 17 or Java 21 but the latter is recommended and the switch is shown below. In preparation for the Java upgrade, the steps below instruct you to stop both Dataverse (Payara) and Solr. + +1. Undeploy Dataverse from Payara 6, if deployed, using the unprivileged service account ("dataverse", by default). + + `sudo -u dataverse /usr/local/payara6/bin/asadmin list-applications` + + `sudo -u dataverse /usr/local/payara6/bin/asadmin undeploy dataverse-6.9` + +1. Stop Payara 6. + + `sudo -u dataverse /usr/local/payara6/bin/asadmin stop-domain` + +1. Stop Solr. + + `sudo systemctl stop solr.service` + +1. Install Java 21. + + Assuming you are using RHEL or a derivative such as Rocky Linux: + + `sudo dnf install java-21-openjdk` + + Note: if you see something like `Error: Failed to download metadata for repo 'pgdg13'`, you are probably still on PostgreSQL 13 and should upgrade, as explained in the End-Of-Life (EOL) note above. A workaround is to disable that repo with `sudo dnf config-manager --set-disabled pgdg13`. + +1. Set Java 21 as the default. + + Assuming you are using RHEL or a derivative such as Rocky Linux: + + `sudo alternatives --config java` + +1. Test that Java 21 is the default. 
+ + `java -version` + +### Upgrade from Payara 6 to Payara 7 + +As a reminder, if you are running Payara as a non-root user (and you should be!), **remember not to execute the commands below as root**. Use `sudo` to change to that user first. For example, `sudo -i -u dataverse` if `dataverse` is your dedicated application user. + +The steps below involve carefully transferring your configuration settings from your existing Payara 6 domain directory into the brand new domain from the Payara 7 distribution. You may also want to review the Payara upgrade instructions as it could be helpful during any troubleshooting: +[Payara Release Notes](https://docs.payara.fish/community/docs/Release%20Notes/Release%20Notes%207.2026.2.html). +We also recommend you ensure you followed all update instructions from the past releases regarding Payara. +(The most recent Payara update was for [Dataverse 6.9](https://github.com/IQSS/dataverse/releases/tag/v6.9).) + +1. Download Payara 7.2026.2. + + `curl -L -O https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/7.2026.2/payara-7.2026.2.zip` + +1. Unzip it to /usr/local (or your preferred location). + + `sudo unzip payara-7.2026.2.zip -d /usr/local/` + +1. Set permission for the service account ("dataverse" by default). + + `sudo chown -R root:root /usr/local/payara7` + `sudo chown dataverse /usr/local/payara7/glassfish/lib` + `sudo chown -R dataverse:dataverse /usr/local/payara7/glassfish/domains/domain1` + +1. Start and stop Payara 7 to let it reformat its domain.xml file. + + When Payara starts, it will reformat its domain.xml file, which we will be backing up and editing. By stopping and starting Payara, the `diff` between the backup and the edited file will be easier to read. + + `sudo -u dataverse /usr/local/payara7/bin/asadmin start-domain` + `sudo -u dataverse /usr/local/payara7/bin/asadmin stop-domain` + +1. Copy Dataverse-related lines from Payara 6 to Payara 7 domain.xml. 
+ + First, back up the Payara 7 domain.xml file we will be editing. + + `sudo -u dataverse cp -a /usr/local/payara7/glassfish/domains/domain1/config/domain.xml /usr/local/payara7/glassfish/domains/domain1/config/domain.xml.orig` + + Save the Dataverse-related lines from Payara 6 to a text file. Note that "doi" is for legacy settings like "doi.baseurlstring" that should be [converted](https://guides.dataverse.org/en/6.10/installation/config.html#legacy-single-pid-provider-dataverse-pid-datacite-mds-api-url) to modern equivalents if they are still present. + + `sudo egrep 'dataverse|doi' /usr/local/payara6/glassfish/domains/domain1/config/domain.xml > lines.txt` + + Edit the Payara 7 domain.xml and insert the Dataverse-related lines. More details are below. + + `sudo vi /usr/local/payara7/glassfish/domains/domain1/config/domain.xml` + + If any JVM options reference the old payara6 path (`/usr/local/payara6`) be sure to change it to payara7. + + The lines will appear in two sections, examples shown below (but your content will vary). + + Section 1: system properties (under ``) + + ``` + + + + + + + ``` + + Please note that if your existing `domain.xml` file contains the old-style mail configuration entry that looks like the following... + + `` + + ... this may be a good time to replace it with new-style `system-property` entries, using the lines in the example above as a model. See also the section on [SMTP configuration](https://guides.dataverse.org/en/6.10/installation/config.html#smtp-email-configuration) in the guides. + + Note: If you used the Dataverse installer, you won't have a `dataverse.db.password` property. See "Create password aliases" below. 
+ + Section 2: JVM options (under `<java-config>`, the one under `<server-config>`, not under `<default-config>`) + + As an example, the following jvm options were encountered and transferred when upgrading a local test server: + + ``` + -Ddataverse.files.directory=/usr/local/dvn/data + -Ddataverse.files.file.type=file + -Ddataverse.files.file.label=file + -Ddataverse.files.file.directory=/usr/local/dvn/data + -Ddataverse.rserve.host=localhost + -Ddataverse.rserve.port=6311 + -Ddataverse.rserve.user=rserve + -Ddataverse.rserve.password=rserve + -Ddataverse.fqdn=dev1.dataverse.org + -Ddataverse.siteUrl=https://dev1.dataverse.org + -Ddataverse.auth.password-reset-timeout-in-minutes=60 + -Ddataverse.pid.providers=fake + -Ddataverse.pid.fake.type=FAKE + -Ddataverse.pid.fake.label=Fake DOI Provider + -Ddataverse.pid.fake.authority=10.5072 + -Ddataverse.pid.fake.shoulder=FK2/ + -Ddataverse.pid.default-provider=fake + -Ddataverse.timerServer=true + -Ddataverse.files.storage-driver-id=file + -Ddataverse.mail.system-email=noreply@dev1.dataverse.org + -Ddataverse.files.uploads=/tmp + -Ddataverse.feature.api-session-auth=0 + -Ddataverse.spi.exporters.directory=/var/lib/dataverse/exporters + ``` + +1. Check the `Xmx` setting in `domain.xml`. + + Under `/usr/local/payara7/glassfish/domains/domain1/config/domain.xml`, check the `Xmx` setting under `<server-config>`, where you put the Dataverse-related JVM options, not the one under `<default-config>`. This sets the JVM heap size; a good rule of thumb is half of your system's total RAM. You may specify the value in MB (`8192m`) or GB (`8g`). + +1. **Please make sure** to check for any other custom configuration settings you may have in your current `domain.xml` that are not explicitly covered in the sections above. 
As an example, for Harvard Dataverse we have the thread pool size configured as follows for the main http listener: + + `` + + Regardless of whether you have it configured in your Payara 6 domain, it is generally recommended to have the number of "acceptor threads" set to the number of the CPUs on your server, for example: + + `` + + in the `` section of your server configuration. + +1. Comment out JSP servlet mappings. + + First, backup the file you'll be editing. + + `sudo cp -a /usr/local/payara7/glassfish/domains/domain1/config/default-web.xml /usr/local/payara7/glassfish/domains/domain1/config/default-web.xml.orig` + + Then, edit the file and follow the instructions below. + + `sudo vi /usr/local/payara7/glassfish/domains/domain1/config/default-web.xml` + + Comment out the following section and save the file. + + ``` + + jsp + *.jsp + *.jspx + + ``` + +1. Copy `jhove.conf` and `jhoveConfig.xsd` from Payara 6, edit and change `payara6` to `payara7`. + + `sudo bash -c 'cp /usr/local/payara6/glassfish/domains/domain1/config/jhove* /usr/local/payara7/glassfish/domains/domain1/config'` + + `sudo bash -c 'chown dataverse /usr/local/payara7/glassfish/domains/domain1/config/jhove*'` + + `sudo -u dataverse vi /usr/local/payara7/glassfish/domains/domain1/config/jhove.conf` + +1. Copy logos from Payara 6 to Payara 7. + + These logos are for collections (dataverses). + + `sudo -u dataverse cp -r /usr/local/payara6/glassfish/domains/domain1/docroot/logos /usr/local/payara7/glassfish/domains/domain1/docroot` + +1. Copy sitemap from Payara 6 to Payara 7. + + It's a good practice to set up a sitemap, but you can skip this step if you don't want one. + + If you already have a cron job in place as [recommended](https://guides.dataverse.org/en/6.10/installation/config.html#creating-a-sitemap-and-submitting-it-to-search-engines) by the guides, you could run that cron job manually as a final step, but we recommend copying over your existing sitemap. 
+ + `sudo -u dataverse cp -r /usr/local/payara6/glassfish/domains/domain1/docroot/sitemap /usr/local/payara7/glassfish/domains/domain1/docroot` + +1. If you are using Make Data Count (MDC), make various updates. + + Your `:MDCLogPath` database setting might be pointing to a Payara 6 directory such as `/usr/local/payara6/glassfish/domains/domain1/logs`. If so, use the settings API to change it to point to the payara7 location (once Dataverse is running again): + + `curl -X PUT -d '/usr/local/payara7/glassfish/domains/domain1/logs' http://localhost:8080/api/admin/settings/:MDCLogPath` + + You'll probably want to copy your logs over as well. + + Update Counter Processer to put payara7 in the counter-processor-config.yaml and counter_daily.sh. See https://guides.dataverse.org/en/6.10/admin/make-data-count.html + +1. If you've enabled access logging or any other site-specific configuration, be sure to preserve them. For instance, the default domain.xml includes + + ``` + + + ``` + + but you may wish to include + + ``` + + + ``` + + Be sure to keep a previous copy of your domain.xml for reference. + +1. Update systemd unit file (or other init system) from `/usr/local/payara6` to `/usr/local/payara7`, if applicable. + + This example is for systemd: + + `sudo vi /usr/lib/systemd/system/payara.service` + `sudo systemctl daemon-reload` + + See also https://guides.dataverse.org/en/6.10/installation/prerequisites.html#launching-payara-on-system-boot + +1. Start Payara 7. + + `sudo systemctl start payara` + +1. Create password aliases for your database, rserve and datacite jvm-options, if you're using them. 
+ + `echo "AS_ADMIN_ALIASPASSWORD=yourDBpassword" > /tmp/dataverse.db.password.txt` + + `sudo -u dataverse /usr/local/payara7/bin/asadmin create-password-alias --passwordfile /tmp/dataverse.db.password.txt` + + When you are prompted "Enter the value for the aliasname operand", enter `dataverse.db.password` + + You should see "Command create-password-alias executed successfully." + + You'll want to perform similar commands for `rserve_password_alias` and `doi_password_alias` if you're using Rserve and/or DataCite. + +1. Create the network listener on port 8009. + + `sudo -u dataverse /usr/local/payara7/bin/asadmin create-network-listener --protocol http-listener-1 --listenerport 8009 --jkenabled true jk-connector` + +1. Deploy the Dataverse 6.10 war file. + + `wget https://github.com/IQSS/dataverse/releases/download/v6.10/dataverse-6.10.war` + + `sudo -u dataverse /usr/local/payara7/bin/asadmin deploy dataverse-6.10.war` + +1. Check that you get a version number from Dataverse. + + This is just a sanity check that Dataverse has been deployed properly. + + `curl http://localhost:8080/api/info/version` + +1. Perform one final Payara restart to ensure that timers are initialized properly. + + `sudo -u dataverse /usr/local/payara7/bin/asadmin stop-domain` + + `sudo -u dataverse /usr/local/payara7/bin/asadmin start-domain` + +1. Start Solr under Java 21 now that it's the default. + + `sudo systemctl start solr.service` + +1. For installations with internationalization or text customizations: + + Please remember to update translations via [Dataverse language packs](https://github.com/GlobalDataverseCommunityConsortium/dataverse-language-packs). + + If you have text customizations you can get the latest English files from the Dataverse source tree (e.g. https://github.com/IQSS/dataverse/tree/v6.10/src/main/java/propertyFiles). 
diff --git a/doc/sphinx-guides/source/_static/admin/counter-processor-config.yaml b/doc/sphinx-guides/source/_static/admin/counter-processor-config.yaml index f3501ead7b3..d76ec3a6f18 100644 --- a/doc/sphinx-guides/source/_static/admin/counter-processor-config.yaml +++ b/doc/sphinx-guides/source/_static/admin/counter-processor-config.yaml @@ -1,8 +1,8 @@ # currently no other option but to have daily logs and have year-month-day format in the name with # 4-digit year and 2-digit month and day -# /usr/local/payara6/glassfish/domains/domain1/logs/counter_2019-01-11.log +# /usr/local/payara7/glassfish/domains/domain1/logs/counter_2019-01-11.log #log_name_pattern: sample_logs/counter_(yyyy-mm-dd).log -log_name_pattern: /usr/local/payara6/glassfish/domains/domain1/logs/mdc/counter_(yyyy-mm-dd).log +log_name_pattern: /usr/local/payara7/glassfish/domains/domain1/logs/mdc/counter_(yyyy-mm-dd).log # path_types regular expressions allow matching to classify page urls as either an investigation or request # based on specific URL structure for your system. 
diff --git a/doc/sphinx-guides/source/_static/api/dataset-create-review.json b/doc/sphinx-guides/source/_static/api/dataset-create-review.json new file mode 100644 index 00000000000..e7a507ad39c --- /dev/null +++ b/doc/sphinx-guides/source/_static/api/dataset-create-review.json @@ -0,0 +1,99 @@ +{ + "datasetType": "review", + "datasetVersion": { + "license": { + "name": "CC0 1.0", + "uri": "http://creativecommons.org/publicdomain/zero/1.0" + }, + "metadataBlocks": { + "citation": { + "fields": [ + { + "typeName": "title", + "value": "Review of Percent of Children That Have Asthma", + "typeClass": "primitive", + "multiple": false + }, + { + "value": [ + { + "authorName": { + "value": "Wazowski, Mike", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorName" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "author" + }, + { + "value": [ + { + "datasetContactEmail": { + "value": "mwazowski@mailinator.com", + "typeClass": "primitive", + "multiple": false, + "typeName": "datasetContactEmail" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "datasetContact" + }, + { + "value": [ + { + "dsDescriptionValue": { + "value": "This is a review of a dataset.", + "typeClass": "primitive", + "multiple": false, + "typeName": "dsDescriptionValue" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "dsDescription" + }, + { + "value": [ + "Medicine, Health and Life Sciences" + ], + "typeClass": "controlledVocabulary", + "multiple": true, + "typeName": "subject" + }, + { + "value": { + "itemReviewedUrl": { + "value": "https://datacommons.org/tools/statvar#sv=Percent_Person_Children_WithAsthma", + "typeClass": "primitive", + "multiple": false, + "typeName": "itemReviewedUrl" + }, + "itemReviewedType": { + "value": "Dataset", + "typeClass": "controlledVocabulary", + "multiple": false, + "typeName": "itemReviewedType" + }, + "itemReviewedCitation": { + "value": "\"Statistical Variable Explorer - 
Data Commons.\" Datacommons.org, 2026, datacommons.org/tools/statvar#sv=Percent_Person_Children_WithAsthma. Accessed 9 Mar. 2026.", + "typeClass": "primitive", + "multiple": false, + "typeName": "itemReviewedCitation" + } + }, + "typeClass": "compound", + "multiple": false, + "typeName": "itemReviewed" + } + ] + } + } + } +} diff --git a/doc/sphinx-guides/source/_static/installation/files/etc/init.d/payara.init.root b/doc/sphinx-guides/source/_static/installation/files/etc/init.d/payara.init.root index b9ef9960318..6143c19ea19 100755 --- a/doc/sphinx-guides/source/_static/installation/files/etc/init.d/payara.init.root +++ b/doc/sphinx-guides/source/_static/installation/files/etc/init.d/payara.init.root @@ -4,7 +4,7 @@ set -e -ASADMIN=/usr/local/payara6/bin/asadmin +ASADMIN=/usr/local/payara7/bin/asadmin case "$1" in start) diff --git a/doc/sphinx-guides/source/_static/installation/files/etc/init.d/payara.init.service b/doc/sphinx-guides/source/_static/installation/files/etc/init.d/payara.init.service index 19bb190e740..ec6f1d6375a 100755 --- a/doc/sphinx-guides/source/_static/installation/files/etc/init.d/payara.init.service +++ b/doc/sphinx-guides/source/_static/installation/files/etc/init.d/payara.init.service @@ -3,7 +3,7 @@ # description: Payara App Server set -e -ASADMIN=/usr/local/payara6/bin/asadmin +ASADMIN=/usr/local/payara7/bin/asadmin APP_SERVER_USER=dataverse case "$1" in diff --git a/doc/sphinx-guides/source/_static/installation/files/etc/systemd/payara.service b/doc/sphinx-guides/source/_static/installation/files/etc/systemd/payara.service index c8efcb9c6f9..d1fcb2de720 100644 --- a/doc/sphinx-guides/source/_static/installation/files/etc/systemd/payara.service +++ b/doc/sphinx-guides/source/_static/installation/files/etc/systemd/payara.service @@ -4,9 +4,9 @@ After = syslog.target network.target [Service] Type = forking -ExecStart = /usr/bin/java -jar /usr/local/payara6/glassfish/lib/client/appserver-cli.jar start-domain -ExecStop = /usr/bin/java -jar 
/usr/local/payara6/glassfish/lib/client/appserver-cli.jar stop-domain -ExecReload = /usr/bin/java -jar /usr/local/payara6/glassfish/lib/client/appserver-cli.jar restart-domain +ExecStart = /usr/bin/java -jar /usr/local/payara7/glassfish/lib/client/appserver-cli.jar start-domain +ExecStop = /usr/bin/java -jar /usr/local/payara7/glassfish/lib/client/appserver-cli.jar stop-domain +ExecReload = /usr/bin/java -jar /usr/local/payara7/glassfish/lib/client/appserver-cli.jar restart-domain User=dataverse LimitNOFILE=32768 Environment="LANG=en_US.UTF-8" diff --git a/doc/sphinx-guides/source/_static/util/clear_timer.sh b/doc/sphinx-guides/source/_static/util/clear_timer.sh index 641b2695084..1bee9f05587 100755 --- a/doc/sphinx-guides/source/_static/util/clear_timer.sh +++ b/doc/sphinx-guides/source/_static/util/clear_timer.sh @@ -8,7 +8,7 @@ # if you'd like to avoid that. # directory where Payara is installed -PAYARA_DIR=/usr/local/payara6 +PAYARA_DIR=/usr/local/payara7 # directory within Payara (defaults) DV_DIR=${PAYARA_DIR}/glassfish/domains/domain1 diff --git a/doc/sphinx-guides/source/_static/util/counter_daily.sh b/doc/sphinx-guides/source/_static/util/counter_daily.sh index 07c32882a0b..974eab404c8 100644 --- a/doc/sphinx-guides/source/_static/util/counter_daily.sh +++ b/doc/sphinx-guides/source/_static/util/counter_daily.sh @@ -2,7 +2,7 @@ #counter_daily.sh COUNTER_PROCESSOR_DIRECTORY="/usr/local/counter-processor-1.06" -MDC_LOG_DIRECTORY="/usr/local/payara6/glassfish/domains/domain1/logs/mdc" +MDC_LOG_DIRECTORY="/usr/local/payara7/glassfish/domains/domain1/logs/mdc" COUNTER_PROCESSOR_TMP_DIRECTORY="/tmp" # If you wish to keep the logs, use a directory that is not periodically cleaned, e.g. 
#COUNTER_PROCESSOR_TMP_DIRECTORY="/usr/local/counter-processor-1.06/tmp" diff --git a/doc/sphinx-guides/source/admin/big-data-administration.rst b/doc/sphinx-guides/source/admin/big-data-administration.rst index b3c7e79c382..c1d2a02c4a2 100644 --- a/doc/sphinx-guides/source/admin/big-data-administration.rst +++ b/doc/sphinx-guides/source/admin/big-data-administration.rst @@ -206,7 +206,7 @@ Challenges: Users will need to be made aware of these limitations and the possibilities for managing them (e.g. by aggregating multiple files in a single, larger file, or storing smaller files in the base-store via the normal Dataverse upload UI). - There is currently `a bug `_ that won't allow users to transfer files from/to endpoints where they do not have permission to list the overall file tree (i.e. an institution manages /institution_name but the user only has access to /institution_name/my_dir.) Until that is fixed, a work-around is to first transfer data to an endpoint without this restriction. -- An alternative, experimental implementation of Globus polling of ongoing upload transfers was added in v6.4. This framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. While it is now the recommended option, it is not enabled by default. See the ``globus-use-experimental-async-framework`` feature flag (see :ref:`feature-flags`) and the JVM option :ref:`dataverse.files.globus-monitoring-server`. +- An alternative, experimental implementation of Globus polling of ongoing upload transfers was added in v6.4. This framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. While it is now the recommended option, it is not enabled by default. 
See the :ref:`dataverse.feature.globus-use-experimental-async-framework` feature flag and the JVM option :ref:`dataverse.files.globus-monitoring-server`. More details of the setup required to enable Globus is described in the `Community Dataverse-Globus Setup and Configuration document `_ and the references therein. @@ -280,11 +280,11 @@ Scaling-related Configuration There are a broad range of options (that are not turned on by default) for improving how well Solr indexing and searching scales and for handling more files per dataset. Some of these are useful for all installations while others are related to specific use cases, or are mostly for emergency use (e.g. disabling facets). (see :ref:`database-settings`, :ref:`jvm-options`, and :ref:`feature-flags` for more details): -- dataverse.feature.add-publicobject-solr-field=true - specifically marks unrestricted content as public in Solr. See :ref:`feature-flags`. -- dataverse.feature.avoid-expensive-solr-join=true - this tells Dataverse to use the feature above to speed up searches. See :ref:`feature-flags`. -- dataverse.feature.reduce-solr-deletes=true - when Solr entries are being updated, this avoids an unnecessary step (deletion of existing entries) for entries that are being replaced. See :ref:`feature-flags`. -- dataverse.feature.disable-dataset-thumbnail-autoselect=true - by default, Dataverse scans through all files in a dataset to find one that can be used as a thumbnail, which is expensive for many files. This disables that behavior to improve performance. See :ref:`feature-flags`. -- dataverse.feature.only-update-datacite-when-needed=true - reduces the load on DataCite and reduces Dataverse failures related to that load, which is important when using file PIDs on Datasets with many files. See :ref:`feature-flags`. +- :ref:`dataverse.feature.add-publicobject-solr-field` =true - specifically marks unrestricted content as public in Solr. 
+- :ref:`dataverse.feature.avoid-expensive-solr-join` =true - this tells Dataverse to use the feature above to speed up searches. +- :ref:`dataverse.feature.reduce-solr-deletes` =true - when Solr entries are being updated, this avoids an unnecessary step (deletion of existing entries) for entries that are being replaced. +- :ref:`dataverse.feature.disable-dataset-thumbnail-autoselect` =true - by default, Dataverse scans through all files in a dataset to find one that can be used as a thumbnail, which is expensive for many files. This disables that behavior to improve performance. +- :ref:`dataverse.feature.only-update-datacite-when-needed` =true - reduces the load on DataCite and reduces Dataverse failures related to that load, which is important when using file PIDs on Datasets with many files. - :ref:`dataverse.solr.min-files-to-use-proxy` = - improve performance/lower memory requirements when indexing datasets with many files, suggested value is in the range 200 to 500 - :ref:`dataverse.solr.concurrency.max-async-indexes` = - limits the number of index operations running in parallel. The default is 4, larger values may improve performance (if the Solr instance is appropriately sized) - :ref:`:SolrFullTextIndexing` - false improves performance at the expense of not indexing file contents @@ -302,6 +302,7 @@ There are a broad range of options (that are not turned on by default) for impro - :ref:`:DisableSolrFacetsWithoutJsession` - disables facets for users who have disabled cookies (e.g. for bots) - :ref:`:DisableUncheckedTypesFacet` - only disables the facet showing the number of collections, datasets, files matching the query (this facet is potentially less useful than others) - :ref:`:StoreIngestedTabularFilesWithVarHeaders` - by default, Dataverse stores ingested files without headers and dynamically adds them back at download time. 
Once this setting is enabled, Dataverse will leave the headers in place (for newly ingested files), reducing the cost of downloads +- :ref:`dataverse.bagit.zip.max-file-size`, :ref:`dataverse.bagit.zip.max-data-size`, and :ref:`dataverse.bagit.zip.holey` - options to control the size and temporary storage requirements when generating archival Bags - see :ref:`BagIt Export` Scaling Infrastructure diff --git a/doc/sphinx-guides/source/admin/dataverses-datasets.rst b/doc/sphinx-guides/source/admin/dataverses-datasets.rst index c916b79aaa8..9696c758b04 100644 --- a/doc/sphinx-guides/source/admin/dataverses-datasets.rst +++ b/doc/sphinx-guides/source/admin/dataverses-datasets.rst @@ -109,6 +109,64 @@ If the :AllowedCurationLabels setting has a value, one of the available choices Individual datasets can be configured to use specific curationLabelSets as well. See the "Datasets" section below. +.. _review-datasets-setup: + +Configure a Collection for Review Datasets +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:ref:`review-datasets-user` are a specialized type of dataset that can be used to review resources (such as datasets) in the Dataverse installation itself or resources in external data repositories. + +Review datasets require some setup, as described below. + +Load the Review Metadata Block +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +First, download the Review metadata block tsv file from :ref:`experimental-metadata`. + +Then, load the block and update Solr. See the following sections of :doc:`metadatacustomization` for details: + +- :ref:`load-tsv` +- :ref:`update-solr-schema` + +Create and Enable Custom "Rubric" Metadata Blocks +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The Review metadata block gives you a few basic fields common to all reviews such as the URL of the item being reviewed. + +You probably will want to create your own metadata blocks specific to the resources you are reviewing, your own "rubric". 
See :doc:`metadatacustomization` for details on creating and enabling custom metadata blocks. + +Instead of creating a new custom metadata block from scratch (if you simply want to evaluate the feature, for example), you can use the metadata blocks at https://github.com/IQSS/dataverse.harvard.edu + +After loading the block, don't forget to update the Solr schema! + +Create a Review Dataset Type +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Review datasets are built on the :ref:`dataset-types` feature. Dataset types can only be created via API so follow the steps under :ref:`api-add-dataset-type`. Copy and paste from below or download :download:`review.json <../../../../scripts/api/data/datasetTypes/review.json>` and pass it to the API. + +.. literalinclude:: ../../../../scripts/api/data/datasetTypes/review.json + :language: json + +Create a Collection for Reviews and Configure Permissions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Follow the normal steps: + +- :ref:`create-dataverse`. +- :ref:`dataverse-permissions`. + +Allow the Review Dataset Type for the Collection +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Non-dataset types, such as the "review" type, are only available when a collection admin has enabled them, via API. + +Using the API :ref:`collection-attributes-api`, change the ``allowedDatasetTypes`` attribute so that it includes "review". If you only want to allow reviews, you can pass just ``review``. If you want to allow multiple dataset types, you can pass a comma-separated list, such as ``review,dataset``. + +Invite Users to Create Review Datasets +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +At this point, users should be able to create review datasets via API, if you gave them permission on the collection. You can point them to :ref:`creating-a-review-dataset` for details. 
+ Datasets -------- diff --git a/doc/sphinx-guides/source/admin/discoverability.rst b/doc/sphinx-guides/source/admin/discoverability.rst index 22ff66246f0..3db42101e27 100644 --- a/doc/sphinx-guides/source/admin/discoverability.rst +++ b/doc/sphinx-guides/source/admin/discoverability.rst @@ -30,21 +30,22 @@ The HTML source of a dataset landing page includes "DC" (Dublin Core) ```` `` of Dataset Landing Pages ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -The ```` of the HTML source of a dataset landing page includes Schema.org JSON-LD metadata like this:: +`Croissant `_ is a metadata format for machine learning datasets. +In Dataverse, the ```` of the HTML source of a dataset landing page includes a "slim" version of Croissant metadata like this:: - @@ -2029,8 +2031,18 @@

#{bundle['dataset.publish.terms.help.tip']}

+ +
+ + + +
+
-
@@ -153,8 +165,9 @@ - diff --git a/src/main/webapp/metadataFragment.xhtml b/src/main/webapp/metadataFragment.xhtml index df497e890e8..37246ddf40f 100755 --- a/src/main/webapp/metadataFragment.xhtml +++ b/src/main/webapp/metadataFragment.xhtml @@ -244,7 +244,7 @@
+ jsf:rendered="#{((editMode == 'METADATA' or dsf.datasetFieldType.shouldDisplayOnCreate() or !dsf.isEmpty() or dsf.required or dsf.hasRequiredChildren) and dsf.include) or (!datasetPage and dsf.include)}"> diff --git a/src/main/webapp/permissions-manage.xhtml b/src/main/webapp/permissions-manage.xhtml index d2a37268288..819c6c59fcc 100644 --- a/src/main/webapp/permissions-manage.xhtml +++ b/src/main/webapp/permissions-manage.xhtml @@ -105,7 +105,8 @@ + oncomplete="PF('userGroupsForm').show();handleResizeDialog('userGroupDialog');" + process='@this'> #{bundle['dataverse.permissions.usersOrGroups.assignBtn']}
@@ -133,8 +134,9 @@ - + process="assignedRoles" + oncomplete="PF('confirmation').show()"> + #{bundle['dataverse.permissions.usersOrGroups.removeBtn']} @@ -272,8 +274,9 @@ #{bundle['dataverse.permissions.usersOrGroups.removeBtn.confirmation']}

-
@@ -221,11 +221,11 @@

- - diff --git a/src/test/java/edu/harvard/iq/dataverse/CitationServletTest.java b/src/test/java/edu/harvard/iq/dataverse/CitationServletTest.java new file mode 100644 index 00000000000..73162bbf4dd --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/CitationServletTest.java @@ -0,0 +1,112 @@ +package edu.harvard.iq.dataverse; + +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactory; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.pidproviders.doi.UnmanagedDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.crossref.CrossRefDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DataCiteDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.ezid.EZIdDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.fake.FakeDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.fake.FakeProviderFactory; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; +import edu.harvard.iq.dataverse.pidproviders.handle.HandleProviderFactory; +import edu.harvard.iq.dataverse.pidproviders.handle.UnmanagedHandlePidProvider; +import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider; +import edu.harvard.iq.dataverse.pidproviders.perma.UnmanagedPermaLinkPidProvider; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; +import jakarta.servlet.ServletException; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.ArgumentCaptor; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.MockitoAnnotations; +import 
org.mockito.junit.jupiter.MockitoExtension; + +import java.io.IOException; +import java.util.*; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.*; + +@ExtendWith(MockitoExtension.class) +@LocalJvmSettings + +//HANDLE 1 +@JvmSetting(key = JvmSettings.PID_PROVIDER_LABEL, value = "HDL 1", varArgs = "hdl1") +@JvmSetting(key = JvmSettings.PID_PROVIDER_TYPE, value = HandlePidProvider.TYPE, varArgs = "hdl1") +@JvmSetting(key = JvmSettings.PID_PROVIDER_AUTHORITY, value = "20.500.1234", varArgs = "hdl1") +@JvmSetting(key = JvmSettings.PID_PROVIDER_SHOULDER, value = "test", varArgs = "hdl1") +@JvmSetting(key = JvmSettings.PID_PROVIDER_MANAGED_LIST, value = "hdl:20.20.20/FK2ABCDEF", varArgs ="hdl1") +@JvmSetting(key = JvmSettings.HANDLENET_AUTH_HANDLE, value = "20.500.1234/ADMIN", varArgs ="hdl1") +@JvmSetting(key = JvmSettings.HANDLENET_INDEPENDENT_SERVICE, value = "true", varArgs ="hdl1") +@JvmSetting(key = JvmSettings.HANDLENET_INDEX, value = "1", varArgs ="hdl1") +@JvmSetting(key = JvmSettings.HANDLENET_KEY_PASSPHRASE, value = "passphrase", varArgs ="hdl1") +@JvmSetting(key = JvmSettings.HANDLENET_KEY_PATH, value = "/tmp/cred", varArgs ="hdl1") +//List to instantiate +@JvmSetting(key = JvmSettings.PID_PROVIDERS, value = "hdl1") + +public class CitationServletTest { + + @Mock + DvObjectServiceBean dvObjectService; + @Mock + HttpServletRequest request; + @Mock + HttpServletResponse response; + + + static CitationServlet citationServlet = new CitationServlet(); + + @BeforeAll + public static void setUp() { + Map pidProviderFactoryMap = new HashMap<>(); + pidProviderFactoryMap.put(HandlePidProvider.TYPE, new HandleProviderFactory()); + + PidUtil.clearPidProviders(); + + //Read list of providers to add + List providers = Arrays.asList(JvmSettings.PID_PROVIDERS.lookup().split(",\\s")); + //Iterate through the list of providers and add them using the PidProviderFactory of the 
appropriate type + for (String providerId : providers) { + System.out.println("Loading provider: " + providerId); + String type = JvmSettings.PID_PROVIDER_TYPE.lookup(providerId); + PidProviderFactory factory = pidProviderFactoryMap.get(type); + PidUtil.addToProviderList(factory.createPidProvider(providerId)); + } + PidUtil.addAllToUnmanagedProviderList(Arrays.asList(new UnmanagedDOIProvider(), + new UnmanagedHandlePidProvider(), new UnmanagedPermaLinkPidProvider())); + } + @BeforeEach + public void initMocks() { + MockitoAnnotations.initMocks(this); + } + + @AfterAll + public static void tearDownClass() throws Exception { + PidUtil.clearPidProviders(); + } + + @Test + public void testHandleRedirect() throws ServletException, IOException { + String pidString = "hdl:1902.1/test10052"; + DvObject dvObj = new Dataset(); + citationServlet.dvObjectService = dvObjectService = Mockito.mock(DvObjectServiceBean.class); + when(dvObjectService.findByGlobalId(any(GlobalId.class))).thenReturn(null); + when(dvObjectService.findByAltGlobalId(any(GlobalId.class), any())).thenReturn(dvObj); + when(request.getParameter("persistentId")).thenReturn(pidString); + ArgumentCaptor valueCapture = ArgumentCaptor.forClass(String.class); + doNothing().when(response).sendRedirect(valueCapture.capture()); + + citationServlet.doGet(request, response); + assertEquals("dataset.xhtml?persistentId=hdl:1902.1/test10052", valueCapture.getValue()); + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/DatasetVersionDifferenceTest.java b/src/test/java/edu/harvard/iq/dataverse/DatasetVersionDifferenceTest.java index 601d0c2d748..4ca4a984c2a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/DatasetVersionDifferenceTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/DatasetVersionDifferenceTest.java @@ -57,6 +57,10 @@ public void testDifferencing() { "You can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission.", 
URI.create("http://creativecommons.org/publicdomain/zero/1.0"), URI.create("/resources/images/cc0.png"), true, 1l); + License license2 = new License("CC BY 4.0", + "Share — copy and redistribute the material in any medium or format for any purpose, even commercially.", + URI.create("https://creativecommons.org/licenses/by/4.0/"), URI.create("/resources/images/cc0.png"), + true, 2l); license.setDefault(true); dataset.setProtocol("doi"); dataset.setAuthority("10.5072/FK2"); @@ -66,10 +70,14 @@ public void testDifferencing() { datasetVersion.setVersionState(DatasetVersion.VersionState.RELEASED); datasetVersion.setVersionNumber(1L); datasetVersion.setTermsOfUseAndAccess(new TermsOfUseAndAccess()); + datasetVersion.getTermsOfUseAndAccess().setLicense(license); DatasetVersion datasetVersion2 = new DatasetVersion(); datasetVersion2.setDataset(dataset); datasetVersion2.setVersionState(DatasetVersion.VersionState.DRAFT); - + datasetVersion2.setTermsOfUseAndAccess(new TermsOfUseAndAccess()); + datasetVersion2.getTermsOfUseAndAccess().setLicense(license); + datasetVersion.setFileMetadatas(new ArrayList<>()); + // Published version's two files DataFile dataFile = new DataFile(); dataFile.setId(1L); @@ -81,19 +89,17 @@ public void testDifferencing() { FileMetadata fileMetadata2 = createFileMetadata(20L, datasetVersion, dataFile2, "file2.txt"); + List fileMetadatas = new ArrayList<>(Arrays.asList(fileMetadata1, fileMetadata2)); + datasetVersion.setFileMetadatas(fileMetadatas); + // Draft version - same two files with one label change - FileMetadata fileMetadata3 = fileMetadata1.createCopy(); + FileMetadata fileMetadata3 = fileMetadata1.createCopyInVersion(datasetVersion2); fileMetadata3.setId(30L); - FileMetadata fileMetadata4 = fileMetadata2.createCopy(); + FileMetadata fileMetadata4 = fileMetadata2.createCopyInVersion(datasetVersion2); fileMetadata4.setLabel("file3.txt"); fileMetadata4.setId(40L); - List fileMetadatas = new ArrayList<>(Arrays.asList(fileMetadata1, 
fileMetadata2)); - datasetVersion.setFileMetadatas(fileMetadatas); - List fileMetadatas2 = new ArrayList<>(Arrays.asList(fileMetadata3, fileMetadata4)); - datasetVersion2.setFileMetadatas(fileMetadatas2); - SimpleDateFormat dateFmt = new SimpleDateFormat("yyyyMMdd"); Date publicationDate; try { @@ -163,6 +169,7 @@ public void testDifferencing() { // Set the published version's TermsOfUseAndAccess to a non-null value TermsOfUseAndAccess termsOfUseAndAccess = new TermsOfUseAndAccess(); datasetVersion.setTermsOfUseAndAccess(termsOfUseAndAccess); + datasetVersion.getTermsOfUseAndAccess().setLicense(license); compareResults(datasetVersion, datasetVersion2, expectedAddedFiles, expectedRemovedFiles, expectedChangedFileMetadata, expectedChangedVariableMetadata, expectedReplacedFiles, changedTerms); @@ -170,6 +177,7 @@ public void testDifferencing() { // Set the draft version's TermsOfUseAndAccess to a non-null value datasetVersion2.setTermsOfUseAndAccess(new TermsOfUseAndAccess()); + datasetVersion2.getTermsOfUseAndAccess().setLicense(license); compareResults(datasetVersion, datasetVersion2, expectedAddedFiles, expectedRemovedFiles, expectedChangedFileMetadata, expectedChangedVariableMetadata, expectedReplacedFiles, changedTerms); @@ -194,6 +202,21 @@ public void testDifferencing() { compareResults(datasetVersion, datasetVersion2, expectedAddedFiles, expectedRemovedFiles, expectedChangedFileMetadata, expectedChangedVariableMetadata, expectedReplacedFiles, changedTerms); + + // Change License in Draft version + + datasetVersion2.getTermsOfUseAndAccess().setLicense(license2); + datasetVersion2.getTermsOfUseAndAccess().setTermsOfUse(""); + datasetVersion.getTermsOfUseAndAccess().setDisclaimer(""); + + String[] termField3 = new String[] { + BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.license"), + "CC0 1.0", "CC BY 4.0" }; + changedTerms = new ArrayList<>(); + changedTerms.add(termField3); + + compareResults(datasetVersion, datasetVersion2, expectedAddedFiles, 
expectedRemovedFiles, + expectedChangedFileMetadata, expectedChangedVariableMetadata, expectedReplacedFiles, changedTerms); } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java index dd8ddd2d315..fb6ccdec977 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java @@ -5,31 +5,33 @@ */ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Guestbook; +import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.json.JsonParseException; import io.restassured.RestAssured; import io.restassured.path.json.JsonPath; import io.restassured.response.Response; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.util.FileUtil; -import java.io.IOException; -import java.util.zip.ZipInputStream; - -import jakarta.json.Json; +import org.hamcrest.collection.IsMapContaining; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; -import java.util.zip.ZipEntry; + import java.io.ByteArrayOutputStream; +import java.io.IOException; import java.io.InputStream; import java.util.HashMap; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; -import org.hamcrest.collection.IsMapContaining; - +import static io.restassured.RestAssured.get; import static jakarta.ws.rs.core.Response.Status.*; -import static org.hamcrest.MatcherAssert.*; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.CoreMatchers.not; +import static org.hamcrest.CoreMatchers.*; +import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.jupiter.api.Assertions.*; /** @@ -158,6 +160,7 @@ public static void 
setUp() throws InterruptedException { tabFile4NameUnpublishedConvert = tabFile4NameUnpublished.substring(0, tabFile4NameUnpublished.indexOf(".dta")) + ".tab"; String tab4PathToFile = "scripts/search/data/tabular/" + tabFile4NameUnpublished; Response tab4AddResponse = UtilIT.uploadFileViaNative(datasetId.toString(), tab4PathToFile, apiToken); + tab4AddResponse.prettyPrint(); tabFile4IdUnpublished = JsonPath.from(tab4AddResponse.body().asString()).getInt("data.files[0].dataFile.id"); assertTrue(UtilIT.sleepForLock(datasetId.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Failed test if Ingest Lock exceeds max duration " + tabFile2Name); @@ -411,18 +414,23 @@ public void testDownloadMultipleFiles_LoggedAndNot_Unpublished() throws IOExcept HashMap files2 = readZipResponse(authDownloadConvertedUnpublished.getBody().asInputStream()); assertEquals(4, files2.size()); //size +1 for manifest, we have access to unpublished + // Guest User can not access tabFile4IdUnpublished so only the first 2 files will be downloaded Response anonDownloadOriginalUnpublished = UtilIT.downloadFilesOriginal(new Integer[]{basicFileId,tabFile1Id,tabFile4IdUnpublished}); - assertEquals(404, anonDownloadOriginalUnpublished.getStatusCode()); + assertEquals(200, anonDownloadOriginalUnpublished.getStatusCode()); int origAnonSize = anonDownloadOriginalUnpublished.getBody().asByteArray().length; HashMap files3 = readZipResponse(anonDownloadOriginalUnpublished.getBody().asInputStream()); - assertEquals(0, files3.size()); //A size of 0 indicates the zip creation was interrupted. 
+ // expect the zip to have 3 files: 2 downloaded files plus the manifest + assertEquals(3, files3.size()); + assertTrue(files3.containsKey("120745.dta")); assertTrue(origAnonSize < origAuthSize + margin); Response anonDownloadConvertedUnpublished = UtilIT.downloadFiles(new Integer[]{basicFileId,tabFile1Id,tabFile4IdUnpublished}); - assertEquals(404, anonDownloadConvertedUnpublished.getStatusCode()); + assertEquals(200, anonDownloadConvertedUnpublished.getStatusCode()); int convertAnonSize = anonDownloadConvertedUnpublished.getBody().asByteArray().length; HashMap files4 = readZipResponse(anonDownloadConvertedUnpublished.getBody().asInputStream()); - assertEquals(0, files4.size()); //A size of 0 indicates the zip creation was interrupted. + // expect the zip to have 3 files: 2 downloaded files plus the manifest + assertEquals(3, files4.size()); + assertTrue(files4.containsKey("120745.tab")); assertTrue(convertAnonSize < convertAuthSize + margin); } @@ -487,15 +495,24 @@ private HashMap readZipResponse(InputStream iStrea return fileStreams; } - + @Test - public void testRequestAccess() throws InterruptedException { - + public void testRequestAccess() throws InterruptedException, IOException, JsonParseException { + String pathToJsonFile = "scripts/api/data/dataset-create-new.json"; Response createDatasetResponse = UtilIT.createDatasetViaNativeApi(dataverseAlias, pathToJsonFile, apiToken); + createDatasetResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); createDatasetResponse.prettyPrint(); Integer datasetIdNew = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); - + String persistentIdNew = JsonPath.from(createDatasetResponse.body().asString()).getString("data.persistentId"); + + // Test without guestbook-at-request=true the required guestbook response will not prevent the access request from succeeding + // Create a Guestbook + Guestbook guestbook = UtilIT.createRandomGuestbook(dataverseAlias, persistentId, apiToken); + // Set 
the guestbook on the Dataset + UtilIT.updateDatasetGuestbook(persistentIdNew, guestbook.getId(), apiToken).prettyPrint(); + basicFileName = "004.txt"; String basicPathToFile = "scripts/search/data/replace_test/" + basicFileName; Response basicAddResponse = UtilIT.uploadFileViaNative(datasetIdNew.toString(), basicPathToFile, apiToken); @@ -507,7 +524,7 @@ public void testRequestAccess() throws InterruptedException { Integer tabFile3IdRestrictedNew = JsonPath.from(tab3AddResponse.body().asString()).getInt("data.files[0].dataFile.id"); assertTrue(UtilIT.sleepForLock(datasetIdNew.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Failed test if Ingest Lock exceeds max duration " + tab3PathToFile); - + Response restrictResponse = UtilIT.restrictFile(tabFile3IdRestrictedNew.toString(), true, apiToken); restrictResponse.prettyPrint(); restrictResponse.then().assertThat() @@ -551,21 +568,135 @@ public void testRequestAccess() throws InterruptedException { //grant file access Response grantFileAccessResponse = UtilIT.grantFileAccess(tabFile3IdRestrictedNew.toString(), "@" + apiIdentifierRando, apiToken); assertEquals(200, grantFileAccessResponse.getStatusCode()); - + //if you make a request while you have been granted access you should get a command exception requestFileAccessResponse = UtilIT.requestFileAccess(tabFile3IdRestrictedNew.toString(), apiTokenRando); assertEquals(400, requestFileAccessResponse.getStatusCode()); - + //if you make a request of a public file you should also get a command exception requestFileAccessResponse = UtilIT.requestFileAccess(basicFileIdNew.toString(), apiTokenRando); assertEquals(400, requestFileAccessResponse.getStatusCode()); - + // disable the guestbook so we can download without guestbook response + UtilIT.enableGuestbook(dataverseAlias, guestbook.getId(), apiToken, "false").prettyPrint(); //Now should be able to download randoDownload = UtilIT.downloadFile(tabFile3IdRestrictedNew, apiTokenRando); 
assertEquals(OK.getStatusCode(), randoDownload.getStatusCode()); - //revokeFileAccess + //revokeFileAccess + Response revokeFileAccessResponse = UtilIT.revokeFileAccess(tabFile3IdRestrictedNew.toString(), "@" + apiIdentifierRando, apiToken); + assertEquals(200, revokeFileAccessResponse.getStatusCode()); + + listAccessRequestResponse = UtilIT.getAccessRequestList(tabFile3IdRestrictedNew.toString(), apiToken); + assertEquals(404, listAccessRequestResponse.getStatusCode()); + } + + @Test + @Disabled // Only run manually after setting JVM setting -Ddataverse.files.guestbook-at-request=true + public void testRequestAccessWithGuestbook() throws IOException, JsonParseException { + + String pathToJsonFile = "scripts/api/data/dataset-create-new.json"; + Response createDatasetResponse = UtilIT.createDatasetViaNativeApi(dataverseAlias, pathToJsonFile, apiToken); + createDatasetResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + createDatasetResponse.prettyPrint(); + Integer datasetIdNew = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + String persistentIdNew = JsonPath.from(createDatasetResponse.body().asString()).getString("data.persistentId"); + + // Create a Guestbook + Guestbook guestbook = UtilIT.createRandomGuestbook(dataverseAlias, persistentId, apiToken); + String guestbookResponseJson = UtilIT.generateGuestbookResponse(guestbook); + + basicFileName = "004.txt"; + String basicPathToFile = "scripts/search/data/replace_test/" + basicFileName; + Response basicAddResponse = UtilIT.uploadFileViaNative(datasetIdNew.toString(), basicPathToFile, apiToken); + Integer basicFileIdNew = JsonPath.from(basicAddResponse.body().asString()).getInt("data.files[0].dataFile.id"); + + String tabFile3NameRestrictedNew = "stata13-auto-withstrls.dta"; + String tab3PathToFile = "scripts/search/data/tabular/" + tabFile3NameRestrictedNew; + Response tab3AddResponse = UtilIT.uploadFileViaNative(datasetIdNew.toString(), tab3PathToFile, apiToken); + 
Integer tabFile3IdRestrictedNew = JsonPath.from(tab3AddResponse.body().asString()).getInt("data.files[0].dataFile.id"); + + assertTrue(UtilIT.sleepForLock(datasetIdNew.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Failed test if Ingest Lock exceeds max duration " + tab3PathToFile); + + Response restrictResponse = UtilIT.restrictFile(tabFile3IdRestrictedNew.toString(), true, apiToken); + restrictResponse.prettyPrint(); + restrictResponse.then().assertThat() + .statusCode(OK.getStatusCode()); + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + assertEquals(200, createUser.getStatusCode()); + String apiTokenRando = UtilIT.getApiTokenFromResponse(createUser); + String apiIdentifierRando = UtilIT.getUsernameFromResponse(createUser); + + Response randoDownload = UtilIT.downloadFile(tabFile3IdRestrictedNew, apiTokenRando); + assertEquals(403, randoDownload.getStatusCode()); + + Response requestFileAccessResponse = UtilIT.requestFileAccess(tabFile3IdRestrictedNew.toString(), apiTokenRando); + //Cannot request until we set the dataset to allow requests + assertEquals(400, requestFileAccessResponse.getStatusCode()); + //Update Dataset to allow requests + Response allowAccessRequestsResponse = UtilIT.allowAccessRequests(datasetIdNew.toString(), true, apiToken); + assertEquals(200, allowAccessRequestsResponse.getStatusCode()); + //Must republish to get it to work + Response publishDataset = UtilIT.publishDatasetViaNativeApi(datasetIdNew, "major", apiToken); + assertEquals(200, publishDataset.getStatusCode()); + + // Set the guestbook on the Dataset + UtilIT.updateDatasetGuestbook(persistentIdNew, guestbook.getId(), apiToken).prettyPrint(); + // Set the response required on the Access Request as apposed to being on Download + UtilIT.setGuestbookEntryOnRequest(datasetId.toString(), apiToken, Boolean.TRUE).prettyPrint(); + // Request file access WITHOUT the required Guestbook Response (getEffectiveGuestbookEntryAtRequest) + 
requestFileAccessResponse = UtilIT.requestFileAccess(tabFile3IdRestrictedNew.toString(), apiTokenRando); + requestFileAccessResponse.prettyPrint(); + assertEquals(400, requestFileAccessResponse.getStatusCode()); + // Request file access with the required Guestbook Response (getEffectiveGuestbookEntryAtRequest) + requestFileAccessResponse = UtilIT.requestFileAccess(tabFile3IdRestrictedNew.toString(), apiTokenRando, guestbookResponseJson); + requestFileAccessResponse.prettyPrint(); + assertEquals(200, requestFileAccessResponse.getStatusCode()); + // Request a second time should fail since the request was already made + requestFileAccessResponse = UtilIT.requestFileAccess(tabFile3IdRestrictedNew.toString(), apiTokenRando, guestbookResponseJson); + requestFileAccessResponse.prettyPrint(); + requestFileAccessResponse.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("message", equalTo(BundleUtil.getStringFromBundle("access.api.requestAccess.failure.requestExists"))); + + Response listAccessRequestResponse = UtilIT.getAccessRequestList(tabFile3IdRestrictedNew.toString(), apiToken); + listAccessRequestResponse.prettyPrint(); + assertEquals(200, listAccessRequestResponse.getStatusCode()); + System.out.println("List Access Request: " + listAccessRequestResponse.prettyPrint()); + + listAccessRequestResponse = UtilIT.getAccessRequestList(tabFile3IdRestrictedNew.toString(), apiTokenRando); + listAccessRequestResponse.prettyPrint(); + assertEquals(403, listAccessRequestResponse.getStatusCode()); + + Response rejectFileAccessResponse = UtilIT.rejectFileAccessRequest(tabFile3IdRestrictedNew.toString(), "@" + apiIdentifierRando, apiToken); + assertEquals(200, rejectFileAccessResponse.getStatusCode()); + + requestFileAccessResponse = UtilIT.requestFileAccess(tabFile3IdRestrictedNew.toString(), apiTokenRando); + //grant file access + Response grantFileAccessResponse = UtilIT.grantFileAccess(tabFile3IdRestrictedNew.toString(), "@" + apiIdentifierRando, apiToken); 
+ assertEquals(200, grantFileAccessResponse.getStatusCode()); + + //if you make a request while you have been granted access you should get a command exception + requestFileAccessResponse = UtilIT.requestFileAccess(tabFile3IdRestrictedNew.toString(), apiTokenRando); + assertEquals(400, requestFileAccessResponse.getStatusCode()); + + //if you make a request of a public file you should also get a command exception + requestFileAccessResponse = UtilIT.requestFileAccess(basicFileIdNew.toString(), apiTokenRando); + assertEquals(400, requestFileAccessResponse.getStatusCode()); + + //Now should be able to download but the guestbook response is still required + randoDownload = UtilIT.downloadFile(tabFile3IdRestrictedNew, apiTokenRando); + assertEquals(BAD_REQUEST.getStatusCode(), randoDownload.getStatusCode()); + randoDownload = UtilIT.getDownloadFileUrlWithGuestbookResponse(tabFile3IdRestrictedNew, apiTokenRando, guestbookResponseJson); + String signedUrl = UtilIT.getSignedUrlFromResponse(randoDownload); + // Download the file using the signed url + Response signedUrlResponse = get(signedUrl); + assertEquals(OK.getStatusCode(), signedUrlResponse.getStatusCode()); + + //revokeFileAccess Response revokeFileAccessResponse = UtilIT.revokeFileAccess(tabFile3IdRestrictedNew.toString(), "@" + apiIdentifierRando, apiToken); assertEquals(200, revokeFileAccessResponse.getStatusCode()); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java index 6f3ffaa83b8..457bb405795 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java @@ -19,7 +19,6 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; - import java.io.IOException; import java.nio.file.Files; import java.nio.file.Paths; @@ -29,7 +28,6 @@ import java.util.Map; import java.util.UUID; import 
java.util.logging.Logger; - import static io.restassured.RestAssured.given; import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; import static jakarta.ws.rs.core.Response.Status.CREATED; @@ -43,6 +41,7 @@ import static org.hamcrest.CoreMatchers.notNullValue; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import io.restassured.http.ContentType; import static org.junit.jupiter.api.Assertions.assertTrue; public class AdminIT { @@ -50,6 +49,8 @@ public class AdminIT { private static final Logger logger = Logger.getLogger(AdminIT.class.getCanonicalName()); private final String testNonSuperuserApiToken = createTestNonSuperuserApiToken(); + static final String clientId = "test"; + static final String clientSecret = "94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8"; @BeforeAll public static void setUp() { @@ -681,6 +682,19 @@ public void testCreateNonBuiltinUserViaApi() { assertEquals(200, deleteUserToConvert.getStatusCode()); } + + @Test + void testCreateUserViaAPI_WithInvalidJson() { + Response response = given() + .body("{invalid}") + .contentType(ContentType.JSON) + .post("/api/admin/authenticatedUsers"); + + response.then() + .assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("message", containsString("Unexpected char")); + } @Test @@ -1105,4 +1119,48 @@ public void testSetSuperUserStatus(Boolean status) { toggleSuperuser.then().assertThat() .statusCode(OK.getStatusCode()); } + + // Testing creating an OIDC Provider not intended for use in JSF UI + @Test + public void testAddAuthProviders() { + Response createSuperuser = UtilIT.createRandomUser(); + String superuserUsername = UtilIT.getUsernameFromResponse(createSuperuser); + String superuserApiToken = UtilIT.getApiTokenFromResponse(createSuperuser); + Response toggleSuperuser = UtilIT.makeSuperUser(superuserUsername); + toggleSuperuser.then().assertThat() + .statusCode(OK.getStatusCode()); + + Response getAuthProviders = 
UtilIT.getAuthProviders(superuserApiToken); + getAuthProviders.prettyPrint(); + + String factoryData = String.format("type: oidc | issuer: http://keycloak.mydomain.com:8090/realms/test | clientId: %s | clientSecret: %s", clientId, clientSecret); + JsonObject jsonObject = Json.createObjectBuilder() + .add("id", "oidc1") + .add("factoryAlias", "oidc") + .add("title", "Open ID Connect SPA") + .add("subtitle", "SPA OIDC Provider") + .add("factoryData", factoryData) + .add("enabled", false) + .build(); + Response addAuthProviders = UtilIT.addAuthProviders(superuserApiToken, jsonObject); + addAuthProviders.prettyPrint(); + addAuthProviders.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + getAuthProviders = UtilIT.getAuthProviders(superuserApiToken); + getAuthProviders.prettyPrint(); + getAuthProviders.then().assertThat() + .statusCode(OK.getStatusCode()); + + boolean found = false; + List<Map<String, Object>> providers = getAuthProviders.body().jsonPath().getList("data"); + for (Map<String, Object> provider : providers) { + if ("oidc1".equalsIgnoreCase((String) provider.get("id"))) { + found = true; + assertTrue(provider.get("title") != null && provider.get("title").equals("Open ID Connect SPA")); + assertTrue(provider.get("enabled") != null && !(Boolean) provider.get("enabled")); + } + } + assertTrue(found); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java b/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java index 16c44003f35..b649ad6bb95 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java @@ -87,7 +87,7 @@ public void testBagItExport() throws IOException { .replace('.', '-').toLowerCase(); // spacename: doi-10-5072-fk2-fosg5q - String pathToZip = bagitExportDir + "/" + spaceName + "v1.0" + ".zip"; + String pathToZip = bagitExportDir + "/" + spaceName + ".v1.0" + ".zip"; try { // give the bag time to generate diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java 
b/src/test/java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java index 72f8fa638e1..060fd4a47f2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java @@ -7,6 +7,8 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import io.restassured.path.json.JsonPath; +import org.hamcrest.CoreMatchers; +import org.hamcrest.Matchers; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -30,7 +32,7 @@ public static void setUpClass() { } @Test - public void testRetrieveMyDataAsJsonString() { + public void testRetrieveMyDataAsJsonString() throws InterruptedException { // Call with bad API token ArrayList emptyRoleIdsList = new ArrayList<>(); Response badApiTokenResponse = UtilIT.retrieveMyDataAsJsonString("bad-token", "dummy-user-identifier", emptyRoleIdsList); @@ -38,7 +40,7 @@ public void testRetrieveMyDataAsJsonString() { // Call as superuser with invalid user identifier Response createUserResponse = UtilIT.createRandomUser(); - Response makeSuperUserResponse = UtilIT.makeSuperUser(UtilIT.getUsernameFromResponse(createUserResponse)); + Response makeSuperUserResponse = UtilIT.setSuperuserStatus(UtilIT.getUsernameFromResponse(createUserResponse), true); assertEquals(OK.getStatusCode(), makeSuperUserResponse.getStatusCode()); String superUserApiToken = UtilIT.getApiTokenFromResponse(createUserResponse); @@ -85,6 +87,7 @@ public void testRetrieveMyDataAsJsonString() { // Call as normal user with one valid dataverse role and one dataverse result UtilIT.grantRoleOnDataverse(dataverseAlias, DataverseRole.DS_CONTRIBUTOR.toString(), "@" + normalUserUsername, superUserApiToken); + Thread.sleep(4000); Response oneDataverseResponse = UtilIT.retrieveMyDataAsJsonString(normalUserApiToken, "", new ArrayList<>(Arrays.asList(5L))); oneDataverseResponse.prettyPrint(); @@ -116,7 +119,7 @@ public void testRetrieveMyDataCollections() throws InterruptedException { Response 
retrieveMyCollectionListResponse; // Create Superuser Response createUserResponse = UtilIT.createRandomUser(); - Response makeSuperUserResponse = UtilIT.makeSuperUser(UtilIT.getUsernameFromResponse(createUserResponse)); + Response makeSuperUserResponse = UtilIT.setSuperuserStatus(UtilIT.getUsernameFromResponse(createUserResponse), true); assertEquals(OK.getStatusCode(), makeSuperUserResponse.getStatusCode()); String superUserUsername = UtilIT.getUsernameFromResponse(createUserResponse); String superUserApiToken = UtilIT.getApiTokenFromResponse(createUserResponse); @@ -407,6 +410,112 @@ public void testRetrieveMyDataAsJsonStringSortOrder() { assertEquals(OK.getStatusCode(), deleteSuperUserResponse.getStatusCode()); } + @Test + public void testRetrieveMyDataWithMetadataFields() { + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + String datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse).toString(); + + UtilIT.sleepForReindex(datasetId, apiToken, 5); + + Response myDataWithAuthor = UtilIT.retrieveMyDataAsJsonString(apiToken, "", new ArrayList<>(Arrays.asList(6L)), "&metadata_fields=citation:author"); + myDataWithAuthor.prettyPrint(); + myDataWithAuthor.then().assertThat() + .body("data.items[0].metadataBlocks.citation.displayName", CoreMatchers.equalTo("Citation Metadata")) + .body("data.items[0].metadataBlocks.citation.fields[0].typeName", CoreMatchers.equalTo("author")) + .body("data.items[0].metadataBlocks.citation.fields[0].value[0].authorName.value", CoreMatchers.equalTo("Finch, Fiona")) + 
.body("data.items[0].metadataBlocks.citation.fields[0].value[0].authorAffiliation.value", CoreMatchers.equalTo("Birds Inc.")) + .statusCode(OK.getStatusCode()); + + Response subFieldsNotSupported = UtilIT.retrieveMyDataAsJsonString(apiToken, "", new ArrayList<>(Arrays.asList(6L)), "&metadata_fields=citation:authorAffiliation"); + subFieldsNotSupported.prettyPrint(); + subFieldsNotSupported.then().assertThat() + .body("data.items[0].metadataBlocks.citation.displayName", CoreMatchers.equalTo("Citation Metadata")) + // No fields returned. authorAffiliation is a subfield of author and not supported. + .body("data.items[0].metadataBlocks.citation.fields", Matchers.empty()) + .statusCode(OK.getStatusCode()); + + Response myDataWithAllFieldsFromCitation = UtilIT.retrieveMyDataAsJsonString(apiToken, "", new ArrayList<>(Arrays.asList(6L)), "&metadata_fields=citation:*"); + // Many more fields printed + myDataWithAllFieldsFromCitation.prettyPrint(); + myDataWithAllFieldsFromCitation.then().assertThat() + .body("data.items[0].metadataBlocks.citation.displayName", CoreMatchers.equalTo("Citation Metadata")) + // Many fields returned, all of the citation block that has been filled in. 
+ .body("data.items[0].metadataBlocks.citation.fields", Matchers.hasSize(5)) + .statusCode(OK.getStatusCode()); + + } + + @Test + public void testRetrieveMyDataWithCollections() { + Response createUser = UtilIT.createRandomUser(); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + JsonPath createdDataverse = JsonPath.from(createDataverseResponse.body().asString()); + String dataverseName = createdDataverse.getString("data.name"); + String dataverseAlias = createdDataverse.getString("data.alias"); + Integer dataverseId = createdDataverse.getInt("data.id"); + + UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken).then().assertThat().statusCode(OK.getStatusCode()); + + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDatasetResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + JsonPath createdDataset = JsonPath.from(createDatasetResponse.body().asString()); + int datasetId = createdDataset.getInt("data.id"); + String datasetPid = createdDataset.getString("data.persistentId"); + + UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken).then().assertThat().statusCode(OK.getStatusCode()); + + UtilIT.sleepForReindex(datasetPid, apiToken, 5); + + // Test that the Dataverse collection that the dataset was created in is returned + Response myDataResponse = UtilIT.retrieveMyDataAsJsonString(apiToken, "", new ArrayList<>(Arrays.asList(6L)), "&show_collections=true"); + myDataResponse.prettyPrint(); + myDataResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.items[0].collections.size()", CoreMatchers.is(1)) + .body("data.items[0].collections[0].id", CoreMatchers.is(dataverseId)) + .body("data.items[0].collections[0].name", 
CoreMatchers.is(dataverseName)) + .body("data.items[0].collections[0].alias", CoreMatchers.is(dataverseAlias)); + + Response createDataverse2Response = UtilIT.createRandomDataverse(apiToken); + createDataverse2Response.prettyPrint(); + createDataverse2Response.then().assertThat().statusCode(CREATED.getStatusCode()); + JsonPath createDataverse2 = JsonPath.from(createDataverse2Response.body().asString()); + String dataverse2Name = createDataverse2.getString("data.name"); + String dataverse2Alias = createDataverse2.getString("data.alias"); + Integer dataverse2Id = createDataverse2.getInt("data.id"); + + UtilIT.publishDataverseViaNativeApi(dataverse2Alias, apiToken).then().assertThat().statusCode(OK.getStatusCode()); + + UtilIT.linkDataset(datasetPid, dataverse2Alias, apiToken).then().assertThat().statusCode(OK.getStatusCode()); + + UtilIT.sleepForReindex(String.valueOf(datasetId), apiToken, 5); + + // Test that the Dataverse collection that the dataset was linked to is also returned + myDataResponse = UtilIT.retrieveMyDataAsJsonString(apiToken, "", new ArrayList<>(Arrays.asList(6L)), "&show_collections=true"); + myDataResponse.prettyPrint(); + myDataResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.items[0].collections.size()", CoreMatchers.is(2)) + .body("data.items[0].collections", CoreMatchers.hasItems( + Map.of("id", dataverseId, "name", dataverseName, "alias", dataverseAlias), + Map.of("id", dataverse2Id, "name", dataverse2Name, "alias", dataverse2Alias) + )); + + } + private static String prettyPrintError(String resourceBundleKey, List params) { final String errorMessage; if (params == null || params.isEmpty()) { diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetTypesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetTypesIT.java index 205725822ff..ce914531839 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetTypesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetTypesIT.java @@ -1,6 
+1,11 @@ package edu.harvard.iq.dataverse.api; +import static edu.harvard.iq.dataverse.api.ApiConstants.DS_VERSION_LATEST_PUBLISHED; import edu.harvard.iq.dataverse.dataset.DatasetType; +import edu.harvard.iq.dataverse.util.StringUtil; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + +import static io.restassured.path.json.JsonPath.with; import io.restassured.RestAssured; import io.restassured.path.json.JsonPath; import io.restassured.response.Response; @@ -10,6 +15,10 @@ import static jakarta.ws.rs.core.Response.Status.CREATED; import static jakarta.ws.rs.core.Response.Status.FORBIDDEN; import static jakarta.ws.rs.core.Response.Status.OK; + +import java.io.IOException; +import java.util.List; +import java.util.Map; import java.util.UUID; import org.hamcrest.CoreMatchers; import static org.hamcrest.CoreMatchers.containsString; @@ -25,6 +34,7 @@ public class DatasetTypesIT { final static String INSTRUMENT = "instrument"; + private static String apiTokenSuperuser; @BeforeAll public static void setUpClass() { @@ -32,12 +42,20 @@ public static void setUpClass() { Response createUser = UtilIT.createRandomUser(); createUser.then().assertThat().statusCode(OK.getStatusCode()); - String username = UtilIT.getUsernameFromResponse(createUser); - String apiToken = UtilIT.getApiTokenFromResponse(createUser); - UtilIT.setSuperuserStatus(username, true).then().assertThat().statusCode(OK.getStatusCode()); - - ensureDatasetTypeIsPresent(DatasetType.DATASET_TYPE_SOFTWARE, apiToken); - ensureDatasetTypeIsPresent(INSTRUMENT, apiToken); + String usernameSuperuser = UtilIT.getUsernameFromResponse(createUser); + apiTokenSuperuser = UtilIT.getApiTokenFromResponse(createUser); + UtilIT.setSuperuserStatus(usernameSuperuser, true).then().assertThat().statusCode(OK.getStatusCode()); + + // This description for software is shortened from https://datacite-metadata-schema.readthedocs.io/en/4.5/appendices/appendix-1/resourceTypeGeneral/#software + 
ensureDatasetTypeIsPresent(DatasetType.DATASET_TYPE_SOFTWARE, "A computer program in either source code (text) or compiled form.", apiTokenSuperuser); + String reviewDescription = null; + try { + reviewDescription = JsonUtil.getJsonObjectFromFile("scripts/api/data/datasetTypes/review.json").getString("description"); + } catch (IOException e) { + } + ensureDatasetTypeIsPresent(DatasetType.DATASET_TYPE_REVIEW, reviewDescription, apiTokenSuperuser); + // This description for instrument is from https://datacite-metadata-schema.readthedocs.io/en/4.5/appendices/appendix-1/resourceTypeGeneral/#instrument + ensureDatasetTypeIsPresent(INSTRUMENT, "A device, tool or apparatus used to obtain, measure and/or analyze data.", apiTokenSuperuser); } @AfterAll @@ -46,7 +64,7 @@ public static void afterClass() { UtilIT.setDisplayOnCreate("astroInstrument", false); } - private static void ensureDatasetTypeIsPresent(String datasetType, String apiToken) { + private static void ensureDatasetTypeIsPresent(String datasetType, String description, String apiToken) { Response getDatasetType = UtilIT.getDatasetType(datasetType); getDatasetType.prettyPrint(); String typeFound = JsonPath.from(getDatasetType.getBody().asString()).getString("data.name"); @@ -55,12 +73,36 @@ private static void ensureDatasetTypeIsPresent(String datasetType, String apiTok return; } System.out.println("The " + datasetType + "type wasn't found. 
Create it."); - String jsonIn = Json.createObjectBuilder().add("name", datasetType).build().toString(); + String displayName = capitalize(datasetType); + String jsonIn = Json.createObjectBuilder() + .add("name", datasetType) + .add("displayName", displayName) + .add("description", description) + .build().toString(); Response typeAdded = UtilIT.addDatasetType(jsonIn, apiToken); typeAdded.prettyPrint(); typeAdded.then().assertThat().statusCode(OK.getStatusCode()); } + private static String capitalize(String stringIn) { + return stringIn.substring(0, 1).toUpperCase() + stringIn.substring(1); + } + + /** + * @param allowedDatasetTypes comma separated (e.g. "dataset,software") + */ + private void setAllowedDatasetTypes(String dataverseAlias, String allowedDatasetTypes) { + String[] allowedDatasetTypeNames = allowedDatasetTypes.split(","); + Response setAllowedDatasetTypes = UtilIT.setCollectionAttribute(dataverseAlias, "allowedDatasetTypes", allowedDatasetTypes, + apiTokenSuperuser); + setAllowedDatasetTypes.prettyPrint(); + setAllowedDatasetTypes.then().assertThat() + .statusCode(OK.getStatusCode()) + // Just test the first name. (We only have the name to test with, + // as an argument to this method.) They should be in order. 
+ .body("data.allowedDatasetTypes[0].name", is(allowedDatasetTypeNames[0])); + } + @Test public void testCreateSoftwareDatasetNative() { Response createUser = UtilIT.createRandomUser(); @@ -73,6 +115,8 @@ public void testCreateSoftwareDatasetNative() { String dataverseAlias = UtilIT.getAliasFromResponse(createDataverse); Integer dataverseId = UtilIT.getDataverseIdFromResponse(createDataverse); + setAllowedDatasetTypes(dataverseAlias, "dataset,software"); + String jsonIn = UtilIT.getDatasetJson("doc/sphinx-guides/source/_static/api/dataset-create-software.json"); Response createSoftware = UtilIT.createDataset(dataverseAlias, jsonIn, apiToken); @@ -118,10 +162,16 @@ public void testCreateSoftwareDatasetNative() { .body("data.total_count", CoreMatchers.is(2)) .body("data.count_in_response", CoreMatchers.is(2)) .body("data.facets[0].datasetType.friendly", CoreMatchers.is("Dataset Type")) - .body("data.facets[0].datasetType.labels[0].Dataset", CoreMatchers.is(1)) - .body("data.facets[0].datasetType.labels[1].Software", CoreMatchers.is(1)) + .body("data.facets[0].datasetType.labels[0].dataset", CoreMatchers.is(1)) + .body("data.facets[0].datasetType.labels[1].software", CoreMatchers.is(1)) .statusCode(OK.getStatusCode()); + Response getCitationCsl = UtilIT.getDatasetVersionCitationFormat(datasetId, DS_VERSION_LATEST_PUBLISHED, false, "CSL", apiToken); + getCitationCsl.prettyPrint(); + getCitationCsl.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("type", equalTo("software")); + // Response searchAsGuest = UtilIT.search(SearchFields.DATASET_TYPE + ":software", null); // searchAsGuest.prettyPrint(); // searchAsGuest.then().assertThat() @@ -143,6 +193,7 @@ public void testCreateDatasetSemantic() { createDataverse.then().assertThat().statusCode(CREATED.getStatusCode()); String dataverseAlias = UtilIT.getAliasFromResponse(createDataverse); Integer dataverseId = UtilIT.getDataverseIdFromResponse(createDataverse); + setAllowedDatasetTypes(dataverseAlias, 
"software"); String jsonIn = UtilIT.getDatasetJson("doc/sphinx-guides/source/_static/api/dataset-create-software.jsonld"); @@ -177,6 +228,7 @@ public void testImportJson() { createDataverse.then().assertThat().statusCode(CREATED.getStatusCode()); String dataverseAlias = UtilIT.getAliasFromResponse(createDataverse); Integer dataverseId = UtilIT.getDataverseIdFromResponse(createDataverse); + setAllowedDatasetTypes(dataverseAlias, "software"); String jsonIn = UtilIT.getDatasetJson("doc/sphinx-guides/source/_static/api/dataset-create-software.json"); @@ -259,7 +311,11 @@ public void testAddAndDeleteDatasetType() { //Avoid all-numeric names (which are not allowed) String randomName = "A" + UUID.randomUUID().toString().substring(0, 8); - String jsonIn = Json.createObjectBuilder().add("name", randomName).build().toString(); + String displayName = capitalize(randomName); + String jsonIn = Json.createObjectBuilder() + .add("name", randomName) + .add("displayName", displayName) + .build().toString(); System.out.println("adding type with name " + randomName); Response typeAdded = UtilIT.addDatasetType(jsonIn, apiToken); @@ -267,15 +323,24 @@ public void testAddAndDeleteDatasetType() { typeAdded.then().assertThat().statusCode(OK.getStatusCode()); - Long doomed = JsonPath.from(typeAdded.getBody().asString()).getLong("data.id"); + Long doomedId = JsonPath.from(typeAdded.getBody().asString()).getLong("data.id"); + // Deleting by name is not supported + String doomedName = JsonPath.from(typeAdded.getBody().asString()).getString("data.name"); - System.out.println("doomed: " + doomed); - Response getTypeById = UtilIT.getDatasetType(doomed.toString()); + System.out.println("doomed: " + doomedId); + Response getTypeById = UtilIT.getDatasetType(doomedId.toString()); getTypeById.prettyPrint(); getTypeById.then().assertThat().statusCode(OK.getStatusCode()); - System.out.println("deleting type with id " + doomed); - Response typeDeleted = UtilIT.deleteDatasetTypes(doomed, apiToken); + 
System.out.println("try to delete type by name " + doomedName + " should fail"); + Response deleteByNameFail = RestAssured.given().header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken) + .delete("/api/datasets/datasetTypes/" + doomedName); + deleteByNameFail.prettyPrint(); + deleteByNameFail.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()); + + System.out.println("deleting type with id " + doomedId); + Response typeDeleted = UtilIT.deleteDatasetTypes(doomedId, apiToken); typeDeleted.prettyPrint(); typeDeleted.then().assertThat().statusCode(OK.getStatusCode()); @@ -292,6 +357,7 @@ public void testAddDatasetTypeWithMDBLicense(){ JsonObjectBuilder job = Json.createObjectBuilder(); job.add("name", "testDatasetType"); + job.add("displayName", "testDatasetType"); job.add("linkedMetadataBlocks", Json.createArrayBuilder().add("geospatial")); job.add("availableLicenses", Json.createArrayBuilder().add("CC0 1.0")); @@ -349,6 +415,7 @@ public void testUpdateDatasetTypeWithLicense(){ JsonObjectBuilder job = Json.createObjectBuilder(); job.add("name", "testDatasetType"); + job.add("displayName", "testDatasetType"); Response typeAdded = UtilIT.addDatasetType(job.build(), apiToken); typeAdded.prettyPrint(); @@ -417,7 +484,11 @@ public void testUpdateDatasetTypeLinksWithMetadataBlocks() { //Avoid all-numeric names (which are not allowed) String randomName = "zzz" + UUID.randomUUID().toString().substring(0, 8); - String jsonIn = Json.createObjectBuilder().add("name", randomName).build().toString(); + String displayName = capitalize(randomName); + String jsonIn = Json.createObjectBuilder() + .add("name", randomName) + .add("displayName", displayName) + .build().toString(); System.out.println("adding type with name " + randomName); Response typeAdded = UtilIT.addDatasetType(jsonIn, apiToken); @@ -652,6 +723,7 @@ public void testCreateDatasetWithCustomType() { JsonObjectBuilder job = Json.createObjectBuilder(); job.add("name", "testDatasetType"); + job.add("displayName", 
"testDatasetType"); job.add("linkedMetadataBlocks", Json.createArrayBuilder().add("geospatial")); job.add("availableLicenses", Json.createArrayBuilder().add("CC0 1.0")); @@ -663,6 +735,8 @@ public void testCreateDatasetWithCustomType() { getTypes = UtilIT.getDatasetTypes(); getTypes.prettyPrint(); + setAllowedDatasetTypes(dataverseAlias, "testDatasetType"); + String pathToJsonFile = "scripts/api/data/dataset-create-new-with-type.json"; Response createDatasetResponse = UtilIT.createDatasetViaNativeApi(dataverseAlias, pathToJsonFile, apiToken); @@ -690,7 +764,12 @@ public void testCreateDatasetWithCustomType() { Response deleteDatasetResponse = UtilIT.deleteDatasetViaNativeApi(datasetId, apiToken); deleteDatasetResponse.prettyPrint(); assertEquals(200, deleteDatasetResponse.getStatusCode()); - + + // We are about to delete the dataset type "testDatasetType" but first we need to + // disassociate it from the collection. We do this by associating a dataset type + // that we aren't deleting ("dataset", the default dataset type). 
+ setAllowedDatasetTypes(dataverseAlias, "dataset"); + Long doomed = JsonPath.from(typeAdded.getBody().asString()).getLong("data.id"); System.out.println("doomed: " + doomed); @@ -713,4 +792,313 @@ public void testCreateDatasetWithCustomType() { } + @Test + public void testDatasetTypeNotAllowed() { + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverse = UtilIT.createRandomDataverse(apiToken); + createDataverse.then().assertThat().statusCode(CREATED.getStatusCode()); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverse); + Integer dataverseId = UtilIT.getDataverseIdFromResponse(createDataverse); + + String jsonIn = UtilIT.getDatasetJson("doc/sphinx-guides/source/_static/api/dataset-create-software.json"); + + Response createSoftware = UtilIT.createDataset(dataverseAlias, jsonIn, apiToken); + createSoftware.prettyPrint(); + + createSoftware.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); + } + + @Test + public void testUpdateCollectionWithInvalidDatasetType() { + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverse = UtilIT.createRandomDataverse(apiToken); + createDataverse.then().assertThat().statusCode(CREATED.getStatusCode()); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverse); + Integer dataverseId = UtilIT.getDataverseIdFromResponse(createDataverse); + + String nonExistentTypes = "foo,bar,baz"; + + Response setAllowedDatasetTypesFail = UtilIT.setCollectionAttribute(dataverseAlias, "allowedDatasetTypes", + nonExistentTypes, apiTokenSuperuser); + 
setAllowedDatasetTypesFail.prettyPrint(); + setAllowedDatasetTypesFail.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()); + + Response failEmptyString = UtilIT.setCollectionAttribute(dataverseAlias, "allowedDatasetTypes", " ", + apiTokenSuperuser); + failEmptyString.prettyPrint(); + failEmptyString.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()); + } + + /** + * In this test, there are two users: one who publishes a dataset and + * another who publishes a review of that dataset. + */ + @Test + public void testCreateReview() { + Response createDatasetDepositor = UtilIT.createRandomUser(); + createDatasetDepositor.then().assertThat().statusCode(OK.getStatusCode()); + String apiTokenDepositor = UtilIT.getApiTokenFromResponse(createDatasetDepositor); + + Response createCollectionOfData = UtilIT.createRandomDataverse(apiTokenDepositor); + createCollectionOfData.then().assertThat().statusCode(CREATED.getStatusCode()); + String collectionOfDataAlias = UtilIT.getAliasFromResponse(createCollectionOfData); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(collectionOfDataAlias, apiTokenDepositor); + createDataset.then().assertThat().statusCode(CREATED.getStatusCode()); + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = JsonPath.from(createDataset.getBody().asString()).getString("data.persistentId"); + + UtilIT.publishDataverseViaNativeApi(collectionOfDataAlias, apiTokenDepositor).then().assertThat().statusCode(OK.getStatusCode()); + UtilIT.publishDatasetViaNativeApi(datasetPid, "major", apiTokenDepositor); + + Response createReviewer = UtilIT.createRandomUser(); + createReviewer.then().assertThat().statusCode(OK.getStatusCode()); + String apiTokenReviewer = UtilIT.getApiTokenFromResponse(createReviewer); + + // We assume the reviewer wants their own collection for reviews. 
+ Response createCollectionOfReviews = UtilIT.createRandomDataverse(apiTokenReviewer); + createCollectionOfReviews.then().assertThat().statusCode(CREATED.getStatusCode()); + String collectionOfReviewsAlias = UtilIT.getAliasFromResponse(createCollectionOfReviews); + + Response datasetMetadataResponse = UtilIT.nativeGet(datasetId, apiTokenReviewer); + datasetMetadataResponse.then().assertThat().statusCode(OK.getStatusCode()); + datasetMetadataResponse.prettyPrint(); + JsonPath datasetMetadata = JsonPath.from(datasetMetadataResponse.body().asString()); + String datasetTitle = datasetMetadata.getString("data.latestVersion.metadataBlocks.citation.fields[0].value"); + String datasetPidUrl = datasetMetadata.getString("data.persistentUrl"); + String datasetPidProtocol = datasetMetadata.getString("data.protocol"); + String datasetPidAuthority = datasetMetadata.getString("data.authority"); + String datasetPidSeparator = datasetMetadata.getString("data.separator"); + String datasetPidIdentifier = datasetMetadata.getString("data.identifier"); + String datasetPidWithoutProtocol = datasetPidAuthority + datasetPidSeparator + datasetPidIdentifier; + + Response getCitation = UtilIT.getDatasetVersionCitation(datasetId, DS_VERSION_LATEST_PUBLISHED, false, apiTokenReviewer); + getCitation.prettyPrint(); + getCitation.then().assertThat().statusCode(OK.getStatusCode()); + String datasetCitationHtml = JsonPath.from(getCitation.getBody().asString()).getString("data.message"); + String datasetCitationText = StringUtil.html2text(datasetCitationHtml); + + Response getCitationCsl = UtilIT.getDatasetVersionCitationFormat(datasetId, DS_VERSION_LATEST_PUBLISHED, false, "CSL", apiTokenReviewer); + getCitationCsl.prettyPrint(); + getCitationCsl.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("type", equalTo("dataset")); + + /** + * We are added the HTML version of a Related Dataset. We like the HTML + * version because both JSF and the SPA render the DOI link as a + * clickable link. 
+ * + * The tooltip for Related Dataset says "Information, such as a + * persistent ID or citation, about a related dataset, such as previous + * research on the Dataset's subject". + * + * We are aware that there is a custom metadata block called + * "relatedDatasetsV2" at https://github.com/vera/related-datasets-cvoc + * that we have been playing with. We especially like that relationships + * can be expressed between the current object (a review) and the + * related dataset. This is simlar to how "Related Publication" works. + * See also discussion at + * https://dataverse.zulipchat.com/#narrow/channel/379673-dev/topic/Improved.20.22Related.20datasets.22/near/534969036 + */ + JsonObjectBuilder jsonForCreatingReview = Json.createObjectBuilder() + /** + * See above where this type is added to the installation and + * therefore available for use. + */ + .add("datasetType", DatasetType.DATASET_TYPE_REVIEW) + .add("datasetVersion", Json.createObjectBuilder() + .add("license", Json.createObjectBuilder() + .add("name", "CC0 1.0") + .add("uri", "http://creativecommons.org/publicdomain/zero/1.0") + ) + .add("metadataBlocks", Json.createObjectBuilder() + .add("citation", Json.createObjectBuilder() + .add("fields", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("typeName", "title") + .add("value", "Review of " + datasetTitle) + .add("typeClass", "primitive") + .add("multiple", false) + ) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("authorName", + Json.createObjectBuilder() + .add("value", "Simpson, Homer") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "authorName")) + ) + ) + .add("typeClass", "compound") + .add("multiple", true) + .add("typeName", "author") + ) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("datasetContactEmail", + Json.createObjectBuilder() + .add("value", 
"hsimpson@mailinator.com") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "datasetContactEmail")) + ) + ) + .add("typeClass", "compound") + .add("multiple", true) + .add("typeName", "datasetContact") + ) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("dsDescriptionValue", + Json.createObjectBuilder() + .add("value", "This is a review of a dataset.") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "dsDescriptionValue")) + ) + ) + .add("typeClass", "compound") + .add("multiple", true) + .add("typeName", "dsDescription") + ) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add("Other") + ) + .add("typeClass", "controlledVocabulary") + .add("multiple", true) + .add("typeName", "subject") + ) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(datasetCitationHtml) + ) + .add("typeClass", "primitive") + .add("multiple", true) + .add("typeName", "relatedDatasets") + ) + ) + ) + )); + + /** + * We could just call `setAllowedDatasetTypes(collectionOfReviewsAlias, + * "review")` like other places in the code, but here we are making assertions + * on the first (and only) object under "allowedDatasetTypes". 
+ */ + Response setAllowedDatasetTypes = UtilIT.setCollectionAttribute(collectionOfReviewsAlias, "allowedDatasetTypes", + "review", apiTokenSuperuser); + setAllowedDatasetTypes.prettyPrint(); + setAllowedDatasetTypes.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.allowedDatasetTypes[0].name", is("review")) + .body("data.allowedDatasetTypes[0].displayName", is("Review")) + .body("data.allowedDatasetTypes[0].description", is("A review of a dataset compiled by the expert community.")); + + Response createReview = UtilIT.createDataset(collectionOfReviewsAlias, jsonForCreatingReview, apiTokenReviewer); + createReview.prettyPrint(); + createReview.then().assertThat().statusCode(CREATED.getStatusCode()); + Integer reviewId = UtilIT.getDatasetIdFromResponse(createReview); + String reviewPid = JsonPath.from(createReview.getBody().asString()).getString("data.persistentId"); + + Response getReviewMetadata = UtilIT.nativeGet(reviewId, apiTokenReviewer); + getReviewMetadata.prettyPrint(); + getReviewMetadata.then().assertThat().statusCode(OK.getStatusCode()); + String datasetType = JsonPath.from(getReviewMetadata.getBody().asString()).getString("data.datasetType"); + assertEquals("review", datasetType); + + UtilIT.publishDataverseViaNativeApi(collectionOfReviewsAlias, apiTokenReviewer).then().assertThat().statusCode(OK.getStatusCode()); + UtilIT.publishDatasetViaNativeApi(reviewPid, "major", apiTokenReviewer).then().assertThat().statusCode(OK.getStatusCode()); + + Response getCitationCslReview = UtilIT.getDatasetVersionCitationFormat(reviewId, DS_VERSION_LATEST_PUBLISHED, false, "CSL", apiTokenReviewer); + getCitationCslReview.prettyPrint(); + getCitationCslReview.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("type", equalTo("review")); + + Response exportDatacite = UtilIT.exportDataset(reviewPid, "Datacite", false, "1.0", apiTokenReviewer); + exportDatacite.prettyPrint(); + exportDatacite.then().assertThat() + 
.statusCode(OK.getStatusCode()) + .body("resource.resourceType", CoreMatchers.equalTo("Review")) + .body("resource.resourceType.@resourceTypeGeneral", CoreMatchers.equalTo("Other")); + } + + @Test + public void testInternationalization() { + Response getDatasetType = UtilIT.getDatasetType("software"); + getDatasetType.prettyPrint(); + getDatasetType.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.name", is("software")) + .body("data.displayName", is("Software")); + + // https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Accept-Language + getDatasetType = UtilIT.getDatasetType("software", "en-US,en;q=0.5"); + getDatasetType.prettyPrint(); + getDatasetType.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.name", is("software")) + .body("data.displayName", is("Software")); + + getDatasetType = UtilIT.getDatasetType("software", "en-US"); + getDatasetType.prettyPrint(); + getDatasetType.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.name", is("software")) + .body("data.displayName", is("Software")); + + getDatasetType = UtilIT.getDatasetType("software", ""); + getDatasetType.prettyPrint(); + getDatasetType.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.name", is("software")) + .body("data.displayName", is("Software")); + + boolean i18nIsConfigured = false; + if (!i18nIsConfigured) { + System.out.println("i18n is not configured; skipping test of non-English languages"); + return; + } + + getDatasetType = UtilIT.getDatasetType("software", "fr-CA,fr;q=0.8,en-US;q=0.6,en;q=0.4"); + getDatasetType.prettyPrint(); + getDatasetType.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.name", is("software")) + .body("data.displayName", is("Logiciel")); + + getDatasetType = UtilIT.getDatasetTypes("fr-CA,fr;q=0.8,en-US;q=0.6,en;q=0.4"); + getDatasetType.prettyPrint(); + getDatasetType.then().assertThat() + .statusCode(OK.getStatusCode()); + + // Messy but the only way 
we've figured out ¯\_(ツ)_/¯ + List> dataset = with(getDatasetType.body().asString()).param("dataset", "dataset") + .getList("data.findAll { data -> data.name == dataset }"); + Map firstDataset = dataset.get(0); + assertEquals("Ensemble de données", firstDataset.get("displayName")); + + List> instrument = with(getDatasetType.body().asString()).param("instrument", "instrument") + .getList("data.findAll { data -> data.name == instrument }"); + Map firstInstrument = instrument.get(0); + // Instrument isn't translated in the French properties file; should fall back to English + assertEquals("Instrument", firstInstrument.get("displayName")); + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsEmbargoAPITest.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsEmbargoAPITest.java new file mode 100644 index 00000000000..64318e2e223 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsEmbargoAPITest.java @@ -0,0 +1,246 @@ + +package edu.harvard.iq.dataverse.api; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.DataFileServiceBean; +import edu.harvard.iq.dataverse.DatasetServiceBean; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.Embargo; +import edu.harvard.iq.dataverse.EmbargoServiceBean; +import edu.harvard.iq.dataverse.PermissionServiceBean; +import edu.harvard.iq.dataverse.PermissionServiceBean.StaticPermissionQuery; +import edu.harvard.iq.dataverse.TermsOfUseAndAccess; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; +import jakarta.json.Json; +import 
jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; +import jakarta.ws.rs.container.ContainerRequestContext; +import jakarta.ws.rs.core.Response; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import java.time.LocalDate; +import java.util.List; + +import static jakarta.ws.rs.core.Response.Status.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.when; + +@LocalJvmSettings +public class DatasetsEmbargoAPITest { + + + @Mock + private DataFileServiceBean fileService; + + @Mock + private DatasetServiceBean datasetService; + + @Mock + private EmbargoServiceBean embargoService; + + @Mock + private PermissionServiceBean permissionService; + + @Mock + private SettingsServiceBean settingsService; + + @Mock + private ContainerRequestContext crc; + + @Mock + private edu.harvard.iq.dataverse.Dataset dataset; + + @Mock + private edu.harvard.iq.dataverse.DataFile file; + + @Mock + private DatasetVersion datasetVersion; + + @Mock + private TermsOfUseAndAccess termsOfUseAndAccess; + + @Mock + private StaticPermissionQuery permissionQuery; + + @InjectMocks + private Datasets datasetsApi; + + private AuthenticatedUser testUser; + + @BeforeEach + public void setUp() { + MockitoAnnotations.openMocks(this); + + testUser = new AuthenticatedUser(); + testUser.setId(1L); + testUser.setSuperuser(false); + + // Mock the authentication + when(crc.getProperty(ApiConstants.CONTAINER_REQUEST_CONTEXT_USER)) + .thenReturn(testUser); + + // Mock dataset lookup + when(datasetService.find(1L)).thenReturn(dataset); + + // Mock dataset version chain + 
when(dataset.getLatestVersion()).thenReturn(datasetVersion); + when(dataset.getFiles()).thenReturn(List.of(file)); + when(datasetVersion.getTermsOfUseAndAccess()).thenReturn(termsOfUseAndAccess); + when(datasetVersion.getVersionState()).thenReturn(DatasetVersion.VersionState.DRAFT); + when(termsOfUseAndAccess.getDatasetVersion()).thenReturn(datasetVersion); + + // Mock file lookup + when(fileService.find(2L)).thenReturn(file); + when(fileService.save(any(DataFile.class))).thenAnswer(invocation -> invocation.getArgument(0)); + + + + // Mock permission check + when(permissionService.userOn(eq(testUser), eq(dataset))) + .thenReturn(permissionQuery); + when(permissionQuery.has(Permission.EditDataset)).thenReturn(true); + + // Mock setting + when(settingsService.getValueForKey(SettingsServiceBean.Key.MaxEmbargoDurationInMonths)).thenReturn("12"); + + // Mock embargoService + when(embargoService.merge(any(Embargo.class))).thenAnswer(invocation -> invocation.getArgument(0)); + when(embargoService.save(any(Embargo.class), any(String.class))).thenReturn(1L); + + } + + @Test + @JvmSetting(key = JvmSettings.FEATURE_FLAG, value = "true", varArgs = "require-embargo-reason") + public void testCreateFileEmbargo_withReasonRequired_shouldRejectNullReason() { + // Arrange + LocalDate futureDate = LocalDate.now().plusMonths(6); + JsonObjectBuilder embargoJson = Json.createObjectBuilder() + .add("dateAvailable", futureDate.toString()) + .add("fileIds", Json.createArrayBuilder().add(1L)); + + // Act + Response response = datasetsApi.createFileEmbargo(crc, "1", embargoJson.build().toString()); + + // Assert + assertEquals(BAD_REQUEST.getStatusCode(), response.getStatus()); + if (response.hasEntity()) { + Object entity = response.getEntity(); + if (entity instanceof JsonObject) { + JsonObject jsonResponse = (JsonObject) entity; + String message = jsonResponse.getString("message", ""); + assertTrue(message.contains("Reason is required") || message.contains("reason"), + "Expected error 
message about required reason, got: " + message); + } + } + } + + @Test + @JvmSetting(key = JvmSettings.FEATURE_FLAG, value = "true", varArgs = "require-embargo-reason") + public void testCreateFileEmbargo_withReasonRequired_shouldAcceptValidReason() throws CommandException { + // Arrange + LocalDate futureDate = LocalDate.now().plusMonths(6); + JsonObjectBuilder embargoJson = Json.createObjectBuilder() + .add("dateAvailable", futureDate.toString()) + .add("reason", "Valid embargo reason for testing") + .add("fileIds", Json.createArrayBuilder().add(2L)); + + // Act + Response response = datasetsApi.createFileEmbargo(crc, "1", embargoJson.build().toString()); + + // Assert + assertTrue(response.getStatus() == OK.getStatusCode()); + } + + @Test + @JvmSetting(key = JvmSettings.FEATURE_FLAG, value = "false", varArgs = "require-embargo-reason") + public void testCreateFileEmbargo_withReasonNotRequired_shouldAcceptNullReason() throws CommandException { + // Arrange + LocalDate futureDate = LocalDate.now().plusMonths(6); + JsonObjectBuilder embargoJson = Json.createObjectBuilder() + .add("dateAvailable", futureDate.toString()) + .add("fileIds", Json.createArrayBuilder().add(2L)); + + // Act + Response response = datasetsApi.createFileEmbargo(crc, "1", embargoJson.build().toString()); + + // Assert + // Should not get BAD_REQUEST for missing reason when flag is disabled + if (response.getStatus() == BAD_REQUEST.getStatusCode() && response.hasEntity()) { + Object entity = response.getEntity(); + if (entity instanceof JsonObject) { + JsonObject jsonResponse = (JsonObject) entity; + String message = jsonResponse.getString("message", ""); + assertTrue(!message.contains("Reason is required"), + "Should not require reason when flag is disabled, got: " + message); + } + } + } + + @ParameterizedTest + @ValueSource(strings = {"", " ", "\t", "\n", " \t\n "}) + @JvmSetting(key = JvmSettings.FEATURE_FLAG, value = "false", varArgs = "require-embargo-reason") + public void 
testCreateFileEmbargo_shouldRejectBlankReason_regardlessOfFlag(String blankReason) { + // Arrange + LocalDate futureDate = LocalDate.now().plusMonths(6); + JsonObjectBuilder embargoJson = Json.createObjectBuilder() + .add("dateAvailable", futureDate.toString()) + .add("reason", blankReason) + .add("fileIds", Json.createArrayBuilder().add(2L)); + + // Act + Response response = datasetsApi.createFileEmbargo(crc, "1", embargoJson.build().toString()); + + // Assert + assertEquals(BAD_REQUEST.getStatusCode(), response.getStatus()); + if (response.hasEntity()) { + Object entity = response.getEntity(); + if (entity instanceof JsonObject) { + JsonObject jsonResponse = (JsonObject) entity; + String message = jsonResponse.getString("message", ""); + assertTrue(message.contains("blank") || message.contains("empty"), + "Expected error message about blank reason, got: " + message); + } + } + } + + @Test + @JvmSetting(key = JvmSettings.FEATURE_FLAG, value = "true", varArgs = "require-embargo-reason") + public void testCreateFileEmbargo_withReasonRequired_shouldRejectEmptyString() { + // Arrange + LocalDate futureDate = LocalDate.now().plusMonths(6); + JsonObjectBuilder embargoJson = Json.createObjectBuilder() + .add("dateAvailable", futureDate.toString()) + .add("reason", "") + .add("fileIds", Json.createArrayBuilder().add(2L)); + + // Act + Response response = datasetsApi.createFileEmbargo(crc, "1", embargoJson.build().toString()); + + // Assert + assertEquals(BAD_REQUEST.getStatusCode(), response.getStatus()); + if (response.hasEntity()) { + Object entity = response.getEntity(); + if (entity instanceof JsonObject) { + JsonObject jsonResponse = (JsonObject) entity; + String message = jsonResponse.getString("message", ""); + assertTrue(message.contains("blank") || message.contains("empty") || message.contains("reason"), + "Expected error message about blank/empty reason, got: " + message); + } + } + } +} \ No newline at end of file diff --git 
a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index d4531ec21cf..78fe28ebc30 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -12,7 +12,10 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.SystemConfig; -import edu.harvard.iq.dataverse.util.json.*; +import edu.harvard.iq.dataverse.util.json.JSONLDUtil; +import edu.harvard.iq.dataverse.util.json.JsonParseException; +import edu.harvard.iq.dataverse.util.json.JsonParser; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.xml.XmlUtil; import io.restassured.RestAssured; import io.restassured.http.ContentType; @@ -20,21 +23,13 @@ import io.restassured.path.json.JsonPath; import io.restassured.path.xml.XmlPath; import io.restassured.response.Response; -import jakarta.json.Json; -import jakarta.json.JsonArray; -import jakarta.json.JsonObject; -import jakarta.json.JsonObjectBuilder; -import jakarta.json.JsonValue; -import jakarta.json.JsonArrayBuilder; +import jakarta.json.*; import jakarta.ws.rs.core.Response.Status; import org.apache.commons.lang3.RandomStringUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.exception.ExceptionUtils; import org.hamcrest.CoreMatchers; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.*; import org.skyscreamer.jsonassert.JSONAssert; import javax.xml.stream.XMLInputFactory; @@ -48,6 +43,7 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.time.LocalDate; +import java.time.Year; import java.time.format.DateTimeFormatter; import java.util.*; import java.util.logging.Logger; @@ -60,7 +56,6 @@ import static 
io.restassured.path.json.JsonPath.with; import static jakarta.ws.rs.core.Response.Status.*; import static java.lang.Thread.sleep; -import java.time.Year; import static org.hamcrest.CoreMatchers.*; import static org.hamcrest.Matchers.contains; import static org.junit.jupiter.api.Assertions.*; @@ -79,10 +74,6 @@ public static void setUpClass() { removeIdentifierGenerationStyle.then().assertThat() .statusCode(200); - Response removeExcludeEmail = UtilIT.deleteSetting(SettingsServiceBean.Key.ExcludeEmailFromExport); - removeExcludeEmail.then().assertThat() - .statusCode(200); - Response removeAnonymizedFieldTypeNames = UtilIT.deleteSetting(SettingsServiceBean.Key.AnonymizedFieldTypeNames); removeAnonymizedFieldTypeNames.then().assertThat() .statusCode(200); @@ -101,6 +92,10 @@ public static void setUpClass() { */ } + @AfterEach + public void afterEach() { + UtilIT.deleteSetting(SettingsServiceBean.Key.ExcludeEmailFromExport); + } @AfterAll public static void afterClass() { @@ -109,10 +104,6 @@ public static void afterClass() { removeIdentifierGenerationStyle.then().assertThat() .statusCode(200); - Response removeExcludeEmail = UtilIT.deleteSetting(SettingsServiceBean.Key.ExcludeEmailFromExport); - removeExcludeEmail.then().assertThat() - .statusCode(200); - Response removeAnonymizedFieldTypeNames = UtilIT.deleteSetting(SettingsServiceBean.Key.AnonymizedFieldTypeNames); removeAnonymizedFieldTypeNames.then().assertThat() .statusCode(200); @@ -306,7 +297,7 @@ public void testCreateDataset() { grantRole.prettyPrint(); grantRole.then().assertThat() .body("message", containsString(BundleUtil.getStringFromBundle("datasets.api.grant.role.assignee.has.role.error"))) - .statusCode(FORBIDDEN.getStatusCode()); + .statusCode(CONFLICT.getStatusCode()); // Create another random user: @@ -1757,11 +1748,6 @@ public void testExcludeEmail() { Response deleteUserResponse = UtilIT.deleteUser(username); deleteUserResponse.prettyPrint(); assertEquals(200, deleteUserResponse.getStatusCode()); 
- - Response removeExcludeEmail = UtilIT.deleteSetting(SettingsServiceBean.Key.ExcludeEmailFromExport); - removeExcludeEmail.then().assertThat() - .statusCode(200); - } @Disabled @@ -2366,7 +2352,7 @@ private static void validateAssignExistingRole(String datasetPersistentId, Strin failedGrantPermission.prettyPrint(); failedGrantPermission.then().assertThat() .body("message", containsString(BundleUtil.getStringFromBundle("datasets.api.grant.role.assignee.has.role.error"))) - .statusCode(FORBIDDEN.getStatusCode()); + .statusCode(CONFLICT.getStatusCode()); } @Test @@ -3089,12 +3075,19 @@ public void testDatasetLocksApi() { .statusCode(200); // Check again: - // This should return an empty list, as the dataset should have no locks just yet: + // This should no longer return an empty list, as the dataset now has a lock: checkDatasetLocks = UtilIT.checkDatasetLocks(datasetId.longValue(), "Ingest", apiToken); checkDatasetLocks.prettyPrint(); checkDatasetLocks.then().assertThat() .body("data[0].lockType", equalTo("Ingest")) .statusCode(200); + + // Confirm that when getting the dataset, the lock is also listed + Response getDatasetJson = UtilIT.nativeGet(datasetId, apiToken); + getDatasetJson.prettyPrint(); + getDatasetJson.then().assertThat() + .body("data.locks[0]", equalTo("Ingest")) + .statusCode(200); // Try to lock the dataset with the same type lock, AGAIN // (this should fail, of course!) 
@@ -3209,6 +3202,13 @@ public void testDatasetLocksApi() { checkDatasetLocks.then().assertThat() .body("data", equalTo(emptyArray)) .statusCode(200); + + // Confirm that when getting the dataset, the lock is also no longer listed + getDatasetJson = UtilIT.nativeGet(datasetId, apiToken); + getDatasetJson.prettyPrint(); + getDatasetJson.then().assertThat() + .body("data.locks", equalTo(emptyArray)) + .statusCode(200); } /** @@ -5042,7 +5042,7 @@ public void testCitationDate() throws IOException { .body("resource.dates.date[0]", CoreMatchers.equalTo("1999-12-31")) .body("resource.dates.date[1].@dateType", CoreMatchers.equalTo("Updated")) .body("resource.dates.date[1]", CoreMatchers.equalTo(today)) - .body("resource.publicationYear", CoreMatchers.equalTo("2025")); + .body("resource.publicationYear", CoreMatchers.equalTo(currentYear)); Response exportDatasetOaiDc = UtilIT.exportDataset(datasetPid, "oai_dc", apiToken, true); exportDatasetOaiDc.prettyPrint(); @@ -7375,6 +7375,159 @@ public void testUpdateLicense() { .statusCode(UNAUTHORIZED.getStatusCode()); } + @Test + public void testExcludeEmailOverride() { + // Create super user + String apiToken = getSuperuserToken(); + // Create user with no permission + String apiTokenNoPerms = UtilIT.createRandomUserGetToken(); + // Create Collection + String collectionAlias = UtilIT.createRandomCollectionGetAlias(apiToken); + // Publish Collection + UtilIT.publishDataverseViaNativeApi(collectionAlias, apiToken).prettyPrint(); + // Create Dataset + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(collectionAlias, apiToken); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = JsonPath.from(createDataset.asString()).getString("data.persistentId"); + // Publish Dataset + UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken).prettyPrint(); + + // Setting is not set - datasetContactEmail will NOT 
be excluded + Response response = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken); + response.then().assertThat().statusCode(OK.getStatusCode()); + String json = response.prettyPrint(); + assertTrue(json.contains("datasetContactName")); + assertTrue(json.contains("datasetContactEmail")); + + UtilIT.setSetting(SettingsServiceBean.Key.ExcludeEmailFromExport, "true"); + + // User does not ignore the setting - datasetContactEmail will be excluded + response = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken); + response.then().assertThat().statusCode(OK.getStatusCode()); + json = response.prettyPrint(); + assertTrue(json.contains("datasetContactName")); + assertTrue(!json.contains("datasetContactEmail")); + + // User has permission to ignore the setting allowing the datasetContactEmail to be included in the response + response = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, true, false, false, true); + response.then().assertThat().statusCode(OK.getStatusCode()); + json = response.prettyPrint(); + assertTrue(json.contains("datasetContactName")); + assertTrue(json.contains("datasetContactEmail")); + + // User has no permission to override the setting - datasetContactEmail will be excluded + response = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, true, false, false, true); + response.then().assertThat().statusCode(OK.getStatusCode()); + json = response.prettyPrint(); + assertTrue(json.contains("datasetContactName")); + assertTrue(!json.contains("datasetContactEmail")); + } + + @Test + public void testGetDatasetWithTermsOfUseAndGuestbook() throws IOException, JsonParseException { + String apiToken = getSuperuserToken(); + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + String ownerAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDatasetResponse = 
UtilIT.createRandomDatasetViaNativeApi(ownerAlias, apiToken); + createDatasetResponse.prettyPrint(); + String persistentId = UtilIT.getDatasetPersistentIdFromResponse(createDatasetResponse); + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse); + + // Create a Guestbook + Guestbook guestbook = UtilIT.createRandomGuestbook(ownerAlias, persistentId, apiToken); + + // Create a license for Terms of Use + String jsonString = """ + { + "customTerms": { + "termsOfUse": "testTermsOfUse" + } + } + """; + Response updateLicenseResponse = UtilIT.updateLicense(datasetId.toString(), jsonString, apiToken); + updateLicenseResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.message", equalTo(BundleUtil.getStringFromBundle("datasets.api.updateLicense.success"))); + + // Enable the Guestbook with invalid enable flag + Response guestbookEnableResponse = UtilIT.enableGuestbook(ownerAlias, guestbook.getId(), apiToken, "x"); + guestbookEnableResponse.prettyPrint(); + guestbookEnableResponse.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("message", startsWith("Illegal value")); + + Response getDataset = UtilIT.getDatasetVersions(persistentId, apiToken); + getDataset.prettyPrint(); + getDataset.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data[0].termsOfUse", equalTo("testTermsOfUse")) + .body("data[0].guestbookId", equalTo(guestbook.getId().intValue())); + + getDataset = UtilIT.nativeGet(datasetId, apiToken); + getDataset.prettyPrint(); + getDataset.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.guestbookId", equalTo(guestbook.getId().intValue())); + + Response getGuestbook = UtilIT.getGuestbook(guestbook.getId(), apiToken); + getGuestbook.prettyPrint(); + getGuestbook.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.id", equalTo(guestbook.getId().intValue())); + + getGuestbook = UtilIT.getGuestbook(-1L, apiToken); + getGuestbook.prettyPrint(); + 
getGuestbook.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + // remove the guestbook from the dataset + Response removeGuestbook = UtilIT.updateDatasetGuestbook(persistentId, null, apiToken); + removeGuestbook.prettyPrint(); + removeGuestbook.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.message", startsWith("Guestbook removed")); + // remove the already removed guestbook from the dataset + removeGuestbook = UtilIT.updateDatasetGuestbook(persistentId, null, apiToken); + removeGuestbook.prettyPrint(); + removeGuestbook.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.message", startsWith("No Guestbook to remove")); + + // Get the dataset to show that the guestbook was removed + getDataset = UtilIT.nativeGet(datasetId, apiToken); + getDataset.prettyPrint(); + getDataset.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.guestbookId", equalTo(null)); + + // Disable the Guestbook + guestbookEnableResponse = UtilIT.enableGuestbook(ownerAlias, guestbook.getId(), apiToken, Boolean.FALSE.toString()); + guestbookEnableResponse.prettyPrint(); + guestbookEnableResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.message", startsWith("Guestbook")); + + // Fail to add a disabled Guestbook to the Dataset + Response setGuestbook = UtilIT.updateDatasetGuestbook(persistentId, guestbook.getId(), apiToken); + setGuestbook.prettyPrint(); + setGuestbook.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("message", startsWith("Failed to update dataset guestbook")); + + // Enable the Guestbook. Add it to the Dataset. Then disable it. 
+ // Show that the guestbook is still returned in the dataset Json even if it's disabled + UtilIT.enableGuestbook(ownerAlias, guestbook.getId(), apiToken, Boolean.TRUE.toString()).prettyPrint(); + UtilIT.updateDatasetGuestbook(persistentId, guestbook.getId(), apiToken).prettyPrint(); + UtilIT.enableGuestbook(ownerAlias, guestbook.getId(), apiToken, Boolean.FALSE.toString()).prettyPrint(); + getDataset = UtilIT.nativeGet(datasetId, apiToken); + getDataset.prettyPrint(); + getDataset.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.guestbookId", equalTo(guestbook.getId().intValue())); + } + private String getSuperuserToken() { Response createResponse = UtilIT.createRandomUser(); String adminApiToken = UtilIT.getApiTokenFromResponse(createResponse); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java index 1b908f63bfa..632ac39eb4e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java @@ -2609,13 +2609,17 @@ public void testUpdateInputLevelDisplayOnCreateOverride() { @Test public void testCreateAndGetTemplates() throws JsonParseException { + /* + Also Delete...and get single template + */ + Response createUserResponse = UtilIT.createRandomUser(); String apiToken = UtilIT.getApiTokenFromResponse(createUserResponse); String username = UtilIT.getUsernameFromResponse(createUserResponse); - Response createSecondUserResponse = UtilIT.createRandomUser(); - String secondApiToken = UtilIT.getApiTokenFromResponse(createSecondUserResponse); - String secondUsername = UtilIT.getUsernameFromResponse(createSecondUserResponse); + Response createSecondUserResponse = UtilIT.createRandomUser(); + String secondApiToken = UtilIT.getApiTokenFromResponse(createSecondUserResponse); + String secondUsername = UtilIT.getUsernameFromResponse(createSecondUserResponse); /* @@ -2623,9 +2627,13 @@ public void 
testCreateAndGetTemplates() throws JsonParseException { */ Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); - + Integer dataverseId = UtilIT.getDataverseIdFromResponse(createDataverseResponse); + + System.out.print("dataverseId: " + dataverseId); + String newName = "New Test Dataverse Name"; String newAffiliation = "New Test Dataverse Affiliation"; String newDataverseType = Dataverse.DataverseType.TEACHING_COURSES.toString(); @@ -2634,55 +2642,53 @@ public void testCreateAndGetTemplates() throws JsonParseException { String[] newFacetIds = new String[]{"contributorName"}; String[] newMetadataBlockNames = new String[]{"citation", "geospatial", "biomedical"}; - //Giving the new Dataverse updated metadatablocks so that it will not inherit templates - Response updateDataverseResponse = UtilIT.updateDataverse( - dataverseAlias, dataverseAlias, newName, newAffiliation, newDataverseType, newContactEmails, newInputLevelNames, + // Giving the new Dataverse updated metadatablocks so that it will not inherit + // templates + Response updateDataverseResponse = UtilIT.updateDataverse( + dataverseAlias, dataverseAlias, newName, newAffiliation, newDataverseType, newContactEmails, + newInputLevelNames, null, newMetadataBlockNames, apiToken, - Boolean.FALSE, Boolean.FALSE, null - ); - + Boolean.FALSE, Boolean.FALSE, null); + updateDataverseResponse.then().assertThat() .statusCode(OK.getStatusCode()); - // Create a template - String jsonString = """ - { - "name": "Dataverse template", - "isDefault": true, - "fields": [ - { - "typeName": "author", - "value": [ - { - "authorName": { - "typeName": "authorName", - "value": "Belicheck, Bill" - }, - "authorAffiliation": { - "typeName": "authorIdentifierScheme", - "value": "ORCID" - } - } - ] - } - ], - "instructions": 
[ - { - "instructionField": "author", - "instructionText": "The author data" - } - ] - } - """; + { + "name": "Dataverse template", + "isDefault": true, + "fields": [ + { + "typeName": "author", + "value": [ + { + "authorName": { + "typeName": "authorName", + "value": "Belicheck, Bill" + }, + "authorAffiliation": { + "typeName": "authorIdentifierScheme", + "value": "ORCID" + } + } + ] + } + ], + "instructions": [ + { + "instructionField": "author", + "instructionText": "The author data" + } + ] + } + """; Response createTemplateResponse = UtilIT.createTemplate( dataverseAlias, jsonString, - apiToken - ); - - createTemplateResponse.then().assertThat().statusCode(OK.getStatusCode()) + apiToken); + + createTemplateResponse.then().assertThat().statusCode(CREATED.getStatusCode()) .body("data.name", equalTo("Dataverse template")) .body("data.isDefault", equalTo(true)) .body("data.usageCount", equalTo(0)) @@ -2693,17 +2699,24 @@ public void testCreateAndGetTemplates() throws JsonParseException { .body("data.instructions[0].instructionText", equalTo("The author data")) .body("data.dataverseAlias", equalTo(dataverseAlias)); - // Template creation should fail if the user lacks dataverse edit permissions + Long templateId = createTemplateResponse.body().jsonPath().getLong("data.id"); + //Check for failure due unauthorized user. 
+ Response setDefaultResp = UtilIT.setDefaultTemplate(dataverseAlias, templateId, secondApiToken); + setDefaultResp.then().assertThat().statusCode(UNAUTHORIZED.getStatusCode()); + + // Set default template + setDefaultResp = UtilIT.setDefaultTemplate(dataverseAlias, templateId, apiToken); + setDefaultResp.then().assertThat().statusCode(OK.getStatusCode()); + + // Template creation should fail if the user lacks dataverse edit permissions createTemplateResponse = UtilIT.createTemplate( dataverseAlias, jsonString, - secondApiToken - ); + secondApiToken); createTemplateResponse.then().assertThat().statusCode(UNAUTHORIZED.getStatusCode()); - // Get templates - + // Get templates and check this one is default now Response getTemplateResponse = UtilIT.getTemplates(dataverseAlias, apiToken); getTemplateResponse.then().assertThat().statusCode(OK.getStatusCode()) .body("data.size()", equalTo(1)) @@ -2716,20 +2729,82 @@ public void testCreateAndGetTemplates() throws JsonParseException { .body("data[0].instructions[0].instructionField", equalTo("author")) .body("data[0].instructions[0].instructionText", equalTo("The author data")) .body("data[0].dataverseAlias", equalTo(dataverseAlias)); + + + // Remove default template + System.out.print("***************: " + dataverseAlias ); + + Response removeDefaultResp = UtilIT.removeDefaultTemplate(dataverseAlias, apiToken); + removeDefaultResp.prettyPrint(); + removeDefaultResp.then().assertThat().statusCode(OK.getStatusCode()); - // Templates retrieval should fail if a secondary user lacks dataset creation permissions + //check that template is no longer default. 
+ getTemplateResponse = UtilIT.getTemplates(dataverseAlias, apiToken); + getTemplateResponse.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.size()", equalTo(1)) + .body("data[0].isDefault", equalTo(false)); - getTemplateResponse = UtilIT.getTemplates(dataverseAlias, secondApiToken); - getTemplateResponse.then().assertThat().statusCode(UNAUTHORIZED.getStatusCode()); - // Templates retrieval should succeed if the secondary user has dataset creation permissions + // Templates retrieval should fail if a secondary user lacks dataset creation + // permissions + + getTemplateResponse = UtilIT.getTemplates(dataverseAlias, secondApiToken); + getTemplateResponse.then().assertThat().statusCode(UNAUTHORIZED.getStatusCode()); + + + //set to super to update role UtilIT.setSuperuserStatus(username, true); + Response grantRoleResponse = UtilIT.grantRoleOnDataverse(dataverseAlias, DataverseRole.DS_CONTRIBUTOR, "@" + secondUsername, apiToken); + grantRoleResponse.prettyPrint(); grantRoleResponse.then().assertThat().statusCode(OK.getStatusCode()); getTemplateResponse = UtilIT.getTemplates(dataverseAlias, secondApiToken); getTemplateResponse.then().assertThat().statusCode(OK.getStatusCode()); + + Response getTemplateByIdResponse = UtilIT.getTemplate(templateId.toString(), apiToken); + getTemplateByIdResponse.prettyPrint(); + getTemplateByIdResponse.then().assertThat().statusCode(OK.getStatusCode()); + + //guest user shouldn't get it + getTemplateByIdResponse = UtilIT.getTemplate(templateId.toString()); + getTemplateByIdResponse.prettyPrint(); + getTemplateByIdResponse.then().assertThat().statusCode(UNAUTHORIZED.getStatusCode()); + + Response deleteTemplateResponse = UtilIT.deleteTemplate(templateId.toString(), secondApiToken); + deleteTemplateResponse.prettyPrint(); + deleteTemplateResponse.then().assertThat().statusCode(UNAUTHORIZED.getStatusCode()); + + //set back to show super not needed for delete - just Edit Dataverse + UtilIT.setSuperuserStatus(username, 
false); + + String badId = "8675309"; + + deleteTemplateResponse = UtilIT.deleteTemplate(badId, apiToken); + deleteTemplateResponse.prettyPrint(); + deleteTemplateResponse.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + deleteTemplateResponse = UtilIT.deleteTemplate(templateId.toString(), apiToken); + deleteTemplateResponse.prettyPrint(); + deleteTemplateResponse.then().assertThat().statusCode(OK.getStatusCode()); + // back to super for cleanup + + UtilIT.setSuperuserStatus(username, true); + + Response deleteDataverse1Response = UtilIT.deleteDataverse(dataverseAlias, apiToken); + deleteDataverse1Response.prettyPrint(); + assertEquals(200, deleteDataverse1Response.getStatusCode()); + + Response deleteUserResponse = UtilIT.deleteUser(secondUsername); + deleteUserResponse.prettyPrint(); + assertEquals(200, deleteUserResponse.getStatusCode()); + + deleteUserResponse = UtilIT.deleteUser(username); + deleteUserResponse.prettyPrint(); + assertEquals(200, deleteUserResponse.getStatusCode()); + + } @Test @@ -2792,4 +2867,46 @@ public void testDataverseMetadataLanguage() { singleLang.then().assertThat().body("data", equalTo(List.of(Map.of("locale", "en", "title", "English")))); } + @Test + public void testAssignRoleOnDataverse() { + // Create user + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + // Create collection as that user + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + // Create second user + Response createSecondUserResponse = UtilIT.createRandomUser(); + String secondUsername = UtilIT.getUsernameFromResponse(createSecondUserResponse); + String secondApiToken = UtilIT.getApiTokenFromResponse(createSecondUserResponse); + + // Let the first 
user assign a role on their collection to the second user + Response grantRoleResponse = UtilIT.grantRoleOnDataverse(dataverseAlias, DataverseRole.DS_CONTRIBUTOR, "@" + secondUsername, apiToken); + grantRoleResponse.prettyPrint(); + grantRoleResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // Let the first user assign the same role to the same user again -> 409 Conflict + Response grantRoleTwiceResponse = UtilIT.grantRoleOnDataverse(dataverseAlias, DataverseRole.DS_CONTRIBUTOR, "@" + secondUsername, apiToken); + grantRoleTwiceResponse.prettyPrint(); + grantRoleTwiceResponse.then().assertThat().statusCode(CONFLICT.getStatusCode()); + + // Clean up + Response deleteDataverse = UtilIT.deleteDataverse(dataverseAlias, apiToken); + deleteDataverse.prettyPrint(); + deleteDataverse.then().assertThat().statusCode(OK.getStatusCode()); + + Response deleteUserResponse = UtilIT.deleteUser(username); + deleteUserResponse.prettyPrint(); + assertEquals(200, deleteUserResponse.getStatusCode()); + + Response deleteSecondUserResponse = UtilIT.deleteUser(secondUsername); + deleteSecondUserResponse.prettyPrint(); + assertEquals(200, deleteSecondUserResponse.getStatusCode()); + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java index 262f3252f9d..ed96b5b4656 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java @@ -1,60 +1,59 @@ package edu.harvard.iq.dataverse.api; import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.Guestbook; +import edu.harvard.iq.dataverse.api.auth.ApiKeyAuthMechanism; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.datasetutility.OptionalFileParams; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.FileUtil; +import 
edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JsonParseException; import edu.harvard.iq.dataverse.util.json.JsonParser; import edu.harvard.iq.dataverse.util.json.JsonUtil; import io.restassured.RestAssured; +import io.restassured.path.json.JsonPath; +import io.restassured.path.xml.XmlPath; import io.restassured.response.Response; - -import java.nio.charset.StandardCharsets; -import java.util.*; -import java.util.logging.Logger; - -import edu.harvard.iq.dataverse.api.auth.ApiKeyAuthMechanism; +import jakarta.json.Json; import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; import jakarta.ws.rs.core.Response.Status; import org.assertj.core.util.Lists; +import org.hamcrest.CoreMatchers; import org.hamcrest.Matcher; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.BeforeAll; -import io.restassured.path.json.JsonPath; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; -import static edu.harvard.iq.dataverse.api.ApiConstants.*; -import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; -import static io.restassured.path.json.JsonPath.with; -import io.restassured.path.xml.XmlPath; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import edu.harvard.iq.dataverse.util.BundleUtil; -import edu.harvard.iq.dataverse.util.FileUtil; -import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.File; import java.io.IOException; - -import static java.lang.Thread.sleep; - +import java.nio.charset.StandardCharsets; import java.nio.file.Path; import java.nio.file.Paths; -import java.text.MessageFormat; - -import jakarta.json.Json; -import jakarta.json.JsonObjectBuilder; - -import 
static jakarta.ws.rs.core.Response.Status.*; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.text.MessageFormat; import java.time.Year; -import org.hamcrest.CoreMatchers; -import org.hamcrest.Matchers; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.io.TempDir; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; +import static edu.harvard.iq.dataverse.api.ApiConstants.*; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; +import static io.restassured.RestAssured.get; +import static io.restassured.path.json.JsonPath.with; +import static jakarta.ws.rs.core.Response.Status.*; +import static java.lang.Thread.sleep; import static org.hamcrest.CoreMatchers.*; import static org.junit.jupiter.api.Assertions.*; @@ -3661,6 +3660,8 @@ public void testUploadFilesWithLimits() throws JsonParseException { String apiToken = UtilIT.getApiTokenFromResponse(createUser); Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); createDataverseResponse.prettyPrint(); String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); // Update the dataverse with a datasetFileCountLimit of 1 @@ -3676,10 +3677,10 @@ public void testUploadFilesWithLimits() throws JsonParseException { Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); createDatasetResponse.prettyPrint(); - Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); - String datasetPersistenceId = JsonPath.from(createDatasetResponse.body().asString()).getString("data.persistentId"); 
createDatasetResponse.then().assertThat() .statusCode(CREATED.getStatusCode()); + Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + String datasetPersistenceId = JsonPath.from(createDatasetResponse.body().asString()).getString("data.persistentId"); // ------------------------- // Add initial file @@ -3870,4 +3871,193 @@ public void testUpdateWithEmptyFieldsAndVersionCheck() throws InterruptedExcepti .body("message", equalTo(BundleUtil.getStringFromBundle("jsonparser.error.parsing.date",Collections.singletonList("bad-date")))) .statusCode(BAD_REQUEST.getStatusCode()); } + + @Test + public void testDownloadFileWithGuestbookResponse() throws IOException, JsonParseException { + msgt("testDownloadFileWithGuestbookResponse"); + // Create superuser + Response createUserResponse = UtilIT.createRandomUser(); + assertEquals(200, createUserResponse.getStatusCode()); + String ownerApiToken = UtilIT.getApiTokenFromResponse(createUserResponse); + String superusername = UtilIT.getUsernameFromResponse(createUserResponse); + UtilIT.makeSuperUser(superusername).then().assertThat().statusCode(200); + + // Create Dataverse + String dataverseAlias = createDataverseGetAlias(ownerApiToken); + + // Create user with no permission + createUserResponse = UtilIT.createRandomUser(); + assertEquals(200, createUserResponse.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUserResponse); + String username = UtilIT.getUsernameFromResponse(createUserResponse); + + // Create second user with no permission + createUserResponse = UtilIT.createRandomUser(); + createUserResponse.prettyPrint(); + assertEquals(200, createUserResponse.getStatusCode()); + String apiToken2 = UtilIT.getApiTokenFromResponse(createUserResponse); + String username2 = UtilIT.getUsernameFromResponse(createUserResponse); + String user2Email = JsonPath.from(createUserResponse.body().asString()).getString("data.authenticatedUser.email"); + + // Create Dataset + 
Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, ownerApiToken); + createDatasetResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + String persistentId = JsonPath.from(createDatasetResponse.body().asString()).getString("data.persistentId"); + Response getDatasetMetadata = UtilIT.nativeGet(datasetId, ownerApiToken); + getDatasetMetadata.then().assertThat().statusCode(200); + + Response getGuestbooksResponse = UtilIT.getGuestbooks(dataverseAlias, ownerApiToken); + getGuestbooksResponse.then().assertThat().statusCode(200); + assertTrue(getGuestbooksResponse.getBody().jsonPath().getList("data").isEmpty()); + + // Create a Guestbook + Guestbook guestbook = UtilIT.createRandomGuestbook(dataverseAlias, persistentId, ownerApiToken); + + // Get the list of Guestbooks + getGuestbooksResponse = UtilIT.getGuestbooks(dataverseAlias, ownerApiToken); + getGuestbooksResponse.then().assertThat().statusCode(200); + assertEquals(1, getGuestbooksResponse.getBody().jsonPath().getList("data").size()); + + // Upload files + JsonObjectBuilder json1 = Json.createObjectBuilder().add("description", "my description1").add("directoryLabel", "data/subdir1").add("categories", Json.createArrayBuilder().add("Data")); + Response uploadResponse = UtilIT.uploadFileViaNative(datasetId.toString(), "src/main/webapp/resources/images/dataverseproject.png", json1.build(), ownerApiToken); + uploadResponse.then().assertThat().statusCode(OK.getStatusCode()); + Integer fileId1 = JsonPath.from(uploadResponse.body().asString()).getInt("data.files[0].dataFile.id"); + JsonObjectBuilder json2 = Json.createObjectBuilder().add("description", "my description2").add("directoryLabel", "data/subdir1").add("categories", Json.createArrayBuilder().add("Data")); + uploadResponse = UtilIT.uploadFileViaNative(datasetId.toString(), 
"src/main/webapp/resources/images/orcid_16x16.png", json1.build(), ownerApiToken); + uploadResponse.then().assertThat().statusCode(OK.getStatusCode()); + Integer fileId2 = JsonPath.from(uploadResponse.body().asString()).getInt("data.files[0].dataFile.id"); + JsonObjectBuilder json3 = Json.createObjectBuilder().add("description", "my description3").add("directoryLabel", "data/subdir1").add("categories", Json.createArrayBuilder().add("Data")); + uploadResponse = UtilIT.uploadFileViaNative(datasetId.toString(), "src/main/webapp/resources/images/cc0.png", json1.build(), ownerApiToken); + uploadResponse.then().assertThat().statusCode(OK.getStatusCode()); + Integer fileId3 = JsonPath.from(uploadResponse.body().asString()).getInt("data.files[0].dataFile.id"); + JsonObjectBuilder json4 = Json.createObjectBuilder().add("description", "my description4").add("directoryLabel", "data/subdir1").add("categories", Json.createArrayBuilder().add("Data")); + uploadResponse = UtilIT.uploadFileViaNative(datasetId.toString(), "src/main/webapp/resources/images/Robot-Icon_2.png", json1.build(), ownerApiToken); + uploadResponse.then().assertThat().statusCode(OK.getStatusCode()); + Integer fileId4 = JsonPath.from(uploadResponse.body().asString()).getInt("data.files[0].dataFile.id"); + + // Restrict files + Response restrictResponse = UtilIT.restrictFile(fileId1.toString(), true, ownerApiToken); + restrictResponse.then().assertThat().statusCode(OK.getStatusCode()); + restrictResponse = UtilIT.restrictFile(fileId2.toString(), true, ownerApiToken); + restrictResponse.then().assertThat().statusCode(OK.getStatusCode()); + restrictResponse = UtilIT.restrictFile(fileId3.toString(), true, ownerApiToken); + restrictResponse.then().assertThat().statusCode(OK.getStatusCode()); + // do not restrict fileId4 + + // Update Dataset to allow requests + Response allowAccessRequestsResponse = UtilIT.allowAccessRequests(datasetId.toString(), true, ownerApiToken); + assertEquals(200, 
allowAccessRequestsResponse.getStatusCode()); + // Publish dataverse and dataset + Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, ownerApiToken); + assertEquals(200, publishDataverse.getStatusCode()); + Response publishDataset = UtilIT.publishDatasetViaNativeApi(datasetId, "major", ownerApiToken); + assertEquals(200, publishDataset.getStatusCode()); + + // Request access + Response requestFileAccessResponse = UtilIT.requestFileAccess(fileId1.toString(), apiToken, null); + assertEquals(200, requestFileAccessResponse.getStatusCode()); + requestFileAccessResponse = UtilIT.requestFileAccess(fileId2.toString(), apiToken, null); + assertEquals(200, requestFileAccessResponse.getStatusCode()); + requestFileAccessResponse = UtilIT.requestFileAccess(fileId3.toString(), apiToken, null); + assertEquals(200, requestFileAccessResponse.getStatusCode()); + + // Grant file access + Response grantFileAccessResponse = UtilIT.grantFileAccess(fileId1.toString(), "@" + username, ownerApiToken); + assertEquals(200, grantFileAccessResponse.getStatusCode()); + grantFileAccessResponse = UtilIT.grantFileAccess(fileId2.toString(), "@" + username, ownerApiToken); + assertEquals(200, grantFileAccessResponse.getStatusCode()); + grantFileAccessResponse = UtilIT.grantFileAccess(fileId3.toString(), "@" + username, ownerApiToken); + assertEquals(200, grantFileAccessResponse.getStatusCode()); + + String guestbookResponse = UtilIT.generateGuestbookResponse(guestbook); + + // Download unrestricted file by guest user fails without GuestbookResponse + Response downloadResponse = UtilIT.downloadFile(fileId4); + downloadResponse.prettyPrint(); + downloadResponse.then().assertThat() + .body("status", equalTo(ApiConstants.STATUS_ERROR)) + .body("message", equalTo(BundleUtil.getStringFromBundle("access.api.download.failure.guestbookResponseMissing", List.of(guestbook.getId().toString())))) + .statusCode(BAD_REQUEST.getStatusCode()); + // With GuestbookResponse. 
Guest user doesn't have the required Name and Email. so this will still fail + downloadResponse = UtilIT.postDownloadFile(fileId4, guestbookResponse); + downloadResponse.prettyPrint(); + downloadResponse.then().assertThat() + .body("status", equalTo(ApiConstants.STATUS_ERROR)) + .body("message", containsString("(Name,Email)")) + .statusCode(BAD_REQUEST.getStatusCode()); + String guestbookResponseForGuest = guestbookResponse.replace("\"guestbookResponse\": {", + "\"guestbookResponse\": { \"name\":\"My Name\", \"email\":\"myemail@example.com\", \"position\":\"My Position\", \"institution\":\"My Institution\","); + // With GuestbookResponse. Guest user doesn't have the required Name, etc. So we will add those to the Guestbook Response + downloadResponse = UtilIT.postDownloadFile(fileId4, guestbookResponseForGuest); + downloadResponse.prettyPrint(); + downloadResponse.then().assertThat() + .statusCode(OK.getStatusCode()); + String signedUrl = UtilIT.getSignedUrlFromResponse(downloadResponse); + // Download the file using the signed url + Response signedUrlResponse = get(signedUrl); + assertEquals(OK.getStatusCode(), signedUrlResponse.getStatusCode()); + + // Get Download Url attempt - Guestbook Response is required but not found + downloadResponse = UtilIT.getDownloadFileUrlWithGuestbookResponse(fileId1, apiToken, null); + downloadResponse.prettyPrint(); + downloadResponse.then().assertThat() + .body("status", equalTo(ApiConstants.STATUS_ERROR)) + .body("message", equalTo(BundleUtil.getStringFromBundle("access.api.download.failure.guestbookResponseMissing", List.of(guestbook.getId().toString())))) + .statusCode(BAD_REQUEST.getStatusCode()); + + // Get Signed Download Url with guestbook response + downloadResponse = UtilIT.getDownloadFileUrlWithGuestbookResponse(fileId1, apiToken, guestbookResponse); + downloadResponse.prettyPrint(); + downloadResponse.then().assertThat() + .statusCode(OK.getStatusCode()); + signedUrl = UtilIT.getSignedUrlFromResponse(downloadResponse); 
+ + // Download the file using the signed url + signedUrlResponse = get(signedUrl); + assertEquals(OK.getStatusCode(), signedUrlResponse.getStatusCode()); + + // Download multiple files - Guestbook Response is required but not found for file2 and file3 + downloadResponse = UtilIT.postDownloadDatafiles(fileId1 + "," + fileId2+ "," + fileId3, apiToken); + downloadResponse.prettyPrint(); + downloadResponse.then().assertThat() + .body("status", equalTo(ApiConstants.STATUS_ERROR)) + .body("message", equalTo(BundleUtil.getStringFromBundle("access.api.download.failure.guestbookResponseMissing", List.of(guestbook.getId().toString())))) + .statusCode(BAD_REQUEST.getStatusCode()); + + // Download multiple files with guestbook response and fileIds in json + String jsonBody = "{\"fileIds\":[" + fileId1 + "," + fileId2+ "," + fileId3 +"], " + guestbookResponse.substring(1); + downloadResponse = UtilIT.postDownloadDatafiles(jsonBody, apiToken); + assertEquals(OK.getStatusCode(), downloadResponse.getStatusCode()); + + downloadResponse = UtilIT.downloadFilesUrlWithGuestbookResponse(new Integer[]{fileId1, fileId2, fileId3}, apiToken, guestbookResponse); + signedUrl = UtilIT.getSignedUrlFromResponse(downloadResponse); + signedUrlResponse = get(signedUrl); + assertEquals(OK.getStatusCode(), signedUrlResponse.getStatusCode()); + + // TEST Overwrite name, email, institution and position in guestbook Response. 
Using user2 + requestFileAccessResponse = UtilIT.requestFileAccess(fileId1.toString(), apiToken2, null); + assertEquals(200, requestFileAccessResponse.getStatusCode()); + grantFileAccessResponse = UtilIT.grantFileAccess(fileId1.toString(), "@" + username2, ownerApiToken); + assertEquals(200, grantFileAccessResponse.getStatusCode()); + // Modify guestbookResponse excluding email to show that the email remains unchanged + guestbookResponse = guestbookResponse.replace("\"guestbookResponse\": {", + "\"guestbookResponse\": { \"name\":\"My Name\", \"position\":\"My Position\", \"institution\":\"My Institution\","); + downloadResponse = UtilIT.getDownloadFileUrlWithGuestbookResponse(fileId1, apiToken2, guestbookResponse); + downloadResponse.then().assertThat() + .statusCode(OK.getStatusCode()); + Response guestbookResponses = UtilIT.getGuestbookResponses(dataverseAlias, guestbook.getId(), ownerApiToken); + assertTrue(guestbookResponses.prettyPrint().contains("My Name," + user2Email + ",My Institution,My Position")); + + // Get Signed Download Url with guestbook response using persistentId + // POST /api/access/dataset/:persistentId?persistentId=doi:10.xxxx/FK2/ABC + downloadResponse = UtilIT.downloadFilesUrlWithGuestbookResponse(persistentId, apiToken, guestbookResponse); + downloadResponse.prettyPrint(); + downloadResponse.then().assertThat() + .statusCode(OK.getStatusCode()); + signedUrl = UtilIT.getSignedUrlFromResponse(downloadResponse); + signedUrlResponse = get(signedUrl); + assertEquals(OK.getStatusCode(), signedUrlResponse.getStatusCode()); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/InReviewWorkflowIT.java b/src/test/java/edu/harvard/iq/dataverse/api/InReviewWorkflowIT.java index a2bef649c72..187043f5233 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/InReviewWorkflowIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/InReviewWorkflowIT.java @@ -6,6 +6,7 @@ import io.restassured.response.Response; import 
edu.harvard.iq.dataverse.authorization.DataverseRole; import jakarta.json.Json; +import jakarta.json.JsonArray; import jakarta.json.JsonObjectBuilder; import static edu.harvard.iq.dataverse.UserNotification.Type.*; @@ -120,6 +121,13 @@ public void testCuratorSendsCommentsToAuthor() { .body("message", equalTo("You cannot submit this dataset for review because it is already in review.")) .statusCode(FORBIDDEN.getStatusCode()); + // Confirm that when getting the dataset, the "InReview" lock is listed + Response getDatasetJson = UtilIT.nativeGet(datasetId, authorApiToken); + getDatasetJson.prettyPrint(); + getDatasetJson.then().assertThat() + .body("data.locks[0]", equalTo("InReview")) + .statusCode(200); + Response authorsChecksForCommentsPrematurely = UtilIT.getNotifications(authorApiToken); authorsChecksForCommentsPrematurely.prettyPrint(); authorsChecksForCommentsPrematurely.then().assertThat() @@ -429,6 +437,41 @@ public void testCuratorSendsCommentsToAuthor() { // .body("data[3].reasonsForReturn", equalTo(null)) .statusCode(OK.getStatusCode()); + // The author realizes she wants to add another file and creates a new draft version. + Response authorAddsNewFileCreatingNewDraft = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile1, authorApiToken); + authorAddsNewFileCreatingNewDraft.prettyPrint(); + authorAddsNewFileCreatingNewDraft.then().assertThat() + .statusCode(OK.getStatusCode()); + + // The author re-submits. + Response submit4 = UtilIT.submitDatasetForReview(datasetPersistentId, authorApiToken); + submit4.prettyPrint(); + submit4.then().assertThat() + .body("data.inReview", equalTo(true)) + .statusCode(OK.getStatusCode()); + + // The curator checks notifications and sees that the dataset has been re-submitted after it was published. 
+ Response curatorChecksForNotificationsPostPublication = UtilIT.getNotifications(curatorApiToken); + curatorChecksForNotificationsPostPublication.prettyPrint(); + curatorChecksForNotificationsPostPublication.then().assertThat() + .body("data[0].type", equalTo(SUBMITTEDDS.toString())) + .body("data[1].type", equalTo(PUBLISHEDDS.toString())) + .statusCode(OK.getStatusCode()); + + // The curator checks again, this time in app notification format. + Response curatorChecksForNotificationsPostPublicationInAppFormat = UtilIT.getNotifications(curatorApiToken, true, null, null, null); + curatorChecksForNotificationsPostPublicationInAppFormat.prettyPrint(); + curatorChecksForNotificationsPostPublicationInAppFormat.then().assertThat() + .body("data[0].type", equalTo(SUBMITTEDDS.toString())) + .body("data[0].objectDeleted", equalTo(null)) + .body("data[0].datasetPersistentIdentifier", equalTo(datasetPersistentId)) + .body("data[0].ownerAlias", equalTo(dataverseAlias)) + .body("data[1].type", equalTo(PUBLISHEDDS.toString())) + .body("data[1].objectDeleted", equalTo(null)) + .body("data[1].datasetPersistentIdentifier", equalTo(datasetPersistentId)) + .body("data[1].ownerAlias", equalTo(dataverseAlias)) + .statusCode(OK.getStatusCode()); + // These println's are here in case you want to log into the GUI to see what notifications look like. 
System.out.println("Curator username/password: " + curatorUsername); System.out.println("Author username/password: " + authorUsername); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/InfoIT.java b/src/test/java/edu/harvard/iq/dataverse/api/InfoIT.java index 4dcbab4093f..81dd20c54fa 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/InfoIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/InfoIT.java @@ -25,18 +25,24 @@ public static void setUpClass() { UtilIT.deleteSetting(SettingsServiceBean.Key.DatasetPublishPopupCustomText); UtilIT.deleteSetting(SettingsServiceBean.Key.ApplicationTermsOfUse); UtilIT.deleteSetting(SettingsServiceBean.Key.ApplicationTermsOfUse, "fr"); + UtilIT.deleteSetting(SettingsServiceBean.Key.PublishDatasetDisclaimerText); } @AfterAll public static void afterClass() { UtilIT.deleteSetting(SettingsServiceBean.Key.MaxEmbargoDurationInMonths); UtilIT.deleteSetting(SettingsServiceBean.Key.DatasetPublishPopupCustomText); + UtilIT.deleteSetting(SettingsServiceBean.Key.PublishDatasetDisclaimerText); } @Test public void testGetDatasetPublishPopupCustomText() { testSettingEndpoint(SettingsServiceBean.Key.DatasetPublishPopupCustomText, "Hello world!"); } + @Test + public void testGetDatasetPublishDisclaimerText() { + testSettingEndpoint(SettingsServiceBean.Key.PublishDatasetDisclaimerText, "Hello world!"); + } @Test public void testGetMaxEmbargoDurationInMonths() { diff --git a/src/test/java/edu/harvard/iq/dataverse/api/LDNInboxIT.java b/src/test/java/edu/harvard/iq/dataverse/api/LDNInboxIT.java index 33019efa8b4..857d5a0eec6 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/LDNInboxIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/LDNInboxIT.java @@ -1,4 +1,3 @@ - package edu.harvard.iq.dataverse.api; import org.junit.jupiter.api.Test; diff --git a/src/test/java/edu/harvard/iq/dataverse/api/LDNInboxTest.java b/src/test/java/edu/harvard/iq/dataverse/api/LDNInboxTest.java index 1adf8327f83..147175b45b1 100644 --- 
a/src/test/java/edu/harvard/iq/dataverse/api/LDNInboxTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/LDNInboxTest.java @@ -5,12 +5,10 @@ import edu.harvard.iq.dataverse.DataverseRoleServiceBean; import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.MailServiceBean; import edu.harvard.iq.dataverse.RoleAssigneeServiceBean; import edu.harvard.iq.dataverse.RoleAssignment; import edu.harvard.iq.dataverse.UserNotification; import edu.harvard.iq.dataverse.UserNotificationServiceBean; -import edu.harvard.iq.dataverse.api.ldn.COARNotifyRelationshipAnnouncement; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.RoleAssignee; @@ -19,7 +17,6 @@ import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.testing.JvmSetting; import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; import jakarta.json.Json; @@ -40,8 +37,6 @@ import org.mockito.junit.jupiter.MockitoExtension; import static jakarta.ws.rs.core.Response.Status.OK; -import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; -import static jakarta.ws.rs.core.Response.Status.FORBIDDEN; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MoveIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MoveIT.java index 8951b0bd42e..e3ea08c9f0d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MoveIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MoveIT.java @@ -1,5 +1,7 @@ package edu.harvard.iq.dataverse.api; +import 
edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.BundleUtil; import io.restassured.RestAssured; import io.restassured.path.json.JsonPath; import static io.restassured.path.json.JsonPath.with; @@ -7,6 +9,7 @@ import edu.harvard.iq.dataverse.authorization.DataverseRole; import java.io.StringReader; import java.util.List; +import java.util.Map; import java.util.logging.Logger; import jakarta.json.Json; import jakarta.json.JsonObject; @@ -15,9 +18,14 @@ import static jakarta.ws.rs.core.Response.Status.FORBIDDEN; import static jakarta.ws.rs.core.Response.Status.OK; import static jakarta.ws.rs.core.Response.Status.UNAUTHORIZED; + import org.hamcrest.CoreMatchers; -import static org.hamcrest.CoreMatchers.equalTo; + +import static org.hamcrest.CoreMatchers.*; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -29,6 +37,10 @@ public class MoveIT { public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); } + @AfterAll + public static void afterClass() { + sendNotificationOnDatasetMoveSetting(false); + } @Test public void testMoveDataset() { @@ -57,7 +69,7 @@ public void testMoveDataset() { Response noPermToCreateDataset = UtilIT.createRandomDatasetViaNativeApi(curatorDataverseAlias1, authorApiToken); noPermToCreateDataset.prettyPrint(); noPermToCreateDataset.then().assertThat().statusCode(UNAUTHORIZED.getStatusCode()); - + Response grantAuthorAddDataset = UtilIT.grantRoleOnDataverse(curatorDataverseAlias1, DataverseRole.DS_CONTRIBUTOR.toString(), "@" + authorUsername, curatorApiToken); grantAuthorAddDataset.prettyPrint(); grantAuthorAddDataset.then().assertThat() @@ -145,6 +157,81 @@ public void testMoveDataset() { } + @Test + public void testMoveDatasetNotification() { + sendNotificationOnDatasetMoveSetting(true); 
+ // Create the first user/dataverse (superuser) + Response createUser1 = UtilIT.createRandomUser(); + createUser1.prettyPrint(); + createUser1.then().assertThat() + .statusCode(OK.getStatusCode()); + String user1Username = UtilIT.getUsernameFromResponse(createUser1); + String user1ApiToken = UtilIT.getApiTokenFromResponse(createUser1); + UtilIT.setSuperuserStatus(user1Username, true); + + Response createDataverse1 = UtilIT.createRandomDataverse(user1ApiToken); + createDataverse1.prettyPrint(); + createDataverse1.then().assertThat() + .statusCode(CREATED.getStatusCode()); + String dataverseAlias1 = UtilIT.getAliasFromResponse(createDataverse1); + + // Create the second user/dataverse + Response createUser2 = UtilIT.createRandomUser(); + createUser2.prettyPrint(); + createUser2.then().assertThat() + .statusCode(OK.getStatusCode()); + String user2Username = UtilIT.getUsernameFromResponse(createUser2); + String user2ApiToken = UtilIT.getApiTokenFromResponse(createUser2); + + Response createDataverse2 = UtilIT.createRandomDataverse(user2ApiToken); + createDataverse2.prettyPrint(); + createDataverse2.then().assertThat() + .statusCode(CREATED.getStatusCode()); + String dataverseAlias2 = UtilIT.getAliasFromResponse(createDataverse2); + + // User2 creates dataset in DV2 + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias2, user2ApiToken); + createDataset.prettyPrint(); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + + // User1(superuser) moves the dataset from dataverse2 to dataverse1 + Response moveDataset = UtilIT.moveDataset(datasetId.toString(), dataverseAlias1, user1ApiToken); + moveDataset.prettyPrint(); + moveDataset.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.message", equalTo(BundleUtil.getStringFromBundle("datasets.api.moveDataset.success"))); + + // verify that a notification was sent to user1 + Response 
getNotifications = UtilIT.getNotifications(user1ApiToken); + getNotifications.prettyPrint(); + verifyNotification(getNotifications, dataverseAlias1); + + // verify that a notification was sent to user2 + getNotifications = UtilIT.getNotifications(user2ApiToken); + getNotifications.prettyPrint(); + verifyNotification(getNotifications, dataverseAlias1); + } + + private void verifyNotification(Response notificationListResponse, String dataverseAlias) { + notificationListResponse.then().assertThat() + .statusCode(OK.getStatusCode()); + boolean found = false; + List> notifications = notificationListResponse.body().jsonPath().getList("data"); + + for (Map notification : notifications) { + if ("DATASETMOVED".equalsIgnoreCase(notification.get("type"))) { + if (notification.get("messageText") != null && notification.get("messageText").contains(dataverseAlias)) { + found = true; + assertTrue(notification.get("subjectText") != null && notification.get("subjectText").contains("has been moved")); + assertTrue(notification.get("messageText") != null && notification.get("messageText").startsWith(BundleUtil.getStringFromBundle("notification.email.greeting"))); + } + } + } + assertTrue(found); + } + @Test public void testMoveDatasetThief() { @@ -310,12 +397,12 @@ public void testMoveLinkedDataset() { assertEquals("OK", linksAfterData.getString("status")); assertEquals(0, linksAfterData.getJsonObject("data").getJsonArray("linked-dataverses").size()); } - + @Test public void testMoveDatasetsPerms() { /* - Verify that permissions set on a dataset remain + Verify that permissions set on a dataset remain after that dataaset is moved */ Response createCurator = UtilIT.createRandomUser(); @@ -408,4 +495,15 @@ public void testMoveDatasetsPerms() { } + private static void sendNotificationOnDatasetMoveSetting(boolean enable) { + Response resp; + if (enable) { + resp = UtilIT.enableSetting(SettingsServiceBean.Key.SendNotificationOnDatasetMove); + } else { + resp = 
UtilIT.deleteSetting(SettingsServiceBean.Key.SendNotificationOnDatasetMove); + } + resp.prettyPrint(); + resp.then().assertThat() + .statusCode(OK.getStatusCode()); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ReviewsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ReviewsIT.java new file mode 100644 index 00000000000..2fc74c4b3f1 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/ReviewsIT.java @@ -0,0 +1,347 @@ +package edu.harvard.iq.dataverse.api; + +import edu.harvard.iq.dataverse.dataset.DatasetType; +import edu.harvard.iq.dataverse.util.json.JsonUtil; +import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; +import io.restassured.RestAssured; +import io.restassured.path.json.JsonPath; +import io.restassured.response.Response; +import jakarta.json.Json; +import jakarta.json.JsonObjectBuilder; +import static jakarta.ws.rs.core.Response.Status.CREATED; +import static jakarta.ws.rs.core.Response.Status.OK; +import static org.hamcrest.CoreMatchers.is; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +/** + * This test class has not been added to the API test suite at + * tests/integration-tests.txt because it relies on the review.tsv which is not + * loaded out of the box. When we start loading review.tsv for new installations + * of Dataverse (and stop putting it under "experiemental" on the list of + * metadata blocks in the guides), we'll add this test class to the API test + * suite. + * + * To run these tests, manually load review.tsv (or temporarily set + * loadReviewTsv to true below) and update Solr. Be advised that there are + * other places in the API test suite that make assertions on the number of + * metadata blocks. Again, some day we might ship Dataverse with review.tsv + * already loaded. 
+ */ +public class ReviewsIT { + + private static String apiTokenSuperuser; + + @BeforeAll + public static void setUpClass() { + RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String usernameSuperuser = UtilIT.getUsernameFromResponse(createUser); + apiTokenSuperuser = UtilIT.getApiTokenFromResponse(createUser); + UtilIT.setSuperuserStatus(usernameSuperuser, true).then().assertThat().statusCode(OK.getStatusCode()); + + byte[] reviewTsv = null; + try { + reviewTsv = Files.readAllBytes(Paths.get("scripts/api/data/metadatablocks/review.tsv")); + } catch (IOException e) { + } + + // See warnings above. If you enable this, don't forget to update Solr. + boolean loadReviewTsv = false; + if (loadReviewTsv) { + Response response = UtilIT.loadMetadataBlock(apiTokenSuperuser, reviewTsv); + response.prettyPrint(); + assertEquals(200, response.getStatusCode()); + response.then().assertThat().statusCode(OK.getStatusCode()); + } + + String datasetDescription = "A study, experiment, set of observations, or publication that is uploaded by a user. 
A dataset can comprise a single file or multiple files."; + ensureDatasetTypeIsPresent(DatasetType.DATASET_TYPE_DATASET, "Dataset", datasetDescription, apiTokenSuperuser); + + String reviewDescription = null; + try { + reviewDescription = JsonUtil.getJsonObjectFromFile("scripts/api/data/datasetTypes/review.json").getString("description"); + } catch (IOException e) { + } + ensureDatasetTypeIsPresent(DatasetType.DATASET_TYPE_REVIEW, "Review", reviewDescription, apiTokenSuperuser); + } + + private static void ensureDatasetTypeIsPresent(String name, String displayName, String description, + String apiToken) { + Response getDatasetType = UtilIT.getDatasetType(name); + getDatasetType.prettyPrint(); + String nameFound = JsonPath.from(getDatasetType.getBody().asString()).getString("data.name"); + String displayNameFound = JsonPath.from(getDatasetType.getBody().asString()).getString("data.displayName"); + String descriptionFound = JsonPath.from(getDatasetType.getBody().asString()).getString("data.description"); + System.out.println("Found: name=" + nameFound + ". Display name=" + displayNameFound + ". Description=" + + descriptionFound); + if (name.equals(nameFound)) { + System.out.println(name + "=" + nameFound + ". Exists. No need to create. Returning."); + return; + } else { + System.out.println(name + " wasn't found. 
Create it."); + } + String jsonIn = NullSafeJsonBuilder.jsonObjectBuilder() + .add("name", name) + .add("displayName", displayName) + .add("description", description) + .add("linkedMetadataBlocks", Json.createArrayBuilder() + .add("review") + ) + .build().toString(); + // System.out.println(JsonUtil.prettyPrint(jsonIn)); + Response typeAdded = UtilIT.addDatasetType(jsonIn, apiToken); + typeAdded.prettyPrint(); + typeAdded.then().assertThat().statusCode(OK.getStatusCode()); + } + + @Test + public void testCreateReview() { + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + createUser.then().assertThat() + .statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response setAllowedDatasetTypes = UtilIT.setCollectionAttribute(dataverseAlias, "allowedDatasetTypes", + "review", apiTokenSuperuser); + setAllowedDatasetTypes.prettyPrint(); + setAllowedDatasetTypes.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.allowedDatasetTypes[0].name", is("review")) + .body("data.allowedDatasetTypes[0].displayName", is("Review")) + .body("data.allowedDatasetTypes[0].description", is("A review of a dataset compiled by the expert community.")); + + String itemReviewedTitle = "Percent of Children That Have Asthma"; + String itemReviewedUrl = "https://datacommons.org/tools/statvar#sv=Percent_Person_Children_WithAsthma"; + // This citation came from https://www.mybib.com + String itemReviewedCitation = "\"Statistical Variable Explorer - Data Commons.\" Datacommons.org, 2026, datacommons.org/tools/statvar#sv=Percent_Person_Children_WithAsthma. 
Accessed 9 Mar. 2026."; + String reviewTitle = "Review of " + itemReviewedTitle; + String authorName = "Wazowski, Mike"; + String authorEmail = "mwazowski@mailinator.com"; + JsonObjectBuilder jsonForCreatingReview = Json.createObjectBuilder() + /** + * See above where this type is added to the installation and + * therefore available for use. + */ + .add("datasetType", DatasetType.DATASET_TYPE_REVIEW) + .add("datasetVersion", Json.createObjectBuilder() + .add("license", Json.createObjectBuilder() + .add("name", "CC0 1.0") + .add("uri", "http://creativecommons.org/publicdomain/zero/1.0")) + .add("metadataBlocks", Json.createObjectBuilder() + .add("citation", Json.createObjectBuilder() + .add("fields", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("typeName", "title") + .add("value", reviewTitle) + .add("typeClass", "primitive") + .add("multiple", false)) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("authorName", + Json.createObjectBuilder() + .add("value", authorName) + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", + "authorName")))) + .add("typeClass", "compound") + .add("multiple", true) + .add("typeName", "author")) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("datasetContactEmail", + Json.createObjectBuilder() + .add("value", authorEmail) + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", + "datasetContactEmail")))) + .add("typeClass", "compound") + .add("multiple", true) + .add("typeName", "datasetContact")) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("dsDescriptionValue", + Json.createObjectBuilder() + .add("value", + "This is a review of a dataset.") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", + "dsDescriptionValue")))) + 
.add("typeClass", "compound") + .add("multiple", true) + .add("typeName", "dsDescription")) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add("Medicine, Health and Life Sciences")) + .add("typeClass", "controlledVocabulary") + .add("multiple", true) + .add("typeName", "subject")) + .add(Json.createObjectBuilder() + .add("value", Json.createObjectBuilder() + .add("itemReviewedUrl", + Json.createObjectBuilder() + .add("value", itemReviewedUrl) + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "itemReviewedUrl")) + .add("itemReviewedType", + Json.createObjectBuilder() + .add("value", "Dataset") + .add("typeClass", + "controlledVocabulary") + .add("multiple", false) + .add("typeName", "itemReviewedType")) + .add("itemReviewedCitation", + Json.createObjectBuilder() + .add("value", itemReviewedCitation) + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "itemReviewedCitation"))) + .add("typeClass", "compound") + .add("multiple", false) + .add("typeName", "itemReviewed")))))); + + Response createReview = UtilIT.createDataset(dataverseAlias, jsonForCreatingReview, apiToken); + createReview.prettyPrint(); + createReview.then().assertThat().statusCode(CREATED.getStatusCode()); + Integer reviewId = UtilIT.getDatasetIdFromResponse(createReview); + String reviewPid = JsonPath.from(createReview.getBody().asString()).getString("data.persistentId"); + + } + + /** + * In this test, we check if temReviewedUrl and itemReviewedType are required. (They are subfields of itemReviewed.) In review.tsv they are set to required. 
+ */ + @Test + public void testCreateReviewRequiredFields() { + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + createUser.then().assertThat() + .statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response setAllowedDatasetTypes = UtilIT.setCollectionAttribute(dataverseAlias, "allowedDatasetTypes", + "review", apiTokenSuperuser); + setAllowedDatasetTypes.prettyPrint(); + setAllowedDatasetTypes.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.allowedDatasetTypes[0].name", is("review")) + .body("data.allowedDatasetTypes[0].displayName", is("Review")) + .body("data.allowedDatasetTypes[0].description", + is("A review of a dataset compiled by the expert community.")); + + String itemReviewedTitle = "Percent of Children That Have Asthma"; + String itemReviewedUrl = "https://datacommons.org/tools/statvar#sv=Percent_Person_Children_WithAsthma"; + String reviewTitle = "Review of " + itemReviewedTitle; + String authorName = "Wazowski, Mike"; + String authorEmail = "mwazowski@mailinator.com"; + JsonObjectBuilder jsonForCreatingReview = Json.createObjectBuilder() + /** + * See above where this type is added to the installation and + * therefore available for use. 
+ */ + .add("datasetType", DatasetType.DATASET_TYPE_REVIEW) + .add("datasetVersion", Json.createObjectBuilder() + .add("license", Json.createObjectBuilder() + .add("name", "CC0 1.0") + .add("uri", "http://creativecommons.org/publicdomain/zero/1.0")) + .add("metadataBlocks", Json.createObjectBuilder() + .add("citation", Json.createObjectBuilder() + .add("fields", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("typeName", "title") + .add("value", reviewTitle) + .add("typeClass", "primitive") + .add("multiple", false)) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("authorName", + Json.createObjectBuilder() + .add("value", authorName) + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", + "authorName")))) + .add("typeClass", "compound") + .add("multiple", true) + .add("typeName", "author")) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("datasetContactEmail", + Json.createObjectBuilder() + .add("value", authorEmail) + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", + "datasetContactEmail")))) + .add("typeClass", "compound") + .add("multiple", true) + .add("typeName", "datasetContact")) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("dsDescriptionValue", + Json.createObjectBuilder() + .add("value", + "This is a review of a dataset.") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", + "dsDescriptionValue")))) + .add("typeClass", "compound") + .add("multiple", true) + .add("typeName", "dsDescription")) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add("Medicine, Health and Life Sciences")) + .add("typeClass", "controlledVocabulary") + .add("multiple", true) + .add("typeName", "subject")))))); + + Response createReview = 
UtilIT.createDataset(dataverseAlias, jsonForCreatingReview, apiToken); + createReview.prettyPrint(); + // FIXME: The review was created but it shouldn't have been because + // required fields were not supplied. In review.tsv various fields + // are required. See https://github.com/IQSS/dataverse/issues/12196 + createReview.then().assertThat().statusCode(CREATED.getStatusCode()); + Integer reviewId = UtilIT.getDatasetIdFromResponse(createReview); + String reviewPid = JsonPath.from(createReview.getBody().asString()).getString("data.persistentId"); + + } + +} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java index ca9e19e1bbf..04b552ded5f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java @@ -119,6 +119,7 @@ public void testSearchPermisions() { .body("data.total_count", CoreMatchers.is(1)) .body("data.count_in_response", CoreMatchers.is(1)) .body("data.items[0].name", CoreMatchers.is("Darwin's Finches")) + // Note that "Unpublished" and "Draft" are in English. That's how they are indexed. 
.body("data.items[0].publicationStatuses", CoreMatchers.hasItems("Unpublished", "Draft")) .statusCode(OK.getStatusCode()); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SwordIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SwordIT.java index 709908ac6eb..22dfe61da07 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SwordIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SwordIT.java @@ -954,7 +954,8 @@ public void testDeleteFiles() { reindexDataset4ToFindDatabaseId.then().assertThat() .statusCode(OK.getStatusCode()); Integer datasetId4 = JsonPath.from(reindexDataset4ToFindDatabaseId.asString()).getInt("data.id"); - + UtilIT.sleepForReindex(datasetPersistentId4, apiToken, 5); + Response destroyDataset4 = UtilIT.destroyDataset(datasetId4, apiToken); destroyDataset4.prettyPrint(); destroyDataset4.then().assertThat() diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index f09b33a0b5b..e4b5fefba3b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -1,56 +1,56 @@ package edu.harvard.iq.dataverse.api; +import com.mashape.unirest.http.Unirest; +import com.mashape.unirest.http.exceptions.UnirestException; +import com.mashape.unirest.request.GetRequest; import edu.harvard.iq.dataverse.*; +import edu.harvard.iq.dataverse.api.datadeposit.SwordConfigurationImpl; +import edu.harvard.iq.dataverse.mydata.MyDataFilterParams; +import edu.harvard.iq.dataverse.settings.FeatureFlags; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.StringUtil; +import edu.harvard.iq.dataverse.util.json.JsonParseException; +import edu.harvard.iq.dataverse.util.json.JsonParser; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import 
io.restassured.http.ContentType; import io.restassured.path.json.JsonPath; -import io.restassured.response.Response; - -import java.io.*; -import java.util.*; -import java.util.logging.Logger; -import jakarta.json.Json; -import jakarta.json.JsonArray; -import jakarta.json.JsonObjectBuilder; -import jakarta.json.JsonArrayBuilder; -import jakarta.json.JsonObject; - -import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; -import static jakarta.ws.rs.core.Response.Status.CREATED; - -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.time.LocalDateTime; -import java.util.logging.Level; -import edu.harvard.iq.dataverse.api.datadeposit.SwordConfigurationImpl; import io.restassured.path.xml.XmlPath; -import edu.harvard.iq.dataverse.mydata.MyDataFilterParams; -import jakarta.ws.rs.core.HttpHeaders; -import org.apache.commons.lang3.StringUtils; -import org.assertj.core.util.Lists; -import org.junit.jupiter.api.Test; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import io.restassured.response.Response; import io.restassured.specification.RequestSpecification; -import com.mashape.unirest.http.Unirest; -import com.mashape.unirest.http.exceptions.UnirestException; -import com.mashape.unirest.request.GetRequest; -import edu.harvard.iq.dataverse.util.FileUtil; +import jakarta.json.*; +import jakarta.ws.rs.core.HttpHeaders; import org.apache.commons.io.IOUtils; -import java.nio.file.Path; - +import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.math.NumberUtils; +import org.assertj.core.util.Lists; import org.hamcrest.BaseMatcher; import org.hamcrest.Description; import org.hamcrest.Matcher; +import org.junit.jupiter.api.Test; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import 
java.nio.file.Paths; +import java.time.LocalDateTime; +import java.util.*; +import java.util.logging.Level; +import java.util.logging.Logger; import static edu.harvard.iq.dataverse.api.ApiConstants.*; -import static io.restassured.path.xml.XmlPath.from; +import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import static io.restassured.RestAssured.given; - -import edu.harvard.iq.dataverse.settings.FeatureFlags; -import edu.harvard.iq.dataverse.util.StringUtil; - +import static io.restassured.path.xml.XmlPath.from; +import static jakarta.ws.rs.core.HttpHeaders.ACCEPT_LANGUAGE; +import static jakarta.ws.rs.core.Response.Status.CREATED; +import static jakarta.ws.rs.core.Response.Status.OK; +import static org.hamcrest.CoreMatchers.startsWith; import static org.junit.jupiter.api.Assertions.*; public class UtilIT { @@ -337,6 +337,13 @@ static Integer getDataverseIdFromResponse(Response createDataverseResponse) { logger.info("Id found in create dataverse response: " + dataverseId); return dataverseId; } + + static Long getTemplateIdFromResponse(Response createTemplateResponse) { + JsonPath createdTemplate = JsonPath.from(createTemplateResponse.body().asString()); + Long templateId = createdTemplate.getLong("data.id"); + logger.info("Id found in create template response: " + templateId); + return templateId; + } static Integer getDatasetIdFromResponse(Response createDatasetResponse) { JsonPath createdDataset = JsonPath.from(createDatasetResponse.body().asString()); @@ -588,6 +595,36 @@ static Response getGuestbookResponses(String dataverseAlias, Long guestbookId, S return requestSpec.get("/api/dataverses/" + dataverseAlias + "/guestbookResponses/"); } + public static Response createGuestbook(String dataverseAlias, String guestbookAsJson, String apiToken) { + Response createGuestbookResponse = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(guestbookAsJson) + .contentType("application/json") + .post("/api/guestbooks/" + 
dataverseAlias); + return createGuestbookResponse; + } + + static Response getGuestbook(Long guestbookId, String apiToken) { + RequestSpecification requestSpec = given() + .header(API_TOKEN_HTTP_HEADER, apiToken); + return requestSpec.get("/api/guestbooks/" + guestbookId ); + } + + static Response getGuestbooks(String dataverseAlias, String apiToken) { + RequestSpecification requestSpec = given() + .header(API_TOKEN_HTTP_HEADER, apiToken); + return requestSpec.get("/api/guestbooks/" + dataverseAlias + "/list" ); + } + + static Response enableGuestbook(String dataverseAlias, Long guestbookId, String apiToken, String enable) { + Response createGuestbookResponse = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(enable) + .contentType("application/json") + .put("/api/guestbooks/" + dataverseAlias + "/" + guestbookId + "/enabled"); + return createGuestbookResponse; + } + static Response getCollectionSchema(String dataverseAlias, String apiToken) { Response getCollectionSchemaResponse = given() .header(API_TOKEN_HTTP_HEADER, apiToken) @@ -801,6 +838,21 @@ static Response updateFieldLevelDatasetMetadataViaNative(String persistentId, St return editVersionMetadataFromJsonStr(persistentId, jsonIn, apiToken, null); } + static Response updateDatasetGuestbook(String persistentId, Long guestbookId, String apiToken) { + RequestSpecification requestSpecification = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .contentType("application/json"); + String path = "/api/datasets/:persistentId/guestbook/?persistentId=" + persistentId; + if (guestbookId != null) { + return requestSpecification + .body(guestbookId) + .put(path); + } else { + return requestSpecification + .delete(path); + } + } + static Response editVersionMetadataFromJsonStr(String persistentId, String jsonString, String apiToken) { return editVersionMetadataFromJsonStr(persistentId, jsonString, apiToken, null); } @@ -1150,7 +1202,11 @@ static Response downloadFile(Integer fileId) { // 
.header(API_TOKEN_HTTP_HEADER, apiToken) .get("/api/access/datafile/" + fileId); } - + static Response postDownloadFile(Integer fileId, String jsonBody) { + return given() + .body(jsonBody) + .post("/api/access/datafile/" + fileId); + } static Response downloadFile(Integer fileId, String apiToken) { String nullByteRange = null; String nullFormat = null; @@ -1180,6 +1236,12 @@ static Response downloadFile(Integer fileId, String byteRange, String format, St //.header(API_TOKEN_HTTP_HEADER, apiToken) return requestSpecification.get("/api/access/datafile/" + fileId + "?key=" + apiToken + optionalFormat + optionalImageThumb); } + + static Response downloadFile(Integer fileId, String queryParams, String apiToken) { + RequestSpecification requestSpecification = given(); + + return requestSpecification.get("/api/access/datafile/" + fileId + "?key=" + apiToken + queryParams); + } static Response downloadTabularFile(Integer fileId) { return given() @@ -1201,7 +1263,50 @@ static Response downloadFileOriginal(Integer fileId, String apiToken) { return given() .get("/api/access/datafile/" + fileId + "?format=original&key=" + apiToken); } - + + static Response getDownloadFileUrlWithGuestbookResponse(Integer fileId, String apiToken, String body) { + RequestSpecification requestSpecification = given(); + requestSpecification.header(API_TOKEN_HTTP_HEADER, apiToken); + if (body != null) { + requestSpecification.body(body); + } + return requestSpecification.post("/api/access/datafile/" + fileId); + } + + static Response downloadFilesUrlWithGuestbookResponse(Integer[] fileIds, String apiToken, String body) { + RequestSpecification requestSpecification = given(); + requestSpecification.header(API_TOKEN_HTTP_HEADER, apiToken); + if (body != null) { + requestSpecification.body(body); + } + String getString = "/api/access/datafiles/"; + for (Integer fileId : fileIds) { + getString += fileId + ","; + } + return requestSpecification.post(getString); + } + static Response 
downloadFilesUrlWithGuestbookResponse(String persistentId, String apiToken, String body) { + RequestSpecification requestSpecification = given(); + if (apiToken != null) { + requestSpecification.header(API_TOKEN_HTTP_HEADER, apiToken); + } + if (body != null) { + requestSpecification.body(body); + } + String getString = "/api/access/dataset/:persistentId?persistentId=" + persistentId; + return requestSpecification.post(getString); + } + + static Response postDownloadDatafiles(String body, String apiToken) { + String getString = "/api/access/datafiles"; + RequestSpecification requestSpecification = given(); + requestSpecification.header(API_TOKEN_HTTP_HEADER, apiToken); + if (body != null) { // body contains list of data file ids + requestSpecification.body(body); + } + return requestSpecification.post(getString); + } + static Response downloadFiles(Integer[] fileIds) { String getString = "/api/access/datafiles/"; for(Integer fileId : fileIds) { @@ -1780,7 +1885,15 @@ static Response getDatasetVersion(String persistentId, String versionNumber, Str static Response getDatasetVersion(String persistentId, String versionNumber, String apiToken, boolean excludeFiles, boolean includeDeaccessioned) { return getDatasetVersion(persistentId,versionNumber,apiToken,excludeFiles,false,includeDeaccessioned); } - static Response getDatasetVersion(String persistentId, String versionNumber, String apiToken, boolean excludeFiles,boolean excludeMetadataBlocks, boolean includeDeaccessioned) { + static Response getDatasetVersion(String persistentId, String versionNumber, String apiToken, boolean excludeFiles, boolean excludeMetadataBlocks, boolean includeDeaccessioned) { + return getDatasetVersion(persistentId, versionNumber, apiToken, excludeFiles, excludeMetadataBlocks, includeDeaccessioned, false); + } + // includeMetadataBlocksEmail is an override of the Setting ExcludeEmailFromExport. 
excludeMetadataBlocks must be false and user needs EditDataset permission + static Response getDatasetVersion(String persistentId, String versionNumber, String apiToken, + boolean excludeFiles, + boolean excludeMetadataBlocks, + boolean includeDeaccessioned, + boolean ignoreSettingExcludeEmailFromExport) { return given() .header(API_TOKEN_HTTP_HEADER, apiToken) .queryParam("includeDeaccessioned", includeDeaccessioned) @@ -1789,7 +1902,8 @@ static Response getDatasetVersion(String persistentId, String versionNumber, Str + "?persistentId=" + persistentId + (excludeFiles ? "&excludeFiles=true" : "") - + (excludeMetadataBlocks ? "&excludeMetadataBlocks=true" : "")); + + (excludeMetadataBlocks ? "&excludeMetadataBlocks=true" : "") + + (ignoreSettingExcludeEmailFromExport ? "&ignoreSettingExcludeEmailFromExport=true" : "")); } static Response compareDatasetVersions(String persistentId, String versionNumber1, String versionNumber2, String apiToken) { return given() @@ -1976,6 +2090,14 @@ static Response getAuthProviders(String apiToken) { .get("/api/admin/authenticationProviders"); return response; } + static Response addAuthProviders(String apiToken, JsonObject jsonObject) { + Response response = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(jsonObject.toString()) + .contentType("application/json") + .post("/api/admin/authenticationProviders"); + return response; + } static Response migrateShibToBuiltin(Long userIdToConvert, String newEmailAddress, String apiToken) { Response response = given() @@ -2051,8 +2173,9 @@ static Response allowAccessRequests(String datasetIdOrPersistentId, boolean allo } static Response requestFileAccess(String fileIdOrPersistentId, String apiToken) { - System.out.print ("Reuest file acceess + fileIdOrPersistentId: " + fileIdOrPersistentId); - System.out.print ("Reuest file acceess + apiToken: " + apiToken); + return requestFileAccess(fileIdOrPersistentId, apiToken, null); + } + static Response requestFileAccess(String 
fileIdOrPersistentId, String apiToken, String body) { String idInPath = fileIdOrPersistentId; // Assume it's a number. String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. if (!NumberUtils.isCreatable(fileIdOrPersistentId)) { @@ -2064,10 +2187,16 @@ static Response requestFileAccess(String fileIdOrPersistentId, String apiToken) if (optionalQueryParam.isEmpty()) { keySeparator = "?"; } - System.out.print ("URL: " + "/api/access/datafile/" + idInPath + "/requestAccess" + optionalQueryParam + keySeparator + "key=" + apiToken); - Response response = given() - .put("/api/access/datafile/" + idInPath + "/requestAccess" + optionalQueryParam + keySeparator + "key=" + apiToken); - return response; + String path = "/api/access/datafile/" + idInPath + "/requestAccess" + optionalQueryParam + keySeparator + "key=" + apiToken; + System.out.print ("URL: " + path); + RequestSpecification requestSpecification = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken) + .contentType("application/json"); + if (body != null) { + requestSpecification.body(body); + } + + return requestSpecification.put(path); } static Response grantFileAccess(String fileIdOrPersistentId, String identifier, String apiToken) { @@ -4154,17 +4283,21 @@ static Response importDatasetViaNativeApi(String apiToken, String dataverseAlias } - static Response retrieveMyDataAsJsonString(String apiToken, String userIdentifier, ArrayList roleIds) { + static Response retrieveMyDataAsJsonString(String apiToken, String userIdentifier, ArrayList roleIds, String parameterString) { Response response = given() .header(API_TOKEN_HTTP_HEADER, apiToken) .contentType("application/json; charset=utf-8") .queryParam("role_ids", roleIds) .queryParam("dvobject_types", MyDataFilterParams.defaultDvObjectTypes) .queryParam("published_states", MyDataFilterParams.defaultPublishedStates) - .get("/api/mydata/retrieve?userIdentifier=" + userIdentifier); + 
.get("/api/mydata/retrieve?userIdentifier=" + userIdentifier + parameterString); return response; } + static Response retrieveMyDataAsJsonString(String apiToken, String userIdentifier, ArrayList roleIds) { + return retrieveMyDataAsJsonString(apiToken, userIdentifier, roleIds, ""); + } + static Response retrieveMyCollectionList(String apiToken, String userIdentifier) { RequestSpecification requestSpecification = given(); if (apiToken != null) { @@ -4229,6 +4362,15 @@ static Response getDatasetVersionCitation(Integer datasetId, String version, boo return response; } + static Response getDatasetVersionCitationFormat(Integer datasetId, String version, boolean includeDeaccessioned, String format, String apiToken) { + Response response = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .contentType("application/json") + .queryParam("includeDeaccessioned", includeDeaccessioned) + .get("/api/datasets/" + datasetId + "/versions/" + version + "/citation/" + format); + return response; + } + static Response setDatasetCitationDateField(String datasetIdOrPersistentId, String dateField, String apiToken) { String idInPath = datasetIdOrPersistentId; // Assume it's a number. String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. 
@@ -4713,14 +4855,28 @@ static Response listDataverseInputLevels(String dataverseAlias, String apiToken) } public static Response getDatasetTypes() { - Response response = given() - .get("/api/datasets/datasetTypes"); - return response; + return getDatasetTypes(null); + } + + public static Response getDatasetTypes(String acceptLanguage) { + RequestSpecification requestSpecification = given(); + if (acceptLanguage != null) { + // https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Accept-Language + requestSpecification.header(ACCEPT_LANGUAGE, acceptLanguage); + } + return requestSpecification.get("/api/datasets/datasetTypes"); } static Response getDatasetType(String idOrName) { - return given() - .get("/api/datasets/datasetTypes/" + idOrName); + return getDatasetType(idOrName, null); + } + + static Response getDatasetType(String idOrName, String acceptLanguage) { + RequestSpecification requestSpecification = given(); + if (acceptLanguage != null) { + requestSpecification.header(ACCEPT_LANGUAGE, acceptLanguage); + } + return requestSpecification.get("/api/datasets/datasetTypes/" + idOrName); } static Response addDatasetType(String jsonIn, String apiToken) { @@ -5128,6 +5284,12 @@ public static Response createTemplate(String dataverseAlias, String jsonString, .body(jsonString) .post("/api/dataverses/" + dataverseAlias + "/templates"); } + + public static Response deleteTemplate(String id, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .delete("/api/dataverses/"+id+"/template"); + } public static Response getTemplates(String dataverseAlias, String apiToken) { return given() @@ -5135,6 +5297,35 @@ public static Response getTemplates(String dataverseAlias, String apiToken) { .header(API_TOKEN_HTTP_HEADER, apiToken) .get("/api/dataverses/" + dataverseAlias + "/templates"); } + + public static Response setDefaultTemplate(String dataverseAlias, Long templateId, String apiToken) { + return given() + 
.contentType(ContentType.JSON) + .header(API_TOKEN_HTTP_HEADER, apiToken) + .post("/api/dataverses/" + dataverseAlias + "/template/default/" + templateId); + } + + public static Response removeDefaultTemplate(String dataverseAlias, String apiToken) { + return given() + .contentType(ContentType.JSON) + .header(API_TOKEN_HTTP_HEADER, apiToken) + .delete("/api/dataverses/" + dataverseAlias + "/template/default"); + } + + + + public static Response getTemplate(String templateId) { + return given() + .contentType(ContentType.JSON) + .get("/api/dataverses/" + templateId + "/template"); + } + + public static Response getTemplate(String templateId, String apiToken) { + return given() + .contentType(ContentType.JSON) + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/dataverses/" + templateId + "/template"); + } /** * Gets the tool URL for a dataset with optional parameters @@ -5257,4 +5448,59 @@ public static Response sendMessageToLDNInbox(String message) { .when() .post("/api/inbox/"); } + + public static Response setGuestbookEntryOnRequest(String datasetId, String apiToken, Boolean enabled) { + return given() + .body(enabled) + .contentType(ContentType.JSON) + .header(API_TOKEN_HTTP_HEADER, apiToken) + .put("/api/datasets/" + datasetId + "/guestbookEntryAtRequest"); + } + + public static Guestbook createRandomGuestbook(String ownerAlias, String persistentId, String apiToken) throws IOException, JsonParseException { + Guestbook gb = new Guestbook(); + File guestbookJson = new File("scripts/api/data/guestbook-test.json"); + String guestbookAsJson = new String(Files.readAllBytes(Paths.get(guestbookJson.getAbsolutePath()))); + JsonObject jsonObj = JsonUtil.getJsonObject(guestbookAsJson); + JsonParser jsonParsor = new JsonParser(); + jsonParsor.parseGuestbook(jsonObj, gb); + + Response createGuestbookResponse = UtilIT.createGuestbook(ownerAlias, guestbookAsJson, apiToken); + createGuestbookResponse.prettyPrint(); + createGuestbookResponse.then().assertThat() + 
.statusCode(CREATED.getStatusCode()); + JsonPath createdGuestbook = JsonPath.from(createGuestbookResponse.body().asString()); + long guestbookId = createdGuestbook.getLong("data.id"); + + Response guestbookEnableResponse = UtilIT.enableGuestbook(ownerAlias, guestbookId, apiToken, Boolean.TRUE.toString()); + guestbookEnableResponse.prettyPrint(); + guestbookEnableResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.message", startsWith("Guestbook")); + Response getGuestbookResponse = UtilIT.getGuestbook(guestbookId, apiToken); + getGuestbookResponse.prettyPrint(); + JsonPath jsonPath = JsonPath.from(getGuestbookResponse.body().asString()); + gb.setId(guestbookId); + gb.getCustomQuestions().get(0).setId(jsonPath.getLong("data.customQuestions[0].id")); + gb.getCustomQuestions().get(1).setId(jsonPath.getLong("data.customQuestions[1].id")); + gb.getCustomQuestions().get(2).setId(jsonPath.getLong("data.customQuestions[2].id")); + + // Add the Guestbook to the Dataset + Response setGuestbook = UtilIT.updateDatasetGuestbook(persistentId, guestbookId, apiToken); + setGuestbook.prettyPrint(); + return gb; + } + + public static String generateGuestbookResponse(Guestbook gb) throws IOException { + File guestbookJson = new File("scripts/api/data/guestbook-test-response.json"); + String guestbookAsJson = new String(Files.readAllBytes(Paths.get(guestbookJson.getAbsolutePath()))); + + List cqIDs = new ArrayList<>(); + gb.getCustomQuestions().stream().forEach(cq -> cqIDs.add(cq.getId())); + + return guestbookAsJson.replace("@ID", gb.getId().toString()) + .replace("@QID1", cqIDs.get(0).toString()) + .replace("@QID2", cqIDs.get(1).toString()) + .replace("@QID3", cqIDs.get(2).toString()); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/ContainerRequestTestFake.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/ContainerRequestTestFake.java index 74f2e9dbb41..111ea81fe51 100644 --- 
a/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/ContainerRequestTestFake.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/ContainerRequestTestFake.java @@ -5,6 +5,7 @@ import java.io.InputStream; import java.net.URI; import java.util.*; +import java.util.function.Predicate; public class ContainerRequestTestFake implements ContainerRequestContext { @@ -132,4 +133,10 @@ public void setSecurityContext(SecurityContext securityContext) { public void abortWith(Response response) { } + + @Override + public boolean containsHeaderString(String name, String valueSeparatorRegex, Predicate valuePredicate) { + // TODO Auto-generated method stub + return false; + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/UriInfoTestFake.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/UriInfoTestFake.java index 51d20083ec8..2f91f1ec3d2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/UriInfoTestFake.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/UriInfoTestFake.java @@ -103,4 +103,10 @@ public URI resolve(URI uri) { public URI relativize(URI uri) { return null; } + + @Override + public String getMatchedResourceTemplate() { + // TODO Auto-generated method stub + return null; + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/TestEntityManager.java b/src/test/java/edu/harvard/iq/dataverse/engine/TestEntityManager.java index af8b75d5d80..d8264f6434d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/TestEntityManager.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/TestEntityManager.java @@ -14,19 +14,29 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.logging.Level; import java.util.logging.Logger; + +import jakarta.persistence.CacheRetrieveMode; +import jakarta.persistence.CacheStoreMode; +import jakarta.persistence.ConnectionConsumer; +import jakarta.persistence.ConnectionFunction; import jakarta.persistence.EntityGraph; import 
jakarta.persistence.EntityManager; import jakarta.persistence.EntityManagerFactory; import jakarta.persistence.EntityTransaction; +import jakarta.persistence.FindOption; import jakarta.persistence.FlushModeType; import jakarta.persistence.Id; import jakarta.persistence.LockModeType; +import jakarta.persistence.LockOption; import jakarta.persistence.Query; +import jakarta.persistence.RefreshOption; import jakarta.persistence.StoredProcedureQuery; import jakarta.persistence.TypedQuery; +import jakarta.persistence.TypedQueryReference; import jakarta.persistence.criteria.CriteriaBuilder; import jakarta.persistence.criteria.CriteriaDelete; import jakarta.persistence.criteria.CriteriaQuery; +import jakarta.persistence.criteria.CriteriaSelect; import jakarta.persistence.criteria.CriteriaUpdate; import jakarta.persistence.metamodel.Metamodel; @@ -206,6 +216,11 @@ public Query createNamedQuery(String name) { public TypedQuery createNamedQuery(String name, Class resultClass) { throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. } + + @Override + public TypedQuery createQuery(TypedQueryReference reference) { + throw new UnsupportedOperationException("Not supported yet."); + } @Override public Query createNativeQuery(String sqlString) { @@ -312,4 +327,74 @@ public List> getEntityGraphs(Class entityClass) { throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. } + @Override + public T callWithConnection(ConnectionFunction function) { + throw new UnsupportedOperationException("Not supported yet."); + } + + @Override + public void runWithConnection(ConnectionConsumer action) { + throw new UnsupportedOperationException("Not supported yet."); + } + + @Override + public T find(Class entityClass, Object primaryKey, FindOption... 
options) { + // TODO Auto-generated method stub + return null; + } + + @Override + public T find(EntityGraph entityGraph, Object primaryKey, FindOption... options) { + // TODO Auto-generated method stub + return null; + } + + @Override + public T getReference(T entity) { + // TODO Auto-generated method stub + return null; + } + + @Override + public void lock(Object entity, LockModeType lockMode, LockOption... options) { + // TODO Auto-generated method stub + + } + + @Override + public void refresh(Object entity, RefreshOption... options) { + // TODO Auto-generated method stub + + } + + @Override + public void setCacheRetrieveMode(CacheRetrieveMode cacheRetrieveMode) { + // TODO Auto-generated method stub + + } + + @Override + public void setCacheStoreMode(CacheStoreMode cacheStoreMode) { + // TODO Auto-generated method stub + + } + + @Override + public CacheRetrieveMode getCacheRetrieveMode() { + // TODO Auto-generated method stub + return null; + } + + @Override + public CacheStoreMode getCacheStoreMode() { + // TODO Auto-generated method stub + return null; + } + + @Override + public TypedQuery createQuery(CriteriaSelect selectQuery) { + // TODO Auto-generated method stub + return null; + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateTemplateCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateTemplateCommandTest.java index 71bcbad98b8..baac3187fd8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateTemplateCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateTemplateCommandTest.java @@ -6,6 +6,7 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.DatasetFieldUtil; import edu.harvard.iq.dataverse.engine.command.CommandContext; +import jakarta.persistence.EntityManager; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import 
edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.util.testing.JvmSetting; @@ -32,6 +33,9 @@ public class CreateTemplateCommandTest { private DataverseRequest dataverseRequestStub; + + @Mock + private EntityManager em; @Mock private CommandContext contextMock; @@ -43,8 +47,6 @@ public class CreateTemplateCommandTest { private LicenseServiceBean licenseServiceBeanMock; @Mock private DataverseFieldTypeInputLevelServiceBean fieldTypeInputLevelServiceBeanMock; - @Mock - private EntityManager em; @Spy private Template templateSpy; @@ -62,9 +64,11 @@ public void setUp() { @Test public void execute_shouldSaveTemplate_noInitialization() throws CommandException { // Create the command with initialization set to false + + Template savedTemplate = mock(Template.class); + when(templateServiceBeanStub.save(templateSpy)).thenReturn(savedTemplate); CreateTemplateCommand sut = new CreateTemplateCommand(templateSpy, dataverseRequestStub, dataverseMock, false); - Template savedTemplate = mock(Template.class); when(templateServiceBeanStub.save(templateSpy)).thenReturn(savedTemplate); // Act @@ -86,11 +90,12 @@ public void execute_shouldInitializeAndSaveTemplate_withInitialization() throws when(contextMock.metadataBlocks()).thenReturn(metadataBlockServiceBeanMock); when(contextMock.licenses()).thenReturn(licenseServiceBeanMock); when(contextMock.fieldTypeInputLevels()).thenReturn(fieldTypeInputLevelServiceBeanMock); + Template savedTemplate = mock(Template.class); + when(templateServiceBeanStub.save(templateSpy)).thenReturn(savedTemplate); when(dataverseMock.getId()).thenReturn(42L); when(dataverseMock.isMetadataBlockRoot()).thenReturn(true); - Template savedTemplate = mock(Template.class); when(templateServiceBeanStub.save(templateSpy)).thenReturn(savedTemplate); // Mock system metadata blocks diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteTemplateCommandTest.java 
b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteTemplateCommandTest.java new file mode 100644 index 00000000000..bdbcbb3a1aa --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteTemplateCommandTest.java @@ -0,0 +1,93 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.Template; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.*; +import jakarta.persistence.EntityManager; +import java.util.*; + +import static org.mockito.Mockito.*; +import static org.junit.jupiter.api.Assertions.*; + +class DeleteTemplateCommandTest { + + @Mock + private CommandContext ctxt; + + @Mock + private EntityManager em; + + @Mock + private Dataverse editedDv; + + @Mock + private Dataverse otherDv; + + @Mock + private Template doomed; + + @Mock + private DataverseRequest dataverseRequest; + + + + @BeforeEach + void setup() { + MockitoAnnotations.openMocks(this); + when(ctxt.em()).thenReturn(em); + + List dvWDefaultTemplate = new ArrayList<>(); + DeleteTemplateCommand sut = new DeleteTemplateCommand(dataverseRequest, editedDv,doomed, dvWDefaultTemplate); + // Simulate that dvWDefaultTemplate has one dataverse with a template + + } + + @Test + void testExecute_MergesAndRemovesCorrectly() throws Exception { + // Arrange + Dataverse mergedDv = mock(Dataverse.class); + Template mergedTemplate = mock(Template.class); + List dvWDefaultTemplate = new ArrayList<>(); + dvWDefaultTemplate.add(otherDv); + + when(em.merge(editedDv)).thenReturn(mergedDv); + when(em.merge(doomed)).thenReturn(mergedTemplate); + DeleteTemplateCommand sut = new DeleteTemplateCommand(dataverseRequest, editedDv, doomed, dvWDefaultTemplate); + + // Act + Dataverse result = sut.execute(ctxt); + + // Assert + 
verify(em).merge(editedDv); // should merge edited dataverse + verify(em).merge(otherDv); // should merge dvWDefaultTemplate items after clearing default template + verify(otherDv).setDefaultTemplate(null); + verify(em).merge(doomed); // should merge doomed template + verify(em).remove(mergedTemplate); // should remove merged template + assertEquals(mergedDv, result); // return value is the merged dataverse + } + + @Test + void testExecute_WhenEditedDvIsNull() throws Exception { + // Arrange + Dataverse nullDv = null; + List dvWDefaultTemplate = new ArrayList<>(); + DeleteTemplateCommand sut = new DeleteTemplateCommand(dataverseRequest, nullDv, doomed, dvWDefaultTemplate); + + Template mergedTemplate = mock(Template.class); + when(em.merge(doomed)).thenReturn(mergedTemplate); + + // Act + Dataverse result = sut.execute(ctxt); + + // Assert + verify(em, never()).merge(any(Dataverse.class)); + verify(em).merge(doomed); + verify(em).remove(mergedTemplate); + assertNull(result); + } +} + diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDatasetCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDatasetCommandTest.java index b8902728785..7bdbc6602c5 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDatasetCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDatasetCommandTest.java @@ -25,6 +25,7 @@ import edu.harvard.iq.dataverse.engine.DataverseEngine; import edu.harvard.iq.dataverse.engine.TestCommandContext; import edu.harvard.iq.dataverse.engine.TestDataverseEngine; +import edu.harvard.iq.dataverse.engine.TestEntityManager; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; @@ -428,267 +429,8 @@ void testInvalidMovePublishedToUnpublished() { assertThrows(IllegalCommandException.class, () -> 
testEngine.submit(new MoveDatasetCommand(aRequest, moved, childDraft, null))); } - - private static class EntityManagerImpl implements EntityManager { - - @Override - public void persist(Object entity) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public T merge(T entity) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public void remove(Object entity) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public T find(Class entityClass, Object primaryKey) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public T find(Class entityClass, Object primaryKey, Map properties) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public T find(Class entityClass, Object primaryKey, LockModeType lockMode) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public T find(Class entityClass, Object primaryKey, LockModeType lockMode, Map properties) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public T getReference(Class entityClass, Object primaryKey) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public void flush() { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 
- } - - @Override - public void setFlushMode(FlushModeType flushMode) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public FlushModeType getFlushMode() { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public void lock(Object entity, LockModeType lockMode) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public void lock(Object entity, LockModeType lockMode, Map properties) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public void refresh(Object entity) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public void refresh(Object entity, Map properties) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public void refresh(Object entity, LockModeType lockMode) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public void refresh(Object entity, LockModeType lockMode, Map properties) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public void clear() { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 
- } - - @Override - public void detach(Object entity) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public boolean contains(Object entity) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public LockModeType getLockMode(Object entity) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public void setProperty(String propertyName, Object value) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public Map getProperties() { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public Query createQuery(String qlString) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public TypedQuery createQuery(CriteriaQuery criteriaQuery) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public Query createQuery(CriteriaUpdate updateQuery) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public Query createQuery(CriteriaDelete deleteQuery) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public TypedQuery createQuery(String qlString, Class resultClass) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 
- } - - @Override - public Query createNamedQuery(String name) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public TypedQuery createNamedQuery(String name, Class resultClass) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public Query createNativeQuery(String sqlString) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public Query createNativeQuery(String sqlString, Class resultClass) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public Query createNativeQuery(String sqlString, String resultSetMapping) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public StoredProcedureQuery createNamedStoredProcedureQuery(String name) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public StoredProcedureQuery createStoredProcedureQuery(String procedureName) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public StoredProcedureQuery createStoredProcedureQuery(String procedureName, Class... resultClasses) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public StoredProcedureQuery createStoredProcedureQuery(String procedureName, String... 
resultSetMappings) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public void joinTransaction() { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public boolean isJoinedToTransaction() { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public T unwrap(Class cls) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public Object getDelegate() { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public void close() { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public boolean isOpen() { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public EntityTransaction getTransaction() { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public EntityManagerFactory getEntityManagerFactory() { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public CriteriaBuilder getCriteriaBuilder() { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 
- } - - @Override - public Metamodel getMetamodel() { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public EntityGraph createEntityGraph(Class rootType) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public EntityGraph createEntityGraph(String graphName) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public EntityGraph getEntityGraph(String graphName) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public List> getEntityGraphs(Class entityClass) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - } - - private static class MockEntityManager extends EntityManagerImpl { + + private static class MockEntityManager extends TestEntityManager { @Override public T merge(T entity) { diff --git a/src/test/java/edu/harvard/iq/dataverse/export/CroissantExporterSlimTest.java b/src/test/java/edu/harvard/iq/dataverse/export/CroissantExporterSlimTest.java new file mode 100644 index 00000000000..fcbc9611818 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/export/CroissantExporterSlimTest.java @@ -0,0 +1,580 @@ +package edu.harvard.iq.dataverse.export; + +import static org.junit.jupiter.api.Assertions.*; + +import io.gdcc.spi.export.ExportDataProvider; +import jakarta.json.Json; +import jakarta.json.JsonArray; +import jakarta.json.JsonObject; +import jakarta.json.JsonReader; +import jakarta.json.JsonWriter; +import jakarta.json.JsonWriterFactory; +import jakarta.json.stream.JsonGenerator; +import java.io.ByteArrayOutputStream; +import java.io.FileNotFoundException; +import 
java.io.FileReader; +import java.io.IOException; +import java.io.OutputStream; +import java.io.StringReader; +import java.io.StringWriter; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.skyscreamer.jsonassert.JSONAssert; + +public class CroissantExporterSlimTest { + + static CroissantExporterSlim exporter; + static OutputStream outputStreamMinimal; + static ExportDataProvider dataProviderMinimal; + static OutputStream outputStreamMax; + static ExportDataProvider dataProviderMax; + static OutputStream outputStreamCars; + static ExportDataProvider dataProviderCars; + static OutputStream outputStreamRestricted; + static ExportDataProvider dataProviderRestricted; + static OutputStream outputStreamJunk; + static ExportDataProvider dataProviderJunk; + static OutputStream outputStreamDraft; + static ExportDataProvider dataProviderDraft; + + @BeforeAll + public static void setUp() { + exporter = new CroissantExporterSlim(); + + outputStreamMinimal = new ByteArrayOutputStream(); + dataProviderMinimal = + new ExportDataProvider() { + @Override + public JsonObject getDatasetJson() { + String pathToJsonFile = + "src/test/resources/croissant/minimal/in/datasetJson.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetORE() { + String pathToJsonFile = + "src/test/resources/croissant/minimal/in/datasetORE.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonArray getDatasetFileDetails() { + String pathToJsonFile = + 
"src/test/resources/croissant/minimal/in/datasetFileDetails.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readArray(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetSchemaDotOrg() { + String pathToJsonFile = + "src/test/resources/croissant/minimal/in/datasetSchemaDotOrg.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public String getDataCiteXml() { + try { + return Files.readString( + Paths.get( + "src/test/resources/croissant/minimal/in/dataCiteXml.xml"), + StandardCharsets.UTF_8); + } catch (IOException ex) { + return null; + } + } + }; + + outputStreamMax = new ByteArrayOutputStream(); + dataProviderMax = + new ExportDataProvider() { + @Override + public JsonObject getDatasetJson() { + String pathToJsonFile = + "src/test/resources/croissant/max/in/datasetJson.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetORE() { + String pathToJsonFile = + "src/test/resources/croissant/max/in/datasetORE.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonArray getDatasetFileDetails() { + String pathToJsonFile = + "src/test/resources/croissant/max/in/datasetFileDetails.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readArray(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetSchemaDotOrg() { + String pathToJsonFile = + 
"src/test/resources/croissant/max/in/datasetSchemaDotOrg.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public String getDataCiteXml() { + try { + return Files.readString( + Paths.get( + "src/test/resources/croissant/max/in/dataCiteXml.xml"), + StandardCharsets.UTF_8); + } catch (IOException ex) { + return null; + } + } + }; + + outputStreamCars = new ByteArrayOutputStream(); + dataProviderCars = + new ExportDataProvider() { + @Override + public JsonObject getDatasetJson() { + String pathToJsonFile = + "src/test/resources/croissant/cars/in/datasetJson.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetORE() { + String pathToJsonFile = + "src/test/resources/croissant/cars/in/datasetORE.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonArray getDatasetFileDetails() { + String pathToJsonFile = + "src/test/resources/croissant/cars/in/datasetFileDetails.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readArray(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetSchemaDotOrg() { + String pathToJsonFile = + "src/test/resources/croissant/cars/in/datasetSchemaDotOrg.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public String getDataCiteXml() { + try { + return Files.readString( + Paths.get( + 
"src/test/resources/croissant/cars/in/dataCiteXml.xml"), + StandardCharsets.UTF_8); + } catch (IOException ex) { + return null; + } + } + }; + + outputStreamRestricted = new ByteArrayOutputStream(); + dataProviderRestricted = + new ExportDataProvider() { + @Override + public JsonObject getDatasetJson() { + String pathToJsonFile = + "src/test/resources/croissant/restricted/in/datasetJson.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetORE() { + String pathToJsonFile = + "src/test/resources/croissant/restricted/in/datasetORE.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonArray getDatasetFileDetails() { + String pathToJsonFile = + "src/test/resources/croissant/restricted/in/datasetFileDetails.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readArray(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetSchemaDotOrg() { + String pathToJsonFile = + "src/test/resources/croissant/restricted/in/datasetSchemaDotOrg.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public String getDataCiteXml() { + try { + return Files.readString( + Paths.get( + "src/test/resources/croissant/restricted/in/dataCiteXml.xml"), + StandardCharsets.UTF_8); + } catch (IOException ex) { + return null; + } + } + }; + + outputStreamJunk = new ByteArrayOutputStream(); + dataProviderJunk = + new ExportDataProvider() { + @Override + public JsonObject getDatasetJson() { + String 
pathToJsonFile = + "src/test/resources/croissant/junk/in/datasetJson.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetORE() { + String pathToJsonFile = + "src/test/resources/croissant/junk/in/datasetORE.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonArray getDatasetFileDetails() { + String pathToJsonFile = + "src/test/resources/croissant/junk/in/datasetFileDetails.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readArray(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetSchemaDotOrg() { + String pathToJsonFile = + "src/test/resources/croissant/junk/in/datasetSchemaDotOrg.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public String getDataCiteXml() { + try { + return Files.readString( + Paths.get( + "src/test/resources/croissant/junk/in/dataCiteXml.xml"), + StandardCharsets.UTF_8); + } catch (IOException ex) { + return null; + } + } + }; + + outputStreamDraft = new ByteArrayOutputStream(); + dataProviderDraft = + new ExportDataProvider() { + @Override + public JsonObject getDatasetJson() { + String pathToJsonFile = + "src/test/resources/croissant/draft/in/datasetJson.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetORE() { + String pathToJsonFile = + 
"src/test/resources/croissant/draft/in/datasetORE.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonArray getDatasetFileDetails() { + String pathToJsonFile = + "src/test/resources/croissant/draft/in/datasetFileDetails.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readArray(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetSchemaDotOrg() { + String pathToJsonFile = + "src/test/resources/croissant/draft/in/datasetSchemaDotOrg.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public String getDataCiteXml() { + try { + return Files.readString( + Paths.get( + "src/test/resources/croissant/draft/in/dataCiteXml.xml"), + StandardCharsets.UTF_8); + } catch (IOException ex) { + return null; + } + } + }; + } + + @Test + public void testGetFormatName() { + CroissantExporterSlim instance = new CroissantExporterSlim(); + String expResult = ""; + String result = instance.getFormatName(); + assertEquals("croissantSlim", result); + } + + @Test + public void testGetDisplayName() { + assertEquals("Croissant Slim", exporter.getDisplayName(null)); + } + + @Test + public void testIsHarvestable() { + assertEquals(false, exporter.isHarvestable()); + } + + @Test + public void testIsAvailableToUsers() { + assertEquals(false, exporter.isAvailableToUsers()); + } + + @Test + public void testGetMediaType() { + assertEquals("application/json", exporter.getMediaType()); + } + + @Test + public void testExportDatasetMinimal() throws Exception { + exporter.exportDataset(dataProviderMinimal, outputStreamMinimal); + String actual = outputStreamMinimal.toString(); 
+ writeCroissantFile(actual, "minimal"); + String expected = + Files.readString( + Paths.get( + "src/test/resources/croissant/minimal/expected/minimal-croissant.json"), + StandardCharsets.UTF_8); + JSONAssert.assertEquals(expected, actual, true); + assertEquals(prettyPrint(expected), prettyPrint(outputStreamMinimal.toString())); + } + + @Test + public void testExportDatasetMax() throws Exception { + exporter.exportDataset(dataProviderMax, outputStreamMax); + String actual = outputStreamMax.toString(); + writeCroissantFile(actual, "max"); + String expected = + Files.readString( + Paths.get( + "src/test/resources/croissant/max/expected/max-croissantSlim.json"), + StandardCharsets.UTF_8); + JSONAssert.assertEquals(expected, actual, true); + assertEquals(prettyPrint(expected), prettyPrint(outputStreamMax.toString())); + } + + @Test + public void testExportDatasetCars() throws Exception { + exporter.exportDataset(dataProviderCars, outputStreamCars); + String actual = outputStreamCars.toString(); + writeCroissantFile(actual, "cars"); + String expected = + Files.readString( + Paths.get( + "src/test/resources/croissant/cars/expected/cars-croissantSlim.json"), + StandardCharsets.UTF_8); + JSONAssert.assertEquals(expected, actual, true); + assertEquals(prettyPrint(expected), prettyPrint(outputStreamCars.toString())); + } + + /** Same as the cars data but the stata13-auto.dta file is restricted. 
*/ + @Test + public void testExportDatasetRestricted() throws Exception { + exporter.exportDataset(dataProviderRestricted, outputStreamRestricted); + String actual = outputStreamRestricted.toString(); + writeCroissantFile(actual, "restricted"); + String expected = + Files.readString( + Paths.get( + "src/test/resources/croissant/restricted/expected/restricted-croissantSlim.json"), + StandardCharsets.UTF_8); + JSONAssert.assertEquals(expected, actual, true); + assertEquals(prettyPrint(expected), prettyPrint(outputStreamRestricted.toString())); + } + + @Test + public void testExportDatasetJunk() throws Exception { + exporter.exportDataset(dataProviderJunk, outputStreamJunk); + String actual = outputStreamJunk.toString(); + writeCroissantFile(actual, "junk"); + String expected = + Files.readString( + Paths.get("src/test/resources/croissant/junk/expected/junk-croissant.json"), + StandardCharsets.UTF_8); + JSONAssert.assertEquals(expected, actual, true); + assertEquals(prettyPrint(expected), prettyPrint(outputStreamJunk.toString())); + } + + @Test + public void testExportDatasetDraft() throws Exception { + exporter.exportDataset(dataProviderDraft, outputStreamDraft); + String actual = outputStreamDraft.toString(); + writeCroissantFile(actual, "draft"); + String expected = + Files.readString( + Paths.get( + "src/test/resources/croissant/draft/expected/draft-croissantSlim.json"), + StandardCharsets.UTF_8); + JSONAssert.assertEquals(expected, actual, true); + assertEquals(prettyPrint(expected), prettyPrint(outputStreamDraft.toString())); + } + + private void writeCroissantFile(String actual, String name) throws IOException { + Path dir = + Files.createDirectories(Paths.get("src/test/resources/croissant/" + name + "/out")); + Path out = Paths.get(dir + "/croissant.json"); + Files.writeString(out, prettyPrint(actual), StandardCharsets.UTF_8); + } + + public static String prettyPrint(String jsonObject) { + try { + return prettyPrint(getJsonObject(jsonObject)); + } catch 
(Exception ex) { + return jsonObject; + } + } + + public static String prettyPrint(JsonObject jsonObject) { + Map config = new HashMap<>(); + config.put(JsonGenerator.PRETTY_PRINTING, true); + JsonWriterFactory jsonWriterFactory = Json.createWriterFactory(config); + StringWriter stringWriter = new StringWriter(); + try (JsonWriter jsonWriter = jsonWriterFactory.createWriter(stringWriter)) { + jsonWriter.writeObject(jsonObject); + } + return stringWriter.toString(); + } + + public static JsonObject getJsonObject(String serializedJson) { + try (StringReader rdr = new StringReader(serializedJson)) { + try (JsonReader jsonReader = Json.createReader(rdr)) { + return jsonReader.readObject(); + } + } + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/export/CroissantExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/CroissantExporterTest.java new file mode 100644 index 00000000000..6c6da792d4e --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/export/CroissantExporterTest.java @@ -0,0 +1,605 @@ +package edu.harvard.iq.dataverse.export; + +import static org.junit.jupiter.api.Assertions.*; + +import io.gdcc.spi.export.ExportDataProvider; +import jakarta.json.Json; +import jakarta.json.JsonArray; +import jakarta.json.JsonObject; +import jakarta.json.JsonReader; +import jakarta.json.JsonWriter; +import jakarta.json.JsonWriterFactory; +import jakarta.json.stream.JsonGenerator; +import java.io.ByteArrayOutputStream; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.io.OutputStream; +import java.io.StringReader; +import java.io.StringWriter; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.skyscreamer.jsonassert.JSONAssert; + +public class CroissantExporterTest { + + static 
CroissantExporter exporter; + static OutputStream outputStreamMinimal; + static ExportDataProvider dataProviderMinimal; + static OutputStream outputStreamMax; + static ExportDataProvider dataProviderMax; + static OutputStream outputStreamCars; + static ExportDataProvider dataProviderCars; + static OutputStream outputStreamRestricted; + static ExportDataProvider dataProviderRestricted; + static OutputStream outputStreamJunk; + static ExportDataProvider dataProviderJunk; + static OutputStream outputStreamDraft; + static ExportDataProvider dataProviderDraft; + + @BeforeAll + public static void setUp() { + exporter = new CroissantExporter(); + + outputStreamMinimal = new ByteArrayOutputStream(); + dataProviderMinimal = + new ExportDataProvider() { + @Override + public JsonObject getDatasetJson() { + String pathToJsonFile = + "src/test/resources/croissant/minimal/in/datasetJson.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetORE() { + String pathToJsonFile = + "src/test/resources/croissant/minimal/in/datasetORE.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonArray getDatasetFileDetails() { + String pathToJsonFile = + "src/test/resources/croissant/minimal/in/datasetFileDetails.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readArray(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetSchemaDotOrg() { + String pathToJsonFile = + "src/test/resources/croissant/minimal/in/datasetSchemaDotOrg.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return 
jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public String getDataCiteXml() { + try { + return Files.readString( + Paths.get( + "src/test/resources/croissant/minimal/in/dataCiteXml.xml"), + StandardCharsets.UTF_8); + } catch (IOException ex) { + return null; + } + } + }; + + outputStreamMax = new ByteArrayOutputStream(); + dataProviderMax = + new ExportDataProvider() { + @Override + public JsonObject getDatasetJson() { + String pathToJsonFile = + "src/test/resources/croissant/max/in/datasetJson.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetORE() { + String pathToJsonFile = + "src/test/resources/croissant/max/in/datasetORE.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonArray getDatasetFileDetails() { + String pathToJsonFile = + "src/test/resources/croissant/max/in/datasetFileDetails.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readArray(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetSchemaDotOrg() { + String pathToJsonFile = + "src/test/resources/croissant/max/in/datasetSchemaDotOrg.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public String getDataCiteXml() { + try { + return Files.readString( + Paths.get( + "src/test/resources/croissant/max/in/dataCiteXml.xml"), + StandardCharsets.UTF_8); + } catch (IOException ex) { + return null; + } + } + }; + + outputStreamCars = new 
ByteArrayOutputStream(); + dataProviderCars = + new ExportDataProvider() { + @Override + public JsonObject getDatasetJson() { + String pathToJsonFile = + "src/test/resources/croissant/cars/in/datasetJson.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetORE() { + String pathToJsonFile = + "src/test/resources/croissant/cars/in/datasetORE.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonArray getDatasetFileDetails() { + String pathToJsonFile = + "src/test/resources/croissant/cars/in/datasetFileDetails.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readArray(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetSchemaDotOrg() { + String pathToJsonFile = + "src/test/resources/croissant/cars/in/datasetSchemaDotOrg.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public String getDataCiteXml() { + try { + return Files.readString( + Paths.get( + "src/test/resources/croissant/cars/in/dataCiteXml.xml"), + StandardCharsets.UTF_8); + } catch (IOException ex) { + return null; + } + } + }; + + outputStreamRestricted = new ByteArrayOutputStream(); + dataProviderRestricted = + new ExportDataProvider() { + @Override + public JsonObject getDatasetJson() { + String pathToJsonFile = + "src/test/resources/croissant/restricted/in/datasetJson.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } 
catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetORE() { + String pathToJsonFile = + "src/test/resources/croissant/restricted/in/datasetORE.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonArray getDatasetFileDetails() { + String pathToJsonFile = + "src/test/resources/croissant/restricted/in/datasetFileDetails.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readArray(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetSchemaDotOrg() { + String pathToJsonFile = + "src/test/resources/croissant/restricted/in/datasetSchemaDotOrg.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public String getDataCiteXml() { + try { + return Files.readString( + Paths.get( + "src/test/resources/croissant/restricted/in/dataCiteXml.xml"), + StandardCharsets.UTF_8); + } catch (IOException ex) { + return null; + } + } + }; + + outputStreamJunk = new ByteArrayOutputStream(); + dataProviderJunk = + new ExportDataProvider() { + @Override + public JsonObject getDatasetJson() { + String pathToJsonFile = + "src/test/resources/croissant/junk/in/datasetJson.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetORE() { + String pathToJsonFile = + "src/test/resources/croissant/junk/in/datasetORE.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch 
(FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonArray getDatasetFileDetails() { + String pathToJsonFile = + "src/test/resources/croissant/junk/in/datasetFileDetails.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readArray(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetSchemaDotOrg() { + String pathToJsonFile = + "src/test/resources/croissant/junk/in/datasetSchemaDotOrg.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public String getDataCiteXml() { + try { + return Files.readString( + Paths.get( + "src/test/resources/croissant/junk/in/dataCiteXml.xml"), + StandardCharsets.UTF_8); + } catch (IOException ex) { + return null; + } + } + }; + + outputStreamDraft = new ByteArrayOutputStream(); + dataProviderDraft = + new ExportDataProvider() { + @Override + public JsonObject getDatasetJson() { + String pathToJsonFile = + "src/test/resources/croissant/draft/in/datasetJson.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetORE() { + String pathToJsonFile = + "src/test/resources/croissant/draft/in/datasetORE.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonArray getDatasetFileDetails() { + String pathToJsonFile = + "src/test/resources/croissant/draft/in/datasetFileDetails.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readArray(); + } catch 
(FileNotFoundException ex) { + return null; + } + } + + @Override + public JsonObject getDatasetSchemaDotOrg() { + String pathToJsonFile = + "src/test/resources/croissant/draft/in/datasetSchemaDotOrg.json"; + try (JsonReader jsonReader = + Json.createReader(new FileReader(pathToJsonFile))) { + return jsonReader.readObject(); + } catch (FileNotFoundException ex) { + return null; + } + } + + @Override + public String getDataCiteXml() { + try { + return Files.readString( + Paths.get( + "src/test/resources/croissant/draft/in/dataCiteXml.xml"), + StandardCharsets.UTF_8); + } catch (IOException ex) { + return null; + } + } + }; + } + + @Test + public void testGetFormatName() { + CroissantExporter instance = new CroissantExporter(); + String expResult = ""; + String result = instance.getFormatName(); + assertEquals("croissant", result); + } + + @Test + public void testGetDisplayName() { + assertEquals("Croissant", exporter.getDisplayName(null)); + } + + @Test + public void testIsHarvestable() { + assertEquals(false, exporter.isHarvestable()); + } + + @Test + public void testIsAvailableToUsers() { + assertEquals(true, exporter.isAvailableToUsers()); + } + + @Test + public void testGetMediaType() { + assertEquals("application/json", exporter.getMediaType()); + } + + @Test + public void testExportDatasetMinimal() throws Exception { + exporter.exportDataset(dataProviderMinimal, outputStreamMinimal); + String actual = outputStreamMinimal.toString(); + writeCroissantFile(actual, "minimal"); + String expected = + Files.readString( + Paths.get( + "src/test/resources/croissant/minimal/expected/minimal-croissant.json"), + StandardCharsets.UTF_8); + JSONAssert.assertEquals(expected, actual, true); + assertEquals(prettyPrint(expected), prettyPrint(outputStreamMinimal.toString())); + } + + @Test + public void testExportDatasetMax() throws Exception { + exporter.exportDataset(dataProviderMax, outputStreamMax); + String actual = outputStreamMax.toString(); + writeCroissantFile(actual, 
"max"); + /* + First, install pyDataverse from Dans-labs, the "croissant" branch: + pip3 install --upgrade --no-cache-dir git+https://github.com/Dans-labs/pyDataverse@croissant#egg=pyDataverse + You can use this script to export Croissant from a dataset: + --- + from pyDataverse.Croissant import Croissant + #from pyDataverse.Croissant import Croissant + import json + #host = "https://dataverse.nl" + #PID = "doi:10.34894/KMRAYH" + host = "https://beta.dataverse.org" + PID = "doi:10.5072/FK2/VQTYHD" + croissant = Croissant(host, PID) + print(json.dumps(croissant.get_record(), indent=4, default=str)) + --- + Finally, uncomment the lines below to check for differences. + */ + // String pyDataverse = Files.readString(Paths.get("/tmp/pyDataverse.json"), + // StandardCharsets.UTF_8); + // JSONAssert.assertEquals(actual, pyDataverse, true); + String expected = + Files.readString( + Paths.get("src/test/resources/croissant/max/expected/max-croissant.json"), + StandardCharsets.UTF_8); + JSONAssert.assertEquals(expected, actual, true); + assertEquals(prettyPrint(expected), prettyPrint(outputStreamMax.toString())); + } + + /* + The data in stata13-auto.dta looks something like this: + make price mpg rep78 headroom trunk weight length turn displacement gear_ratio foreign + "AMC Concord" 4099 22 3 2.5 11 2930 186 40 121 3.58 0 + "AMC Pacer" 4749 17 3 3.0 11 3350 173 40 258 2.53 0 + "AMC Spirit" 3799 22 3.0 12 2640 168 35 121 3.08 0 + */ + @Test + public void testExportDatasetCars() throws Exception { + exporter.exportDataset(dataProviderCars, outputStreamCars); + String actual = outputStreamCars.toString(); + writeCroissantFile(actual, "cars"); + String expected = + Files.readString( + Paths.get("src/test/resources/croissant/cars/expected/cars-croissant.json"), + StandardCharsets.UTF_8); + JSONAssert.assertEquals(expected, actual, true); + assertEquals(prettyPrint(expected), prettyPrint(outputStreamCars.toString())); + } + + /** Same as the cars data but the stata13-auto.dta file 
is restricted. */ + @Test + public void testExportDatasetRestricted() throws Exception { + exporter.exportDataset(dataProviderRestricted, outputStreamRestricted); + String actual = outputStreamRestricted.toString(); + writeCroissantFile(actual, "restricted"); + String expected = + Files.readString( + Paths.get( + "src/test/resources/croissant/restricted/expected/restricted-croissant.json"), + StandardCharsets.UTF_8); + JSONAssert.assertEquals(expected, actual, true); + assertEquals(prettyPrint(expected), prettyPrint(outputStreamRestricted.toString())); + } + + @Test + public void testExportDatasetJunk() throws Exception { + exporter.exportDataset(dataProviderJunk, outputStreamJunk); + String actual = outputStreamJunk.toString(); + writeCroissantFile(actual, "junk"); + String expected = + Files.readString( + Paths.get("src/test/resources/croissant/junk/expected/junk-croissant.json"), + StandardCharsets.UTF_8); + JSONAssert.assertEquals(expected, actual, true); + assertEquals(prettyPrint(expected), prettyPrint(outputStreamJunk.toString())); + } + + @Test + public void testExportDatasetDraft() throws Exception { + exporter.exportDataset(dataProviderDraft, outputStreamDraft); + String actual = outputStreamDraft.toString(); + writeCroissantFile(actual, "draft"); + String expected = + Files.readString( + Paths.get( + "src/test/resources/croissant/draft/expected/draft-croissant.json"), + StandardCharsets.UTF_8); + JSONAssert.assertEquals(expected, actual, true); + assertEquals(prettyPrint(expected), prettyPrint(outputStreamDraft.toString())); + } + + private void writeCroissantFile(String actual, String name) throws IOException { + Path dir = + Files.createDirectories(Paths.get("src/test/resources/croissant/" + name + "/out")); + Path out = Paths.get(dir + "/croissant.json"); + Files.writeString(out, prettyPrint(actual), StandardCharsets.UTF_8); + } + + public static String prettyPrint(String jsonObject) { + try { + return prettyPrint(getJsonObject(jsonObject)); + } catch 
(Exception ex) { + return jsonObject; + } + } + + public static String prettyPrint(JsonObject jsonObject) { + Map config = new HashMap<>(); + config.put(JsonGenerator.PRETTY_PRINTING, true); + JsonWriterFactory jsonWriterFactory = Json.createWriterFactory(config); + StringWriter stringWriter = new StringWriter(); + try (JsonWriter jsonWriter = jsonWriterFactory.createWriter(stringWriter)) { + jsonWriter.writeObject(jsonObject); + } + return stringWriter.toString(); + } + + public static JsonObject getJsonObject(String serializedJson) { + try (StringReader rdr = new StringReader(serializedJson)) { + try (JsonReader jsonReader = Json.createReader(rdr)) { + return jsonReader.readObject(); + } + } + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/i18n/I18NUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/i18n/I18NUtilTest.java new file mode 100644 index 00000000000..10eadf3082b --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/i18n/I18NUtilTest.java @@ -0,0 +1,44 @@ +package edu.harvard.iq.dataverse.i18n; + +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.*; +import java.util.Locale; + +public class I18NUtilTest { + + @Test + void testParseAcceptLanguageHeader_singleLanguage() { + Locale locale = I18nUtil.parseAcceptLanguageHeader("en-US"); + assertEquals(Locale.forLanguageTag("en-US"), locale); + } + + @Test + void testParseAcceptLanguageHeader_singleLanguageWithQ() { + Locale locale = I18nUtil.parseAcceptLanguageHeader("en-US,en;q=0.5"); + assertEquals(Locale.forLanguageTag("en-US"), locale); + } + + @Test + void testParseAcceptLanguageHeader_multipleLanguages() { + Locale locale = I18nUtil.parseAcceptLanguageHeader("fr-CA,fr;q=0.8,en-US;q=0.6,en;q=0.4"); + assertEquals(Locale.forLanguageTag("fr-CA"), locale); + } + + @Test + void testParseAcceptLanguageHeader_emptyHeader() { + Locale locale = I18nUtil.parseAcceptLanguageHeader(""); + assertNull(locale); + } + + @Test + void 
testParseAcceptLanguageHeader_nullHeader() { + Locale locale = I18nUtil.parseAcceptLanguageHeader(null); + assertNull(locale); + } + + @Test + void testParseAcceptLanguageHeader_invalidHeader() { + Locale locale = I18nUtil.parseAcceptLanguageHeader("invalid-header"); + assertEquals(Locale.forLanguageTag("invalid-header"), locale); + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBeanTest.java new file mode 100644 index 00000000000..f04d5a7896c --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBeanTest.java @@ -0,0 +1,140 @@ + +package edu.harvard.iq.dataverse.search; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.InjectMocks; +import org.mockito.MockitoAnnotations; + +import java.util.Set; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.*; + +public class SolrIndexServiceBeanTest { + + @InjectMocks + private SolrIndexServiceBean solrIndexServiceBean; + + @BeforeEach + public void setUp() { + MockitoAnnotations.openMocks(this); + } + + @Test + public void testDatasetVersionsToBuildCardsFor_OnlyDraft() { + // Arrange + Dataset dataset = mock(Dataset.class); + DatasetVersion draftVersion = createMockVersion(1L, DatasetVersion.VersionState.DRAFT, true); + + when(dataset.getLatestVersion()).thenReturn(draftVersion); + when(dataset.getReleasedVersion()).thenReturn(null); + + // Act + Set result = invokeDatasetVersionsToBuildCardsFor(dataset); + + // Assert + assertEquals(1, result.size()); + assertTrue(result.contains(draftVersion)); + } + + @Test + public void testDatasetVersionsToBuildCardsFor_OnlyDeaccessioned() { + // Arrange + Dataset dataset = mock(Dataset.class); + DatasetVersion deaccessionedVersion = 
createMockVersion(1L, DatasetVersion.VersionState.DEACCESSIONED, false); + + when(dataset.getLatestVersion()).thenReturn(deaccessionedVersion); + when(dataset.getReleasedVersion()).thenReturn(null); + + // Act + Set result = invokeDatasetVersionsToBuildCardsFor(dataset); + + // Assert + assertEquals(1, result.size()); + assertTrue(result.contains(deaccessionedVersion)); + } + + @Test + public void testDatasetVersionsToBuildCardsFor_OnlyReleased() { + // Arrange + Dataset dataset = mock(Dataset.class); + DatasetVersion releasedVersion = createMockVersion(1L, DatasetVersion.VersionState.RELEASED, false); + + when(dataset.getLatestVersion()).thenReturn(releasedVersion); + when(dataset.getReleasedVersion()).thenReturn(releasedVersion); + + // Act + Set result = invokeDatasetVersionsToBuildCardsFor(dataset); + + // Assert + assertEquals(1, result.size()); + assertTrue(result.contains(releasedVersion)); + } + + @Test + public void testDatasetVersionsToBuildCardsFor_ReleasedAndDraft() { + // Arrange + Dataset dataset = mock(Dataset.class); + DatasetVersion releasedVersion = createMockVersion(1L, DatasetVersion.VersionState.RELEASED, false); + DatasetVersion draftVersion = createMockVersion(2L, DatasetVersion.VersionState.DRAFT, true); + + when(dataset.getLatestVersion()).thenReturn(draftVersion); + when(dataset.getReleasedVersion()).thenReturn(releasedVersion); + + // Act + Set result = invokeDatasetVersionsToBuildCardsFor(dataset); + + // Assert + assertEquals(2, result.size()); + assertTrue(result.contains(releasedVersion)); + assertTrue(result.contains(draftVersion)); + } + + @Test + public void testDatasetVersionsToBuildCardsFor_ReleasedAndDeaccessioned() { + // Arrange + Dataset dataset = mock(Dataset.class); + DatasetVersion releasedVersion = createMockVersion(1L, DatasetVersion.VersionState.RELEASED, false); + DatasetVersion deaccessionedVersion = createMockVersion(2L, DatasetVersion.VersionState.DEACCESSIONED, false); + + // Latest is deaccessioned, but there's a 
released version + when(dataset.getLatestVersion()).thenReturn(deaccessionedVersion); + when(dataset.getReleasedVersion()).thenReturn(releasedVersion); + + // Act + Set result = invokeDatasetVersionsToBuildCardsFor(dataset); + + // Assert + // Should only return the released version, not the deaccessioned one + assertEquals(1, result.size()); + assertTrue(result.contains(releasedVersion)); + assertFalse(result.contains(deaccessionedVersion)); + } + + // Helper method to create mock DatasetVersion + private DatasetVersion createMockVersion(Long id, DatasetVersion.VersionState state, boolean isDraft) { + DatasetVersion version = mock(DatasetVersion.class); + when(version.getId()).thenReturn(id); + when(version.getVersionState()).thenReturn(state); + when(version.isDraft()).thenReturn(isDraft); + when(version.isReleased()).thenReturn(state == DatasetVersion.VersionState.RELEASED); + when(version.isDeaccessioned()).thenReturn(state == DatasetVersion.VersionState.DEACCESSIONED); + return version; + } + + // Helper method to invoke the private method using reflection + @SuppressWarnings("unchecked") + private Set invokeDatasetVersionsToBuildCardsFor(Dataset dataset) { + try { + java.lang.reflect.Method method = SolrIndexServiceBean.class.getDeclaredMethod( + "datasetVersionsToBuildCardsFor", Dataset.class); + method.setAccessible(true); + return (Set) method.invoke(solrIndexServiceBean, dataset); + } catch (Exception e) { + throw new RuntimeException("Failed to invoke private method", e); + } + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java index 46359d7b02c..af3ef6694fd 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java @@ -3,25 +3,20 @@ import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.license.License; import 
edu.harvard.iq.dataverse.util.FileUtil.FileCitationExtension; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import java.io.File; import java.io.IOException; -import java.time.LocalDate; import java.net.URI; +import java.time.LocalDate; import java.util.logging.Level; import java.util.logging.Logger; import java.util.stream.Stream; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; +import static org.junit.jupiter.api.Assertions.*; public class FileUtilTest { @@ -248,17 +243,17 @@ public void testIsPubliclyDownloadable5() { public void testgetFileDownloadUrl() { Long fileId = 42l; Long fileMetadataId = 2L; - assertEquals("/api/access/datafile/42", FileUtil.getFileDownloadUrlPath(null, fileId, false, null)); - assertEquals("/api/access/datafile/42", FileUtil.getFileDownloadUrlPath("", fileId, false, null)); - assertEquals("/api/access/datafile/bundle/42", FileUtil.getFileDownloadUrlPath("bundle", fileId, false, null)); - assertEquals("/api/access/datafile/bundle/42?fileMetadataId=2", FileUtil.getFileDownloadUrlPath("bundle", fileId, false, fileMetadataId)); - assertEquals("/api/access/datafile/42?format=original", FileUtil.getFileDownloadUrlPath("original", fileId, false, null)); - assertEquals("/api/access/datafile/42?format=RData", FileUtil.getFileDownloadUrlPath("RData", fileId, false, null)); - assertEquals("/api/access/datafile/42/metadata", 
FileUtil.getFileDownloadUrlPath("var", fileId, false, null)); - assertEquals("/api/access/datafile/42/metadata?fileMetadataId=2", FileUtil.getFileDownloadUrlPath("var", fileId, false, fileMetadataId)); - assertEquals("/api/access/datafile/42?format=tab", FileUtil.getFileDownloadUrlPath("tab", fileId, false, null)); - assertEquals("/api/access/datafile/42?format=tab&gbrecs=true", FileUtil.getFileDownloadUrlPath("tab", fileId, true, null)); - assertEquals("/api/access/datafile/42?gbrecs=true", FileUtil.getFileDownloadUrlPath(null, fileId, true, null)); + assertEquals("/api/access/datafile/42", FileUtil.getFileDownloadUrlPath(null, fileId, false, null, null)); + assertEquals("/api/access/datafile/42", FileUtil.getFileDownloadUrlPath("", fileId, false, null, null)); + assertEquals("/api/access/datafile/bundle/42", FileUtil.getFileDownloadUrlPath("bundle", fileId, false, null, null)); + assertEquals("/api/access/datafile/bundle/42?fileMetadataId=2", FileUtil.getFileDownloadUrlPath("bundle", fileId, false, fileMetadataId, null)); + assertEquals("/api/access/datafile/42?format=original", FileUtil.getFileDownloadUrlPath("original", fileId, false, null, null)); + assertEquals("/api/access/datafile/42?format=RData", FileUtil.getFileDownloadUrlPath("RData", fileId, false, null, null)); + assertEquals("/api/access/datafile/42/metadata", FileUtil.getFileDownloadUrlPath("var", fileId, false, null, null)); + assertEquals("/api/access/datafile/42/metadata?fileMetadataId=2", FileUtil.getFileDownloadUrlPath("var", fileId, false, fileMetadataId, null)); + assertEquals("/api/access/datafile/42?format=tab", FileUtil.getFileDownloadUrlPath("tab", fileId, false, null, null)); + assertEquals("/api/access/datafile/42?format=tab&gbrecs=true", FileUtil.getFileDownloadUrlPath("tab", fileId, true, null, null)); + assertEquals("/api/access/datafile/42?gbrecs=true", FileUtil.getFileDownloadUrlPath(null, fileId, true, null, null)); } @Test diff --git 
a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilValidateEmbargoReasonTest.java b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilValidateEmbargoReasonTest.java new file mode 100644 index 00000000000..f472350704e --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilValidateEmbargoReasonTest.java @@ -0,0 +1,217 @@ +package edu.harvard.iq.dataverse.util; + +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import jakarta.faces.application.FacesMessage; +import jakarta.faces.component.UIComponent; +import jakarta.faces.context.ExternalContext; +import jakarta.faces.context.FacesContext; +import jakarta.faces.validator.ValidatorException; + +import java.util.HashMap; +import java.util.Map; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.*; + +@LocalJvmSettings +public class FileUtilValidateEmbargoReasonTest { + + @Mock + private FacesContext facesContext; + + @Mock + private UIComponent component; + + @Mock + private ExternalContext externalContext; + + private Map requestParameterMap; + private AutoCloseable mocks; + + @BeforeEach + public void setUp() { + mocks = MockitoAnnotations.openMocks(this); + requestParameterMap = new HashMap<>(); + + when(facesContext.getExternalContext()).thenReturn(externalContext); + when(externalContext.getRequestParameterMap()).thenReturn(requestParameterMap); + } + + @AfterEach + public void tearDown() throws 
Exception { + if (mocks != null) { + mocks.close(); + } + } + + @Test + public void validateEmbargoReason_shouldSkipValidation_whenRemovingEmbargo() { + // Arrange + boolean removeEmbargo = true; + requestParameterMap.put("jakarta.faces.source", "fileEmbargoPopupSaveButton"); + + // Act & Assert - should not throw exception + assertDoesNotThrow(() -> + FileUtil.validateEmbargoReason(facesContext, component, null, removeEmbargo) + ); + } + + @Test + public void validateEmbargoReason_shouldSkipValidation_whenSourceIsNull() { + // Arrange + boolean removeEmbargo = false; + requestParameterMap.put("jakarta.faces.source", null); + + // Act & Assert - should not throw exception + assertDoesNotThrow(() -> + FileUtil.validateEmbargoReason(facesContext, component, null, removeEmbargo) + ); + } + + @Test + public void validateEmbargoReason_shouldSkipValidation_whenSourceIsNotSaveButton() { + // Arrange + boolean removeEmbargo = false; + requestParameterMap.put("jakarta.faces.source", "someOtherButton"); + + // Act & Assert - should not throw exception + assertDoesNotThrow(() -> + FileUtil.validateEmbargoReason(facesContext, component, null, removeEmbargo) + ); + } + + @Test + @JvmSetting(key = JvmSettings.FEATURE_FLAG, value = "true", varArgs = "require-embargo-reason") + public void validateEmbargoReason_shouldThrowException_whenReasonIsNullAndFlagEnabled() { + // Arrange + boolean removeEmbargo = false; + requestParameterMap.put("jakarta.faces.source", "fileEmbargoPopupSaveButton"); + + // Act & Assert + ValidatorException exception = assertThrows(ValidatorException.class, () -> + FileUtil.validateEmbargoReason(facesContext, component, null, removeEmbargo) + ); + + assertEquals(FacesMessage.SEVERITY_ERROR, exception.getFacesMessage().getSeverity()); + assertEquals(BundleUtil.getStringFromBundle("embargo.reason.required"), + exception.getFacesMessage().getSummary()); + } + + @Test + @JvmSetting(key = JvmSettings.FEATURE_FLAG, value = "false", varArgs = 
"require-embargo-reason") + public void validateEmbargoReason_shouldNotThrowException_whenReasonIsNullAndFlagDisabled() { + // Arrange + boolean removeEmbargo = false; + requestParameterMap.put("jakarta.faces.source", "fileEmbargoPopupSaveButton"); + + // Act & Assert - should not throw exception + assertDoesNotThrow(() -> + FileUtil.validateEmbargoReason(facesContext, component, null, removeEmbargo) + ); + } + + @ParameterizedTest + @JvmSetting(key = JvmSettings.FEATURE_FLAG, value = "false", varArgs = "require-embargo-reason") + @ValueSource(strings = {"", " ", "\t", "\n", " \t\n "}) + public void validateEmbargoReason_shouldThrowException_whenReasonIsBlank(String blankReason) { + // Arrange + boolean removeEmbargo = false; + requestParameterMap.put("jakarta.faces.source", "fileEmbargoPopupSaveButton"); + + // Act & Assert + ValidatorException exception = assertThrows(ValidatorException.class, () -> + FileUtil.validateEmbargoReason(facesContext, component, blankReason, removeEmbargo) + ); + + assertEquals(FacesMessage.SEVERITY_ERROR, exception.getFacesMessage().getSeverity()); + assertEquals(BundleUtil.getStringFromBundle("embargo.reason.blank"), + exception.getFacesMessage().getSummary()); + } + + @Test + @JvmSetting(key = JvmSettings.FEATURE_FLAG, value = "true", varArgs = "require-embargo-reason") + public void validateEmbargoReason_shouldHandleComplexScenario_flagEnabledBlankReasonSaveButton() { + // Arrange + boolean removeEmbargo = false; + requestParameterMap.put("jakarta.faces.source", "fileEmbargoPopupSaveButton"); + + // Act & Assert - blank reason should still fail even when flag is disabled + ValidatorException exception = assertThrows(ValidatorException.class, () -> + FileUtil.validateEmbargoReason(facesContext, component, " ", removeEmbargo) + ); + + assertEquals(BundleUtil.getStringFromBundle("embargo.reason.blank"), + exception.getFacesMessage().getSummary()); + } + + @ParameterizedTest + @ValueSource(strings = { + "Valid embargo reason", + " 
Valid reason with spaces " + }) + public void validateEmbargoReason_shouldNotThrowException_whenReasonIsValid(String validReason) { + // Arrange + boolean removeEmbargo = false; + requestParameterMap.put("jakarta.faces.source", "fileEmbargoPopupSaveButton"); + + // Act & Assert - should not throw exception + assertDoesNotThrow(() -> + FileUtil.validateEmbargoReason(facesContext, component, validReason, removeEmbargo) + ); + } + + @ParameterizedTest + @MethodSource("provideSaveButtonVariations") + public void validateEmbargoReason_shouldValidate_whenSaveButtonTriggersRequest(String buttonId) { + // Arrange + boolean removeEmbargo = false; + requestParameterMap.put("jakarta.faces.source", buttonId); + + // Act & Assert + ValidatorException exception = assertThrows(ValidatorException.class, () -> + FileUtil.validateEmbargoReason(facesContext, component, "", removeEmbargo) + ); + + assertEquals(FacesMessage.SEVERITY_ERROR, exception.getFacesMessage().getSeverity()); + } + + static Stream provideSaveButtonVariations() { + return Stream.of( + Arguments.of("fileEmbargoPopupSaveButton"), + // button in any context + Arguments.of("form:fileEmbargoPopupSaveButton"), + // or any suffix + Arguments.of("fileEmbargoPopupSaveButton:anything") + ); + } + + @Test + @JvmSetting(key = JvmSettings.FEATURE_FLAG, value = "true", varArgs = "require-embargo-reason") + public void validateEmbargoReason_shouldHandleComplexScenario_flagEnabledNullReasonSaveButton() { + // Arrange + boolean removeEmbargo = false; + requestParameterMap.put("jakarta.faces.source", "form:fileEmbargoPopupSaveButton"); + + // Act & Assert + ValidatorException exception = assertThrows(ValidatorException.class, () -> + FileUtil.validateEmbargoReason(facesContext, component, null, removeEmbargo) + ); + + assertEquals(BundleUtil.getStringFromBundle("embargo.reason.required"), + exception.getFacesMessage().getSummary()); + } + +} \ No newline at end of file diff --git 
a/src/test/java/edu/harvard/iq/dataverse/util/StringUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/StringUtilTest.java index b00b4afca7d..7ab951dac38 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/StringUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/StringUtilTest.java @@ -10,6 +10,7 @@ import java.util.Optional; import java.util.stream.Stream; +import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; diff --git a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java index 123aacc601d..3e8ada1f386 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java @@ -93,10 +93,11 @@ void testGetVersionWithBuild() { // then assertTrue(result.startsWith("100.100"), "'" + result + "' not starting with 100.100"); - assertTrue(result.contains("build")); // Cannot test this here - there might be the bundle file present which is not under test control //assertTrue(result.endsWith("FOOBAR"), "'" + result + "' not ending with FOOBAR"); + // Not sure what to do about this. The above is correct, if there is a BuildNumber.properties + // file present on the developer's system, it will take precedence. - L.A. 
} @Test @@ -202,6 +203,55 @@ void testGetTabularIngestSizeLimitsWithSingleInvalidValue() { assertEquals(1, result.size()); assertEquals(0L, (long) result.get(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY)); } + + @Test + public void testGetHarvestingClientRequestIntervals() { + + // Test with setting not set will return default 0.0 + // given + doReturn(null).when(settingsService).getValueForKey(SettingsServiceBean.Key.HarvestingClientCallRateLimit); + // when + Map result = systemConfig.getHarvestingClientRequestIntervals(); + // then + assertEquals(1, result.size()); + assertEquals(0, result.get(SystemConfig.DEFAULT_KEY)); + + // Test with good client + String value = "{\"harvarddv\": 0.9, \"default\": 0.0}"; + // given + doReturn(value).when(settingsService).getValueForKey(SettingsServiceBean.Key.HarvestingClientCallRateLimit); + // when + result = systemConfig.getHarvestingClientRequestIntervals(); + // then + assertEquals(2, result.size()); + assertTrue(result.containsKey("harvarddv")); + assertTrue(result.containsKey("default")); + assertEquals(0.9F, systemConfig.getHarvestingClientRequestInterval("harvarddv")); + assertEquals(0.0F, systemConfig.getHarvestingClientRequestInterval("notFoundSoDefault")); + + // Test with missing default will create default 0.0 + value = "{\"harvarddv\": 0.9}"; + // given + doReturn(value).when(settingsService).getValueForKey(SettingsServiceBean.Key.HarvestingClientCallRateLimit); + // when + result = systemConfig.getHarvestingClientRequestIntervals(); + // then + assertEquals(2, result.size()); + assertTrue(result.containsKey("default")); + assertEquals(0.0F, systemConfig.getHarvestingClientRequestInterval("default")); + + // Test with invalid JSON (value as string instead of float) will default setting to default 0.0 + value = "{\"harvarddv1\": 0.9, \"harvarddv2\": \"string\"}"; + // given + doReturn(value).when(settingsService).getValueForKey(SettingsServiceBean.Key.HarvestingClientCallRateLimit); + // when + result = 
systemConfig.getHarvestingClientRequestIntervals(); + // then + assertEquals(1, result.size()); + assertTrue(result.containsKey("default")); + assertTrue(!result.containsKey("harvarddv1")); + assertTrue(!result.containsKey("harvarddv2")); + } @ParameterizedTest @ValueSource(strings = {"", "{ invalid: }"}) diff --git a/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java index 09739b67023..d92f8822e59 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java @@ -1,12 +1,15 @@ package edu.harvard.iq.dataverse.util; -import static org.junit.jupiter.api.Assertions.*; -import static org.junit.jupiter.api.Assertions.assertTrue; +import jakarta.ws.rs.core.MultivaluedHashMap; +import jakarta.ws.rs.core.MultivaluedMap; +import org.junit.jupiter.api.Test; +import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; -import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; public class UrlSignerUtilTest { @@ -47,4 +50,40 @@ public void testSignAndValidate() { assertFalse(UrlSignerUtil.isValidUrl(signedUrl3, user1, get, key)); } + + @Test + public void testSignAndValidateWithParams() { + final String url1 = "http://localhost:8080/api/test1?p1=true&p2=test"; + final String url2 = "http://localhost:8080/api/test1?p1=true&p2=test&until=2999-01-01&user=Fred&method=POST&token=abracadabara&signed=true"; + final String url3 = "localhost:8080/api/test1?p1=true&p2&until=2099-01-01"; + final int longTimeout = 1000; + final String user1 = "Alice"; + final String key = "abracadabara open sesame"; + MultivaluedMap queryParameters = new MultivaluedHashMap<>(); + queryParameters.put("p1", List.of("true")); + queryParameters.put("p2", List.of("test")); + queryParameters.put("until", 
List.of("2099-01-01")); + + String signedUrl1 = UrlSignerUtil.signUrl(url1, longTimeout, user1, "GET", key); + assertTrue(signedUrl1.contains("test1?p1=true&p2=test")); + System.out.println(signedUrl1); + + String signedUrl2 = UrlSignerUtil.signUrl(url2, longTimeout, user1, "GET", key); + assertTrue(signedUrl2.contains("&until=")); // contains the until param but not the bogus one passed in + assertFalse(signedUrl2.contains("&until=2099-01-01")); + assertTrue(signedUrl2.contains("&user=Alice")); // contains the user param but not the bogus one passed in + assertFalse(signedUrl2.contains("&user=Fred")); + assertTrue(signedUrl2.contains("&method=GET")); // contains the method param but not the bogus one passed in + assertFalse(signedUrl2.contains("&method=POST")); + assertTrue(signedUrl2.contains("&token=")); // contains the signed token param but not the bogus one passed in + assertFalse(signedUrl2.contains("&token=abracadabara")); + assertFalse(signedUrl2.contains("&signed")); // make sure we don't propagate the "signed" param + System.out.println(signedUrl2); + + // This will log an error but will still return the signed url even if it's now a valid url + // All callers of this method don't handle errors being returned, and it's highly unlikely that the url would be bad + String signedUrl3 = UrlSignerUtil.signUrl(url3, longTimeout, user1, "GET", key); + System.out.println(signedUrl3); + assertTrue(signedUrl3.contains("&p2&")); // Show that this works with params that have no value + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/bagit/BagGeneratorInfoFileTest.java b/src/test/java/edu/harvard/iq/dataverse/util/bagit/BagGeneratorInfoFileTest.java new file mode 100644 index 00000000000..05e83b8540d --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/bagit/BagGeneratorInfoFileTest.java @@ -0,0 +1,296 @@ + +package edu.harvard.iq.dataverse.util.bagit; + +import edu.harvard.iq.dataverse.engine.command.impl.AbstractSubmitToArchiveCommand; 
+import edu.harvard.iq.dataverse.util.json.JsonLDTerm; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import com.google.gson.JsonParser; + +import jakarta.json.Json; +import jakarta.json.JsonArrayBuilder; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; + +import java.lang.reflect.Field; +import java.lang.reflect.Method; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.*; + +public class BagGeneratorInfoFileTest { + + private BagGenerator bagGenerator; + private JsonObjectBuilder testAggregationBuilder; + + @Mock + private OREMap mockOreMap; + + @BeforeEach + public void setUp() throws Exception { + MockitoAnnotations.openMocks(this); + + // Create base test aggregation builder with required fields + testAggregationBuilder = Json.createObjectBuilder(); + testAggregationBuilder.add("@id", "doi:10.5072/FK2/TEST123"); + testAggregationBuilder.add(JsonLDTerm.schemaOrg("name").getLabel(), "Test Dataset"); + testAggregationBuilder.add(JsonLDTerm.schemaOrg("includedInDataCatalog").getLabel(), "Test Catalog"); + } + + /** + * Helper method to finalize the aggregation and create the BagGenerator + */ + private void initializeBagGenerator() throws Exception { + JsonObject testAggregation = testAggregationBuilder.build(); + + JsonObjectBuilder oremapJsonBuilder = Json.createObjectBuilder(); + oremapJsonBuilder.add(JsonLDTerm.ore("describes").getLabel(), testAggregation); + JsonObject oremapObject = oremapJsonBuilder.build(); + // Mock the OREMap.getOREMap() method to return the built JSON + when(mockOreMap.getOREMap()).thenReturn(oremapObject); + + // Initialize BagGenerator with test data + bagGenerator = new BagGenerator(oremapObject, "", AbstractSubmitToArchiveCommand.getJsonLDTerms(mockOreMap)); + setPrivateField(bagGenerator, "aggregation", (com.google.gson.JsonObject) JsonParser + 
.parseString(oremapObject.getJsonObject(JsonLDTerm.ore("describes").getLabel()).toString())); + setPrivateField(bagGenerator, "totalDataSize", 1024000L); + setPrivateField(bagGenerator, "dataCount", 10L); + } + + @Test + public void testGenerateInfoFileWithSingleContact() throws Exception { + // Arrange + JsonLDTerm contactTerm = JsonLDTerm.schemaOrg("creator"); + JsonLDTerm contactNameTerm = JsonLDTerm.schemaOrg("name"); + JsonLDTerm contactEmailTerm = JsonLDTerm.schemaOrg("email"); + + when(mockOreMap.getContactTerm()).thenReturn(contactTerm); + when(mockOreMap.getContactNameTerm()).thenReturn(contactNameTerm); + when(mockOreMap.getContactEmailTerm()).thenReturn(contactEmailTerm); + + JsonObjectBuilder contactBuilder = Json.createObjectBuilder(); + contactBuilder.add(contactNameTerm.getLabel(), "John Doe"); + contactBuilder.add(contactEmailTerm.getLabel(), "john.doe@example.com"); + testAggregationBuilder.add(contactTerm.getLabel(), contactBuilder); + + initializeBagGenerator(); + + // Act + String infoFile = invokeGenerateInfoFile(); + + // Assert + assertNotNull(infoFile); + assertTrue(infoFile.contains("Contact-Name: John Doe")); + assertTrue(infoFile.contains("Contact-Email: john.doe@example.com")); + } + + @Test + public void testGenerateInfoFileWithMultipleContacts() throws Exception { + // Arrange + JsonLDTerm contactTerm = JsonLDTerm.schemaOrg("creator"); + JsonLDTerm contactNameTerm = JsonLDTerm.schemaOrg("name"); + JsonLDTerm contactEmailTerm = JsonLDTerm.schemaOrg("email"); + + when(mockOreMap.getContactTerm()).thenReturn(contactTerm); + when(mockOreMap.getContactNameTerm()).thenReturn(contactNameTerm); + when(mockOreMap.getContactEmailTerm()).thenReturn(contactEmailTerm); + + JsonArrayBuilder contactsBuilder = Json.createArrayBuilder(); + + JsonObjectBuilder contact1 = Json.createObjectBuilder(); + contact1.add(contactNameTerm.getLabel(), "John Doe"); + contact1.add(contactEmailTerm.getLabel(), "john.doe@example.com"); + + JsonObjectBuilder contact2 = 
Json.createObjectBuilder(); + contact2.add(contactNameTerm.getLabel(), "Jane Smith"); + contact2.add(contactEmailTerm.getLabel(), "jane.smith@example.com"); + + JsonObjectBuilder contact3 = Json.createObjectBuilder(); + contact3.add(contactNameTerm.getLabel(), "Bob Johnson"); + contact3.add(contactEmailTerm.getLabel(), "bob.johnson@example.com"); + + contactsBuilder.add(contact1); + contactsBuilder.add(contact2); + contactsBuilder.add(contact3); + + testAggregationBuilder.add(contactTerm.getLabel(), contactsBuilder); + + initializeBagGenerator(); + + // Act + String infoFile = invokeGenerateInfoFile(); + + // Assert + assertNotNull(infoFile); + assertTrue(infoFile.contains("Contact-Name: John Doe")); + assertTrue(infoFile.contains("Contact-Email: john.doe@example.com")); + assertTrue(infoFile.contains("Contact-Name: Jane Smith")); + assertTrue(infoFile.contains("Contact-Email: jane.smith@example.com")); + assertTrue(infoFile.contains("Contact-Name: Bob Johnson")); + assertTrue(infoFile.contains("Contact-Email: bob.johnson@example.com")); + } + + @Test + public void testGenerateInfoFileWithSingleDescription() throws Exception { + // Arrange + JsonLDTerm descriptionTerm = JsonLDTerm.schemaOrg("description"); + JsonLDTerm descriptionTextTerm = JsonLDTerm.schemaOrg("value"); + + when(mockOreMap.getDescriptionTerm()).thenReturn(descriptionTerm); + when(mockOreMap.getDescriptionTextTerm()).thenReturn(descriptionTextTerm); + + JsonObjectBuilder descriptionBuilder = Json.createObjectBuilder(); + descriptionBuilder.add(descriptionTextTerm.getLabel(), "This is a test dataset description."); + testAggregationBuilder.add(descriptionTerm.getLabel(), descriptionBuilder); + + initializeBagGenerator(); + + // Act + String infoFile = invokeGenerateInfoFile(); + + // Assert + assertNotNull(infoFile); + assertTrue(infoFile.contains("External-Description: This is a test dataset description.")); + } + + @Test + public void testGenerateInfoFileWithMultipleDescriptions() throws Exception 
{ + // Arrange + JsonLDTerm descriptionTerm = JsonLDTerm.schemaOrg("description"); + JsonLDTerm descriptionTextTerm = JsonLDTerm.schemaOrg("value"); + + when(mockOreMap.getDescriptionTerm()).thenReturn(descriptionTerm); + when(mockOreMap.getDescriptionTextTerm()).thenReturn(descriptionTextTerm); + + JsonArrayBuilder descriptionsBuilder = Json.createArrayBuilder(); + + JsonObjectBuilder desc1 = Json.createObjectBuilder(); + desc1.add(descriptionTextTerm.getLabel(), "First description of the dataset."); + + JsonObjectBuilder desc2 = Json.createObjectBuilder(); + desc2.add(descriptionTextTerm.getLabel(), "Second description with additional details."); + + JsonObjectBuilder desc3 = Json.createObjectBuilder(); + desc3.add(descriptionTextTerm.getLabel(), "Third description for completeness."); + + descriptionsBuilder.add(desc1); + descriptionsBuilder.add(desc2); + descriptionsBuilder.add(desc3); + + testAggregationBuilder.add(descriptionTerm.getLabel(), descriptionsBuilder); + + initializeBagGenerator(); + + // Act + String infoFile = invokeGenerateInfoFile(); + // Assert + assertNotNull(infoFile); + // Multiple descriptions should be concatenated with commas as per getSingleValue method + assertTrue(infoFile.contains("External-Description: First description of the dataset.,Second description with\r\n additional details.,Third description for completeness.")); + } + + @Test + public void testGenerateInfoFileWithRequiredFields() throws Exception { + // Arrange - minimal setup with required fields already in setUp() + JsonLDTerm contactTerm = JsonLDTerm.schemaOrg("creator"); + JsonLDTerm contactNameTerm = JsonLDTerm.schemaOrg("name"); + JsonLDTerm descriptionTerm = JsonLDTerm.schemaOrg("description"); + JsonLDTerm descriptionTextTerm = JsonLDTerm.schemaOrg("value"); + + when(mockOreMap.getContactTerm()).thenReturn(contactTerm); + when(mockOreMap.getContactNameTerm()).thenReturn(contactNameTerm); + when(mockOreMap.getContactEmailTerm()).thenReturn(null); + 
when(mockOreMap.getDescriptionTerm()).thenReturn(descriptionTerm); + when(mockOreMap.getDescriptionTextTerm()).thenReturn(descriptionTextTerm); + + JsonObjectBuilder contactBuilder = Json.createObjectBuilder(); + contactBuilder.add(contactNameTerm.getLabel(), "Test Contact"); + testAggregationBuilder.add(contactTerm.getLabel(), contactBuilder); + + JsonObjectBuilder descriptionBuilder = Json.createObjectBuilder(); + descriptionBuilder.add(descriptionTextTerm.getLabel(), "Test description"); + testAggregationBuilder.add(descriptionTerm.getLabel(), descriptionBuilder); + + initializeBagGenerator(); + + // Act + String infoFile = invokeGenerateInfoFile(); + + // Assert + assertNotNull(infoFile); + assertTrue(infoFile.contains("Contact-Name: Test Contact")); + assertTrue(infoFile.contains("External-Description: Test description")); + assertTrue(infoFile.contains("Source-Organization:")); + assertTrue(infoFile.contains("Organization-Address:")); + assertTrue(infoFile.contains("Organization-Email:")); + assertTrue(infoFile.contains("Bagging-Date:")); + assertTrue(infoFile.contains("External-Identifier: doi:10.5072/FK2/TEST123")); + assertTrue(infoFile.contains("Bag-Size:")); + assertTrue(infoFile.contains("Payload-Oxum: 1024000.10")); + assertTrue(infoFile.contains("Internal-Sender-Identifier: Test Catalog:Test Dataset")); + } + + @Test + public void testGenerateInfoFileWithDifferentBagSizes() throws Exception { + // Arrange + JsonLDTerm contactTerm = JsonLDTerm.schemaOrg("creator"); + when(mockOreMap.getContactTerm()).thenReturn(contactTerm); + when(mockOreMap.getContactNameTerm()).thenReturn(null); + when(mockOreMap.getContactEmailTerm()).thenReturn(null); + when(mockOreMap.getDescriptionTerm()).thenReturn(null); + + initializeBagGenerator(); + + // Test with bytes + setPrivateField(bagGenerator, "totalDataSize", 512L); + setPrivateField(bagGenerator, "dataCount", 5L); + String infoFile1 = invokeGenerateInfoFile(); + assertTrue(infoFile1.contains("Bag-Size: 512 
bytes")); + assertTrue(infoFile1.contains("Payload-Oxum: 512.5")); + + // Test with KB + setPrivateField(bagGenerator, "totalDataSize", 2048L); + setPrivateField(bagGenerator, "dataCount", 3L); + String infoFile2 = invokeGenerateInfoFile(); + assertTrue(infoFile2.contains("Bag-Size: 2.05 KB")); + assertTrue(infoFile2.contains("Payload-Oxum: 2048.3")); + + // Test with MB + setPrivateField(bagGenerator, "totalDataSize", 5242880L); + setPrivateField(bagGenerator, "dataCount", 100L); + String infoFile3 = invokeGenerateInfoFile(); + assertTrue(infoFile3.contains("Bag-Size: 5.24 MB")); + assertTrue(infoFile3.contains("Payload-Oxum: 5242880.100")); + + // Test with GB + setPrivateField(bagGenerator, "totalDataSize", 2147483648L); + setPrivateField(bagGenerator, "dataCount", 1000L); + + String infoFile4 = invokeGenerateInfoFile(); + assertTrue(infoFile4.contains("Bag-Size: 2.15 GB")); + assertTrue(infoFile4.contains("Payload-Oxum: 2147483648.1000")); + } + + // Helper methods + + /** + * Invokes the private generateInfoFile method using reflection + */ + private String invokeGenerateInfoFile() throws Exception { + Method method = BagGenerator.class.getDeclaredMethod("generateInfoFile"); + method.setAccessible(true); + return (String) method.invoke(bagGenerator); + } + + /** + * Sets a private field value using reflection + */ + private void setPrivateField(Object target, String fieldName, Object value) throws Exception { + Field field = BagGenerator.class.getDeclaredField(fieldName); + field.setAccessible(true); + field.set(target, value); + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/bagit/BagGeneratorMultilineWrapTest.java b/src/test/java/edu/harvard/iq/dataverse/util/bagit/BagGeneratorMultilineWrapTest.java new file mode 100644 index 00000000000..6595404b755 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/bagit/BagGeneratorMultilineWrapTest.java @@ -0,0 +1,257 @@ + +package 
edu.harvard.iq.dataverse.util.bagit; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +/** + * Tests adapted for DD-2093: verify the behavior of BagGenerator.multilineWrap. + */ +public class BagGeneratorMultilineWrapTest { + + private static Method multilineWrap; + + @BeforeAll + static void setUp() throws NoSuchMethodException { + // Access the private static method via reflection + multilineWrap = BagGenerator.class.getDeclaredMethod("multilineWrap", String.class); + multilineWrap.setAccessible(true); + } + + private String callMultilineWrap(String input) { + try { + return (String) multilineWrap.invoke(null, input); + } catch (IllegalAccessException | InvocationTargetException e) { + throw new RuntimeException(e); + } + } + + @Test + void shortLine_noWrap() { + String input = "Hello world"; + String out = callMultilineWrap(input); + assertThat(out).isEqualTo("Hello world"); + } + + @Test + void exactBoundary_78chars_noWrap() { + String input = "a".repeat(78); + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(input); + } + + @Test + void longSingleWord_wrapsAt78WithIndent() { + String input = "a".repeat(100); + String expected = "a".repeat(79) + "\r\n " + "a".repeat(21); + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + @Test + void multiline_input_indentsSecondAndSubsequentOriginalLines() { + String input = "Line1\nLine2\nLine3"; + String expected = "Line1\r\n Line2\r\n Line3"; + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + @Test + void multiline_withLF_normalizedAndIndented() { + String input = "a".repeat(200); + String expected = "a".repeat(79) + "\r\n " + "a".repeat(78) + "\r\n " + "a".repeat(43); + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + 
@Test + void emptyLines_trimmedAndSkipped() { + String input = "Line1\n\nLine3"; + String expected = "Line1\r\n Line3"; + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + @Test + void whitespaceOnlyLines_ignored() { + String input = "Line1\n \n\t\t\nLine3"; + String expected = "Line1\r\n Line3"; + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + @Test + void longSecondLine_preservesIndentOnWraps() { + String line1 = "Header"; + String line2 = "b".repeat(90); + String input = line1 + "\n" + line2; + String expected = "Header\r\n " + "b".repeat(79) + "\r\n " + "b".repeat(11); + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + @Test + void labelLength_reducesFirstLineMaxLength() { + // With a label of length 20, first line should wrap at 79-20=59 chars + String label = "l".repeat(20); + String input = label + "a".repeat(150); + // First line: 59 chars, subsequent lines: 78 + String expected = label + "a".repeat(59) + "\r\n " + "a".repeat(78) + "\r\n " + "a".repeat(13); + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + @Test + void labelLength_zero_behavesAsDefault() { + String input = "a".repeat(100); + String expected = "a".repeat(79) + "\r\n " + "a".repeat(21); + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + @Test + void labelLength_withMultipleLines_onlyAffectsFirstLine() { + String label = "l".repeat(15); + String input = label + "a".repeat(100) + "\nSecond line content"; + // First line wraps at 79-15=64, then continues at 78 per line + // Second line starts fresh and wraps normally + String expected = label + "a".repeat(64) + "\r\n " + "a".repeat(36) + "\r\n Second line content"; + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + @Test + void wrapsAtWordBoundary_notMidWord() { + // Create a string with a word boundary at position 75 + // 
"a" repeated 75 times, then a space, then more characters + String input = "a".repeat(75) + " " + "b".repeat(20); + // Should wrap at the space (position 75), not at position 79 + String expected = "a".repeat(75) + "\r\n " + "b".repeat(20); + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + @Test + void wrapsAtWordBoundary_multipleSpaces() { + // Test with word boundary closer to the limit + String input = "a".repeat(70) + " word " + "b".repeat(20); + // Should wrap after "word" (at position 76) + String expected = "a".repeat(70) + " word\r\n " + "b".repeat(20); + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + @Test + void wrapsAtWordBoundary_withLabelLength() { + String label = "l".repeat(20); + // With label length=20, first line wraps at 78-20=58 + // Create string with word boundary at position 55 + String input = label + "a".repeat(55) + " " + "b".repeat(30); + // Should wrap at the space (position 55) + String expected = label + "a".repeat(55) + "\r\n " + "b".repeat(30); + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + // Tests for additional line separator characters + + @Test + void multiline_withCR_normalizedAndIndented() { + String input = "Line1\rLine2\rLine3"; + String expected = "Line1\r\n Line2\r\n Line3"; + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + @Test + void multiline_withCRLF_normalizedAndIndented() { + String input = "Line1\r\nLine2\r\nLine3"; + String expected = "Line1\r\n Line2\r\n Line3"; + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + @Test + void multiline_withVT_normalizedAndIndented() { + // VT (U+000B) - Vertical Tab + String input = "Line1\u000BLine2\u000BLine3"; + String expected = "Line1\r\n Line2\r\n Line3"; + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + @Test + void 
multiline_withFF_normalizedAndIndented() { + // FF (U+000C) - Form Feed + String input = "Line1\u000CLine2\u000CLine3"; + String expected = "Line1\r\n Line2\r\n Line3"; + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + @Test + void multiline_withNEL_normalizedAndIndented() { + // NEL (U+0085) - Next Line + String input = "Line1\u0085Line2\u0085Line3"; + String expected = "Line1\r\n Line2\r\n Line3"; + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + @Test + void multiline_withLS_normalizedAndIndented() { + // LS (U+2028) - Line Separator + String input = "Line1\u2028Line2\u2028Line3"; + String expected = "Line1\r\n Line2\r\n Line3"; + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + @Test + void multiline_withPS_normalizedAndIndented() { + // PS (U+2029) - Paragraph Separator + String input = "Line1\u2029Line2\u2029Line3"; + String expected = "Line1\r\n Line2\r\n Line3"; + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + @Test + void multiline_mixedSeparators_normalizedAndIndented() { + // Test with a mix of different line separators + String input = "Line1\nLine2\rLine3\r\nLine4\u000BLine5\u000CLine6\u0085Line7\u2028Line8\u2029Line9"; + String expected = "Line1\r\n Line2\r\n Line3\r\n Line4\r\n Line5\r\n Line6\r\n Line7\r\n Line8\r\n Line9"; + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + @Test + void emptyLines_withVariousSeparators_trimmedAndSkipped() { + // Test empty lines with different separators + String input = "Line1\n\nLine3\r\rLine5\u000B\u000BLine7"; + String expected = "Line1\r\n Line3\r\n Line5\r\n Line7"; + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + @Test + void longLine_withCRLF_wrapsAndIndents() { + String input = "a".repeat(100) + "\r\n" + "b".repeat(100); + String expected = "a".repeat(79) + "\r\n " + "a".repeat(21) + 
"\r\n " + "b".repeat(79) + "\r\n " + "b".repeat(21); + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } + + @Test + void longLine_withMixedSeparators_wrapsAndIndents() { + String input = "a".repeat(100) + "\n" + "b".repeat(100) + "\r" + "c".repeat(100); + String expected = "a".repeat(79) + "\r\n " + "a".repeat(21) + "\r\n " + "b".repeat(79) + "\r\n " + "b".repeat(21) + "\r\n " + "c".repeat(79) + "\r\n " + "c".repeat(21); + String out = callMultilineWrap(input); + assertThat(out).isEqualTo(expected); + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/util/cache/CacheFactoryBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/util/cache/CacheFactoryBeanTest.java index 9debb4aa5ca..3cd827e7715 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/cache/CacheFactoryBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/cache/CacheFactoryBeanTest.java @@ -109,7 +109,9 @@ public void init() throws IOException { RateLimitUtil.rateLimitMap.clear(); RateLimitUtil.rateLimits.clear(); } - + // Reset cache for each test + cache.rateLimitCache.clear(); + // Reset to default auth user authUser.setRateLimitTier(1); authUser.setSuperuser(false); @@ -163,15 +165,23 @@ public void testAuthenticatedUserGettingRateLimited() throws InterruptedExceptio authUser.setRateLimitTier(2); // 120 cals per hour - 1 added token every 30 seconds boolean rateLimited = false; int cnt; + long startTime = System.currentTimeMillis(); for (cnt = 0; cnt <200; cnt++) { rateLimited = !cache.checkRate(authUser, action); if (rateLimited) { break; } } - assertTrue(rateLimited); - assertEquals(120, cnt); + long endTime = System.currentTimeMillis(); + System.out.println("Test loop took " + (endTime - startTime) + " ms"); + //Add a few seconds to account for time outside loop + long durationMinutes = (6000 + endTime - startTime) / 60000L; + // 120 calls/hr = 2 calls/min. 
Add any tokens that may have been added during the test run + long expectedMax = 120 + (durationMinutes * 2); + assertTrue(rateLimited); + assertTrue(cnt >= 120 && cnt <= expectedMax, "cnt was " + cnt + ", expected between 120 and " + expectedMax); + for (cnt = 0; cnt <60; cnt++) { Thread.sleep(1000);// Wait for bucket to be replenished (check each second for 1 minute max) rateLimited = !cache.checkRate(authUser, action); diff --git a/src/test/java/edu/harvard/iq/dataverse/util/json/InAppNotificationsJsonPrinterTest.java b/src/test/java/edu/harvard/iq/dataverse/util/json/InAppNotificationsJsonPrinterTest.java index a58b21ea561..3eba488454f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/json/InAppNotificationsJsonPrinterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/json/InAppNotificationsJsonPrinterTest.java @@ -402,13 +402,16 @@ public void testAddFieldsByType_submittedDs() { userNotification.setObjectId(1L); userNotification.setRequestor(requestor); + DatasetVersion datasetVersion = mock(DatasetVersion.class); Dataset dataset = mock(Dataset.class); Dataverse owner = mock(Dataverse.class); + when(datasetVersion.getDataset()).thenReturn(dataset); + when(dataset.getGlobalId()).thenReturn(testGlobalId); when(dataset.getDisplayName()).thenReturn("Submitted Dataset"); when(dataset.getOwner()).thenReturn(owner); - when(datasetService.find(1L)).thenReturn(dataset); + when(datasetVersionService.find(1L)).thenReturn(datasetVersion); when(owner.getAlias()).thenReturn("reviewDv"); when(owner.getDisplayName()).thenReturn("Review Dataverse"); diff --git a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java index d1cb30e2bc3..cc78dfbc97c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java @@ -65,6 +65,58 @@ public class JsonParserTest { DatasetFieldType pubIdType; 
DatasetFieldType compoundSingleType; JsonParser sut; + + final static String guestbookJson = """ + { + "name": "my test guestbook", + "enabled": true, + "emailRequired": true, + "nameRequired": true, + "institutionRequired": false, + "positionRequired": false, + "customQuestions": [ + { + "question": "how's your day", + "required": true, + "displayOrder": 0, + "type": "text", + "hidden": false + }, + { + "question": "Describe yourself", + "required": false, + "displayOrder": 1, + "type": "textarea", + "hidden": false + }, + { + "question": "What color car do you drive", + "required": true, + "displayOrder": 2, + "type": "options", + "hidden": false, + "optionValues": [ + { + "value": "Red", + "displayOrder": 0 + }, + { + "value": "White", + "displayOrder": 1 + }, + { + "value": "Yellow", + "displayOrder": 2 + }, + { + "value": "Purple", + "displayOrder": 3 + } + ] + } + ] + } + """; public JsonParserTest() { } @@ -733,4 +785,137 @@ public void testEnum() throws JsonParseException { assertTrue(typesSet.contains(Type.REVOKEROLE), "Set contains REVOKEROLE"); assertTrue(typesSet.contains(Type.ASSIGNROLE), "Set contains ASSIGNROLE"); } + + @Test + public void testGuestbook() throws JsonParseException { + JsonObject jsonObj = JsonUtil.getJsonObject(guestbookJson); + Guestbook gb = new Guestbook(); + gb = sut.parseGuestbook(jsonObj, gb); + assertEquals(true, gb.isEnabled()); + assertEquals(3, gb.getCustomQuestions().size()); + assertEquals(4, gb.getCustomQuestions().get(2).getCustomQuestionValues().size()); + assertEquals("Purple", gb.getCustomQuestions().get(2).getCustomQuestionValues().get(3).getValueString()); + assertEquals(3, gb.getCustomQuestions().get(2).getCustomQuestionValues().get(3).getDisplayOrder()); + } + + @Test + public void testGuestbookResponse() throws JsonParseException { + JsonObject jsonObj = JsonUtil.getJsonObject(guestbookJson); + Guestbook gb = new Guestbook(); + gb = sut.parseGuestbook(jsonObj, gb); + Long i = 1L; + for (CustomQuestion cq : 
gb.getCustomQuestions()) { + cq.setId(i++); + cq.setRequired(true); + } + + final String guestbookResponseJson = """ + { + "name": "My Name", + "email": "my.email@example.com", + "institution": "Harvard", + "position": "Upright", + "answers": [ + { + "id": 1, + "value": "Good" + }, + { + "id": 2, + "value": ["Multi","Line"] + }, + { + "id": 3, + "value": "Yellow" + } + ] + } + """; + final String guestbookResponseJsonMissing3 = """ + { + "answers": [ + { + "id": 1, + "value": "Good" + }, + { + "id": 2, + "value": ["Multi","Line"] + } + ] + } + """; + + GuestbookResponse guestbookResponse = new GuestbookResponse(); + guestbookResponse.setGuestbook(gb); + jsonObj = JsonUtil.getJsonObject(guestbookResponseJson); + GuestbookResponse gbr = sut.parseGuestbookResponse(jsonObj, guestbookResponse); + assertTrue(gbr.getCustomQuestionResponses().size() == 3); + + // Test missing required question response + try { + jsonObj = JsonUtil.getJsonObject(guestbookResponseJsonMissing3); + gbr = sut.parseGuestbookResponse(jsonObj, guestbookResponse); + } catch (JsonParseException e) { + System.out.println(e.getMessage()); + assertTrue(e.getMessage().contains("What color car do you drive")); + } + // Test invalid option in question response + try { + jsonObj = JsonUtil.getJsonObject(guestbookResponseJson.replace("Yellow", "Green")); + gbr = sut.parseGuestbookResponse(jsonObj, guestbookResponse); + } catch (JsonParseException e) { + System.out.println(e.getMessage()); + assertTrue(e.getMessage().contains("not a valid option (Green)")); + } + // Test invalid Custom Question ID in question response + try { + jsonObj = JsonUtil.getJsonObject(guestbookResponseJson.replace("3", "4")); + gbr = sut.parseGuestbookResponse(jsonObj, guestbookResponse); + } catch (JsonParseException e) { + System.out.println(e.getMessage()); + assertTrue(e.getMessage().contains("ID 4 not found")); + } + + // Test overwrite name, email, institution and position. 
+ try { + jsonObj = JsonUtil.getJsonObject(guestbookResponseJson); + gbr = sut.parseGuestbookResponse(jsonObj, guestbookResponse); + assertEquals("My Name", gbr.getName()); + // Removing name from the JSON defaults it to the original value in guestbook response + gbr.setName("My Original Name"); + jsonObj = JsonUtil.getJsonObject(guestbookResponseJson.replace("\"name\": \"My Name\",", "")); + gbr = sut.parseGuestbookResponse(jsonObj, guestbookResponse); + assertEquals("My Original Name", gbr.getName()); + // test invalid email (does not change original) + gbr.setEmail("original@example.com"); + jsonObj = JsonUtil.getJsonObject(guestbookResponseJson.replace("my.email@example.com", "badEmail.com")); + gbr = sut.parseGuestbookResponse(jsonObj, guestbookResponse); + assertEquals("original@example.com", gbr.getEmail()); + // test valid email (overwrite email) + jsonObj = JsonUtil.getJsonObject(guestbookResponseJson.replace("my.email@example.com", "new@example.com")); + gbr = sut.parseGuestbookResponse(jsonObj, guestbookResponse); + assertEquals("new@example.com", gbr.getEmail()); + } catch (JsonParseException e) { + System.out.println(e.getMessage()); + assertTrue(e.getMessage().contains("ID 4 not found")); + } + + // Test missing "answers" array + try { + jsonObj = JsonUtil.getJsonObject(guestbookResponseJson.replace("answers", "answer")); + gbr = sut.parseGuestbookResponse(jsonObj, guestbookResponse); + } catch (JsonParseException e) { + System.out.println(e.getMessage()); + assertTrue(e.getMessage().contains("Guestbook Response entry is required but not present")); + } + // Test missing "answers" empty array + try { + jsonObj = JsonUtil.getJsonObject("{\"answers\" : []}"); + gbr = sut.parseGuestbookResponse(jsonObj, guestbookResponse); + } catch (JsonParseException e) { + System.out.println(e.getMessage()); + assertTrue(e.getMessage().contains("Guestbook Response entry is required but not present")); + } + } } diff --git 
a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonPrinterTest.java b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonPrinterTest.java index 2f4fda068d4..34676335857 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonPrinterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonPrinterTest.java @@ -2,6 +2,7 @@ import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.DatasetFieldType.FieldType; +import edu.harvard.iq.dataverse.UserNotification.Type; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.RoleAssignee; import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; @@ -12,7 +13,12 @@ import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import edu.harvard.iq.dataverse.UserNotification.Type; +import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.template.TemplateBuilder; +import jakarta.json.*; +import org.assertj.core.util.Lists; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import java.sql.Timestamp; import java.time.Instant; @@ -20,19 +26,7 @@ import java.util.*; import java.util.stream.Collectors; -import edu.harvard.iq.dataverse.util.template.TemplateBuilder; - -import jakarta.json.*; - -import edu.harvard.iq.dataverse.util.BundleUtil; -import org.assertj.core.util.Lists; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.BeforeEach; - -import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.*; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.assertFalse; public class JsonPrinterTest { @@ -480,6 +474,99 @@ public void testDatasetWithNondefaultType() { assertEquals(sut, result); } + @Test + public void 
testDatasetWithGuestbook() { + String sut = "foobar"; + DatasetType foobar = new DatasetType(); + foobar.setName(sut); + + Guestbook guestbook = new Guestbook(); + guestbook.setId(1L); + guestbook.setEnabled(true); + guestbook.setName("Test Guestbook"); + guestbook.setEmailRequired(true); + guestbook.setCreateTime(Timestamp.from(Instant.now())); + + int cqOrder = 0; + CustomQuestion cq1 = new CustomQuestion(); + cq1.setDisplayOrder(cqOrder); + cq1.setId(Long.valueOf(++cqOrder)); + cq1.setGuestbook(guestbook); + cq1.setRequired(true); + cq1.setQuestionString("My first question"); + cq1.setQuestionType("text"); // options, textarea, text + + CustomQuestion cq2 = new CustomQuestion(); + cq2.setDisplayOrder(cqOrder); + cq2.setId(Long.valueOf(++cqOrder)); + cq2.setGuestbook(guestbook); + cq2.setRequired(false); + cq2.setQuestionString("My second question"); + cq2.setQuestionType("textarea"); + + CustomQuestion cq3 = new CustomQuestion(); + cq3.setDisplayOrder(cqOrder); + cq3.setId(Long.valueOf(++cqOrder)); + cq3.setGuestbook(guestbook); + cq3.setRequired(false); + cq3.setQuestionString("My third question"); + cq3.setQuestionType("options"); + List values = new ArrayList<>(); + int cqvOrder = 0; + CustomQuestionValue cqv1 = new CustomQuestionValue(); + cqv1.setValueString("Red"); + cqv1.setDisplayOrder(cqvOrder); + cqv1.setId(Long.valueOf(++cqvOrder)); + values.add(cqv1); + CustomQuestionValue cqv2 = new CustomQuestionValue(); + cqv2.setValueString("White"); + cqv2.setDisplayOrder(cqvOrder); + cqv2.setId(Long.valueOf(++cqvOrder)); + values.add(cqv2); + CustomQuestionValue cqv3 = new CustomQuestionValue(); + cqv3.setValueString("Blue"); + cqv3.setDisplayOrder(cqvOrder); + cqv3.setId(Long.valueOf(++cqvOrder)); + values.add(cqv3); + cq3.setCustomQuestionValues(values); + List customQuestions = new ArrayList<>(); + customQuestions.add(cq1); + customQuestions.add(cq2); + customQuestions.add(cq3); + guestbook.setCustomQuestions(customQuestions); + + Dataverse dv = new 
Dataverse(); + dv.setId(41L); + Dataset dataset = createDataset(42); + dataset.setDatasetType(foobar); + dataset.setOwner(dv); + guestbook.setDataverse(dataset.getOwner()); + dataset.setGuestbook(guestbook); + + // verify that the guestbook id is in the dataset response + var jsob = JsonPrinter.json(dataset.getLatestVersion(), null, false, false, false, false).build(); + System.out.println(jsob); + var gbID = jsob.getInt("guestbookId"); + assertEquals(1, gbID); + + var gb = JsonPrinter.json(guestbook).build(); + System.out.println(gb); + + // verify guestbook values + assertEquals("Test Guestbook", gb.getString("name")); + assertEquals(true, gb.getBoolean("emailRequired")); + assertEquals(false, gb.getBoolean("nameRequired")); + assertEquals(3, gb.getJsonArray("customQuestions").size()); + // verify multiple choice question + var result_cq3 = gb.getJsonArray("customQuestions"); + System.out.println(result_cq3); + var result_cq3_options = result_cq3.getJsonObject(2).getJsonArray("optionValues"); // question 3 is index 2 + System.out.println(result_cq3_options); + assertEquals(3, result_cq3_options.size()); + var result_cq3_options2 = result_cq3_options.getJsonObject(1); // option 2 is index 1 + assertEquals("White", result_cq3_options2.getString("value")); + } + @Test public void testJsonArrayDataverseCollections() { List collections = new ArrayList<>(); diff --git a/src/test/java/edu/harvard/iq/dataverse/workflow/internalspi/AuthorizedExternalStepTest.java b/src/test/java/edu/harvard/iq/dataverse/workflow/internalspi/AuthorizedExternalStepTest.java new file mode 100644 index 00000000000..0db89cb61fb --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/workflow/internalspi/AuthorizedExternalStepTest.java @@ -0,0 +1,96 @@ +package edu.harvard.iq.dataverse.workflow.internalspi; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import edu.harvard.iq.dataverse.branding.BrandingUtil; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; 
+import edu.harvard.iq.dataverse.workflow.WorkflowContext; +import org.apache.hc.client5.http.classic.methods.HttpPost; +import org.apache.hc.core5.http.io.entity.StringEntity; +import org.junit.jupiter.api.Test; + +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.*; + +class AuthorizedExternalStepTest { + + @Test + void testRequestEntityIsSetCorrectly() throws Exception { + // Mock the static field before any code that uses it + // Mock and set BrandingUtil.dataverseService + edu.harvard.iq.dataverse.DataverseServiceBean mockDataverseService = mock(edu.harvard.iq.dataverse.DataverseServiceBean.class); + when(mockDataverseService.getRootDataverseName()).thenReturn("Root"); + + Field dataverseServiceField = BrandingUtil.class.getDeclaredField("dataverseService"); + dataverseServiceField.setAccessible(true); + dataverseServiceField.set(null, mockDataverseService); + + SettingsServiceBean mockSettings = mock(SettingsServiceBean.class); + when(mockSettings.getValueForKey(any())).thenReturn(null); + + Field field = BrandingUtil.class.getDeclaredField("settingsService"); + field.setAccessible(true); + field.set(null, mockSettings); + + + // Prepare parameters + Map params = new HashMap<>(); + params.put("method", "POST"); + params.put("url", "http://example.com/api"); + params.put("contentType", "application/json"); + params.put("body", "{\"invocationId\": \"${invocationId}\"}"); + + // Mock context + WorkflowContext context = mock(WorkflowContext.class); + when(context.getInvocationId()).thenReturn("12345"); + when(context.getDataset()).thenReturn(MockDataset.create()); + + // Create the step + AuthorizedExternalStep step = new AuthorizedExternalStep(params); + + + // Directly test buildMethod to verify the request entity + HttpPost request = (HttpPost) step.buildMethod(false, context); + StringEntity entity = 
(StringEntity) request.getEntity(); + assertNotNull(entity); + assertEquals("application/json", entity.getContentType()); + String body = new String(entity.getContent().readAllBytes()); + assertTrue(body.contains("\"invocationId\": \"12345\"")); + } + + static class MockDataverse extends edu.harvard.iq.dataverse.Dataverse { + @Override + public java.util.List getCitationDatasetFieldTypes() { + return new ArrayList<>(); + } + } + + // Helper mock dataset + static class MockDataset extends edu.harvard.iq.dataverse.Dataset { + private edu.harvard.iq.dataverse.GlobalId globalId; + private edu.harvard.iq.dataverse.Dataverse owner = mock(edu.harvard.iq.dataverse.Dataverse.class); + + static MockDataset create() { + MockDataset ds = new MockDataset(); + ds.setId(1L); + ds.setIdentifier("ds1"); + ds.globalId = new edu.harvard.iq.dataverse.GlobalId("doi", "10.1234/DS1", null, null, null, null); + return ds; + } + + @Override + public edu.harvard.iq.dataverse.GlobalId getGlobalId() { + return globalId; + } + + @Override + public edu.harvard.iq.dataverse.Dataverse getOwner() { + return owner; + } + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/workflow/internalspi/COARNotifyRelationshipAnnouncementStepTest.java b/src/test/java/edu/harvard/iq/dataverse/workflow/internalspi/COARNotifyRelationshipAnnouncementStepTest.java new file mode 100644 index 00000000000..ce128c452b4 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/workflow/internalspi/COARNotifyRelationshipAnnouncementStepTest.java @@ -0,0 +1,1148 @@ +package edu.harvard.iq.dataverse.workflow.internalspi; + +import edu.harvard.iq.dataverse.ControlledVocabularyValue; +import edu.harvard.iq.dataverse.DatasetField; +import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; +import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.DatasetFieldValue; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import 
org.junit.jupiter.api.Test; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; + +class COARNotifyRelationshipAnnouncementStepTest { + + private COARNotifyRelationshipAnnouncementStep step; + private static DatasetFieldType multiValueFieldType; + private static DatasetFieldType singleValueFieldType; + private static DatasetFieldType compoundFieldType; + private static DatasetFieldType childFieldType1; + private static DatasetFieldType childFieldType2; + + @BeforeAll + static void setUpFieldTypes() { + // Setup field types + multiValueFieldType = new DatasetFieldType(); + multiValueFieldType.setId(1L); + multiValueFieldType.setName("testMultiField"); + multiValueFieldType.setAllowMultiples(true); + multiValueFieldType.setFieldType(DatasetFieldType.FieldType.TEXT); + multiValueFieldType.setChildDatasetFieldTypes(new ArrayList<>()); + + singleValueFieldType = new DatasetFieldType(); + singleValueFieldType.setId(2L); + singleValueFieldType.setName("testSingleField"); + singleValueFieldType.setAllowMultiples(false); + singleValueFieldType.setFieldType(DatasetFieldType.FieldType.TEXT); + singleValueFieldType.setChildDatasetFieldTypes(new ArrayList<>()); + + // Setup compound field type with child fields + compoundFieldType = new DatasetFieldType(); + compoundFieldType.setId(3L); + compoundFieldType.setName("testCompoundField"); + compoundFieldType.setAllowMultiples(true); + compoundFieldType.setFieldType(DatasetFieldType.FieldType.NONE); + + childFieldType1 = new DatasetFieldType(); + childFieldType1.setId(4L); + childFieldType1.setName("authorName"); + childFieldType1.setFieldType(DatasetFieldType.FieldType.TEXT); + childFieldType1.setParentDatasetFieldType(compoundFieldType); + childFieldType1.setChildDatasetFieldTypes(new ArrayList<>()); + + childFieldType2 = new DatasetFieldType(); + childFieldType2.setId(5L); + childFieldType2.setName("authorAffiliation"); + 
childFieldType2.setFieldType(DatasetFieldType.FieldType.TEXT); + childFieldType2.setParentDatasetFieldType(compoundFieldType); + childFieldType2.setChildDatasetFieldTypes(new ArrayList<>()); + + compoundFieldType.setChildDatasetFieldTypes(List.of(childFieldType1, childFieldType2)); + } + + @BeforeEach + void setUp() { + Map params = new HashMap<>(); + step = new COARNotifyRelationshipAnnouncementStep(params); + } + + @Test + void testFilterNewValues_MultiValue_AllNew() throws Exception { + // Create current field with 3 values + DatasetField currentField = createMultiValueField(multiValueFieldType, "value1", "value2", "value3"); + + // Create prior field with no values + DatasetField priorField = createMultiValueField(multiValueFieldType); + + // Filter + DatasetField filtered = invokeFilterNewValues(currentField, priorField); + + // All values should be included + assertEquals(3, filtered.getDatasetFieldValues().size()); + assertTrue(containsValue(filtered, "value1")); + assertTrue(containsValue(filtered, "value2")); + assertTrue(containsValue(filtered, "value3")); + } + + @Test + void testFilterNewValues_MultiValue_SomeNew() throws Exception { + // Create current field with 3 values + DatasetField currentField = createMultiValueField(multiValueFieldType, "value1", "value2", "value3"); + + // Create prior field with 2 existing values + DatasetField priorField = createMultiValueField(multiValueFieldType, "value1", "value2"); + + // Filter + DatasetField filtered = invokeFilterNewValues(currentField, priorField); + + // Only new value should be included + assertEquals(1, filtered.getDatasetFieldValues().size()); + assertFalse(containsValue(filtered, "value1")); + assertFalse(containsValue(filtered, "value2")); + assertTrue(containsValue(filtered, "value3")); + } + + @Test + void testFilterNewValues_MultiValue_NoneNew() throws Exception { + // Create current field with 2 values + DatasetField currentField = createMultiValueField(multiValueFieldType, "value1", 
"value2"); + + // Create prior field with same values + DatasetField priorField = createMultiValueField(multiValueFieldType, "value1", "value2"); + + // Filter + DatasetField filtered = invokeFilterNewValues(currentField, priorField); + + // No values should be included + assertEquals(0, filtered.getDatasetFieldValues().size()); + assertTrue(filtered.isEmpty()); + } + + @Test + void testFilterNewValues_SingleValue_Changed() throws Exception { + // Create current field with new value + DatasetField currentField = createSingleValueField(singleValueFieldType, "newValue"); + + // Create prior field with old value + DatasetField priorField = createSingleValueField(singleValueFieldType, "oldValue"); + + // Filter + DatasetField filtered = invokeFilterNewValues(currentField, priorField); + + // New value should be included + assertFalse(filtered.isEmpty()); + assertEquals("newValue", filtered.getValue()); + } + + @Test + void testFilterNewValues_SingleValue_Unchanged() throws Exception { + // Create current field with same value + DatasetField currentField = createSingleValueField(singleValueFieldType, "sameValue"); + + // Create prior field with same value + DatasetField priorField = createSingleValueField(singleValueFieldType, "sameValue"); + + // Filter + DatasetField filtered = invokeFilterNewValues(currentField, priorField); + + // No value should be included + assertTrue(filtered.isEmpty()); + } + + @Test + void testFilterNewValues_DoesNotModifyOriginal() throws Exception { + // Create current field with 3 values + DatasetField currentField = createMultiValueField(multiValueFieldType, "value1", "value2", "value3"); + int originalSize = currentField.getDatasetFieldValues().size(); + + // Create prior field with 2 existing values + DatasetField priorField = createMultiValueField(multiValueFieldType, "value1", "value2"); + + // Filter + DatasetField filtered = invokeFilterNewValues(currentField, priorField); + + // Original field should be unchanged + 
assertEquals(originalSize, currentField.getDatasetFieldValues().size()); + assertTrue(containsValue(currentField, "value1")); + assertTrue(containsValue(currentField, "value2")); + assertTrue(containsValue(currentField, "value3")); + + // Filtered field should only have new value + assertEquals(1, filtered.getDatasetFieldValues().size()); + assertTrue(containsValue(filtered, "value3")); + } + + @Test + void testFilterNewValues_CompoundValue_AllNew() throws Exception { + // Create current field with 2 compound values + DatasetField currentField = createCompoundField(compoundFieldType, + new String[]{"Author1", "Affiliation1"}, + new String[] { "Author2", "Affiliation2" }); + + // Create prior field with no values + DatasetField priorField = createCompoundField(compoundFieldType); + + // Filter + DatasetField filtered = invokeFilterNewValues(currentField, priorField); + + // All compound values should be included + assertEquals(2, filtered.getDatasetFieldCompoundValues().size()); + assertTrue(containsCompoundValue(filtered, "Author1", "Affiliation1")); + assertTrue(containsCompoundValue(filtered, "Author2", "Affiliation2")); + } + + @Test + void testFilterNewValues_CompoundValue_SomeNew() throws Exception { + // Create current field with 3 compound values + DatasetField currentField = createCompoundField(compoundFieldType, + new String[]{"Author1", "Affiliation1"}, + new String[]{"Author2", "Affiliation2"}, + new String[]{"Author3", "Affiliation3"}); + + // Create prior field with 2 existing compound values + DatasetField priorField = createCompoundField(compoundFieldType, + new String[]{"Author1", "Affiliation1"}, + new String[] { "Author2", "Affiliation2" }); + + // Filter + DatasetField filtered = invokeFilterNewValues(currentField, priorField); + + // Only new compound value should be included + assertEquals(1, filtered.getDatasetFieldCompoundValues().size()); + assertFalse(containsCompoundValue(filtered, "Author1", "Affiliation1")); + 
assertFalse(containsCompoundValue(filtered, "Author2", "Affiliation2")); + assertTrue(containsCompoundValue(filtered, "Author3", "Affiliation3")); + } + + @Test + void testFilterNewValues_CompoundValue_NoneNew() throws Exception { + // Create current field with 2 compound values + DatasetField currentField = createCompoundField(compoundFieldType, + new String[]{"Author1", "Affiliation1"}, + new String[] { "Author2", "Affiliation2" }); + + // Create prior field with same compound values + DatasetField priorField = createCompoundField(compoundFieldType, + new String[]{"Author1", "Affiliation1"}, + new String[] { "Author2", "Affiliation2" }); + + // Filter + DatasetField filtered = invokeFilterNewValues(currentField, priorField); + + // No compound values should be included + assertEquals(0, filtered.getDatasetFieldCompoundValues().size()); + assertTrue(filtered.isEmpty()); + } + + @Test + void testFilterNewValues_CompoundValue_PartialMatch() throws Exception { + // Create current field with compound value where one child field changed + DatasetField currentField = createCompoundField(compoundFieldType, + new String[] { "Author1", "NewAffiliation" }); + + // Create prior field with same author but different affiliation + DatasetField priorField = createCompoundField(compoundFieldType, + new String[]{"Author1", "OldAffiliation"}); + + // Filter + DatasetField filtered = invokeFilterNewValues(currentField, priorField); + + // Should be treated as a new compound value since child field changed + assertEquals(1, filtered.getDatasetFieldCompoundValues().size()); + assertTrue(containsCompoundValue(filtered, "Author1", "NewAffiliation")); + } + + @Test + void testFilterNewValues_CompoundValue_DoesNotModifyOriginal() throws Exception { + // Create current field with 3 compound values + DatasetField currentField = createCompoundField(compoundFieldType, + new String[]{"Author1", "Affiliation1"}, + new String[]{"Author2", "Affiliation2"}, + new String[]{"Author3", 
"Affiliation3"}); + int originalSize = currentField.getDatasetFieldCompoundValues().size(); + + // Create prior field with 2 existing compound values + DatasetField priorField = createCompoundField(compoundFieldType, + new String[]{"Author1", "Affiliation1"}, + new String[] { "Author2", "Affiliation2" }); + + // Filter + DatasetField filtered = invokeFilterNewValues(currentField, priorField); + + // Original field should be unchanged + assertEquals(originalSize, currentField.getDatasetFieldCompoundValues().size()); + assertTrue(containsCompoundValue(currentField, "Author1", "Affiliation1")); + assertTrue(containsCompoundValue(currentField, "Author2", "Affiliation2")); + assertTrue(containsCompoundValue(currentField, "Author3", "Affiliation3")); + + // Filtered field should only have new compound value + assertEquals(1, filtered.getDatasetFieldCompoundValues().size()); + assertTrue(containsCompoundValue(filtered, "Author3", "Affiliation3")); + } + + @Test + void testFilterNewValues_ControlledVocab_AllNew() throws Exception { + // Setup controlled vocabulary field type + DatasetFieldType cvFieldType = new DatasetFieldType(); + cvFieldType.setName("testCVField"); + cvFieldType.setAllowMultiples(true); + cvFieldType.setFieldType(DatasetFieldType.FieldType.TEXT); + cvFieldType.setAllowControlledVocabulary(true); + cvFieldType.setChildDatasetFieldTypes(new ArrayList<>()); + + // Create controlled vocabulary values + ControlledVocabularyValue cvv1 = new ControlledVocabularyValue(); + cvv1.setStrValue("Medicine, Health and Life Sciences"); + cvv1.setDatasetFieldType(cvFieldType); + + ControlledVocabularyValue cvv2 = new ControlledVocabularyValue(); + cvv2.setStrValue("Social Sciences"); + cvv2.setDatasetFieldType(cvFieldType); + + ControlledVocabularyValue cvv3 = new ControlledVocabularyValue(); + cvv3.setStrValue("Engineering"); + cvv3.setDatasetFieldType(cvFieldType); + + // Create current field with 3 CV values + DatasetField currentField = new DatasetField(); + 
currentField.setDatasetFieldType(cvFieldType); + currentField.setControlledVocabularyValues(List.of(cvv1, cvv2, cvv3)); + + // Create prior field with no values + DatasetField priorField = new DatasetField(); + priorField.setDatasetFieldType(cvFieldType); + priorField.setControlledVocabularyValues(new ArrayList<>()); + + // Filter + DatasetField filtered = invokeFilterNewValues(currentField, priorField); + + // All CV values should be included + assertEquals(3, filtered.getControlledVocabularyValues().size()); + assertTrue(containsControlledVocabValue(filtered, "Medicine, Health and Life Sciences")); + assertTrue(containsControlledVocabValue(filtered, "Social Sciences")); + assertTrue(containsControlledVocabValue(filtered, "Engineering")); + } + + @Test + void testFilterNewValues_ControlledVocab_SomeNew() throws Exception { + // Setup controlled vocabulary field type + DatasetFieldType cvFieldType = new DatasetFieldType(); + cvFieldType.setName("testCVField"); + cvFieldType.setAllowMultiples(true); + cvFieldType.setFieldType(DatasetFieldType.FieldType.TEXT); + cvFieldType.setAllowControlledVocabulary(true); + cvFieldType.setChildDatasetFieldTypes(new ArrayList<>()); + + // Create controlled vocabulary values + ControlledVocabularyValue cvv1 = new ControlledVocabularyValue(); + cvv1.setStrValue("Medicine, Health and Life Sciences"); + cvv1.setDatasetFieldType(cvFieldType); + + ControlledVocabularyValue cvv2 = new ControlledVocabularyValue(); + cvv2.setStrValue("Social Sciences"); + cvv2.setDatasetFieldType(cvFieldType); + + ControlledVocabularyValue cvv3 = new ControlledVocabularyValue(); + cvv3.setStrValue("Engineering"); + cvv3.setDatasetFieldType(cvFieldType); + + // Create current field with 3 CV values + DatasetField currentField = new DatasetField(); + currentField.setDatasetFieldType(cvFieldType); + currentField.setControlledVocabularyValues(List.of(cvv1, cvv2, cvv3)); + + // Create prior field with 2 existing CV values + DatasetField priorField = new 
DatasetField(); + priorField.setDatasetFieldType(cvFieldType); + priorField.setControlledVocabularyValues(List.of(cvv1, cvv2)); + + // Filter + DatasetField filtered = invokeFilterNewValues(currentField, priorField); + + // Only new CV value should be included + assertEquals(1, filtered.getControlledVocabularyValues().size()); + assertFalse(containsControlledVocabValue(filtered, "Medicine, Health and Life Sciences")); + assertFalse(containsControlledVocabValue(filtered, "Social Sciences")); + assertTrue(containsControlledVocabValue(filtered, "Engineering")); + } + + @Test + void testFilterNewValues_ControlledVocab_NoneNew() throws Exception { + // Setup controlled vocabulary field type + DatasetFieldType cvFieldType = new DatasetFieldType(); + cvFieldType.setName("testCVField"); + cvFieldType.setAllowMultiples(true); + cvFieldType.setFieldType(DatasetFieldType.FieldType.TEXT); + cvFieldType.setAllowControlledVocabulary(true); + cvFieldType.setChildDatasetFieldTypes(new ArrayList<>()); + + // Create controlled vocabulary values + ControlledVocabularyValue cvv1 = new ControlledVocabularyValue(); + cvv1.setStrValue("Medicine, Health and Life Sciences"); + cvv1.setDatasetFieldType(cvFieldType); + + ControlledVocabularyValue cvv2 = new ControlledVocabularyValue(); + cvv2.setStrValue("Social Sciences"); + cvv2.setDatasetFieldType(cvFieldType); + + // Create current field with 2 CV values + DatasetField currentField = new DatasetField(); + currentField.setDatasetFieldType(cvFieldType); + currentField.setControlledVocabularyValues(List.of(cvv1, cvv2)); + + // Create prior field with same CV values + DatasetField priorField = new DatasetField(); + priorField.setDatasetFieldType(cvFieldType); + priorField.setControlledVocabularyValues(List.of(cvv1, cvv2)); + + // Filter + DatasetField filtered = invokeFilterNewValues(currentField, priorField); + + // No CV values should be included + assertEquals(0, filtered.getControlledVocabularyValues().size()); + 
assertTrue(filtered.isEmpty()); + } + + @Test + void testFilterNewValues_ControlledVocab_SingleValue_Changed() throws Exception { + // Setup controlled vocabulary field type (non-multiple) + DatasetFieldType cvFieldType = new DatasetFieldType(); + cvFieldType.setName("testCVField"); + cvFieldType.setAllowMultiples(false); + cvFieldType.setFieldType(DatasetFieldType.FieldType.TEXT); + cvFieldType.setAllowControlledVocabulary(true); + cvFieldType.setChildDatasetFieldTypes(new ArrayList<>()); + + // Create controlled vocabulary values + ControlledVocabularyValue cvvOld = new ControlledVocabularyValue(); + cvvOld.setStrValue("Medicine, Health and Life Sciences"); + cvvOld.setDatasetFieldType(cvFieldType); + + ControlledVocabularyValue cvvNew = new ControlledVocabularyValue(); + cvvNew.setStrValue("Social Sciences"); + cvvNew.setDatasetFieldType(cvFieldType); + + // Create current field with new CV value + DatasetField currentField = new DatasetField(); + currentField.setDatasetFieldType(cvFieldType); + currentField.setControlledVocabularyValues(List.of(cvvNew)); + + // Create prior field with old CV value + DatasetField priorField = new DatasetField(); + priorField.setDatasetFieldType(cvFieldType); + priorField.setControlledVocabularyValues(List.of(cvvOld)); + + // Filter + DatasetField filtered = invokeFilterNewValues(currentField, priorField); + + // New CV value should be included + assertFalse(filtered.isEmpty()); + assertEquals(1, filtered.getControlledVocabularyValues().size()); + assertTrue(containsControlledVocabValue(filtered, "Social Sciences")); + } + + @Test + void testFilterNewValues_CompoundWithControlledVocabChild_AllNew() throws Exception { + // Setup compound field type with CV child field + DatasetFieldType compoundType = new DatasetFieldType(); + compoundType.setName("testCompoundWithCV"); + compoundType.setAllowMultiples(true); + compoundType.setFieldType(DatasetFieldType.FieldType.NONE); + + DatasetFieldType childTextType = new DatasetFieldType(); 
+ childTextType.setName("childText"); + childTextType.setFieldType(DatasetFieldType.FieldType.TEXT); + childTextType.setParentDatasetFieldType(compoundType); + childTextType.setChildDatasetFieldTypes(new ArrayList<>()); + + DatasetFieldType childCVType = new DatasetFieldType(); + childCVType.setName("childCV"); + childCVType.setFieldType(DatasetFieldType.FieldType.TEXT); + childCVType.setAllowControlledVocabulary(true); + childCVType.setParentDatasetFieldType(compoundType); + childCVType.setChildDatasetFieldTypes(new ArrayList<>()); + + compoundType.setChildDatasetFieldTypes(List.of(childTextType, childCVType)); + + // Create controlled vocabulary values + ControlledVocabularyValue cvv1 = new ControlledVocabularyValue(); + cvv1.setStrValue("ark"); + cvv1.setDatasetFieldType(childCVType); + + ControlledVocabularyValue cvv2 = new ControlledVocabularyValue(); + cvv2.setStrValue("doi"); + cvv2.setDatasetFieldType(childCVType); + + // Create current field with 2 compound values containing CV child fields + DatasetField currentField = new DatasetField(); + currentField.setDatasetFieldType(compoundType); + + List compoundValues = new ArrayList<>(); + + // First compound value + DatasetFieldCompoundValue cv1 = new DatasetFieldCompoundValue(); + cv1.setParentDatasetField(currentField); + + DatasetField child1Text = new DatasetField(); + child1Text.setDatasetFieldType(childTextType); + child1Text.setParentDatasetFieldCompoundValue(cv1); + child1Text.setSingleValue("Value1"); + + DatasetField child1CV = new DatasetField(); + child1CV.setDatasetFieldType(childCVType); + child1CV.setParentDatasetFieldCompoundValue(cv1); + child1CV.setControlledVocabularyValues(List.of(cvv1)); + + cv1.setChildDatasetFields(List.of(child1Text, child1CV)); + compoundValues.add(cv1); + + // Second compound value + DatasetFieldCompoundValue cv2 = new DatasetFieldCompoundValue(); + cv2.setParentDatasetField(currentField); + + DatasetField child2Text = new DatasetField(); + 
child2Text.setDatasetFieldType(childTextType); + child2Text.setParentDatasetFieldCompoundValue(cv2); + child2Text.setSingleValue("Value2"); + + DatasetField child2CV = new DatasetField(); + child2CV.setDatasetFieldType(childCVType); + child2CV.setParentDatasetFieldCompoundValue(cv2); + child2CV.setControlledVocabularyValues(List.of(cvv2)); + + cv2.setChildDatasetFields(List.of(child2Text, child2CV)); + compoundValues.add(cv2); + + currentField.setDatasetFieldCompoundValues(compoundValues); + + // Create prior field with no values + DatasetField priorField = new DatasetField(); + priorField.setDatasetFieldType(compoundType); + priorField.setDatasetFieldCompoundValues(new ArrayList<>()); + + // Filter + DatasetField filtered = invokeFilterNewValues(currentField, priorField); + + // All compound values should be included + assertEquals(2, filtered.getDatasetFieldCompoundValues().size()); + } + + @Test + void testFilterNewValues_CompoundWithControlledVocabChild_CVChanged() throws Exception { + // Setup compound field type with CV child field + DatasetFieldType compoundType = new DatasetFieldType(); + compoundType.setId(1L); + compoundType.setName("testCompoundWithCV"); + compoundType.setAllowMultiples(true); + compoundType.setFieldType(DatasetFieldType.FieldType.NONE); + + DatasetFieldType childTextType = new DatasetFieldType(); + childTextType.setId(2L); + childTextType.setName("childText"); + childTextType.setFieldType(DatasetFieldType.FieldType.TEXT); + childTextType.setParentDatasetFieldType(compoundType); + childTextType.setChildDatasetFieldTypes(new ArrayList<>()); + + DatasetFieldType childCVType = new DatasetFieldType(); + childCVType.setId(3L); + childCVType.setName("childCV"); + childCVType.setFieldType(DatasetFieldType.FieldType.TEXT); + childCVType.setAllowControlledVocabulary(true); + childCVType.setParentDatasetFieldType(compoundType); + childCVType.setChildDatasetFieldTypes(new ArrayList<>()); + + 
compoundType.setChildDatasetFieldTypes(List.of(childTextType, childCVType)); + + // Create controlled vocabulary values + ControlledVocabularyValue cvvOld = new ControlledVocabularyValue(); + cvvOld.setStrValue("ark"); + cvvOld.setDatasetFieldType(childCVType); + + ControlledVocabularyValue cvvNew = new ControlledVocabularyValue(); + cvvNew.setStrValue("doi"); + cvvNew.setDatasetFieldType(childCVType); + + // Create current field with compound value containing new CV + DatasetField currentField = new DatasetField(); + currentField.setDatasetFieldType(compoundType); + + DatasetFieldCompoundValue currentCV = new DatasetFieldCompoundValue(); + currentCV.setParentDatasetField(currentField); + + DatasetField currentChildText = new DatasetField(); + currentChildText.setDatasetFieldType(childTextType); + currentChildText.setParentDatasetFieldCompoundValue(currentCV); + currentChildText.setSingleValue("SameValue"); + + DatasetField currentChildCV = new DatasetField(); + currentChildCV.setDatasetFieldType(childCVType); + currentChildCV.setParentDatasetFieldCompoundValue(currentCV); + currentChildCV.setControlledVocabularyValues(List.of(cvvNew)); + + currentCV.setChildDatasetFields(List.of(currentChildText, currentChildCV)); + currentField.setDatasetFieldCompoundValues(List.of(currentCV)); + + // Create prior field with compound value containing old CV + DatasetField priorField = new DatasetField(); + priorField.setDatasetFieldType(compoundType); + + DatasetFieldCompoundValue priorCV = new DatasetFieldCompoundValue(); + priorCV.setParentDatasetField(priorField); + + DatasetField priorChildText = new DatasetField(); + priorChildText.setDatasetFieldType(childTextType); + priorChildText.setParentDatasetFieldCompoundValue(priorCV); + priorChildText.setSingleValue("SameValue"); + + DatasetField priorChildCV = new DatasetField(); + priorChildCV.setDatasetFieldType(childCVType); + priorChildCV.setParentDatasetFieldCompoundValue(priorCV); + 
priorChildCV.setControlledVocabularyValues(List.of(cvvOld)); + + priorCV.setChildDatasetFields(List.of(priorChildText, priorChildCV)); + priorField.setDatasetFieldCompoundValues(List.of(priorCV)); + + // Filter + DatasetField filtered = invokeFilterNewValues(currentField, priorField); + + // Should be treated as new compound value since CV child changed + assertEquals(1, filtered.getDatasetFieldCompoundValues().size()); + + // Verify the CV value in the filtered compound + DatasetFieldCompoundValue filteredCV = filtered.getDatasetFieldCompoundValues().get(0); + DatasetField filteredChildCV = filteredCV.getChildDatasetFields().stream() + .filter(f -> f.getDatasetFieldType().equals(childCVType)) + .findFirst() + .orElse(null); + + assertNotNull(filteredChildCV); + assertEquals(1, filteredChildCV.getControlledVocabularyValues().size()); + assertEquals("doi", filteredChildCV.getControlledVocabularyValues().get(0).getStrValue()); + } + + @Test + void testFilterNewValues_CompoundWithControlledVocabChild_CVUnchanged() throws Exception { + // Setup compound field type with CV child field + DatasetFieldType compoundType = new DatasetFieldType(); + compoundType.setId(1L); + compoundType.setName("testCompoundWithCV"); + compoundType.setAllowMultiples(true); + compoundType.setFieldType(DatasetFieldType.FieldType.NONE); + + DatasetFieldType childTextType = new DatasetFieldType(); + childTextType.setId(2L); + childTextType.setName("childText"); + childTextType.setFieldType(DatasetFieldType.FieldType.TEXT); + childTextType.setParentDatasetFieldType(compoundType); + childTextType.setChildDatasetFieldTypes(new ArrayList<>()); + + DatasetFieldType childCVType = new DatasetFieldType(); + childCVType.setId(3L); + childCVType.setName("childCV"); + childCVType.setFieldType(DatasetFieldType.FieldType.TEXT); + childCVType.setAllowControlledVocabulary(true); + childCVType.setParentDatasetFieldType(compoundType); + childCVType.setChildDatasetFieldTypes(new ArrayList<>()); + + 
compoundType.setChildDatasetFieldTypes(List.of(childTextType, childCVType)); + + // Create controlled vocabulary value + ControlledVocabularyValue cvv = new ControlledVocabularyValue(); + cvv.setStrValue("ark"); + cvv.setDatasetFieldType(childCVType); + + // Create current field with compound value + DatasetField currentField = new DatasetField(); + currentField.setDatasetFieldType(compoundType); + + DatasetFieldCompoundValue currentCV = new DatasetFieldCompoundValue(); + currentCV.setParentDatasetField(currentField); + + DatasetField currentChildText = new DatasetField(); + currentChildText.setDatasetFieldType(childTextType); + currentChildText.setParentDatasetFieldCompoundValue(currentCV); + currentChildText.setSingleValue("SameValue"); + + DatasetField currentChildCV = new DatasetField(); + currentChildCV.setDatasetFieldType(childCVType); + currentChildCV.setParentDatasetFieldCompoundValue(currentCV); + currentChildCV.setControlledVocabularyValues(List.of(cvv)); + + currentCV.setChildDatasetFields(List.of(currentChildText, currentChildCV)); + currentField.setDatasetFieldCompoundValues(List.of(currentCV)); + + // Create prior field with same compound value + DatasetField priorField = new DatasetField(); + priorField.setDatasetFieldType(compoundType); + + DatasetFieldCompoundValue priorCV = new DatasetFieldCompoundValue(); + priorCV.setParentDatasetField(priorField); + + DatasetField priorChildText = new DatasetField(); + priorChildText.setDatasetFieldType(childTextType); + priorChildText.setParentDatasetFieldCompoundValue(priorCV); + priorChildText.setSingleValue("SameValue"); + + DatasetField priorChildCV = new DatasetField(); + priorChildCV.setDatasetFieldType(childCVType); + priorChildCV.setParentDatasetFieldCompoundValue(priorCV); + priorChildCV.setControlledVocabularyValues(List.of(cvv)); + + priorCV.setChildDatasetFields(List.of(priorChildText, priorChildCV)); + priorField.setDatasetFieldCompoundValues(List.of(priorCV)); + + // Filter + DatasetField 
filtered = invokeFilterNewValues(currentField, priorField); + + // No compound values should be included since nothing changed + assertEquals(0, filtered.getDatasetFieldCompoundValues().size()); + assertTrue(filtered.isEmpty()); + } + + @Test + void testFilterNewValues_CompoundWithPrimitiveChild_AllNew() throws Exception { + // Setup compound field type with primitive child fields + DatasetFieldType compoundType = new DatasetFieldType(); + compoundType.setId(1L); + compoundType.setName("testCompoundWithPrimitive"); + compoundType.setAllowMultiples(true); + compoundType.setFieldType(DatasetFieldType.FieldType.NONE); + + DatasetFieldType childTextField = new DatasetFieldType(); + childTextField.setId(2L); + childTextField.setName("childText"); + childTextField.setFieldType(DatasetFieldType.FieldType.TEXT); + childTextField.setParentDatasetFieldType(compoundType); + childTextField.setChildDatasetFieldTypes(new ArrayList<>()); + + DatasetFieldType childIntField = new DatasetFieldType(); + childIntField.setId(3L); + childIntField.setName("childInt"); + childIntField.setFieldType(DatasetFieldType.FieldType.INT); + childIntField.setParentDatasetFieldType(compoundType); + childIntField.setChildDatasetFieldTypes(new ArrayList<>()); + + compoundType.setChildDatasetFieldTypes(List.of(childTextField, childIntField)); + + // Create current field with 2 compound values + DatasetField currentField = new DatasetField(); + currentField.setDatasetFieldType(compoundType); + + List compoundValues = new ArrayList<>(); + + // First compound value + DatasetFieldCompoundValue cv1 = new DatasetFieldCompoundValue(); + cv1.setParentDatasetField(currentField); + + DatasetField child1Text = new DatasetField(); + child1Text.setDatasetFieldType(childTextField); + child1Text.setParentDatasetFieldCompoundValue(cv1); + child1Text.setSingleValue("Text1"); + + DatasetField child1Int = new DatasetField(); + child1Int.setDatasetFieldType(childIntField); + 
child1Int.setParentDatasetFieldCompoundValue(cv1); + child1Int.setSingleValue("123"); + + cv1.setChildDatasetFields(List.of(child1Text, child1Int)); + compoundValues.add(cv1); + + // Second compound value + DatasetFieldCompoundValue cv2 = new DatasetFieldCompoundValue(); + cv2.setParentDatasetField(currentField); + + DatasetField child2Text = new DatasetField(); + child2Text.setDatasetFieldType(childTextField); + child2Text.setParentDatasetFieldCompoundValue(cv2); + child2Text.setSingleValue("Text2"); + + DatasetField child2Int = new DatasetField(); + child2Int.setDatasetFieldType(childIntField); + child2Int.setParentDatasetFieldCompoundValue(cv2); + child2Int.setSingleValue("456"); + + cv2.setChildDatasetFields(List.of(child2Text, child2Int)); + compoundValues.add(cv2); + + currentField.setDatasetFieldCompoundValues(compoundValues); + + // Create prior field with no values + DatasetField priorField = new DatasetField(); + priorField.setDatasetFieldType(compoundType); + priorField.setDatasetFieldCompoundValues(new ArrayList<>()); + + // Filter + DatasetField filtered = invokeFilterNewValues(currentField, priorField); + + // All compound values should be included + assertEquals(2, filtered.getDatasetFieldCompoundValues().size()); + } + + @Test + void testFilterNewValues_CompoundWithPrimitiveChild_SomeNew() throws Exception { + // Setup compound field type with primitive child fields + DatasetFieldType compoundType = new DatasetFieldType(); + compoundType.setId(1L); + compoundType.setName("testCompoundWithPrimitive"); + compoundType.setAllowMultiples(true); + compoundType.setFieldType(DatasetFieldType.FieldType.NONE); + + DatasetFieldType childTextField = new DatasetFieldType(); + childTextField.setId(2L); + childTextField.setName("childText"); + childTextField.setFieldType(DatasetFieldType.FieldType.TEXT); + childTextField.setParentDatasetFieldType(compoundType); + childTextField.setChildDatasetFieldTypes(new ArrayList<>()); + + DatasetFieldType childIntField = new 
DatasetFieldType(); + childIntField.setId(3L); + childIntField.setName("childInt"); + childIntField.setFieldType(DatasetFieldType.FieldType.INT); + childIntField.setParentDatasetFieldType(compoundType); + childIntField.setChildDatasetFieldTypes(new ArrayList<>()); + + compoundType.setChildDatasetFieldTypes(List.of(childTextField, childIntField)); + + // Create current field with 3 compound values + DatasetField currentField = new DatasetField(); + currentField.setDatasetFieldType(compoundType); + + List currentCompoundValues = new ArrayList<>(); + + // First compound value (existing) + DatasetFieldCompoundValue cv1 = new DatasetFieldCompoundValue(); + cv1.setParentDatasetField(currentField); + + DatasetField child1Text = new DatasetField(); + child1Text.setDatasetFieldType(childTextField); + child1Text.setParentDatasetFieldCompoundValue(cv1); + child1Text.setSingleValue("Text1"); + + DatasetField child1Int = new DatasetField(); + child1Int.setDatasetFieldType(childIntField); + child1Int.setParentDatasetFieldCompoundValue(cv1); + child1Int.setSingleValue("123"); + + cv1.setChildDatasetFields(List.of(child1Text, child1Int)); + currentCompoundValues.add(cv1); + + // Second compound value (new) + DatasetFieldCompoundValue cv2 = new DatasetFieldCompoundValue(); + cv2.setParentDatasetField(currentField); + + DatasetField child2Text = new DatasetField(); + child2Text.setDatasetFieldType(childTextField); + child2Text.setParentDatasetFieldCompoundValue(cv2); + child2Text.setSingleValue("Text2"); + + DatasetField child2Int = new DatasetField(); + child2Int.setDatasetFieldType(childIntField); + child2Int.setParentDatasetFieldCompoundValue(cv2); + child2Int.setSingleValue("456"); + + cv2.setChildDatasetFields(List.of(child2Text, child2Int)); + currentCompoundValues.add(cv2); + + // Third compound value (new) + DatasetFieldCompoundValue cv3 = new DatasetFieldCompoundValue(); + cv3.setParentDatasetField(currentField); + + DatasetField child3Text = new DatasetField(); + 
child3Text.setDatasetFieldType(childTextField); + child3Text.setParentDatasetFieldCompoundValue(cv3); + child3Text.setSingleValue("Text3"); + + DatasetField child3Int = new DatasetField(); + child3Int.setDatasetFieldType(childIntField); + child3Int.setParentDatasetFieldCompoundValue(cv3); + child3Int.setSingleValue("789"); + + cv3.setChildDatasetFields(List.of(child3Text, child3Int)); + currentCompoundValues.add(cv3); + + currentField.setDatasetFieldCompoundValues(currentCompoundValues); + + // Create prior field with 1 existing compound value + DatasetField priorField = new DatasetField(); + priorField.setDatasetFieldType(compoundType); + + List priorCompoundValues = new ArrayList<>(); + + DatasetFieldCompoundValue priorCv1 = new DatasetFieldCompoundValue(); + priorCv1.setParentDatasetField(priorField); + + DatasetField priorChild1Text = new DatasetField(); + priorChild1Text.setDatasetFieldType(childTextField); + priorChild1Text.setParentDatasetFieldCompoundValue(priorCv1); + priorChild1Text.setSingleValue("Text1"); + + DatasetField priorChild1Int = new DatasetField(); + priorChild1Int.setDatasetFieldType(childIntField); + priorChild1Int.setParentDatasetFieldCompoundValue(priorCv1); + priorChild1Int.setSingleValue("123"); + + priorCv1.setChildDatasetFields(List.of(priorChild1Text, priorChild1Int)); + priorCompoundValues.add(priorCv1); + + priorField.setDatasetFieldCompoundValues(priorCompoundValues); + + // Filter + DatasetField filtered = invokeFilterNewValues(currentField, priorField); + + // Only 2 new compound values should be included + assertEquals(2, filtered.getDatasetFieldCompoundValues().size()); + + // Verify the new values are present + boolean hasText2 = false; + boolean hasText3 = false; + + for (DatasetFieldCompoundValue cv : filtered.getDatasetFieldCompoundValues()) { + for (DatasetField childField : cv.getChildDatasetFields()) { + if (childField.getDatasetFieldType().equals(childTextField)) { + String value = childField.getDisplayValue(); + if 
("Text2".equals(value)) { + hasText2 = true; + } else if ("Text3".equals(value)) { + hasText3 = true; + } + } + } + } + + assertTrue(hasText2); + assertTrue(hasText3); + } + + @Test + void testFilterNewValues_CompoundWithPrimitiveChild_NoneNew() throws Exception { + // Setup compound field type with primitive child fields + DatasetFieldType compoundType = new DatasetFieldType(); + compoundType.setId(1L); + compoundType.setName("testCompoundWithPrimitive"); + compoundType.setAllowMultiples(true); + compoundType.setFieldType(DatasetFieldType.FieldType.NONE); + + DatasetFieldType childTextField = new DatasetFieldType(); + childTextField.setId(2L); + childTextField.setName("childText"); + childTextField.setFieldType(DatasetFieldType.FieldType.TEXT); + childTextField.setParentDatasetFieldType(compoundType); + childTextField.setChildDatasetFieldTypes(new ArrayList<>()); + + DatasetFieldType childIntField = new DatasetFieldType(); + childIntField.setId(3L); + childIntField.setName("childInt"); + childIntField.setFieldType(DatasetFieldType.FieldType.INT); + childIntField.setParentDatasetFieldType(compoundType); + childIntField.setChildDatasetFieldTypes(new ArrayList<>()); + + compoundType.setChildDatasetFieldTypes(List.of(childTextField, childIntField)); + + // Create current field with 2 compound values + DatasetField currentField = new DatasetField(); + currentField.setDatasetFieldType(compoundType); + + List currentCompoundValues = new ArrayList<>(); + + // First compound value + DatasetFieldCompoundValue cv1 = new DatasetFieldCompoundValue(); + cv1.setParentDatasetField(currentField); + + DatasetField child1Text = new DatasetField(); + child1Text.setDatasetFieldType(childTextField); + child1Text.setParentDatasetFieldCompoundValue(cv1); + child1Text.setSingleValue("Text1"); + + DatasetField child1Int = new DatasetField(); + child1Int.setDatasetFieldType(childIntField); + child1Int.setParentDatasetFieldCompoundValue(cv1); + child1Int.setSingleValue("123"); + + 
cv1.setChildDatasetFields(List.of(child1Text, child1Int)); + currentCompoundValues.add(cv1); + + // Second compound value + DatasetFieldCompoundValue cv2 = new DatasetFieldCompoundValue(); + cv2.setParentDatasetField(currentField); + + DatasetField child2Text = new DatasetField(); + child2Text.setDatasetFieldType(childTextField); + child2Text.setParentDatasetFieldCompoundValue(cv2); + child2Text.setSingleValue("Text2"); + + DatasetField child2Int = new DatasetField(); + child2Int.setDatasetFieldType(childIntField); + child2Int.setParentDatasetFieldCompoundValue(cv2); + child2Int.setSingleValue("456"); + + cv2.setChildDatasetFields(List.of(child2Text, child2Int)); + currentCompoundValues.add(cv2); + + currentField.setDatasetFieldCompoundValues(currentCompoundValues); + + // Create prior field with same compound values + DatasetField priorField = new DatasetField(); + priorField.setDatasetFieldType(compoundType); + + List priorCompoundValues = new ArrayList<>(); + + // First compound value (same as current) + DatasetFieldCompoundValue priorCv1 = new DatasetFieldCompoundValue(); + priorCv1.setParentDatasetField(priorField); + + DatasetField priorChild1Text = new DatasetField(); + priorChild1Text.setDatasetFieldType(childTextField); + priorChild1Text.setParentDatasetFieldCompoundValue(priorCv1); + priorChild1Text.setSingleValue("Text1"); + + DatasetField priorChild1Int = new DatasetField(); + priorChild1Int.setDatasetFieldType(childIntField); + priorChild1Int.setParentDatasetFieldCompoundValue(priorCv1); + priorChild1Int.setSingleValue("123"); + + priorCv1.setChildDatasetFields(List.of(priorChild1Text, priorChild1Int)); + priorCompoundValues.add(priorCv1); + + // Second compound value (same as current) + DatasetFieldCompoundValue priorCv2 = new DatasetFieldCompoundValue(); + priorCv2.setParentDatasetField(priorField); + + DatasetField priorChild2Text = new DatasetField(); + priorChild2Text.setDatasetFieldType(childTextField); + 
priorChild2Text.setParentDatasetFieldCompoundValue(priorCv2); + priorChild2Text.setSingleValue("Text2"); + + DatasetField priorChild2Int = new DatasetField(); + priorChild2Int.setDatasetFieldType(childIntField); + priorChild2Int.setParentDatasetFieldCompoundValue(priorCv2); + priorChild2Int.setSingleValue("456"); + + priorCv2.setChildDatasetFields(List.of(priorChild2Text, priorChild2Int)); + priorCompoundValues.add(priorCv2); + + priorField.setDatasetFieldCompoundValues(priorCompoundValues); + + // Filter + DatasetField filtered = invokeFilterNewValues(currentField, priorField); + + // No compound values should be included + assertEquals(0, filtered.getDatasetFieldCompoundValues().size()); + assertTrue(filtered.isEmpty()); + } + + // Helper methods + + private DatasetField createMultiValueField(DatasetFieldType fieldType, String... values) { + DatasetField field = new DatasetField(); + field.setDatasetFieldType(fieldType); + + List fieldValues = new ArrayList<>(); + for (String value : values) { + DatasetFieldValue dfv = new DatasetFieldValue(); + dfv.setValue(value); + dfv.setDatasetField(field); + fieldValues.add(dfv); + } + field.setDatasetFieldValues(fieldValues); + + return field; + } + + private DatasetField createSingleValueField(DatasetFieldType fieldType, String value) { + DatasetField field = new DatasetField(); + field.setDatasetFieldType(fieldType); + field.setSingleValue(value); + return field; + } + + private boolean containsValue(DatasetField field, String value) { + for (DatasetFieldValue dfv : field.getDatasetFieldValues()) { + if (value.equals(dfv.getDisplayValue())) { + return true; + } + } + return false; + } + + private DatasetField createCompoundField(DatasetFieldType fieldType, String[]... 
compoundValues) { + DatasetField field = new DatasetField(); + field.setDatasetFieldType(fieldType); + + List compoundValueList = new ArrayList<>(); + for (String[] values : compoundValues) { + DatasetFieldCompoundValue compoundValue = new DatasetFieldCompoundValue(); + compoundValue.setParentDatasetField(field); + + List childFields = new ArrayList<>(); + List childTypes = new ArrayList<>(fieldType.getChildDatasetFieldTypes()); + + // Create child fields based on the parent's child types and provided values + for (int i = 0; i < Math.min(values.length, childTypes.size()); i++) { + DatasetField childField = new DatasetField(); + childField.setDatasetFieldType(childTypes.get(i)); + childField.setParentDatasetFieldCompoundValue(compoundValue); + childField.setSingleValue(values[i]); + childFields.add(childField); + } + + compoundValue.setChildDatasetFields(childFields); + compoundValueList.add(compoundValue); + } + + field.setDatasetFieldCompoundValues(compoundValueList); + return field; + } + + private boolean containsCompoundValue(DatasetField field, String... 
childValues) { + for (DatasetFieldCompoundValue cv : field.getDatasetFieldCompoundValues()) { + List cvValues = new ArrayList<>(); + + for (DatasetField childField : cv.getChildDatasetFields()) { + cvValues.add(childField.getDisplayValue()); + } + + // Check if all provided values are present in this compound value + boolean allMatch = true; + for (String value : childValues) { + if (!cvValues.contains(value)) { + allMatch = false; + break; + } + } + + if (allMatch && cvValues.size() == childValues.length) { + return true; + } + } + return false; + } + + private boolean containsControlledVocabValue(DatasetField field, String strValue) { + if (field.getControlledVocabularyValues() == null) { + return false; + } + + for (ControlledVocabularyValue cvv : field.getControlledVocabularyValues()) { + if (cvv.getStrValue().equals(strValue)) { + return true; + } + } + return false; + } + + /** + * Use reflection to invoke the private filterNewValues method + */ + private DatasetField invokeFilterNewValues(DatasetField currentField, DatasetField priorField) throws Exception { + var method = COARNotifyRelationshipAnnouncementStep.class.getDeclaredMethod( + "filterNewValues", DatasetField.class, DatasetField.class); + method.setAccessible(true); + return (DatasetField) method.invoke(step, currentField, priorField); + } +} \ No newline at end of file diff --git a/src/test/resources/croissant/.gitignore b/src/test/resources/croissant/.gitignore new file mode 100644 index 00000000000..7aa31745061 --- /dev/null +++ b/src/test/resources/croissant/.gitignore @@ -0,0 +1,2 @@ +# these "out" files are generated when running tests +/*/out/croissant.json diff --git a/src/test/resources/croissant/cars/expected/cars-croissant.json b/src/test/resources/croissant/cars/expected/cars-croissant.json new file mode 100644 index 00000000000..a9c0d48b217 --- /dev/null +++ b/src/test/resources/croissant/cars/expected/cars-croissant.json @@ -0,0 +1,302 @@ +{ + "@context": { + "@language": "en", + 
"@vocab": "https://schema.org/", + "citeAs": "cr:citeAs", + "column": "cr:column", + "conformsTo": "dct:conformsTo", + "cr": "http://mlcommons.org/croissant/", + "rai": "http://mlcommons.org/croissant/RAI/", + "data": { + "@id": "cr:data", + "@type": "@json" + }, + "dataType": { + "@id": "cr:dataType", + "@type": "@vocab" + }, + "dct": "http://purl.org/dc/terms/", + "examples": { + "@id": "cr:examples", + "@type": "@json" + }, + "extract": "cr:extract", + "field": "cr:field", + "fileProperty": "cr:fileProperty", + "fileObject": "cr:fileObject", + "fileSet": "cr:fileSet", + "format": "cr:format", + "includes": "cr:includes", + "isLiveDataset": "cr:isLiveDataset", + "jsonPath": "cr:jsonPath", + "key": "cr:key", + "md5": "cr:md5", + "parentField": "cr:parentField", + "path": "cr:path", + "recordSet": "cr:recordSet", + "references": "cr:references", + "regex": "cr:regex", + "repeated": "cr:repeated", + "replace": "cr:replace", + "samplingRate": "cr:samplingRate", + "sc": "https://schema.org/", + "separator": "cr:separator", + "source": "cr:source", + "subField": "cr:subField", + "transform": "cr:transform", + "wd": "https://www.wikidata.org/wiki/" + }, + "@type": "sc:Dataset", + "conformsTo": "http://mlcommons.org/croissant/1.0", + "name": "Cars", + "url": "https://doi.org/10.5072/FK2/CY7BWA", + "creator": [ + { + "@type": "Person", + "givenName": "Philip", + "familyName": "Durbin", + "affiliation": { + "@type": "Organization", + "name": "Harvard" + }, + "name": "Durbin, Philip" + } + ], + "description": "This dataset is about cars.", + "keywords": [ + "Other" + ], + "license": "http://creativecommons.org/publicdomain/zero/1.0", + "datePublished": "2025-05-16", + "dateModified": "2025-05-16", + "includedInDataCatalog": { + "@type": "DataCatalog", + "name": "Root", + "url": "http://localhost:8080" + }, + "publisher": { + "@type": "Organization", + "name": "Root" + }, + "version": "1.0", + "citeAs": "@data{FK2/CY7BWA_2025,author = {Durbin, Philip},publisher = 
{Root},title = {Cars},year = {2025},url = {https://doi.org/10.5072/FK2/CY7BWA}}", + "distribution": [ + { + "@type": "cr:FileObject", + "@id": "code/compute.py", + "name": "compute.py", + "encodingFormat": "text/x-python", + "md5": "d84985e94dde671f318076bd7a137f15", + "contentSize": "15", + "description": "", + "contentUrl": "http://localhost:8080/api/access/datafile/7" + }, + { + "@type": "cr:FileObject", + "@id": "data/stata13-auto.dta", + "name": "stata13-auto.dta", + "encodingFormat": "application/x-stata-13", + "md5": "7b1201ce6b469796837a835377338c5a", + "contentSize": "6443", + "description": "", + "contentUrl": "http://localhost:8080/api/access/datafile/9?format=original" + }, + { + "@type": "cr:FileObject", + "@id": "doc/README.md", + "name": "README.md", + "encodingFormat": "text/markdown", + "md5": "a2e484d07ee5590cc32182dc2c6ccc83", + "contentSize": "28", + "description": "", + "contentUrl": "http://localhost:8080/api/access/datafile/8" + } + ], + "recordSet": [ + { + "@type": "cr:RecordSet", + "field": [ + { + "@type": "cr:Field", + "name": "make", + "description": "Make and Model", + "dataType": "sc:Text", + "source": { + "@id": "2", + "fileObject": { + "@id": "data/stata13-auto.dta" + }, + "extract": { + "column": "make" + } + } + }, + { + "@type": "cr:Field", + "name": "price", + "description": "Price", + "dataType": "sc:Integer", + "source": { + "@id": "5", + "fileObject": { + "@id": "data/stata13-auto.dta" + }, + "extract": { + "column": "price" + } + } + }, + { + "@type": "cr:Field", + "name": "mpg", + "description": "Mileage (mpg)", + "dataType": "sc:Integer", + "source": { + "@id": "3", + "fileObject": { + "@id": "data/stata13-auto.dta" + }, + "extract": { + "column": "mpg" + } + } + }, + { + "@type": "cr:Field", + "name": "rep78", + "description": "Repair Record 1978", + "dataType": "sc:Integer", + "source": { + "@id": "12", + "fileObject": { + "@id": "data/stata13-auto.dta" + }, + "extract": { + "column": "rep78" + } + } + }, + { + "@type": 
"cr:Field", + "name": "headroom", + "description": "Headroom (in.)", + "dataType": "sc:Float", + "source": { + "@id": "1", + "fileObject": { + "@id": "data/stata13-auto.dta" + }, + "extract": { + "column": "headroom" + } + } + }, + { + "@type": "cr:Field", + "name": "trunk", + "description": "Trunk space (cu. ft.)", + "dataType": "sc:Integer", + "source": { + "@id": "7", + "fileObject": { + "@id": "data/stata13-auto.dta" + }, + "extract": { + "column": "trunk" + } + } + }, + { + "@type": "cr:Field", + "name": "weight", + "description": "Weight (lbs.)", + "dataType": "sc:Integer", + "source": { + "@id": "4", + "fileObject": { + "@id": "data/stata13-auto.dta" + }, + "extract": { + "column": "weight" + } + } + }, + { + "@type": "cr:Field", + "name": "length", + "description": "Length (in.)", + "dataType": "sc:Integer", + "source": { + "@id": "8", + "fileObject": { + "@id": "data/stata13-auto.dta" + }, + "extract": { + "column": "length" + } + } + }, + { + "@type": "cr:Field", + "name": "turn", + "description": "Turn Circle (ft.) ", + "dataType": "sc:Integer", + "source": { + "@id": "9", + "fileObject": { + "@id": "data/stata13-auto.dta" + }, + "extract": { + "column": "turn" + } + } + }, + { + "@type": "cr:Field", + "name": "displacement", + "description": "Displacement (cu. 
in.)", + "dataType": "sc:Integer", + "source": { + "@id": "10", + "fileObject": { + "@id": "data/stata13-auto.dta" + }, + "extract": { + "column": "displacement" + } + } + }, + { + "@type": "cr:Field", + "name": "gear_ratio", + "description": "Gear Ratio", + "dataType": "sc:Float", + "source": { + "@id": "6", + "fileObject": { + "@id": "data/stata13-auto.dta" + }, + "extract": { + "column": "gear_ratio" + } + } + }, + { + "@type": "cr:Field", + "name": "foreign", + "description": "Car type", + "dataType": "sc:Integer", + "source": { + "@id": "11", + "fileObject": { + "@id": "data/stata13-auto.dta" + }, + "extract": { + "column": "foreign" + } + } + } + ] + } + ] +} \ No newline at end of file diff --git a/src/test/resources/croissant/cars/expected/cars-croissantSlim.json b/src/test/resources/croissant/cars/expected/cars-croissantSlim.json new file mode 100644 index 00000000000..392ddd3a5dd --- /dev/null +++ b/src/test/resources/croissant/cars/expected/cars-croissantSlim.json @@ -0,0 +1,83 @@ +{ + "@context": { + "@language": "en", + "@vocab": "https://schema.org/", + "citeAs": "cr:citeAs", + "column": "cr:column", + "conformsTo": "dct:conformsTo", + "cr": "http://mlcommons.org/croissant/", + "rai": "http://mlcommons.org/croissant/RAI/", + "data": { + "@id": "cr:data", + "@type": "@json" + }, + "dataType": { + "@id": "cr:dataType", + "@type": "@vocab" + }, + "dct": "http://purl.org/dc/terms/", + "examples": { + "@id": "cr:examples", + "@type": "@json" + }, + "extract": "cr:extract", + "field": "cr:field", + "fileProperty": "cr:fileProperty", + "fileObject": "cr:fileObject", + "fileSet": "cr:fileSet", + "format": "cr:format", + "includes": "cr:includes", + "isLiveDataset": "cr:isLiveDataset", + "jsonPath": "cr:jsonPath", + "key": "cr:key", + "md5": "cr:md5", + "parentField": "cr:parentField", + "path": "cr:path", + "recordSet": "cr:recordSet", + "references": "cr:references", + "regex": "cr:regex", + "repeated": "cr:repeated", + "replace": "cr:replace", + 
"samplingRate": "cr:samplingRate", + "sc": "https://schema.org/", + "separator": "cr:separator", + "source": "cr:source", + "subField": "cr:subField", + "transform": "cr:transform", + "wd": "https://www.wikidata.org/wiki/" + }, + "@type": "sc:Dataset", + "conformsTo": "http://mlcommons.org/croissant/1.0", + "name": "Cars", + "url": "https://doi.org/10.5072/FK2/CY7BWA", + "creator": [ + { + "@type": "Person", + "givenName": "Philip", + "familyName": "Durbin", + "affiliation": { + "@type": "Organization", + "name": "Harvard" + }, + "name": "Durbin, Philip" + } + ], + "description": "This dataset is about cars.", + "keywords": [ + "Other" + ], + "license": "http://creativecommons.org/publicdomain/zero/1.0", + "datePublished": "2025-05-16", + "dateModified": "2025-05-16", + "includedInDataCatalog": { + "@type": "DataCatalog", + "name": "Root", + "url": "http://localhost:8080" + }, + "publisher": { + "@type": "Organization", + "name": "Root" + }, + "version": "1.0", + "citeAs": "@data{FK2/CY7BWA_2025,author = {Durbin, Philip},publisher = {Root},title = {Cars},year = {2025},url = {https://doi.org/10.5072/FK2/CY7BWA}}" +} diff --git a/src/test/resources/croissant/cars/in/dataCiteXml.xml b/src/test/resources/croissant/cars/in/dataCiteXml.xml new file mode 100644 index 00000000000..7c6c89385fd --- /dev/null +++ b/src/test/resources/croissant/cars/in/dataCiteXml.xml @@ -0,0 +1,51 @@ + + + 10.5072/FK2/CY7BWA + + + Durbin, Philip + Philip + Durbin + Harvard + + + + Cars + + Root + 2025 + + Other + + + + Durbin, Philip + Philip + Durbin + Harvard + + + + 2024-03-13 + 2025-05-16 + + + + 15 + 28 + 4026 + + + text/x-python + text/markdown + text/tab-separated-values + + 1.0 + + + Creative Commons CC0 1.0 Universal Public Domain Dedication. + + + This dataset is about cars. 
+ + diff --git a/src/test/resources/croissant/cars/in/datasetFileDetails.json b/src/test/resources/croissant/cars/in/datasetFileDetails.json new file mode 100644 index 00000000000..2ce12a4abe9 --- /dev/null +++ b/src/test/resources/croissant/cars/in/datasetFileDetails.json @@ -0,0 +1,355 @@ +[ + { + "id": 7, + "persistentId": "", + "filename": "compute.py", + "contentType": "text/x-python", + "friendlyType": "Python Source Code", + "filesize": 15, + "description": "", + "storageIdentifier": "local://196d9f154f7-8cadf34ee905", + "rootDataFileId": -1, + "md5": "d84985e94dde671f318076bd7a137f15", + "checksum": { + "type": "MD5", + "value": "d84985e94dde671f318076bd7a137f15" + }, + "tabularData": false, + "creationDate": "2025-05-16", + "publicationDate": "2025-05-16", + "fileAccessRequest": true, + "restricted": false, + "fileMetadataId": 1, + "varGroups": [] + }, + { + "id": 9, + "persistentId": "", + "filename": "stata13-auto.tab", + "contentType": "text/tab-separated-values", + "friendlyType": "Tab-Delimited", + "filesize": 4026, + "description": "", + "storageIdentifier": "local://196d9f15719-2270bfca2b48", + "originalFileFormat": "application/x-stata-13", + "originalFormatLabel": "Stata 13 Binary", + "originalFileSize": 6443, + "originalFileName": "stata13-auto.dta", + "UNF": "UNF:6:RPd9EWHSZwqUvRZuKTJMqg==", + "rootDataFileId": -1, + "md5": "7b1201ce6b469796837a835377338c5a", + "checksum": { + "type": "MD5", + "value": "7b1201ce6b469796837a835377338c5a" + }, + "tabularData": true, + "creationDate": "2025-05-16", + "publicationDate": "2025-05-16", + "fileAccessRequest": true, + "restricted": false, + "fileMetadataId": 3, + "dataTables": [ + { + "varQuantity": 12, + "caseQuantity": 74, + "UNF": "UNF:6:RPd9EWHSZwqUvRZuKTJMqg==", + "dataVariables": [ + { + "id": 2, + "name": "make", + "label": "Make and Model", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "CHARACTER", + "isOrderedCategorical": false, + "fileOrder": 0, + "UNF": 
"UNF:6:Oo4vwiL8ffhSECOcjsKk2g==", + "variableMetadata": [] + }, + { + "id": 5, + "name": "price", + "label": "Price", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "NUMERIC", + "isOrderedCategorical": false, + "fileOrder": 1, + "UNF": "UNF:6:rvfkkdA36AaCSqCQciybfA==", + "variableMetadata": [], + "summaryStatistics": { + "min": "3291.0", + "medn": "5006.5", + "mean": "6165.256756756757", + "max": "15906.0", + "vald": "74.0", + "mode": ".", + "stdev": "2949.4958847689186", + "invd": "0.0" + } + }, + { + "id": 3, + "name": "mpg", + "label": "Mileage (mpg)", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "NUMERIC", + "isOrderedCategorical": false, + "fileOrder": 2, + "UNF": "UNF:6:vVr3w8CgeZq1KpDfJQudOg==", + "variableMetadata": [], + "summaryStatistics": { + "max": "41.0", + "vald": "74.0", + "medn": "20.0", + "min": "12.0", + "stdev": "5.785503209735141", + "mean": "21.2972972972973", + "invd": "0.0", + "mode": "." + } + }, + { + "id": 12, + "name": "rep78", + "label": "Repair Record 1978", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "NUMERIC", + "isOrderedCategorical": false, + "fileOrder": 3, + "UNF": "UNF:6:gbFI98swTWNhAjCRyi2cdA==", + "variableMetadata": [], + "summaryStatistics": { + "stdev": "0.989932270109041", + "mode": ".", + "min": "1.0", + "max": "5.0", + "medn": "3.0", + "mean": "3.4057971014492754", + "vald": "69.0", + "invd": "5.0" + } + }, + { + "id": 1, + "name": "headroom", + "label": "Headroom (in.)", + "weighted": false, + "variableIntervalType": "contin", + "variableFormatType": "NUMERIC", + "format": "float", + "isOrderedCategorical": false, + "fileOrder": 4, + "UNF": "UNF:6:g4Pl3T0Oz2e/OKJ64WiTnA==", + "variableMetadata": [], + "summaryStatistics": { + "mean": "2.993243243243243", + "mode": ".", + "vald": "74.0", + "invd": "0.0", + "stdev": "0.845994766828771", + "min": "1.5", + "medn": "3.0", + "max": "5.0" + } + }, + { + "id": 7, + 
"name": "trunk", + "label": "Trunk space (cu. ft.)", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "NUMERIC", + "isOrderedCategorical": false, + "fileOrder": 5, + "UNF": "UNF:6:iab0POsE3By7dQfgX/TY4g==", + "variableMetadata": [], + "summaryStatistics": { + "vald": "74.0", + "mode": ".", + "mean": "13.756756756756756", + "max": "23.0", + "min": "5.0", + "medn": "14.0", + "invd": "0.0", + "stdev": "4.277404189173201" + } + }, + { + "id": 4, + "name": "weight", + "label": "Weight (lbs.)", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "NUMERIC", + "isOrderedCategorical": false, + "fileOrder": 6, + "UNF": "UNF:6:cdoTdfUNeYWHHFEBCDxg+w==", + "variableMetadata": [], + "summaryStatistics": { + "invd": "0.0", + "min": "1760.0", + "vald": "74.0", + "max": "4840.0", + "stdev": "777.1935671373664", + "mean": "3019.459459459459", + "mode": ".", + "medn": "3190.0" + } + }, + { + "id": 8, + "name": "length", + "label": "Length (in.)", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "NUMERIC", + "isOrderedCategorical": false, + "fileOrder": 7, + "UNF": "UNF:6:8z1rjwhqBN4meYIiKI4P1A==", + "variableMetadata": [], + "summaryStatistics": { + "mode": ".", + "vald": "74.0", + "invd": "0.0", + "stdev": "22.266339902021585", + "max": "233.0", + "medn": "192.5", + "mean": "187.93243243243245", + "min": "142.0" + } + }, + { + "id": 9, + "name": "turn", + "label": "Turn Circle (ft.) ", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "NUMERIC", + "isOrderedCategorical": false, + "fileOrder": 8, + "UNF": "UNF:6:QxhjrrNtVz4qA8RulQ2MuQ==", + "variableMetadata": [], + "summaryStatistics": { + "stdev": "4.399353727233908", + "vald": "74.0", + "max": "51.0", + "min": "31.0", + "medn": "40.0", + "mean": "39.648648648648646", + "invd": "0.0", + "mode": "." + } + }, + { + "id": 10, + "name": "displacement", + "label": "Displacement (cu. 
in.)", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "NUMERIC", + "isOrderedCategorical": false, + "fileOrder": 9, + "UNF": "UNF:6:ftk+RAQpTCT1/y6G/rLWfA==", + "variableMetadata": [], + "summaryStatistics": { + "stdev": "91.83721896440396", + "invd": "0.0", + "min": "79.0", + "medn": "196.0", + "mode": ".", + "vald": "74.0", + "mean": "197.2972972972973", + "max": "425.0" + } + }, + { + "id": 6, + "name": "gear_ratio", + "label": "Gear Ratio", + "weighted": false, + "variableIntervalType": "contin", + "variableFormatType": "NUMERIC", + "format": "float", + "isOrderedCategorical": false, + "fileOrder": 10, + "UNF": "UNF:6:qjnY/qbx26FTepoPqRZ6lw==", + "variableMetadata": [], + "summaryStatistics": { + "medn": "2.9550000429153442", + "stdev": "0.45628709670763035", + "mean": "3.0148648667979883", + "min": "2.190000057220459", + "max": "3.890000104904175", + "mode": ".", + "vald": "74.0", + "invd": "0.0" + } + }, + { + "id": 11, + "name": "foreign", + "label": "Car type", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "NUMERIC", + "isOrderedCategorical": false, + "fileOrder": 11, + "UNF": "UNF:6:nbjzgh3wfAFqKpaoFnHalA==", + "variableMetadata": [], + "summaryStatistics": { + "max": "1.0", + "invd": "0.0", + "mode": ".", + "medn": "0.0", + "stdev": "0.46018845840901884", + "min": "0.0", + "mean": "0.2972972972972975", + "vald": "74.0" + }, + "variableCategories": [ + { + "label": "Domestic", + "value": "0", + "isMissing": false, + "frequency": 52.0 + }, + { + "label": "Foreign", + "value": "1", + "isMissing": false, + "frequency": 22.0 + } + ] + } + ] + } + ], + "varGroups": [] + }, + { + "id": 8, + "persistentId": "", + "filename": "README.md", + "contentType": "text/markdown", + "friendlyType": "Markdown Text", + "filesize": 28, + "description": "", + "storageIdentifier": "local://196d9f15664-1d4bb4e96a97", + "rootDataFileId": -1, + "md5": "a2e484d07ee5590cc32182dc2c6ccc83", + "checksum": { + 
"type": "MD5", + "value": "a2e484d07ee5590cc32182dc2c6ccc83" + }, + "tabularData": false, + "creationDate": "2025-05-16", + "publicationDate": "2025-05-16", + "fileAccessRequest": true, + "restricted": false, + "fileMetadataId": 2, + "varGroups": [] + } +] diff --git a/src/test/resources/croissant/cars/in/datasetJson.json b/src/test/resources/croissant/cars/in/datasetJson.json new file mode 100644 index 00000000000..96aa26c9228 --- /dev/null +++ b/src/test/resources/croissant/cars/in/datasetJson.json @@ -0,0 +1,228 @@ +{ + "id": 6, + "identifier": "FK2/CY7BWA", + "persistentUrl": "https://doi.org/10.5072/FK2/CY7BWA", + "protocol": "doi", + "authority": "10.5072", + "separator": "/", + "publisher": "Root", + "publicationDate": "2025-05-16", + "storageIdentifier": "local://10.5072/FK2/CY7BWA", + "datasetType": "dataset", + "datasetVersion": { + "id": 3, + "datasetId": 6, + "datasetPersistentId": "doi:10.5072/FK2/CY7BWA", + "storageIdentifier": "local://10.5072/FK2/CY7BWA", + "versionNumber": 1, + "internalVersionNumber": 10, + "versionMinorNumber": 0, + "versionState": "RELEASED", + "latestVersionPublishingState": "RELEASED", + "UNF": "UNF:6:RPd9EWHSZwqUvRZuKTJMqg==", + "lastUpdateTime": "2025-05-16T16:33:18Z", + "releaseTime": "2025-05-16T16:33:18Z", + "createTime": "2025-05-16T16:33:13Z", + "publicationDate": "2025-05-16", + "citationDate": "2025-05-16", + "license": { + "name": "CC0 1.0", + "uri": "http://creativecommons.org/publicdomain/zero/1.0", + "iconUri": "https://licensebuttons.net/p/zero/1.0/88x31.png", + "rightsIdentifier": "CC0-1.0", + "rightsIdentifierScheme": "SPDX", + "schemeUri": "https://spdx.org/licenses/", + "languageCode": "en" + }, + "fileAccessRequest": true, + "metadataBlocks": { + "citation": { + "displayName": "Citation Metadata", + "name": "citation", + "fields": [ + { + "typeName": "title", + "multiple": false, + "typeClass": "primitive", + "value": "Cars" + }, + { + "typeName": "author", + "multiple": true, + "typeClass": "compound", + 
"value": [ + { + "authorName": { + "typeName": "authorName", + "multiple": false, + "typeClass": "primitive", + "value": "Durbin, Philip" + }, + "authorAffiliation": { + "typeName": "authorAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "Harvard" + } + } + ] + }, + { + "typeName": "datasetContact", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "datasetContactName": { + "typeName": "datasetContactName", + "multiple": false, + "typeClass": "primitive", + "value": "Durbin, Philip" + }, + "datasetContactAffiliation": { + "typeName": "datasetContactAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "Harvard" + }, + "datasetContactEmail": { + "typeName": "datasetContactEmail", + "multiple": false, + "typeClass": "primitive", + "value": "dataverse@mailinator.com" + } + } + ] + }, + { + "typeName": "dsDescription", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "dsDescriptionValue": { + "typeName": "dsDescriptionValue", + "multiple": false, + "typeClass": "primitive", + "value": "This dataset is about cars." 
+ } + } + ] + }, + { + "typeName": "subject", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "Other" + ] + }, + { + "typeName": "depositor", + "multiple": false, + "typeClass": "primitive", + "value": "Durbin, Philip" + }, + { + "typeName": "dateOfDeposit", + "multiple": false, + "typeClass": "primitive", + "value": "2024-03-13" + } + ] + } + }, + "files": [ + { + "description": "", + "label": "compute.py", + "restricted": false, + "directoryLabel": "code", + "version": 2, + "datasetVersionId": 3, + "dataFile": { + "id": 7, + "persistentId": "", + "filename": "compute.py", + "contentType": "text/x-python", + "friendlyType": "Python Source Code", + "filesize": 15, + "description": "", + "storageIdentifier": "local://196d9f154f7-8cadf34ee905", + "rootDataFileId": -1, + "md5": "d84985e94dde671f318076bd7a137f15", + "checksum": { + "type": "MD5", + "value": "d84985e94dde671f318076bd7a137f15" + }, + "tabularData": false, + "creationDate": "2025-05-16", + "publicationDate": "2025-05-16", + "fileAccessRequest": true + } + }, + { + "description": "", + "label": "README.md", + "restricted": false, + "directoryLabel": "doc", + "version": 2, + "datasetVersionId": 3, + "dataFile": { + "id": 8, + "persistentId": "", + "filename": "README.md", + "contentType": "text/markdown", + "friendlyType": "Markdown Text", + "filesize": 28, + "description": "", + "storageIdentifier": "local://196d9f15664-1d4bb4e96a97", + "rootDataFileId": -1, + "md5": "a2e484d07ee5590cc32182dc2c6ccc83", + "checksum": { + "type": "MD5", + "value": "a2e484d07ee5590cc32182dc2c6ccc83" + }, + "tabularData": false, + "creationDate": "2025-05-16", + "publicationDate": "2025-05-16", + "fileAccessRequest": true + } + }, + { + "description": "", + "label": "stata13-auto.tab", + "restricted": false, + "directoryLabel": "data", + "version": 4, + "datasetVersionId": 3, + "dataFile": { + "id": 9, + "persistentId": "", + "filename": "stata13-auto.tab", + "contentType": 
"text/tab-separated-values", + "friendlyType": "Tab-Delimited", + "filesize": 4026, + "description": "", + "storageIdentifier": "local://196d9f15719-2270bfca2b48", + "originalFileFormat": "application/x-stata-13", + "originalFormatLabel": "Stata 13 Binary", + "originalFileSize": 6443, + "originalFileName": "stata13-auto.dta", + "UNF": "UNF:6:RPd9EWHSZwqUvRZuKTJMqg==", + "rootDataFileId": -1, + "md5": "7b1201ce6b469796837a835377338c5a", + "checksum": { + "type": "MD5", + "value": "7b1201ce6b469796837a835377338c5a" + }, + "tabularData": true, + "creationDate": "2025-05-16", + "publicationDate": "2025-05-16", + "fileAccessRequest": true + } + } + ], + "citation": "Durbin, Philip, 2025, \"Cars\", https://doi.org/10.5072/FK2/CY7BWA, Root, V1, UNF:6:RPd9EWHSZwqUvRZuKTJMqg== [fileUNF]" + } +} diff --git a/src/test/resources/croissant/cars/in/datasetORE.json b/src/test/resources/croissant/cars/in/datasetORE.json new file mode 100644 index 00000000000..0b244ada0c1 --- /dev/null +++ b/src/test/resources/croissant/cars/in/datasetORE.json @@ -0,0 +1,133 @@ +{ + "dcterms:modified": "2025-05-19", + "dcterms:creator": "Root", + "@type": "ore:ResourceMap", + "schema:additionalType": "Dataverse OREMap Format v1.0.1", + "dvcore:generatedBy": { + "@type": "schema:SoftwareApplication", + "schema:name": "Dataverse", + "schema:version": "6.6", + "schema:url": "https://github.com/iqss/dataverse" + }, + "@id": "http://localhost:8080/api/datasets/export?exporter=OAI_ORE&persistentId=https://doi.org/10.5072/FK2/CY7BWA", + "ore:describes": { + "citation:datasetContact": { + "citation:datasetContactName": "Durbin, Philip", + "citation:datasetContactAffiliation": "Harvard", + "citation:datasetContactEmail": "dataverse@mailinator.com" + }, + "author": { + "citation:authorName": "Durbin, Philip", + "citation:authorAffiliation": "Harvard" + }, + "citation:dsDescription": { + "citation:dsDescriptionValue": "This dataset is about cars." 
+ }, + "dateOfDeposit": "2024-03-13", + "title": "Cars", + "citation:depositor": "Durbin, Philip", + "subject": "Other", + "@id": "https://doi.org/10.5072/FK2/CY7BWA", + "@type": [ + "ore:Aggregation", + "schema:Dataset" + ], + "schema:version": "1.0", + "schema:name": "Cars", + "schema:dateModified": "Fri May 16 16:33:18 UTC 2025", + "schema:datePublished": "2025-05-16", + "schema:creativeWorkStatus": "RELEASED", + "schema:license": "http://creativecommons.org/publicdomain/zero/1.0", + "dvcore:fileTermsOfAccess": { + "dvcore:fileRequestAccess": true + }, + "schema:includedInDataCatalog": "Root", + "schema:isPartOf": { + "schema:name": "Cars", + "@id": "http://localhost:8080/dataverse/cars", + "schema:description": "Data about cars.", + "schema:isPartOf": { + "schema:name": "Root", + "@id": "http://localhost:8080/dataverse/root", + "schema:description": "The root dataverse." + } + }, + "ore:aggregates": [ + { + "schema:description": "", + "schema:name": "compute.py", + "dvcore:restricted": false, + "dvcore:directoryLabel": "code", + "schema:version": 2, + "dvcore:datasetVersionId": 3, + "@id": "http://localhost:8080/file.xhtml?fileId=7", + "schema:sameAs": "http://localhost:8080/api/access/datafile/7", + "@type": "ore:AggregatedResource", + "schema:fileFormat": "text/x-python", + "dvcore:filesize": 15, + "dvcore:storageIdentifier": "local://196d9f154f7-8cadf34ee905", + "dvcore:rootDataFileId": -1, + "dvcore:checksum": { + "@type": "MD5", + "@value": "d84985e94dde671f318076bd7a137f15" + } + }, + { + "schema:description": "", + "schema:name": "README.md", + "dvcore:restricted": false, + "dvcore:directoryLabel": "doc", + "schema:version": 2, + "dvcore:datasetVersionId": 3, + "@id": "http://localhost:8080/file.xhtml?fileId=8", + "schema:sameAs": "http://localhost:8080/api/access/datafile/8", + "@type": "ore:AggregatedResource", + "schema:fileFormat": "text/markdown", + "dvcore:filesize": 28, + "dvcore:storageIdentifier": "local://196d9f15664-1d4bb4e96a97", + 
"dvcore:rootDataFileId": -1, + "dvcore:checksum": { + "@type": "MD5", + "@value": "a2e484d07ee5590cc32182dc2c6ccc83" + } + }, + { + "schema:description": "", + "schema:name": "stata13-auto.dta", + "dvcore:restricted": false, + "dvcore:directoryLabel": "data", + "schema:version": 4, + "dvcore:datasetVersionId": 3, + "@id": "http://localhost:8080/file.xhtml?fileId=9", + "schema:sameAs": "http://localhost:8080/api/access/datafile/9?format=original", + "@type": "ore:AggregatedResource", + "schema:fileFormat": "application/x-stata-13", + "dvcore:filesize": 6443, + "dvcore:storageIdentifier": "local://196d9f15719-2270bfca2b48", + "dvcore:currentIngestedName": "stata13-auto.tab", + "dvcore:UNF": "UNF:6:RPd9EWHSZwqUvRZuKTJMqg==", + "dvcore:rootDataFileId": -1, + "dvcore:checksum": { + "@type": "MD5", + "@value": "7b1201ce6b469796837a835377338c5a" + } + } + ], + "schema:hasPart": [ + "http://localhost:8080/file.xhtml?fileId=7", + "http://localhost:8080/file.xhtml?fileId=8", + "http://localhost:8080/file.xhtml?fileId=9" + ] + }, + "@context": { + "author": "http://purl.org/dc/terms/creator", + "citation": "https://dataverse.org/schema/citation/", + "dateOfDeposit": "http://purl.org/dc/terms/dateSubmitted", + "dcterms": "http://purl.org/dc/terms/", + "dvcore": "https://dataverse.org/schema/core#", + "ore": "http://www.openarchives.org/ore/terms/", + "schema": "http://schema.org/", + "subject": "http://purl.org/dc/terms/subject", + "title": "http://purl.org/dc/terms/title" + } +} diff --git a/src/test/resources/croissant/cars/in/datasetSchemaDotOrg.json b/src/test/resources/croissant/cars/in/datasetSchemaDotOrg.json new file mode 100644 index 00000000000..83f587c5fd7 --- /dev/null +++ b/src/test/resources/croissant/cars/in/datasetSchemaDotOrg.json @@ -0,0 +1,78 @@ +{ + "@context": "http://schema.org", + "@type": "Dataset", + "@id": "https://doi.org/10.5072/FK2/CY7BWA", + "identifier": "https://doi.org/10.5072/FK2/CY7BWA", + "name": "Cars", + "creator": [ + { + "@type": 
"Person", + "givenName": "Philip", + "familyName": "Durbin", + "affiliation": { + "@type": "Organization", + "name": "Harvard" + }, + "name": "Durbin, Philip" + } + ], + "author": [ + { + "@type": "Person", + "givenName": "Philip", + "familyName": "Durbin", + "affiliation": { + "@type": "Organization", + "name": "Harvard" + }, + "name": "Durbin, Philip" + } + ], + "datePublished": "2025-05-16", + "dateModified": "2025-05-16", + "version": "1", + "description": "This dataset is about cars.", + "keywords": [ + "Other" + ], + "license": "http://creativecommons.org/publicdomain/zero/1.0", + "includedInDataCatalog": { + "@type": "DataCatalog", + "name": "Root", + "url": "http://localhost:8080" + }, + "publisher": { + "@type": "Organization", + "name": "Root" + }, + "provider": { + "@type": "Organization", + "name": "Root" + }, + "distribution": [ + { + "@type": "DataDownload", + "name": "compute.py", + "encodingFormat": "text/x-python", + "contentSize": 15, + "description": "", + "contentUrl": "http://localhost:8080/api/access/datafile/7" + }, + { + "@type": "DataDownload", + "name": "stata13-auto.tab", + "encodingFormat": "text/tab-separated-values", + "contentSize": 4026, + "description": "", + "contentUrl": "http://localhost:8080/api/access/datafile/9" + }, + { + "@type": "DataDownload", + "name": "README.md", + "encodingFormat": "text/markdown", + "contentSize": 28, + "description": "", + "contentUrl": "http://localhost:8080/api/access/datafile/8" + } + ] +} diff --git a/src/test/resources/croissant/draft/expected/draft-croissant.json b/src/test/resources/croissant/draft/expected/draft-croissant.json new file mode 100644 index 00000000000..b2065f79195 --- /dev/null +++ b/src/test/resources/croissant/draft/expected/draft-croissant.json @@ -0,0 +1,94 @@ +{ + "@context": { + "@language": "en", + "@vocab": "https://schema.org/", + "citeAs": "cr:citeAs", + "column": "cr:column", + "conformsTo": "dct:conformsTo", + "cr": "http://mlcommons.org/croissant/", + "rai": 
"http://mlcommons.org/croissant/RAI/", + "data": { + "@id": "cr:data", + "@type": "@json" + }, + "dataType": { + "@id": "cr:dataType", + "@type": "@vocab" + }, + "dct": "http://purl.org/dc/terms/", + "examples": { + "@id": "cr:examples", + "@type": "@json" + }, + "extract": "cr:extract", + "field": "cr:field", + "fileProperty": "cr:fileProperty", + "fileObject": "cr:fileObject", + "fileSet": "cr:fileSet", + "format": "cr:format", + "includes": "cr:includes", + "isLiveDataset": "cr:isLiveDataset", + "jsonPath": "cr:jsonPath", + "key": "cr:key", + "md5": "cr:md5", + "parentField": "cr:parentField", + "path": "cr:path", + "recordSet": "cr:recordSet", + "references": "cr:references", + "regex": "cr:regex", + "repeated": "cr:repeated", + "replace": "cr:replace", + "samplingRate": "cr:samplingRate", + "sc": "https://schema.org/", + "separator": "cr:separator", + "source": "cr:source", + "subField": "cr:subField", + "transform": "cr:transform", + "wd": "https://www.wikidata.org/wiki/" + }, + "@type": "sc:Dataset", + "conformsTo": "http://mlcommons.org/croissant/1.0", + "name": "Draft Dataset", + "url": "https://doi.org/10.5072/FK2/OO7TEP", + "creator": [ + { + "@type": "Person", + "givenName": "Draft", + "familyName": "Punk", + "affiliation": { + "@type": "Organization", + "name": "French house" + }, + "name": "Punk, Draft" + } + ], + "description": "This dataset hasn't been published yet.", + "keywords": [ + "Other" + ], + "license": "http://creativecommons.org/publicdomain/zero/1.0", + "dateModified": "", + "includedInDataCatalog": { + "@type": "DataCatalog", + "name": "Root", + "url": "http://localhost:8080" + }, + "publisher": { + "@type": "Organization", + "name": "Root" + }, + "version": "DRAFT", + "citeAs": "@data{FK2/OO7TEP,author = {Punk, Draft},publisher = {Root},title = {Draft Dataset},url = {https://doi.org/10.5072/FK2/OO7TEP}}", + "distribution": [ + { + "@type": "cr:FileObject", + "@id": "data.txt", + "name": "data.txt", + "encodingFormat": "text/plain", + 
"md5": "050644e853fdfe46a3707695ba2fe736", + "contentSize": "18", + "description": "", + "contentUrl": "http://localhost:8080/api/access/datafile/4" + } + ] +} \ No newline at end of file diff --git a/src/test/resources/croissant/draft/expected/draft-croissantSlim.json b/src/test/resources/croissant/draft/expected/draft-croissantSlim.json new file mode 100644 index 00000000000..30eabda4c9c --- /dev/null +++ b/src/test/resources/croissant/draft/expected/draft-croissantSlim.json @@ -0,0 +1,82 @@ +{ + "@context": { + "@language": "en", + "@vocab": "https://schema.org/", + "citeAs": "cr:citeAs", + "column": "cr:column", + "conformsTo": "dct:conformsTo", + "cr": "http://mlcommons.org/croissant/", + "rai": "http://mlcommons.org/croissant/RAI/", + "data": { + "@id": "cr:data", + "@type": "@json" + }, + "dataType": { + "@id": "cr:dataType", + "@type": "@vocab" + }, + "dct": "http://purl.org/dc/terms/", + "examples": { + "@id": "cr:examples", + "@type": "@json" + }, + "extract": "cr:extract", + "field": "cr:field", + "fileProperty": "cr:fileProperty", + "fileObject": "cr:fileObject", + "fileSet": "cr:fileSet", + "format": "cr:format", + "includes": "cr:includes", + "isLiveDataset": "cr:isLiveDataset", + "jsonPath": "cr:jsonPath", + "key": "cr:key", + "md5": "cr:md5", + "parentField": "cr:parentField", + "path": "cr:path", + "recordSet": "cr:recordSet", + "references": "cr:references", + "regex": "cr:regex", + "repeated": "cr:repeated", + "replace": "cr:replace", + "samplingRate": "cr:samplingRate", + "sc": "https://schema.org/", + "separator": "cr:separator", + "source": "cr:source", + "subField": "cr:subField", + "transform": "cr:transform", + "wd": "https://www.wikidata.org/wiki/" + }, + "@type": "sc:Dataset", + "conformsTo": "http://mlcommons.org/croissant/1.0", + "name": "Draft Dataset", + "url": "https://doi.org/10.5072/FK2/OO7TEP", + "creator": [ + { + "@type": "Person", + "givenName": "Draft", + "familyName": "Punk", + "affiliation": { + "@type": "Organization", + 
"name": "French house" + }, + "name": "Punk, Draft" + } + ], + "description": "This dataset hasn't been published yet.", + "keywords": [ + "Other" + ], + "license": "http://creativecommons.org/publicdomain/zero/1.0", + "dateModified": "", + "includedInDataCatalog": { + "@type": "DataCatalog", + "name": "Root", + "url": "http://localhost:8080" + }, + "publisher": { + "@type": "Organization", + "name": "Root" + }, + "version": "DRAFT", + "citeAs": "@data{FK2/OO7TEP,author = {Punk, Draft},publisher = {Root},title = {Draft Dataset},url = {https://doi.org/10.5072/FK2/OO7TEP}}" +} diff --git a/src/test/resources/croissant/draft/in/dataCiteXml.xml b/src/test/resources/croissant/draft/in/dataCiteXml.xml new file mode 100644 index 00000000000..814f3d365e7 --- /dev/null +++ b/src/test/resources/croissant/draft/in/dataCiteXml.xml @@ -0,0 +1,46 @@ + + + 10.5072/FK2/OO7TEP + + + Punk, Draft + Draft + Punk + French house + + + + Draft Dataset + + Root + 2025 + + Other + + + + Admin, Dataverse + Dataverse + Admin + Dataverse.org + + + + 2025-04-14 + + + + 18 + + + text/plain + + DRAFT + + + Creative Commons CC0 1.0 Universal Public Domain Dedication. + + + This dataset hasn&apos;t been published yet. 
+ + diff --git a/src/test/resources/croissant/draft/in/datasetFileDetails.json b/src/test/resources/croissant/draft/in/datasetFileDetails.json new file mode 100644 index 00000000000..1460aedba00 --- /dev/null +++ b/src/test/resources/croissant/draft/in/datasetFileDetails.json @@ -0,0 +1,23 @@ +[ + { + "id": 4, + "persistentId": "", + "filename": "data.txt", + "contentType": "text/plain", + "friendlyType": "Plain Text", + "filesize": 18, + "storageIdentifier": "local://196347bdb85-7b4820f8e4ef", + "rootDataFileId": -1, + "md5": "050644e853fdfe46a3707695ba2fe736", + "checksum": { + "type": "MD5", + "value": "050644e853fdfe46a3707695ba2fe736" + }, + "tabularData": false, + "creationDate": "2025-04-14", + "fileAccessRequest": false, + "restricted": false, + "fileMetadataId": 1, + "varGroups": [] + } +] diff --git a/src/test/resources/croissant/draft/in/datasetJson.json b/src/test/resources/croissant/draft/in/datasetJson.json new file mode 100644 index 00000000000..bbfd30ed03a --- /dev/null +++ b/src/test/resources/croissant/draft/in/datasetJson.json @@ -0,0 +1,156 @@ +{ + "id": 3, + "identifier": "FK2/OO7TEP", + "persistentUrl": "https://doi.org/10.5072/FK2/OO7TEP", + "protocol": "doi", + "authority": "10.5072", + "separator": "/", + "publisher": "Root", + "storageIdentifier": "local://10.5072/FK2/OO7TEP", + "datasetType": "dataset", + "datasetVersion": { + "id": 1, + "datasetId": 3, + "datasetPersistentId": "doi:10.5072/FK2/OO7TEP", + "storageIdentifier": "local://10.5072/FK2/OO7TEP", + "internalVersionNumber": 2, + "versionState": "DRAFT", + "latestVersionPublishingState": "DRAFT", + "lastUpdateTime": "2025-04-14T13:27:47Z", + "createTime": "2025-04-14T13:26:41Z", + "license": { + "name": "CC0 1.0", + "uri": "http://creativecommons.org/publicdomain/zero/1.0", + "iconUri": "https://licensebuttons.net/p/zero/1.0/88x31.png", + "rightsIdentifier": "CC0-1.0", + "rightsIdentifierScheme": "SPDX", + "schemeUri": "https://spdx.org/licenses/", + "languageCode": "en" + }, + 
"fileAccessRequest": true, + "metadataBlocks": { + "citation": { + "displayName": "Citation Metadata", + "name": "citation", + "fields": [ + { + "typeName": "title", + "multiple": false, + "typeClass": "primitive", + "value": "Draft Dataset" + }, + { + "typeName": "author", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "authorName": { + "typeName": "authorName", + "multiple": false, + "typeClass": "primitive", + "value": "Punk, Draft" + }, + "authorAffiliation": { + "typeName": "authorAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "French house" + } + } + ] + }, + { + "typeName": "datasetContact", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "datasetContactName": { + "typeName": "datasetContactName", + "multiple": false, + "typeClass": "primitive", + "value": "Admin, Dataverse" + }, + "datasetContactAffiliation": { + "typeName": "datasetContactAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "Dataverse.org" + }, + "datasetContactEmail": { + "typeName": "datasetContactEmail", + "multiple": false, + "typeClass": "primitive", + "value": "dataverse@mailinator.com" + } + } + ] + }, + { + "typeName": "dsDescription", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "dsDescriptionValue": { + "typeName": "dsDescriptionValue", + "multiple": false, + "typeClass": "primitive", + "value": "This dataset hasn't been published yet." 
+ } + } + ] + }, + { + "typeName": "subject", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "Other" + ] + }, + { + "typeName": "depositor", + "multiple": false, + "typeClass": "primitive", + "value": "Admin, Dataverse" + }, + { + "typeName": "dateOfDeposit", + "multiple": false, + "typeClass": "primitive", + "value": "2025-04-14" + } + ] + } + }, + "files": [ + { + "label": "data.txt", + "restricted": false, + "version": 1, + "datasetVersionId": 1, + "dataFile": { + "id": 4, + "persistentId": "", + "filename": "data.txt", + "contentType": "text/plain", + "friendlyType": "Plain Text", + "filesize": 18, + "storageIdentifier": "local://196347bdb85-7b4820f8e4ef", + "rootDataFileId": -1, + "md5": "050644e853fdfe46a3707695ba2fe736", + "checksum": { + "type": "MD5", + "value": "050644e853fdfe46a3707695ba2fe736" + }, + "tabularData": false, + "creationDate": "2025-04-14", + "fileAccessRequest": false + } + } + ], + "citation": "Punk, Draft, 2025, \"Draft Dataset\", https://doi.org/10.5072/FK2/OO7TEP, Root, DRAFT VERSION" + } +} diff --git a/src/test/resources/croissant/draft/in/datasetORE.json b/src/test/resources/croissant/draft/in/datasetORE.json new file mode 100644 index 00000000000..8f9cfe6fb63 --- /dev/null +++ b/src/test/resources/croissant/draft/in/datasetORE.json @@ -0,0 +1,87 @@ +{ + "dcterms:modified": "2025-04-14", + "dcterms:creator": "Root", + "@type": "ore:ResourceMap", + "schema:additionalType": "Dataverse OREMap Format v1.0.1", + "dvcore:generatedBy": { + "@type": "schema:SoftwareApplication", + "schema:name": "Dataverse", + "schema:version": "6.6", + "schema:url": "https://github.com/iqss/dataverse" + }, + "@id": "http://localhost:8080/api/datasets/export?exporter=OAI_ORE&persistentId=https://doi.org/10.5072/FK2/OO7TEP", + "ore:describes": { + "author": { + "citation:authorName": "Punk, Draft", + "citation:authorAffiliation": "French house" + }, + "citation:datasetContact": { + "citation:datasetContactName": "Admin, Dataverse", 
+ "citation:datasetContactAffiliation": "Dataverse.org", + "citation:datasetContactEmail": "dataverse@mailinator.com" + }, + "citation:dsDescription": { + "citation:dsDescriptionValue": "This dataset hasn't been published yet." + }, + "dateOfDeposit": "2025-04-14", + "citation:depositor": "Admin, Dataverse", + "subject": "Other", + "title": "Draft Dataset", + "@id": "https://doi.org/10.5072/FK2/OO7TEP", + "@type": [ + "ore:Aggregation", + "schema:Dataset" + ], + "schema:version": "DRAFT", + "schema:name": "Draft Dataset", + "schema:dateModified": "Mon Apr 14 13:27:47 UTC 2025", + "schema:creativeWorkStatus": "DRAFT", + "schema:license": "http://creativecommons.org/publicdomain/zero/1.0", + "dvcore:fileTermsOfAccess": { + "dvcore:fileRequestAccess": true + }, + "schema:includedInDataCatalog": "Root", + "schema:isPartOf": { + "schema:name": "Draft Collection", + "@id": "http://localhost:8080/dataverse/draft", + "schema:isPartOf": { + "schema:name": "Root", + "@id": "http://localhost:8080/dataverse/root", + "schema:description": "The root dataverse." 
+ } + }, + "ore:aggregates": [ + { + "schema:name": "data.txt", + "dvcore:restricted": false, + "schema:version": 1, + "dvcore:datasetVersionId": 1, + "@id": "http://localhost:8080/file.xhtml?fileId=4", + "schema:sameAs": "http://localhost:8080/api/access/datafile/4", + "@type": "ore:AggregatedResource", + "schema:fileFormat": "text/plain", + "dvcore:filesize": 18, + "dvcore:storageIdentifier": "local://196347bdb85-7b4820f8e4ef", + "dvcore:rootDataFileId": -1, + "dvcore:checksum": { + "@type": "MD5", + "@value": "050644e853fdfe46a3707695ba2fe736" + } + } + ], + "schema:hasPart": [ + "http://localhost:8080/file.xhtml?fileId=4" + ] + }, + "@context": { + "author": "http://purl.org/dc/terms/creator", + "citation": "https://dataverse.org/schema/citation/", + "dateOfDeposit": "http://purl.org/dc/terms/dateSubmitted", + "dcterms": "http://purl.org/dc/terms/", + "dvcore": "https://dataverse.org/schema/core#", + "ore": "http://www.openarchives.org/ore/terms/", + "schema": "http://schema.org/", + "subject": "http://purl.org/dc/terms/subject", + "title": "http://purl.org/dc/terms/title" + } +} diff --git a/src/test/resources/croissant/draft/in/datasetSchemaDotOrg.json b/src/test/resources/croissant/draft/in/datasetSchemaDotOrg.json new file mode 100644 index 00000000000..62328140af8 --- /dev/null +++ b/src/test/resources/croissant/draft/in/datasetSchemaDotOrg.json @@ -0,0 +1,60 @@ +{ + "@context": "http://schema.org", + "@type": "Dataset", + "@id": "https://doi.org/10.5072/FK2/OO7TEP", + "identifier": "https://doi.org/10.5072/FK2/OO7TEP", + "name": "Draft Dataset", + "creator": [ + { + "@type": "Person", + "givenName": "Draft", + "familyName": "Punk", + "affiliation": { + "@type": "Organization", + "name": "French house" + }, + "name": "Punk, Draft" + } + ], + "author": [ + { + "@type": "Person", + "givenName": "Draft", + "familyName": "Punk", + "affiliation": { + "@type": "Organization", + "name": "French house" + }, + "name": "Punk, Draft" + } + ], + "dateModified": "", + 
"version": "DRAFT", + "description": "This dataset hasn't been published yet.", + "keywords": [ + "Other" + ], + "license": "http://creativecommons.org/publicdomain/zero/1.0", + "includedInDataCatalog": { + "@type": "DataCatalog", + "name": "Root", + "url": "http://localhost:8080" + }, + "publisher": { + "@type": "Organization", + "name": "Root" + }, + "provider": { + "@type": "Organization", + "name": "Root" + }, + "distribution": [ + { + "@type": "DataDownload", + "name": "data.txt", + "encodingFormat": "text/plain", + "contentSize": 18, + "contentUrl": "http://localhost:8080/api/access/datafile/4" + } + ] +} diff --git a/src/test/resources/croissant/junk/expected/junk-croissant.json b/src/test/resources/croissant/junk/expected/junk-croissant.json new file mode 100644 index 00000000000..b02bed5694e --- /dev/null +++ b/src/test/resources/croissant/junk/expected/junk-croissant.json @@ -0,0 +1,83 @@ +{ + "@context": { + "@language": "en", + "@vocab": "https://schema.org/", + "citeAs": "cr:citeAs", + "column": "cr:column", + "conformsTo": "dct:conformsTo", + "cr": "http://mlcommons.org/croissant/", + "rai": "http://mlcommons.org/croissant/RAI/", + "data": { + "@id": "cr:data", + "@type": "@json" + }, + "dataType": { + "@id": "cr:dataType", + "@type": "@vocab" + }, + "dct": "http://purl.org/dc/terms/", + "examples": { + "@id": "cr:examples", + "@type": "@json" + }, + "extract": "cr:extract", + "field": "cr:field", + "fileProperty": "cr:fileProperty", + "fileObject": "cr:fileObject", + "fileSet": "cr:fileSet", + "format": "cr:format", + "includes": "cr:includes", + "isLiveDataset": "cr:isLiveDataset", + "jsonPath": "cr:jsonPath", + "key": "cr:key", + "md5": "cr:md5", + "parentField": "cr:parentField", + "path": "cr:path", + "recordSet": "cr:recordSet", + "references": "cr:references", + "regex": "cr:regex", + "repeated": "cr:repeated", + "replace": "cr:replace", + "samplingRate": "cr:samplingRate", + "sc": "https://schema.org/", + "separator": "cr:separator", + 
"source": "cr:source", + "subField": "cr:subField", + "transform": "cr:transform", + "wd": "https://www.wikidata.org/wiki/" + }, + "@type": "sc:Dataset", + "conformsTo": "http://mlcommons.org/croissant/1.0", + "name": "</script><script>alert(666)</script>", + "url": "https://doi.org/10.5072/FK2/0CNXUJ", + "creator": [ + { + "@type": "Person", + "givenName": "Sylvester", + "familyName": "Ritter", + "affiliation": { + "@type": "Organization", + "name": "WWF" + }, + "name": "Ritter, Sylvester" + } + ], + "description": "A junk dataset.", + "keywords": [ + "Other" + ], + "license": "http://creativecommons.org/publicdomain/zero/1.0", + "datePublished": "2025-03-13", + "dateModified": "2025-03-13", + "includedInDataCatalog": { + "@type": "DataCatalog", + "name": "Root", + "url": "http://localhost:8080" + }, + "publisher": { + "@type": "Organization", + "name": "Root" + }, + "version": "1.0", + "citeAs": "@data{FK2/0CNXUJ_2025,author = {Ritter, Sylvester},publisher = {Root},title = {},year = {2025},url = {https://doi.org/10.5072/FK2/0CNXUJ}}" +} \ No newline at end of file diff --git a/src/test/resources/croissant/junk/in/dataCiteXml.xml b/src/test/resources/croissant/junk/in/dataCiteXml.xml new file mode 100644 index 00000000000..d6c11b056e2 --- /dev/null +++ b/src/test/resources/croissant/junk/in/dataCiteXml.xml @@ -0,0 +1,33 @@ + + + 10.5072/FK2/0CNXUJ + + + Ritter, Sylvester + Sylvester + Ritter + WWF + + + + :unav + + Root + 2025 + + Other + + + 2025-03-13 + 2025-03-13 + + + 1.0 + + + Creative Commons CC0 1.0 Universal Public Domain Dedication. + + + A junk dataset. 
+ + diff --git a/src/test/resources/croissant/junk/in/datasetFileDetails.json b/src/test/resources/croissant/junk/in/datasetFileDetails.json new file mode 100644 index 00000000000..fe51488c706 --- /dev/null +++ b/src/test/resources/croissant/junk/in/datasetFileDetails.json @@ -0,0 +1 @@ +[] diff --git a/src/test/resources/croissant/junk/in/datasetJson.json b/src/test/resources/croissant/junk/in/datasetJson.json new file mode 100644 index 00000000000..984ae55cb92 --- /dev/null +++ b/src/test/resources/croissant/junk/in/datasetJson.json @@ -0,0 +1,124 @@ +{ + "id": 2, + "identifier": "FK2/0CNXUJ", + "persistentUrl": "https://doi.org/10.5072/FK2/0CNXUJ", + "protocol": "doi", + "authority": "10.5072", + "separator": "/", + "publisher": "Root", + "publicationDate": "2025-03-13", + "storageIdentifier": "local://10.5072/FK2/0CNXUJ", + "datasetType": "dataset", + "datasetVersion": { + "id": 1, + "datasetId": 2, + "datasetPersistentId": "doi:10.5072/FK2/0CNXUJ", + "storageIdentifier": "local://10.5072/FK2/0CNXUJ", + "versionNumber": 1, + "versionMinorNumber": 0, + "versionState": "RELEASED", + "latestVersionPublishingState": "RELEASED", + "lastUpdateTime": "2025-03-13T14:56:36Z", + "releaseTime": "2025-03-13T14:56:36Z", + "createTime": "2025-03-13T14:56:26Z", + "publicationDate": "2025-03-13", + "citationDate": "2025-03-13", + "license": { + "name": "CC0 1.0", + "uri": "http://creativecommons.org/publicdomain/zero/1.0", + "iconUri": "https://licensebuttons.net/p/zero/1.0/88x31.png", + "rightsIdentifier": "CC0-1.0", + "rightsIdentifierScheme": "SPDX", + "schemeUri": "https://spdx.org/licenses/", + "languageCode": "en" + }, + "fileAccessRequest": true, + "metadataBlocks": { + "citation": { + "displayName": "Citation Metadata", + "name": "citation", + "fields": [ + { + "typeName": "title", + "multiple": false, + "typeClass": "primitive", + "value": "" + }, + { + "typeName": "author", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "authorName": { + 
"typeName": "authorName", + "multiple": false, + "typeClass": "primitive", + "value": "Ritter, Sylvester" + }, + "authorAffiliation": { + "typeName": "authorAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "WWF" + } + } + ] + }, + { + "typeName": "datasetContact", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "datasetContactEmail": { + "typeName": "datasetContactEmail", + "multiple": false, + "typeClass": "primitive", + "value": "dataverse@mailinator.com" + } + } + ] + }, + { + "typeName": "dsDescription", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "dsDescriptionValue": { + "typeName": "dsDescriptionValue", + "multiple": false, + "typeClass": "primitive", + "value": "A junk dataset." + } + } + ] + }, + { + "typeName": "subject", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "Other" + ] + }, + { + "typeName": "depositor", + "multiple": false, + "typeClass": "primitive", + "value": "Admin, Dataverse" + }, + { + "typeName": "dateOfDeposit", + "multiple": false, + "typeClass": "primitive", + "value": "2025-03-13" + } + ] + } + }, + "files": [], + "citation": "Ritter, Sylvester, 2025, https://doi.org/10.5072/FK2/0CNXUJ, Root, V1" + } +} diff --git a/src/test/resources/croissant/junk/in/datasetORE.json b/src/test/resources/croissant/junk/in/datasetORE.json new file mode 100644 index 00000000000..646955bbb17 --- /dev/null +++ b/src/test/resources/croissant/junk/in/datasetORE.json @@ -0,0 +1,62 @@ +{ + "dcterms:modified": "2025-03-13", + "dcterms:creator": "Root", + "@type": "ore:ResourceMap", + "schema:additionalType": "Dataverse OREMap Format v1.0.1", + "dvcore:generatedBy": { + "@type": "schema:SoftwareApplication", + "schema:name": "Dataverse", + "schema:version": "6.5", + "schema:url": "https://github.com/iqss/dataverse" + }, + "@id": "http://localhost:8080/api/datasets/export?exporter=OAI_ORE&persistentId=https://doi.org/10.5072/FK2/0CNXUJ", + "ore:describes": { + 
"citation:dsDescription": { + "citation:dsDescriptionValue": "A junk dataset." + }, + "author": { + "citation:authorName": "Ritter, Sylvester", + "citation:authorAffiliation": "WWF" + }, + "citation:datasetContact": { + "citation:datasetContactEmail": "dataverse@mailinator.com" + }, + "citation:depositor": "Admin, Dataverse", + "subject": "Other", + "title": "", + "dateOfDeposit": "2025-03-13", + "@id": "https://doi.org/10.5072/FK2/0CNXUJ", + "@type": [ + "ore:Aggregation", + "schema:Dataset" + ], + "schema:version": "1.0", + "schema:name": "", + "schema:dateModified": "Thu Mar 13 14:56:36 UTC 2025", + "schema:datePublished": "2025-03-13", + "schema:creativeWorkStatus": "RELEASED", + "schema:license": "http://creativecommons.org/publicdomain/zero/1.0", + "dvcore:fileTermsOfAccess": { + "dvcore:fileRequestAccess": true + }, + "schema:includedInDataCatalog": "Root", + "schema:isPartOf": { + "schema:name": "Root", + "@id": "http://localhost:8080/dataverse/root", + "schema:description": "The root dataverse." 
+ }, + "ore:aggregates": [], + "schema:hasPart": [] + }, + "@context": { + "author": "http://purl.org/dc/terms/creator", + "citation": "https://dataverse.org/schema/citation/", + "dateOfDeposit": "http://purl.org/dc/terms/dateSubmitted", + "dcterms": "http://purl.org/dc/terms/", + "dvcore": "https://dataverse.org/schema/core#", + "ore": "http://www.openarchives.org/ore/terms/", + "schema": "http://schema.org/", + "subject": "http://purl.org/dc/terms/subject", + "title": "http://purl.org/dc/terms/title" + } +} diff --git a/src/test/resources/croissant/junk/in/datasetSchemaDotOrg.json b/src/test/resources/croissant/junk/in/datasetSchemaDotOrg.json new file mode 100644 index 00000000000..e487f075115 --- /dev/null +++ b/src/test/resources/croissant/junk/in/datasetSchemaDotOrg.json @@ -0,0 +1,52 @@ +{ + "@context": "http://schema.org", + "@type": "Dataset", + "@id": "https://doi.org/10.5072/FK2/0CNXUJ", + "identifier": "https://doi.org/10.5072/FK2/0CNXUJ", + "name": "", + "creator": [ + { + "@type": "Person", + "givenName": "Sylvester", + "familyName": "Ritter", + "affiliation": { + "@type": "Organization", + "name": "WWF" + }, + "name": "Ritter, Sylvester" + } + ], + "author": [ + { + "@type": "Person", + "givenName": "Sylvester", + "familyName": "Ritter", + "affiliation": { + "@type": "Organization", + "name": "WWF" + }, + "name": "Ritter, Sylvester" + } + ], + "datePublished": "2025-03-13", + "dateModified": "2025-03-13", + "version": "1", + "description": "A junk dataset.", + "keywords": [ + "Other" + ], + "license": "http://creativecommons.org/publicdomain/zero/1.0", + "includedInDataCatalog": { + "@type": "DataCatalog", + "name": "Root", + "url": "http://localhost:8080" + }, + "publisher": { + "@type": "Organization", + "name": "Root" + }, + "provider": { + "@type": "Organization", + "name": "Root" + } +} diff --git a/src/test/resources/croissant/max/expected/max-croissant.json b/src/test/resources/croissant/max/expected/max-croissant.json new file mode 100644 
index 00000000000..bf1941c7289 --- /dev/null +++ b/src/test/resources/croissant/max/expected/max-croissant.json @@ -0,0 +1,196 @@ +{ + "@context": { + "@language": "en", + "@vocab": "https://schema.org/", + "citeAs": "cr:citeAs", + "column": "cr:column", + "conformsTo": "dct:conformsTo", + "cr": "http://mlcommons.org/croissant/", + "rai": "http://mlcommons.org/croissant/RAI/", + "data": { + "@id": "cr:data", + "@type": "@json" + }, + "dataType": { + "@id": "cr:dataType", + "@type": "@vocab" + }, + "dct": "http://purl.org/dc/terms/", + "examples": { + "@id": "cr:examples", + "@type": "@json" + }, + "extract": "cr:extract", + "field": "cr:field", + "fileProperty": "cr:fileProperty", + "fileObject": "cr:fileObject", + "fileSet": "cr:fileSet", + "format": "cr:format", + "includes": "cr:includes", + "isLiveDataset": "cr:isLiveDataset", + "jsonPath": "cr:jsonPath", + "key": "cr:key", + "md5": "cr:md5", + "parentField": "cr:parentField", + "path": "cr:path", + "recordSet": "cr:recordSet", + "references": "cr:references", + "regex": "cr:regex", + "repeated": "cr:repeated", + "replace": "cr:replace", + "samplingRate": "cr:samplingRate", + "sc": "https://schema.org/", + "separator": "cr:separator", + "source": "cr:source", + "subField": "cr:subField", + "transform": "cr:transform", + "wd": "https://www.wikidata.org/wiki/" + }, + "@type": "sc:Dataset", + "conformsTo": "http://mlcommons.org/croissant/1.0", + "name": "Max Schema.org", + "url": "https://doi.org/10.5072/FK2/VQTYHD", + "creator": [ + { + "@type": "Person", + "givenName": "Philip", + "familyName": "Durbin", + "affiliation": { + "@type": "Organization", + "name": "Harvard University" + }, + "sameAs": "https://orcid.org/0000-0002-9528-9470", + "@id": "https://orcid.org/0000-0002-9528-9470", + "identifier": "https://orcid.org/0000-0002-9528-9470", + "name": "Durbin, Philip" + }, + { + "@type": "Person", + "affiliation": { + "@type": "Organization", + "name": "Harvard University" + }, + "name": "IQSS" + } + ], + 
"description": "Exercising fields used by `schema.org` exporter.", + "keywords": [ + "Social Sciences", + "Other", + "foo", + "bar" + ], + "license": "http://creativecommons.org/publicdomain/zero/1.0", + "datePublished": "2024-05-01", + "dateModified": "2025-05-21", + "includedInDataCatalog": { + "@type": "DataCatalog", + "name": "Root", + "url": "https://beta.dataverse.org" + }, + "publisher": { + "@type": "Organization", + "name": "Root" + }, + "version": "3.0", + "citeAs": "@data{FK2/VQTYHD_2024,author = {Durbin, Philip and IQSS},publisher = {Root},title = {Max Schema.org},year = {2024},url = {https://doi.org/10.5072/FK2/VQTYHD}}", + "funder": [ + { + "@type": "Organization", + "name": "NSF" + }, + { + "@type": "Organization", + "name": "NIH" + } + ], + "spatialCoverage": [ + "Cambridge, MA, United States, Harvard Square" + ], + "citation": [ + { + "@type": "CreativeWork", + "name": "Tykhonov, V., & Durbin, P. (2024, March 20). Croissant ML standard in the context of Dataverse, EOSC and beyond. Zenodo. 
https://doi.org/10.5281/zenodo.10843668", + "@id": "https://doi.org/10.5281/zenodo.10843668", + "identifier": "https://doi.org/10.5281/zenodo.10843668", + "url": "https://doi.org/10.5281/zenodo.10843668" + } + ], + "temporalCoverage": [ + "2023-01-01/2023-12-31" + ], + "distribution": [ + { + "@type": "cr:FileObject", + "@id": "data.tsv", + "name": "data.tsv", + "encodingFormat": "text/tab-separated-values", + "md5": "3663d6a436ac00f5541a7336d6fa18c9", + "contentSize": "33", + "description": "", + "contentUrl": "https://beta.dataverse.org/api/access/datafile/26646?format=original" + }, + { + "@type": "cr:FileObject", + "@id": "doc/README.md", + "name": "README.md", + "encodingFormat": "text/markdown", + "md5": "ebf050ec8cce5df0a72b100cfc9f442f", + "contentSize": "34", + "description": "Additional documentation.", + "contentUrl": "https://beta.dataverse.org/api/access/datafile/26148" + } + ], + "recordSet": [ + { + "@type": "cr:RecordSet", + "field": [ + { + "@type": "cr:Field", + "name": "foo", + "description": "foo", + "dataType": "sc:Text", + "source": { + "@id": "1287", + "fileObject": { + "@id": "data.tsv" + }, + "extract": { + "column": "foo" + } + } + }, + { + "@type": "cr:Field", + "name": "bar", + "description": "bar", + "dataType": "sc:Integer", + "source": { + "@id": "1285", + "fileObject": { + "@id": "data.tsv" + }, + "extract": { + "column": "bar" + } + } + }, + { + "@type": "cr:Field", + "name": "baz", + "description": "baz", + "dataType": "sc:Integer", + "source": { + "@id": "1286", + "fileObject": { + "@id": "data.tsv" + }, + "extract": { + "column": "baz" + } + } + } + ] + } + ] +} \ No newline at end of file diff --git a/src/test/resources/croissant/max/expected/max-croissantSlim.json b/src/test/resources/croissant/max/expected/max-croissantSlim.json new file mode 100644 index 00000000000..fa3d632838e --- /dev/null +++ b/src/test/resources/croissant/max/expected/max-croissantSlim.json @@ -0,0 +1,122 @@ +{ + "@context": { + "@language": "en", + 
"@vocab": "https://schema.org/", + "citeAs": "cr:citeAs", + "column": "cr:column", + "conformsTo": "dct:conformsTo", + "cr": "http://mlcommons.org/croissant/", + "rai": "http://mlcommons.org/croissant/RAI/", + "data": { + "@id": "cr:data", + "@type": "@json" + }, + "dataType": { + "@id": "cr:dataType", + "@type": "@vocab" + }, + "dct": "http://purl.org/dc/terms/", + "examples": { + "@id": "cr:examples", + "@type": "@json" + }, + "extract": "cr:extract", + "field": "cr:field", + "fileProperty": "cr:fileProperty", + "fileObject": "cr:fileObject", + "fileSet": "cr:fileSet", + "format": "cr:format", + "includes": "cr:includes", + "isLiveDataset": "cr:isLiveDataset", + "jsonPath": "cr:jsonPath", + "key": "cr:key", + "md5": "cr:md5", + "parentField": "cr:parentField", + "path": "cr:path", + "recordSet": "cr:recordSet", + "references": "cr:references", + "regex": "cr:regex", + "repeated": "cr:repeated", + "replace": "cr:replace", + "samplingRate": "cr:samplingRate", + "sc": "https://schema.org/", + "separator": "cr:separator", + "source": "cr:source", + "subField": "cr:subField", + "transform": "cr:transform", + "wd": "https://www.wikidata.org/wiki/" + }, + "@type": "sc:Dataset", + "conformsTo": "http://mlcommons.org/croissant/1.0", + "name": "Max Schema.org", + "url": "https://doi.org/10.5072/FK2/VQTYHD", + "creator": [ + { + "@type": "Person", + "givenName": "Philip", + "familyName": "Durbin", + "affiliation": { + "@type": "Organization", + "name": "Harvard University" + }, + "sameAs": "https://orcid.org/0000-0002-9528-9470", + "@id": "https://orcid.org/0000-0002-9528-9470", + "identifier": "https://orcid.org/0000-0002-9528-9470", + "name": "Durbin, Philip" + }, + { + "@type": "Person", + "affiliation": { + "@type": "Organization", + "name": "Harvard University" + }, + "name": "IQSS" + } + ], + "description": "Exercising fields used by `schema.org` exporter.", + "keywords": [ + "Social Sciences", + "Other", + "foo", + "bar" + ], + "license": 
"http://creativecommons.org/publicdomain/zero/1.0", + "datePublished": "2024-05-01", + "dateModified": "2025-05-21", + "includedInDataCatalog": { + "@type": "DataCatalog", + "name": "Root", + "url": "https://beta.dataverse.org" + }, + "publisher": { + "@type": "Organization", + "name": "Root" + }, + "version": "3.0", + "citeAs": "@data{FK2/VQTYHD_2024,author = {Durbin, Philip and IQSS},publisher = {Root},title = {Max Schema.org},year = {2024},url = {https://doi.org/10.5072/FK2/VQTYHD}}", + "funder": [ + { + "@type": "Organization", + "name": "NSF" + }, + { + "@type": "Organization", + "name": "NIH" + } + ], + "spatialCoverage": [ + "Cambridge, MA, United States, Harvard Square" + ], + "citation": [ + { + "@type": "CreativeWork", + "name": "Tykhonov, V., & Durbin, P. (2024, March 20). Croissant ML standard in the context of Dataverse, EOSC and beyond. Zenodo. https://doi.org/10.5281/zenodo.10843668", + "@id": "https://doi.org/10.5281/zenodo.10843668", + "identifier": "https://doi.org/10.5281/zenodo.10843668", + "url": "https://doi.org/10.5281/zenodo.10843668" + } + ], + "temporalCoverage": [ + "2023-01-01/2023-12-31" + ] +} diff --git a/src/test/resources/croissant/max/in/dataCiteXml.xml b/src/test/resources/croissant/max/in/dataCiteXml.xml new file mode 100644 index 00000000000..e91c0583b71 --- /dev/null +++ b/src/test/resources/croissant/max/in/dataCiteXml.xml @@ -0,0 +1,77 @@ + + + 10.5072/FK2/VQTYHD + + + Durbin, Philip + Philip + Durbin + https://orcid.org/0000-0002-9528-9470 + Harvard University + + + IQSS + Harvard University + + + + Max Schema.org + + Root + 2024 + + Social Sciences + Other + foo + bar + + + + Durbin, Philip + Philip + Durbin + + + + 2024-05-01 + 2024-05-01 + 2025-05-21 + 2023-01-01/2023-12-31 + + + + 10.5281/ZENODO.10843668 + + + 34 + 21865 + 27 + + + text/markdown + text/tab-separated-values + text/tab-separated-values + + 3.0 + + + Creative Commons CC0 1.0 Universal Public Domain Dedication. 
+ + + Exercising fields used by `schema.org` exporter. + + + + United States, MA,, Cambridge,, Harvard Square, + + + + + NSF + + + NIH + 3OT2DB000004-01S3 + + + diff --git a/src/test/resources/croissant/max/in/datasetFileDetails.json b/src/test/resources/croissant/max/in/datasetFileDetails.json new file mode 100644 index 00000000000..35881e3eae1 --- /dev/null +++ b/src/test/resources/croissant/max/in/datasetFileDetails.json @@ -0,0 +1,117 @@ +[ + { + "id": 26646, + "persistentId": "", + "filename": "data.tab", + "contentType": "text/tab-separated-values", + "friendlyType": "Tab-Delimited", + "filesize": 27, + "storageIdentifier": "s3://beta-dataverse-direct:196f44cc758-4d710ffac5d8", + "originalFileFormat": "text/tsv", + "originalFormatLabel": "Tab-Separated Values", + "originalFileSize": 33, + "originalFileName": "data.tsv", + "UNF": "UNF:6:ngOUmEnfm08jahzBYqStQA==", + "rootDataFileId": -1, + "md5": "3663d6a436ac00f5541a7336d6fa18c9", + "checksum": { + "type": "MD5", + "value": "3663d6a436ac00f5541a7336d6fa18c9" + }, + "tabularData": true, + "creationDate": "2025-05-21", + "publicationDate": "2025-05-21", + "fileAccessRequest": true, + "restricted": false, + "fileMetadataId": 32509, + "dataTables": [ + { + "varQuantity": 3, + "caseQuantity": 3, + "UNF": "UNF:6:ngOUmEnfm08jahzBYqStQA==", + "dataVariables": [ + { + "id": 1287, + "name": "foo", + "label": "foo", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "CHARACTER", + "isOrderedCategorical": false, + "fileOrder": 0, + "UNF": "UNF:6:FWBO/a1GcxDnM3fNLdzrHw==", + "variableMetadata": [] + }, + { + "id": 1285, + "name": "bar", + "label": "bar", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "NUMERIC", + "isOrderedCategorical": false, + "fileOrder": 1, + "UNF": "UNF:6:AvELPR5QTaBbnq6S22Msow==", + "variableMetadata": [], + "summaryStatistics": { + "mode": ".", + "invd": "0.0", + "min": "1.0", + "stdev": "1.0", + "max": "3.0", + "vald": "3.0", + 
"mean": "2.0", + "medn": "2.0" + } + }, + { + "id": 1286, + "name": "baz", + "label": "baz", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "NUMERIC", + "isOrderedCategorical": false, + "fileOrder": 2, + "UNF": "UNF:6:WkRUZjFbozW1nFYiqMGWeQ==", + "variableMetadata": [], + "summaryStatistics": { + "mean": "20.0", + "mode": ".", + "min": "10.0", + "max": "30.0", + "invd": "0.0", + "stdev": "10.0", + "vald": "3.0", + "medn": "20.0" + } + } + ] + } + ], + "varGroups": [] + }, + { + "id": 26148, + "persistentId": "", + "filename": "README.md", + "contentType": "text/markdown", + "friendlyType": "Markdown Text", + "filesize": 34, + "description": "Additional documentation.", + "storageIdentifier": "s3://beta-dataverse-direct:18f35bee76a-f45ece0b0fcc", + "rootDataFileId": -1, + "md5": "ebf050ec8cce5df0a72b100cfc9f442f", + "checksum": { + "type": "MD5", + "value": "ebf050ec8cce5df0a72b100cfc9f442f" + }, + "tabularData": false, + "creationDate": "2024-05-01", + "publicationDate": "2024-05-01", + "fileAccessRequest": true, + "restricted": false, + "fileMetadataId": 32511, + "varGroups": [] + } +] diff --git a/src/test/resources/croissant/max/in/datasetJson.json b/src/test/resources/croissant/max/in/datasetJson.json new file mode 100644 index 00000000000..a0ddaa54436 --- /dev/null +++ b/src/test/resources/croissant/max/in/datasetJson.json @@ -0,0 +1,376 @@ +{ + "id": 26147, + "identifier": "FK2/VQTYHD", + "persistentUrl": "https://doi.org/10.5072/FK2/VQTYHD", + "protocol": "doi", + "authority": "10.5072", + "separator": "/", + "publisher": "Root", + "publicationDate": "2024-05-01", + "storageIdentifier": "s3://10.5072/FK2/VQTYHD", + "datasetType": "dataset", + "datasetVersion": { + "id": 266, + "datasetId": 26147, + "datasetPersistentId": "doi:10.5072/FK2/VQTYHD", + "storageIdentifier": "s3://10.5072/FK2/VQTYHD", + "versionNumber": 3, + "internalVersionNumber": 7, + "versionMinorNumber": 0, + "versionState": "RELEASED", + 
"latestVersionPublishingState": "RELEASED", + "UNF": "UNF:6:ngOUmEnfm08jahzBYqStQA==", + "lastUpdateTime": "2025-05-21T19:25:29Z", + "releaseTime": "2025-05-21T19:25:29Z", + "createTime": "2025-05-21T19:23:21Z", + "publicationDate": "2024-05-01", + "citationDate": "2024-05-01", + "license": { + "name": "CC0 1.0", + "uri": "http://creativecommons.org/publicdomain/zero/1.0", + "iconUri": "https://licensebuttons.net/p/zero/1.0/88x31.png", + "rightsIdentifier": "CC0-1.0", + "rightsIdentifierScheme": "SPDX", + "schemeUri": "https://spdx.org/licenses/", + "languageCode": "en" + }, + "fileAccessRequest": true, + "metadataBlocks": { + "citation": { + "displayName": "Citation Metadata", + "name": "citation", + "fields": [ + { + "typeName": "title", + "multiple": false, + "typeClass": "primitive", + "value": "Max Schema.org" + }, + { + "typeName": "author", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "authorName": { + "typeName": "authorName", + "multiple": false, + "typeClass": "primitive", + "value": "Durbin, Philip" + }, + "authorAffiliation": { + "typeName": "authorAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "Harvard University" + }, + "authorIdentifierScheme": { + "typeName": "authorIdentifierScheme", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "ORCID" + }, + "authorIdentifier": { + "typeName": "authorIdentifier", + "multiple": false, + "typeClass": "primitive", + "value": "0000-0002-9528-9470" + } + }, + { + "authorName": { + "typeName": "authorName", + "multiple": false, + "typeClass": "primitive", + "value": "IQSS" + }, + "authorAffiliation": { + "typeName": "authorAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "Harvard University" + } + } + ] + }, + { + "typeName": "datasetContact", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "datasetContactName": { + "typeName": "datasetContactName", + "multiple": false, + "typeClass": "primitive", + 
"value": "Durbin, Philip" + }, + "datasetContactEmail": { + "typeName": "datasetContactEmail", + "multiple": false, + "typeClass": "primitive", + "value": "philip_durbin@harvard.edu" + } + } + ] + }, + { + "typeName": "dsDescription", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "dsDescriptionValue": { + "typeName": "dsDescriptionValue", + "multiple": false, + "typeClass": "primitive", + "value": "Exercising fields used by `schema.org` exporter." + } + } + ] + }, + { + "typeName": "subject", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "Social Sciences", + "Other" + ] + }, + { + "typeName": "keyword", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "keywordValue": { + "typeName": "keywordValue", + "multiple": false, + "typeClass": "primitive", + "value": "foo" + } + }, + { + "keywordValue": { + "typeName": "keywordValue", + "multiple": false, + "typeClass": "primitive", + "value": "bar" + } + } + ] + }, + { + "typeName": "publication", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "publicationCitation": { + "typeName": "publicationCitation", + "multiple": false, + "typeClass": "primitive", + "value": "Tykhonov, V., & Durbin, P. (2024, March 20). Croissant ML standard in the context of Dataverse, EOSC and beyond. Zenodo. 
https://doi.org/10.5281/zenodo.10843668" + }, + "publicationIDType": { + "typeName": "publicationIDType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "doi" + }, + "publicationIDNumber": { + "typeName": "publicationIDNumber", + "multiple": false, + "typeClass": "primitive", + "value": "10.5281/zenodo.10843668" + }, + "publicationURL": { + "typeName": "publicationURL", + "multiple": false, + "typeClass": "primitive", + "value": "https://doi.org/10.5281/zenodo.10843668" + } + } + ] + }, + { + "typeName": "contributor", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "contributorType": { + "typeName": "contributorType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "Funder" + }, + "contributorName": { + "typeName": "contributorName", + "multiple": false, + "typeClass": "primitive", + "value": "NSF" + } + } + ] + }, + { + "typeName": "grantNumber", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "grantNumberAgency": { + "typeName": "grantNumberAgency", + "multiple": false, + "typeClass": "primitive", + "value": "NIH" + }, + "grantNumberValue": { + "typeName": "grantNumberValue", + "multiple": false, + "typeClass": "primitive", + "value": "3OT2DB000004-01S3" + } + } + ] + }, + { + "typeName": "depositor", + "multiple": false, + "typeClass": "primitive", + "value": "Durbin, Philip" + }, + { + "typeName": "dateOfDeposit", + "multiple": false, + "typeClass": "primitive", + "value": "2024-05-01" + }, + { + "typeName": "timePeriodCovered", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "timePeriodCoveredStart": { + "typeName": "timePeriodCoveredStart", + "multiple": false, + "typeClass": "primitive", + "value": "2023-01-01" + }, + "timePeriodCoveredEnd": { + "typeName": "timePeriodCoveredEnd", + "multiple": false, + "typeClass": "primitive", + "value": "2023-12-31" + } + } + ] + } + ] + }, + "geospatial": { + "displayName": "Geospatial Metadata", + "name": "geospatial", 
+ "fields": [ + { + "typeName": "geographicCoverage", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "country": { + "typeName": "country", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "United States" + }, + "state": { + "typeName": "state", + "multiple": false, + "typeClass": "primitive", + "value": "MA" + }, + "city": { + "typeName": "city", + "multiple": false, + "typeClass": "primitive", + "value": "Cambridge" + }, + "otherGeographicCoverage": { + "typeName": "otherGeographicCoverage", + "multiple": false, + "typeClass": "primitive", + "value": "Harvard Square" + } + } + ] + } + ] + } + }, + "files": [ + { + "description": "Additional documentation.", + "label": "README.md", + "restricted": false, + "directoryLabel": "doc", + "version": 1, + "datasetVersionId": 266, + "dataFile": { + "id": 26148, + "persistentId": "", + "filename": "README.md", + "contentType": "text/markdown", + "friendlyType": "Markdown Text", + "filesize": 34, + "description": "Additional documentation.", + "storageIdentifier": "s3://beta-dataverse-direct:18f35bee76a-f45ece0b0fcc", + "rootDataFileId": -1, + "md5": "ebf050ec8cce5df0a72b100cfc9f442f", + "checksum": { + "type": "MD5", + "value": "ebf050ec8cce5df0a72b100cfc9f442f" + }, + "tabularData": false, + "creationDate": "2024-05-01", + "publicationDate": "2024-05-01", + "fileAccessRequest": true + } + }, + { + "label": "data.tab", + "restricted": false, + "version": 3, + "datasetVersionId": 266, + "dataFile": { + "id": 26646, + "persistentId": "", + "filename": "data.tab", + "contentType": "text/tab-separated-values", + "friendlyType": "Tab-Delimited", + "filesize": 27, + "storageIdentifier": "s3://beta-dataverse-direct:196f44cc758-4d710ffac5d8", + "originalFileFormat": "text/tsv", + "originalFormatLabel": "Tab-Separated Values", + "originalFileSize": 33, + "originalFileName": "data.tsv", + "UNF": "UNF:6:ngOUmEnfm08jahzBYqStQA==", + "rootDataFileId": -1, + "md5": 
"3663d6a436ac00f5541a7336d6fa18c9", + "checksum": { + "type": "MD5", + "value": "3663d6a436ac00f5541a7336d6fa18c9" + }, + "tabularData": true, + "creationDate": "2025-05-21", + "publicationDate": "2025-05-21", + "fileAccessRequest": true + } + } + ], + "citation": "Durbin, Philip; IQSS, 2024, \"Max Schema.org\", https://doi.org/10.5072/FK2/VQTYHD, Root, V3, UNF:6:ngOUmEnfm08jahzBYqStQA== [fileUNF]" + } +} diff --git a/src/test/resources/croissant/max/in/datasetORE.json b/src/test/resources/croissant/max/in/datasetORE.json new file mode 100644 index 00000000000..2c3cce7ab6a --- /dev/null +++ b/src/test/resources/croissant/max/in/datasetORE.json @@ -0,0 +1,163 @@ +{ + "dcterms:modified": "2025-05-21", + "dcterms:creator": "Root", + "@type": "ore:ResourceMap", + "schema:additionalType": "Dataverse OREMap Format v1.0.1", + "dvcore:generatedBy": { + "@type": "schema:SoftwareApplication", + "schema:name": "Dataverse", + "schema:version": "6.6 build develop-c4379a0", + "schema:url": "https://github.com/iqss/dataverse" + }, + "@id": "https://beta.dataverse.org/api/datasets/export?exporter=OAI_ORE&persistentId=https://doi.org/10.5072/FK2/VQTYHD", + "ore:describes": { + "author": [ + { + "citation:authorName": "Durbin, Philip", + "citation:authorAffiliation": "Harvard University", + "authorIdentifierScheme": "ORCID", + "authorIdentifier": "0000-0002-9528-9470" + }, + { + "citation:authorName": "IQSS", + "citation:authorAffiliation": "Harvard University" + } + ], + "citation:keyword": [ + { + "citation:keywordValue": "foo" + }, + { + "citation:keywordValue": "bar" + } + ], + "timePeriodCovered": { + "citation:timePeriodCoveredStart": "2023-01-01", + "citation:timePeriodCoveredEnd": "2023-12-31" + }, + "contributor": { + "citation:contributorType": "Funder", + "citation:contributorName": "NSF" + }, + "citation:dsDescription": { + "citation:dsDescriptionValue": "Exercising fields used by `schema.org` exporter." 
+ }, + "publication": { + "publicationCitation": "Tykhonov, V., & Durbin, P. (2024, March 20). Croissant ML standard in the context of Dataverse, EOSC and beyond. Zenodo. https://doi.org/10.5281/zenodo.10843668", + "publicationIDType": "doi", + "publicationIDNumber": "10.5281/zenodo.10843668", + "publicationURL": "https://doi.org/10.5281/zenodo.10843668" + }, + "grantNumber": { + "citation:grantNumberAgency": "NIH", + "citation:grantNumberValue": "3OT2DB000004-01S3" + }, + "geospatial:geographicCoverage": { + "geospatial:country": "United States", + "geospatial:state": "MA", + "geospatial:city": "Cambridge", + "geospatial:otherGeographicCoverage": "Harvard Square" + }, + "citation:datasetContact": { + "citation:datasetContactName": "Durbin, Philip", + "citation:datasetContactEmail": "philip_durbin@harvard.edu" + }, + "dateOfDeposit": "2024-05-01", + "subject": [ + "Social Sciences", + "Other" + ], + "citation:depositor": "Durbin, Philip", + "title": "Max Schema.org", + "@id": "https://doi.org/10.5072/FK2/VQTYHD", + "@type": [ + "ore:Aggregation", + "schema:Dataset" + ], + "schema:version": "3.0", + "schema:name": "Max Schema.org", + "schema:dateModified": "2025-05-21 19:25:29.653", + "schema:datePublished": "2024-05-01", + "schema:creativeWorkStatus": "RELEASED", + "schema:license": "http://creativecommons.org/publicdomain/zero/1.0", + "dvcore:fileTermsOfAccess": { + "dvcore:fileRequestAccess": true + }, + "schema:includedInDataCatalog": "Root", + "schema:isPartOf": { + "schema:name": "Philip Durbin Dataverse", + "@id": "https://beta.dataverse.org/dataverse/pdurbin", + "schema:isPartOf": { + "schema:name": "Root", + "@id": "https://beta.dataverse.org/dataverse/root", + "schema:description": "The root dataverse." 
+ } + }, + "ore:aggregates": [ + { + "schema:description": "Additional documentation.", + "schema:name": "README.md", + "dvcore:restricted": false, + "dvcore:directoryLabel": "doc", + "schema:version": 1, + "dvcore:datasetVersionId": 266, + "@id": "https://beta.dataverse.org/file.xhtml?fileId=26148", + "schema:sameAs": "https://beta.dataverse.org/api/access/datafile/26148", + "@type": "ore:AggregatedResource", + "schema:fileFormat": "text/markdown", + "dvcore:filesize": 34, + "dvcore:storageIdentifier": "s3://beta-dataverse-direct:18f35bee76a-f45ece0b0fcc", + "dvcore:rootDataFileId": -1, + "dvcore:checksum": { + "@type": "MD5", + "@value": "ebf050ec8cce5df0a72b100cfc9f442f" + } + }, + { + "schema:name": "data.tsv", + "dvcore:restricted": false, + "schema:version": 3, + "dvcore:datasetVersionId": 266, + "@id": "https://beta.dataverse.org/file.xhtml?fileId=26646", + "schema:sameAs": "https://beta.dataverse.org/api/access/datafile/26646?format=original", + "@type": "ore:AggregatedResource", + "schema:fileFormat": "text/tsv", + "dvcore:filesize": 33, + "dvcore:storageIdentifier": "s3://beta-dataverse-direct:196f44cc758-4d710ffac5d8", + "dvcore:currentIngestedName": "data.tab", + "dvcore:UNF": "UNF:6:ngOUmEnfm08jahzBYqStQA==", + "dvcore:rootDataFileId": -1, + "dvcore:checksum": { + "@type": "MD5", + "@value": "3663d6a436ac00f5541a7336d6fa18c9" + } + } + ], + "schema:hasPart": [ + "https://beta.dataverse.org/file.xhtml?fileId=26148", + "https://beta.dataverse.org/file.xhtml?fileId=26646" + ] + }, + "@context": { + "author": "http://purl.org/dc/terms/creator", + "authorIdentifier": "http://purl.org/spar/datacite/AgentIdentifier", + "authorIdentifierScheme": "http://purl.org/spar/datacite/AgentIdentifierScheme", + "citation": "https://dataverse.org/schema/citation/", + "contributor": "http://purl.org/dc/terms/contributor", + "dateOfDeposit": "http://purl.org/dc/terms/dateSubmitted", + "dcterms": "http://purl.org/dc/terms/", + "dvcore": "https://dataverse.org/schema/core#", 
+ "geospatial": "https://beta.dataverse.org/schema/geospatial#", + "grantNumber": "https://schema.org/sponsor", + "ore": "http://www.openarchives.org/ore/terms/", + "publication": "http://purl.org/dc/terms/isReferencedBy", + "publicationCitation": "http://purl.org/dc/terms/bibliographicCitation", + "publicationIDNumber": "http://purl.org/spar/datacite/ResourceIdentifier", + "publicationIDType": "http://purl.org/spar/datacite/ResourceIdentifierScheme", + "publicationURL": "https://schema.org/distribution", + "schema": "http://schema.org/", + "subject": "http://purl.org/dc/terms/subject", + "timePeriodCovered": "https://schema.org/temporalCoverage", + "title": "http://purl.org/dc/terms/title" + } +} diff --git a/src/test/resources/croissant/max/in/datasetSchemaDotOrg.json b/src/test/resources/croissant/max/in/datasetSchemaDotOrg.json new file mode 100644 index 00000000000..d3f764255e8 --- /dev/null +++ b/src/test/resources/croissant/max/in/datasetSchemaDotOrg.json @@ -0,0 +1,119 @@ +{ + "@context": "http://schema.org", + "@type": "Dataset", + "@id": "https://doi.org/10.5072/FK2/VQTYHD", + "identifier": "https://doi.org/10.5072/FK2/VQTYHD", + "name": "Max Schema.org", + "creator": [ + { + "@type": "Person", + "givenName": "Philip", + "familyName": "Durbin", + "affiliation": { + "@type": "Organization", + "name": "Harvard University" + }, + "sameAs": "https://orcid.org/0000-0002-9528-9470", + "@id": "https://orcid.org/0000-0002-9528-9470", + "identifier": "https://orcid.org/0000-0002-9528-9470", + "name": "Durbin, Philip" + }, + { + "@type": "Person", + "affiliation": { + "@type": "Organization", + "name": "Harvard University" + }, + "name": "IQSS" + } + ], + "author": [ + { + "@type": "Person", + "givenName": "Philip", + "familyName": "Durbin", + "affiliation": { + "@type": "Organization", + "name": "Harvard University" + }, + "sameAs": "https://orcid.org/0000-0002-9528-9470", + "@id": "https://orcid.org/0000-0002-9528-9470", + "identifier": 
"https://orcid.org/0000-0002-9528-9470", + "name": "Durbin, Philip" + }, + { + "@type": "Person", + "affiliation": { + "@type": "Organization", + "name": "Harvard University" + }, + "name": "IQSS" + } + ], + "datePublished": "2024-05-01", + "dateModified": "2025-05-21", + "version": "3", + "description": "Exercising fields used by `schema.org` exporter.", + "keywords": [ + "Social Sciences", + "Other", + "foo", + "bar" + ], + "citation": [ + { + "@type": "CreativeWork", + "name": "Tykhonov, V., & Durbin, P. (2024, March 20). Croissant ML standard in the context of Dataverse, EOSC and beyond. Zenodo. https://doi.org/10.5281/zenodo.10843668", + "@id": "https://doi.org/10.5281/zenodo.10843668", + "identifier": "https://doi.org/10.5281/zenodo.10843668", + "url": "https://doi.org/10.5281/zenodo.10843668" + } + ], + "temporalCoverage": [ + "2023-01-01/2023-12-31" + ], + "license": "http://creativecommons.org/publicdomain/zero/1.0", + "includedInDataCatalog": { + "@type": "DataCatalog", + "name": "Root", + "url": "https://beta.dataverse.org" + }, + "publisher": { + "@type": "Organization", + "name": "Root" + }, + "provider": { + "@type": "Organization", + "name": "Root" + }, + "funder": [ + { + "@type": "Organization", + "name": "NSF" + }, + { + "@type": "Organization", + "name": "NIH" + } + ], + "spatialCoverage": [ + "Cambridge, MA, United States, Harvard Square" + ], + "distribution": [ + { + "@type": "DataDownload", + "name": "data.tab", + "encodingFormat": "text/tab-separated-values", + "contentSize": 27, + "contentUrl": "https://beta.dataverse.org/api/access/datafile/26646" + }, + { + "@type": "DataDownload", + "name": "README.md", + "encodingFormat": "text/markdown", + "contentSize": 34, + "description": "Additional documentation.", + "contentUrl": "https://beta.dataverse.org/api/access/datafile/26148" + } + ] +} diff --git a/src/test/resources/croissant/minimal/expected/minimal-croissant.json b/src/test/resources/croissant/minimal/expected/minimal-croissant.json 
new file mode 100644 index 00000000000..7c47afc1485 --- /dev/null +++ b/src/test/resources/croissant/minimal/expected/minimal-croissant.json @@ -0,0 +1,79 @@ +{ + "@context": { + "@language": "en", + "@vocab": "https://schema.org/", + "citeAs": "cr:citeAs", + "column": "cr:column", + "conformsTo": "dct:conformsTo", + "cr": "http://mlcommons.org/croissant/", + "rai": "http://mlcommons.org/croissant/RAI/", + "data": { + "@id": "cr:data", + "@type": "@json" + }, + "dataType": { + "@id": "cr:dataType", + "@type": "@vocab" + }, + "dct": "http://purl.org/dc/terms/", + "examples": { + "@id": "cr:examples", + "@type": "@json" + }, + "extract": "cr:extract", + "field": "cr:field", + "fileProperty": "cr:fileProperty", + "fileObject": "cr:fileObject", + "fileSet": "cr:fileSet", + "format": "cr:format", + "includes": "cr:includes", + "isLiveDataset": "cr:isLiveDataset", + "jsonPath": "cr:jsonPath", + "key": "cr:key", + "md5": "cr:md5", + "parentField": "cr:parentField", + "path": "cr:path", + "recordSet": "cr:recordSet", + "references": "cr:references", + "regex": "cr:regex", + "repeated": "cr:repeated", + "replace": "cr:replace", + "samplingRate": "cr:samplingRate", + "sc": "https://schema.org/", + "separator": "cr:separator", + "source": "cr:source", + "subField": "cr:subField", + "transform": "cr:transform", + "wd": "https://www.wikidata.org/wiki/" + }, + "@type": "sc:Dataset", + "conformsTo": "http://mlcommons.org/croissant/1.0", + "name": "Minimal", + "url": "https://doi.org/10.5072/FK2/4C0JYC", + "creator": [ + { + "@type": "Person", + "givenName": "Philip", + "familyName": "Durbin", + "name": "Durbin, Philip" + } + ], + "description": "Minimal metadata and no files.", + "keywords": [ + "Other" + ], + "license": "http://creativecommons.org/publicdomain/zero/1.0", + "datePublished": "2024-05-01", + "dateModified": "2024-05-01", + "includedInDataCatalog": { + "@type": "DataCatalog", + "name": "Root", + "url": "https://beta.dataverse.org" + }, + "publisher": { + "@type": 
"Organization", + "name": "Root" + }, + "version": "1.0", + "citeAs": "@data{FK2/4C0JYC_2024,author = {Durbin, Philip},publisher = {Root},title = {Minimal},year = {2024},url = {https://doi.org/10.5072/FK2/4C0JYC}}" +} \ No newline at end of file diff --git a/src/test/resources/croissant/minimal/in/dataCiteXml.xml b/src/test/resources/croissant/minimal/in/dataCiteXml.xml new file mode 100644 index 00000000000..14feafba53d --- /dev/null +++ b/src/test/resources/croissant/minimal/in/dataCiteXml.xml @@ -0,0 +1,17 @@ + + + 10.5072/FK2/4C0JYC + Durbin, Philip + + Minimal + + Root + 2024 + + + Minimal metadata and no files. + + + diff --git a/src/test/resources/croissant/minimal/in/datasetFileDetails.json b/src/test/resources/croissant/minimal/in/datasetFileDetails.json new file mode 100644 index 00000000000..fe51488c706 --- /dev/null +++ b/src/test/resources/croissant/minimal/in/datasetFileDetails.json @@ -0,0 +1 @@ +[] diff --git a/src/test/resources/croissant/minimal/in/datasetJson.json b/src/test/resources/croissant/minimal/in/datasetJson.json new file mode 100644 index 00000000000..cedd4723dd5 --- /dev/null +++ b/src/test/resources/croissant/minimal/in/datasetJson.json @@ -0,0 +1,100 @@ +{ + "id": 26146, + "identifier": "FK2/4C0JYC", + "persistentUrl": "https://doi.org/10.5072/FK2/4C0JYC", + "protocol": "doi", + "authority": "10.5072", + "publisher": "Root", + "publicationDate": "2024-05-01", + "storageIdentifier": "s3://10.5072/FK2/4C0JYC", + "datasetVersion": { + "id": 108, + "datasetId": 26146, + "datasetPersistentId": "doi:10.5072/FK2/4C0JYC", + "storageIdentifier": "s3://10.5072/FK2/4C0JYC", + "versionNumber": 1, + "versionMinorNumber": 0, + "versionState": "RELEASED", + "latestVersionPublishingState": "RELEASED", + "lastUpdateTime": "2024-05-01T14:27:17Z", + "releaseTime": "2024-05-01T14:27:17Z", + "createTime": "2024-05-01T14:26:54Z", + "publicationDate": "2024-05-01", + "citationDate": "2024-05-01", + "license": { + "name": "CC0 1.0", + "uri": 
"http://creativecommons.org/publicdomain/zero/1.0", + "iconUri": "https://licensebuttons.net/p/zero/1.0/88x31.png" + }, + "fileAccessRequest": true, + "metadataBlocks": { + "citation": { + "displayName": "Citation Metadata", + "name": "citation", + "fields": [ + { + "typeName": "title", + "multiple": false, + "typeClass": "primitive", + "value": "Minimal" + }, + { + "typeName": "author", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "authorName": { + "typeName": "authorName", + "multiple": false, + "typeClass": "primitive", + "value": "Durbin, Philip" + } + } + ] + }, + { + "typeName": "datasetContact", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "datasetContactEmail": { + "typeName": "datasetContactEmail", + "multiple": false, + "typeClass": "primitive", + "value": "philip_durbin@harvard.edu" + } + } + ] + }, + { + "typeName": "dsDescription", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "dsDescriptionValue": { + "typeName": "dsDescriptionValue", + "multiple": false, + "typeClass": "primitive", + "value": "Minimal metadata and no files." 
+ } + } + ] + }, + { + "typeName": "subject", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "Other" + ] + } + ] + } + }, + "files": [], + "citation": "Durbin, Philip, 2024, \"Minimal\", https://doi.org/10.5072/FK2/4C0JYC, Root, V1" + } +} diff --git a/src/test/resources/croissant/minimal/in/datasetORE.json b/src/test/resources/croissant/minimal/in/datasetORE.json new file mode 100644 index 00000000000..a76ec9ea0ac --- /dev/null +++ b/src/test/resources/croissant/minimal/in/datasetORE.json @@ -0,0 +1,62 @@ +{ + "dcterms:modified": "2024-05-01", + "dcterms:creator": "Root", + "@type": "ore:ResourceMap", + "schema:additionalType": "Dataverse OREMap Format v1.0.0", + "dvcore:generatedBy": { + "@type": "schema:SoftwareApplication", + "schema:name": "Dataverse", + "schema:version": "6.2 build develop-e615050", + "schema:url": "https://github.com/iqss/dataverse" + }, + "@id": "https://beta.dataverse.org/api/datasets/export?exporter=OAI_ORE&persistentId=https://doi.org/10.5072/FK2/4C0JYC", + "ore:describes": { + "citation:dsDescription": { + "citation:dsDescriptionValue": "Minimal metadata and no files." 
+ }, + "author": { + "citation:authorName": "Durbin, Philip" + }, + "citation:datasetContact": { + "citation:datasetContactEmail": "philip_durbin@harvard.edu" + }, + "title": "Minimal", + "subject": "Other", + "@id": "https://doi.org/10.5072/FK2/4C0JYC", + "@type": [ + "ore:Aggregation", + "schema:Dataset" + ], + "schema:version": "1.0", + "schema:name": "Minimal", + "schema:dateModified": "2024-05-01 14:27:17.719", + "schema:datePublished": "2024-05-01", + "schema:creativeWorkStatus": "RELEASED", + "schema:license": "http://creativecommons.org/publicdomain/zero/1.0", + "dvcore:fileTermsOfAccess": { + "dvcore:fileRequestAccess": true + }, + "schema:includedInDataCatalog": "Root", + "schema:isPartOf": { + "schema:name": "Philip Durbin Dataverse", + "@id": "https://beta.dataverse.org/dataverse/pdurbin", + "schema:isPartOf": { + "schema:name": "Root", + "@id": "https://beta.dataverse.org/dataverse/root", + "schema:description": "The root dataverse." + } + }, + "ore:aggregates": [], + "schema:hasPart": [] + }, + "@context": { + "author": "http://purl.org/dc/terms/creator", + "citation": "https://dataverse.org/schema/citation/", + "dcterms": "http://purl.org/dc/terms/", + "dvcore": "https://dataverse.org/schema/core#", + "ore": "http://www.openarchives.org/ore/terms/", + "schema": "http://schema.org/", + "subject": "http://purl.org/dc/terms/subject", + "title": "http://purl.org/dc/terms/title" + } +} diff --git a/src/test/resources/croissant/minimal/in/datasetSchemaDotOrg.json b/src/test/resources/croissant/minimal/in/datasetSchemaDotOrg.json new file mode 100644 index 00000000000..36dcab588a3 --- /dev/null +++ b/src/test/resources/croissant/minimal/in/datasetSchemaDotOrg.json @@ -0,0 +1,44 @@ +{ + "@context": "http://schema.org", + "@type": "Dataset", + "@id": "https://doi.org/10.5072/FK2/4C0JYC", + "identifier": "https://doi.org/10.5072/FK2/4C0JYC", + "name": "Minimal", + "creator": [ + { + "@type": "Person", + "givenName": "Philip", + "familyName": "Durbin", + 
"name": "Durbin, Philip" + } + ], + "author": [ + { + "@type": "Person", + "givenName": "Philip", + "familyName": "Durbin", + "name": "Durbin, Philip" + } + ], + "datePublished": "2024-05-01", + "dateModified": "2024-05-01", + "version": "1", + "description": "Minimal metadata and no files.", + "keywords": [ + "Other" + ], + "license": "http://creativecommons.org/publicdomain/zero/1.0", + "includedInDataCatalog": { + "@type": "DataCatalog", + "name": "Root", + "url": "https://beta.dataverse.org" + }, + "publisher": { + "@type": "Organization", + "name": "Root" + }, + "provider": { + "@type": "Organization", + "name": "Root" + } +} diff --git a/src/test/resources/croissant/restricted/expected/restricted-croissant.json b/src/test/resources/croissant/restricted/expected/restricted-croissant.json new file mode 100644 index 00000000000..19d970d1bbb --- /dev/null +++ b/src/test/resources/croissant/restricted/expected/restricted-croissant.json @@ -0,0 +1,115 @@ +{ + "@context": { + "@language": "en", + "@vocab": "https://schema.org/", + "citeAs": "cr:citeAs", + "column": "cr:column", + "conformsTo": "dct:conformsTo", + "cr": "http://mlcommons.org/croissant/", + "rai": "http://mlcommons.org/croissant/RAI/", + "data": { + "@id": "cr:data", + "@type": "@json" + }, + "dataType": { + "@id": "cr:dataType", + "@type": "@vocab" + }, + "dct": "http://purl.org/dc/terms/", + "examples": { + "@id": "cr:examples", + "@type": "@json" + }, + "extract": "cr:extract", + "field": "cr:field", + "fileProperty": "cr:fileProperty", + "fileObject": "cr:fileObject", + "fileSet": "cr:fileSet", + "format": "cr:format", + "includes": "cr:includes", + "isLiveDataset": "cr:isLiveDataset", + "jsonPath": "cr:jsonPath", + "key": "cr:key", + "md5": "cr:md5", + "parentField": "cr:parentField", + "path": "cr:path", + "recordSet": "cr:recordSet", + "references": "cr:references", + "regex": "cr:regex", + "repeated": "cr:repeated", + "replace": "cr:replace", + "samplingRate": "cr:samplingRate", + "sc": 
"https://schema.org/", + "separator": "cr:separator", + "source": "cr:source", + "subField": "cr:subField", + "transform": "cr:transform", + "wd": "https://www.wikidata.org/wiki/" + }, + "@type": "sc:Dataset", + "conformsTo": "http://mlcommons.org/croissant/1.0", + "name": "Cars", + "url": "https://doi.org/10.5072/FK2/CY7BWA", + "creator": [ + { + "@type": "Person", + "givenName": "Philip", + "familyName": "Durbin", + "affiliation": { + "@type": "Organization", + "name": "Harvard" + }, + "name": "Durbin, Philip" + } + ], + "description": "This dataset is about cars.", + "keywords": [ + "Other" + ], + "license": "http://creativecommons.org/publicdomain/zero/1.0", + "datePublished": "2025-05-16", + "dateModified": "2025-05-16", + "includedInDataCatalog": { + "@type": "DataCatalog", + "name": "Root", + "url": "http://localhost:8080" + }, + "publisher": { + "@type": "Organization", + "name": "Root" + }, + "version": "1.0", + "citeAs": "@data{FK2/CY7BWA_2025,author = {Durbin, Philip},publisher = {Root},title = {Cars},year = {2025},url = {https://doi.org/10.5072/FK2/CY7BWA}}", + "distribution": [ + { + "@type": "cr:FileObject", + "@id": "code/compute.py", + "name": "compute.py", + "encodingFormat": "text/x-python", + "md5": "d84985e94dde671f318076bd7a137f15", + "contentSize": "15", + "description": "", + "contentUrl": "http://localhost:8080/api/access/datafile/7" + }, + { + "@type": "cr:FileObject", + "@id": "data/stata13-auto.dta", + "name": "stata13-auto.dta", + "encodingFormat": "application/x-stata-13", + "md5": "7b1201ce6b469796837a835377338c5a", + "contentSize": "6443", + "description": "", + "contentUrl": "http://localhost:8080/api/access/datafile/9?format=original" + }, + { + "@type": "cr:FileObject", + "@id": "doc/README.md", + "name": "README.md", + "encodingFormat": "text/markdown", + "md5": "a2e484d07ee5590cc32182dc2c6ccc83", + "contentSize": "28", + "description": "", + "contentUrl": "http://localhost:8080/api/access/datafile/8" + } + ] +} \ No newline at 
end of file diff --git a/src/test/resources/croissant/restricted/expected/restricted-croissantSlim.json b/src/test/resources/croissant/restricted/expected/restricted-croissantSlim.json new file mode 100644 index 00000000000..392ddd3a5dd --- /dev/null +++ b/src/test/resources/croissant/restricted/expected/restricted-croissantSlim.json @@ -0,0 +1,83 @@ +{ + "@context": { + "@language": "en", + "@vocab": "https://schema.org/", + "citeAs": "cr:citeAs", + "column": "cr:column", + "conformsTo": "dct:conformsTo", + "cr": "http://mlcommons.org/croissant/", + "rai": "http://mlcommons.org/croissant/RAI/", + "data": { + "@id": "cr:data", + "@type": "@json" + }, + "dataType": { + "@id": "cr:dataType", + "@type": "@vocab" + }, + "dct": "http://purl.org/dc/terms/", + "examples": { + "@id": "cr:examples", + "@type": "@json" + }, + "extract": "cr:extract", + "field": "cr:field", + "fileProperty": "cr:fileProperty", + "fileObject": "cr:fileObject", + "fileSet": "cr:fileSet", + "format": "cr:format", + "includes": "cr:includes", + "isLiveDataset": "cr:isLiveDataset", + "jsonPath": "cr:jsonPath", + "key": "cr:key", + "md5": "cr:md5", + "parentField": "cr:parentField", + "path": "cr:path", + "recordSet": "cr:recordSet", + "references": "cr:references", + "regex": "cr:regex", + "repeated": "cr:repeated", + "replace": "cr:replace", + "samplingRate": "cr:samplingRate", + "sc": "https://schema.org/", + "separator": "cr:separator", + "source": "cr:source", + "subField": "cr:subField", + "transform": "cr:transform", + "wd": "https://www.wikidata.org/wiki/" + }, + "@type": "sc:Dataset", + "conformsTo": "http://mlcommons.org/croissant/1.0", + "name": "Cars", + "url": "https://doi.org/10.5072/FK2/CY7BWA", + "creator": [ + { + "@type": "Person", + "givenName": "Philip", + "familyName": "Durbin", + "affiliation": { + "@type": "Organization", + "name": "Harvard" + }, + "name": "Durbin, Philip" + } + ], + "description": "This dataset is about cars.", + "keywords": [ + "Other" + ], + "license": 
"http://creativecommons.org/publicdomain/zero/1.0", + "datePublished": "2025-05-16", + "dateModified": "2025-05-16", + "includedInDataCatalog": { + "@type": "DataCatalog", + "name": "Root", + "url": "http://localhost:8080" + }, + "publisher": { + "@type": "Organization", + "name": "Root" + }, + "version": "1.0", + "citeAs": "@data{FK2/CY7BWA_2025,author = {Durbin, Philip},publisher = {Root},title = {Cars},year = {2025},url = {https://doi.org/10.5072/FK2/CY7BWA}}" +} diff --git a/src/test/resources/croissant/restricted/in/dataCiteXml.xml b/src/test/resources/croissant/restricted/in/dataCiteXml.xml new file mode 100644 index 00000000000..7c6c89385fd --- /dev/null +++ b/src/test/resources/croissant/restricted/in/dataCiteXml.xml @@ -0,0 +1,51 @@ + + + 10.5072/FK2/CY7BWA + + + Durbin, Philip + Philip + Durbin + Harvard + + + + Cars + + Root + 2025 + + Other + + + + Durbin, Philip + Philip + Durbin + Harvard + + + + 2024-03-13 + 2025-05-16 + + + + 15 + 28 + 4026 + + + text/x-python + text/markdown + text/tab-separated-values + + 1.0 + + + Creative Commons CC0 1.0 Universal Public Domain Dedication. + + + This dataset is about cars. 
+ + diff --git a/src/test/resources/croissant/restricted/in/datasetFileDetails.json b/src/test/resources/croissant/restricted/in/datasetFileDetails.json new file mode 100644 index 00000000000..f2cdff072da --- /dev/null +++ b/src/test/resources/croissant/restricted/in/datasetFileDetails.json @@ -0,0 +1,355 @@ +[ + { + "id": 7, + "persistentId": "", + "filename": "compute.py", + "contentType": "text/x-python", + "friendlyType": "Python Source Code", + "filesize": 15, + "description": "", + "storageIdentifier": "local://196d9f154f7-8cadf34ee905", + "rootDataFileId": -1, + "md5": "d84985e94dde671f318076bd7a137f15", + "checksum": { + "type": "MD5", + "value": "d84985e94dde671f318076bd7a137f15" + }, + "tabularData": false, + "creationDate": "2025-05-16", + "publicationDate": "2025-05-16", + "fileAccessRequest": true, + "restricted": false, + "fileMetadataId": 1, + "varGroups": [] + }, + { + "id": 9, + "persistentId": "", + "filename": "stata13-auto.tab", + "contentType": "text/tab-separated-values", + "friendlyType": "Tab-Delimited", + "filesize": 4026, + "description": "", + "storageIdentifier": "local://196d9f15719-2270bfca2b48", + "originalFileFormat": "application/x-stata-13", + "originalFormatLabel": "Stata 13 Binary", + "originalFileSize": 6443, + "originalFileName": "stata13-auto.dta", + "UNF": "UNF:6:RPd9EWHSZwqUvRZuKTJMqg==", + "rootDataFileId": -1, + "md5": "7b1201ce6b469796837a835377338c5a", + "checksum": { + "type": "MD5", + "value": "7b1201ce6b469796837a835377338c5a" + }, + "tabularData": true, + "creationDate": "2025-05-16", + "publicationDate": "2025-05-16", + "fileAccessRequest": true, + "restricted": true, + "fileMetadataId": 3, + "dataTables": [ + { + "varQuantity": 12, + "caseQuantity": 74, + "UNF": "UNF:6:RPd9EWHSZwqUvRZuKTJMqg==", + "dataVariables": [ + { + "id": 2, + "name": "make", + "label": "Make and Model", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "CHARACTER", + "isOrderedCategorical": false, + 
"fileOrder": 0, + "UNF": "UNF:6:Oo4vwiL8ffhSECOcjsKk2g==", + "variableMetadata": [] + }, + { + "id": 5, + "name": "price", + "label": "Price", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "NUMERIC", + "isOrderedCategorical": false, + "fileOrder": 1, + "UNF": "UNF:6:rvfkkdA36AaCSqCQciybfA==", + "variableMetadata": [], + "summaryStatistics": { + "min": "3291.0", + "medn": "5006.5", + "mean": "6165.256756756757", + "max": "15906.0", + "vald": "74.0", + "mode": ".", + "stdev": "2949.4958847689186", + "invd": "0.0" + } + }, + { + "id": 3, + "name": "mpg", + "label": "Mileage (mpg)", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "NUMERIC", + "isOrderedCategorical": false, + "fileOrder": 2, + "UNF": "UNF:6:vVr3w8CgeZq1KpDfJQudOg==", + "variableMetadata": [], + "summaryStatistics": { + "max": "41.0", + "vald": "74.0", + "medn": "20.0", + "min": "12.0", + "stdev": "5.785503209735141", + "mean": "21.2972972972973", + "invd": "0.0", + "mode": "." 
+ } + }, + { + "id": 12, + "name": "rep78", + "label": "Repair Record 1978", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "NUMERIC", + "isOrderedCategorical": false, + "fileOrder": 3, + "UNF": "UNF:6:gbFI98swTWNhAjCRyi2cdA==", + "variableMetadata": [], + "summaryStatistics": { + "stdev": "0.989932270109041", + "mode": ".", + "min": "1.0", + "max": "5.0", + "medn": "3.0", + "mean": "3.4057971014492754", + "vald": "69.0", + "invd": "5.0" + } + }, + { + "id": 1, + "name": "headroom", + "label": "Headroom (in.)", + "weighted": false, + "variableIntervalType": "contin", + "variableFormatType": "NUMERIC", + "format": "float", + "isOrderedCategorical": false, + "fileOrder": 4, + "UNF": "UNF:6:g4Pl3T0Oz2e/OKJ64WiTnA==", + "variableMetadata": [], + "summaryStatistics": { + "mean": "2.993243243243243", + "mode": ".", + "vald": "74.0", + "invd": "0.0", + "stdev": "0.845994766828771", + "min": "1.5", + "medn": "3.0", + "max": "5.0" + } + }, + { + "id": 7, + "name": "trunk", + "label": "Trunk space (cu. 
ft.)", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "NUMERIC", + "isOrderedCategorical": false, + "fileOrder": 5, + "UNF": "UNF:6:iab0POsE3By7dQfgX/TY4g==", + "variableMetadata": [], + "summaryStatistics": { + "vald": "74.0", + "mode": ".", + "mean": "13.756756756756756", + "max": "23.0", + "min": "5.0", + "medn": "14.0", + "invd": "0.0", + "stdev": "4.277404189173201" + } + }, + { + "id": 4, + "name": "weight", + "label": "Weight (lbs.)", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "NUMERIC", + "isOrderedCategorical": false, + "fileOrder": 6, + "UNF": "UNF:6:cdoTdfUNeYWHHFEBCDxg+w==", + "variableMetadata": [], + "summaryStatistics": { + "invd": "0.0", + "min": "1760.0", + "vald": "74.0", + "max": "4840.0", + "stdev": "777.1935671373664", + "mean": "3019.459459459459", + "mode": ".", + "medn": "3190.0" + } + }, + { + "id": 8, + "name": "length", + "label": "Length (in.)", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "NUMERIC", + "isOrderedCategorical": false, + "fileOrder": 7, + "UNF": "UNF:6:8z1rjwhqBN4meYIiKI4P1A==", + "variableMetadata": [], + "summaryStatistics": { + "mode": ".", + "vald": "74.0", + "invd": "0.0", + "stdev": "22.266339902021585", + "max": "233.0", + "medn": "192.5", + "mean": "187.93243243243245", + "min": "142.0" + } + }, + { + "id": 9, + "name": "turn", + "label": "Turn Circle (ft.) ", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "NUMERIC", + "isOrderedCategorical": false, + "fileOrder": 8, + "UNF": "UNF:6:QxhjrrNtVz4qA8RulQ2MuQ==", + "variableMetadata": [], + "summaryStatistics": { + "stdev": "4.399353727233908", + "vald": "74.0", + "max": "51.0", + "min": "31.0", + "medn": "40.0", + "mean": "39.648648648648646", + "invd": "0.0", + "mode": "." + } + }, + { + "id": 10, + "name": "displacement", + "label": "Displacement (cu. 
in.)", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "NUMERIC", + "isOrderedCategorical": false, + "fileOrder": 9, + "UNF": "UNF:6:ftk+RAQpTCT1/y6G/rLWfA==", + "variableMetadata": [], + "summaryStatistics": { + "stdev": "91.83721896440396", + "invd": "0.0", + "min": "79.0", + "medn": "196.0", + "mode": ".", + "vald": "74.0", + "mean": "197.2972972972973", + "max": "425.0" + } + }, + { + "id": 6, + "name": "gear_ratio", + "label": "Gear Ratio", + "weighted": false, + "variableIntervalType": "contin", + "variableFormatType": "NUMERIC", + "format": "float", + "isOrderedCategorical": false, + "fileOrder": 10, + "UNF": "UNF:6:qjnY/qbx26FTepoPqRZ6lw==", + "variableMetadata": [], + "summaryStatistics": { + "medn": "2.9550000429153442", + "stdev": "0.45628709670763035", + "mean": "3.0148648667979883", + "min": "2.190000057220459", + "max": "3.890000104904175", + "mode": ".", + "vald": "74.0", + "invd": "0.0" + } + }, + { + "id": 11, + "name": "foreign", + "label": "Car type", + "weighted": false, + "variableIntervalType": "discrete", + "variableFormatType": "NUMERIC", + "isOrderedCategorical": false, + "fileOrder": 11, + "UNF": "UNF:6:nbjzgh3wfAFqKpaoFnHalA==", + "variableMetadata": [], + "summaryStatistics": { + "max": "1.0", + "invd": "0.0", + "mode": ".", + "medn": "0.0", + "stdev": "0.46018845840901884", + "min": "0.0", + "mean": "0.2972972972972975", + "vald": "74.0" + }, + "variableCategories": [ + { + "label": "Domestic", + "value": "0", + "isMissing": false, + "frequency": 52.0 + }, + { + "label": "Foreign", + "value": "1", + "isMissing": false, + "frequency": 22.0 + } + ] + } + ] + } + ], + "varGroups": [] + }, + { + "id": 8, + "persistentId": "", + "filename": "README.md", + "contentType": "text/markdown", + "friendlyType": "Markdown Text", + "filesize": 28, + "description": "", + "storageIdentifier": "local://196d9f15664-1d4bb4e96a97", + "rootDataFileId": -1, + "md5": "a2e484d07ee5590cc32182dc2c6ccc83", + "checksum": { + 
"type": "MD5", + "value": "a2e484d07ee5590cc32182dc2c6ccc83" + }, + "tabularData": false, + "creationDate": "2025-05-16", + "publicationDate": "2025-05-16", + "fileAccessRequest": true, + "restricted": false, + "fileMetadataId": 2, + "varGroups": [] + } +] diff --git a/src/test/resources/croissant/restricted/in/datasetJson.json b/src/test/resources/croissant/restricted/in/datasetJson.json new file mode 100644 index 00000000000..3234579cddd --- /dev/null +++ b/src/test/resources/croissant/restricted/in/datasetJson.json @@ -0,0 +1,228 @@ +{ + "id": 6, + "identifier": "FK2/CY7BWA", + "persistentUrl": "https://doi.org/10.5072/FK2/CY7BWA", + "protocol": "doi", + "authority": "10.5072", + "separator": "/", + "publisher": "Root", + "publicationDate": "2025-05-16", + "storageIdentifier": "local://10.5072/FK2/CY7BWA", + "datasetType": "dataset", + "datasetVersion": { + "id": 3, + "datasetId": 6, + "datasetPersistentId": "doi:10.5072/FK2/CY7BWA", + "storageIdentifier": "local://10.5072/FK2/CY7BWA", + "versionNumber": 1, + "internalVersionNumber": 10, + "versionMinorNumber": 0, + "versionState": "RELEASED", + "latestVersionPublishingState": "RELEASED", + "UNF": "UNF:6:RPd9EWHSZwqUvRZuKTJMqg==", + "lastUpdateTime": "2025-05-16T16:33:18Z", + "releaseTime": "2025-05-16T16:33:18Z", + "createTime": "2025-05-16T16:33:13Z", + "publicationDate": "2025-05-16", + "citationDate": "2025-05-16", + "license": { + "name": "CC0 1.0", + "uri": "http://creativecommons.org/publicdomain/zero/1.0", + "iconUri": "https://licensebuttons.net/p/zero/1.0/88x31.png", + "rightsIdentifier": "CC0-1.0", + "rightsIdentifierScheme": "SPDX", + "schemeUri": "https://spdx.org/licenses/", + "languageCode": "en" + }, + "fileAccessRequest": true, + "metadataBlocks": { + "citation": { + "displayName": "Citation Metadata", + "name": "citation", + "fields": [ + { + "typeName": "title", + "multiple": false, + "typeClass": "primitive", + "value": "Cars" + }, + { + "typeName": "author", + "multiple": true, + 
"typeClass": "compound", + "value": [ + { + "authorName": { + "typeName": "authorName", + "multiple": false, + "typeClass": "primitive", + "value": "Durbin, Philip" + }, + "authorAffiliation": { + "typeName": "authorAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "Harvard" + } + } + ] + }, + { + "typeName": "datasetContact", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "datasetContactName": { + "typeName": "datasetContactName", + "multiple": false, + "typeClass": "primitive", + "value": "Durbin, Philip" + }, + "datasetContactAffiliation": { + "typeName": "datasetContactAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "Harvard" + }, + "datasetContactEmail": { + "typeName": "datasetContactEmail", + "multiple": false, + "typeClass": "primitive", + "value": "dataverse@mailinator.com" + } + } + ] + }, + { + "typeName": "dsDescription", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "dsDescriptionValue": { + "typeName": "dsDescriptionValue", + "multiple": false, + "typeClass": "primitive", + "value": "This dataset is about cars." 
+ } + } + ] + }, + { + "typeName": "subject", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "Other" + ] + }, + { + "typeName": "depositor", + "multiple": false, + "typeClass": "primitive", + "value": "Durbin, Philip" + }, + { + "typeName": "dateOfDeposit", + "multiple": false, + "typeClass": "primitive", + "value": "2024-03-13" + } + ] + } + }, + "files": [ + { + "description": "", + "label": "compute.py", + "restricted": false, + "directoryLabel": "code", + "version": 2, + "datasetVersionId": 3, + "dataFile": { + "id": 7, + "persistentId": "", + "filename": "compute.py", + "contentType": "text/x-python", + "friendlyType": "Python Source Code", + "filesize": 15, + "description": "", + "storageIdentifier": "local://196d9f154f7-8cadf34ee905", + "rootDataFileId": -1, + "md5": "d84985e94dde671f318076bd7a137f15", + "checksum": { + "type": "MD5", + "value": "d84985e94dde671f318076bd7a137f15" + }, + "tabularData": false, + "creationDate": "2025-05-16", + "publicationDate": "2025-05-16", + "fileAccessRequest": true + } + }, + { + "description": "", + "label": "README.md", + "restricted": false, + "directoryLabel": "doc", + "version": 2, + "datasetVersionId": 3, + "dataFile": { + "id": 8, + "persistentId": "", + "filename": "README.md", + "contentType": "text/markdown", + "friendlyType": "Markdown Text", + "filesize": 28, + "description": "", + "storageIdentifier": "local://196d9f15664-1d4bb4e96a97", + "rootDataFileId": -1, + "md5": "a2e484d07ee5590cc32182dc2c6ccc83", + "checksum": { + "type": "MD5", + "value": "a2e484d07ee5590cc32182dc2c6ccc83" + }, + "tabularData": false, + "creationDate": "2025-05-16", + "publicationDate": "2025-05-16", + "fileAccessRequest": true + } + }, + { + "description": "", + "label": "stata13-auto.tab", + "restricted": true, + "directoryLabel": "data", + "version": 4, + "datasetVersionId": 3, + "dataFile": { + "id": 9, + "persistentId": "", + "filename": "stata13-auto.tab", + "contentType": "text/tab-separated-values", 
+ "friendlyType": "Tab-Delimited", + "filesize": 4026, + "description": "", + "storageIdentifier": "local://196d9f15719-2270bfca2b48", + "originalFileFormat": "application/x-stata-13", + "originalFormatLabel": "Stata 13 Binary", + "originalFileSize": 6443, + "originalFileName": "stata13-auto.dta", + "UNF": "UNF:6:RPd9EWHSZwqUvRZuKTJMqg==", + "rootDataFileId": -1, + "md5": "7b1201ce6b469796837a835377338c5a", + "checksum": { + "type": "MD5", + "value": "7b1201ce6b469796837a835377338c5a" + }, + "tabularData": true, + "creationDate": "2025-05-16", + "publicationDate": "2025-05-16", + "fileAccessRequest": true + } + } + ], + "citation": "Durbin, Philip, 2025, \"Cars\", https://doi.org/10.5072/FK2/CY7BWA, Root, V1, UNF:6:RPd9EWHSZwqUvRZuKTJMqg== [fileUNF]" + } +} diff --git a/src/test/resources/croissant/restricted/in/datasetORE.json b/src/test/resources/croissant/restricted/in/datasetORE.json new file mode 100644 index 00000000000..8e6c5b93507 --- /dev/null +++ b/src/test/resources/croissant/restricted/in/datasetORE.json @@ -0,0 +1,133 @@ +{ + "dcterms:modified": "2025-05-19", + "dcterms:creator": "Root", + "@type": "ore:ResourceMap", + "schema:additionalType": "Dataverse OREMap Format v1.0.1", + "dvcore:generatedBy": { + "@type": "schema:SoftwareApplication", + "schema:name": "Dataverse", + "schema:version": "6.6", + "schema:url": "https://github.com/iqss/dataverse" + }, + "@id": "http://localhost:8080/api/datasets/export?exporter=OAI_ORE&persistentId=https://doi.org/10.5072/FK2/CY7BWA", + "ore:describes": { + "citation:datasetContact": { + "citation:datasetContactName": "Durbin, Philip", + "citation:datasetContactAffiliation": "Harvard", + "citation:datasetContactEmail": "dataverse@mailinator.com" + }, + "author": { + "citation:authorName": "Durbin, Philip", + "citation:authorAffiliation": "Harvard" + }, + "citation:dsDescription": { + "citation:dsDescriptionValue": "This dataset is about cars." 
+ }, + "dateOfDeposit": "2024-03-13", + "title": "Cars", + "citation:depositor": "Durbin, Philip", + "subject": "Other", + "@id": "https://doi.org/10.5072/FK2/CY7BWA", + "@type": [ + "ore:Aggregation", + "schema:Dataset" + ], + "schema:version": "1.0", + "schema:name": "Cars", + "schema:dateModified": "Fri May 16 16:33:18 UTC 2025", + "schema:datePublished": "2025-05-16", + "schema:creativeWorkStatus": "RELEASED", + "schema:license": "http://creativecommons.org/publicdomain/zero/1.0", + "dvcore:fileTermsOfAccess": { + "dvcore:fileRequestAccess": true + }, + "schema:includedInDataCatalog": "Root", + "schema:isPartOf": { + "schema:name": "Cars", + "@id": "http://localhost:8080/dataverse/cars", + "schema:description": "Data about cars.", + "schema:isPartOf": { + "schema:name": "Root", + "@id": "http://localhost:8080/dataverse/root", + "schema:description": "The root dataverse." + } + }, + "ore:aggregates": [ + { + "schema:description": "", + "schema:name": "compute.py", + "dvcore:restricted": false, + "dvcore:directoryLabel": "code", + "schema:version": 2, + "dvcore:datasetVersionId": 3, + "@id": "http://localhost:8080/file.xhtml?fileId=7", + "schema:sameAs": "http://localhost:8080/api/access/datafile/7", + "@type": "ore:AggregatedResource", + "schema:fileFormat": "text/x-python", + "dvcore:filesize": 15, + "dvcore:storageIdentifier": "local://196d9f154f7-8cadf34ee905", + "dvcore:rootDataFileId": -1, + "dvcore:checksum": { + "@type": "MD5", + "@value": "d84985e94dde671f318076bd7a137f15" + } + }, + { + "schema:description": "", + "schema:name": "README.md", + "dvcore:restricted": false, + "dvcore:directoryLabel": "doc", + "schema:version": 2, + "dvcore:datasetVersionId": 3, + "@id": "http://localhost:8080/file.xhtml?fileId=8", + "schema:sameAs": "http://localhost:8080/api/access/datafile/8", + "@type": "ore:AggregatedResource", + "schema:fileFormat": "text/markdown", + "dvcore:filesize": 28, + "dvcore:storageIdentifier": "local://196d9f15664-1d4bb4e96a97", + 
"dvcore:rootDataFileId": -1, + "dvcore:checksum": { + "@type": "MD5", + "@value": "a2e484d07ee5590cc32182dc2c6ccc83" + } + }, + { + "schema:description": "", + "schema:name": "stata13-auto.dta", + "dvcore:restricted": true, + "dvcore:directoryLabel": "data", + "schema:version": 4, + "dvcore:datasetVersionId": 3, + "@id": "http://localhost:8080/file.xhtml?fileId=9", + "schema:sameAs": "http://localhost:8080/api/access/datafile/9?format=original", + "@type": "ore:AggregatedResource", + "schema:fileFormat": "application/x-stata-13", + "dvcore:filesize": 6443, + "dvcore:storageIdentifier": "local://196d9f15719-2270bfca2b48", + "dvcore:currentIngestedName": "stata13-auto.tab", + "dvcore:UNF": "UNF:6:RPd9EWHSZwqUvRZuKTJMqg==", + "dvcore:rootDataFileId": -1, + "dvcore:checksum": { + "@type": "MD5", + "@value": "7b1201ce6b469796837a835377338c5a" + } + } + ], + "schema:hasPart": [ + "http://localhost:8080/file.xhtml?fileId=7", + "http://localhost:8080/file.xhtml?fileId=8", + "http://localhost:8080/file.xhtml?fileId=9" + ] + }, + "@context": { + "author": "http://purl.org/dc/terms/creator", + "citation": "https://dataverse.org/schema/citation/", + "dateOfDeposit": "http://purl.org/dc/terms/dateSubmitted", + "dcterms": "http://purl.org/dc/terms/", + "dvcore": "https://dataverse.org/schema/core#", + "ore": "http://www.openarchives.org/ore/terms/", + "schema": "http://schema.org/", + "subject": "http://purl.org/dc/terms/subject", + "title": "http://purl.org/dc/terms/title" + } +} diff --git a/src/test/resources/croissant/restricted/in/datasetSchemaDotOrg.json b/src/test/resources/croissant/restricted/in/datasetSchemaDotOrg.json new file mode 100644 index 00000000000..83f587c5fd7 --- /dev/null +++ b/src/test/resources/croissant/restricted/in/datasetSchemaDotOrg.json @@ -0,0 +1,78 @@ +{ + "@context": "http://schema.org", + "@type": "Dataset", + "@id": "https://doi.org/10.5072/FK2/CY7BWA", + "identifier": "https://doi.org/10.5072/FK2/CY7BWA", + "name": "Cars", + "creator": [ + { + 
"@type": "Person", + "givenName": "Philip", + "familyName": "Durbin", + "affiliation": { + "@type": "Organization", + "name": "Harvard" + }, + "name": "Durbin, Philip" + } + ], + "author": [ + { + "@type": "Person", + "givenName": "Philip", + "familyName": "Durbin", + "affiliation": { + "@type": "Organization", + "name": "Harvard" + }, + "name": "Durbin, Philip" + } + ], + "datePublished": "2025-05-16", + "dateModified": "2025-05-16", + "version": "1", + "description": "This dataset is about cars.", + "keywords": [ + "Other" + ], + "license": "http://creativecommons.org/publicdomain/zero/1.0", + "includedInDataCatalog": { + "@type": "DataCatalog", + "name": "Root", + "url": "http://localhost:8080" + }, + "publisher": { + "@type": "Organization", + "name": "Root" + }, + "provider": { + "@type": "Organization", + "name": "Root" + }, + "distribution": [ + { + "@type": "DataDownload", + "name": "compute.py", + "encodingFormat": "text/x-python", + "contentSize": 15, + "description": "", + "contentUrl": "http://localhost:8080/api/access/datafile/7" + }, + { + "@type": "DataDownload", + "name": "stata13-auto.tab", + "encodingFormat": "text/tab-separated-values", + "contentSize": 4026, + "description": "", + "contentUrl": "http://localhost:8080/api/access/datafile/9" + }, + { + "@type": "DataDownload", + "name": "README.md", + "encodingFormat": "text/markdown", + "contentSize": 28, + "description": "", + "contentUrl": "http://localhost:8080/api/access/datafile/8" + } + ] +} diff --git a/src/test/resources/json/export-formats.json b/src/test/resources/json/export-formats.json index 65fc746ee23..527d217e1fc 100644 --- a/src/test/resources/json/export-formats.json +++ b/src/test/resources/json/export-formats.json @@ -49,6 +49,12 @@ "XMLSchemaLocation": "https://ddialliance.org/Specification/DDI-Codebook/2.5/XMLSchema/codebook.xsd", "XMLSchemaVersion": "2.5" }, + "croissant": { + "displayName": "Croissant", + "mediaType": "application/json", + "isHarvestable": false, + 
"isVisibleInUserInterface": true + }, "dcterms": { "displayName": "Dublin Core", "mediaType": "application/xml", @@ -64,6 +70,12 @@ "isHarvestable": false, "isVisibleInUserInterface": true }, + "croissantSlim": { + "displayName": "Croissant Slim", + "mediaType": "application/json", + "isHarvestable": false, + "isVisibleInUserInterface": false + }, "dataverse_json": { "displayName": "JSON", "mediaType": "application/json",