From e117e31a4ff3faf84cdb1ac07ff800dd02aae786 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Fri, 20 Mar 2026 15:08:17 -0400 Subject: [PATCH] new api to get formatted datafile citations --- .../11733-api-get-file-citation-format.md | 4 ++ doc/sphinx-guides/source/api/dataaccess.rst | 20 ++++++ doc/sphinx-guides/source/api/native-api.rst | 2 + .../harvard/iq/dataverse/DataCitation.java | 66 ++++++++---------- .../edu/harvard/iq/dataverse/api/Access.java | 24 ++++++- .../edu/harvard/iq/dataverse/api/FilesIT.java | 69 +++++++++++++++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 6 ++ 7 files changed, 154 insertions(+), 37 deletions(-) create mode 100644 doc/release-notes/11733-api-get-file-citation-format.md diff --git a/doc/release-notes/11733-api-get-file-citation-format.md b/doc/release-notes/11733-api-get-file-citation-format.md new file mode 100644 index 00000000000..b315e8176b8 --- /dev/null +++ b/doc/release-notes/11733-api-get-file-citation-format.md @@ -0,0 +1,4 @@ +### Feature New API ### +New API added to retrieve the DataFile Citation in a requested format. This is similar output to the API to get the Dataset Citation. + +SERVER_URL/api/access/datafile/{fileId}/citation/{format} diff --git a/doc/sphinx-guides/source/api/dataaccess.rst b/doc/sphinx-guides/source/api/dataaccess.rst index 80d9a4d9ed2..c60d214a45d 100755 --- a/doc/sphinx-guides/source/api/dataaccess.rst +++ b/doc/sphinx-guides/source/api/dataaccess.rst @@ -254,6 +254,26 @@ Value Description ID Exports file with specific file metadata ``ID``. ============== =========== + +.. _datafile-citation-formatted-access: + +Citation - Get Citation In Other Formats +---------------------------------------- + +Dataverse can generate datafile citations in "EndNote", "RIS", "BibTeX", and "CSL" formats. +This API call sends the raw format with the appropriate content-type (EndNote is XML, RIS and BibTeX are plain text, and CSL is JSON). 
("Internal" is also a valid value, returning the content as HTML). +The format is a required part of the URL and can be any of the values listed above (it is matched case-insensitively). + +Usage example: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export DATAFILE_ID=99 + export FORMAT=EndNote + + curl "$SERVER_URL/api/access/datafile/$DATAFILE_ID/citation/$FORMAT" + .. _data-variable-metadata-access: Data Variable Metadata Access diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 808dbeec815..c1e6ac2399f 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -4095,6 +4095,8 @@ Usage example: The type under CSL can vary based on the dataset type, with "dataset", "software", and "review" as supported values. See also :ref:`dataset-types`. +.. note:: You can also get the Datafile Citation by using the Access Datafile API. See: :ref:`datafile-citation-formatted-access`. + Get Citation by Preview URL Token ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java index 57734911470..5d35c2b85f7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java @@ -5,55 +5,40 @@ */ package edu.harvard.iq.dataverse; +import de.undercouch.citeproc.csl.CSLItemDataBuilder; +import de.undercouch.citeproc.csl.CSLName; +import de.undercouch.citeproc.csl.CSLNameBuilder; +import de.undercouch.citeproc.csl.CSLType; +import de.undercouch.citeproc.helper.json.JsonBuilder; +import de.undercouch.citeproc.helper.json.StringJsonBuilderFactory; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.dataset.DatasetType; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import
edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.DateUtil; +import edu.harvard.iq.dataverse.util.PersonOrOrgUtil; +import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.json.JsonUtil; +import jakarta.ejb.EJBException; +import jakarta.json.JsonObject; +import jakarta.ws.rs.core.MediaType; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.text.StringEscapeUtils; -import java.io.BufferedWriter; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.io.Writer; +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; +import java.io.*; import java.nio.charset.StandardCharsets; import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; -import jakarta.ejb.EJBException; -import jakarta.json.JsonObject; -import jakarta.ws.rs.core.MediaType; - -import javax.xml.stream.XMLOutputFactory; -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamWriter; - -import edu.harvard.iq.dataverse.util.BundleUtil; -import edu.harvard.iq.dataverse.util.DateUtil; -import edu.harvard.iq.dataverse.util.PersonOrOrgUtil; -import edu.harvard.iq.dataverse.util.SystemConfig; -import edu.harvard.iq.dataverse.util.json.JsonUtil; - -import org.apache.commons.text.StringEscapeUtils; - -import de.undercouch.citeproc.csl.CSLItemDataBuilder; -import de.undercouch.citeproc.csl.CSLName; -import de.undercouch.citeproc.csl.CSLNameBuilder; -import de.undercouch.citeproc.csl.CSLType; -import de.undercouch.citeproc.helper.json.JsonBuilder; -import 
de.undercouch.citeproc.helper.json.StringJsonBuilderFactory; - -import org.apache.commons.lang3.StringUtils; - import static edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider.DOI_PROTOCOL; import static edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider.HDL_PROTOCOL; import static edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider.PERMA_PROTOCOL; @@ -99,6 +84,15 @@ public enum Format { BibTeX, CSL } + + public static Format getFormat(String name) { + for (Format format : Format.values()) { + if (format.name().equalsIgnoreCase(name)) { + return format; + } + } + return null; + } public DataCitation(DatasetVersion dsv) { this(dsv, false); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index f7654720b71..dce162e9899 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -130,7 +130,29 @@ public class Access extends AbstractApiBean { private static final String DEFAULT_BUNDLE_NAME = "dataverse_files.zip"; //@EJB - + + @GET + @AuthRequired + @Path("datafile/{fileId}/citation/{format}") + public Response datafileCitation(@Context ContainerRequestContext crc, + @PathParam("fileId") String fileId, + @PathParam("format") String formatString) { + + DataCitation.Format format = DataCitation.getFormat(formatString); + if (format == null) { + return badRequest(BundleUtil.getStringFromBundle("datasets.api.citation.invalidFormat")); + } + + DataFile df = findDataFileOrDieWrapper(fileId); + + // This will throw a ForbiddenException if access isn't authorized: + checkAuthorization(crc, df); + + String dataCitationFormatted = (new DataCitation(df.getFileMetadata())).toString(format, true, false); + + return Response.ok().type(DataCitation.getCitationFormatMediaType(format, true)).entity(dataCitationFormatted).build(); + } + // TODO: // versions? -- L.A. 
4.0 beta 10 @GET diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java index ed96b5b4656..8fa9689e057 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java @@ -4060,4 +4060,73 @@ public void testDownloadFileWithGuestbookResponse() throws IOException, JsonPars signedUrlResponse = get(signedUrl); assertEquals(OK.getStatusCode(), signedUrlResponse.getStatusCode()); } + + @Test + public void testGetFileCitationFormatted() { + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDatasetResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + + String pathToTestFile = "src/test/resources/images/coffeeshop.png"; + Response uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToTestFile, Json.createObjectBuilder().build(), apiToken); + uploadFile.then().assertThat().statusCode(OK.getStatusCode()); + + String fileId = JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id"); + + // Test good formats + Response response = UtilIT.getFileCitationFormat(fileId,"EndNote", apiToken); + response.then().assertThat() + .statusCode(OK.getStatusCode()); + assertTrue(response.prettyPrint().contains("coffeeshop.png")); + + response = UtilIT.getFileCitationFormat(fileId,"RIS", apiToken); + response.then().assertThat() + 
.statusCode(OK.getStatusCode()); + assertTrue(response.prettyPrint().contains("C1 - coffeeshop.png")); + + response = UtilIT.getFileCitationFormat(fileId,"BibTeX", apiToken); + response.then().assertThat() + .statusCode(OK.getStatusCode()); + assertTrue(response.prettyPrint().contains("author = {Finch, Fiona},")); + + response = UtilIT.getFileCitationFormat(fileId,"CSL", apiToken); + response.then().assertThat() + .statusCode(OK.getStatusCode()); + assertTrue(response.prettyPrint().contains("\"title\": \"Darwin's Finches\",")); + + response = UtilIT.getFileCitationFormat(fileId,"Internal", apiToken); + response.then().assertThat() + .statusCode(OK.getStatusCode()); + assertTrue(response.prettyPrint().contains("coffeeshop.png [fileName]")); + + // Test an unknown format + response = UtilIT.getFileCitationFormat(fileId,"bad", apiToken); + response.prettyPrint(); + response.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("message", equalTo(BundleUtil.getStringFromBundle("datasets.api.citation.invalidFormat"))); + // Test an NULL format + response = UtilIT.getFileCitationFormat(fileId,null, apiToken); + response.prettyPrint(); + response.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("message", equalTo(BundleUtil.getStringFromBundle("datasets.api.citation.invalidFormat"))); + + // Test a user that doesn't have permission to get the citation + Response createUser2 = UtilIT.createRandomUser(); + createUser2.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken2 = UtilIT.getApiTokenFromResponse(createUser2); + response = UtilIT.getFileCitationFormat(fileId,"EndNote",apiToken2); + response.prettyPrint(); + response.then().assertThat() + .statusCode(FORBIDDEN.getStatusCode()); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index e4b5fefba3b..dbced67f436 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ 
b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -1443,6 +1443,12 @@ static Response getFileData(String fileId, String apiToken, String datasetVersio .get("/api/files/" + fileId + "/versions/" + datasetVersionId); } + static Response getFileCitationFormat(String dataFileId, String format, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/access/datafile/" + dataFileId + "/citation/" + format); + } + static Response getFileVersionDifferences(String fileId, String apiToken) { return getFileVersionDifferences(fileId, apiToken, null, null); }