Skip to content

Commit 802af0d

Browse files
committed
A quick implementation that uses the DataCite REST API to retrieve
DOI metadata from DataCite. This addresses an apparent issue with UTF-8 characters when relying on the traditionally used MDS API. (#12070)
1 parent cd01fd5 commit 802af0d

3 files changed

Lines changed: 79 additions & 11 deletions

File tree

src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ public DOIDataCiteRegisterService(String url, String username, String password)
4646
client = new DataCiteRESTfullClient(url, username, password);
4747
}
4848

49+
/**
 * Creates a register service backed by a {@link DataCiteRESTfullClient} that is
 * given a REST API URL in addition to the base URL.
 *
 * @param url        base URL handed to the client as its first argument
 * @param restApiUrl REST API URL handed to the client
 * @param username   account name handed to the client
 * @param password   account password handed to the client
 */
public DOIDataCiteRegisterService(String url, String restApiUrl, String username, String password) {
    this.client = new DataCiteRESTfullClient(url, restApiUrl, username, password);
}
52+
4953
/**
5054
* This "reserveIdentifier" method is heavily based on the
5155
* "registerIdentifier" method below, but this one doesn't
@@ -80,13 +84,14 @@ public String registerIdentifier(String identifier, Map<String, String> metadata
8084

8185
public String reRegisterIdentifier(String identifier, Map<String, String> metadata, DvObject dvObject) throws IOException {
8286
String retString = "";
83-
String numericIdentifier = identifier.substring(identifier.indexOf(":") + 1);
87+
// "bare identifier" is the canonical pid with the "doi:" prefix stripped
88+
String bareIdentifier = identifier.substring(identifier.indexOf(":") + 1);
8489
String xmlMetadata = getMetadataFromDvObject(identifier, metadata, dvObject);
8590
String target = metadata.get("_target");
8691
String currentMetadata = null;
8792
boolean hasDifferences = false;
8893
try {
89-
currentMetadata = client.getMetadata(numericIdentifier);
94+
currentMetadata = client.getMetadataViaRestApi(bareIdentifier);
9095
Diff myDiff = DiffBuilder.compare(xmlMetadata).withTest(currentMetadata).ignoreWhitespace().checkForSimilar()
9196
.build();
9297
hasDifferences = myDiff.hasDifferences();
@@ -96,7 +101,7 @@ public String reRegisterIdentifier(String identifier, Map<String, String> metada
96101
}
97102
}
98103
} catch (RuntimeException e) {
99-
logger.log(Level.INFO, "DOI " + numericIdentifier + " not registered with DataCite, registering now.");
104+
logger.log(Level.INFO, "DOI " + bareIdentifier + " not registered with DataCite, registering now.");
100105
hasDifferences = true;
101106
}
102107

@@ -106,13 +111,13 @@ public String reRegisterIdentifier(String identifier, Map<String, String> metada
106111
String currentUrl = null;
107112
try {
108113
//May get a 204 if the DOI is still draft
109-
currentUrl = client.getUrl(numericIdentifier);
114+
currentUrl = client.getUrl(bareIdentifier);
110115
} catch (RuntimeException ex) {
111-
logger.fine("Error getting Url for " + numericIdentifier + ": " + ex.getMessage());
116+
logger.fine("Error getting Url for " + bareIdentifier + ": " + ex.getMessage());
112117
}
113118
if (!target.equals(currentUrl)) {
114119
logger.info("Updating target URL to " + target);
115-
client.postUrl(numericIdentifier, target);
120+
client.postUrl(bareIdentifier, target);
116121
retString = retString + "url:\\r" + target;
117122

118123
}

src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ public DataCiteDOIProvider(String id, String label, String providerAuthority, St
5959
this.apiUrl = apiUrl;
6060
this.username = username;
6161
this.password = password;
62-
doiDataCiteRegisterService = new DOIDataCiteRegisterService(mdsUrl, username, password);
62+
doiDataCiteRegisterService = new DOIDataCiteRegisterService(mdsUrl, apiUrl, username, password);
6363
}
6464

6565
@Override
@@ -349,7 +349,7 @@ public boolean updateIdentifier(DvObject dvObject) {
349349
logger.info(identifier + "updated: " + updated );
350350
return true;
351351
} else {
352-
logger.info("No updated needed for " + identifier);
352+
logger.info("No update needed for " + identifier);
353353
return false; //No update needed
354354
}
355355
} catch (Exception e) {

src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteRESTfullClient.java

Lines changed: 66 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import java.io.Closeable;
1010
import java.io.IOException;
11+
import java.util.Base64;
1112

1213
import java.util.logging.Level;
1314
import java.util.logging.Logger;
@@ -26,10 +27,9 @@
2627
import org.apache.http.impl.client.BasicCredentialsProvider;
2728
import org.apache.http.impl.client.CloseableHttpClient;
2829
import org.apache.http.impl.client.HttpClients;
29-
30-
31-
3230
import org.apache.http.util.EntityUtils;
31+
import edu.harvard.iq.dataverse.util.json.JsonUtil;
32+
import jakarta.json.JsonObject;
3333

3434
/**
3535
* DataCiteRESTfullClient
@@ -46,6 +46,7 @@ public class DataCiteRESTfullClient implements Closeable {
4646
private static final long RETRY_DELAY_MS = 10000; // 10 seconds
4747

4848
private String url;
49+
private String restApiUrl;
4950
private CloseableHttpClient httpClient;
5051
private HttpClientContext context;
5152
private String encoding = "utf-8";
@@ -59,6 +60,17 @@ public DataCiteRESTfullClient(String url, String username, String password) {
5960

6061
httpClient = HttpClients.createDefault();
6162
}
63+
64+
public DataCiteRESTfullClient(String url, String restApiUrl, String username, String password) {
65+
this.url = url;
66+
this.restApiUrl = restApiUrl;
67+
context = HttpClientContext.create();
68+
CredentialsProvider credsProvider = new BasicCredentialsProvider();
69+
credsProvider.setCredentials(new AuthScope(null, -1), new UsernamePasswordCredentials(username, password));
70+
context.setCredentialsProvider(credsProvider);
71+
72+
httpClient = HttpClients.createDefault();
73+
}
6274

6375
public void close() {
6476
if (this.httpClient != null) {
@@ -209,6 +221,57 @@ public String getMetadata(String doi) {
209221
}
210222
}
211223

224+
/**
225+
* getMetadataViaRestApi
226+
* a temporary/dev. version of the method utilizing REST API instead of MDS
227+
*
228+
* @param doi
229+
* @return
230+
*/
231+
public String getMetadataViaRestApi(String doi) {
232+
HttpGet httpGet = new HttpGet(this.restApiUrl + "/dois/" + doi);
233+
234+
try {
235+
HttpResponse response = executeWithRetry(httpGet, "getMetadataViaRestApi");
236+
String restApiRawData = EntityUtils.toString(response.getEntity(), encoding);
237+
238+
logger.fine("REST API raw data: " + restApiRawData);
239+
240+
if (response.getStatusLine().getStatusCode() != 200) {
241+
String errMsg = "getMetadataViaRestApi, Response: " + response.getStatusLine().getStatusCode() + ", " + restApiRawData;
242+
logger.log(Level.SEVERE, errMsg);
243+
throw new RuntimeException(errMsg);
244+
}
245+
246+
JsonObject restApiJson = JsonUtil.getJsonObject(restApiRawData);
247+
String xmlEncoded = null;
248+
249+
JsonObject restApiJsonData = restApiJson.getJsonObject("data");
250+
if (restApiJsonData != null) {
251+
JsonObject restApiJsonAttributes = restApiJsonData.getJsonObject("attributes");
252+
if (restApiJsonAttributes != null) {
253+
xmlEncoded = restApiJsonAttributes.getString("xml");
254+
}
255+
}
256+
logger.fine("encoded XML entry: " + xmlEncoded);
257+
258+
String metadata = null; // what we want to return, registration metadata in the XML format
259+
260+
if (xmlEncoded != null) {
261+
// Stripping any newlines below may be unnecessary - it is likely
262+
// always returned as a continuous string; but shouldn't hurt
263+
// either.
264+
metadata = new String(Base64.getDecoder().decode(xmlEncoded.replaceAll("[\\r\\n]", "")), encoding);
265+
}
266+
267+
logger.fine("decoded XML metadata: " + metadata);
268+
return metadata;
269+
} catch (IOException ioe) {
270+
logger.log(Level.SEVERE, "IOException in getMetadataViaRestApi", ioe);
271+
throw new RuntimeException("IOException in getMetadataViaRestAPi", ioe);
272+
}
273+
}
274+
212275
/**
213276
* testDOIExists
214277
*

0 commit comments

Comments
 (0)