Skip to content

Commit fca96fd

Browse files
Merge pull request #215 from datacite/container
Expand mappings to container and citeproc JSON
2 parents 52d4e15 + 9b67dc5 commit fca96fd

15 files changed

+575
-37
lines changed

lib/bolognese/datacite_utils.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ def insert_descriptions(xml)
337337
return xml unless descriptions.present? || container && container["title"].present?
338338

339339
xml.descriptions do
340-
if container && container["title"].present?
340+
if (respond_to?(:from) && !from.to_s.include?("datacite")) && container && container["title"].present?
341341
issue = container["issue"].present? ? "(#{container["issue"]})" : nil
342342
volume_issue = container["volume"].present? ? [container["volume"], issue].join("") : nil
343343
pages = [container["firstPage"], container["lastPage"]].compact.join("-") if container["firstPage"].present?

lib/bolognese/metadata.rb

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
module Bolognese
66
class Metadata
77
include Bolognese::MetadataUtils
8+
include Bolognese::Utils
89

910
attr_accessor :string, :from, :sandbox, :meta, :regenerate, :issue, :show_errors
1011
attr_reader :doc, :page_start, :page_end
@@ -205,7 +206,9 @@ def publication_year
205206
end
206207

207208
def container
208-
@container ||= meta.fetch("container", nil)
209+
@container ||= begin
210+
generate_container(types, related_items, related_identifiers, descriptions) || meta.fetch("container", nil)
211+
end
209212
end
210213

211214
def geo_locations

lib/bolognese/metadata_utils.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,15 +153,21 @@ def citeproc_hsh
153153
"language" => language,
154154
"author" => author,
155155
"contributor" => to_citeproc(contributors),
156+
"editor" => contributors ? to_citeproc(contributors.select { |c| c["contributorType"] == "Editor" }) : nil,
156157
"translator" => contributors ? to_citeproc(contributors.select { |c| c["contributorType"] == "Translator" }) : nil,
157158
"issued" => get_date(dates, "Issued") ? get_date_parts(get_date(dates, "Issued")) : get_date_parts(publication_year.to_s),
158159
"submitted" => Array.wrap(dates).find { |d| d["dateType"] == "Submitted" }.to_h.fetch("__content__", nil),
160+
"available-date" => Array.wrap(dates).find { |d| d["dateType"] == "Available" }.to_h.fetch("__content__", nil),
159161
"abstract" => parse_attributes(descriptions, content: "description", first: true),
160162
"container-title" => container_title,
161163
"DOI" => doi,
162164
"volume" => container.to_h["volume"],
163165
"issue" => container.to_h["issue"],
166+
"number" => container.to_h["number"],
167+
"chapter-number" => container.to_h["chapterNumber"],
168+
"edition" => container.to_h["edition"],
164169
"page" => page,
170+
"page-first" => container.to_h["firstPage"],
165171
"publisher" => publisher["name"],
166172
"title" => parse_attributes(titles, content: "title", first: true),
167173
"URL" => url,

lib/bolognese/readers/datacite_reader.rb

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,6 @@ def read_datacite(string: nil, **options)
298298
"titles" => titles,
299299
"creators" => get_authors(Array.wrap(meta.dig("creators", "creator"))),
300300
"contributors" => get_authors(Array.wrap(meta.dig("contributors", "contributor"))),
301-
"container" => set_container(meta),
302301
"publisher" => publisher,
303302
"agency" => "datacite",
304303
"funding_references" => funding_references,
@@ -319,28 +318,6 @@ def read_datacite(string: nil, **options)
319318
}.merge(read_options)
320319
end
321320

322-
def set_container(meta)
323-
series_information = Array.wrap(meta.dig("descriptions", "description")).find { |r| r["descriptionType"] == "SeriesInformation" }.to_h.fetch("__content__", nil)
324-
si = get_series_information(series_information)
325-
326-
is_part_of = Array.wrap(meta.dig("relatedIdentifiers", "relatedIdentifier")).find { |ri| ri["relationType"] == "IsPartOf" }.to_h
327-
328-
if si["title"].present? || is_part_of.present?
329-
{
330-
"type" => meta.dig("resourceType", "resourceTypeGeneral") == "Dataset" ? "DataRepository" : "Series",
331-
"identifier" => is_part_of["__content__"],
332-
"identifierType" => is_part_of["relatedIdentifierType"],
333-
"title" => si["title"],
334-
"volume" => si["volume"],
335-
"issue" => si["issue"],
336-
"firstPage" => si["firstPage"],
337-
"lastPage" => si["lastPage"]
338-
}.compact
339-
else
340-
{}
341-
end
342-
end
343-
344321
def get_titles(meta)
345322
titles = Array.wrap(meta.dig("titles", "title")).map do |r|
346323
if r.blank?

lib/bolognese/utils.rb

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1434,5 +1434,47 @@ def abstract_description
14341434
# Fetch the first description with descriptionType "Abstract"
14351435
Array.wrap(descriptions)&.find { |d| d["descriptionType"] == "Abstract" }
14361436
end
1437+
1438+
# Builds the "container" hash (the series, journal or repository a resource
# belongs to) from DataCite-style metadata.
#
# Two sources are tried, in this order:
# 1. the first relatedItem whose relationType is "IsPublishedIn";
# 2. the legacy pairing of a "SeriesInformation" description with the first
#    "IsPartOf" relatedIdentifier, used only when the parsed series
#    information has a title.
#
# @param types [Hash, nil] "resourceTypeGeneral" == "Dataset" yields the
#   container type "DataRepository"; anything else yields "Series"
# @param related_items [Array<Hash>, Hash, nil]
# @param related_identifiers [Array<Hash>, Hash, nil]
# @param descriptions [Array<Hash>, Hash, nil]
# @return [Hash, nil] the container with nil values removed, or nil when
#   neither source applies (callers can then fall back to another value)
def generate_container(types, related_items, related_identifiers, descriptions)
  resource_type_general = types.respond_to?(:dig) ? types.dig("resourceTypeGeneral") : nil
  container_type = resource_type_general == "Dataset" ? "DataRepository" : "Series"

  # relatedItem container
  related_item = Array.wrap(related_items).find { |ri| ri["relationType"] == "IsPublishedIn" }.to_h

  if related_item.present?
    # "titles" is normalised with Array.wrap like every other input here, so a
    # single unwrapped title is still picked up instead of being dropped (Hash)
    # or raising from Hash#dig (String).
    first_title = Array.wrap(related_item["titles"]).first
    number = related_item["number"]

    return {
      "type" => container_type,
      "identifier" => related_item.dig("relatedItemIdentifier", "relatedItemIdentifier"),
      "identifierType" => related_item.dig("relatedItemIdentifier", "relatedItemIdentifierType"),
      "title" => first_title ? parse_attributes(first_title, content: "title", first: true) : nil,
      "volume" => related_item["volume"],
      "issue" => related_item["issue"],
      "edition" => related_item["edition"],
      "number" => number,
      # the same number is also exposed as chapterNumber when it is typed as a chapter
      "chapterNumber" => related_item["numberType"] == "Chapter" ? number : nil,
      "firstPage" => related_item["firstPage"],
      "lastPage" => related_item["lastPage"]
    }.compact
  end

  # Legacy SeriesInformation/relatedIdentifier container fallback
  series_information = Array.wrap(descriptions).find { |r| r["descriptionType"] == "SeriesInformation" }.to_h.fetch("description", nil)
  si = get_series_information(series_information)

  # An IsPartOf identifier on its own is not enough: without a series title
  # no container is generated.
  return unless si["title"].present?

  is_part_of = Array.wrap(related_identifiers).find { |ri| ri["relationType"] == "IsPartOf" }.to_h

  {
    "type" => container_type,
    "identifier" => is_part_of["relatedIdentifier"],
    "identifierType" => is_part_of["relatedIdentifierType"],
    "title" => si["title"],
    "volume" => si["volume"],
    "issue" => si["issue"],
    "firstPage" => si["firstPage"],
    "lastPage" => si["lastPage"]
  }.compact
end
14371479
end
14381480
end

spec/fixtures/datacite-example-full-v4.6.xml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,18 @@
6161
<familyName>Smith</familyName>
6262
<affiliation affiliationIdentifier="https://ror.org/04wxnsj81" affiliationIdentifierScheme="ROR" schemeURI="https://ror.org">ExampleAffiliation</affiliation>
6363
</contributor>
64+
<contributor contributorType="Editor">
65+
<contributorName nameType="Personal">Ross, Cody</contributorName>
66+
<givenName>Cody</givenName>
67+
<familyName>Ross</familyName>
68+
<affiliation affiliationIdentifier="https://ror.org/04wxnsj81" affiliationIdentifierScheme="ROR" schemeURI="https://ror.org">ExampleAffiliation</affiliation>
69+
</contributor>
70+
<contributor contributorType="Editor">
71+
<contributorName nameType="Personal">Stathis, Kelly</contributorName>
72+
<givenName>Kelly</givenName>
73+
<familyName>Stathis</familyName>
74+
<affiliation affiliationIdentifier="https://ror.org/04wxnsj81" affiliationIdentifierScheme="ROR" schemeURI="https://ror.org">ExampleAffiliation</affiliation>
75+
</contributor>
6476
</contributors>
6577

6678
<!-- Dates (Including New dateType "Coverage") -->
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
{
2+
"id": "https://doi.org/10.5438/4k3m-nyvg",
3+
"types": {
4+
"resourceTypeGeneral": "Text",
5+
"resourceType": "BlogPosting",
6+
"schemaOrg": "ScholarlyArticle",
7+
"citeproc": "article-journal",
8+
"bibtex": "article",
9+
"ris": "RPRT"
10+
},
11+
"doi": "10.5438/4k3m-nyvg",
12+
"identifiers": [
13+
{
14+
"identifierType": "DOI",
15+
"identifier": "https://doi.org/10.5438/4k3m-nyvg"
16+
},
17+
{
18+
"identifierType": "Local accession number",
19+
"identifier": "MS-49-3632-5083"
20+
}
21+
],
22+
"creators": [
23+
{
24+
"type": "Person",
25+
"id": "http://orcid.org/0000-0003-1419-2405",
26+
"name": "Fenner, Martin",
27+
"givenName": "Martin",
28+
"familyName": "Fenner"
29+
}
30+
],
31+
"titles": [
32+
{
33+
"title": "Eating your own Dog Food"
34+
}
35+
],
36+
"publisher": "DataCite",
37+
"publicationYear": "2016",
38+
"subjects": [
39+
{
40+
"subject": "datacite"
41+
},
42+
{
43+
"subject": "doi"
44+
},
45+
{
46+
"subject": "metadata"
47+
}
48+
],
49+
"dates": [
50+
{
51+
"dateType": "Created",
52+
"date": "2016-12-20"
53+
},
54+
{
55+
"dateType": "Issued",
56+
"date": "2016-12-20"
57+
},
58+
{
59+
"dateType": "Updated",
60+
"date": "2016-12-20"
61+
}
62+
],
63+
"relatedIdentifiers": [
64+
{
65+
"relatedIdentifier": "10.5438/0000-00ss",
66+
"relatedIdentifierType": "DOI",
67+
"relationType": "IsPartOf"
68+
},
69+
{
70+
"relatedIdentifier": "10.5438/0012",
71+
"relatedIdentifierType": "DOI",
72+
"relationType": "References"
73+
},
74+
{
75+
"relatedIdentifier": "10.5438/55e5-t5c0",
76+
"relatedIdentifierType": "DOI",
77+
"relationType": "References"
78+
}
79+
],
80+
"version": "1.0",
81+
"descriptions": [
82+
{
83+
"descriptionType": "Abstract",
84+
"description": "Eating your own dog food is a slang term to describe that an organization should itself use the products and services it provides. For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for..."
85+
}
86+
],
87+
"container": {
88+
"type": "Series",
89+
"identifier": "10.17605/OSF.IO/CEA94",
90+
"identifierType": "DOI"
91+
},
92+
"schemaVersion": "http://datacite.org/schema/kernel-4",
93+
"agency": "datacite"
94+
}
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
{
2+
"id": "https://doi.org/10.5438/4k3m-nyvg",
3+
"types": {
4+
"resourceTypeGeneral": "Text",
5+
"resourceType": "BlogPosting",
6+
"schemaOrg": "ScholarlyArticle",
7+
"citeproc": "article-journal",
8+
"bibtex": "article",
9+
"ris": "RPRT"
10+
},
11+
"doi": "10.5438/4k3m-nyvg",
12+
"identifiers": [
13+
{
14+
"identifierType": "DOI",
15+
"identifier": "https://doi.org/10.5438/4k3m-nyvg"
16+
},
17+
{
18+
"identifierType": "Local accession number",
19+
"identifier": "MS-49-3632-5083"
20+
}
21+
],
22+
"creators": [
23+
{
24+
"type": "Person",
25+
"id": "http://orcid.org/0000-0003-1419-2405",
26+
"name": "Fenner, Martin",
27+
"givenName": "Martin",
28+
"familyName": "Fenner"
29+
}
30+
],
31+
"titles": [
32+
{
33+
"title": "Eating your own Dog Food"
34+
}
35+
],
36+
"publisher": "DataCite",
37+
"publicationYear": "2016",
38+
"subjects": [
39+
{
40+
"subject": "datacite"
41+
},
42+
{
43+
"subject": "doi"
44+
},
45+
{
46+
"subject": "metadata"
47+
}
48+
],
49+
"dates": [
50+
{
51+
"dateType": "Created",
52+
"date": "2016-12-20"
53+
},
54+
{
55+
"dateType": "Issued",
56+
"date": "2016-12-20"
57+
},
58+
{
59+
"dateType": "Updated",
60+
"date": "2016-12-20"
61+
}
62+
],
63+
"relatedIdentifiers": [
64+
{
65+
"relatedIdentifier": "10.5438/0000-00ss",
66+
"relatedIdentifierType": "DOI",
67+
"relationType": "IsPartOf"
68+
},
69+
{
70+
"relatedIdentifier": "10.5438/0012",
71+
"relatedIdentifierType": "DOI",
72+
"relationType": "References"
73+
},
74+
{
75+
"relatedIdentifier": "10.5438/55e5-t5c0",
76+
"relatedIdentifierType": "DOI",
77+
"relationType": "References"
78+
}
79+
],
80+
"version": "1.0",
81+
"descriptions": [
82+
{
83+
"descriptionType": "SeriesInformation",
84+
"description": "series title, volume(issue), firstpage-lastpage"
85+
},
86+
{
87+
"descriptionType": "Abstract",
88+
"description": "Eating your own dog food is a slang term to describe that an organization should itself use the products and services it provides. For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for..."
89+
}
90+
],
91+
"container": {
92+
"type": "Series",
93+
"identifier": "10.17605/OSF.IO/CEA94",
94+
"identifierType": "DOI"
95+
},
96+
"schemaVersion": "http://datacite.org/schema/kernel-4",
97+
"agency": "datacite"
98+
}

0 commit comments

Comments
 (0)