-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathods2gc.xsl
More file actions
412 lines (395 loc) · 21.4 KB
/
ods2gc.xsl
File metadata and controls
412 lines (395 loc) · 21.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet
version="1.0"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:gc="http://docs.oasis-open.org/codelist/ns/genericode/1.0/"
xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
xmlns:str="http://exslt.org/strings"
xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0"
xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
exclude-result-prefixes="office table text"
extension-element-prefixes="str">
<!-- Serialize the result tree as indented XML 1.0, encoded as UTF-8. -->
<xsl:output
method="xml"
version="1.0"
encoding="UTF-8"
indent="yes" />
<!-- Names of the sheets this stylesheet looks for.
The root template selects the sheets by these names if 4 sheets are present in total;
otherwise, it only looks for the sheet named by sheetNameSimpleCodeList.
Note: the xsl:key valuesTableColumnPositionKey contains the hardcoded sheet name 'Values',
so the value of sheetNameSimpleCodeList has to be kept in sync with that key. -->
<!-- Sheet that is converted to the Identification element -->
<xsl:variable
name="sheetNameIdentification"
select="'Identification'" />
<!-- Sheet that is converted to the Annotation/Description element of the code list -->
<xsl:variable
name="sheetNameMetadata"
select="'Additional code list metadata'" />
<!-- Sheet that is converted to the ColumnSet element -->
<xsl:variable
name="sheetNameColumnSet"
select="'Columns'" />
<!-- Sheet that is converted to the SimpleCodeList element -->
<xsl:variable
name="sheetNameSimpleCodeList"
select="'Values'" />
<!-- Entry point. Decides, based on the sheets present in the spreadsheet, which document is written:
     - exactly 4 sheets in total: a complete document with root element gc:CodeList;
     - otherwise, exactly one sheet with the name held by $sheetNameSimpleCodeList: only a SimpleCodeList element;
     - in all other cases the transformation is terminated with a message. -->
<xsl:template match="/">
    <!-- All sheets of the spreadsheet; every branch below selects from this node-set -->
    <xsl:variable
        name="sheets"
        select="office:document/office:body/office:spreadsheet/table:table" />
    <xsl:choose>
        <xsl:when test="count($sheets) = 4">
            <!-- Create a file with root element gc:CodeList, which can be validated against the genericode XML schema.
                 The sheets are processed in the order in which the resulting elements have to appear in the output,
                 not in the order in which they appear in the spreadsheet. -->
            <gc:CodeList>
                <xsl:apply-templates
                    select="$sheets[@table:name = $sheetNameMetadata]"
                    mode="codeListMetadata" />
                <xsl:apply-templates
                    select="$sheets[@table:name = $sheetNameIdentification]"
                    mode="identification" />
                <xsl:apply-templates
                    select="$sheets[@table:name = $sheetNameColumnSet]"
                    mode="columnset" />
                <xsl:apply-templates
                    select="$sheets[@table:name = $sheetNameSimpleCodeList]"
                    mode="values" />
            </gc:CodeList>
        </xsl:when>
        <xsl:when test="count($sheets[@table:name = $sheetNameSimpleCodeList]) = 1">
            <!-- Create a file with root element SimpleCodeList, which cannot be validated against the genericode XML schema,
                 but can be inserted in another document containing a gc:CodeList root element.
                 The sheet must be called Values, otherwise the key cannot be created. -->
            <xsl:apply-templates
                select="$sheets[@table:name = $sheetNameSimpleCodeList]"
                mode="values" />
        </xsl:when>
        <xsl:otherwise>
            <xsl:message terminate="yes">
                <xsl:text>This spreadsheet does not live up to the conventions regarding the structure of a genericode document represented in a spreadsheet</xsl:text>
            </xsl:message>
        </xsl:otherwise>
    </xsl:choose>
</xsl:template>
<!-- The sheets are told apart by the mode of the template rule and not by a reference to a
     $sheetNameXXX variable in the match pattern: in XSLT 1.0 it is an error for the value of
     the match attribute to contain a variable reference,
     see also https://www.w3.org/TR/xslt-10/#section-Defining-Template-Rules -->
<!-- Writes the additional code list metadata as Annotation/Description;
     every row of the sheet is handed over to the convertTableRowToXmlElement mode. -->
<xsl:template
    match="table:table"
    mode="codeListMetadata">
    <!-- Nothing is written at all when no cell of the sheet contains a paragraph -->
    <xsl:if test="table:table-row/table:table-cell/text:p">
        <Annotation>
            <Description>
                <xsl:apply-templates
                    select="table:table-row"
                    mode="convertTableRowToXmlElement" />
            </Description>
        </Annotation>
    </xsl:if>
</xsl:template>
<!-- Converts one row to one XML element.
     The first paragraph of the first cell specifies the element and has the form
         [Parent/]ElementName[ attribute=value[,attribute=value]]
     for example
         Version
         AlternateFormatLocationUri MimeType=text/csv
         Agency/LongName
         Agency/LongName xml:lang=da
     Only ElementName is used for the name of the element, the Parent/ part is dropped here.
     The paragraphs of the second cell become the content of the element.
     The transformation is terminated if a cell of the row has the attribute table:number-columns-repeated. -->
<xsl:template
    match="table:table-row"
    mode="convertTableRowToXmlElement">
    <xsl:if test="table:table-cell/@table:number-columns-repeated">
        <xsl:message terminate="yes">
            <xsl:text>This stylesheet does not take into account adjacent cells with the same content in sheets </xsl:text>
            <xsl:value-of select="$sheetNameIdentification" />
            <xsl:text> and </xsl:text>
            <xsl:value-of select="$sheetNameMetadata" />
            <xsl:text>.</xsl:text>
        </xsl:message>
    </xsl:if>
    <!-- The first cell is assumed to contain only one paragraph -->
    <xsl:variable
        name="specification"
        select="string(table:table-cell[1]/text:p[1])" />
    <!-- Everything in front of the first space; the complete specification if it contains no space.
         Appending a space guarantees that substring-before always finds one. -->
    <xsl:variable
        name="elementPath"
        select="substring-before(concat($specification, ' '), ' ')" />
    <!-- The part after the first slash if there is a slash, otherwise the path as is -->
    <xsl:variable name="elementName">
        <xsl:choose>
            <xsl:when test="contains($elementPath, '/')">
                <xsl:value-of select="substring-after($elementPath, '/')" />
            </xsl:when>
            <xsl:otherwise>
                <xsl:value-of select="$elementPath" />
            </xsl:otherwise>
        </xsl:choose>
    </xsl:variable>
    <!-- The curly brackets make the XSLT processor evaluate the contents instead of using them literally -->
    <xsl:element name="{$elementName}">
        <!-- Everything after the first space is the attribute string. It is empty if there is no space,
             and the called template creates no attribute for an empty string. -->
        <xsl:call-template name="tokenizeAttributestringAndCreateAttributes">
            <xsl:with-param
                name="attributestring"
                select="substring-after($specification, ' ')" />
        </xsl:call-template>
        <xsl:apply-templates
            select="table:table-cell[2]"
            mode="joinTableCellParagraphs" />
    </xsl:element>
</xsl:template>
<!-- Writes the Identification element.
     Rows of which the first cell starts with Agency/ are collected in one Agency element,
     which is written after the elements created from all other rows. -->
<xsl:template
    match="table:table"
    mode="identification">
    <xsl:variable
        name="agencyRows"
        select="table:table-row[starts-with(table:table-cell[1]/text:p, 'Agency/')]" />
    <xsl:variable
        name="otherRows"
        select="table:table-row[not(starts-with(table:table-cell[1]/text:p, 'Agency/'))]" />
    <Identification>
        <xsl:apply-templates
            select="$otherRows"
            mode="convertTableRowToXmlElement" />
        <!-- The Agency element is written even if there are no Agency/ rows -->
        <Agency>
            <xsl:apply-templates
                select="$agencyRows"
                mode="convertTableRowToXmlElement" />
        </Agency>
    </Identification>
</xsl:template>
<!-- Adds attributes to the element that is currently being created.
     Parameter attributestring: comma separated list of name=value pairs,
     e.g. MimeType=text/csv or xml:lang=da; an empty string results in no attributes.
     Within each pair, the part in front of the first = is the name of the attribute
     and the part after the first = is its value.
     This template has to be called before any content is added to the element. -->
<xsl:template name="tokenizeAttributestringAndCreateAttributes">
    <xsl:param name="attributestring" />
    <xsl:if test="string-length($attributestring) > 0">
        <xsl:for-each select="str:tokenize($attributestring, ',')">
            <!-- Whitespace around the name is removed, so that a specification with a space after
                 the comma (e.g. a=1, b=2) does not result in a name that is not a valid QName.
                 The value is written as is. -->
            <xsl:attribute name="{normalize-space(substring-before(., '='))}">
                <xsl:value-of select="substring-after(., '=')" />
            </xsl:attribute>
        </xsl:for-each>
    </xsl:if>
</xsl:template>
<!-- Writes the ColumnSet element.
     The first row of the sheet is the header row; the columns are recognized by the text of their header cell:
         @Id, @Use, Data/@Type (required),
         Data/@Lang, Annotation/Description/dcterms:description, Key (optional).
     Every following row results in one Column element and, if its Key cell contains text, in one Key element.
     All Column elements are written before the first Key element.
     The transformation is terminated if
     - a required header cell is missing in the first row;
     - a cell of a following row has the attribute table:number-columns-repeated. -->
<xsl:template
    match="table:table"
    mode="columnset">
    <xsl:variable
        name="headerCells"
        select="table:table-row[1]/table:table-cell" />
    <xsl:variable
        name="headerId"
        select="$headerCells[text:p/text() = '@Id']" />
    <xsl:variable
        name="headerUse"
        select="$headerCells[text:p/text() = '@Use']" />
    <xsl:variable
        name="headerDataType"
        select="$headerCells[text:p/text() = 'Data/@Type']" />
    <xsl:variable
        name="headerDataLang"
        select="$headerCells[text:p/text() = 'Data/@Lang']" />
    <xsl:variable
        name="headerDctermsDescription"
        select="$headerCells[text:p/text() = 'Annotation/Description/dcterms:description']" />
    <xsl:variable
        name="headerKey"
        select="$headerCells[text:p/text() = 'Key']" />
    <!-- Without this check, the position of a missing header would evaluate to 1
         and the values of the first column would silently be used instead. -->
    <xsl:if test="not($headerId and $headerUse and $headerDataType)">
        <xsl:message terminate="yes">
            <xsl:text>The first row of sheet </xsl:text>
            <xsl:value-of select="$sheetNameColumnSet" />
            <xsl:text> must contain the header cells @Id, @Use and Data/@Type.</xsl:text>
        </xsl:message>
    </xsl:if>
    <!-- Position of a column = number of cells in front of its header cell + 1 -->
    <xsl:variable
        name="positionId"
        select="count($headerId/preceding-sibling::table:table-cell) + 1" />
    <xsl:variable
        name="positionUse"
        select="count($headerUse/preceding-sibling::table:table-cell) + 1" />
    <xsl:variable
        name="positionDataType"
        select="count($headerDataType/preceding-sibling::table:table-cell) + 1" />
    <!-- For the optional columns, the position is 0 if the header cell is missing (0 + 0 instead of 0 + 1).
         No cell has position 0, so a missing optional column is treated like a column with empty cells. -->
    <xsl:variable
        name="positionDataLang"
        select="count($headerDataLang/preceding-sibling::table:table-cell) + count($headerDataLang[1])" />
    <xsl:variable
        name="positionDctermsDescription"
        select="count($headerDctermsDescription/preceding-sibling::table:table-cell) + count($headerDctermsDescription[1])" />
    <xsl:variable
        name="positionKey"
        select="count($headerKey/preceding-sibling::table:table-cell) + count($headerKey[1])" />
    <xsl:variable
        name="dataRows"
        select="table:table-row[position() > 1]" />
    <ColumnSet>
        <xsl:for-each select="$dataRows">
            <xsl:if test="table:table-cell/@table:number-columns-repeated">
                <xsl:message terminate="yes">
                    <xsl:text>This stylesheet does not take into account adjacent cells with the same content in sheet </xsl:text>
                    <xsl:value-of select="$sheetNameColumnSet" />
                    <xsl:text>.</xsl:text>
                </xsl:message>
            </xsl:if>
            <xsl:variable
                name="columnId"
                select="string(table:table-cell[position() = $positionId]/text:p)" />
            <xsl:variable
                name="dataLang"
                select="string(table:table-cell[position() = $positionDataLang]/text:p)" />
            <Column
                Id="{$columnId}"
                Use="{table:table-cell[position() = $positionUse]/text:p}">
                <Annotation>
                    <Description>
                        <dcterms:description>
                            <xsl:value-of select="table:table-cell[position() = $positionDctermsDescription]/text:p" />
                        </dcterms:description>
                    </Description>
                </Annotation>
                <ShortName>
                    <!-- Use id attribute as short name -->
                    <xsl:value-of select="$columnId" />
                </ShortName>
                <Data Type="{table:table-cell[position() = $positionDataType]/text:p}">
                    <!-- The Lang attribute is only written if a language is given -->
                    <xsl:if test="string-length($dataLang) > 0">
                        <xsl:attribute name="Lang">
                            <xsl:value-of select="$dataLang" />
                        </xsl:attribute>
                    </xsl:if>
                </Data>
            </Column>
        </xsl:for-each>
        <!-- Second pass over the same rows, as all Column elements have to precede the Key elements -->
        <xsl:for-each select="$dataRows">
            <xsl:variable
                name="keyId"
                select="string(table:table-cell[position() = $positionKey]/text:p)" />
            <xsl:if test="string-length($keyId) > 0">
                <!-- The text of the Key cell is used as id and as short name of the key -->
                <Key Id="{$keyId}">
                    <ShortName>
                        <xsl:value-of select="$keyId" />
                    </ShortName>
                    <ColumnRef Ref="{table:table-cell[position() = $positionId]/text:p}" />
                </Key>
            </xsl:if>
        </xsl:for-each>
    </ColumnSet>
</xsl:template>
<!-- Lookup key over the cells of the first (header) row of the Values sheet with
- index: the position of the cell within the table row, computed as the number of
preceding table:table-cell siblings plus 1
- value: the table cell
The position() function cannot be used in this case, as it will always return 1.
Every header cell element counts as one position, also if it has the attribute table:number-columns-repeated.
In XSLT 1.0 stylesheets, it is an error for the value of either the use attribute or the match attribute to contain a variable reference,
see also https://www.w3.org/TR/xslt-10/#key,
therefore, the name of the sheet is hardcoded; it has to be the same as the value of the variable sheetNameSimpleCodeList -->
<xsl:key
name="valuesTableColumnPositionKey"
match="table:table[@table:name = 'Values']/table:table-row[position() = 1]/table:table-cell"
use="count(preceding-sibling::table:table-cell) + 1" />
<!-- Writes the SimpleCodeList element with one Row element per non-empty data row.
     The header row (first row) is not processed here; it is made available through
     the key valuesTableColumnPositionKey. -->
<xsl:template
    match="table:table"
    mode="values">
    <SimpleCodeList>
        <!-- All rows after the header row that contain at least one paragraph.
             Empty rows are skipped (rows with only whitespace will be converted though). -->
        <xsl:for-each select="table:table-row[position() > 1][table:table-cell/text:p]">
            <Row>
                <xsl:for-each select="table:table-cell">
                    <xsl:variable
                        name="cellsInFront"
                        select="preceding-sibling::table:table-cell" />
                    <xsl:apply-templates
                        select="."
                        mode="writeValue">
                        <!-- Column in which this cell starts: a cell in front without the attribute
                             table:number-columns-repeated occupies one column, a cell in front with
                             the attribute occupies as many columns as the attribute states. -->
                        <xsl:with-param
                            name="columnPosition"
                            select="count($cellsInFront[not(@table:number-columns-repeated)]) + sum($cellsInFront/@table:number-columns-repeated) + 1" />
                        <!-- Default value of table:number-columns-repeated is 1 according to the
                             OpenDocument Format specification, see also
                             https://docs.oasis-open.org/office/OpenDocument/v1.3/os/part3-schema/OpenDocument-v1.3-os-part3-schema.html#__RefHeading__1418526_253892949 -->
                        <xsl:with-param name="noOfRepetitions">
                            <xsl:choose>
                                <xsl:when test="@table:number-columns-repeated">
                                    <xsl:value-of select="@table:number-columns-repeated" />
                                </xsl:when>
                                <xsl:otherwise>1</xsl:otherwise>
                            </xsl:choose>
                        </xsl:with-param>
                    </xsl:apply-templates>
                </xsl:for-each>
            </Row>
        </xsl:for-each>
    </SimpleCodeList>
</xsl:template>
<!-- Writes the Value element(s) for one cell of a data row of the Values sheet.
     Parameter columnPosition: number of the column in which the value is located in the spreadsheet
     as visible in the GUI (column A is 1, column B is 2, etc.).
     Parameter noOfRepetitions: number of times that the value is repeated in the successive columns.
     The template calls itself for the next column as long as repetitions remain. -->
<xsl:template
    match="table:table-cell"
    mode="writeValue">
    <xsl:param name="columnPosition" />
    <xsl:param name="noOfRepetitions" />
    <!-- Header cell of the column, looked up by position in the first row of the Values sheet -->
    <xsl:variable
        name="headerCell"
        select="key('valuesTableColumnPositionKey', $columnPosition)" />
    <!-- Nothing is written and the recursion stops if
         - the header row has no cell in the given position, or
         - the header cell in the given position does not contain a paragraph.
         This keeps <Value ColumnRef=""/> out of the output and avoids needless recursion.
         For every column with a header, a Value element is always written, also if the cell
         contains an empty string. This is more strict than the genericode specification, which
         allows rows without a Value element for some column, but it makes the result easier to
         transform to other formats. -->
    <xsl:if test="count($headerCell) = 1 and $headerCell/text:p">
        <Value ColumnRef="{$headerCell/text:p}">
            <!-- An undefined value (an empty string in the cell, only applicable in optional columns)
                 is written as a Value element without a SimpleValue element. -->
            <xsl:if test="not(@office:value-type = 'void') and string-length(normalize-space(text:p)) > 0">
                <xsl:variable
                    name="valueType"
                    select="string(@office:value-type)" />
                <SimpleValue>
                    <!-- The attribute that holds the value depends on the value type, see
                         https://docs.oasis-open.org/office/OpenDocument/v1.3/os/part3-schema/OpenDocument-v1.3-os-part3-schema.html#attribute-office_value-type -->
                    <xsl:choose>
                        <xsl:when test="$valueType = 'boolean'">
                            <xsl:value-of select="@office:boolean-value" />
                        </xsl:when>
                        <xsl:when test="$valueType = 'date'">
                            <xsl:value-of select="@office:date-value" />
                        </xsl:when>
                        <xsl:when test="$valueType = 'float' or $valueType = 'percentage'">
                            <xsl:value-of select="@office:value" />
                        </xsl:when>
                        <xsl:when test="$valueType = 'time'">
                            <xsl:value-of select="@office:time-value" />
                        </xsl:when>
                        <xsl:otherwise>
                            <!-- All other cases, e.g. value type string: use the text of the cell -->
                            <xsl:apply-templates
                                select="."
                                mode="joinTableCellParagraphs" />
                        </xsl:otherwise>
                    </xsl:choose>
                </SimpleValue>
            </xsl:if>
        </Value>
        <!-- Recursion: the same cell is written again for the next column while repetitions remain -->
        <xsl:if test="$noOfRepetitions > 1">
            <xsl:apply-templates
                select="."
                mode="writeValue">
                <xsl:with-param
                    name="columnPosition"
                    select="$columnPosition + 1" />
                <xsl:with-param
                    name="noOfRepetitions"
                    select="$noOfRepetitions - 1" />
            </xsl:apply-templates>
        </xsl:if>
    </xsl:if>
</xsl:template>
<!-- Writes the text of all paragraphs of a cell, separated by a line feed.
     A cell contains several paragraphs if the user has inserted a line break in it,
     see also https://help.libreoffice.org/latest/en-US/text/shared/guide/breaking_lines.html
     (tested with LibreOffice 24.8.0.3).
     This template is called for cells that potentially contain several paragraphs. -->
<xsl:template
    match="table:table-cell"
    mode="joinTableCellParagraphs">
    <xsl:for-each select="text:p">
        <!-- Autoformatting of links is not an issue. The input will look like
             <table:table-cell
                 office:value-type="string"
                 calcext:value-type="string">
                 <text:p>
                     <text:a
                         xlink:href="http://example.org/"
                         xlink:type="simple">http://example.org</text:a>
                 </text:p>
             </table:table-cell>
             By using select=".", http://example.org is the value, as all text is extracted from the source tree.
             Note: if select="text()" would be used, the value would be empty, because it would only take text directly in <text:p>.
        -->
        <xsl:value-of select="." />
        <!-- Add a new line if more paragraphs follow.
             The line feed is written as a character reference inside xsl:text: a literal line break
             in an attribute value would be normalized to a space by the XML parser, and
             whitespace-only text outside xsl:text is stripped from the stylesheet. -->
        <xsl:if test="position() != last()">
            <xsl:text>&#10;</xsl:text>
        </xsl:if>
    </xsl:for-each>
</xsl:template>
</xsl:stylesheet>