Skip to content

Commit 0b6b73a

Browse files
committed
Removed version from .jar filename
replaceInvalidXMLChars() replaces characters in the JSON input that are invalid in XML with REPLACEMENT_CHAR (with unit tests). Added version and encoding parameters for the XML output. Added a Maven release script.
1 parent b6738d3 commit 0b6b73a

File tree

7 files changed

+140
-14
lines changed

7 files changed

+140
-14
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,4 @@ RUN mvn clean install
1010

1111
### entrypoint
1212

13-
ENTRYPOINT ["java", "-jar", "target/json2xml-1.0.2-SNAPSHOT-jar-with-dependencies.jar"]
13+
ENTRYPOINT ["java", "-jar", "target/json2xml-jar-with-dependencies.jar"]

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@ Reads any JSON data and produces [XML Representation of JSON](https://www.w3.org
55

66
JSON2XML enables JSON transformation with XSLT even without an XSLT 3.0 processor. You can simply pre-process the data by running JSON2XML before the transformation and pipe its output into an XSLT 2.0 stylesheet, for example. That way your stylesheet stays forward compatible with XSLT 3.0, as the XML representation is exactly the same.
77

8+
## Invalid characters
9+
10+
The JSON input might contain characters (for example, form feed `\f`) which would be invalid in the XML output. The [`json-to-xml()` function](https://www.w3.org/TR/xslt-30/#func-json-to-xml) specifies character escape rules that apply in this case.
11+
12+
JSON2XML currently implements only the default escape rule:
13+
> Any characters or codepoints that are not valid XML characters (for example, unpaired surrogates) are passed to the fallback function as described below; in the absence of a fallback function, they are replaced by the Unicode `REPLACEMENT CHARACTER (xFFFD)`.
14+
815
## Build
916

1017
mvn clean install

pom.xml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
<groupId>com.atomgraph.etl.json</groupId>
88
<artifactId>json2xml</artifactId>
9-
<version>1.0.2</version>
9+
<version>1.0.3</version>
1010
<packaging>jar</packaging>
1111

1212
<name>JSON2XML</name>
@@ -52,9 +52,16 @@
5252
<artifactId>javax.json</artifactId>
5353
<version>1.1.4</version>
5454
</dependency>
55+
<dependency>
56+
<groupId>junit</groupId>
57+
<artifactId>junit</artifactId>
58+
<version>4.8.2</version>
59+
<scope>test</scope>
60+
</dependency>
5561
</dependencies>
5662

5763
<build>
64+
<finalName>json2xml</finalName>
5865
<plugins>
5966
<plugin>
6067
<groupId>org.apache.maven.plugins</groupId>

release.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#!/bin/bash
#
# Runs the Maven release goals in order: clean up any previous release
# attempt, prepare the release, then perform it.

# Abort on the first failing command (and on unset variables or failures
# inside a pipeline) so that release:perform never runs after a failed
# release:prepare.
set -euo pipefail

mvn release:clean release:prepare

mvn release:perform

src/main/java/com/atomgraph/etl/json/JSON2XML.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,14 @@ public static void main(String[] args) throws IOException, XMLStreamException
4040
if (json.available() == 0)
4141
{
4242
System.err.println("JSON input: stdin");
43-
System.err.println("Example: cat sample.json | java -jar json2xml-1.0.0-SNAPSHOT-jar-with-dependencies.jar > sample.xml");
43+
System.err.println("Example: cat sample.json | java -jar json2xml-jar-with-dependencies.jar > sample.xml");
4444
System.exit(1);
4545
}
4646

4747
try (Reader reader = new BufferedReader(new InputStreamReader(json, StandardCharsets.UTF_8)))
4848
{
49-
new JsonStreamXMLWriter(reader, new BufferedWriter(new OutputStreamWriter(System.out))).convert();
49+
new JsonStreamXMLWriter(reader, new BufferedWriter(new OutputStreamWriter(System.out, StandardCharsets.UTF_8))).
50+
convert(StandardCharsets.UTF_8.name(), "1.0");
5051
}
5152
}
5253

src/main/java/com/atomgraph/etl/json/JsonStreamXMLWriter.java

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ public class JsonStreamXMLWriter
3737
{
3838

3939
public static final String XPATH_FUNCTIONS_NS = "http://www.w3.org/2005/xpath-functions";
40+
public static final String REPLACEMENT_CHAR = "\uFFFD";
4041
private static final XMLOutputFactory XOF = XMLOutputFactory.newInstance();
4142

4243
static
@@ -123,19 +124,18 @@ public JsonStreamXMLWriter(JsonParser parser, XMLStreamWriter writer)
123124
this.writer = writer;
124125
}
125126

126-
public void convert() throws XMLStreamException
127+
public void convert(String encoding, String version) throws XMLStreamException
127128
{
128-
convert(getWriter());
129+
convert(getWriter(), encoding, version);
129130
}
130131

131-
public void convert(XMLStreamWriter writer) throws XMLStreamException
132+
public void convert(XMLStreamWriter writer, String encoding, String version) throws XMLStreamException
132133
{
133-
convert(getParser(), writer);
134+
convert(getParser(), writer, encoding, version);
134135
}
135-
136-
public static void convert(JsonParser parser, XMLStreamWriter writer) throws XMLStreamException
136+
public static void convert(JsonParser parser, XMLStreamWriter writer, String encoding, String version) throws XMLStreamException
137137
{
138-
writer.writeStartDocument();
138+
writer.writeStartDocument(encoding, version);
139139
writer.setDefaultNamespace(XPATH_FUNCTIONS_NS);
140140

141141
write(parser, writer);
@@ -198,7 +198,7 @@ public static void write(JsonParser parser, XMLStreamWriter writer) throws XMLSt
198198
writer.writeEndElement();
199199
break;
200200
case KEY_NAME:
201-
keyName = parser.getString();
201+
keyName = replaceInvalidXMLChars(parser.getString(), REPLACEMENT_CHAR);
202202
break;
203203
case VALUE_STRING:
204204
writer.writeStartElement(XPATH_FUNCTIONS_NS, "string");
@@ -207,7 +207,7 @@ public static void write(JsonParser parser, XMLStreamWriter writer) throws XMLSt
207207
writer.writeAttribute("key", keyName);
208208
keyName = null;
209209
}
210-
writer.writeCharacters(parser.getString());
210+
writer.writeCharacters(replaceInvalidXMLChars(parser.getString(), REPLACEMENT_CHAR));
211211
writer.writeEndElement();
212212
break;
213213
case VALUE_NUMBER:
@@ -217,7 +217,7 @@ public static void write(JsonParser parser, XMLStreamWriter writer) throws XMLSt
217217
writer.writeAttribute("key", keyName);
218218
keyName = null;
219219
}
220-
writer.writeCharacters(parser.getString());
220+
writer.writeCharacters(replaceInvalidXMLChars(parser.getString(), REPLACEMENT_CHAR));
221221
writer.writeEndElement();
222222
break;
223223
case VALUE_NULL:
@@ -234,6 +234,41 @@ public static void write(JsonParser parser, XMLStreamWriter writer) throws XMLSt
234234
}
235235
}
236236

237+
public static String replaceInvalidXMLChars(String text, String replacement)
238+
{
239+
if (null == text || text.isEmpty()) return text;
240+
241+
final int len = text.length();
242+
char current = 0;
243+
int codePoint = 0;
244+
StringBuilder sb = new StringBuilder();
245+
for (int i = 0; i < len; i++)
246+
{
247+
current = text.charAt(i);
248+
boolean surrogate = false;
249+
if (Character.isHighSurrogate(current)
250+
&& i + 1 < len && Character.isLowSurrogate(text.charAt(i + 1)))
251+
{
252+
surrogate = true;
253+
codePoint = text.codePointAt(i++);
254+
}
255+
else codePoint = current;
256+
257+
if ((codePoint == 0x9) || (codePoint == 0xA) || (codePoint == 0xD)
258+
|| ((codePoint >= 0x20) && (codePoint <= 0xD7FF))
259+
|| ((codePoint >= 0xE000) && (codePoint <= 0xFFFD))
260+
|| ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF)))
261+
{
262+
sb.append(current);
263+
if (surrogate) sb.append(text.charAt(i));
264+
}
265+
else
266+
sb.append(replacement);
267+
}
268+
269+
return sb.toString();
270+
}
271+
237272
protected JsonParser getParser()
238273
{
239274
return parser;
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/*
2+
* Copyright 2020 Martynas Jusevičius <martynas@atomgraph.com>.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.atomgraph.etl.json;
17+
18+
import java.io.BufferedReader;
19+
import java.io.BufferedWriter;
20+
import java.io.ByteArrayInputStream;
21+
import java.io.ByteArrayOutputStream;
22+
import java.io.IOException;
23+
import java.io.InputStream;
24+
import java.io.InputStreamReader;
25+
import java.io.OutputStreamWriter;
26+
import java.io.Reader;
27+
import java.nio.charset.StandardCharsets;
28+
import javax.xml.stream.XMLStreamException;
29+
import static org.junit.Assert.assertTrue;
30+
import org.junit.Test;
31+
32+
/**
33+
*
34+
* @author Martynas Jusevičius <martynas@atomgraph.com>
35+
*/
36+
public class JsonStreamXMLWriterTest
37+
{
38+
39+
@Test
40+
public void testJsonKeyWithInvalidXMLChar() throws IOException, XMLStreamException
41+
{
42+
String jsonString = "{ \"a\\fb\": \"c\" }"; // form feed in the key (invalid in XML 1.0, valid in XML 1.1)
43+
InputStream json = new ByteArrayInputStream(jsonString.getBytes(StandardCharsets.UTF_8));
44+
ByteArrayOutputStream xml = new ByteArrayOutputStream();
45+
46+
try (Reader reader = new BufferedReader(new InputStreamReader(json, StandardCharsets.UTF_8)))
47+
{
48+
new JsonStreamXMLWriter(reader, new BufferedWriter(new OutputStreamWriter(xml, StandardCharsets.UTF_8))).
49+
convert(StandardCharsets.UTF_8.name(), "1.0");
50+
String xmlString = xml.toString(StandardCharsets.UTF_8.name());
51+
assertTrue(xmlString.contains(JsonStreamXMLWriter.REPLACEMENT_CHAR)); //
52+
}
53+
}
54+
55+
@Test
56+
public void testJsonValueWithInvalidXMLChar() throws IOException, XMLStreamException
57+
{
58+
String jsonString = "{ \"a\": \"b\\fc\" }"; // form feed in the value (invalid in XML 1.0, valid in XML 1.1)
59+
InputStream json = new ByteArrayInputStream(jsonString.getBytes(StandardCharsets.UTF_8));
60+
ByteArrayOutputStream xml = new ByteArrayOutputStream();
61+
62+
try (Reader reader = new BufferedReader(new InputStreamReader(json, StandardCharsets.UTF_8)))
63+
{
64+
new JsonStreamXMLWriter(reader, new BufferedWriter(new OutputStreamWriter(xml, StandardCharsets.UTF_8))).
65+
convert(StandardCharsets.UTF_8.name(), "1.0");
66+
String xmlString = xml.toString(StandardCharsets.UTF_8.name());
67+
assertTrue(xmlString.contains(JsonStreamXMLWriter.REPLACEMENT_CHAR)); // form feed has been replaced
68+
}
69+
}
70+
71+
}

0 commit comments

Comments
 (0)