On 2/23/2021 12:22 PM, mike wrote:
> I will have files like:
> I will need to parse to xml:
> However it is not valid xml since it is two roots. How can I detect
> that I have two roots. Currently I am only seeing this when parse
> fails ( see below) And message is too general to know there are
> multiple ( in this case 2) roots. If I can detect multi roots I can
> split string and parse each and add them together later ( I think).
Parsing and catching the exception is probably the safest approach.
If you want a string hack then regex may be the easiest approach.
For inspiration:
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Lightweight, regex-based check for whether a string holds more than one
 * top-level (root) XML element.
 *
 * <p>This is a heuristic, not a parser: it only tracks element nesting depth
 * and does not validate the document. Known limitations: a literal
 * {@code >} inside an attribute value ends the tag early, markup inside a
 * DOCTYPE internal subset is not interpreted, and element names are limited
 * to ASCII word characters plus {@code : . -}.
 */
public class XmlSniffer {
    /** Capture group that is non-null when the token is an end tag. */
    private static final int END_TAG = 1;

    /** Capture group that is non-null when the token is a start or empty-element tag. */
    private static final int START_TAG = 2;

    /**
     * Matches one markup token at a time. Comments, CDATA sections and
     * processing instructions come first so they are consumed whole and any
     * tag-like text inside them is never counted. DOTALL lets those three
     * constructs span line breaks.
     */
    private static final Pattern TOKEN = Pattern.compile(
            "<!--.*?-->"                    // comment
            + "|<!\\[CDATA\\[.*?\\]\\]>"    // CDATA section
            + "|<\\?.*?\\?>"                // processing instruction / XML declaration
            + "|</([\\w:.-]+)\\s*>"         // end tag (whitespace allowed before '>')
            + "|<([\\w:.-]+)[^>]*>",        // start tag or empty-element tag, may span lines
            Pattern.DOTALL);

    /**
     * Reports whether the text contains at most one top-level element.
     *
     * @param xmlstr the XML text to scan
     * @return {@code false} as soon as a second element is opened at nesting
     *         depth zero; {@code true} otherwise (including when the text
     *         contains no elements at all)
     * @throws NullPointerException if {@code xmlstr} is {@code null}
     */
    public static boolean check(String xmlstr) {
        if (xmlstr == null) {
            throw new NullPointerException("xmlstr");
        }
        Matcher tokens = TOKEN.matcher(xmlstr);
        int depth = 0;
        boolean rootSeen = false;
        while (tokens.find()) {
            if (tokens.group(END_TAG) != null) {
                // Clamp at zero so a stray end tag cannot hide later roots.
                if (depth > 0) {
                    depth--;
                }
            } else if (tokens.group(START_TAG) != null) {
                if (depth == 0) {
                    if (rootSeen) {
                        return false;
                    }
                    rootSeen = true;
                }
                // An empty-element tag (<x/>) opens and closes in one token.
                if (!tokens.group().endsWith("/>")) {
                    depth++;
                }
            }
            // Otherwise the token is a comment, CDATA section or PI: skip it.
        }
        return true;
    }

    /** Prints the input followed by the result of {@link #check(String)}. */
    private static void test(String xmlstr) {
        System.out.println(xmlstr);
        System.out.println(check(xmlstr));
    }

    /** Runs {@link #check(String)} over single-root and multi-root samples. */
    public static void main(String[] args) {
        test("<a/>");
        test("<a>xxxx</a>");
        test("<a>\r\n <b>xxxx</b>\r\n</a>");
        test("<a>\r\n <b/>\r\n</a>");
        test("<a>xxxx</a>\r\n<c>yyyy</c>");
        test("<a>\r\n <b>xxxx</b>\r\n</a>\r\n<c>yyyy</c>");
        test("<a>\r\n <b/>\r\n</a>\r\n<c>yyyy</c>");
        test("<a>xxxx</a>\r\n<c/>");
        test("<a>\r\n <b>xxxx</b>\r\n</a>\r\n<c/>");
        test("<a>\r\n <b/>\r\n</a>\r\n<c/>");
        test("<a/>\r\n<c/>");
    }
}
Arne