diff --git a/src/main/java/com/dashjoin/jsonata/Functions.java b/src/main/java/com/dashjoin/jsonata/Functions.java index f66c96e..22edad8 100644 --- a/src/main/java/com/dashjoin/jsonata/Functions.java +++ b/src/main/java/com/dashjoin/jsonata/Functions.java @@ -609,10 +609,11 @@ public static String leftPad(final String str, final int size, String padStr) { if (pads <= 0) { return str; } - String padding = ""; + StringBuilder paddingSb = new StringBuilder(); for (int i = 0; i < pads + 1; i++) { - padding += padStr; + paddingSb.append(padStr); } + String padding = paddingSb.toString(); return substr(padding, 0, pads).concat(str); } @@ -638,10 +639,11 @@ public static String rightPad(final String str, final int size, String padStr) { if (pads <= 0) { return str; } - String padding = ""; + StringBuilder paddingSb = new StringBuilder(); for (int i = 0; i < pads + 1; i++) { - padding += padStr; + paddingSb.append(padStr); } + String padding = paddingSb.toString(); return str.concat(substr(padding, 0, pads)); } @@ -770,14 +772,21 @@ public static String join(List strs, String separator) { return String.join(separator, strs); } + private static final Pattern DOLLAR_DOLLAR = Pattern.compile("\\$\\$"); + private static final Pattern DOLLAR_WITHOUT_ESCAPE = Pattern.compile("([^\\\\]|^)\\$([^0-9^<])"); + private static final Pattern DOLLAR_AT_END = Pattern.compile("\\$$"); static String safeReplacement(String in) { // In JSONata and in Java the $ in the replacement test usually starts the insertion of a capturing group // In order to replace a simple $ in Java you have to escape the $ with "\$" // in JSONata you do this with a '$$' - // "\$" followed any character besides '<' and and digit into $ + this character - return in.replaceAll("\\$\\$", "\\\\\\$") - .replaceAll("([^\\\\]|^)\\$([^0-9^<])", "$1\\\\\\$$2") - .replaceAll("\\$$", "\\\\\\$"); // allow $ at end + // "\$" followed any character besides '<' and and digit into $ + this character + if (!in.contains("$")) { + return in; + } + String result = DOLLAR_DOLLAR.matcher(in).replaceAll("\\\\\\$"); + result = DOLLAR_WITHOUT_ESCAPE.matcher(result).replaceAll("$1\\\\\\$$2"); + result = DOLLAR_AT_END.matcher(result).replaceAll("\\\\\\$"); + return result; } /** @@ -963,6 +972,12 @@ public static String base64decode(String str) { } } + private static final Pattern PLUS = Pattern.compile("\\+"); + private static final Pattern PERCENT_21 = Pattern.compile("%21"); + private static final Pattern PERCENT_27 = Pattern.compile("%27"); + private static final Pattern PERCENT_28 = Pattern.compile("%28"); + private static final Pattern PERCENT_29 = Pattern.compile("%29"); + private static final Pattern PERCENT_7E = Pattern.compile("%7E"); /** * Encode a string into a component for a url * @param {String} str - String to encode @@ -975,14 +990,20 @@ public static String encodeUrlComponent(String str) { } Utils.checkUrl(str); - - return URLEncoder.encode(str, StandardCharsets.UTF_8) - .replaceAll("\\+", "%20") - .replaceAll("\\%21", "!") - .replaceAll("\\%27", "'") - .replaceAll("\\%28", "(") - .replaceAll("\\%29", ")") - .replaceAll("\\%7E", "~"); + + String encoded = URLEncoder.encode(str, StandardCharsets.UTF_8); + + if (!encoded.contains("+") && !encoded.contains("%")) { + return encoded; + } + + encoded = PLUS.matcher(encoded).replaceAll("%20"); + encoded = PERCENT_21.matcher(encoded).replaceAll("!"); + encoded = PERCENT_27.matcher(encoded).replaceAll("'"); + encoded = PERCENT_28.matcher(encoded).replaceAll("("); + encoded = PERCENT_29.matcher(encoded).replaceAll(")"); + encoded = PERCENT_7E.matcher(encoded).replaceAll("~"); + return encoded; } /** diff --git a/src/main/java/com/dashjoin/jsonata/Parser.java b/src/main/java/com/dashjoin/jsonata/Parser.java index 553494f..cf11960 100644 --- a/src/main/java/com/dashjoin/jsonata/Parser.java +++ b/src/main/java/com/dashjoin/jsonata/Parser.java @@ -590,7 +590,7 @@ Symbol led(Symbol left) { // is the next token a '<' - if so, parse the function signature if (node.id.equals("<")) { int depth = 1; - String sig = "<"; + StringBuilder sigBuilder = new StringBuilder("<"); while (depth > 0 && !node.id.equals("{") && !node.id.equals("(end)")) { Symbol tok = advance(); if (tok.id.equals(">")) { @@ -598,9 +598,10 @@ Symbol led(Symbol left) { } else if (tok.id.equals("<")) { depth++; } - sig += tok.value; + sigBuilder.append(tok.value); } advance(">"); + String sig = sigBuilder.toString(); this.signature = new Signature(sig, "lambda"); } // parse the function body diff --git a/src/main/java/com/dashjoin/jsonata/Tokenizer.java b/src/main/java/com/dashjoin/jsonata/Tokenizer.java index 1da1ec9..bcac323 100644 --- a/src/main/java/com/dashjoin/jsonata/Tokenizer.java +++ b/src/main/java/com/dashjoin/jsonata/Tokenizer.java @@ -83,6 +83,7 @@ public class Tokenizer { // = function (path) { put("t", "\t"); }}; + private static final Pattern NUM_REGEX = Pattern.compile("^-?(0|([1-9][0-9]*))(\\.[0-9]+)?([Ee][-+]?[0-9]+)?"); // Tokenizer (lexer) - invoked by the parser to return one token at a time String path; int position = 0; @@ -262,20 +263,20 @@ Token next(boolean prefix) { char quoteType = currentChar; // double quoted string literal - find end of string position++; - var qstr = ""; + var qstr = new StringBuilder(); while (position < length) { currentChar = path.charAt(position); if (currentChar == '\\') { // escape sequence position++; if (position < path.length()) currentChar = path.charAt(position); else throw new JException("S0103", position, ""); if (escapes.get(""+currentChar)!=null) { - qstr += escapes.get(""+currentChar); + qstr.append(escapes.get(""+currentChar)); } else if (currentChar == 'u') { // u should be followed by 4 hex digits String octets = position+5 < path.length() ? path.substring(position + 1, (position + 1) + 4) : ""; if (octets.matches("^[0-9a-fA-F]+$")) { // /^[0-9a-fA-F]+$/.test(octets)) { int codepoint = Integer.parseInt(octets, 16); - qstr += Character.toString((char) codepoint); + qstr.append((char) codepoint); position += 4; } else { throw new JException("S0104", position); @@ -287,17 +288,16 @@ Token next(boolean prefix) { } } else if (currentChar == quoteType) { position++; - return create("string", qstr); + return create("string", qstr.toString()); } else { - qstr += currentChar; + qstr.append(currentChar); } position++; } throw new JException("S0101", position); } // test for numbers - Pattern numregex = Pattern.compile("^-?(0|([1-9][0-9]*))(\\.[0-9]+)?([Ee][-+]?[0-9]+)?"); - Matcher match = numregex.matcher(path.substring(position)); + Matcher match = NUM_REGEX.matcher(path.substring(position)); if (match.find()) { double num = Double.parseDouble(match.group(0)); if (!Double.isNaN(num) && Double.isFinite(num)) { diff --git a/src/main/java/com/dashjoin/jsonata/json/JsonParser.java b/src/main/java/com/dashjoin/jsonata/json/JsonParser.java index 4727705..7ad7221 100644 --- a/src/main/java/com/dashjoin/jsonata/json/JsonParser.java +++ b/src/main/java/com/dashjoin/jsonata/json/JsonParser.java @@ -344,15 +344,15 @@ private void readEscape() throws IOException { captureBuffer.append('\t'); break; case 'u': - char[] hexChars = new char[4]; + int value = 0; for (int i = 0; i < 4; i++) { read(); if (!isHexDigit()) { throw expected("hexadecimal digit"); } - hexChars[i] = (char)current; + value = (value << 4) | hexCharToValue((char) current); } - captureBuffer.append((char)Integer.parseInt(new String(hexChars), 16)); + captureBuffer.append((char) value); break; default: throw expected("valid escape sequence"); @@ -360,6 +360,16 @@ private void readEscape() throws IOException { read(); } + private int hexCharToValue(char c) { + if (c >= '0' && c <= '9') { + return c - '0'; + } else if (c >= 'A' && c <= 'F') { + return c - 'A' + 10; + } else { // c >= 'a' && c <= 'f' + return c - 'a' + 10; + } + } + private void readNumber() throws IOException { handler.startNumber(); startCapture(); diff --git a/src/main/java/com/dashjoin/jsonata/utils/DateTimeUtils.java b/src/main/java/com/dashjoin/jsonata/utils/DateTimeUtils.java index f2a5e56..ce3dae3 100644 --- a/src/main/java/com/dashjoin/jsonata/utils/DateTimeUtils.java +++ b/src/main/java/com/dashjoin/jsonata/utils/DateTimeUtils.java @@ -174,9 +174,9 @@ private static String lookup(long num, boolean prev, boolean ord) { wordValuesLong.put(lword + "th", val); } } - + private static final Pattern SPLIT_PATTERN = Pattern.compile(",\\s|\\sand\\s|[\\s\\-]"); public static int wordsToNumber(String text) { - String[] parts = text.split(",\\s|\\sand\\s|[\\s\\-]"); + String[] parts = SPLIT_PATTERN.split(text); Integer[] values = new Integer[parts.length]; for (int i = 0; i < parts.length; i++) { values[i] = wordValues.get(parts[i]); @@ -202,7 +202,7 @@ public static int wordsToNumber(String text) { * long version of above */ public static long wordsToLong(String text) { - String[] parts = text.split(",\\s|\\sand\\s|[\\s\\-]"); + String[] parts = SPLIT_PATTERN.split(text); Long[] values = new Long[parts.length]; for (int i = 0; i < parts.length; i++) { values[i] = wordValuesLong.get(parts[i]); @@ -788,16 +788,15 @@ public static String formatDateTime(long millis, String picture, String timezone int offsetMillis = (60 * offsetHours + offsetMinutes) * 60 * 1000; LocalDateTime dateTime = LocalDateTime.ofInstant(Instant.ofEpochMilli(millis + offsetMillis), ZoneOffset.UTC); - String result = ""; + StringBuilder resultBuilder = new StringBuilder(); for (SpecPart part : formatSpec.parts) { if (part.type.equals("literal")) { - result += part.value; + resultBuilder.append(part.value); } else { - result += formatComponent(dateTime, part, offsetHours, offsetMinutes); + resultBuilder.append(formatComponent(dateTime, part, offsetHours, offsetMinutes)); } } - - return result; + return resultBuilder.toString(); } private static String formatComponent(LocalDateTime date, SpecPart markerSpec, int offsetHours, int offsetMinutes) { @@ -938,11 +937,12 @@ private static String getDateTimeFragment(LocalDateTime date, Character componen public static Long parseDateTime(String timestamp, String picture) { PictureFormat formatSpec = analyseDateTimePicture(picture); PictureMatcher matchSpec = generateRegex(formatSpec); - String fullRegex = "^"; + StringBuilder fullRegexBuilder = new StringBuilder("^"); for (MatcherPart part : matchSpec.parts) { - fullRegex += "(" + part.regex + ")"; + fullRegexBuilder.append("(").append(part.regex).append(")"); } - fullRegex += "$"; + fullRegexBuilder.append("$"); + String fullRegex = fullRegexBuilder.toString(); Pattern pattern = Pattern.compile(fullRegex, Pattern.CASE_INSENSITIVE); Matcher matcher = pattern.matcher(timestamp); if (matcher.find()) { @@ -1060,13 +1060,13 @@ private static boolean isType(int type, int mask) { return ((~type & mask) == 0) && (type & mask) != 0; } + private static final Pattern LITERAL_ESCAPE_PATTERN = Pattern.compile("[.*+?^${}()|\\[\\]\\\\]"); private static PictureMatcher generateRegex(PictureFormat formatSpec) { PictureMatcher matcher = new PictureMatcher(); for (final SpecPart part : formatSpec.parts) { MatcherPart res; if (part.type.equals("literal")) { - Pattern p = Pattern.compile("[.*+?^${}()|\\[\\]\\\\]"); - Matcher m = p.matcher(part.value); + Matcher m = LITERAL_ESCAPE_PATTERN.matcher(part.value); String regex = m.replaceAll("\\\\$0"); res = new MatcherPart(regex) { diff --git a/src/main/java/com/dashjoin/jsonata/utils/Signature.java b/src/main/java/com/dashjoin/jsonata/utils/Signature.java index efd286c..c77f50b 100644 --- a/src/main/java/com/dashjoin/jsonata/utils/Signature.java +++ b/src/main/java/com/dashjoin/jsonata/utils/Signature.java @@ -298,13 +298,13 @@ void throwValidationError(List badArgs, String badSig, String functionName) { @SuppressWarnings({"rawtypes", "unchecked"}) public Object validate(Object _args, Object context) { - var result = new ArrayList<>(); - var args = (List)_args; - String suppliedSig = ""; - for (Object arg : args) - suppliedSig += getSymbol(arg); - + StringBuilder sigBuilder = new StringBuilder(args.size()); + for (Object arg : args) { + sigBuilder.append(getSymbol(arg)); + } + String suppliedSig = sigBuilder.toString(); + Matcher isValid = _regex.matcher(suppliedSig); if (isValid != null && isValid.matches()) { var validatedArgs = new ArrayList<>(); @@ -337,11 +337,11 @@ public Object validate(Object _args, Object context) { } else { // may have matched multiple args (if the regex ends with a '+' // split into single tokens - String[] singles = match.split(""); - for (String single : singles) { + char[] singles = match.toCharArray(); + for (char single : singles) { //match.split('').forEach(function (single) { if (param.type.equals("a")) { - if (single.equals("m")) { + if (single == 'm') { // missing (undefined) arg = null; } else { @@ -349,9 +349,9 @@ public Object validate(Object _args, Object context) { var arrayOK = true; // is there type information on the contents of the array? if (param.subtype != null) { - if (!single.equals("a") && !match.equals(param.subtype)) { + if (single != 'a' && !match.equals(param.subtype)) { arrayOK = false; - } else if (single.equals("a")) { + } else if (single == 'a') { List argArr = (List)arg; if (argArr.size() > 0) { var itemType = getSymbol(argArr.get(0)); @@ -377,24 +377,22 @@ public Object validate(Object _args, Object context) { ); } // the function expects an array. If it's not one, make it so - if (!single.equals("a")) { + if (single != 'a') { List _arg = new ArrayList<>(); _arg.add(arg); arg = _arg; } } - validatedArgs.add(arg); - argIndex++; } else { arg = argIndex compiler happy }