diff --git a/javalib/src/main/scala/java/net/URI.scala b/javalib/src/main/scala/java/net/URI.scala index f3f40f594c..08873af9dc 100644 --- a/javalib/src/main/scala/java/net/URI.scala +++ b/javalib/src/main/scala/java/net/URI.scala @@ -12,14 +12,11 @@ package java.net -import scala.scalajs.js.RegExp -import scala.scalajs.js - import scala.annotation.tailrec -import java.lang.Utils._ import java.nio._ import java.nio.charset.{CodingErrorAction, StandardCharsets} +import java.util.regex.RegExpImpl final class URI(origStr: String) extends Serializable with Comparable[URI] { @@ -32,14 +29,14 @@ final class URI(origStr: String) extends Serializable with Comparable[URI] { * This is a local val for the primary constructor. It is a val, * since we'll set it to null after initializing all fields. */ - private[this] var _fld: RegExp.ExecResult = URI.uriRe.exec(origStr) - if (_fld == null) + private[this] var _parsed: Array[String] = URI.parseURI(origStr) + if (_parsed == null) throw new URISyntaxException(origStr, "Malformed URI") - private val _isAbsolute = undefOrIsDefined(_fld(AbsScheme)) - private val _isOpaque = undefOrIsDefined(_fld(AbsOpaquePart)) + private val _isAbsolute = _parsed(AbsScheme) != null + private val _isOpaque = _parsed(AbsOpaquePart) != null - @inline private def fld(idx: Int): String = undefOrGetOrNull(_fld(idx)) + @inline private def fld(idx: Int): String = _parsed(idx) @inline private def fld(absIdx: Int, relIdx: Int): String = if (_isAbsolute) fld(absIdx) else fld(relIdx) @@ -93,7 +90,7 @@ final class URI(origStr: String) extends Serializable with Comparable[URI] { private val _fragment = fld(Fragment) // End of default ctor. Unset helper field - _fld = null + _parsed = null def this(scheme: String, ssp: String, fragment: String) = this(URI.uriStr(scheme, ssp, fragment)) @@ -217,11 +214,10 @@ final class URI(origStr: String) extends Serializable with Comparable[URI] { def normalize(): URI = if (_isOpaque || _path == null) this else { - import js.JSStringOps._ - val origPath = _path - val segments = origPath.jsSplit("/") + // Use String#split which works on both JS and pure Wasm targets + val segments = origPath.split("/", -1) // Step 1: Remove all "." segments // Step 2: Remove ".." segments preceded by non ".." segment until no @@ -279,17 +275,24 @@ final class URI(origStr: String) extends Serializable with Comparable[URI] { } } - // Truncate `segments` at `outIdx` - segments.length = outIdx - // Step 3: If path is relative and first segment contains ":", prepend "." // segment (according to JavaDoc). If the path is absolute, the first // segment is "" so the `contains(':')` returns false. - if (outIdx != 0 && segments(0).contains(":")) - segments.unshift(".") + val prependDot = outIdx != 0 && segments(0).contains(":") - // Now add all the segments from step 1, 2 and 3 - val newPath = segments.join("/") + // Build the new path from segments[0..outIdx) + val newPath = { + val sb = new java.lang.StringBuilder() + if (prependDot) + sb.append(".") + var i = 0 + while (i < outIdx) { + if (i != 0 || prependDot) sb.append("/") + sb.append(segments(i)) + i += 1 + } + sb.toString + } // Only create new instance if anything changed if (newPath == origPath) @@ -437,14 +440,23 @@ object URI { // (25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]) # 2001:db8:3:4::192.0.2.33 64:ff9b::192.0.2.33 (IPv4-Embedded IPv6 Address) } - private val ipv6Re = new RegExp("^" + ipv6address + "$", "i") + private val ipv6ReStr = "^" + ipv6address + "$" + + private[this] lazy val ipv6RePat = + RegExpImpl.impl.compile(ipv6ReStr, "i") + + /** Test whether a host string is an IPv6 address. */ + private def testIPv6(host: String): Boolean = { + import RegExpImpl.impl + impl.matches(impl.exec(ipv6RePat, host)) + } // URI syntax parser. Based on RFC2396, RFC2732 and adaptations according to // JavaDoc. // - http://www.ietf.org/rfc/rfc2396.txt (see Appendix A for complete syntax) // - http://www.ietf.org/rfc/rfc2732.txt - private val uriRe = { + private val uriReStr = { // We don't use any interpolators here to allow for constant folding /////////////////// @@ -584,9 +596,30 @@ object URI { "((?:" + net_path + "|(" + abs_path + ")|(" + rel_path + "))(?:\\?" + query + ")?)" // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] - val uriRef = "^(?:" + absoluteURI + "|" + relativeURI + ")(?:#" + fragment + ")?$" + "^(?:" + absoluteURI + "|" + relativeURI + ")(?:#" + fragment + ")?$" + } + + private[this] lazy val uriRePat = + RegExpImpl.impl.compile(uriReStr, "i") - new RegExp(uriRef, "i") + /** Parse a URI string, returning an Array[String] of matched groups + * (null for non-matching groups), or null if the string is not a valid URI. + */ + private def parseURI(str: String): Array[String] = { + import RegExpImpl.impl + val result = impl.exec(uriRePat, str) + if (!impl.matches(result)) { + null + } else { + val len = Fields.Fragment + 1 + val arr = new Array[String](len) + var i = 0 + while (i < len) { + arr(i) = impl.getOrElse(result, i, null) + i += 1 + } + arr + } } private object Fields { @@ -643,7 +676,7 @@ object URI { resStr += quoteUserInfo(userInfo) + "@" if (host != null) { - if (URI.ipv6Re.test(host)) + if (testIPv6(host)) resStr += "[" + host + "]" else resStr += host @@ -753,7 +786,8 @@ object URI { } } - private val quoteStr: js.Function1[String, String] = { (str: String) => + /** Encode a matched string as percent-encoded UTF-8 bytes. */ + private def quoteStrFn(str: String): String = { val buf = StandardCharsets.UTF_8.encode(str) var res = "" @@ -765,39 +799,60 @@ object URI { res } + /** Replace all matches of a compiled regex pattern with percent-encoded form. */ + private def quoteReplace(str: String, + pattern: RegExpImpl.impl.PatRepr): String = { + import RegExpImpl.impl + val sb = new java.lang.StringBuilder() + var lastEnd = 0 + var result = impl.execFrom(pattern, str, 0) + while (impl.matchStart(result) != -1) { + val start = impl.matchStart(result) + val end = impl.matchEnd(pattern, result) + sb.append(str, lastEnd, start) + sb.append(quoteStrFn(str.substring(start, end))) + lastEnd = end + result = impl.execFrom(pattern, str, end) + } + sb.append(str, lastEnd, str.length) + sb.toString + } + /** matches any character not in unreserved, punct, escaped or other */ - private val userInfoQuoteRe = new RegExp( - // !other = [\u0000-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029] - // Char class is: [:!other:^a-z0-9-_.!~*'(),;:$&+=%] - "[\u0000- \"#/<>?@\\[-\\^`{-}" + - "\u007f-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]|" + - "%(?![0-9a-f]{2})", - "ig") + private val userInfoQuoteReStr = { + // !other = [\u0000-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029] + // Char class is: [:!other:^a-z0-9-_.!~*'(),;:$&+=%] + "[\u0000- \"#/<>?@\\[-\\^`{-}" + + "\u007f-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]|" + + "%(?![0-9a-f]{2})" + } + + private[this] lazy val userInfoQuotePat = + RegExpImpl.impl.compile(userInfoQuoteReStr, "ig") /** Quote any character not in unreserved, punct, escaped or other */ - private def quoteUserInfo(str: String) = { - import js.JSStringOps._ - str.jsReplace(userInfoQuoteRe, quoteStr) - } + private def quoteUserInfo(str: String): String = + quoteReplace(str, userInfoQuotePat) /** matches any character not in unreserved, punct, escaped, other or equal * to '/' or '@' */ - private val pathQuoteRe = new RegExp( - // !other = [\u0000-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029] - // Char class is: [:!other:^a-z0-9-_.!~*'(),;:$&+=%@/] - "[\u0000- \"#<>?\\[-\\^`{-}" + - "\u007f-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]|" + - "%(?![0-9a-f]{2})", - "ig") + private val pathQuoteReStr = { + // !other = [\u0000-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029] + // Char class is: [:!other:^a-z0-9-_.!~*'(),;:$&+=%@/] + "[\u0000- \"#<>?\\[-\\^`{-}" + + "\u007f-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]|" + + "%(?![0-9a-f]{2})" + } + + private[this] lazy val pathQuotePat = + RegExpImpl.impl.compile(pathQuoteReStr, "ig") /** Quote any character not in unreserved, punct, escaped, other or equal * to '/' or '@' */ - private def quotePath(str: String) = { - import js.JSStringOps._ - str.jsReplace(pathQuoteRe, quoteStr) - } + private def quotePath(str: String): String = + quoteReplace(str, pathQuotePat) /** matches any character not in unreserved, punct, escaped, other or equal * to '@', '[' or ']' @@ -806,48 +861,51 @@ object URI { * in IPv6 addresses, but technically speaking they are in reserved * due to RFC2732). */ - private val authorityQuoteRe = new RegExp( - // !other = [\u0000-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029] - // Char class is: [:!other:^a-z0-9-_.!~*'(),;:$&+=%@\[\]] - "[\u0000- \"#/<>?\\^`{-}" + - "\u007f-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]|" + - "%(?![0-9a-f]{2})", - "ig") + private val authorityQuoteReStr = { + // !other = [\u0000-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029] + // Char class is: [:!other:^a-z0-9-_.!~*'(),;:$&+=%@\[\]] + "[\u0000- \"#/<>?\\^`{-}" + + "\u007f-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]|" + + "%(?![0-9a-f]{2})" + } + + private[this] lazy val authorityQuotePat = + RegExpImpl.impl.compile(authorityQuoteReStr, "ig") /** Quote any character not in unreserved, punct, escaped, other or equal * to '@' */ - private def quoteAuthority(str: String) = { - import js.JSStringOps._ - str.jsReplace(authorityQuoteRe, quoteStr) - } + private def quoteAuthority(str: String): String = + quoteReplace(str, authorityQuotePat) /** matches any character not in unreserved, reserved, escaped or other */ - private val illegalQuoteRe = new RegExp( - // !other = [\u0000-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029] - // Char class is: [:!other:^a-z0-9-_.!~*'(),;:$&+=?/\\[\\]%] - "[\u0000- \"#<>@\\^`{-}" + - "\u007f-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]|" + - "%(?![0-9a-f]{2})", - "ig") + private val illegalQuoteReStr = { + // !other = [\u0000-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029] + // Char class is: [:!other:^a-z0-9-_.!~*'(),;:$&+=?/\\[\\]%] + "[\u0000- \"#<>@\\^`{-}" + + "\u007f-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]|" + + "%(?![0-9a-f]{2})" + } + + private[this] lazy val illegalQuotePat = + RegExpImpl.impl.compile(illegalQuoteReStr, "ig") /** Quote any character not in unreserved, reserved, escaped or other */ - private def quoteIllegal(str: String) = { - import js.JSStringOps._ - str.jsReplace(illegalQuoteRe, quoteStr) - } + private def quoteIllegal(str: String): String = + quoteReplace(str, illegalQuotePat) /** matches characters not in ASCII * * Note: It is important that the match is maximal, since we might encounter * surrogates that need to be encoded in one shot. */ - private val nonASCIIQuoteRe = new RegExp("[^\u0000-\u007F]+", "g") + private val nonASCIIQuoteReStr = "[^\u0000-\u007F]+" - private def quoteNonASCII(str: String) = { - import js.JSStringOps._ - str.jsReplace(nonASCIIQuoteRe, quoteStr) - } + private[this] lazy val nonASCIIQuotePat = + RegExpImpl.impl.compile(nonASCIIQuoteReStr, "g") + + private def quoteNonASCII(str: String): String = + quoteReplace(str, nonASCIIQuotePat) /** Case-insensitive comparison that accepts `null` values. * diff --git a/javalib/src/main/scala/java/util/regex/RegExpImpl.scala b/javalib/src/main/scala/java/util/regex/RegExpImpl.scala index 505dab722c..6d33db0db4 100644 --- a/javalib/src/main/scala/java/util/regex/RegExpImpl.scala +++ b/javalib/src/main/scala/java/util/regex/RegExpImpl.scala @@ -27,6 +27,7 @@ private[java] sealed abstract class RegExpImpl { def compile(patternStr: String): PatRepr def compile(patternStr: String, global: Boolean): PatRepr + def compile(patternStr: String, flags: String): PatRepr def exec(pattern: PatRepr, string: String): Repr def matches(r: Repr): Boolean def exists(r: Repr, index: Int): Boolean @@ -58,6 +59,9 @@ private[java] object RegExpImpl { else new js.RegExp(patternStr) } + def compile(patternStr: String, flags: String): PatRepr = + new js.RegExp(patternStr, flags) + def exec(pattern: PatRepr, string: String): Repr = pattern.exec(string) def matches(r: Repr): Boolean = r != null def exists(r: Repr, index: Int): Boolean = undefOrIsDefined(r(index)) @@ -88,6 +92,12 @@ private[java] object RegExpImpl { def compile(patternStr: String): PatRepr = Pattern.compile(patternStr) def compile(patternStr: String, global: Boolean): PatRepr = Pattern.compile(patternStr) + + def compile(patternStr: String, flags: String): PatRepr = { + var javaFlags = 0 + if (flags.contains("i")) javaFlags |= Pattern.CASE_INSENSITIVE + Pattern.compile(patternStr, javaFlags) + } def exec(pattern: PatRepr, string: String): Repr = pattern.matcher(string) def matches(r: Repr): Boolean = r.matches() def exists(r: Repr, index: Int): Boolean = r.group(index) != null diff --git a/project/Build.scala b/project/Build.scala index fd259198fd..38e99ea51c 100644 --- a/project/Build.scala +++ b/project/Build.scala @@ -2360,10 +2360,10 @@ object Build { // javalib/util !endsWith(f, "/DateTest.scala") && // js.Date - !endsWith(f, "/PropertiesTest.scala") && // Date.toString + !endsWith(f, "/PropertiesTest.scala") // Date.toString // javalib/net - !endsWith(f, "/net/URITest.scala") // URI.normalize + // (URITest is now compatible with pure Wasm) ) || contains(f, "/js/src/test/scala/org/scalajs/testsuite/") && ( // compiler diff --git a/test-suite/shared/src/test/scala/org/scalajs/testsuite/javalib/net/URITest.scala b/test-suite/shared/src/test/scala/org/scalajs/testsuite/javalib/net/URITest.scala index 52f4b4f6b2..fe0ec85df2 100644 --- a/test-suite/shared/src/test/scala/org/scalajs/testsuite/javalib/net/URITest.scala +++ b/test-suite/shared/src/test/scala/org/scalajs/testsuite/javalib/net/URITest.scala @@ -474,4 +474,55 @@ class URITest { assertTrue("SSP case-sensitive", new URI("mailto:john") != new URI("mailto:JOHN")) assertTrue(new URI("mailto:john") != new URI("MAILTO:jim")) } + + // Tests for multi-component constructors, normalize edge cases, and + // non-ASCII quoting. These exercise quoting/parsing code paths that + // were previously not covered by the test suite. + + @Test def multiComponentConstructorQuoting(): Unit = { + // 7-arg constructor exercises quoteUserInfo, quotePath, quoteAuthority + val uri = new URI("http", "us er", "example.com", 80, "/a path", "q=1 2", "frag ment") + assertEquals("http", uri.getScheme()) + assertEquals("example.com", uri.getHost()) + assertEquals(80, uri.getPort()) + assertEquals("/a path", uri.getPath()) + assertEquals("q=1 2", uri.getQuery()) + assertEquals("frag ment", uri.getFragment()) + assertEquals("us er", uri.getUserInfo()) + // Raw forms should have spaces percent-encoded + assertEquals("/a%20path", uri.getRawPath()) + assertEquals("q=1%202", uri.getRawQuery()) + assertEquals("frag%20ment", uri.getRawFragment()) + assertEquals("us%20er", uri.getRawUserInfo()) + } + + @Test def multiComponentConstructorWithIPv6Host(): Unit = { // exercises IPv6 detection + val uri = new URI("http", null, "::1", 8080, "/path", null, null) + assertEquals("[::1]", uri.getHost()) + assertEquals(8080, uri.getPort()) + assertEquals("/path", uri.getPath()) + assertTrue(uri.toString().contains("[::1]")) + } + + @Test def threeArgConstructorQuoting(): Unit = { // exercises quoteIllegal + val uri = new URI("foo", "hello world", "frag ment") + assertEquals("foo", uri.getScheme()) + assertEquals("hello world", uri.getSchemeSpecificPart()) + assertEquals("frag ment", uri.getFragment()) + assertEquals("hello%20world", uri.getRawSchemeSpecificPart()) + assertEquals("frag%20ment", uri.getRawFragment()) + } + + @Test def normalizeDotOnlyPaths(): Unit = { + // Dot-only relative paths (not covered by RFC resolve examples above) + assertEquals("", new URI(".").normalize().getPath()) + assertEquals("", new URI("./").normalize().getPath()) + assertEquals("..", new URI("..").normalize().getPath()) + assertEquals("../", new URI("../").normalize().getPath()) + } + + @Test def normalizeEmptySegments(): Unit = { + // Multiple consecutive slashes produce empty segments + assertEquals("/a/b", new URI("/a///b").normalize().getPath()) + } }