From f52e17e8996dc881fc3b5d6eb25cf2eaede4cb4d Mon Sep 17 00:00:00 2001
From: "Adam D. Ruppe"
Date: Fri, 16 Nov 2018 08:17:22 -0500
Subject: [PATCH 1/2] false comment removed

---
 cgi.d | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cgi.d b/cgi.d
index 9eb5510..01c4338 100644
--- a/cgi.d
+++ b/cgi.d
@@ -487,7 +487,7 @@ class Cgi {
 
 		Non-simulation arguments:
 			--port xxx              listening port for non-cgi things (valid for the cgi interfaces)
-			--listening-host        the ip address the application should listen on (only implemented for fastcgi right now)
+			--listening-host        the ip address the application should listen on
 
 	*/
 
From 245afed7fd717a3292f5cb17848f96964164afe7 Mon Sep 17 00:00:00 2001
From: "Adam D. Ruppe"
Date: Sat, 17 Nov 2018 19:49:21 -0500
Subject: [PATCH 2/2] double compile speed

---
 cgi.d | 174 +++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 155 insertions(+), 19 deletions(-)

diff --git a/cgi.d b/cgi.d
index 01c4338..c32bda0 100644
--- a/cgi.d
+++ b/cgi.d
@@ -2327,38 +2327,146 @@ struct Uri {
 	}
 
+	/*
+		Resets this Uri, then splits `uri` into scheme, userinfo, host, port, path,
+		query and fragment following the RFC 3986 component grammar described below.
+		Components missing from `uri` are left as reset (port is 0 when the authority has none).
+	*/
 	private void reparse(string uri) {
-		import std.regex;
 		// from RFC 3986
-
 		// the ctRegex triples the compile time and makes ugly errors for no real benefit
 		// it was a nice experiment but just not worth it.
 		// enum ctr = ctRegex!r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?";
-		auto ctr = regex(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?");
+		/*
+			Captures:
+				0 = whole url
+				1 = scheme, with :
+				2 = scheme, no :
+				3 = authority, with //
+				4 = authority, no //
+				5 = path
+				6 = query string, with ?
+				7 = query string, no ?
+				8 = anchor, with #
+				9 = anchor, no #
+		*/
+		// Yikes, even regular, non-CT regex is also unacceptably slow to compile. 1.9s on my computer!
+		// instead, I will DIY and cut that down to 0.6s on the same computer.
+		/*
 
-		auto m = match(uri, ctr);
-		if(m) {
-			scheme = m.captures[2];
-			auto authority = m.captures[4];
+				Note that authority is
+					user:password@domain:port
+				where the user:password@ part is optional, and the :port is optional.
 
-			auto idx = authority.indexOf("@");
-			if(idx != -1) {
-				userinfo = authority[0 .. idx];
-				authority = authority[idx + 1 .. $];
+				Regex translation:
+
+				Scheme cannot have :, /, ?, or # in it, and must have one or more chars and end in a :. It is optional, but must be first.
+				Authority must start with //, but cannot have any other /, ?, or # in it. It is optional.
+				Path cannot have any ? or # in it. It is optional.
+				Query must start with ? and must not have # in it. It is optional.
+				Anchor must start with # and can have anything else in it to end of string. It is optional.
+		*/
+
+		this = Uri.init; // reset all state
+
+		// empty uri = nothing special
+		if(uri.length == 0) {
+			return;
+		}
+
+		size_t idx;
+
+		scheme_loop: foreach(char c; uri[idx .. $]) {
+			switch(c) {
+				case ':':
+				case '/':
+				case '?':
+				case '#':
+					break scheme_loop;
+				default:
+			}
+			idx++;
+		}
+
+		if(idx == 0 && uri[idx] == ':') {
+			// this is actually a path! we skip way ahead
+			goto path_loop;
+		}
+
+		if(idx == uri.length) {
+			// the whole thing is a path, apparently
+			path = uri;
+			return;
+		}
+
+		if(idx > 0 && uri[idx] == ':') {
+			scheme = uri[0 .. idx];
+			idx++; // step over the ':'
+		}
+
+		if(idx + 2 <= uri.length && uri[idx .. idx + 2] == "//") {
+			// we have an authority (it may be empty, as in "http://" or a bare "//")
+			idx += 2;
+
+			auto authority_start = idx;
+			authority_loop: foreach(char c; uri[idx .. $]) {
+				switch(c) {
+					case '/':
+					case '?':
+					case '#':
+						break authority_loop;
+					default:
+				}
+				idx++;
 			}
 
-			idx = authority.indexOf(":");
-			if(idx == -1) {
+			auto authority = uri[authority_start .. idx];
+
+			auto idx2 = authority.indexOf("@");
+			if(idx2 != -1) {
+				userinfo = authority[0 .. idx2];
+				authority = authority[idx2 + 1 .. $];
+			}
+
+			idx2 = authority.indexOf(":");
+			if(idx2 == -1) {
 				port = 0; // 0 means not specified; we should use the default for the scheme
 				host = authority;
 			} else {
-				host = authority[0 .. idx];
-				port = to!int(authority[idx + 1 .. $]);
+				host = authority[0 .. idx2];
+				port = to!int(authority[idx2 + 1 .. $]);
 			}
-
-			path = m.captures[5];
-			query = m.captures[7];
-			fragment = m.captures[9];
 		}
+
+		path_loop:
+		auto path_start = idx;
+
+		foreach(char c; uri[idx .. $]) {
+			if(c == '?' || c == '#')
+				break;
+			idx++;
+		}
+
+		path = uri[path_start .. idx];
+
+		if(idx == uri.length)
+			return; // nothing more to examine...
+
+		if(uri[idx] == '?') {
+			idx++;
+			auto query_start = idx;
+			foreach(char c; uri[idx .. $]) {
+				if(c == '#')
+					break;
+				idx++;
+			}
+			query = uri[query_start .. idx];
+		}
+
+		if(idx < uri.length && uri[idx] == '#') {
+			idx++;
+			fragment = uri[idx .. $];
+		}
+
 		// uriInvalidated = false;
 	}
 
@@ -2427,6 +2535,31 @@ struct Uri {
 		return n;
 	}
 
+	unittest {
+		auto uri = Uri("test.html");
+		assert(uri.path == "test.html");
+		uri = Uri("http://me@example.com");
+		assert(uri.scheme == "http");
+		assert(uri.userinfo == "me");
+		assert(uri.host == "example.com");
+		uri = Uri("http://example.com/#a");
+		assert(uri.scheme == "http");
+		assert(uri.host == "example.com");
+		assert(uri.fragment == "a");
+		uri = Uri("#foo");
+		assert(uri.fragment == "foo");
+		uri = Uri("?lol");
+		assert(uri.query == "lol");
+		uri = Uri("#foo?lol");
+		assert(uri.fragment == "foo?lol");
+		uri = Uri("?lol#foo");
+		assert(uri.fragment == "foo");
+		assert(uri.query == "lol");
+		uri = Uri("http://"); // empty authority at the very end of the string
+		assert(uri.scheme == "http");
+		assert(uri.path == "");
+	}
+
 	// This can sometimes be a big pain in the butt for me, so lots of copy/paste here to cover
 	// the possibilities.
 	unittest {
@@ -2464,7 +2597,10 @@ struct Uri {
 		assert(url.basedOn(Uri("http://test.com/what/test.html?a=b&c=d#what")) == "http://test.com/what/test.html?query=answer");
 		assert(url.basedOn(Uri("http://test.com")) == "http://test.com?query=answer");
 
+		//auto uriBefore = url;
 		url = Uri("#anchor"); // everything should remain the same except the anchor
+		//uriBefore.fragment = "anchor";
+		//assert(url == uriBefore);
 
 		url = Uri("//example.com"); // same protocol, but different server. the path here should be blank.
 