From cdde0b8e1806e8ad6175c53e94473b563af76ef2 Mon Sep 17 00:00:00 2001 From: "Alex Munene (@enenumxela)" <62714471+enenumxela@users.noreply.github.com> Date: Sat, 20 May 2023 14:57:31 +0300 Subject: [PATCH 1/2] feat(url): Add extension field --- url/url.go | 9 +++++++-- url/url_test.go | 11 +++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/url/url.go b/url/url.go index 35563e9..bc514df 100644 --- a/url/url.go +++ b/url/url.go @@ -3,6 +3,7 @@ package url import ( "fmt" "net/url" + "path" "strings" "golang.org/x/net/publicsuffix" @@ -18,14 +19,14 @@ import ( // // scheme:opaque[?query][#fragment] // -// https://sub.example.com:8080 +// https://sub.example.com:8080/path/to/file.txt type URL struct { *url.URL // Scheme string // e.g https // Opaque string // encoded opaque data // User *Userinfo // username and password information // Host string // e.g. sub.example.com, sub.example.com:8080 - // Path string // path (relative paths may omit leading slash) + // Path string // path (relative paths may omit leading slash) e.g /path/to/file.txt // RawPath string // encoded path hint (see EscapedPath method) // OmitHost bool // do not emit empty host (authority) // ForceQuery bool // append a query ('?') even if RawQuery is empty @@ -38,6 +39,7 @@ type URL struct { RootDomain string // e.g. example TLD string // e.g. com Port string // e.g. 8080 + Extension string // e.g. txt } // Parse parses a raw url into a URL structure. 
@@ -91,6 +93,9 @@ func Parse(rawURL string) (parsedURL *URL, err error) { parsedURL.Subdomain = rest } + // Extension + parsedURL.Extension = path.Ext(parsedURL.Path) + return } diff --git a/url/url_test.go b/url/url_test.go index 5af4463..33f9d13 100644 --- a/url/url_test.go +++ b/url/url_test.go @@ -18,6 +18,17 @@ func TestParse(t *testing.T) { Subdomain: "sub", TLD: "com", Port: "8080", + Extension: "", + }, + }, + { + input: "https://sub.example.com:8080/path/to/file.txt", + output: url.URL{ + ETLDPlusOne: "example.com", + Subdomain: "sub", + TLD: "com", + Port: "8080", + Extension: "txt", }, }, } From cbcf4d19ce84e5454227a72632b01bef6657edcb Mon Sep 17 00:00:00 2001 From: "Alex Munene (@enenumxela)" <62714471+enenumxela@users.noreply.github.com> Date: Sat, 20 May 2023 16:00:18 +0300 Subject: [PATCH 2/2] refactor(url): - --- url/url.go | 62 +++++++++----------- url/url_test.go | 147 +++++++++++++++++++++++++++++++++--------------- 2 files changed, 131 insertions(+), 78 deletions(-) diff --git a/url/url.go b/url/url.go index bc514df..27db471 100644 --- a/url/url.go +++ b/url/url.go @@ -9,7 +9,7 @@ import ( "golang.org/x/net/publicsuffix" ) -// A URL represents a parsed URL (technically, a URI reference). +// URL represents a parsed URL (technically, a URI reference). // // The general form represented is: // @@ -45,40 +45,31 @@ type URL struct { // Parse parses a raw url into a URL structure. // // It uses the `net/url`'s Parse() internally, but it slightly changes its behavior: -// 1. It forces the default scheme if the url doesnt have a scheme and port to http +// 1. It forces the default scheme, if the url doesn't have a scheme, to http // 2. It favors absolute paths over relative ones, thus "example.com" // is parsed into url.Host instead of url.Path. // 3. It lowercases the Host (not only the Scheme). 
func Parse(rawURL string) (parsedURL *URL, err error) { - var ( - defaultScheme string = "http" - ) + const defaultScheme string = "http" + + rawURL = AddDefaultScheme(rawURL, defaultScheme) - rawURL = DefaultScheme(rawURL, defaultScheme) parsedURL = &URL{} parsedURL.URL, err = url.Parse(rawURL) if err != nil { - err = fmt.Errorf("[hqgoutils/url] %s", err) + err = fmt.Errorf("[hqgoutils/url]: %w", err) return } // Host = Domain + Port - for i := len(parsedURL.URL.Host) - 1; i >= 0; i-- { - if parsedURL.URL.Host[i] == ':' { - parsedURL.Domain = parsedURL.URL.Host[:i] - parsedURL.Port = parsedURL.URL.Host[i+1:] - break - } else if parsedURL.URL.Host[i] < '0' || parsedURL.URL.Host[i] > '9' { - parsedURL.Domain = parsedURL.URL.Host - } - } + parsedURL.Domain, parsedURL.Port = SplitHost(parsedURL.URL.Host) // ETLDPlusOne parsedURL.ETLDPlusOne, err = publicsuffix.EffectiveTLDPlusOne(parsedURL.Domain) if err != nil { - err = fmt.Errorf("[hqgoutils/url] %s", err) + err = fmt.Errorf("[hqgoutils/url] %w", err) return } @@ -99,25 +90,28 @@ func Parse(rawURL string) (parsedURL *URL, err error) { return } -// DefaultScheme forces default scheme to `http` scheme, so net/url.Parse() doesn't -// put both host and path into the (relative) path. -func DefaultScheme(URL, scheme string) (URLWithScheme string) { - URLWithScheme = URL - - // e.g //example.com - if strings.Index(URLWithScheme, "//") == 0 { - URLWithScheme = scheme + ":" + URLWithScheme - } - - // e.g ://example.com - if strings.Contains(URLWithScheme, "://") && !strings.HasPrefix(URLWithScheme, "http") { - URLWithScheme = scheme + URLWithScheme +// AddDefaultScheme ensures a scheme is added if none exists. 
+func AddDefaultScheme(rawURL, scheme string) string { + switch { + case strings.HasPrefix(rawURL, "//"): + return scheme + ":" + rawURL + case strings.Contains(rawURL, "://") && !strings.HasPrefix(rawURL, "http"): + return scheme + rawURL + case !strings.Contains(rawURL, "://"): + return scheme + "://" + rawURL + default: + return rawURL } +} - // e.g example.com, localhost - if !strings.Contains(URLWithScheme, "://") { - URLWithScheme = scheme + "://" + URLWithScheme +// SplitHost splits the host into domain and port. +func SplitHost(host string) (domain string, port string) { + for i := len(host) - 1; i >= 0; i-- { + if host[i] == ':' { + return host[:i], host[i+1:] + } else if host[i] < '0' || host[i] > '9' { + domain = host + } } return } diff --git a/url/url_test.go b/url/url_test.go index 33f9d13..4267aef 100644 --- a/url/url_test.go +++ b/url/url_test.go @@ -8,76 +8,135 @@ import ( func TestParse(t *testing.T) { tests := []struct { + name string input string - output url.URL + output *url.URL + err error }{ { - input: "https://sub.example.com:8080", - output: url.URL{ - ETLDPlusOne: "example.com", - Subdomain: "sub", - TLD: "com", - Port: "8080", - Extension: "", - }, - }, - { + name: "Test example URL", input: "https://sub.example.com:8080/path/to/file.txt", - output: url.URL{ + output: &url.URL{ + Domain: "sub.example.com", ETLDPlusOne: "example.com", Subdomain: "sub", + RootDomain: "example", TLD: "com", Port: "8080", - Extension: "txt", + Extension: ".txt", }, + err: nil, }, } for index := range tests { - test := tests[index] + tt := tests[index] - URL, err := url.Parse(test.input) - if err != nil { - t.Error(err) - } + t.Run(tt.name, func(t *testing.T) { + got, err := url.Parse(tt.input) + if err != nil { + t.Errorf("Parse(%q) returned error %v", tt.input, err) + } - if URL.ETLDPlusOne != test.output.ETLDPlusOne { - t.Errorf(`"%s": got "%s", want "%v"`, test.input, URL.ETLDPlusOne, test.output.ETLDPlusOne) - } + if got.Domain != tt.output.Domain || 
got.ETLDPlusOne != tt.output.ETLDPlusOne || got.Subdomain != tt.output.Subdomain || got.RootDomain != tt.output.RootDomain || got.TLD != tt.output.TLD || got.Port != tt.output.Port || got.Extension != tt.output.Extension { + t.Errorf("Parse(%q) = %v, want %v", tt.input, got, tt.output) + } + }) + } +} - if URL.Subdomain != test.output.Subdomain { - t.Errorf(`"%s": got "%s", want "%v"`, test.input, URL.Subdomain, test.output.Subdomain) - } +func TestAddDefaultScheme(t *testing.T) { + tests := []struct { + name string + url string + scheme string + output string + }{ + { + name: "Case: localhost", + url: "localhost", + scheme: "http", + output: "http://localhost", + }, + { + name: "Case: example.com", + url: "example.com", + scheme: "http", + output: "http://example.com", + }, + { + name: "Case: //example.com", + url: "//example.com", + scheme: "http", + output: "http://example.com", + }, + { + name: "Case: ://example.com", + url: "://example.com", + scheme: "http", + output: "http://example.com", + }, + { + name: "Case: https://example.com", + url: "https://example.com", + scheme: "http", + output: "https://example.com", + }, + } - if URL.TLD != test.output.TLD { - t.Errorf(`"%s": got "%s", want "%v"`, test.input, URL.TLD, test.output.TLD) - } + for index := range tests { + tt := tests[index] - if URL.Port != test.output.Port { - t.Errorf(`"%s": got "%s", want "%v"`, test.input, URL.Port, test.output.Port) - } + t.Run(tt.name, func(t *testing.T) { + got := url.AddDefaultScheme(tt.url, tt.scheme) + if got != tt.output { + t.Errorf("AddDefaultScheme(%q, %q) = %v, want %v", tt.url, tt.scheme, got, tt.output) + } + }) } } -func TestDefaultScheme(t *testing.T) { +func TestSplitHost(t *testing.T) { tests := []struct { - input string - output string + name string + host string + domain string + port string }{ - {input: "localhost", output: "http://localhost"}, - {input: "example.com", output: "http://example.com"}, - {input: "https://example.com", output: 
"https://example.com"}, - {input: "://example.com", output: "http://example.com"}, - {input: "//example.com", output: "http://example.com"}, + { + name: "Case: localhost", + host: "localhost", + domain: "localhost", + port: "", + }, + { + name: "Case: example.com", + host: "example.com", + domain: "example.com", + port: "", + }, + { + name: "Case: localhost:8080", + host: "localhost:8080", + domain: "localhost", + port: "8080", + }, + { + name: "Case: example.com:8080", + host: "example.com:8080", + domain: "example.com", + port: "8080", + }, } for index := range tests { - test := tests[index] - - URL := url.DefaultScheme(test.input, "http") + tt := tests[index] - if URL != test.output { - t.Errorf(`"%s": got "%s", want "%v"`, test.input, URL, test.output) - } + t.Run(tt.name, func(t *testing.T) { + domain, port := url.SplitHost(tt.host) + if domain != tt.domain || port != tt.port { + t.Errorf("splitHost(%q) = %v, %v, want %v, %v", tt.host, domain, port, tt.domain, tt.port) + } + }) } }