Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Development #21

Merged
merged 2 commits into from
May 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 35 additions & 36 deletions url/url.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@ package url
import (
"fmt"
"net/url"
"path"
"strings"

"golang.org/x/net/publicsuffix"
)

// A URL represents a parsed URL (technically, a URI reference).
// URL represents a parsed URL (technically, a URI reference).
//
// The general form represented is:
//
Expand All @@ -18,14 +19,14 @@ import (
//
// scheme:opaque[?query][#fragment]
//
// https://sub.example.com:8080
// https://sub.example.com:8080/path/to/file.txt
type URL struct {
*url.URL
// Scheme string // e.g https
// Opaque string // encoded opaque data
// User *Userinfo // username and password information
// Host string // e.g. sub.example.com, sub.example.com:8080
// Path string // path (relative paths may omit leading slash)
// Path string // path (relative paths may omit leading slash) e.g /path/to/file.txt
// RawPath string // encoded path hint (see EscapedPath method)
// OmitHost bool // do not emit empty host (authority)
// ForceQuery bool // append a query ('?') even if RawQuery is empty
Expand All @@ -38,45 +39,37 @@ type URL struct {
RootDomain string // e.g. example
TLD string // e.g. com
Port string // e.g. 8080
Extension string // e.g. txt
}

// Parse parses a raw url into a URL structure.
//
// It uses the `net/url`'s Parse() internally, but it slightly changes its behavior:
// 1. It forces the default scheme if the url doesnt have a scheme and port to http
// 1. It defaults the scheme to http if the URL doesn't have one
// 2. It favors absolute paths over relative ones, thus "example.com"
// is parsed into url.Host instead of url.Path.
// 3. It lowercases the Host (not only the Scheme).
func Parse(rawURL string) (parsedURL *URL, err error) {
var (
defaultScheme string = "http"
)
const defaultScheme string = "http"

rawURL = AddDefaultScheme(rawURL, defaultScheme)

rawURL = DefaultScheme(rawURL, defaultScheme)
parsedURL = &URL{}

parsedURL.URL, err = url.Parse(rawURL)
if err != nil {
err = fmt.Errorf("[hqgoutils/url] %s", err)
err = fmt.Errorf("[hqgoutils/url]: %w", err)

return
}

// Host = Domain + Port
for i := len(parsedURL.URL.Host) - 1; i >= 0; i-- {
if parsedURL.URL.Host[i] == ':' {
parsedURL.Domain = parsedURL.URL.Host[:i]
parsedURL.Port = parsedURL.URL.Host[i+1:]
break
} else if parsedURL.URL.Host[i] < '0' || parsedURL.URL.Host[i] > '9' {
parsedURL.Domain = parsedURL.URL.Host
}
}
parsedURL.Domain, parsedURL.Port = SplitHost(parsedURL.URL.Host)

// ETLDPlusOne
parsedURL.ETLDPlusOne, err = publicsuffix.EffectiveTLDPlusOne(parsedURL.Domain)
if err != nil {
err = fmt.Errorf("[hqgoutils/url] %s", err)
err = fmt.Errorf("[hqgoutils/url] %w", err)

return
}
Expand All @@ -91,28 +84,34 @@ func Parse(rawURL string) (parsedURL *URL, err error) {
parsedURL.Subdomain = rest
}

// Extension
parsedURL.Extension = path.Ext(parsedURL.Path)

return
}

// DefaultScheme forces default scheme to `http` scheme, so net/url.Parse() doesn't
// put both host and path into the (relative) path.
func DefaultScheme(URL, scheme string) (URLWithScheme string) {
URLWithScheme = URL

// e.g //example.com
if strings.Index(URLWithScheme, "//") == 0 {
URLWithScheme = scheme + ":" + URLWithScheme
}

// e.g ://example.com
if strings.Contains(URLWithScheme, "://") && !strings.HasPrefix(URLWithScheme, "http") {
URLWithScheme = scheme + URLWithScheme
// AddDefaultScheme returns rawURL with scheme prepended when rawURL does not
// already carry a scheme of its own.
//
// Handled forms:
//   - "//example.com"       -> scheme + "://example.com" (protocol-relative)
//   - "://example.com"      -> scheme + "://example.com" (scheme name missing)
//   - "https://example.com" -> returned unchanged (any valid scheme is kept,
//     not only http/https, and regardless of letter case)
//   - "example.com"         -> scheme + "://example.com"
//
// A "://" that only appears later in the string (for example inside a query
// parameter) is not mistaken for the URL's own scheme separator.
func AddDefaultScheme(rawURL, scheme string) string {
	switch {
	case strings.HasPrefix(rawURL, "//"):
		return scheme + ":" + rawURL
	case strings.HasPrefix(rawURL, "://"):
		return scheme + rawURL
	case hasURLScheme(rawURL):
		return rawURL
	default:
		return scheme + "://" + rawURL
	}
}

// hasURLScheme reports whether rawURL starts with "<scheme>://", where
// <scheme> is a letter followed by letters, digits, '+', '-' or '.'.
func hasURLScheme(rawURL string) bool {
	end := strings.Index(rawURL, "://")
	if end <= 0 {
		return false
	}

	for i := 0; i < end; i++ {
		c := rawURL[i]

		switch {
		case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
			// Letters are valid at any position.
		case i > 0 && ('0' <= c && c <= '9' || c == '+' || c == '-' || c == '.'):
			// Digits and "+-." are valid after the first character.
		default:
			// Anything else (e.g. '/', '?', ':') means the "://" belongs to
			// a later part of the URL, not to a leading scheme.
			return false
		}
	}

	return true
}

// e.g example.com, localhost
if !strings.Contains(URLWithScheme, "://") {
URLWithScheme = scheme + "://" + URLWithScheme
// SplitHost splits host into its domain and port parts at the last colon.
//
// When host has no colon, or when the last colon lies inside the square
// brackets of an IPv6 literal (e.g. "[::1]"), the whole host is returned as
// the domain and the port is empty. Brackets around an IPv6 literal are kept
// in the returned domain. The port is returned as-is and is not validated.
func SplitHost(host string) (domain string, port string) {
	colon := strings.LastIndexByte(host, ':')

	// A colon that precedes the closing bracket belongs to the IPv6 address
	// itself, not to a port separator.
	if colon < 0 || colon < strings.LastIndexByte(host, ']') {
		return host, ""
	}

	return host[:colon], host[colon+1:]
}
138 changes: 104 additions & 34 deletions url/url_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,65 +8,135 @@ import (

func TestParse(t *testing.T) {
tests := []struct {
name string
input string
output url.URL
output *url.URL
err error
}{
{
input: "https://sub.example.com:8080",
output: url.URL{
name: "Test example URL",
input: "https://sub.example.com:8080/path/to/file.txt",
output: &url.URL{
Domain: "sub.example.com",
ETLDPlusOne: "example.com",
Subdomain: "sub",
RootDomain: "example",
TLD: "com",
Port: "8080",
Extension: ".txt",
},
err: nil,
},
}

for index := range tests {
test := tests[index]
tt := tests[index]

URL, err := url.Parse(test.input)
if err != nil {
t.Error(err)
}
t.Run(tt.name, func(t *testing.T) {
got, err := url.Parse(tt.input)
if err != nil {
t.Errorf("Parse(%q) returned error %v", tt.input, err)
}

if URL.ETLDPlusOne != test.output.ETLDPlusOne {
t.Errorf(`"%s": got "%s", want "%v"`, test.input, URL.ETLDPlusOne, test.output.ETLDPlusOne)
}
if got.Domain != tt.output.Domain || got.ETLDPlusOne != tt.output.ETLDPlusOne || got.Subdomain != tt.output.Subdomain || got.RootDomain != tt.output.RootDomain || got.TLD != tt.output.TLD || got.Port != tt.output.Port || got.Extension != tt.output.Extension {
t.Errorf("Parse(%q) = %v, want %v", tt.input, got, tt.output)
}
})
}
}

if URL.Subdomain != test.output.Subdomain {
t.Errorf(`"%s": got "%s", want "%v"`, test.input, URL.Subdomain, test.output.Subdomain)
}
func TestAddDefaultScheme(t *testing.T) {
tests := []struct {
name string
url string
scheme string
output string
}{
{
name: "Case: localhost",
url: "localhost",
scheme: "http",
output: "http://localhost",
},
{
name: "Case: example.com",
url: "example.com",
scheme: "http",
output: "http://example.com",
},
{
name: "Case: //example.com",
url: "//example.com",
scheme: "http",
output: "http://example.com",
},
{
name: "Case: ://example.com",
url: "://example.com",
scheme: "http",
output: "http://example.com",
},
{
name: "Case: https://example.com",
url: "https://example.com",
scheme: "http",
output: "https://example.com",
},
}

if URL.TLD != test.output.TLD {
t.Errorf(`"%s": got "%s", want "%v"`, test.input, URL.TLD, test.output.TLD)
}
for index := range tests {
tt := tests[index]

if URL.Port != test.output.Port {
t.Errorf(`"%s": got "%s", want "%v"`, test.input, URL.Port, test.output.Port)
}
t.Run(tt.name, func(t *testing.T) {
got := url.AddDefaultScheme(tt.url, tt.scheme)
if got != tt.output {
t.Errorf("AddDefaultScheme(%q, %q) = %v, want %v", tt.url, tt.scheme, got, tt.output)
}
})
}
}

func TestDefaultScheme(t *testing.T) {
func TestSplitHost(t *testing.T) {
tests := []struct {
input string
output string
name string
host string
domain string
port string
}{
{input: "localhost", output: "http://localhost"},
{input: "example.com", output: "http://example.com"},
{input: "https://example.com", output: "https://example.com"},
{input: "://example.com", output: "http://example.com"},
{input: "//example.com", output: "http://example.com"},
{
name: "Case: localhost",
host: "localhost",
domain: "localhost",
port: "",
},
{
name: "Case: example.com",
host: "example.com",
domain: "example.com",
port: "",
},
{
name: "Case: localhost:8080",
host: "localhost:8080",
domain: "localhost",
port: "8080",
},
{
name: "Case: example.com:8080",
host: "example.com:8080",
domain: "example.com",
port: "8080",
},
}

for index := range tests {
test := tests[index]

URL := url.DefaultScheme(test.input, "http")
tt := tests[index]

if URL != test.output {
t.Errorf(`"%s": got "%s", want "%v"`, test.input, URL, test.output)
}
t.Run(tt.name, func(t *testing.T) {
domain, port := url.SplitHost(tt.host)
if domain != tt.domain || port != tt.port {
t.Errorf("splitHost(%q) = %v, %v, want %v, %v", tt.host, domain, port, tt.domain, tt.port)
}
})
}
}
Loading