diff --git a/README.md b/README.md index 5f0e580..a256452 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,124 @@ # bench -refined performance measurement in Go + +Reliable performance measurement for Go programs. All in one design. + +``` +$ go get golang.design/x/bench +``` + +## Features + +- Combine [benchstat](https://pkg.go.dev/golang.org/x/perf/cmd/benchstat), [perflock](https://github.com/aclements/perflock) and more... +- Short command and only run benchmarks +- Automatic performance locking for benchmarks +- Automatic statistic analysis for benchmark results +- Color indications for benchmark results + +## Usage + +### Enable `bench` Daemon (optional, Linux only) + +```sh +$ cd $GOPATH/src/golang.design/x/bench +$ ./install.bash +``` + +If your init system is supported, this will also configure `bench` to start automatically on boot. + +Or you can install and run `bench` daemon manually: + +```sh +$ sudo install $GOPATH/bin/bench /usr/bin/bench +$ sudo -b bench -daemon +``` + +### Default Behavior + +```sh +$ bench +``` + +The detault behavior of `bench` run benchmarks under your current +working directory, and each benchmark will be ran 10 times for further +statistical analysis. It will also try to acquire performance lock from +`bench` daemon to gain more stable results. Furthermore, the benchmark +results are saved as a text file to the working directory and named as +`.txt`. + +Example: + +``` +$ cd example +$ bench +bench: run benchmarks under 90% cpufreq... +bench: go test -run=^$ -bench=. -count=10 +goos: linux +goarch: amd64 +pkg: golang.design/x/bench/example +BenchmarkDemo-16 21114 57340 ns/op +... +BenchmarkDemo-16 21004 57097 ns/op +PASS +ok golang.design/x/bench/example 17.791s +bench: results are saved to file: ./bench-2020-11-07-19:59:51.txt + +name time/op +Demo-16 57.0µs ±1% + +$ # ... do some changes to the benchmark ... + +$ bench +bench: run benchmarks under 90% cpufreq... +bench: go test -run=^$ -bench=. 
-count=10 +goos: linux +goarch: amd64 +pkg: golang.design/x/bench/example +BenchmarkDemo-16 213145 5625 ns/op +... +BenchmarkDemo-16 212959 5632 ns/op +PASS +ok golang.design/x/bench/example 12.536s +bench: results are saved to file: ./bench-2020-11-07-20:00:16.txt + +name time/op +Demo-16 5.63µs ±0% + +$ bench bench-2020-11-07-19:59:51.txt bench-2020-11-07-20:00:16.txt +name old time/op new time/op delta +Demo-16 57.0µs ±1% 5.6µs ±0% -90.13% (p=0.000 n=10+8) +``` + +### Options + +Options for checking daemon status: + +```sh +bench -list +``` + +Options for statistic tests: + +```sh +bench old.txt [new.txt] # same from benchstat +bench -delta-test +bench -alpha +bench -geomean +bench -split +bench -sort +``` + +Options for running benchmarks: + +```sh +bench -v # enable verbose outputs +bench -shared # enable shared execution +bench -cpufreq 90 # cpu frequency (default: 90) +bench -name BenchmarkXXX # go test `-bench` flag (default: .) +bench -count 20 # go test `-count` flag (default: 10) +bench -time 100x # go test `-benchtime` flag (default: unset) +bench -cpuproc 1,2,4,8,16,32,128 # go test `-cpu` flag (default: unset) +``` + +## License + +© 2020 The golang.design Authors \ No newline at end of file diff --git a/example/bench-2020-11-07-19:59:51.txt b/example/bench-2020-11-07-19:59:51.txt new file mode 100644 index 0000000..c9f1071 --- /dev/null +++ b/example/bench-2020-11-07-19:59:51.txt @@ -0,0 +1,15 @@ +goos: linux +goarch: amd64 +pkg: golang.design/x/bench/example +BenchmarkDemo-16 21114 57340 ns/op +BenchmarkDemo-16 21109 56897 ns/op +BenchmarkDemo-16 21091 56833 ns/op +BenchmarkDemo-16 21121 56821 ns/op +BenchmarkDemo-16 21105 56953 ns/op +BenchmarkDemo-16 21112 56830 ns/op +BenchmarkDemo-16 20836 57499 ns/op +BenchmarkDemo-16 21084 56885 ns/op +BenchmarkDemo-16 20990 57245 ns/op +BenchmarkDemo-16 21004 57097 ns/op +PASS +ok golang.design/x/bench/example 17.791s diff --git a/example/bench-2020-11-07-20:00:16.txt b/example/bench-2020-11-07-20:00:16.txt 
new file mode 100644 index 0000000..f908588 --- /dev/null +++ b/example/bench-2020-11-07-20:00:16.txt @@ -0,0 +1,15 @@ +goos: linux +goarch: amd64 +pkg: golang.design/x/bench/example +BenchmarkDemo-16 213145 5625 ns/op +BenchmarkDemo-16 211988 5629 ns/op +BenchmarkDemo-16 211838 5629 ns/op +BenchmarkDemo-16 212151 5627 ns/op +BenchmarkDemo-16 212577 5627 ns/op +BenchmarkDemo-16 210150 5635 ns/op +BenchmarkDemo-16 212650 5637 ns/op +BenchmarkDemo-16 210063 5670 ns/op +BenchmarkDemo-16 212568 5665 ns/op +BenchmarkDemo-16 212959 5632 ns/op +PASS +ok golang.design/x/bench/example 12.536s diff --git a/example/bench_test.go b/example/bench_test.go new file mode 100644 index 0000000..41fdf19 --- /dev/null +++ b/example/bench_test.go @@ -0,0 +1,19 @@ +// Copyright 2020 The golang.design Initiative Authors. +// All rights reserved. Use of this source code is governed +// by a GNU GPLv3 license that can be found in the LICENSE file. + +package example + +import "testing" + +func grow(n int) { + if n > 0 { + grow(n - 1) + } +} + +func BenchmarkDemo(b *testing.B) { + for i := 0; i < b.N; i++ { + grow(10000) // try change this and re-run `bench` + } +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..e9ab0f9 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module golang.design/x/bench + +go 1.15 diff --git a/init/systemd/README.md b/init/systemd/README.md new file mode 100644 index 0000000..9b89bdd --- /dev/null +++ b/init/systemd/README.md @@ -0,0 +1,6 @@ +To configure systemd to run `bench`, run + +``` +$ sudo install -m 0644 bench.service /etc/systemd/system +$ sudo systemctl enable --now bench.service +``` \ No newline at end of file diff --git a/init/systemd/bench.service b/init/systemd/bench.service new file mode 100644 index 0000000..b26e176 --- /dev/null +++ b/init/systemd/bench.service @@ -0,0 +1,10 @@ +[Unit] +Description=Bench daemon + +[Service] +Type=simple +ExecStart=/usr/bin/bench -daemon +Restart=on-failure + +[Install] +WantedBy=multi-user.target \ 
No newline at end of file diff --git a/init/upstart/README.md b/init/upstart/README.md new file mode 100644 index 0000000..352756d --- /dev/null +++ b/init/upstart/README.md @@ -0,0 +1,6 @@ +To configure Upstart to run `bench`, copy `bench.conf` into `/etc/init/` and run sudo start `bench`. E.g., + +```sh +$ sudo install -m 0644 bench.conf /etc/init/ +$ sudo start bench +``` \ No newline at end of file diff --git a/init/upstart/bench.conf b/init/upstart/bench.conf new file mode 100644 index 0000000..a12771e --- /dev/null +++ b/init/upstart/bench.conf @@ -0,0 +1,11 @@ +# bench - Benchmark performance locking service + +description "bench daemon" + +start on runlevel [2345] +stop on runlevel [!2345] +respawn + +console log + +exec bench -daemon \ No newline at end of file diff --git a/install.sh b/install.sh new file mode 100755 index 0000000..a0db679 --- /dev/null +++ b/install.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +set -e + +BINPATH="$(go env GOBIN)" +if [[ -z "$BINPATH" ]]; then + BINPATH="$(go env GOPATH)/bin" +fi +BIN="$BINPATH/bench" +if [[ ! -x "$BIN" ]]; then + echo "bench binary $BIN does not exist." 
2>&1 + echo "Please run go install golang.design/x/bench" 2>&1 + exit 1 +fi + +echo "Installing $BIN to /usr/bin" 1>&2 +sudo install "$BIN" /usr/bin/bench + +start="-b /usr/bin/bench -daemon" +starttype= +if [[ -d /etc/init ]]; then + echo "Installing init script for Upstart" 1>&2 + sudo install -m 0644 init/upstart/bench.conf /etc/init/ + start="service bench start" + starttype=" (using Upstart)" +fi +if [[ -d /etc/systemd ]]; then + echo "Installing service for systemd" 1>&2 + sudo install -m 0644 init/systemd/bench.service /etc/systemd/system + sudo systemctl enable --quiet bench.service + start="systemctl start bench.service" + starttype=" (using systemd)" +fi + +if /usr/bin/bench -list >/dev/null 2>&1; then + echo "Not starting bench daemon (already running)" 1>&2 +else + echo "Starting bench daemon$starttype" 1>&2 + sudo $start +fi \ No newline at end of file diff --git a/internal/benchfmt/fmt.go b/internal/benchfmt/fmt.go new file mode 100644 index 0000000..6d1c718 --- /dev/null +++ b/internal/benchfmt/fmt.go @@ -0,0 +1,329 @@ +// Package benchfmt provides readers and writers for the Go benchmark format. +// +// The format is documented at https://golang.org/design/14313-benchmark-format +package benchfmt + +import ( + "bufio" + "bytes" + "fmt" + "io" + "sort" + "strconv" + "strings" + "unicode" +) + +// Reader reads benchmark results from an io.Reader. +// Use Next to advance through the results. +// +// br := benchfmt.NewReader(r) +// for br.Next() { +// res := br.Result() +// ... +// } +// err = br.Err() // get any error encountered during iteration +// ... +type Reader struct { + s *bufio.Scanner + labels Labels + // permLabels are permanent labels read from the start of the + // file or provided by AddLabels. They cannot be overridden. 
+ permLabels Labels + lineNum int + // cached from last call to newResult, to save on allocations + lastName string + lastNameLabels Labels + // cached from the last call to Next + result *Result + err error +} + +// NewReader creates a BenchmarkReader that reads from r. +func NewReader(r io.Reader) *Reader { + return &Reader{ + s: bufio.NewScanner(r), + labels: make(Labels), + } +} + +// AddLabels adds additional labels as if they had been read from the header of a file. +// It must be called before the first call to r.Next. +func (r *Reader) AddLabels(labels Labels) { + r.permLabels = labels.Copy() + for k, v := range labels { + r.labels[k] = v + } +} + +// Result represents a single line from a benchmark file. +// All information about that line is self-contained in the Result. +// A Result is immutable once created. +type Result struct { + // Labels is the set of persistent labels that apply to the result. + // Labels must not be modified. + Labels Labels + // NameLabels is the set of ephemeral labels that were parsed + // from the benchmark name/line. + // NameLabels must not be modified. + NameLabels Labels + // LineNum is the line number on which the result was found + LineNum int + // Content is the verbatim input line of the benchmark file, beginning with the string "Benchmark". + Content string +} + +// SameLabels reports whether r and b have the same labels. +func (r *Result) SameLabels(b *Result) bool { + return r.Labels.Equal(b.Labels) && r.NameLabels.Equal(b.NameLabels) +} + +// Labels is a set of key-value strings. +type Labels map[string]string + +// String returns the labels formatted as a comma-separated +// list enclosed in braces. 
+func (l Labels) String() string { + var out bytes.Buffer + out.WriteString("{") + for k, v := range l { + fmt.Fprintf(&out, "%q: %q, ", k, v) + } + if out.Len() > 1 { + // Remove extra ", " + out.Truncate(out.Len() - 2) + } + out.WriteString("}") + return out.String() +} + +// Keys returns a sorted list of the keys in l. +func (l Labels) Keys() []string { + var out []string + for k := range l { + out = append(out, k) + } + sort.Strings(out) + return out +} + +// Equal reports whether l and b have the same keys and values. +func (l Labels) Equal(b Labels) bool { + if len(l) != len(b) { + return false + } + for k := range l { + if l[k] != b[k] { + return false + } + } + return true +} + +// A Printer prints a sequence of benchmark results. +type Printer struct { + w io.Writer + labels Labels +} + +// NewPrinter constructs a BenchmarkPrinter writing to w. +func NewPrinter(w io.Writer) *Printer { + return &Printer{w: w} +} + +// Print writes the lines necessary to recreate r. +func (p *Printer) Print(r *Result) error { + var keys []string + // Print removed keys first. + for k := range p.labels { + if r.Labels[k] == "" { + keys = append(keys, k) + } + } + sort.Strings(keys) + for _, k := range keys { + if _, err := fmt.Fprintf(p.w, "%s:\n", k); err != nil { + return err + } + } + // Then print new or changed keys. + keys = keys[:0] + for k, v := range r.Labels { + if v != "" && p.labels[k] != v { + keys = append(keys, k) + } + } + sort.Strings(keys) + for _, k := range keys { + if _, err := fmt.Fprintf(p.w, "%s: %s\n", k, r.Labels[k]); err != nil { + return err + } + } + // Finally print the actual line itself. + if _, err := fmt.Fprintf(p.w, "%s\n", r.Content); err != nil { + return err + } + p.labels = r.Labels + return nil +} + +// parseNameLabels extracts extra labels from a benchmark name and sets them in labels. 
+func parseNameLabels(name string, labels Labels) { + dash := strings.LastIndex(name, "-") + if dash >= 0 { + // Accept -N as an alias for /gomaxprocs=N + _, err := strconv.Atoi(name[dash+1:]) + if err == nil { + labels["gomaxprocs"] = name[dash+1:] + name = name[:dash] + } + } + parts := strings.Split(name, "/") + labels["name"] = parts[0] + for i, sub := range parts[1:] { + equals := strings.Index(sub, "=") + var key string + if equals >= 0 { + key, sub = sub[:equals], sub[equals+1:] + } else { + key = fmt.Sprintf("sub%d", i+1) + } + labels[key] = sub + } +} + +// newResult parses a line and returns a Result object for the line. +func (r *Reader) newResult(labels Labels, lineNum int, name, content string) *Result { + res := &Result{ + Labels: labels, + LineNum: lineNum, + Content: content, + } + if r.lastName != name { + r.lastName = name + r.lastNameLabels = make(Labels) + parseNameLabels(name, r.lastNameLabels) + } + res.NameLabels = r.lastNameLabels + return res +} + +// Copy returns a new copy of the labels map, to protect against +// future modifications to labels. +func (l Labels) Copy() Labels { + new := make(Labels) + for k, v := range l { + new[k] = v + } + return new +} + +// TODO(quentin): How to represent and efficiently group multiple lines? + +// Next returns the next benchmark result from the file. If there are +// no further results, it returns nil, io.EOF. +func (r *Reader) Next() bool { + if r.err != nil { + return false + } + copied := false + havePerm := r.permLabels != nil + for r.s.Scan() { + r.lineNum++ + line := r.s.Text() + if key, value, ok := parseKeyValueLine(line); ok { + if _, ok := r.permLabels[key]; ok { + continue + } + if !copied { + copied = true + r.labels = r.labels.Copy() + } + // TODO(quentin): Spec says empty value is valid, but + // we need a way to cancel previous labels, so we'll + // treat an empty value as a removal. 
+ if value == "" { + delete(r.labels, key) + } else { + r.labels[key] = value + } + continue + } + // Blank line delimits the header. If we find anything else, the file must not have a header. + if !havePerm { + if line == "" { + r.permLabels = r.labels.Copy() + } else { + r.permLabels = Labels{} + } + } + if fullName, ok := parseBenchmarkLine(line); ok { + r.result = r.newResult(r.labels, r.lineNum, fullName, line) + return true + } + } + if err := r.s.Err(); err != nil { + r.err = err + return false + } + r.err = io.EOF + return false +} + +// Result returns the most recent result generated by a call to Next. +func (r *Reader) Result() *Result { + return r.result +} + +// Err returns the error state of the reader. +func (r *Reader) Err() error { + if r.err == io.EOF { + return nil + } + return r.err +} + +// parseKeyValueLine attempts to parse line as a key: value pair. ok +// indicates whether the line could be parsed. +func parseKeyValueLine(line string) (key, val string, ok bool) { + for i, c := range line { + if i == 0 && !unicode.IsLower(c) { + return + } + if unicode.IsSpace(c) || unicode.IsUpper(c) { + return + } + if i > 0 && c == ':' { + key = line[:i] + val = line[i+1:] + break + } + } + if key == "" { + return + } + if val == "" { + ok = true + return + } + for len(val) > 0 && (val[0] == ' ' || val[0] == '\t') { + val = val[1:] + ok = true + } + return +} + +// parseBenchmarkLine attempts to parse line as a benchmark result. If +// successful, fullName is the name of the benchmark with the +// "Benchmark" prefix stripped, and ok is true. 
+func parseBenchmarkLine(line string) (fullName string, ok bool) { + space := strings.IndexFunc(line, unicode.IsSpace) + if space < 0 { + return + } + name := line[:space] + if !strings.HasPrefix(name, "Benchmark") { + return + } + return name[len("Benchmark"):], true +} diff --git a/internal/cpupower/cpupower.go b/internal/cpupower/cpupower.go new file mode 100644 index 0000000..9d9b056 --- /dev/null +++ b/internal/cpupower/cpupower.go @@ -0,0 +1,134 @@ +// Package cpupower manipulates Linux CPU frequency scaling settings. +package cpupower + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "regexp" + "sort" + "strconv" + "strings" +) + +// Domain is a frequency scaling domain. This may include more than +// one CPU. +type Domain struct { + path string + min, max int + available []int +} + +var cpuRe = regexp.MustCompile(`cpu\d+$`) + +// Domains returns the frequency scaling domains of this host. +func Domains() ([]*Domain, error) { + dir := "/sys/devices/system/cpu" + fs, err := ioutil.ReadDir(dir) + if err != nil { + return nil, err + } + + var domains []*Domain + haveDomains := make(map[string]bool) + for _, f := range fs { + if !f.IsDir() || !cpuRe.MatchString(f.Name()) { + continue + } + pdir := filepath.Join(dir, f.Name(), "cpufreq") + + // Get the frequency domain, if any. + cpus, err := ioutil.ReadFile(filepath.Join(pdir, "freqdomain_cpus")) + if err == nil { + if haveDomains[string(cpus)] { + // We already have a CPU in this domain. 
+ continue + } + haveDomains[string(cpus)] = true + } else if !os.IsNotExist(err) { + return nil, err + } + + min, err := readInt(filepath.Join(pdir, "cpuinfo_min_freq")) + if err != nil { + return nil, err + } + max, err := readInt(filepath.Join(pdir, "cpuinfo_max_freq")) + if err != nil { + return nil, err + } + avail, err := readInts(filepath.Join(pdir, "scaling_available_frequencies")) + if err != nil && !os.IsNotExist(err) { + return nil, err + } + sort.Ints(avail) + domains = append(domains, &Domain{pdir, min, max, avail}) + } + return domains, nil +} + +// AvailableRange returns the available frequency range this CPU is +// capable of and the set of available frequencies in ascending order +// or nil if any frequency can be set. +func (d *Domain) AvailableRange() (int, int, []int) { + return d.min, d.max, d.available +} + +// CurrentRange returns the current frequency range this CPU's +// governor can select between. +func (d *Domain) CurrentRange() (int, int, error) { + min, err := readInt(filepath.Join(d.path, "scaling_min_freq")) + if err != nil { + return 0, 0, err + } + max, err := readInt(filepath.Join(d.path, "scaling_max_freq")) + if err != nil { + return 0, 0, err + } + return min, max, nil +} + +// SetRange sets the frequency range this CPU's governor can select +// between. +func (d *Domain) SetRange(min, max int) error { + // Attempting to set an empty range will cause an IO error. + // Rather than trying to figure out the right order to set + // them in, try both orders. 
+ err1 := writeInt(filepath.Join(d.path, "scaling_min_freq"), min) + if err2 := writeInt(filepath.Join(d.path, "scaling_max_freq"), max); err2 != nil { + return err2 + } + if err1 != nil { + err1 = writeInt(filepath.Join(d.path, "scaling_min_freq"), min) + } + return err1 +} + +func readInt(path string) (int, error) { + data, err := ioutil.ReadFile(path) + if err != nil { + return 0, err + } + return strconv.Atoi(strings.TrimSpace(string(data))) +} + +func writeInt(path string, val int) error { + return ioutil.WriteFile(path, []byte(fmt.Sprintf("%d", val)), 0) +} + +func readInts(path string) ([]int, error) { + data, err := ioutil.ReadFile(path) + if err != nil { + return nil, err + } + fields := strings.Fields(string(data)) + ints := make([]int, len(fields)) + for i, field := range fields { + ints[i], err = strconv.Atoi(field) + if err != nil { + return nil, err + } + } + return ints, nil +} diff --git a/internal/lock/client.go b/internal/lock/client.go new file mode 100644 index 0000000..41925e4 --- /dev/null +++ b/internal/lock/client.go @@ -0,0 +1,71 @@ +package lock + +import ( + "encoding/gob" + "fmt" + "log" + "net" +) + +// Client is a lock client +type Client struct { + c net.Conn + + gr *gob.Encoder + gw *gob.Decoder +} + +// NewClient returns a lock client +func NewClient() *Client { + c, err := net.Dial("unix", Socketpath) + if err != nil { + log.Printf("failed to connect bench daemon: %v", err) + return nil + } + + // Send credentials. 
+ err = writeCredentials(c.(*net.UnixConn)) + if err != nil { + log.Fatal("failed to send credentials: ", err) + } + + gr, gw := gob.NewEncoder(c), gob.NewDecoder(c) + + return &Client{c, gr, gw} +} + +func (c *Client) do(action perflockAction, response interface{}) { + err := c.gr.Encode(action) + if err != nil { + log.Fatal(err) + } + + err = c.gw.Decode(response) + if err != nil { + log.Fatal(err) + } +} + +// Acquire acuiqres the lock +func (c *Client) Acquire(shared, nonblocking bool, msg string) bool { + var ok bool + c.do(perflockAction{actionAcquire{Shared: shared, NonBlocking: nonblocking, Msg: msg}}, &ok) + return ok +} + +// List lists all perflock actions +func (c *Client) List() []string { + var list []string + c.do(perflockAction{actionList{}}, &list) + return list +} + +// SetCPUFreq sets the given cpu frequency +func (c *Client) SetCPUFreq(percent int) error { + var err string + c.do(perflockAction{actionSetCPUFreq{Percent: percent}}, &err) + if err == "" { + return nil + } + return fmt.Errorf("%s", err) +} diff --git a/internal/lock/creds.go b/internal/lock/creds.go new file mode 100644 index 0000000..56d6f0b --- /dev/null +++ b/internal/lock/creds.go @@ -0,0 +1,61 @@ +package lock + +import ( + "fmt" + "net" + "os" + "syscall" +) + +// TODO: Use SO_PEERCRED instead? + +func writeCredentials(c *net.UnixConn) error { + ucred := syscall.Ucred{Pid: int32(os.Getpid()), Uid: uint32(os.Getuid()), Gid: uint32(os.Getgid())} + credOob := syscall.UnixCredentials(&ucred) + credMsg := []byte("x") + n, oobn, err := c.WriteMsgUnix(credMsg, credOob, nil) + if err != nil { + return err + } + if n != 1 { + return fmt.Errorf("short send (%d bytes)", n) + } + if oobn != len(credOob) { + return fmt.Errorf("short OOB send (%d bytes)", oobn) + } + return nil +} + +func readCredentials(c *net.UnixConn) (*syscall.Ucred, error) { + // Enable receiving credentials on c. 
+ f, err := c.File() + if err != nil { + return nil, err + } + err = syscall.SetsockoptInt(int(f.Fd()), syscall.SOL_SOCKET, syscall.SO_PASSCRED, 1) + f.Close() + if err != nil { + return nil, err + } + + // Receive credentials. + buf := make([]byte, 1) + oob := make([]byte, 128) + n, oobn, _, _, err := c.ReadMsgUnix(buf, oob) + if err != nil { + return nil, err + } + if n != 1 { + return nil, fmt.Errorf("expected 1 byte, got %d", n) + } + + // Parse OOB data. + scms, err := syscall.ParseSocketControlMessage(oob[:oobn]) + if err != nil { + return nil, err + } + if len(scms) != 1 { + return nil, fmt.Errorf("expected 1 control message, got %d", len(scms)) + } + return syscall.ParseUnixCredentials(&scms[0]) +} diff --git a/internal/lock/daemon.go b/internal/lock/daemon.go new file mode 100644 index 0000000..2af0e91 --- /dev/null +++ b/internal/lock/daemon.go @@ -0,0 +1,262 @@ +package lock + +import ( + "encoding/gob" + "fmt" + "io" + "log" + "net" + "os" + "os/user" + "time" + + "golang.design/x/bench/internal/cpupower" +) + +var theLock perflock + +// RunDaemon runs lock daemon +func RunDaemon() { + // check if daemon is running + c, _ := net.Dial("unix", Socketpath) + if c != nil { + c.Close() + log.Fatalf("The bench daemon is already running at %s !", Socketpath) + return + } + + os.Remove(Socketpath) + l, err := net.Listen("unix", Socketpath) + if err != nil { + log.Fatal(err) + } + defer l.Close() + + // Make the socket world-writable/connectable. + err = os.Chmod(Socketpath, 0777) + if err != nil { + log.Fatal(err) + } + + // Receive connections. 
+ for { + conn, err := l.Accept() + if err != nil { + log.Fatal(err) + } + + go func(c net.Conn) { + defer c.Close() + NewServer(c).Serve() + }(conn) + } +} + +// Server is the bench lock server +type Server struct { + c net.Conn + userName string + + locker *locker + acquiring bool + + oldCPUFreqs []*cpuFreqSettings +} + +// NewServer returns a bench lock server +func NewServer(c net.Conn) *Server { + return &Server{c: c} +} + +// Serve serves the bench lock server +func (s *Server) Serve() { + // Drop any held locks if we exit for any reason. + defer s.drop() + + // Get connection credentials. + ucred, err := readCredentials(s.c.(*net.UnixConn)) + if err != nil { + log.Print("reading credentials: ", err) + return + } + + u, err := user.LookupId(fmt.Sprintf("%d", ucred.Uid)) + s.userName = "???" + if err == nil { + s.userName = u.Username + } + + // Receive incoming actions. We do this in a goroutine so the + // main handler can select on EOF or lock acquisition. + actions := make(chan perflockAction) + go func() { + gr := gob.NewDecoder(s.c) + for { + var msg perflockAction + err := gr.Decode(&msg) + if err != nil { + if err != io.EOF { + log.Print(err) + } + close(actions) + return + } + actions <- msg + } + }() + + // Process incoming actions. + var acquireC <-chan bool + gw := gob.NewEncoder(s.c) + for { + select { + case action, ok := <-actions: + if !ok { + // Connection closed. + return + } + if s.acquiring { + log.Printf("protocol error: message while acquiring") + return + } + switch action := action.Action.(type) { + case actionAcquire: + if s.locker != nil { + log.Printf("protocol error: acquiring lock twice") + return + } + msg := fmt.Sprintf("%s\t%s\t%s", s.userName, time.Now().Format(time.Stamp), action.Msg) + if action.Shared { + msg += " [shared]" + } + s.locker = theLock.Enqueue(action.Shared, action.NonBlocking, msg) + if s.locker != nil { + // Enqueued. Wait for acquire. 
+ s.acquiring = true + acquireC = s.locker.C + } else { + // Non-blocking acquire failed. + if err := gw.Encode(false); err != nil { + log.Print(err) + return + } + } + + case actionList: + list := theLock.Queue() + if err := gw.Encode(list); err != nil { + log.Print(err) + return + } + + case actionSetCPUFreq: + if s.locker == nil { + log.Printf("protocol error: setting cpuFreq without lock") + return + } + err := s.setCPUFreq(action.Percent) + errString := "" + if err != nil { + errString = err.Error() + } + if err := gw.Encode(errString); err != nil { + log.Print(err) + return + } + + default: + log.Printf("unknown message") + return + } + + case <-acquireC: + // Lock acquired. + s.acquiring, acquireC = false, nil + if err := gw.Encode(true); err != nil { + log.Print(err) + return + } + } + } +} + +func (s *Server) drop() { + // Restore the CPU cpuFreq before releasing the lock. + if s.oldCPUFreqs != nil { + s.restoreCPUFreq() + s.oldCPUFreqs = nil + } + // Release the lock. + if s.locker != nil { + theLock.Dequeue(s.locker) + s.locker = nil + } +} + +type cpuFreqSettings struct { + domain *cpupower.Domain + min, max int +} + +func (s *Server) setCPUFreq(percent int) error { + domains, err := cpupower.Domains() + if err != nil { + return err + } + if len(domains) == 0 { + return fmt.Errorf("no power domains") + } + + // Save current frequency settings. + old := []*cpuFreqSettings{} + for _, d := range domains { + min, max, err := d.CurrentRange() + if err != nil { + return err + } + old = append(old, &cpuFreqSettings{d, min, max}) + } + s.oldCPUFreqs = old + + // Set new settings. + abs := func(x int) int { + if x < 0 { + return -x + } + return x + } + for _, d := range domains { + min, max, avail := d.AvailableRange() + target := (max-min)*percent/100 + min + + // Find the nearest available frequency. 
+ if len(avail) != 0 { + closest := avail[0] + for _, a := range avail { + if abs(target-a) < abs(target-closest) { + closest = a + } + } + target = closest + } + + err := d.SetRange(target, target) + if err != nil { + return err + } + } + + return nil +} + +func (s *Server) restoreCPUFreq() error { + var err error + for _, g := range s.oldCPUFreqs { + // Try to set all of the domains, even if one fails. + err1 := g.domain.SetRange(g.min, g.max) + if err1 != nil && err == nil { + err = err1 + } + } + return err +} diff --git a/internal/lock/flag.go b/internal/lock/flag.go new file mode 100644 index 0000000..23be15d --- /dev/null +++ b/internal/lock/flag.go @@ -0,0 +1,36 @@ +package lock + +import ( + "fmt" + "regexp" + "strconv" +) + +// Socketpath between lock daemon and client +var Socketpath = "/var/run/bench.socket" + +// CpufreqFlag ... +type CpufreqFlag struct { + Percent int +} + +func (f *CpufreqFlag) String() string { + if f.Percent < 0 { + return "none" + } + return fmt.Sprintf("%d", f.Percent) +} + +// Set set the cpu frequency percentage +func (f *CpufreqFlag) Set(v string) error { + if v == "none" { + f.Percent = -1 + } else { + m := regexp.MustCompile(`^([0-9]+)$`).FindStringSubmatch(v) + if m == nil { + return fmt.Errorf("cpufreq must be \"none\" or \"N\"") + } + f.Percent, _ = strconv.Atoi(m[1]) + } + return nil +} diff --git a/internal/lock/lock.go b/internal/lock/lock.go new file mode 100644 index 0000000..0dab757 --- /dev/null +++ b/internal/lock/lock.go @@ -0,0 +1,84 @@ +package lock + +import "sync" + +type perflock struct { + l sync.Mutex + q []*locker +} + +type locker struct { + C <-chan bool + c chan<- bool + shared bool + woken bool + + msg string +} + +func (l *perflock) Enqueue(shared, nonblocking bool, msg string) *locker { + ch := make(chan bool, 1) + locker := &locker{ch, ch, shared, false, msg} + + // Enqueue. + l.l.Lock() + defer l.l.Unlock() + l.setQ(append(l.q, locker)) + + if nonblocking && !locker.woken { + // Acquire failed. 
Dequeue. + l.setQ(l.q[:len(l.q)-1]) + return nil + } + + return locker +} + +func (l *perflock) Dequeue(locker *locker) { + l.l.Lock() + defer l.l.Unlock() + for i, o := range l.q { + if locker == o { + copy(l.q[i:], l.q[i+1:]) + l.setQ(l.q[:len(l.q)-1]) + return + } + } + panic("Dequeue of non-enqueued locker") +} + +func (l *perflock) Queue() []string { + var q []string + + l.l.Lock() + defer l.l.Unlock() + for _, locker := range l.q { + q = append(q, locker.msg) + } + return q +} + +func (l *perflock) setQ(q []*locker) { + l.q = q + if len(q) == 0 { + return + } + + wake := func(locker *locker) { + if locker.woken == false { + locker.woken = true + locker.c <- true + } + } + if q[0].shared { + // Wake all shared acquires at the head of the queue. + for _, locker := range q { + if !locker.shared { + break + } + wake(locker) + } + } else { + wake(q[0]) + } +} diff --git a/internal/lock/proto.go b/internal/lock/proto.go new file mode 100644 index 0000000..a990e48 --- /dev/null +++ b/internal/lock/proto.go @@ -0,0 +1,35 @@ +package lock + +import "encoding/gob" + +type perflockAction struct { + Action interface{} +} + +// actionAcquire acquires the lock. The response is a boolean +// indicating whether or not the lock was acquired (which may be false +// for a non-blocking acquire). +type actionAcquire struct { + Shared bool + NonBlocking bool + Msg string +} + +// actionList returns the list of current and pending lock +// acquisitions as a []string. +type actionList struct { +} + +// actionSetCPUFreq sets the CPU frequency of all CPUs. The caller +// must hold the lock. +type actionSetCPUFreq struct { + // Percent indicates the percent to set the CPU cpuFreq to + // between the lower and highest available frequencies. 
+ Percent int +} + +func init() { + gob.Register(actionAcquire{}) + gob.Register(actionList{}) + gob.Register(actionSetCPUFreq{}) +} diff --git a/internal/stat/algo.go b/internal/stat/algo.go new file mode 100644 index 0000000..2f3336c --- /dev/null +++ b/internal/stat/algo.go @@ -0,0 +1,108 @@ +package stat + +// Miscellaneous helper algorithms + +import ( + "fmt" +) + +func maxint(a, b int) int { + if a > b { + return a + } + return b +} + +func minint(a, b int) int { + if a < b { + return a + } + return b +} + +func sumint(xs []int) int { + sum := 0 + for _, x := range xs { + sum += x + } + return sum +} + +// bisect returns an x in [low, high] such that |f(x)| <= tolerance +// using the bisection method. +// +// f(low) and f(high) must have opposite signs. +// +// If f does not have a root in this interval (e.g., it is +// discontiguous), this returns the X of the apparent discontinuity +// and false. +func bisect(f func(float64) float64, low, high, tolerance float64) (float64, bool) { + flow, fhigh := f(low), f(high) + if -tolerance <= flow && flow <= tolerance { + return low, true + } + if -tolerance <= fhigh && fhigh <= tolerance { + return high, true + } + if mathSign(flow) == mathSign(fhigh) { + panic(fmt.Sprintf("root of f is not bracketed by [low, high]; f(%g)=%g f(%g)=%g", low, flow, high, fhigh)) + } + for { + mid := (high + low) / 2 + fmid := f(mid) + if -tolerance <= fmid && fmid <= tolerance { + return mid, true + } + if mid == high || mid == low { + return mid, false + } + if mathSign(fmid) == mathSign(flow) { + low = mid + flow = fmid + } else { + high = mid + fhigh = fmid + } + } +} + +// bisectBool implements the bisection method on a boolean function. +// It returns x1, x2 ∈ [low, high], x1 < x2 such that f(x1) != f(x2) +// and x2 - x1 <= xtol. +// +// If f(low) == f(high), it panics. 
+func bisectBool(f func(float64) bool, low, high, xtol float64) (x1, x2 float64) { + flow, fhigh := f(low), f(high) + if flow == fhigh { + panic(fmt.Sprintf("root of f is not bracketed by [low, high]; f(%g)=%v f(%g)=%v", low, flow, high, fhigh)) + } + for { + if high-low <= xtol { + return low, high + } + mid := (high + low) / 2 + if mid == high || mid == low { + return low, high + } + fmid := f(mid) + if fmid == flow { + low = mid + flow = fmid + } else { + high = mid + fhigh = fmid + } + } +} + +// series returns the sum of the series f(0), f(1), ... +// +// This implementation is fast, but subject to round-off error. +func series(f func(float64) float64) float64 { + y, yp := 0.0, 1.0 + for n := 0.0; y != yp; n++ { + yp = y + y += f(n) + } + return y +} diff --git a/internal/stat/beta.go b/internal/stat/beta.go new file mode 100644 index 0000000..49a4805 --- /dev/null +++ b/internal/stat/beta.go @@ -0,0 +1,88 @@ +package stat + +import "math" + +func lgamma(x float64) float64 { + y, _ := math.Lgamma(x) + return y +} + +// mathBeta returns the value of the complete beta function B(a, b). +func mathBeta(a, b float64) float64 { + // B(x,y) = Γ(x)Γ(y) / Γ(x+y) + return math.Exp(lgamma(a) + lgamma(b) - lgamma(a+b)) +} + +// mathBetaInc returns the value of the regularized incomplete beta +// function Iₓ(a, b). +// +// This is not to be confused with the "incomplete beta function", +// which can be computed as BetaInc(x, a, b)*Beta(a, b). +// +// If x < 0 or x > 1, returns NaN. +func mathBetaInc(x, a, b float64) float64 { + // Based on Numerical Recipes in C, section 6.4. This uses the + // continued fraction definition of I: + // + // (xᵃ*(1-x)ᵇ)/(a*B(a,b)) * (1/(1+(d₁/(1+(d₂/(1+...)))))) + // + // where B(a,b) is the beta function and + // + // d_{2m+1} = -(a+m)(a+b+m)x/((a+2m)(a+2m+1)) + // d_{2m} = m(b-m)x/((a+2m-1)(a+2m)) + if x < 0 || x > 1 { + return math.NaN() + } + bt := 0.0 + if 0 < x && x < 1 { + // Compute the coefficient before the continued + // fraction. 
+ bt = math.Exp(lgamma(a+b) - lgamma(a) - lgamma(b) + + a*math.Log(x) + b*math.Log(1-x)) + } + if x < (a+1)/(a+b+2) { + // Compute continued fraction directly. + return bt * betacf(x, a, b) / a + } + // Compute continued fraction after symmetry transform. + return 1 - bt*betacf(1-x, b, a)/b +} + +// betacf is the continued fraction component of the regularized +// incomplete beta function Iₓ(a, b). +func betacf(x, a, b float64) float64 { + const maxIterations = 200 + const epsilon = 3e-14 + + raiseZero := func(z float64) float64 { + if math.Abs(z) < math.SmallestNonzeroFloat64 { + return math.SmallestNonzeroFloat64 + } + return z + } + + c := 1.0 + d := 1 / raiseZero(1-(a+b)*x/(a+1)) + h := d + for m := 1; m <= maxIterations; m++ { + mf := float64(m) + + // Even step of the recurrence. + numer := mf * (b - mf) * x / ((a + 2*mf - 1) * (a + 2*mf)) + d = 1 / raiseZero(1+numer*d) + c = raiseZero(1 + numer/c) + h *= d * c + + // Odd step of the recurrence. + numer = -(a + mf) * (a + b + mf) * x / ((a + 2*mf) * (a + 2*mf + 1)) + d = 1 / raiseZero(1+numer*d) + c = raiseZero(1 + numer/c) + hfac := d * c + h *= hfac + + if math.Abs(hfac-1) < epsilon { + return h + } + } + panic("betainc: a or b too big; failed to converge") +} diff --git a/internal/stat/data.go b/internal/stat/data.go new file mode 100644 index 0000000..64d91b8 --- /dev/null +++ b/internal/stat/data.go @@ -0,0 +1,230 @@ +package stat + +import ( + "bytes" + "fmt" + "io" + "strconv" + "strings" + + "golang.design/x/bench/internal/benchfmt" + "golang.design/x/bench/internal/term" +) + +// A Collection is a collection of benchmark results. +type Collection struct { + // Configs, Groups, and Units give the set of configs, + // groups, and units from the keys in Stats in an order + // meant to match the order the benchmarks were read in. 
+ Configs, Groups, Units []string + + // Benchmarks gives the set of benchmarks from the keys in + // Stats by group in an order meant to match the order + // benchmarks were read in. + Benchmarks map[string][]string + + // Metrics holds the accumulated metrics for each key. + Metrics map[Key]*Metrics + + // DeltaTest is the test to use to decide if a change is significant. + // If nil, it defaults to UTest. + DeltaTest DeltaTest + + // Alpha is the p-value cutoff to report a change as significant. + // If zero, it defaults to 0.05. + Alpha float64 + + // AddGeoMean specifies whether to add a line to the table + // showing the geometric mean of all the benchmark results. + AddGeoMean bool + + // SplitBy specifies the labels to split results by. + // By default, results will only be split by full name. + SplitBy []string + + // Order specifies the row display order for this table. + // If Order is nil, the table rows are printed in order of + // first appearance in the input. + Order Order +} + +// A Key identifies one metric (e.g., "ns/op", "B/op") from one +// benchmark (function name sans "Benchmark" prefix) and optional +// group in one configuration (input file name). +type Key struct { + Config, Group, Benchmark, Unit string +} + +// A Metrics holds the measurements of a single metric +// (for example, ns/op or MB/s) +// for all runs of a particular benchmark. +type Metrics struct { + Unit string // unit being measured + Values []float64 // measured values + RValues []float64 // Values with outliers removed + Min float64 // min of RValues + Mean float64 // mean of RValues + Max float64 // max of RValues +} + +// FormatMean formats m.Mean using scaler. +func (m *Metrics) FormatMean(scaler Scaler) string { + var s string + if scaler != nil { + s = scaler(m.Mean) + } else { + s = fmt.Sprint(m.Mean) + } + return s +} + +// FormatDiff computes and formats the percent variation of max and min compared to mean. 
+// If m.Mean or m.Max is zero, FormatDiff returns an empty string.
+func (c *Collection) addMetrics(key Key) *Metrics { + if c.Metrics == nil { + c.Metrics = make(map[Key]*Metrics) + } + if stat, ok := c.Metrics[key]; ok { + return stat + } + + addString := func(strings *[]string, add string) { + for _, s := range *strings { + if s == add { + return + } + } + *strings = append(*strings, add) + } + addString(&c.Configs, key.Config) + addString(&c.Groups, key.Group) + if c.Benchmarks == nil { + c.Benchmarks = make(map[string][]string) + } + benchmarks := c.Benchmarks[key.Group] + addString(&benchmarks, key.Benchmark) + c.Benchmarks[key.Group] = benchmarks + addString(&c.Units, key.Unit) + m := &Metrics{Unit: key.Unit} + c.Metrics[key] = m + return m +} + +// AddFile adds the benchmark results in the formatted data +// (read from the reader r) to the named configuration. +func (c *Collection) AddFile(config string, f io.Reader) error { + c.Configs = append(c.Configs, config) + key := Key{Config: config} + br := benchfmt.NewReader(f) + for br.Next() { + c.addResult(key, br.Result()) + } + return br.Err() +} + +// AddData adds the benchmark results in the formatted data +// (read from the reader r) to the named configuration. +func (c *Collection) AddData(config string, data []byte) error { + return c.AddFile(config, bytes.NewReader(data)) +} + +// AddResults adds the benchmark results to the named configuration. 
+func (c *Collection) AddResults(config string, results []*benchfmt.Result) { + c.Configs = append(c.Configs, config) + key := Key{Config: config} + for _, r := range results { + c.addResult(key, r) + } +} + +func (c *Collection) addResult(key Key, r *benchfmt.Result) { + f := strings.Fields(r.Content) + if len(f) < 4 { + return + } + name := f[0] + if !strings.HasPrefix(name, "Benchmark") { + return + } + name = strings.TrimPrefix(name, "Benchmark") + n, _ := strconv.Atoi(f[1]) + if n == 0 { + return + } + key.Group = c.makeGroup(r) + key.Benchmark = name + for i := 2; i+2 <= len(f); i += 2 { + val, err := strconv.ParseFloat(f[i], 64) + if err != nil { + continue + } + key.Unit = f[i+1] + m := c.addMetrics(key) + m.Values = append(m.Values, val) + } +} + +func (c *Collection) makeGroup(r *benchfmt.Result) string { + var out string + for _, s := range c.SplitBy { + v := r.NameLabels[s] + if v == "" { + v = r.Labels[s] + } + if v != "" { + if out != "" { + out = out + " " + } + out += fmt.Sprintf("%s:%s", s, v) + } + } + return out +} diff --git a/internal/stat/delta.go b/internal/stat/delta.go new file mode 100644 index 0000000..9ec2605 --- /dev/null +++ b/internal/stat/delta.go @@ -0,0 +1,49 @@ +package stat + +import "errors" + +// A DeltaTest compares the old and new metrics and returns the +// expected probability that they are drawn from the same distribution. +// +// If a probability cannot be computed, the DeltaTest returns an +// error explaining why. Common errors include ErrSamplesEqual +// (all samples are equal), ErrSampleSize (there aren't enough samples), +// and ErrZeroVariance (the sample has zero variance). +// +// As a special case, the missing test NoDeltaTest returns -1, nil. +type DeltaTest func(old, new *Metrics) (float64, error) + +// Errors returned by DeltaTest. 
+var ( + ErrSamplesEqual = errors.New("all equal") + ErrSampleSize = errors.New("too few samples") + ErrZeroVariance = errors.New("zero variance") + ErrMismatchedSamples = errors.New("samples have different lengths") +) + +// NoDeltaTest applies no delta test; it returns -1, nil. +func NoDeltaTest(old, new *Metrics) (pval float64, err error) { + return -1, nil +} + +// TTest is a DeltaTest using the two-sample Welch t-test. +func TTest(old, new *Metrics) (pval float64, err error) { + t, err := TwoSampleWelchTTest( + Sample{Xs: old.RValues}, + Sample{Xs: new.RValues}, + LocationDiffers, + ) + if err != nil { + return -1, err + } + return t.P, nil +} + +// UTest is a DeltaTest using the Mann-Whitney U test. +func UTest(old, new *Metrics) (pval float64, err error) { + u, err := MannWhitneyUTest(old.RValues, new.RValues, LocationDiffers) + if err != nil { + return -1, err + } + return u.P, nil +} diff --git a/internal/stat/mathx.go b/internal/stat/mathx.go new file mode 100644 index 0000000..4673609 --- /dev/null +++ b/internal/stat/mathx.go @@ -0,0 +1,74 @@ +package stat + +import "math" + +var inf = math.Inf(1) +var nan = math.NaN() + +// mathSign returns the sign of x: -1 if x < 0, 0 if x == 0, 1 if x > 0. +// If x is NaN, it returns NaN. +func mathSign(x float64) float64 { + if x == 0 { + return 0 + } else if x < 0 { + return -1 + } else if x > 0 { + return 1 + } + return nan +} + +const smallFactLimit = 20 // 20! => 62 bits +var smallFact [smallFactLimit + 1]int64 + +func init() { + smallFact[0] = 1 + fact := int64(1) + for n := int64(1); n <= smallFactLimit; n++ { + fact *= n + smallFact[n] = fact + } +} + +// mathChoose returns the binomial coefficient of n and k. +func mathChoose(n, k int) float64 { + if k == 0 || k == n { + return 1 + } + if k < 0 || n < k { + return 0 + } + if n <= smallFactLimit { // Implies k <= smallFactLimit + // It's faster to do several integer multiplications + // than it is to do an extra integer division. 
+ // Remarkably, this is also faster than pre-computing + // Pascal's triangle (presumably because this is very + // cache efficient). + numer := int64(1) + for n1 := int64(n - (k - 1)); n1 <= int64(n); n1++ { + numer *= n1 + } + denom := smallFact[k] + return float64(numer / denom) + } + + return math.Exp(lchoose(n, k)) +} + +// mathLchoose returns math.Log(mathChoose(n, k)). +func mathLchoose(n, k int) float64 { + if k == 0 || k == n { + return 0 + } + if k < 0 || n < k { + return math.NaN() + } + return lchoose(n, k) +} + +func lchoose(n, k int) float64 { + a, _ := math.Lgamma(float64(n + 1)) + b, _ := math.Lgamma(float64(k + 1)) + c, _ := math.Lgamma(float64(n - k + 1)) + return a - b - c +} diff --git a/internal/stat/normaldist.go b/internal/stat/normaldist.go new file mode 100644 index 0000000..bf59ea8 --- /dev/null +++ b/internal/stat/normaldist.go @@ -0,0 +1,137 @@ +package stat + +import ( + "math" + "math/rand" +) + +// NormalDist is a normal (Gaussian) distribution with mean Mu and +// standard deviation Sigma. 
+type NormalDist struct { + Mu, Sigma float64 +} + +// StdNormal is the standard normal distribution (Mu = 0, Sigma = 1) +var StdNormal = NormalDist{0, 1} + +// 1/sqrt(2 * pi) +const invSqrt2Pi = 0.39894228040143267793994605993438186847585863116493465766592583 + +func (n NormalDist) PDF(x float64) float64 { + z := x - n.Mu + return math.Exp(-z*z/(2*n.Sigma*n.Sigma)) * invSqrt2Pi / n.Sigma +} + +func (n NormalDist) pdfEach(xs []float64) []float64 { + res := make([]float64, len(xs)) + if n.Mu == 0 && n.Sigma == 1 { + // Standard normal fast path + for i, x := range xs { + res[i] = math.Exp(-x*x/2) * invSqrt2Pi + } + } else { + a := -1 / (2 * n.Sigma * n.Sigma) + b := invSqrt2Pi / n.Sigma + for i, x := range xs { + z := x - n.Mu + res[i] = math.Exp(z*z*a) * b + } + } + return res +} + +func (n NormalDist) CDF(x float64) float64 { + return math.Erfc(-(x-n.Mu)/(n.Sigma*math.Sqrt2)) / 2 +} + +func (n NormalDist) cdfEach(xs []float64) []float64 { + res := make([]float64, len(xs)) + a := 1 / (n.Sigma * math.Sqrt2) + for i, x := range xs { + res[i] = math.Erfc(-(x-n.Mu)*a) / 2 + } + return res +} + +func (n NormalDist) InvCDF(p float64) (x float64) { + // This is based on Peter John Acklam's inverse normal CDF + // algorithm: http://home.online.no/~pjacklam/notes/invnorm/ + const ( + a1 = -3.969683028665376e+01 + a2 = 2.209460984245205e+02 + a3 = -2.759285104469687e+02 + a4 = 1.383577518672690e+02 + a5 = -3.066479806614716e+01 + a6 = 2.506628277459239e+00 + + b1 = -5.447609879822406e+01 + b2 = 1.615858368580409e+02 + b3 = -1.556989798598866e+02 + b4 = 6.680131188771972e+01 + b5 = -1.328068155288572e+01 + + c1 = -7.784894002430293e-03 + c2 = -3.223964580411365e-01 + c3 = -2.400758277161838e+00 + c4 = -2.549732539343734e+00 + c5 = 4.374664141464968e+00 + c6 = 2.938163982698783e+00 + + d1 = 7.784695709041462e-03 + d2 = 3.224671290700398e-01 + d3 = 2.445134137142996e+00 + d4 = 3.754408661907416e+00 + + plow = 0.02425 + phigh = 1 - plow + ) + + if p < 0 || p > 1 { + return nan + 
} else if p == 0 { + return -inf + } else if p == 1 { + return inf + } + + if p < plow { + // Rational approximation for lower region. + q := math.Sqrt(-2 * math.Log(p)) + x = (((((c1*q+c2)*q+c3)*q+c4)*q+c5)*q + c6) / + ((((d1*q+d2)*q+d3)*q+d4)*q + 1) + } else if phigh < p { + // Rational approximation for upper region. + q := math.Sqrt(-2 * math.Log(1-p)) + x = -(((((c1*q+c2)*q+c3)*q+c4)*q+c5)*q + c6) / + ((((d1*q+d2)*q+d3)*q+d4)*q + 1) + } else { + // Rational approximation for central region. + q := p - 0.5 + r := q * q + x = (((((a1*r+a2)*r+a3)*r+a4)*r+a5)*r + a6) * q / + (((((b1*r+b2)*r+b3)*r+b4)*r+b5)*r + 1) + } + + // Refine approximation. + e := 0.5*math.Erfc(-x/math.Sqrt2) - p + u := e * math.Sqrt(2*math.Pi) * math.Exp(x*x/2) + x = x - u/(1+x*u/2) + + // Adjust from standard normal. + return x*n.Sigma + n.Mu +} + +func (n NormalDist) Rand(r *rand.Rand) float64 { + var x float64 + if r == nil { + x = rand.NormFloat64() + } else { + x = r.NormFloat64() + } + return x*n.Sigma + n.Mu +} + +func (n NormalDist) Bounds() (float64, float64) { + const stddevs = 3 + return n.Mu - stddevs*n.Sigma, n.Mu + stddevs*n.Sigma +} diff --git a/internal/stat/sample.go b/internal/stat/sample.go new file mode 100644 index 0000000..8bc68d7 --- /dev/null +++ b/internal/stat/sample.go @@ -0,0 +1,336 @@ +package stat + +import ( + "math" + "sort" +) + +// Sample is a collection of possibly weighted data points. +type Sample struct { + // Xs is the slice of sample values. + Xs []float64 + + // Weights[i] is the weight of sample Xs[i]. If Weights is + // nil, all Xs have weight 1. Weights must have the same + // length of Xs and all values must be non-negative. + Weights []float64 + + // Sorted indicates that Xs is sorted in ascending order. + Sorted bool +} + +// Bounds returns the minimum and maximum values of xs. 
+func Bounds(xs []float64) (min float64, max float64) { + if len(xs) == 0 { + return math.NaN(), math.NaN() + } + min, max = xs[0], xs[0] + for _, x := range xs { + if x < min { + min = x + } + if x > max { + max = x + } + } + return +} + +// Bounds returns the minimum and maximum values of the Sample. +// +// If the Sample is weighted, this ignores samples with zero weight. +// +// This is constant time if s.Sorted and there are no zero-weighted +// values. +func (s Sample) Bounds() (min float64, max float64) { + if len(s.Xs) == 0 || (!s.Sorted && s.Weights == nil) { + return Bounds(s.Xs) + } + + if s.Sorted { + if s.Weights == nil { + return s.Xs[0], s.Xs[len(s.Xs)-1] + } + min, max = math.NaN(), math.NaN() + for i, w := range s.Weights { + if w != 0 { + min = s.Xs[i] + break + } + } + if math.IsNaN(min) { + return + } + for i := range s.Weights { + if s.Weights[len(s.Weights)-i-1] != 0 { + max = s.Xs[len(s.Weights)-i-1] + break + } + } + } else { + min, max = math.Inf(1), math.Inf(-1) + for i, x := range s.Xs { + w := s.Weights[i] + if x < min && w != 0 { + min = x + } + if x > max && w != 0 { + max = x + } + } + if math.IsInf(min, 0) { + min, max = math.NaN(), math.NaN() + } + } + return +} + +// vecSum returns the sum of xs. +func vecSum(xs []float64) float64 { + sum := 0.0 + for _, x := range xs { + sum += x + } + return sum +} + +// Sum returns the (possibly weighted) sum of the Sample. +func (s Sample) Sum() float64 { + if s.Weights == nil { + return vecSum(s.Xs) + } + sum := 0.0 + for i, x := range s.Xs { + sum += x * s.Weights[i] + } + return sum +} + +// Weight returns the total weight of the Sasmple. +func (s Sample) Weight() float64 { + if s.Weights == nil { + return float64(len(s.Xs)) + } + return vecSum(s.Weights) +} + +// Mean returns the arithmetic mean of xs. 
+func Mean(xs []float64) float64 { + if len(xs) == 0 { + return math.NaN() + } + m := 0.0 + for i, x := range xs { + m += (x - m) / float64(i+1) + } + return m +} + +// Mean returns the arithmetic mean of the Sample. +func (s Sample) Mean() float64 { + if len(s.Xs) == 0 || s.Weights == nil { + return Mean(s.Xs) + } + + m, wsum := 0.0, 0.0 + for i, x := range s.Xs { + // Use weighted incremental mean: + // m_i = (1 - w_i/wsum_i) * m_(i-1) + (w_i/wsum_i) * x_i + // = m_(i-1) + (x_i - m_(i-1)) * (w_i/wsum_i) + w := s.Weights[i] + wsum += w + m += (x - m) * w / wsum + } + return m +} + +// GeoMean returns the geometric mean of xs. xs must be positive. +func GeoMean(xs []float64) float64 { + if len(xs) == 0 { + return math.NaN() + } + m := 0.0 + for i, x := range xs { + if x <= 0 { + return math.NaN() + } + lx := math.Log(x) + m += (lx - m) / float64(i+1) + } + return math.Exp(m) +} + +// GeoMean returns the geometric mean of the Sample. All samples +// values must be positive. +func (s Sample) GeoMean() float64 { + if len(s.Xs) == 0 || s.Weights == nil { + return GeoMean(s.Xs) + } + + m, wsum := 0.0, 0.0 + for i, x := range s.Xs { + w := s.Weights[i] + wsum += w + lx := math.Log(x) + m += (lx - m) * w / wsum + } + return math.Exp(m) +} + +// Variance returns the sample variance of xs. +func Variance(xs []float64) float64 { + if len(xs) == 0 { + return math.NaN() + } else if len(xs) <= 1 { + return 0 + } + + // Based on Wikipedia's presentation of Welford 1962 + // (http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm). + // This is more numerically stable than the standard two-pass + // formula and not prone to massive cancellation. + mean, M2 := 0.0, 0.0 + for n, x := range xs { + delta := x - mean + mean += delta / float64(n+1) + M2 += delta * (x - mean) + } + return M2 / float64(len(xs)-1) +} + +// Variance returns the sample variance of xs. 
+func (s Sample) Variance() float64 { + if len(s.Xs) == 0 || s.Weights == nil { + return Variance(s.Xs) + } + // TODO(austin) + panic("Weighted Variance not implemented") +} + +// StdDev returns the sample standard deviation of xs. +func StdDev(xs []float64) float64 { + return math.Sqrt(Variance(xs)) +} + +// StdDev returns the sample standard deviation of the Sample. +func (s Sample) StdDev() float64 { + if len(s.Xs) == 0 || s.Weights == nil { + return StdDev(s.Xs) + } + // TODO(austin) + panic("Weighted StdDev not implemented") +} + +// Percentile returns the pctileth value from the Sample. This uses +// interpolation method R8 from Hyndman and Fan (1996). +// +// pctile will be capped to the range [0, 1]. If len(xs) == 0 or all +// weights are 0, returns NaN. +// +// Percentile(0.5) is the median. Percentile(0.25) and +// Percentile(0.75) are the first and third quartiles, respectively. +// +// This is constant time if s.Sorted and s.Weights == nil. +func (s Sample) Percentile(pctile float64) float64 { + if len(s.Xs) == 0 { + return math.NaN() + } else if pctile <= 0 { + min, _ := s.Bounds() + return min + } else if pctile >= 1 { + _, max := s.Bounds() + return max + } + + if !s.Sorted { + // TODO(austin) Use select algorithm instead + s = *s.Copy().Sort() + } + + if s.Weights == nil { + N := float64(len(s.Xs)) + //n := pctile * (N + 1) // R6 + n := 1/3.0 + pctile*(N+1/3.0) // R8 + kf, frac := math.Modf(n) + k := int(kf) + if k <= 0 { + return s.Xs[0] + } else if k >= len(s.Xs) { + return s.Xs[len(s.Xs)-1] + } + return s.Xs[k-1] + frac*(s.Xs[k]-s.Xs[k-1]) + } + // TODO(austin): Implement interpolation + + target := s.Weight() * pctile + + // TODO(austin) If we had cumulative weights, we could + // do this in log time. + for i, weight := range s.Weights { + target -= weight + if target < 0 { + return s.Xs[i] + } + } + return s.Xs[len(s.Xs)-1] +} + +// IQR returns the interquartile range of the Sample. 
+// +// This is constant time if s.Sorted and s.Weights == nil. +func (s Sample) IQR() float64 { + if !s.Sorted { + s = *s.Copy().Sort() + } + return s.Percentile(0.75) - s.Percentile(0.25) +} + +type sampleSorter struct { + xs []float64 + weights []float64 +} + +func (p *sampleSorter) Len() int { + return len(p.xs) +} + +func (p *sampleSorter) Less(i, j int) bool { + return p.xs[i] < p.xs[j] +} + +func (p *sampleSorter) Swap(i, j int) { + p.xs[i], p.xs[j] = p.xs[j], p.xs[i] + p.weights[i], p.weights[j] = p.weights[j], p.weights[i] +} + +// Sort sorts the samples in place in s and returns s. +// +// A sorted sample improves the performance of some algorithms. +func (s *Sample) Sort() *Sample { + if s.Sorted || sort.Float64sAreSorted(s.Xs) { + // All set + } else if s.Weights == nil { + sort.Float64s(s.Xs) + } else { + sort.Sort(&sampleSorter{s.Xs, s.Weights}) + } + s.Sorted = true + return s +} + +// Copy returns a copy of the Sample. +// +// The returned Sample shares no data with the original, so they can +// be modified (for example, sorted) independently. +func (s Sample) Copy() *Sample { + xs := make([]float64, len(s.Xs)) + copy(xs, s.Xs) + + weights := []float64(nil) + if s.Weights != nil { + weights = make([]float64, len(s.Weights)) + copy(weights, s.Weights) + } + + return &Sample{xs, weights, s.Sorted} +} diff --git a/internal/stat/scaler.go b/internal/stat/scaler.go new file mode 100644 index 0000000..62d102d --- /dev/null +++ b/internal/stat/scaler.go @@ -0,0 +1,114 @@ +package stat + +import ( + "fmt" + "strings" +) + +// A Scaler is a function that scales and formats a measurement. +// All measurements within a given table row are formatted +// using the same scaler, so that the units are consistent +// across the row. +type Scaler func(float64) string + +// NewScaler returns a Scaler appropriate for formatting +// the measurement val, which has the given unit. 
+func NewScaler(val float64, unit string) Scaler { + if hasBaseUnit(unit, "ns/op") || hasBaseUnit(unit, "ns/GC") { + return timeScaler(val) + } + + var format string + var scale float64 + var suffix string + + prescale := 1.0 + if hasBaseUnit(unit, "MB/s") { + prescale = 1e6 + } + + switch x := val * prescale; { + case x >= 99500000000000: + format, scale, suffix = "%.0f", 1e12, "T" + case x >= 9950000000000: + format, scale, suffix = "%.1f", 1e12, "T" + case x >= 995000000000: + format, scale, suffix = "%.2f", 1e12, "T" + case x >= 99500000000: + format, scale, suffix = "%.0f", 1e9, "G" + case x >= 9950000000: + format, scale, suffix = "%.1f", 1e9, "G" + case x >= 995000000: + format, scale, suffix = "%.2f", 1e9, "G" + case x >= 99500000: + format, scale, suffix = "%.0f", 1e6, "M" + case x >= 9950000: + format, scale, suffix = "%.1f", 1e6, "M" + case x >= 995000: + format, scale, suffix = "%.2f", 1e6, "M" + case x >= 99500: + format, scale, suffix = "%.0f", 1e3, "k" + case x >= 9950: + format, scale, suffix = "%.1f", 1e3, "k" + case x >= 995: + format, scale, suffix = "%.2f", 1e3, "k" + case x >= 99.5: + format, scale, suffix = "%.0f", 1, "" + case x >= 9.95: + format, scale, suffix = "%.1f", 1, "" + default: + format, scale, suffix = "%.2f", 1, "" + } + + if hasBaseUnit(unit, "B/op") || hasBaseUnit(unit, "bytes/op") || hasBaseUnit(unit, "bytes") { + suffix += "B" + } + if hasBaseUnit(unit, "MB/s") { + suffix += "B/s" + } + scale /= prescale + + return func(val float64) string { + return fmt.Sprintf(format+suffix, val/scale) + } +} + +func timeScaler(ns float64) Scaler { + var format string + var scale float64 + switch x := ns / 1e9; { + case x >= 99.5: + format, scale = "%.0fs", 1 + case x >= 9.95: + format, scale = "%.1fs", 1 + case x >= 0.995: + format, scale = "%.2fs", 1 + case x >= 0.0995: + format, scale = "%.0fms", 1000 + case x >= 0.00995: + format, scale = "%.1fms", 1000 + case x >= 0.000995: + format, scale = "%.2fms", 1000 + case x >= 0.0000995: + 
format, scale = "%.0fµs", 1000*1000 + case x >= 0.00000995: + format, scale = "%.1fµs", 1000*1000 + case x >= 0.000000995: + format, scale = "%.2fµs", 1000*1000 + case x >= 0.0000000995: + format, scale = "%.0fns", 1000*1000*1000 + case x >= 0.00000000995: + format, scale = "%.1fns", 1000*1000*1000 + default: + format, scale = "%.2fns", 1000*1000*1000 + } + return func(ns float64) string { + return fmt.Sprintf(format, ns/1e9*scale) + } +} + +// hasBaseUnit reports whether s has unit unit. +// For now, it reports whether s == unit or s ends in -unit. +func hasBaseUnit(s, unit string) bool { + return s == unit || strings.HasSuffix(s, "-"+unit) +} diff --git a/internal/stat/sort.go b/internal/stat/sort.go new file mode 100644 index 0000000..f618216 --- /dev/null +++ b/internal/stat/sort.go @@ -0,0 +1,32 @@ +package stat + +import ( + "math" + "sort" +) + +// An Order defines a sort order for a table. +// It reports whether t.Rows[i] should appear before t.Rows[j]. +type Order func(t *Table, i, j int) bool + +// ByName sorts tables by the Benchmark name column +func ByName(t *Table, i, j int) bool { + return t.Rows[i].Benchmark < t.Rows[j].Benchmark +} + +// ByDelta sorts tables by the Delta column, +// reversing the order when larger is better (for "speed" results). +func ByDelta(t *Table, i, j int) bool { + return math.Abs(t.Rows[i].PctDelta)*float64(t.Rows[i].Change) < + math.Abs(t.Rows[j].PctDelta)*float64(t.Rows[j].Change) +} + +// Reverse returns the reverse of the given order. +func Reverse(order Order) Order { + return func(t *Table, i, j int) bool { return order(t, j, i) } +} + +// Sort sorts a Table t (in place) by the given order. 
+func Sort(t *Table, order Order) { + sort.SliceStable(t.Rows, func(i, j int) bool { return order(t, i, j) }) +} diff --git a/internal/stat/table.go b/internal/stat/table.go new file mode 100644 index 0000000..06e202a --- /dev/null +++ b/internal/stat/table.go @@ -0,0 +1,208 @@ +package stat + +import ( + "fmt" + "strings" +) + +// A Table is a table for display in the benchstat output. +type Table struct { + Metric string + OldNewDelta bool // is this an old-new-delta table? + Configs []string + Groups []string + Rows []*Row +} + +// A Row is a table row for display in the benchstat output. +type Row struct { + Benchmark string // benchmark name + Group string // group name + Scaler Scaler // formatter for stats means + Metrics []*Metrics // columns of statistics + PctDelta float64 // unformatted percent change + Delta string // formatted percent change + Note string // additional information + Change int // +1 better, -1 worse, 0 unchanged +} + +// Tables returns tables comparing the benchmarks in the collection. +func (c *Collection) Tables() []*Table { + deltaTest := c.DeltaTest + if deltaTest == nil { + deltaTest = UTest + } + alpha := c.Alpha + if alpha == 0 { + alpha = 0.05 + } + + // Update statistics. + for _, m := range c.Metrics { + m.computeStats() + } + + var tables []*Table + key := Key{} + for _, key.Unit = range c.Units { + table := new(Table) + table.Configs = c.Configs + table.Groups = c.Groups + table.Metric = metricOf(key.Unit) + table.OldNewDelta = len(c.Configs) == 2 + for _, key.Group = range c.Groups { + for _, key.Benchmark = range c.Benchmarks[key.Group] { + row := &Row{Benchmark: key.Benchmark} + if len(c.Groups) > 1 { + // Show group headers if there is more than one group. 
+ row.Group = key.Group + } + + for _, key.Config = range c.Configs { + m := c.Metrics[key] + if m == nil { + row.Metrics = append(row.Metrics, new(Metrics)) + continue + } + row.Metrics = append(row.Metrics, m) + if row.Scaler == nil { + row.Scaler = NewScaler(m.Mean, m.Unit) + } + } + + // If there are only two configs being compared, add stats. + if table.OldNewDelta { + k0 := key + k0.Config = c.Configs[0] + k1 := key + k1.Config = c.Configs[1] + old := c.Metrics[k0] + new := c.Metrics[k1] + // If one is missing, omit row entirely. + // TODO: Control this better. + if old == nil || new == nil { + continue + } + pval, testerr := deltaTest(old, new) + row.PctDelta = 0.00 + row.Delta = "~" + if testerr == ErrZeroVariance { + row.Note = "(zero variance)" + } else if testerr == ErrSampleSize { + row.Note = "(too few samples)" + } else if testerr == ErrSamplesEqual { + row.Note = "(all equal)" + } else if testerr != nil { + row.Note = fmt.Sprintf("(%s)", testerr) + } else if pval < alpha { + if new.Mean == old.Mean { + row.Delta = "0.00%" + } else { + pct := ((new.Mean / old.Mean) - 1.0) * 100.0 + row.PctDelta = pct + row.Delta = fmt.Sprintf("%+.2f%%", pct) + if pct < 0 == (table.Metric != "speed") { // smaller is better, except speeds + row.Change = +1 + } else { + row.Change = -1 + } + } + } + if row.Note == "" && pval != -1 { + row.Note = fmt.Sprintf("(p=%0.3f n=%d+%d)", pval, len(old.RValues), len(new.RValues)) + } + } + + table.Rows = append(table.Rows, row) + } + } + + if len(table.Rows) > 0 { + if c.Order != nil { + Sort(table, c.Order) + } + if c.AddGeoMean { + addGeomean(c, table, key.Unit, table.OldNewDelta) + } + tables = append(tables, table) + } + } + + return tables +} + +var metricSuffix = map[string]string{ + "ns/op": "time/op", + "ns/GC": "time/GC", + "B/op": "alloc/op", + "MB/s": "speed", +} + +// metricOf returns the name of the metric with the given unit. 
+func metricOf(unit string) string { + if s := metricSuffix[unit]; s != "" { + return s + } + for s, suff := range metricSuffix { + if dashs := "-" + s; strings.HasSuffix(unit, dashs) { + prefix := strings.TrimSuffix(unit, dashs) + return prefix + "-" + suff + } + } + return unit +} + +// addGeomean adds a "geomean" row to the table, +// showing the geometric mean of all the benchmarks. +func addGeomean(c *Collection, t *Table, unit string, delta bool) { + row := &Row{Benchmark: "[Geo mean]"} + key := Key{Unit: unit} + geomeans := []float64{} + maxCount := 0 + for _, key.Config = range c.Configs { + var means []float64 + for _, key.Group = range c.Groups { + for _, key.Benchmark = range c.Benchmarks[key.Group] { + m := c.Metrics[key] + // Omit 0 values from the geomean calculation, + // as these either make the geomean undefined + // or zero (depending on who you ask). This + // typically comes up with things like + // allocation counts, where it's fine to just + // ignore the benchmark. + if m != nil && m.Mean != 0 { + means = append(means, m.Mean) + } + } + } + if len(means) > maxCount { + maxCount = len(means) + } + if len(means) == 0 { + row.Metrics = append(row.Metrics, new(Metrics)) + delta = false + } else { + geomean := GeoMean(means) + geomeans = append(geomeans, geomean) + if row.Scaler == nil { + row.Scaler = NewScaler(geomean, unit) + } + row.Metrics = append(row.Metrics, &Metrics{ + Unit: unit, + Mean: geomean, + }) + } + } + if maxCount <= 1 { + // Only one benchmark contributed to this geomean. + // Since the geomean is the same as the benchmark + // result, don't bother outputting it. 
+ return + } + if delta { + pct := ((geomeans[1] / geomeans[0]) - 1.0) * 100.0 + row.PctDelta = pct + row.Delta = fmt.Sprintf("%+.2f%%", pct) + } + t.Rows = append(t.Rows, row) +} diff --git a/internal/stat/tdist.go b/internal/stat/tdist.go new file mode 100644 index 0000000..cc5da07 --- /dev/null +++ b/internal/stat/tdist.go @@ -0,0 +1,32 @@ +package stat + +import "math" + +// A TDist is a Student's t-distribution with V degrees of freedom. +type TDist struct { + V float64 +} + +// PDF is the probability distribution function +func (t TDist) PDF(x float64) float64 { + return math.Exp(lgamma((t.V+1)/2)-lgamma(t.V/2)) / + math.Sqrt(t.V*math.Pi) * math.Pow(1+(x*x)/t.V, -(t.V+1)/2) +} + +// CDF is the cumulative distribution function +func (t TDist) CDF(x float64) float64 { + if x == 0 { + return 0.5 + } else if x > 0 { + return 1 - 0.5*mathBetaInc(t.V/(t.V+x*x), t.V/2, 0.5) + } else if x < 0 { + return 1 - t.CDF(-x) + } else { + return math.NaN() + } +} + +// Bounds ... +func (t TDist) Bounds() (float64, float64) { + return -4, 4 +} diff --git a/internal/stat/text.go b/internal/stat/text.go new file mode 100644 index 0000000..68d3a68 --- /dev/null +++ b/internal/stat/text.go @@ -0,0 +1,161 @@ +package stat + +import ( + "fmt" + "io" + "unicode/utf8" + + "golang.design/x/bench/internal/term" +) + +// FormatText appends a fixed-width text formatting of the tables to w. 
+func FormatText(w io.Writer, tables []*Table) { + var textTables [][]*textRow + for _, t := range tables { + textTables = append(textTables, toText(t, true)) + } + + var max []int + for _, table := range textTables { + for _, row := range table { + if len(row.cols) == 1 { + // Header row + continue + } + for len(max) < len(row.cols) { + max = append(max, 0) + } + for i, s := range row.cols { + n := utf8.RuneCountInString(s) + if max[i] < n { + max[i] = n + } + } + } + } + + for i, table := range textTables { + if i > 0 { + fmt.Fprintf(w, "\n") + } + + // headings + row := table[0] + for i, s := range row.cols { + switch i { + case 0: + fmt.Fprintf(w, "%-*s", max[i], s) + default: + fmt.Fprintf(w, " %-*s", max[i], s) + case len(row.cols) - 1: + fmt.Fprintf(w, " %s\n", s) + } + } + + // data + for _, row := range table[1:] { + for i, s := range row.cols { + switch { + case len(row.cols) == 1: + // Header row + fmt.Fprint(w, s) + case i == 0: + fmt.Fprintf(w, "%-*s", max[i], s) + default: + if i == len(row.cols)-1 && len(s) > 0 && s[0] == '(' { + // Left-align p value. + fmt.Fprintf(w, " %s", s) + break + } + fmt.Fprintf(w, " %*s", max[i], s) + } + } + fmt.Fprintf(w, "\n") + } + } +} + +// A textRow is a row of printed text columns. +type textRow struct { + cols []string +} + +func newTextRow(cols ...string) *textRow { + return &textRow{cols: cols} +} + +// newTextRowDelta returns a labeled row of text, with "±" inserted after +// each member of "cols" unless norange is true. 
+func newTextRowDelta(norange bool, label string, cols ...string) *textRow { + newcols := []string{} + newcols = append(newcols, label) + for _, s := range cols { + newcols = append(newcols, s) + if !norange { + newcols = append(newcols, "±") + } + } + return &textRow{cols: newcols} +} + +func (r *textRow) add(col string) { + r.cols = append(r.cols, col) +} + +func (r *textRow) trim() { + for len(r.cols) > 0 && r.cols[len(r.cols)-1] == "" { + r.cols = r.cols[:len(r.cols)-1] + } +} + +// toText converts the Table to a textual grid of cells, +// which can then be printed in fixed-width output. +func toText(t *Table, colorful bool) []*textRow { + var textRows []*textRow + switch len(t.Configs) { + case 1: + textRows = append(textRows, newTextRow("name", t.Metric)) + case 2: + textRows = append(textRows, newTextRow("name", "old "+t.Metric, "new "+t.Metric, "delta")) + default: + row := newTextRow("name \\ " + t.Metric) + row.cols = append(row.cols, t.Configs...) // TODO Should this also trim common path prefix? 
(see toCSV) + textRows = append(textRows, row) + } + + var group string + + for _, row := range t.Rows { + if row.Group != group { + group = row.Group + textRows = append(textRows, newTextRow(group)) + } + text := newTextRow(row.Benchmark) + for _, m := range row.Metrics { + text.cols = append(text.cols, m.Format(row.Scaler)) + } + if len(t.Configs) == 2 { + delta := row.Delta + if delta == "~" { + delta = "~ " + } + if colorful { + switch row.Change { + case 1: // better + delta = term.Green(delta) + case -1: // worse + delta = term.Red(delta) + default: // no change + delta = term.Gray(delta) + } + } + text.cols = append(text.cols, delta) + text.cols = append(text.cols, row.Note) + } + textRows = append(textRows, text) + } + for _, r := range textRows { + r.trim() + } + return textRows +} diff --git a/internal/stat/ttest.go b/internal/stat/ttest.go new file mode 100644 index 0000000..1f810c0 --- /dev/null +++ b/internal/stat/ttest.go @@ -0,0 +1,138 @@ +package stat + +import ( + "math" +) + +// A TTestResult is the result of a t-test. +type TTestResult struct { + // N1 and N2 are the sizes of the input samples. For a + // one-sample t-test, N2 is 0. + N1, N2 int + + // T is the value of the t-statistic for this t-test. + T float64 + + // DoF is the degrees of freedom for this t-test. + DoF float64 + + // AltHypothesis specifies the alternative hypothesis tested + // by this test against the null hypothesis that there is no + // difference in the means of the samples. + AltHypothesis LocationHypothesis + + // P is p-value for this t-test for the given null hypothesis. 
+ P float64 +} + +func newTTestResult(n1, n2 int, t, dof float64, alt LocationHypothesis) *TTestResult { + dist := TDist{dof} + var p float64 + switch alt { + case LocationDiffers: + p = 2 * (1 - dist.CDF(math.Abs(t))) + case LocationLess: + p = dist.CDF(t) + case LocationGreater: + p = 1 - dist.CDF(t) + } + return &TTestResult{N1: n1, N2: n2, T: t, DoF: dof, AltHypothesis: alt, P: p} +} + +// A TTestSample is a sample that can be used for a one or two sample +// t-test. +type TTestSample interface { + Weight() float64 + Mean() float64 + Variance() float64 +} + +var () + +// TwoSampleTTest performs a two-sample (unpaired) Student's t-test on +// samples x1 and x2. This is a test of the null hypothesis that x1 +// and x2 are drawn from populations with equal means. It assumes x1 +// and x2 are independent samples, that the distributions have equal +// variance, and that the populations are normally distributed. +func TwoSampleTTest(x1, x2 TTestSample, alt LocationHypothesis) (*TTestResult, error) { + n1, n2 := x1.Weight(), x2.Weight() + if n1 == 0 || n2 == 0 { + return nil, ErrSampleSize + } + v1, v2 := x1.Variance(), x2.Variance() + if v1 == 0 && v2 == 0 { + return nil, ErrZeroVariance + } + + dof := n1 + n2 - 2 + v12 := ((n1-1)*v1 + (n2-1)*v2) / dof + t := (x1.Mean() - x2.Mean()) / math.Sqrt(v12*(1/n1+1/n2)) + return newTTestResult(int(n1), int(n2), t, dof, alt), nil +} + +// TwoSampleWelchTTest performs a two-sample (unpaired) Welch's t-test +// on samples x1 and x2. This is like TwoSampleTTest, but does not +// assume the distributions have equal variance. +func TwoSampleWelchTTest(x1, x2 TTestSample, alt LocationHypothesis) (*TTestResult, error) { + n1, n2 := x1.Weight(), x2.Weight() + if n1 <= 1 || n2 <= 1 { + // TODO: Can we still do this with n == 1? 
+ return nil, ErrSampleSize + } + v1, v2 := x1.Variance(), x2.Variance() + if v1 == 0 && v2 == 0 { + return nil, ErrZeroVariance + } + + dof := math.Pow(v1/n1+v2/n2, 2) / + (math.Pow(v1/n1, 2)/(n1-1) + math.Pow(v2/n2, 2)/(n2-1)) + s := math.Sqrt(v1/n1 + v2/n2) + t := (x1.Mean() - x2.Mean()) / s + return newTTestResult(int(n1), int(n2), t, dof, alt), nil +} + +// PairedTTest performs a two-sample paired t-test on samples x1 and +// x2. If μ0 is non-zero, this tests if the average of the difference +// is significantly different from μ0. If x1 and x2 are identical, +// this returns nil. +func PairedTTest(x1, x2 []float64, μ0 float64, alt LocationHypothesis) (*TTestResult, error) { + if len(x1) != len(x2) { + return nil, ErrMismatchedSamples + } + if len(x1) <= 1 { + // TODO: Can we still do this with n == 1? + return nil, ErrSampleSize + } + + dof := float64(len(x1) - 1) + + diff := make([]float64, len(x1)) + for i := range x1 { + diff[i] = x1[i] - x2[i] + } + sd := StdDev(diff) + if sd == 0 { + // TODO: Can we still do the test? + return nil, ErrZeroVariance + } + t := (Mean(diff) - μ0) * math.Sqrt(float64(len(x1))) / sd + return newTTestResult(len(x1), len(x2), t, dof, alt), nil +} + +// OneSampleTTest performs a one-sample t-test on sample x. This tests +// the null hypothesis that the population mean is equal to μ0. This +// assumes the distribution of the population of sample means is +// normal. +func OneSampleTTest(x TTestSample, μ0 float64, alt LocationHypothesis) (*TTestResult, error) { + n, v := x.Weight(), x.Variance() + if n == 0 { + return nil, ErrSampleSize + } + if v == 0 { + // TODO: Can we still do the test? 
+ return nil, ErrZeroVariance + } + dof := n - 1 + t := (x.Mean() - μ0) * math.Sqrt(n) / math.Sqrt(v) + return newTTestResult(int(n), 0, t, dof, alt), nil +} diff --git a/internal/stat/udist.go b/internal/stat/udist.go new file mode 100644 index 0000000..b47d34a --- /dev/null +++ b/internal/stat/udist.go @@ -0,0 +1,381 @@ +package stat + +import "math" + +// A UDist is the discrete probability distribution of the +// Mann-Whitney U statistic for a pair of samples of sizes N1 and N2. +// +// The details of computing this distribution with no ties can be +// found in Mann, Henry B.; Whitney, Donald R. (1947). "On a Test of +// Whether one of Two Random Variables is Stochastically Larger than +// the Other". Annals of Mathematical Statistics 18 (1): 50–60. +// Computing this distribution in the presence of ties is described in +// Klotz, J. H. (1966). "The Wilcoxon, Ties, and the Computer". +// Journal of the American Statistical Association 61 (315): 772-787 +// and Cheung, Ying Kuen; Klotz, Jerome H. (1997). "The Mann Whitney +// Wilcoxon Distribution Using Linked Lists". Statistica Sinica 7: +// 805-813 (the former paper contains details that are glossed over in +// the latter paper but has mathematical typesetting issues, so it's +// easiest to get the context from the former paper and the details +// from the latter). +type UDist struct { + N1, N2 int + + // T is the count of the number of ties at each rank in the + // input distributions. T may be nil, in which case it is + // assumed there are no ties (which is equivalent to an M+N + // slice of 1s). It must be the case that Sum(T) == M+N. + T []int +} + +// hasTies returns true if d has any tied samples. +func (d UDist) hasTies() bool { + for _, t := range d.T { + if t > 1 { + return true + } + } + return false +} + +// p returns the p_{d.N1,d.N2} function defined by Mann, Whitney 1947 +// for values of U from 0 up to and including the U argument. 
+// +// This algorithm runs in Θ(N1*N2*U) = O(N1²N2²) time and is quite +// fast for small values of N1 and N2. However, it does not handle ties. +func (d UDist) p(U int) []float64 { + // This is a dynamic programming implementation of the + // recursive recurrence definition given by Mann and Whitney: + // + // p_{n,m}(U) = (n * p_{n-1,m}(U-m) + m * p_{n,m-1}(U)) / (n+m) + // p_{n,m}(U) = 0 if U < 0 + // p_{0,m}(U) = p{n,0}(U) = 1 / nCr(m+n, n) if U = 0 + // = 0 if U > 0 + // + // (Note that there is a typo in the original paper. The first + // recursive application of p should be for U-m, not U-M.) + // + // Since p{n,m} only depends on p{n-1,m} and p{n,m-1}, we only + // need to store one "plane" of the three dimensional space at + // a time. + // + // Furthermore, p_{n,m} = p_{m,n}, so we only construct values + // for n <= m and obtain the rest through symmetry. + // + // We organize the computed values of p as followed: + // + // n → N + // m * + // ↓ * * + // * * * + // * * * * + // * * * * + // M * * * * + // + // where each * is a slice indexed by U. The code below + // computes these left-to-right, top-to-bottom, so it only + // stores one row of this matrix at a time. Furthermore, + // computing an element in a given U slice only depends on the + // same and smaller values of U, so we can overwrite the U + // slice we're computing in place as long as we start with the + // largest value of U. Finally, even though the recurrence + // depends on (n,m) above the diagonal and we use symmetry to + // mirror those across the diagonal to (m,n), the mirrored + // indexes are always available in the current row, so this + // mirroring does not interfere with our ability to recycle + // state. + + N, M := d.N1, d.N2 + if N > M { + N, M = M, N + } + + memo := make([][]float64, N+1) + for n := range memo { + memo[n] = make([]float64, U+1) + } + + for m := 0; m <= M; m++ { + // Compute p_{0,m}. This is zero except for U=0. 
+ memo[0][0] = 1 + + // Compute the remainder of this row. + nlim := N + if m < nlim { + nlim = m + } + for n := 1; n <= nlim; n++ { + lp := memo[n-1] // p_{n-1,m} + var rp []float64 + if n <= m-1 { + rp = memo[n] // p_{n,m-1} + } else { + rp = memo[m-1] // p{m-1,n} and m==n + } + + // For a given n,m, U is at most n*m. + // + // TODO: Actually, it's at most ⌈n*m/2⌉, but + // then we need to use more complex symmetries + // in the inner loop below. + ulim := n * m + if U < ulim { + ulim = U + } + + out := memo[n] // p_{n,m} + nplusm := float64(n + m) + for U1 := ulim; U1 >= 0; U1-- { + l := 0.0 + if U1-m >= 0 { + l = float64(n) * lp[U1-m] + } + r := float64(m) * rp[U1] + out[U1] = (l + r) / nplusm + } + } + } + return memo[N] +} + +type ukey struct { + n1 int // size of first sample + twoU int // 2*U statistic for this permutation +} + +// This computes the cumulative counts of the Mann-Whitney U +// distribution in the presence of ties using the computation from +// Cheung, Ying Kuen; Klotz, Jerome H. (1997). "The Mann Whitney +// Wilcoxon Distribution Using Linked Lists". Statistica Sinica 7: +// 805-813, with much guidance from appendix L of Klotz, A +// Computational Approach to Statistics. +// +// makeUmemo constructs a table memo[K][ukey{n1, 2*U}], where K is the +// number of ranks (up to len(t)), n1 is the size of the first sample +// (up to the n1 argument), and U is the U statistic (up to the +// argument twoU/2). The value of an entry in the memo table is the +// number of permutations of a sample of size n1 in a ranking with tie +// vector t[:K] having a U statistic <= U. +func makeUmemo(twoU, n1 int, t []int) []map[ukey]float64 { + // Another candidate for a fast implementation is van de Wiel, + // "The split-up algorithm: a fast symbolic method for + // computing p-values of distribution-free statistics". This + // is what's used by R's coin package. 
It's a comparatively + // recent publication, so it's presumably faster (or perhaps + // just more general) than previous techniques, but I can't + // get my hands on the paper. + // + // TODO: ~40% of this function's time is spent in mapassign on + // the assignment lines in the two loops and another ~20% in + // map access and iteration. Improving map behavior or + // replacing the maps altogether with some other constant-time + // structure could double performance. + // + // TODO: The worst case for this function is when there are + // few ties. Yet the best case overall is when there are *no* + // ties. Can we get the best of both worlds? Use the fast + // algorithm for the most part when there are few ties and mix + // in the general algorithm just where we need it? That's + // certainly possible for sub-problems where t[:k] has no + // ties, but that doesn't help if t[0] has a tie but nothing + // else does. Is it possible to rearrange the ranks without + // messing up our computation of the U statistic for + // sub-problems? + + K := len(t) + + // Compute a coefficients. The a slice is indexed by k (a[0] + // is unused). + a := make([]int, K+1) + a[1] = t[0] + for k := 2; k <= K; k++ { + a[k] = a[k-1] + t[k-2] + t[k-1] + } + + // Create the memo table for the counts function, A. The A + // slice is indexed by k (A[0] is unused). + // + // In "The Mann Whitney Distribution Using Linked Lists", they + // use linked lists (*gasp*) for this, but within each K it's + // really just a memoization table, so it's faster to use a + // map. The outer structure is a slice indexed by k because we + // need to find all memo entries with certain values of k. + // + // TODO: The n1 and twoU values in the ukeys follow strict + // patterns. For each K value, the n1 values are every integer + // between two bounds. For each (K, n1) value, the twoU values + // are every integer multiple of a certain base between two + // bounds. 
It might be worth turning these into directly + // indexible slices. + A := make([]map[ukey]float64, K+1) + A[K] = map[ukey]float64{ukey{n1: n1, twoU: twoU}: 0} + + // Compute memo table (k, n1, twoU) triples from high K values + // to low K values. This drives the recurrence relation + // downward to figure out all of the needed argument triples. + // + // TODO: Is it possible to generate this table bottom-up? If + // so, this could be a pure dynamic programming algorithm and + // we could discard the K dimension. We could at least store + // the inputs in a more compact representation that replaces + // the twoU dimension with an interval and a step size (as + // suggested by Cheung, Klotz, not that they make it at all + // clear *why* they're suggesting this). + tsum := sumint(t) // always ∑ t[0:k] + for k := K - 1; k >= 2; k-- { + tsum -= t[k] + A[k] = make(map[ukey]float64) + + // Construct A[k] from A[k+1]. + for A_kplus1 := range A[k+1] { + rkLow := maxint(0, A_kplus1.n1-tsum) + rkHigh := minint(A_kplus1.n1, t[k]) + for rk := rkLow; rk <= rkHigh; rk++ { + twoU_k := A_kplus1.twoU - rk*(a[k+1]-2*A_kplus1.n1+rk) + n1_k := A_kplus1.n1 - rk + if twoUmin(n1_k, t[:k], a) <= twoU_k && twoU_k <= twoUmax(n1_k, t[:k], a) { + key := ukey{n1: n1_k, twoU: twoU_k} + A[k][key] = 0 + } + } + } + } + + // Fill counts in memo table from low K values to high K + // values. This unwinds the recurrence relation. + + // Start with K==2 base case. + // + // TODO: Later computations depend on these, but these don't + // depend on anything (including each other), so if K==2, we + // can skip the memo table altogether. + if K < 2 { + panic("K < 2") + } + N_2 := t[0] + t[1] + for A_2i := range A[2] { + Asum := 0.0 + r2Low := maxint(0, A_2i.n1-t[0]) + r2High := (A_2i.twoU - A_2i.n1*(t[0]-A_2i.n1)) / N_2 + for r2 := r2Low; r2 <= r2High; r2++ { + Asum += mathChoose(t[0], A_2i.n1-r2) * + mathChoose(t[1], r2) + } + A[2][A_2i] = Asum + } + + // Derive counts for the rest of the memo table. 
+ tsum = t[0] // always ∑ t[0:k-1] + for k := 3; k <= K; k++ { + tsum += t[k-2] + + // Compute A[k] counts from A[k-1] counts. + for A_ki := range A[k] { + Asum := 0.0 + rkLow := maxint(0, A_ki.n1-tsum) + rkHigh := minint(A_ki.n1, t[k-1]) + for rk := rkLow; rk <= rkHigh; rk++ { + twoU_kminus1 := A_ki.twoU - rk*(a[k]-2*A_ki.n1+rk) + n1_kminus1 := A_ki.n1 - rk + x, ok := A[k-1][ukey{n1: n1_kminus1, twoU: twoU_kminus1}] + if !ok && twoUmax(n1_kminus1, t[:k-1], a) < twoU_kminus1 { + x = mathChoose(tsum, n1_kminus1) + } + Asum += x * mathChoose(t[k-1], rk) + } + A[k][A_ki] = Asum + } + } + + return A +} + +func twoUmin(n1 int, t, a []int) int { + K := len(t) + twoU := -n1 * n1 + n1_k := n1 + for k := 1; k <= K; k++ { + twoU_k := minint(n1_k, t[k-1]) + twoU += twoU_k * a[k] + n1_k -= twoU_k + } + return twoU +} + +func twoUmax(n1 int, t, a []int) int { + K := len(t) + twoU := -n1 * n1 + n1_k := n1 + for k := K; k > 0; k-- { + twoU_k := minint(n1_k, t[k-1]) + twoU += twoU_k * a[k] + n1_k -= twoU_k + } + return twoU +} + +func (d UDist) PMF(U float64) float64 { + if U < 0 || U >= 0.5+float64(d.N1*d.N2) { + return 0 + } + + if d.hasTies() { + // makeUmemo computes the CDF directly. Take its + // difference to get the PMF. + p1, ok1 := makeUmemo(int(2*U)-1, d.N1, d.T)[len(d.T)][ukey{d.N1, int(2*U) - 1}] + p2, ok2 := makeUmemo(int(2*U), d.N1, d.T)[len(d.T)][ukey{d.N1, int(2 * U)}] + if !ok1 || !ok2 { + panic("makeUmemo did not return expected memoization table") + } + return (p2 - p1) / mathChoose(d.N1+d.N2, d.N1) + } + + // There are no ties. Use the fast algorithm. U must be integral. + Ui := int(math.Floor(U)) + // TODO: Use symmetry to minimize U + return d.p(Ui)[Ui] +} + +func (d UDist) CDF(U float64) float64 { + if U < 0 { + return 0 + } else if U >= float64(d.N1*d.N2) { + return 1 + } + + if d.hasTies() { + // TODO: Minimize U? 
+ p, ok := makeUmemo(int(2*U), d.N1, d.T)[len(d.T)][ukey{d.N1, int(2 * U)}] + if !ok { + panic("makeUmemo did not return expected memoization table") + } + return p / mathChoose(d.N1+d.N2, d.N1) + } + + // There are no ties. Use the fast algorithm. U must be integral. + Ui := int(math.Floor(U)) + // The distribution is symmetric around U = m * n / 2. Sum up + // whichever tail is smaller. + flip := Ui >= (d.N1*d.N2+1)/2 + if flip { + Ui = d.N1*d.N2 - Ui - 1 + } + pdfs := d.p(Ui) + p := 0.0 + for _, pdf := range pdfs[:Ui+1] { + p += pdf + } + if flip { + p = 1 - p + } + return p +} + +func (d UDist) Step() float64 { + return 0.5 +} + +func (d UDist) Bounds() (float64, float64) { + // TODO: More precise bounds when there are ties. + return 0, float64(d.N1 * d.N2) +} diff --git a/internal/stat/utest.go b/internal/stat/utest.go new file mode 100644 index 0000000..9272b69 --- /dev/null +++ b/internal/stat/utest.go @@ -0,0 +1,270 @@ +package stat + +import ( + "math" + "sort" +) + +// A LocationHypothesis specifies the alternative hypothesis of a +// location test such as a t-test or a Mann-Whitney U-test. The +// default (zero) value is to test against the alternative hypothesis +// that they differ. +type LocationHypothesis int + +//go:generate stringer -type LocationHypothesis + +const ( + // LocationLess specifies the alternative hypothesis that the + // location of the first sample is less than the second. This + // is a one-tailed test. + LocationLess LocationHypothesis = -1 + + // LocationDiffers specifies the alternative hypothesis that + // the locations of the two samples are not equal. This is a + // two-tailed test. + LocationDiffers LocationHypothesis = 0 + + // LocationGreater specifies the alternative hypothesis that + // the location of the first sample is greater than the + // second. This is a one-tailed test. + LocationGreater LocationHypothesis = 1 +) + +// A MannWhitneyUTestResult is the result of a Mann-Whitney U-test. 
+type MannWhitneyUTestResult struct { + // N1 and N2 are the sizes of the input samples. + N1, N2 int + + // U is the value of the Mann-Whitney U statistic for this + // test, generalized by counting ties as 0.5. + // + // Given the Cartesian product of the two samples, this is the + // number of pairs in which the value from the first sample is + // greater than the value of the second, plus 0.5 times the + // number of pairs where the values from the two samples are + // equal. Hence, U is always an integer multiple of 0.5 (it is + // a whole integer if there are no ties) in the range [0, N1*N2]. + // + // U statistics always come in pairs, depending on which + // sample is "first". The mirror U for the other sample can be + // calculated as N1*N2 - U. + // + // There are many equivalent statistics with slightly + // different definitions. The Wilcoxon (1945) W statistic + // (generalized for ties) is U + (N1(N1+1))/2. It is also + // common to use 2U to eliminate the half steps and Smid + // (1956) uses N1*N2 - 2U to additionally center the + // distribution. + U float64 + + // AltHypothesis specifies the alternative hypothesis tested + // by this test against the null hypothesis that there is no + // difference in the locations of the samples. + AltHypothesis LocationHypothesis + + // P is the p-value of the Mann-Whitney test for the given + // null hypothesis. + P float64 +} + +// MannWhitneyExactLimit gives the largest sample size for which the +// exact U distribution will be used for the Mann-Whitney U-test. +// +// Using the exact distribution is necessary for small sample sizes +// because the distribution is highly irregular. However, computing +// the distribution for large sample sizes is both computationally +// expensive and unnecessary because it quickly approaches a normal +// approximation. Computing the distribution for two 50 value samples +// takes a few milliseconds on a 2014 laptop. 
+var MannWhitneyExactLimit = 50 + +// MannWhitneyTiesExactLimit gives the largest sample size for which +// the exact U distribution will be used for the Mann-Whitney U-test +// in the presence of ties. +// +// Computing this distribution is more expensive than computing the +// distribution without ties, so this is set lower. Computing this +// distribution for two 25 value samples takes about ten milliseconds +// on a 2014 laptop. +var MannWhitneyTiesExactLimit = 25 + +// MannWhitneyUTest performs a Mann-Whitney U-test [1,2] of the null +// hypothesis that two samples come from the same population against +// the alternative hypothesis that one sample tends to have larger or +// smaller values than the other. +// +// This is similar to a t-test, but unlike the t-test, the +// Mann-Whitney U-test is non-parametric (it does not assume a normal +// distribution). It has very slightly lower efficiency than the +// t-test on normal distributions. +// +// Computing the exact U distribution is expensive for large sample +// sizes, so this uses a normal approximation for sample sizes larger +// than MannWhitneyExactLimit if there are no ties or +// MannWhitneyTiesExactLimit if there are ties. This normal +// approximation uses both the tie correction and the continuity +// correction. +// +// This can fail with ErrSampleSize if either sample is empty or +// ErrSamplesEqual if all sample values are equal. +// +// This is also known as a Mann-Whitney-Wilcoxon test and is +// equivalent to the Wilcoxon rank-sum test, though the Wilcoxon +// rank-sum test differs in nomenclature. +// +// [1] Mann, Henry B.; Whitney, Donald R. (1947). "On a Test of +// Whether one of Two Random Variables is Stochastically Larger than +// the Other". Annals of Mathematical Statistics 18 (1): 50–60. +// +// [2] Klotz, J. H. (1966). "The Wilcoxon, Ties, and the Computer". +// Journal of the American Statistical Association 61 (315): 772-787. 
+func MannWhitneyUTest(x1, x2 []float64, alt LocationHypothesis) (*MannWhitneyUTestResult, error) { + n1, n2 := len(x1), len(x2) + if n1 == 0 || n2 == 0 { + return nil, ErrSampleSize + } + + // Compute the U statistic and tie vector T. + x1 = append([]float64(nil), x1...) + x2 = append([]float64(nil), x2...) + sort.Float64s(x1) + sort.Float64s(x2) + merged, labels := labeledMerge(x1, x2) + + R1 := 0.0 + T, hasTies := []int{}, false + for i := 0; i < len(merged); { + rank1, nx1, v1 := i+1, 0, merged[i] + // Consume samples that tie this sample (including itself). + for ; i < len(merged) && merged[i] == v1; i++ { + if labels[i] == 1 { + nx1++ + } + } + // Assign all tied samples the average rank of the + // samples, where merged[0] has rank 1. + if nx1 != 0 { + rank := float64(i+rank1) / 2 + R1 += rank * float64(nx1) + } + T = append(T, i-rank1+1) + if i > rank1 { + hasTies = true + } + } + U1 := R1 - float64(n1*(n1+1))/2 + + // Compute the smaller of U1 and U2 + U2 := float64(n1*n2) - U1 + Usmall := math.Min(U1, U2) + + var p float64 + if !hasTies && n1 <= MannWhitneyExactLimit && n2 <= MannWhitneyExactLimit || + hasTies && n1 <= MannWhitneyTiesExactLimit && n2 <= MannWhitneyTiesExactLimit { + // Use exact U distribution. U1 will be an integer. + if len(T) == 1 { + // All values are equal. Test is meaningless. + return nil, ErrSamplesEqual + } + + dist := UDist{N1: n1, N2: n2, T: T} + switch alt { + case LocationDiffers: + if U1 == U2 { + // The distribution is symmetric about + // Usmall. Since the distribution is + // discrete, the CDF is discontinuous + // and if simply double CDF(Usmall), + // we'll double count the + // (non-infinitesimal) probability + // mass at Usmall. What we want is + // just the integral of the whole CDF, + // which is 1. 
+ p = 1 + } else { + p = dist.CDF(Usmall) * 2 + } + + case LocationLess: + p = dist.CDF(U1) + + case LocationGreater: + p = 1 - dist.CDF(U1-1) + } + } else { + // Use normal approximation (with tie and continuity + // correction). + t := tieCorrection(T) + N := float64(n1 + n2) + μU := float64(n1*n2) / 2 + σU := math.Sqrt(float64(n1*n2) * ((N + 1) - t/(N*(N-1))) / 12) + if σU == 0 { + return nil, ErrSamplesEqual + } + numer := U1 - μU + // Perform continuity correction. + switch alt { + case LocationDiffers: + numer -= mathSign(numer) * 0.5 + case LocationLess: + numer += 0.5 + case LocationGreater: + numer -= 0.5 + } + z := numer / σU + switch alt { + case LocationDiffers: + p = 2 * math.Min(StdNormal.CDF(z), 1-StdNormal.CDF(z)) + case LocationLess: + p = StdNormal.CDF(z) + case LocationGreater: + p = 1 - StdNormal.CDF(z) + } + } + + return &MannWhitneyUTestResult{N1: n1, N2: n2, U: U1, + AltHypothesis: alt, P: p}, nil +} + +// labeledMerge merges sorted lists x1 and x2 into sorted list merged. +// labels[i] is 1 or 2 depending on whether merged[i] is a value from +// x1 or x2, respectively. +func labeledMerge(x1, x2 []float64) (merged []float64, labels []byte) { + merged = make([]float64, len(x1)+len(x2)) + labels = make([]byte, len(x1)+len(x2)) + + i, j, o := 0, 0, 0 + for i < len(x1) && j < len(x2) { + if x1[i] < x2[j] { + merged[o] = x1[i] + labels[o] = 1 + i++ + } else { + merged[o] = x2[j] + labels[o] = 2 + j++ + } + o++ + } + for ; i < len(x1); i++ { + merged[o] = x1[i] + labels[o] = 1 + o++ + } + for ; j < len(x2); j++ { + merged[o] = x2[j] + labels[o] = 2 + o++ + } + return +} + +// tieCorrection computes the tie correction factor Σ_j (t_j³ - t_j) +// where t_j is the number of ties in the j'th rank. 
+func tieCorrection(ties []int) float64 { + t := 0 + for _, tie := range ties { + t += tie*tie*tie - tie + } + return float64(t) +} diff --git a/internal/term/color.go b/internal/term/color.go new file mode 100644 index 0000000..769a0c5 --- /dev/null +++ b/internal/term/color.go @@ -0,0 +1,655 @@ +// Copyright 2020 The golang.design Initiative Authors. +// All rights reserved. Use of this source code is governed +// by a GNU GPLv3 license that can be found in the LICENSE file. + +package term + +// Red turns a given string to red +func Red(in string) string { + return fgString(in, 255, 0, 0) +} + +// Orange turns a given string to orange +func Orange(in string) string { + return fgString(in, 252, 140, 3) +} + +// Green turns a given string to green +func Green(in string) string { + return fgString(in, 0, 255, 0) +} + +// Gray turns a given string to gray +func Gray(in string) string { + return fgString(in, 125, 125, 125) +} + +var ( + before = []byte("\033[") + after = []byte("m") + reset = []byte("\033[0;00m") + fgcolors = fgTermRGB[16:232] + bgcolors = bgTermRGB[16:232] +) + +func fgString(in string, r, g, b uint8) string { + return string(fgBytes([]byte(in), r, g, b)) +} + +// Bytes colorizes the foreground with the terminal color that matches +// the closest the RGB color. +func fgBytes(in []byte, r, g, b uint8) []byte { + return colorize(color(r, g, b, true), in) +} + +func colorize(color, in []byte) []byte { + return append(append(append(append(before, color...), after...), in...), reset...) +} + +func color(r, g, b uint8, foreground bool) []byte { + // if all colors are equal, it might be in the grayscale range + if r == g && g == b { + color, ok := grayscale(r, foreground) + if ok { + return color + } + } + + // the general case approximates RGB by using the closest color. 
+ r6 := ((uint16(r) * 5) / 255) + g6 := ((uint16(g) * 5) / 255) + b6 := ((uint16(b) * 5) / 255) + i := 36*r6 + 6*g6 + b6 + if foreground { + return fgcolors[i] + } + return bgcolors[i] +} + +func grayscale(scale uint8, foreground bool) ([]byte, bool) { + var source [256][]byte + + if foreground { + source = fgTermRGB + } else { + source = bgTermRGB + } + + switch scale { + case 0x08: + return source[232], true + case 0x12: + return source[233], true + case 0x1c: + return source[234], true + case 0x26: + return source[235], true + case 0x30: + return source[236], true + case 0x3a: + return source[237], true + case 0x44: + return source[238], true + case 0x4e: + return source[239], true + case 0x58: + return source[240], true + case 0x62: + return source[241], true + case 0x6c: + return source[242], true + case 0x76: + return source[243], true + case 0x80: + return source[244], true + case 0x8a: + return source[245], true + case 0x94: + return source[246], true + case 0x9e: + return source[247], true + case 0xa8: + return source[248], true + case 0xb2: + return source[249], true + case 0xbc: + return source[250], true + case 0xc6: + return source[251], true + case 0xd0: + return source[252], true + case 0xda: + return source[253], true + case 0xe4: + return source[254], true + case 0xee: + return source[255], true + } + return nil, false +} + +var ( + yellow = fgString("", 252, 140, 3) + red = fgString("", 255, 0, 0) + green = fgString("", 0, 255, 0) +) + +// \033[ + +var fgTermRGB = [...][]byte{ + []byte("38;5;0"), + []byte("38;5;1"), + []byte("38;5;2"), + []byte("38;5;3"), + []byte("38;5;4"), + []byte("38;5;5"), + []byte("38;5;6"), + []byte("38;5;7"), + []byte("38;5;8"), + []byte("38;5;9"), + []byte("38;5;10"), + []byte("38;5;11"), + []byte("38;5;12"), + []byte("38;5;13"), + []byte("38;5;14"), + []byte("38;5;15"), + []byte("38;5;16"), + []byte("38;5;17"), + []byte("38;5;18"), + []byte("38;5;19"), + []byte("38;5;20"), + []byte("38;5;21"), + []byte("38;5;22"), + 
[]byte("38;5;23"), + []byte("38;5;24"), + []byte("38;5;25"), + []byte("38;5;26"), + []byte("38;5;27"), + []byte("38;5;28"), + []byte("38;5;29"), + []byte("38;5;30"), + []byte("38;5;31"), + []byte("38;5;32"), + []byte("38;5;33"), + []byte("38;5;34"), + []byte("38;5;35"), + []byte("38;5;36"), + []byte("38;5;37"), + []byte("38;5;38"), + []byte("38;5;39"), + []byte("38;5;40"), + []byte("38;5;41"), + []byte("38;5;42"), + []byte("38;5;43"), + []byte("38;5;44"), + []byte("38;5;45"), + []byte("38;5;46"), + []byte("38;5;47"), + []byte("38;5;48"), + []byte("38;5;49"), + []byte("38;5;50"), + []byte("38;5;51"), + []byte("38;5;52"), + []byte("38;5;53"), + []byte("38;5;54"), + []byte("38;5;55"), + []byte("38;5;56"), + []byte("38;5;57"), + []byte("38;5;58"), + []byte("38;5;59"), + []byte("38;5;60"), + []byte("38;5;61"), + []byte("38;5;62"), + []byte("38;5;63"), + []byte("38;5;64"), + []byte("38;5;65"), + []byte("38;5;66"), + []byte("38;5;67"), + []byte("38;5;68"), + []byte("38;5;69"), + []byte("38;5;70"), + []byte("38;5;71"), + []byte("38;5;72"), + []byte("38;5;73"), + []byte("38;5;74"), + []byte("38;5;75"), + []byte("38;5;76"), + []byte("38;5;77"), + []byte("38;5;78"), + []byte("38;5;79"), + []byte("38;5;80"), + []byte("38;5;81"), + []byte("38;5;82"), + []byte("38;5;83"), + []byte("38;5;84"), + []byte("38;5;85"), + []byte("38;5;86"), + []byte("38;5;87"), + []byte("38;5;88"), + []byte("38;5;89"), + []byte("38;5;90"), + []byte("38;5;91"), + []byte("38;5;92"), + []byte("38;5;93"), + []byte("38;5;94"), + []byte("38;5;95"), + []byte("38;5;96"), + []byte("38;5;97"), + []byte("38;5;98"), + []byte("38;5;99"), + []byte("38;5;100"), + []byte("38;5;101"), + []byte("38;5;102"), + []byte("38;5;103"), + []byte("38;5;104"), + []byte("38;5;105"), + []byte("38;5;106"), + []byte("38;5;107"), + []byte("38;5;108"), + []byte("38;5;109"), + []byte("38;5;110"), + []byte("38;5;111"), + []byte("38;5;112"), + []byte("38;5;113"), + []byte("38;5;114"), + []byte("38;5;115"), + []byte("38;5;116"), + 
[]byte("38;5;117"), + []byte("38;5;118"), + []byte("38;5;119"), + []byte("38;5;120"), + []byte("38;5;121"), + []byte("38;5;122"), + []byte("38;5;123"), + []byte("38;5;124"), + []byte("38;5;125"), + []byte("38;5;126"), + []byte("38;5;127"), + []byte("38;5;128"), + []byte("38;5;129"), + []byte("38;5;130"), + []byte("38;5;131"), + []byte("38;5;132"), + []byte("38;5;133"), + []byte("38;5;134"), + []byte("38;5;135"), + []byte("38;5;136"), + []byte("38;5;137"), + []byte("38;5;138"), + []byte("38;5;139"), + []byte("38;5;140"), + []byte("38;5;141"), + []byte("38;5;142"), + []byte("38;5;143"), + []byte("38;5;144"), + []byte("38;5;145"), + []byte("38;5;146"), + []byte("38;5;147"), + []byte("38;5;148"), + []byte("38;5;149"), + []byte("38;5;150"), + []byte("38;5;151"), + []byte("38;5;152"), + []byte("38;5;153"), + []byte("38;5;154"), + []byte("38;5;155"), + []byte("38;5;156"), + []byte("38;5;157"), + []byte("38;5;158"), + []byte("38;5;159"), + []byte("38;5;160"), + []byte("38;5;161"), + []byte("38;5;162"), + []byte("38;5;163"), + []byte("38;5;164"), + []byte("38;5;165"), + []byte("38;5;166"), + []byte("38;5;167"), + []byte("38;5;168"), + []byte("38;5;169"), + []byte("38;5;170"), + []byte("38;5;171"), + []byte("38;5;172"), + []byte("38;5;173"), + []byte("38;5;174"), + []byte("38;5;175"), + []byte("38;5;176"), + []byte("38;5;177"), + []byte("38;5;178"), + []byte("38;5;179"), + []byte("38;5;180"), + []byte("38;5;181"), + []byte("38;5;182"), + []byte("38;5;183"), + []byte("38;5;184"), + []byte("38;5;185"), + []byte("38;5;186"), + []byte("38;5;187"), + []byte("38;5;188"), + []byte("38;5;189"), + []byte("38;5;190"), + []byte("38;5;191"), + []byte("38;5;192"), + []byte("38;5;193"), + []byte("38;5;194"), + []byte("38;5;195"), + []byte("38;5;196"), + []byte("38;5;197"), + []byte("38;5;198"), + []byte("38;5;199"), + []byte("38;5;200"), + []byte("38;5;201"), + []byte("38;5;202"), + []byte("38;5;203"), + []byte("38;5;204"), + []byte("38;5;205"), + []byte("38;5;206"), + []byte("38;5;207"), 
+ []byte("38;5;208"), + []byte("38;5;209"), + []byte("38;5;210"), + []byte("38;5;211"), + []byte("38;5;212"), + []byte("38;5;213"), + []byte("38;5;214"), + []byte("38;5;215"), + []byte("38;5;216"), + []byte("38;5;217"), + []byte("38;5;218"), + []byte("38;5;219"), + []byte("38;5;220"), + []byte("38;5;221"), + []byte("38;5;222"), + []byte("38;5;223"), + []byte("38;5;224"), + []byte("38;5;225"), + []byte("38;5;226"), + []byte("38;5;227"), + []byte("38;5;228"), + []byte("38;5;229"), + []byte("38;5;230"), + []byte("38;5;231"), + []byte("38;5;232"), + []byte("38;5;233"), + []byte("38;5;234"), + []byte("38;5;235"), + []byte("38;5;236"), + []byte("38;5;237"), + []byte("38;5;238"), + []byte("38;5;239"), + []byte("38;5;240"), + []byte("38;5;241"), + []byte("38;5;242"), + []byte("38;5;243"), + []byte("38;5;244"), + []byte("38;5;245"), + []byte("38;5;246"), + []byte("38;5;247"), + []byte("38;5;248"), + []byte("38;5;249"), + []byte("38;5;250"), + []byte("38;5;251"), + []byte("38;5;252"), + []byte("38;5;253"), + []byte("38;5;254"), + []byte("38;5;255"), +} + +var bgTermRGB = [...][]byte{ + []byte("48;5;0"), + []byte("48;5;1"), + []byte("48;5;2"), + []byte("48;5;3"), + []byte("48;5;4"), + []byte("48;5;5"), + []byte("48;5;6"), + []byte("48;5;7"), + []byte("48;5;8"), + []byte("48;5;9"), + []byte("48;5;10"), + []byte("48;5;11"), + []byte("48;5;12"), + []byte("48;5;13"), + []byte("48;5;14"), + []byte("48;5;15"), + []byte("48;5;16"), + []byte("48;5;17"), + []byte("48;5;18"), + []byte("48;5;19"), + []byte("48;5;20"), + []byte("48;5;21"), + []byte("48;5;22"), + []byte("48;5;23"), + []byte("48;5;24"), + []byte("48;5;25"), + []byte("48;5;26"), + []byte("48;5;27"), + []byte("48;5;28"), + []byte("48;5;29"), + []byte("48;5;30"), + []byte("48;5;31"), + []byte("48;5;32"), + []byte("48;5;33"), + []byte("48;5;34"), + []byte("48;5;35"), + []byte("48;5;36"), + []byte("48;5;37"), + []byte("48;5;38"), + []byte("48;5;39"), + []byte("48;5;40"), + []byte("48;5;41"), + []byte("48;5;42"), + 
[]byte("48;5;43"), + []byte("48;5;44"), + []byte("48;5;45"), + []byte("48;5;46"), + []byte("48;5;47"), + []byte("48;5;48"), + []byte("48;5;49"), + []byte("48;5;50"), + []byte("48;5;51"), + []byte("48;5;52"), + []byte("48;5;53"), + []byte("48;5;54"), + []byte("48;5;55"), + []byte("48;5;56"), + []byte("48;5;57"), + []byte("48;5;58"), + []byte("48;5;59"), + []byte("48;5;60"), + []byte("48;5;61"), + []byte("48;5;62"), + []byte("48;5;63"), + []byte("48;5;64"), + []byte("48;5;65"), + []byte("48;5;66"), + []byte("48;5;67"), + []byte("48;5;68"), + []byte("48;5;69"), + []byte("48;5;70"), + []byte("48;5;71"), + []byte("48;5;72"), + []byte("48;5;73"), + []byte("48;5;74"), + []byte("48;5;75"), + []byte("48;5;76"), + []byte("48;5;77"), + []byte("48;5;78"), + []byte("48;5;79"), + []byte("48;5;80"), + []byte("48;5;81"), + []byte("48;5;82"), + []byte("48;5;83"), + []byte("48;5;84"), + []byte("48;5;85"), + []byte("48;5;86"), + []byte("48;5;87"), + []byte("48;5;88"), + []byte("48;5;89"), + []byte("48;5;90"), + []byte("48;5;91"), + []byte("48;5;92"), + []byte("48;5;93"), + []byte("48;5;94"), + []byte("48;5;95"), + []byte("48;5;96"), + []byte("48;5;97"), + []byte("48;5;98"), + []byte("48;5;99"), + []byte("48;5;100"), + []byte("48;5;101"), + []byte("48;5;102"), + []byte("48;5;103"), + []byte("48;5;104"), + []byte("48;5;105"), + []byte("48;5;106"), + []byte("48;5;107"), + []byte("48;5;108"), + []byte("48;5;109"), + []byte("48;5;110"), + []byte("48;5;111"), + []byte("48;5;112"), + []byte("48;5;113"), + []byte("48;5;114"), + []byte("48;5;115"), + []byte("48;5;116"), + []byte("48;5;117"), + []byte("48;5;118"), + []byte("48;5;119"), + []byte("48;5;120"), + []byte("48;5;121"), + []byte("48;5;122"), + []byte("48;5;123"), + []byte("48;5;124"), + []byte("48;5;125"), + []byte("48;5;126"), + []byte("48;5;127"), + []byte("48;5;128"), + []byte("48;5;129"), + []byte("48;5;130"), + []byte("48;5;131"), + []byte("48;5;132"), + []byte("48;5;133"), + []byte("48;5;134"), + []byte("48;5;135"), + 
[]byte("48;5;136"), + []byte("48;5;137"), + []byte("48;5;138"), + []byte("48;5;139"), + []byte("48;5;140"), + []byte("48;5;141"), + []byte("48;5;142"), + []byte("48;5;143"), + []byte("48;5;144"), + []byte("48;5;145"), + []byte("48;5;146"), + []byte("48;5;147"), + []byte("48;5;148"), + []byte("48;5;149"), + []byte("48;5;150"), + []byte("48;5;151"), + []byte("48;5;152"), + []byte("48;5;153"), + []byte("48;5;154"), + []byte("48;5;155"), + []byte("48;5;156"), + []byte("48;5;157"), + []byte("48;5;158"), + []byte("48;5;159"), + []byte("48;5;160"), + []byte("48;5;161"), + []byte("48;5;162"), + []byte("48;5;163"), + []byte("48;5;164"), + []byte("48;5;165"), + []byte("48;5;166"), + []byte("48;5;167"), + []byte("48;5;168"), + []byte("48;5;169"), + []byte("48;5;170"), + []byte("48;5;171"), + []byte("48;5;172"), + []byte("48;5;173"), + []byte("48;5;174"), + []byte("48;5;175"), + []byte("48;5;176"), + []byte("48;5;177"), + []byte("48;5;178"), + []byte("48;5;179"), + []byte("48;5;180"), + []byte("48;5;181"), + []byte("48;5;182"), + []byte("48;5;183"), + []byte("48;5;184"), + []byte("48;5;185"), + []byte("48;5;186"), + []byte("48;5;187"), + []byte("48;5;188"), + []byte("48;5;189"), + []byte("48;5;190"), + []byte("48;5;191"), + []byte("48;5;192"), + []byte("48;5;193"), + []byte("48;5;194"), + []byte("48;5;195"), + []byte("48;5;196"), + []byte("48;5;197"), + []byte("48;5;198"), + []byte("48;5;199"), + []byte("48;5;200"), + []byte("48;5;201"), + []byte("48;5;202"), + []byte("48;5;203"), + []byte("48;5;204"), + []byte("48;5;205"), + []byte("48;5;206"), + []byte("48;5;207"), + []byte("48;5;208"), + []byte("48;5;209"), + []byte("48;5;210"), + []byte("48;5;211"), + []byte("48;5;212"), + []byte("48;5;213"), + []byte("48;5;214"), + []byte("48;5;215"), + []byte("48;5;216"), + []byte("48;5;217"), + []byte("48;5;218"), + []byte("48;5;219"), + []byte("48;5;220"), + []byte("48;5;221"), + []byte("48;5;222"), + []byte("48;5;223"), + []byte("48;5;224"), + []byte("48;5;225"), + []byte("48;5;226"), 
+ []byte("48;5;227"), + []byte("48;5;228"), + []byte("48;5;229"), + []byte("48;5;230"), + []byte("48;5;231"), + []byte("48;5;232"), + []byte("48;5;233"), + []byte("48;5;234"), + []byte("48;5;235"), + []byte("48;5;236"), + []byte("48;5;237"), + []byte("48;5;238"), + []byte("48;5;239"), + []byte("48;5;240"), + []byte("48;5;241"), + []byte("48;5;242"), + []byte("48;5;243"), + []byte("48;5;244"), + []byte("48;5;245"), + []byte("48;5;246"), + []byte("48;5;247"), + []byte("48;5;248"), + []byte("48;5;249"), + []byte("48;5;250"), + []byte("48;5;251"), + []byte("48;5;252"), + []byte("48;5;253"), + []byte("48;5;254"), + []byte("48;5;255"), +} diff --git a/main.go b/main.go new file mode 100644 index 0000000..16e08a3 --- /dev/null +++ b/main.go @@ -0,0 +1,380 @@ +// Copyright 2020 The golang.design Initiative Authors. +// All rights reserved. Use of this source code is governed +// by a GNU GPLv3 license that can be found in the LICENSE file. + +package main + +import ( + "bytes" + "flag" + "fmt" + "io" + "io/ioutil" + "log" + "os" + "os/exec" + "os/signal" + "strings" + "syscall" + "time" + + "golang.design/x/bench/internal/lock" + "golang.design/x/bench/internal/stat" + "golang.design/x/bench/internal/term" +) + +func usage() { + fmt.Fprintf(os.Stderr, `usage: bench [options] +options for daemon usage: + -daemon + run bench service + -list + print current and pending commands + +options for significant tests: + -delta-test test + significance test to apply to delta: utest, ttest, or none (default "utest") + -alpha α + consider change significant if p < α (default 0.05) + -geomean + print the geometric mean of each file (default false) + -split labels + split benchmarks by labels (default "pkg,goos,goarch") + -sort order + sort by order: [-]delta, [-]name, none (default "none") + +options for running benchmarks: + -v go test + the -v flag from go test, (default false) + -name go test + the -bench flag from go test (default .) 
(default ".") + -count go test + the -count flag from go test (default 10) (default 10) + -time go test + the -benchtime flag from go test (default unset) + -cpuprocs go test + the -cpu flag to go test (default unset) + +options for performance locking + -shared + acquire lock in shared mode (default exclusive mode) + -cpufreq percent + set CPU frequency to percent between the min and max (default 90) + while running command, or "none" for no adjustment +`) + os.Exit(2) +} + +var ( + flagDaemon *bool + flagList *bool + + flagDeltaTest *string + flagAlpha *float64 + flagGeomean *bool + flagSplit *string + flagSort *string + + flagShared *bool + flagCPUFreq *lock.CpufreqFlag + + flagVerbose *bool + flagName *string + flagCount *int + flagTime *string + flagCPUProcs *string +) + +func main() { + log.SetPrefix("bench: ") + log.SetFlags(0) + flag.Usage = usage + + // daemon args + flagDaemon = flag.Bool("daemon", false, "run bench service") + flagList = flag.Bool("list", false, "print current and pending commands") + + // benchstat args + flagDeltaTest = flag.String("delta-test", "utest", "significance `test` to apply to delta: utest, ttest, or none") + flagAlpha = flag.Float64("alpha", 0.05, "consider change significant if p < `α`") + flagGeomean = flag.Bool("geomean", false, "print the geometric mean of each file") + flagSplit = flag.String("split", "pkg,goos,goarch", "split benchmarks by `labels`") + flagSort = flag.String("sort", "none", "sort by `order`: [-]delta, [-]name, none") + + // perflock flags + flagShared = flag.Bool("shared", false, "acquire lock in shared mode (default exclusive mode)") + flagCPUFreq = &lock.CpufreqFlag{Percent: 90} + flag.Var(flagCPUFreq, "cpufreq", "set CPU frequency to `percent` between the min and max\n\twhile running command, or \"none\" for no adjustment") + + // go test args + flagVerbose = flag.Bool("v", false, "the -v flag from `go test`, (default false)") + flagName = flag.String("name", ".", "the -bench flag from `go test` 
(default .)") + flagCount = flag.Int("count", 10, "the -count flag from `go test` (default 10)") + flagTime = flag.String("time", "", "the -benchtime flag from `go test` (default unset)") + flagCPUProcs = flag.String("cpuprocs", "", "the -cpu flag to `go test` (default unset)") + flag.Parse() + + if *flagDaemon { + if flag.NArg() > 0 { + flag.Usage() + os.Exit(2) + } + lock.RunDaemon() + return + } + if *flagList { + if flag.NArg() > 0 { + flag.Usage() + os.Exit(2) + } + c := lock.NewClient() + if c == nil { + log.Fatal("Is the bench daemon running?") + } + list := c.List() + if len(list) == 0 { + log.Println("daemon is running but no running benchmarks.") + return + } + for _, l := range list { + log.Println(l) + } + return + } + + if flag.NArg() > 0 { + runCompare() + return + } + + // prepare go test command + args := []string{ + "go", + "test", + "-run=^$", + } + if *flagVerbose { + args = append(args, "-v") + } + args = append(args, fmt.Sprintf("-bench=%s", *flagName)) + if *flagCount <= 0 { + *flagCount = 10 + } + args = append(args, fmt.Sprintf("-count=%d", *flagCount)) + if *flagTime != "" { + args = append(args, fmt.Sprintf("-benchtime=%s", *flagTime)) + } + if *flagCPUProcs != "" { + args = append(args, fmt.Sprintf("-cpu=%s", *flagCPUProcs)) + } + + // acquire lock + c := lock.NewClient() + if c == nil { + log.Printf(term.Red("run benchmarks without performance locking...")) + } else { + if !c.Acquire(*flagShared, true, strings.Join(args, " ")) { + list := c.List() + log.Printf("Waiting for lock...\n") + for _, l := range list { + log.Println(l) + } + c.Acquire(*flagShared, false, strings.Join(args, " ")) + } + if !*flagShared && flagCPUFreq.Percent >= 0 { + c.SetCPUFreq(flagCPUFreq.Percent) + log.Print(term.Gray(fmt.Sprintf("run benchmarks under %d%% cpufreq...", flagCPUFreq.Percent))) + } + } + + // Ignore SIGINT and SIGQUIT so they pass through to the + // child. 
+ signal.Notify(make(chan os.Signal), os.Interrupt, syscall.SIGQUIT) + + // run bench + runBench(args) +} + +// shellEscape escapes a single shell token. +func shellEscape(x string) string { + if len(x) == 0 { + return "''" + } + for _, r := range x { + if 'a' <= r && r <= 'z' || 'A' <= r && r <= 'Z' || '0' <= r && r <= '9' || strings.ContainsRune("@%_-+:,./", r) { + continue + } + // Unsafe character. + return "'" + strings.Replace(x, "'", "'\"'\"'", -1) + "'" + } + return x +} + +// shellEscapeList escapes a list of shell tokens. +func shellEscapeList(xs []string) string { + out := make([]string, len(xs)) + for i, x := range xs { + out[i] = shellEscape(x) + } + return strings.Join(out, " ") +} + +var deltaTestNames = map[string]stat.DeltaTest{ + "none": stat.NoDeltaTest, + "u": stat.UTest, + "u-test": stat.UTest, + "utest": stat.UTest, + "t": stat.TTest, + "t-test": stat.TTest, + "ttest": stat.TTest, +} + +func runCompare() { + c := &stat.Collection{ + Alpha: *flagAlpha, + AddGeoMean: *flagGeomean, + DeltaTest: deltaTestNames[strings.ToLower(*flagDeltaTest)], + } + + for _, file := range flag.Args() { + f, err := os.Open(file) + if err != nil { + log.Print(err) + flag.Usage() + os.Exit(2) + } + defer f.Close() + + if err := c.AddFile(file, f); err != nil { + log.Fatal(err) + } + } + + tables := c.Tables() + var buf bytes.Buffer + stat.FormatText(&buf, tables) + os.Stdout.Write(buf.Bytes()) +} + +func runBench(args []string) { + log.Print(strings.Join(args, " ")) + cmd := exec.Command(args[0], args[1:]...) 
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		log.Fatal(err)
+	}
+	stderr, err := cmd.StderrPipe()
+	if err != nil {
+		log.Fatal(err)
+	}
+	// Mirror the child's stdout while collecting it for the later
+	// statistical analysis.
+	doneCh := make(chan []byte, 1)
+	go func() {
+		var data []byte
+		buf := make([]byte, 1024)
+		for {
+			n, err := stdout.Read(buf)
+			if n > 0 {
+				// Write raw bytes: the previous
+				// fmt.Fprintf(os.Stdout, string(buf[:n])) used the
+				// output as a format string, mangling any '%' in it.
+				os.Stdout.Write(buf[:n])
+				data = append(data, buf[:n]...)
+			}
+			if err != nil {
+				if err != io.EOF {
+					fmt.Fprintf(os.Stderr, "%v", err)
+				}
+				doneCh <- data
+				return
+			}
+		}
+	}()
+	errCh := make(chan error, 1)
+	go func() {
+		buf := make([]byte, 1024)
+		for {
+			n, err := stderr.Read(buf)
+			if n > 0 {
+				os.Stderr.Write(buf[:n])
+			}
+			if err != nil {
+				if err == io.EOF {
+					errCh <- nil
+				} else {
+					errCh <- err
+				}
+				return
+			}
+		}
+	}()
+
+	// Start only reports launch failures (e.g. binary not found);
+	// *exec.ExitError can only ever come from Wait, so the former
+	// switch on it here was dead code that swallowed real errors.
+	if err := cmd.Start(); err != nil {
+		log.Fatal(err)
+	}
+
+	// Wait for BOTH pipe readers: selecting on whichever channel is
+	// ready first raced stderr's EOF against stdout's data and could
+	// drop the collected results entirely.
+	if err := <-errCh; err != nil {
+		// reading the benchmark's stderr failed; it was interrupted.
+		os.Exit(2)
+	}
+	results := <-doneCh
+
+	// Reap the child and propagate its exit status, as the original
+	// ExitError handling intended.
+	if err := cmd.Wait(); err != nil {
+		if ee, ok := err.(*exec.ExitError); ok {
+			status := ee.Sys().(syscall.WaitStatus)
+			if status.Exited() {
+				os.Exit(status.ExitStatus())
+			}
+		}
+		log.Fatal(err)
+	}
+
+	// do nothing if no tests were ran.
+	if strings.Contains(string(results), "no Go files") ||
+		strings.Contains(string(results), "no test files") {
+		return
+	}
+
+	fname := "bench-" + time.Now().Format("2006-01-02-15:04:05") + ".txt"
+	if err := ioutil.WriteFile(fname, results, 0644); err != nil {
+		// try again, maybe the user was too fast?
+		if err := ioutil.WriteFile(fname, results, 0644); err != nil {
+			log.Fatal("cannot save benchmark result to your disk.")
+		}
+	}
+	log.Printf("results are saved to file: ./%s\n\n", fname)
+
+	computeStat(results)
+}
+
+// sortNames maps the accepted -sort spellings to row orderings; the
+// zero value (nil) for unknown names means "no sorting".
+var sortNames = map[string]stat.Order{
+	"none":  nil,
+	"name":  stat.ByName,
+	"delta": stat.ByDelta,
+}
+
+// computeStat parses raw `go test -bench` output and prints a
+// benchstat-style summary table to stdout, honoring the -sort, -split,
+// -alpha, -geomean and -delta-test flags.
+func computeStat(data []byte) {
+	sortName := *flagSort
+	reverse := false
+	if strings.HasPrefix(sortName, "-") {
+		reverse = true
+		sortName = sortName[1:]
+	}
+	// A plain lookup suffices: missing keys yield nil, same as the
+	// former comma-ok form with a discarded ok.
+	order := sortNames[sortName]
+	c := &stat.Collection{
+		Alpha:      *flagAlpha,
+		AddGeoMean: *flagGeomean,
+		DeltaTest:  deltaTestNames[strings.ToLower(*flagDeltaTest)],
+	}
+
+	if *flagSplit != "" {
+		c.SplitBy = strings.Split(*flagSplit, ",")
+	}
+	if order != nil {
+		if reverse {
+			order = stat.Reverse(order)
+		}
+		c.Order = order
+	}
+	c.AddData("", data)
+	tables := c.Tables()
+	var buf bytes.Buffer
+	stat.FormatText(&buf, tables)
+	fmt.Print(buf.String())
+}