diff --git a/README.md b/README.md index 5f0e580..a256452 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,124 @@ # bench -refined performance measurement in Go + +Reliable performance measurement for Go programs. All in one design. + +``` +$ go get golang.design/x/bench +``` + +## Features + +- Combine [benchstat](https://pkg.go.dev/golang.org/x/perf/cmd/benchstat), [perflock](https://github.com/aclements/perflock) and more... +- Short command and only run benchmarks +- Automatic performance locking for benchmarks +- Automatic statistic analysis for benchmark results +- Color indications for benchmark results + +## Usage + +### Enable `bench` Daemon (optional, Linux only) + +```sh +$ cd $GOPATH/src/golang.design/x/bench +$ ./install.bash +``` + +If your init system is supported, this will also configure `bench` to start automatically on boot. + +Or you can install and run `bench` daemon manually: + +```sh +$ sudo install $GOPATH/bin/bench /usr/bin/bench +$ sudo -b bench -daemon +``` + +### Default Behavior + +```sh +$ bench +``` + +The detault behavior of `bench` run benchmarks under your current +working directory, and each benchmark will be ran 10 times for further +statistical analysis. It will also try to acquire performance lock from +`bench` daemon to gain more stable results. Furthermore, the benchmark +results are saved as a text file to the working directory and named as +`.txt`. + +Example: + +``` +$ cd example +$ bench +bench: run benchmarks under 90% cpufreq... +bench: go test -run=^$ -bench=. -count=10 +goos: linux +goarch: amd64 +pkg: golang.design/x/bench/example +BenchmarkDemo-16 21114 57340 ns/op +... +BenchmarkDemo-16 21004 57097 ns/op +PASS +ok golang.design/x/bench/example 17.791s +bench: results are saved to file: ./bench-2020-11-07-19:59:51.txt + +name time/op +Demo-16 57.0µs ±1% + +$ # ... do some changes to the benchmark ... + +$ bench +bench: run benchmarks under 90% cpufreq... +bench: go test -run=^$ -bench=. 
-count=10 +goos: linux +goarch: amd64 +pkg: golang.design/x/bench/example +BenchmarkDemo-16 213145 5625 ns/op +... +BenchmarkDemo-16 212959 5632 ns/op +PASS +ok golang.design/x/bench/example 12.536s +bench: results are saved to file: ./bench-2020-11-07-20:00:16.txt + +name time/op +Demo-16 5.63µs ±0% + +$ bench bench-2020-11-07-19:59:51.txt bench-2020-11-07-20:00:16.txt +name old time/op new time/op delta +Demo-16 57.0µs ±1% 5.6µs ±0% -90.13% (p=0.000 n=10+8) +``` + +### Options + +Options for checking daemon status: + +```sh +bench -list +``` + +Options for statistic tests: + +```sh +bench old.txt [new.txt] # same from benchstat +bench -delta-test +bench -alpha +bench -geomean +bench -split +bench -sort +``` + +Options for running benchmarks: + +```sh +bench -v # enable verbose outputs +bench -shared # enable shared execution +bench -cpufreq 90 # cpu frequency (default: 90) +bench -name BenchmarkXXX # go test `-bench` flag (default: .) +bench -count 20 # go test `-count` flag (default: 10) +bench -time 100x # go test `-benchtime` flag (default: unset) +bench -cpuproc 1,2,4,8,16,32,128 # go test `-cpu` flag (default: unset) +``` + +## License + +© 2020 The golang.design Authors \ No newline at end of file diff --git a/example/bench-2020-11-07-19:59:51.txt b/example/bench-2020-11-07-19:59:51.txt new file mode 100644 index 0000000..c9f1071 --- /dev/null +++ b/example/bench-2020-11-07-19:59:51.txt @@ -0,0 +1,15 @@ +goos: linux +goarch: amd64 +pkg: golang.design/x/bench/example +BenchmarkDemo-16 21114 57340 ns/op +BenchmarkDemo-16 21109 56897 ns/op +BenchmarkDemo-16 21091 56833 ns/op +BenchmarkDemo-16 21121 56821 ns/op +BenchmarkDemo-16 21105 56953 ns/op +BenchmarkDemo-16 21112 56830 ns/op +BenchmarkDemo-16 20836 57499 ns/op +BenchmarkDemo-16 21084 56885 ns/op +BenchmarkDemo-16 20990 57245 ns/op +BenchmarkDemo-16 21004 57097 ns/op +PASS +ok golang.design/x/bench/example 17.791s diff --git a/example/bench-2020-11-07-20:00:16.txt b/example/bench-2020-11-07-20:00:16.txt 
new file mode 100644 index 0000000..f908588 --- /dev/null +++ b/example/bench-2020-11-07-20:00:16.txt @@ -0,0 +1,15 @@ +goos: linux +goarch: amd64 +pkg: golang.design/x/bench/example +BenchmarkDemo-16 213145 5625 ns/op +BenchmarkDemo-16 211988 5629 ns/op +BenchmarkDemo-16 211838 5629 ns/op +BenchmarkDemo-16 212151 5627 ns/op +BenchmarkDemo-16 212577 5627 ns/op +BenchmarkDemo-16 210150 5635 ns/op +BenchmarkDemo-16 212650 5637 ns/op +BenchmarkDemo-16 210063 5670 ns/op +BenchmarkDemo-16 212568 5665 ns/op +BenchmarkDemo-16 212959 5632 ns/op +PASS +ok golang.design/x/bench/example 12.536s diff --git a/example/bench_test.go b/example/bench_test.go new file mode 100644 index 0000000..41fdf19 --- /dev/null +++ b/example/bench_test.go @@ -0,0 +1,19 @@ +// Copyright 2020 The golang.design Initiative Authors. +// All rights reserved. Use of this source code is governed +// by a GNU GPLv3 license that can be found in the LICENSE file. + +package example + +import "testing" + +func grow(n int) { + if n > 0 { + grow(n - 1) + } +} + +func BenchmarkDemo(b *testing.B) { + for i := 0; i < b.N; i++ { + grow(10000) // try change this and re-run `bench` + } +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..e9ab0f9 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module golang.design/x/bench + +go 1.15 diff --git a/init/systemd/README.md b/init/systemd/README.md new file mode 100644 index 0000000..9b89bdd --- /dev/null +++ b/init/systemd/README.md @@ -0,0 +1,6 @@ +To configure systemd to run `bench`, run + +``` +$ sudo install -m 0644 bench.service /etc/systemd/system +$ sudo systemctl enable --now bench.service +``` \ No newline at end of file diff --git a/init/systemd/bench.service b/init/systemd/bench.service new file mode 100644 index 0000000..b26e176 --- /dev/null +++ b/init/systemd/bench.service @@ -0,0 +1,10 @@ +[Unit] +Description=Bench daemon + +[Service] +Type=simple +ExecStart=/usr/bin/bench -daemon +Restart=on-failure + +[Install] +WantedBy=multi-user.target \ 
No newline at end of file diff --git a/init/upstart/README.md b/init/upstart/README.md new file mode 100644 index 0000000..352756d --- /dev/null +++ b/init/upstart/README.md @@ -0,0 +1,6 @@ +To configure Upstart to run `bench`, copy `bench.conf` into `/etc/init/` and run sudo start `bench`. E.g., + +```sh +$ sudo install -m 0644 bench.conf /etc/init/ +$ sudo start bench +``` \ No newline at end of file diff --git a/init/upstart/bench.conf b/init/upstart/bench.conf new file mode 100644 index 0000000..a12771e --- /dev/null +++ b/init/upstart/bench.conf @@ -0,0 +1,11 @@ +# bench - Benchmark performance locking service + +description "bench daemon" + +start on runlevel [2345] +stop on runlevel [!2345] +respawn + +console log + +exec bench -daemon \ No newline at end of file diff --git a/install.sh b/install.sh new file mode 100755 index 0000000..a0db679 --- /dev/null +++ b/install.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +set -e + +BINPATH="$(go env GOBIN)" +if [[ -z "$BINPATH" ]]; then + BINPATH="$(go env GOPATH)/bin" +fi +BIN="$BINPATH/bench" +if [[ ! -x "$BIN" ]]; then + echo "bench binary $BIN does not exist." 
2>&1 + echo "Please run go install golang.design/x/bench" 2>&1 + exit 1 +fi + +echo "Installing $BIN to /usr/bin" 1>&2 +sudo install "$BIN" /usr/bin/bench + +start="-b /usr/bin/bench -daemon" +starttype= +if [[ -d /etc/init ]]; then + echo "Installing init script for Upstart" 1>&2 + sudo install -m 0644 init/upstart/bench.conf /etc/init/ + start="service bench start" + starttype=" (using Upstart)" +fi +if [[ -d /etc/systemd ]]; then + echo "Installing service for systemd" 1>&2 + sudo install -m 0644 init/systemd/bench.service /etc/systemd/system + sudo systemctl enable --quiet bench.service + start="systemctl start bench.service" + starttype=" (using systemd)" +fi + +if /usr/bin/bench -list >/dev/null 2>&1; then + echo "Not starting bench daemon (already running)" 1>&2 +else + echo "Starting bench daemon$starttype" 1>&2 + sudo $start +fi \ No newline at end of file diff --git a/internal/benchfmt/fmt.go b/internal/benchfmt/fmt.go new file mode 100644 index 0000000..6d1c718 --- /dev/null +++ b/internal/benchfmt/fmt.go @@ -0,0 +1,329 @@ +// Package benchfmt provides readers and writers for the Go benchmark format. +// +// The format is documented at https://golang.org/design/14313-benchmark-format +package benchfmt + +import ( + "bufio" + "bytes" + "fmt" + "io" + "sort" + "strconv" + "strings" + "unicode" +) + +// Reader reads benchmark results from an io.Reader. +// Use Next to advance through the results. +// +// br := benchfmt.NewReader(r) +// for br.Next() { +// res := br.Result() +// ... +// } +// err = br.Err() // get any error encountered during iteration +// ... +type Reader struct { + s *bufio.Scanner + labels Labels + // permLabels are permanent labels read from the start of the + // file or provided by AddLabels. They cannot be overridden. 
+ permLabels Labels + lineNum int + // cached from last call to newResult, to save on allocations + lastName string + lastNameLabels Labels + // cached from the last call to Next + result *Result + err error +} + +// NewReader creates a BenchmarkReader that reads from r. +func NewReader(r io.Reader) *Reader { + return &Reader{ + s: bufio.NewScanner(r), + labels: make(Labels), + } +} + +// AddLabels adds additional labels as if they had been read from the header of a file. +// It must be called before the first call to r.Next. +func (r *Reader) AddLabels(labels Labels) { + r.permLabels = labels.Copy() + for k, v := range labels { + r.labels[k] = v + } +} + +// Result represents a single line from a benchmark file. +// All information about that line is self-contained in the Result. +// A Result is immutable once created. +type Result struct { + // Labels is the set of persistent labels that apply to the result. + // Labels must not be modified. + Labels Labels + // NameLabels is the set of ephemeral labels that were parsed + // from the benchmark name/line. + // NameLabels must not be modified. + NameLabels Labels + // LineNum is the line number on which the result was found + LineNum int + // Content is the verbatim input line of the benchmark file, beginning with the string "Benchmark". + Content string +} + +// SameLabels reports whether r and b have the same labels. +func (r *Result) SameLabels(b *Result) bool { + return r.Labels.Equal(b.Labels) && r.NameLabels.Equal(b.NameLabels) +} + +// Labels is a set of key-value strings. +type Labels map[string]string + +// String returns the labels formatted as a comma-separated +// list enclosed in braces. 
+func (l Labels) String() string { + var out bytes.Buffer + out.WriteString("{") + for k, v := range l { + fmt.Fprintf(&out, "%q: %q, ", k, v) + } + if out.Len() > 1 { + // Remove extra ", " + out.Truncate(out.Len() - 2) + } + out.WriteString("}") + return out.String() +} + +// Keys returns a sorted list of the keys in l. +func (l Labels) Keys() []string { + var out []string + for k := range l { + out = append(out, k) + } + sort.Strings(out) + return out +} + +// Equal reports whether l and b have the same keys and values. +func (l Labels) Equal(b Labels) bool { + if len(l) != len(b) { + return false + } + for k := range l { + if l[k] != b[k] { + return false + } + } + return true +} + +// A Printer prints a sequence of benchmark results. +type Printer struct { + w io.Writer + labels Labels +} + +// NewPrinter constructs a BenchmarkPrinter writing to w. +func NewPrinter(w io.Writer) *Printer { + return &Printer{w: w} +} + +// Print writes the lines necessary to recreate r. +func (p *Printer) Print(r *Result) error { + var keys []string + // Print removed keys first. + for k := range p.labels { + if r.Labels[k] == "" { + keys = append(keys, k) + } + } + sort.Strings(keys) + for _, k := range keys { + if _, err := fmt.Fprintf(p.w, "%s:\n", k); err != nil { + return err + } + } + // Then print new or changed keys. + keys = keys[:0] + for k, v := range r.Labels { + if v != "" && p.labels[k] != v { + keys = append(keys, k) + } + } + sort.Strings(keys) + for _, k := range keys { + if _, err := fmt.Fprintf(p.w, "%s: %s\n", k, r.Labels[k]); err != nil { + return err + } + } + // Finally print the actual line itself. + if _, err := fmt.Fprintf(p.w, "%s\n", r.Content); err != nil { + return err + } + p.labels = r.Labels + return nil +} + +// parseNameLabels extracts extra labels from a benchmark name and sets them in labels. 
+func parseNameLabels(name string, labels Labels) { + dash := strings.LastIndex(name, "-") + if dash >= 0 { + // Accept -N as an alias for /gomaxprocs=N + _, err := strconv.Atoi(name[dash+1:]) + if err == nil { + labels["gomaxprocs"] = name[dash+1:] + name = name[:dash] + } + } + parts := strings.Split(name, "/") + labels["name"] = parts[0] + for i, sub := range parts[1:] { + equals := strings.Index(sub, "=") + var key string + if equals >= 0 { + key, sub = sub[:equals], sub[equals+1:] + } else { + key = fmt.Sprintf("sub%d", i+1) + } + labels[key] = sub + } +} + +// newResult parses a line and returns a Result object for the line. +func (r *Reader) newResult(labels Labels, lineNum int, name, content string) *Result { + res := &Result{ + Labels: labels, + LineNum: lineNum, + Content: content, + } + if r.lastName != name { + r.lastName = name + r.lastNameLabels = make(Labels) + parseNameLabels(name, r.lastNameLabels) + } + res.NameLabels = r.lastNameLabels + return res +} + +// Copy returns a new copy of the labels map, to protect against +// future modifications to labels. +func (l Labels) Copy() Labels { + new := make(Labels) + for k, v := range l { + new[k] = v + } + return new +} + +// TODO(quentin): How to represent and efficiently group multiple lines? + +// Next returns the next benchmark result from the file. If there are +// no further results, it returns nil, io.EOF. +func (r *Reader) Next() bool { + if r.err != nil { + return false + } + copied := false + havePerm := r.permLabels != nil + for r.s.Scan() { + r.lineNum++ + line := r.s.Text() + if key, value, ok := parseKeyValueLine(line); ok { + if _, ok := r.permLabels[key]; ok { + continue + } + if !copied { + copied = true + r.labels = r.labels.Copy() + } + // TODO(quentin): Spec says empty value is valid, but + // we need a way to cancel previous labels, so we'll + // treat an empty value as a removal. 
+ if value == "" { + delete(r.labels, key) + } else { + r.labels[key] = value + } + continue + } + // Blank line delimits the header. If we find anything else, the file must not have a header. + if !havePerm { + if line == "" { + r.permLabels = r.labels.Copy() + } else { + r.permLabels = Labels{} + } + } + if fullName, ok := parseBenchmarkLine(line); ok { + r.result = r.newResult(r.labels, r.lineNum, fullName, line) + return true + } + } + if err := r.s.Err(); err != nil { + r.err = err + return false + } + r.err = io.EOF + return false +} + +// Result returns the most recent result generated by a call to Next. +func (r *Reader) Result() *Result { + return r.result +} + +// Err returns the error state of the reader. +func (r *Reader) Err() error { + if r.err == io.EOF { + return nil + } + return r.err +} + +// parseKeyValueLine attempts to parse line as a key: value pair. ok +// indicates whether the line could be parsed. +func parseKeyValueLine(line string) (key, val string, ok bool) { + for i, c := range line { + if i == 0 && !unicode.IsLower(c) { + return + } + if unicode.IsSpace(c) || unicode.IsUpper(c) { + return + } + if i > 0 && c == ':' { + key = line[:i] + val = line[i+1:] + break + } + } + if key == "" { + return + } + if val == "" { + ok = true + return + } + for len(val) > 0 && (val[0] == ' ' || val[0] == '\t') { + val = val[1:] + ok = true + } + return +} + +// parseBenchmarkLine attempts to parse line as a benchmark result. If +// successful, fullName is the name of the benchmark with the +// "Benchmark" prefix stripped, and ok is true. 
+func parseBenchmarkLine(line string) (fullName string, ok bool) { + space := strings.IndexFunc(line, unicode.IsSpace) + if space < 0 { + return + } + name := line[:space] + if !strings.HasPrefix(name, "Benchmark") { + return + } + return name[len("Benchmark"):], true +} diff --git a/internal/cpupower/cpupower.go b/internal/cpupower/cpupower.go new file mode 100644 index 0000000..9d9b056 --- /dev/null +++ b/internal/cpupower/cpupower.go @@ -0,0 +1,134 @@ +// Package cpupower manipulates Linux CPU frequency scaling settings. +package cpupower + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "regexp" + "sort" + "strconv" + "strings" +) + +// Domain is a frequency scaling domain. This may include more than +// one CPU. +type Domain struct { + path string + min, max int + available []int +} + +var cpuRe = regexp.MustCompile(`cpu\d+$`) + +// Domains returns the frequency scaling domains of this host. +func Domains() ([]*Domain, error) { + dir := "/sys/devices/system/cpu" + fs, err := ioutil.ReadDir(dir) + if err != nil { + return nil, err + } + + var domains []*Domain + haveDomains := make(map[string]bool) + for _, f := range fs { + if !f.IsDir() || !cpuRe.MatchString(f.Name()) { + continue + } + pdir := filepath.Join(dir, f.Name(), "cpufreq") + + // Get the frequency domain, if any. + cpus, err := ioutil.ReadFile(filepath.Join(pdir, "freqdomain_cpus")) + if err == nil { + if haveDomains[string(cpus)] { + // We already have a CPU in this domain. 
+ continue + } + haveDomains[string(cpus)] = true + } else if !os.IsNotExist(err) { + return nil, err + } + + min, err := readInt(filepath.Join(pdir, "cpuinfo_min_freq")) + if err != nil { + return nil, err + } + max, err := readInt(filepath.Join(pdir, "cpuinfo_max_freq")) + if err != nil { + return nil, err + } + avail, err := readInts(filepath.Join(pdir, "scaling_available_frequencies")) + if err != nil && !os.IsNotExist(err) { + return nil, err + } + sort.Ints(avail) + domains = append(domains, &Domain{pdir, min, max, avail}) + } + return domains, nil +} + +// AvailableRange returns the available frequency range this CPU is +// capable of and the set of available frequencies in ascending order +// or nil if any frequency can be set. +func (d *Domain) AvailableRange() (int, int, []int) { + return d.min, d.max, d.available +} + +// CurrentRange returns the current frequency range this CPU's +// governor can select between. +func (d *Domain) CurrentRange() (int, int, error) { + min, err := readInt(filepath.Join(d.path, "scaling_min_freq")) + if err != nil { + return 0, 0, err + } + max, err := readInt(filepath.Join(d.path, "scaling_max_freq")) + if err != nil { + return 0, 0, err + } + return min, max, nil +} + +// SetRange sets the frequency range this CPU's governor can select +// between. +func (d *Domain) SetRange(min, max int) error { + // Attempting to set an empty range will cause an IO error. + // Rather than trying to figure out the right order to set + // them in, try both orders. 
+ err1 := writeInt(filepath.Join(d.path, "scaling_min_freq"), min) + if err2 := writeInt(filepath.Join(d.path, "scaling_max_freq"), max); err2 != nil { + return err2 + } + if err1 != nil { + err1 = writeInt(filepath.Join(d.path, "scaling_min_freq"), min) + } + return err1 +} + +func readInt(path string) (int, error) { + data, err := ioutil.ReadFile(path) + if err != nil { + return 0, err + } + return strconv.Atoi(strings.TrimSpace(string(data))) +} + +func writeInt(path string, val int) error { + return ioutil.WriteFile(path, []byte(fmt.Sprintf("%d", val)), 0) +} + +func readInts(path string) ([]int, error) { + data, err := ioutil.ReadFile(path) + if err != nil { + return nil, err + } + fields := strings.Fields(string(data)) + ints := make([]int, len(fields)) + for i, field := range fields { + ints[i], err = strconv.Atoi(field) + if err != nil { + return nil, err + } + } + return ints, nil +} diff --git a/internal/lock/client.go b/internal/lock/client.go new file mode 100644 index 0000000..41925e4 --- /dev/null +++ b/internal/lock/client.go @@ -0,0 +1,71 @@ +package lock + +import ( + "encoding/gob" + "fmt" + "log" + "net" +) + +// Client is a lock client +type Client struct { + c net.Conn + + gr *gob.Encoder + gw *gob.Decoder +} + +// NewClient returns a lock client +func NewClient() *Client { + c, err := net.Dial("unix", Socketpath) + if err != nil { + log.Printf("failed to connect bench daemon: %v", err) + return nil + } + + // Send credentials. 
+ err = writeCredentials(c.(*net.UnixConn)) + if err != nil { + log.Fatal("failed to send credentials: ", err) + } + + gr, gw := gob.NewEncoder(c), gob.NewDecoder(c) + + return &Client{c, gr, gw} +} + +func (c *Client) do(action perflockAction, response interface{}) { + err := c.gr.Encode(action) + if err != nil { + log.Fatal(err) + } + + err = c.gw.Decode(response) + if err != nil { + log.Fatal(err) + } +} + +// Acquire acuiqres the lock +func (c *Client) Acquire(shared, nonblocking bool, msg string) bool { + var ok bool + c.do(perflockAction{actionAcquire{Shared: shared, NonBlocking: nonblocking, Msg: msg}}, &ok) + return ok +} + +// List lists all perflock actions +func (c *Client) List() []string { + var list []string + c.do(perflockAction{actionList{}}, &list) + return list +} + +// SetCPUFreq sets the given cpu frequency +func (c *Client) SetCPUFreq(percent int) error { + var err string + c.do(perflockAction{actionSetCPUFreq{Percent: percent}}, &err) + if err == "" { + return nil + } + return fmt.Errorf("%s", err) +} diff --git a/internal/lock/creds.go b/internal/lock/creds.go new file mode 100644 index 0000000..56d6f0b --- /dev/null +++ b/internal/lock/creds.go @@ -0,0 +1,61 @@ +package lock + +import ( + "fmt" + "net" + "os" + "syscall" +) + +// TODO: Use SO_PEERCRED instead? + +func writeCredentials(c *net.UnixConn) error { + ucred := syscall.Ucred{Pid: int32(os.Getpid()), Uid: uint32(os.Getuid()), Gid: uint32(os.Getgid())} + credOob := syscall.UnixCredentials(&ucred) + credMsg := []byte("x") + n, oobn, err := c.WriteMsgUnix(credMsg, credOob, nil) + if err != nil { + return err + } + if n != 1 { + return fmt.Errorf("short send (%d bytes)", n) + } + if oobn != len(credOob) { + return fmt.Errorf("short OOB send (%d bytes)", oobn) + } + return nil +} + +func readCredentials(c *net.UnixConn) (*syscall.Ucred, error) { + // Enable receiving credentials on c. 
+ f, err := c.File() + if err != nil { + return nil, err + } + err = syscall.SetsockoptInt(int(f.Fd()), syscall.SOL_SOCKET, syscall.SO_PASSCRED, 1) + f.Close() + if err != nil { + return nil, err + } + + // Receive credentials. + buf := make([]byte, 1) + oob := make([]byte, 128) + n, oobn, _, _, err := c.ReadMsgUnix(buf, oob) + if err != nil { + return nil, err + } + if n != 1 { + return nil, fmt.Errorf("expected 1 byte, got %d", n) + } + + // Parse OOB data. + scms, err := syscall.ParseSocketControlMessage(oob[:oobn]) + if err != nil { + return nil, err + } + if len(scms) != 1 { + return nil, fmt.Errorf("expected 1 control message, got %d", len(scms)) + } + return syscall.ParseUnixCredentials(&scms[0]) +} diff --git a/internal/lock/daemon.go b/internal/lock/daemon.go new file mode 100644 index 0000000..2af0e91 --- /dev/null +++ b/internal/lock/daemon.go @@ -0,0 +1,262 @@ +package lock + +import ( + "encoding/gob" + "fmt" + "io" + "log" + "net" + "os" + "os/user" + "time" + + "golang.design/x/bench/internal/cpupower" +) + +var theLock perflock + +// RunDaemon runs lock daemon +func RunDaemon() { + // check if daemon is running + c, _ := net.Dial("unix", Socketpath) + if c != nil { + c.Close() + log.Fatalf("The bench daemon is already running at %s !", Socketpath) + return + } + + os.Remove(Socketpath) + l, err := net.Listen("unix", Socketpath) + if err != nil { + log.Fatal(err) + } + defer l.Close() + + // Make the socket world-writable/connectable. + err = os.Chmod(Socketpath, 0777) + if err != nil { + log.Fatal(err) + } + + // Receive connections. 
+ for { + conn, err := l.Accept() + if err != nil { + log.Fatal(err) + } + + go func(c net.Conn) { + defer c.Close() + NewServer(c).Serve() + }(conn) + } +} + +// Server is the bench lock server +type Server struct { + c net.Conn + userName string + + locker *locker + acquiring bool + + oldCPUFreqs []*cpuFreqSettings +} + +// NewServer returns a bench lock server +func NewServer(c net.Conn) *Server { + return &Server{c: c} +} + +// Serve serves the bench lock server +func (s *Server) Serve() { + // Drop any held locks if we exit for any reason. + defer s.drop() + + // Get connection credentials. + ucred, err := readCredentials(s.c.(*net.UnixConn)) + if err != nil { + log.Print("reading credentials: ", err) + return + } + + u, err := user.LookupId(fmt.Sprintf("%d", ucred.Uid)) + s.userName = "???" + if err == nil { + s.userName = u.Username + } + + // Receive incoming actions. We do this in a goroutine so the + // main handler can select on EOF or lock acquisition. + actions := make(chan perflockAction) + go func() { + gr := gob.NewDecoder(s.c) + for { + var msg perflockAction + err := gr.Decode(&msg) + if err != nil { + if err != io.EOF { + log.Print(err) + } + close(actions) + return + } + actions <- msg + } + }() + + // Process incoming actions. + var acquireC <-chan bool + gw := gob.NewEncoder(s.c) + for { + select { + case action, ok := <-actions: + if !ok { + // Connection closed. + return + } + if s.acquiring { + log.Printf("protocol error: message while acquiring") + return + } + switch action := action.Action.(type) { + case actionAcquire: + if s.locker != nil { + log.Printf("protocol error: acquiring lock twice") + return + } + msg := fmt.Sprintf("%s\t%s\t%s", s.userName, time.Now().Format(time.Stamp), action.Msg) + if action.Shared { + msg += " [shared]" + } + s.locker = theLock.Enqueue(action.Shared, action.NonBlocking, msg) + if s.locker != nil { + // Enqueued. Wait for acquire. 
+ s.acquiring = true + acquireC = s.locker.C + } else { + // Non-blocking acquire failed. + if err := gw.Encode(false); err != nil { + log.Print(err) + return + } + } + + case actionList: + list := theLock.Queue() + if err := gw.Encode(list); err != nil { + log.Print(err) + return + } + + case actionSetCPUFreq: + if s.locker == nil { + log.Printf("protocol error: setting cpuFreq without lock") + return + } + err := s.setCPUFreq(action.Percent) + errString := "" + if err != nil { + errString = err.Error() + } + if err := gw.Encode(errString); err != nil { + log.Print(err) + return + } + + default: + log.Printf("unknown message") + return + } + + case <-acquireC: + // Lock acquired. + s.acquiring, acquireC = false, nil + if err := gw.Encode(true); err != nil { + log.Print(err) + return + } + } + } +} + +func (s *Server) drop() { + // Restore the CPU cpuFreq before releasing the lock. + if s.oldCPUFreqs != nil { + s.restoreCPUFreq() + s.oldCPUFreqs = nil + } + // Release the lock. + if s.locker != nil { + theLock.Dequeue(s.locker) + s.locker = nil + } +} + +type cpuFreqSettings struct { + domain *cpupower.Domain + min, max int +} + +func (s *Server) setCPUFreq(percent int) error { + domains, err := cpupower.Domains() + if err != nil { + return err + } + if len(domains) == 0 { + return fmt.Errorf("no power domains") + } + + // Save current frequency settings. + old := []*cpuFreqSettings{} + for _, d := range domains { + min, max, err := d.CurrentRange() + if err != nil { + return err + } + old = append(old, &cpuFreqSettings{d, min, max}) + } + s.oldCPUFreqs = old + + // Set new settings. + abs := func(x int) int { + if x < 0 { + return -x + } + return x + } + for _, d := range domains { + min, max, avail := d.AvailableRange() + target := (max-min)*percent/100 + min + + // Find the nearest available frequency. 
+ if len(avail) != 0 { + closest := avail[0] + for _, a := range avail { + if abs(target-a) < abs(target-closest) { + closest = a + } + } + target = closest + } + + err := d.SetRange(target, target) + if err != nil { + return err + } + } + + return nil +} + +func (s *Server) restoreCPUFreq() error { + var err error + for _, g := range s.oldCPUFreqs { + // Try to set all of the domains, even if one fails. + err1 := g.domain.SetRange(g.min, g.max) + if err1 != nil && err == nil { + err = err1 + } + } + return err +} diff --git a/internal/lock/flag.go b/internal/lock/flag.go new file mode 100644 index 0000000..23be15d --- /dev/null +++ b/internal/lock/flag.go @@ -0,0 +1,36 @@ +package lock + +import ( + "fmt" + "regexp" + "strconv" +) + +// Socketpath between lock daemon and client +var Socketpath = "/var/run/bench.socket" + +// CpufreqFlag ... +type CpufreqFlag struct { + Percent int +} + +func (f *CpufreqFlag) String() string { + if f.Percent < 0 { + return "none" + } + return fmt.Sprintf("%d", f.Percent) +} + +// Set set the cpu frequency percentage +func (f *CpufreqFlag) Set(v string) error { + if v == "none" { + f.Percent = -1 + } else { + m := regexp.MustCompile(`^([0-9]+)$`).FindStringSubmatch(v) + if m == nil { + return fmt.Errorf("cpufreq must be \"none\" or \"N\"") + } + f.Percent, _ = strconv.Atoi(m[1]) + } + return nil +} diff --git a/internal/lock/lock.go b/internal/lock/lock.go new file mode 100644 index 0000000..0dab757 --- /dev/null +++ b/internal/lock/lock.go @@ -0,0 +1,84 @@ +package lock + +import "sync" + +type perflock struct { + l sync.Mutex + q []*locker +} + +type locker struct { + C <-chan bool + c chan<- bool + shared bool + woken bool + + msg string +} + +func (l *perflock) Enqueue(shared, nonblocking bool, msg string) *locker { + ch := make(chan bool, 1) + locker := &locker{ch, ch, shared, false, msg} + + // Enqueue. + l.l.Lock() + defer l.l.Unlock() + l.setQ(append(l.q, locker)) + + if nonblocking && !locker.woken { + // Acquire failed. 
Dequeue. + l.setQ(l.q[:len(l.q)-1]) + return nil + } + + return locker +} + +func (l *perflock) Dequeue(locker *locker) { + l.l.Lock() + defer l.l.Unlock() + for i, o := range l.q { + if locker == o { + copy(l.q[i:], l.q[i+1:]) + l.setQ(l.q[:len(l.q)-1]) + return + } + } + panic("Dequeue of non-enqueued locker") +} + +func (l *perflock) Queue() []string { + var q []string + + l.l.Lock() + defer l.l.Unlock() + for _, locker := range l.q { + q = append(q, locker.msg) + } + return q +} + +func (l *perflock) setQ(q []*locker) { + l.q = q + if len(q) == 0 { + return + } + + wake := func(locker *locker) { + if locker.woken == false { + locker.woken = true + locker.c <- true + } + } + if q[0].shared { + // Wake all shared acquires at the head of the queue. + for _, locker := range q { + if !locker.shared { + break + } + wake(locker) + } + } else { + wake(q[0]) + } +} diff --git a/internal/lock/proto.go b/internal/lock/proto.go new file mode 100644 index 0000000..a990e48 --- /dev/null +++ b/internal/lock/proto.go @@ -0,0 +1,35 @@ +package lock + +import "encoding/gob" + +type perflockAction struct { + Action interface{} +} + +// actionAcquire acquires the lock. The response is a boolean +// indicating whether or not the lock was acquired (which may be false +// for a non-blocking acquire). +type actionAcquire struct { + Shared bool + NonBlocking bool + Msg string +} + +// actionList returns the list of current and pending lock +// acquisitions as a []string. +type actionList struct { +} + +// actionSetCPUFreq sets the CPU frequency of all CPUs. The caller +// must hold the lock. +type actionSetCPUFreq struct { + // Percent indicates the percent to set the CPU cpuFreq to + // between the lower and highest available frequencies. 
+ Percent int +} + +func init() { + gob.Register(actionAcquire{}) + gob.Register(actionList{}) + gob.Register(actionSetCPUFreq{}) +} diff --git a/internal/stat/algo.go b/internal/stat/algo.go new file mode 100644 index 0000000..2f3336c --- /dev/null +++ b/internal/stat/algo.go @@ -0,0 +1,108 @@ +package stat + +// Miscellaneous helper algorithms + +import ( + "fmt" +) + +func maxint(a, b int) int { + if a > b { + return a + } + return b +} + +func minint(a, b int) int { + if a < b { + return a + } + return b +} + +func sumint(xs []int) int { + sum := 0 + for _, x := range xs { + sum += x + } + return sum +} + +// bisect returns an x in [low, high] such that |f(x)| <= tolerance +// using the bisection method. +// +// f(low) and f(high) must have opposite signs. +// +// If f does not have a root in this interval (e.g., it is +// discontiguous), this returns the X of the apparent discontinuity +// and false. +func bisect(f func(float64) float64, low, high, tolerance float64) (float64, bool) { + flow, fhigh := f(low), f(high) + if -tolerance <= flow && flow <= tolerance { + return low, true + } + if -tolerance <= fhigh && fhigh <= tolerance { + return high, true + } + if mathSign(flow) == mathSign(fhigh) { + panic(fmt.Sprintf("root of f is not bracketed by [low, high]; f(%g)=%g f(%g)=%g", low, flow, high, fhigh)) + } + for { + mid := (high + low) / 2 + fmid := f(mid) + if -tolerance <= fmid && fmid <= tolerance { + return mid, true + } + if mid == high || mid == low { + return mid, false + } + if mathSign(fmid) == mathSign(flow) { + low = mid + flow = fmid + } else { + high = mid + fhigh = fmid + } + } +} + +// bisectBool implements the bisection method on a boolean function. +// It returns x1, x2 ∈ [low, high], x1 < x2 such that f(x1) != f(x2) +// and x2 - x1 <= xtol. +// +// If f(low) == f(high), it panics. 
+func bisectBool(f func(float64) bool, low, high, xtol float64) (x1, x2 float64) { + flow, fhigh := f(low), f(high) + if flow == fhigh { + panic(fmt.Sprintf("root of f is not bracketed by [low, high]; f(%g)=%v f(%g)=%v", low, flow, high, fhigh)) + } + for { + if high-low <= xtol { + return low, high + } + mid := (high + low) / 2 + if mid == high || mid == low { + return low, high + } + fmid := f(mid) + if fmid == flow { + low = mid + flow = fmid + } else { + high = mid + fhigh = fmid + } + } +} + +// series returns the sum of the series f(0), f(1), ... +// +// This implementation is fast, but subject to round-off error. +func series(f func(float64) float64) float64 { + y, yp := 0.0, 1.0 + for n := 0.0; y != yp; n++ { + yp = y + y += f(n) + } + return y +} diff --git a/internal/stat/beta.go b/internal/stat/beta.go new file mode 100644 index 0000000..49a4805 --- /dev/null +++ b/internal/stat/beta.go @@ -0,0 +1,88 @@ +package stat + +import "math" + +func lgamma(x float64) float64 { + y, _ := math.Lgamma(x) + return y +} + +// mathBeta returns the value of the complete beta function B(a, b). +func mathBeta(a, b float64) float64 { + // B(x,y) = Γ(x)Γ(y) / Γ(x+y) + return math.Exp(lgamma(a) + lgamma(b) - lgamma(a+b)) +} + +// mathBetaInc returns the value of the regularized incomplete beta +// function Iₓ(a, b). +// +// This is not to be confused with the "incomplete beta function", +// which can be computed as BetaInc(x, a, b)*Beta(a, b). +// +// If x < 0 or x > 1, returns NaN. +func mathBetaInc(x, a, b float64) float64 { + // Based on Numerical Recipes in C, section 6.4. This uses the + // continued fraction definition of I: + // + // (xᵃ*(1-x)ᵇ)/(a*B(a,b)) * (1/(1+(d₁/(1+(d₂/(1+...)))))) + // + // where B(a,b) is the beta function and + // + // d_{2m+1} = -(a+m)(a+b+m)x/((a+2m)(a+2m+1)) + // d_{2m} = m(b-m)x/((a+2m-1)(a+2m)) + if x < 0 || x > 1 { + return math.NaN() + } + bt := 0.0 + if 0 < x && x < 1 { + // Compute the coefficient before the continued + // fraction. 
+ bt = math.Exp(lgamma(a+b) - lgamma(a) - lgamma(b) + + a*math.Log(x) + b*math.Log(1-x)) + } + if x < (a+1)/(a+b+2) { + // Compute continued fraction directly. + return bt * betacf(x, a, b) / a + } + // Compute continued fraction after symmetry transform. + return 1 - bt*betacf(1-x, b, a)/b +} + +// betacf is the continued fraction component of the regularized +// incomplete beta function Iₓ(a, b). +func betacf(x, a, b float64) float64 { + const maxIterations = 200 + const epsilon = 3e-14 + + raiseZero := func(z float64) float64 { + if math.Abs(z) < math.SmallestNonzeroFloat64 { + return math.SmallestNonzeroFloat64 + } + return z + } + + c := 1.0 + d := 1 / raiseZero(1-(a+b)*x/(a+1)) + h := d + for m := 1; m <= maxIterations; m++ { + mf := float64(m) + + // Even step of the recurrence. + numer := mf * (b - mf) * x / ((a + 2*mf - 1) * (a + 2*mf)) + d = 1 / raiseZero(1+numer*d) + c = raiseZero(1 + numer/c) + h *= d * c + + // Odd step of the recurrence. + numer = -(a + mf) * (a + b + mf) * x / ((a + 2*mf) * (a + 2*mf + 1)) + d = 1 / raiseZero(1+numer*d) + c = raiseZero(1 + numer/c) + hfac := d * c + h *= hfac + + if math.Abs(hfac-1) < epsilon { + return h + } + } + panic("betainc: a or b too big; failed to converge") +} diff --git a/internal/stat/data.go b/internal/stat/data.go new file mode 100644 index 0000000..64d91b8 --- /dev/null +++ b/internal/stat/data.go @@ -0,0 +1,230 @@ +package stat + +import ( + "bytes" + "fmt" + "io" + "strconv" + "strings" + + "golang.design/x/bench/internal/benchfmt" + "golang.design/x/bench/internal/term" +) + +// A Collection is a collection of benchmark results. +type Collection struct { + // Configs, Groups, and Units give the set of configs, + // groups, and units from the keys in Stats in an order + // meant to match the order the benchmarks were read in. 
+ Configs, Groups, Units []string + + // Benchmarks gives the set of benchmarks from the keys in + // Stats by group in an order meant to match the order + // benchmarks were read in. + Benchmarks map[string][]string + + // Metrics holds the accumulated metrics for each key. + Metrics map[Key]*Metrics + + // DeltaTest is the test to use to decide if a change is significant. + // If nil, it defaults to UTest. + DeltaTest DeltaTest + + // Alpha is the p-value cutoff to report a change as significant. + // If zero, it defaults to 0.05. + Alpha float64 + + // AddGeoMean specifies whether to add a line to the table + // showing the geometric mean of all the benchmark results. + AddGeoMean bool + + // SplitBy specifies the labels to split results by. + // By default, results will only be split by full name. + SplitBy []string + + // Order specifies the row display order for this table. + // If Order is nil, the table rows are printed in order of + // first appearance in the input. + Order Order +} + +// A Key identifies one metric (e.g., "ns/op", "B/op") from one +// benchmark (function name sans "Benchmark" prefix) and optional +// group in one configuration (input file name). +type Key struct { + Config, Group, Benchmark, Unit string +} + +// A Metrics holds the measurements of a single metric +// (for example, ns/op or MB/s) +// for all runs of a particular benchmark. +type Metrics struct { + Unit string // unit being measured + Values []float64 // measured values + RValues []float64 // Values with outliers removed + Min float64 // min of RValues + Mean float64 // mean of RValues + Max float64 // max of RValues +} + +// FormatMean formats m.Mean using scaler. +func (m *Metrics) FormatMean(scaler Scaler) string { + var s string + if scaler != nil { + s = scaler(m.Mean) + } else { + s = fmt.Sprint(m.Mean) + } + return s +} + +// FormatDiff computes and formats the percent variation of max and min compared to mean. 
+// If m.Mean or m.Max is zero, FormatDiff returns an empty string.
+func (c *Collection) addMetrics(key Key) *Metrics { + if c.Metrics == nil { + c.Metrics = make(map[Key]*Metrics) + } + if stat, ok := c.Metrics[key]; ok { + return stat + } + + addString := func(strings *[]string, add string) { + for _, s := range *strings { + if s == add { + return + } + } + *strings = append(*strings, add) + } + addString(&c.Configs, key.Config) + addString(&c.Groups, key.Group) + if c.Benchmarks == nil { + c.Benchmarks = make(map[string][]string) + } + benchmarks := c.Benchmarks[key.Group] + addString(&benchmarks, key.Benchmark) + c.Benchmarks[key.Group] = benchmarks + addString(&c.Units, key.Unit) + m := &Metrics{Unit: key.Unit} + c.Metrics[key] = m + return m +} + +// AddFile adds the benchmark results in the formatted data +// (read from the reader r) to the named configuration. +func (c *Collection) AddFile(config string, f io.Reader) error { + c.Configs = append(c.Configs, config) + key := Key{Config: config} + br := benchfmt.NewReader(f) + for br.Next() { + c.addResult(key, br.Result()) + } + return br.Err() +} + +// AddData adds the benchmark results in the formatted data +// (read from the reader r) to the named configuration. +func (c *Collection) AddData(config string, data []byte) error { + return c.AddFile(config, bytes.NewReader(data)) +} + +// AddResults adds the benchmark results to the named configuration. 
+func (c *Collection) AddResults(config string, results []*benchfmt.Result) { + c.Configs = append(c.Configs, config) + key := Key{Config: config} + for _, r := range results { + c.addResult(key, r) + } +} + +func (c *Collection) addResult(key Key, r *benchfmt.Result) { + f := strings.Fields(r.Content) + if len(f) < 4 { + return + } + name := f[0] + if !strings.HasPrefix(name, "Benchmark") { + return + } + name = strings.TrimPrefix(name, "Benchmark") + n, _ := strconv.Atoi(f[1]) + if n == 0 { + return + } + key.Group = c.makeGroup(r) + key.Benchmark = name + for i := 2; i+2 <= len(f); i += 2 { + val, err := strconv.ParseFloat(f[i], 64) + if err != nil { + continue + } + key.Unit = f[i+1] + m := c.addMetrics(key) + m.Values = append(m.Values, val) + } +} + +func (c *Collection) makeGroup(r *benchfmt.Result) string { + var out string + for _, s := range c.SplitBy { + v := r.NameLabels[s] + if v == "" { + v = r.Labels[s] + } + if v != "" { + if out != "" { + out = out + " " + } + out += fmt.Sprintf("%s:%s", s, v) + } + } + return out +} diff --git a/internal/stat/delta.go b/internal/stat/delta.go new file mode 100644 index 0000000..9ec2605 --- /dev/null +++ b/internal/stat/delta.go @@ -0,0 +1,49 @@ +package stat + +import "errors" + +// A DeltaTest compares the old and new metrics and returns the +// expected probability that they are drawn from the same distribution. +// +// If a probability cannot be computed, the DeltaTest returns an +// error explaining why. Common errors include ErrSamplesEqual +// (all samples are equal), ErrSampleSize (there aren't enough samples), +// and ErrZeroVariance (the sample has zero variance). +// +// As a special case, the missing test NoDeltaTest returns -1, nil. +type DeltaTest func(old, new *Metrics) (float64, error) + +// Errors returned by DeltaTest. 
+var ( + ErrSamplesEqual = errors.New("all equal") + ErrSampleSize = errors.New("too few samples") + ErrZeroVariance = errors.New("zero variance") + ErrMismatchedSamples = errors.New("samples have different lengths") +) + +// NoDeltaTest applies no delta test; it returns -1, nil. +func NoDeltaTest(old, new *Metrics) (pval float64, err error) { + return -1, nil +} + +// TTest is a DeltaTest using the two-sample Welch t-test. +func TTest(old, new *Metrics) (pval float64, err error) { + t, err := TwoSampleWelchTTest( + Sample{Xs: old.RValues}, + Sample{Xs: new.RValues}, + LocationDiffers, + ) + if err != nil { + return -1, err + } + return t.P, nil +} + +// UTest is a DeltaTest using the Mann-Whitney U test. +func UTest(old, new *Metrics) (pval float64, err error) { + u, err := MannWhitneyUTest(old.RValues, new.RValues, LocationDiffers) + if err != nil { + return -1, err + } + return u.P, nil +} diff --git a/internal/stat/mathx.go b/internal/stat/mathx.go new file mode 100644 index 0000000..4673609 --- /dev/null +++ b/internal/stat/mathx.go @@ -0,0 +1,74 @@ +package stat + +import "math" + +var inf = math.Inf(1) +var nan = math.NaN() + +// mathSign returns the sign of x: -1 if x < 0, 0 if x == 0, 1 if x > 0. +// If x is NaN, it returns NaN. +func mathSign(x float64) float64 { + if x == 0 { + return 0 + } else if x < 0 { + return -1 + } else if x > 0 { + return 1 + } + return nan +} + +const smallFactLimit = 20 // 20! => 62 bits +var smallFact [smallFactLimit + 1]int64 + +func init() { + smallFact[0] = 1 + fact := int64(1) + for n := int64(1); n <= smallFactLimit; n++ { + fact *= n + smallFact[n] = fact + } +} + +// mathChoose returns the binomial coefficient of n and k. +func mathChoose(n, k int) float64 { + if k == 0 || k == n { + return 1 + } + if k < 0 || n < k { + return 0 + } + if n <= smallFactLimit { // Implies k <= smallFactLimit + // It's faster to do several integer multiplications + // than it is to do an extra integer division. 
+ // Remarkably, this is also faster than pre-computing + // Pascal's triangle (presumably because this is very + // cache efficient). + numer := int64(1) + for n1 := int64(n - (k - 1)); n1 <= int64(n); n1++ { + numer *= n1 + } + denom := smallFact[k] + return float64(numer / denom) + } + + return math.Exp(lchoose(n, k)) +} + +// mathLchoose returns math.Log(mathChoose(n, k)). +func mathLchoose(n, k int) float64 { + if k == 0 || k == n { + return 0 + } + if k < 0 || n < k { + return math.NaN() + } + return lchoose(n, k) +} + +func lchoose(n, k int) float64 { + a, _ := math.Lgamma(float64(n + 1)) + b, _ := math.Lgamma(float64(k + 1)) + c, _ := math.Lgamma(float64(n - k + 1)) + return a - b - c +} diff --git a/internal/stat/normaldist.go b/internal/stat/normaldist.go new file mode 100644 index 0000000..bf59ea8 --- /dev/null +++ b/internal/stat/normaldist.go @@ -0,0 +1,137 @@ +package stat + +import ( + "math" + "math/rand" +) + +// NormalDist is a normal (Gaussian) distribution with mean Mu and +// standard deviation Sigma. 
+type NormalDist struct { + Mu, Sigma float64 +} + +// StdNormal is the standard normal distribution (Mu = 0, Sigma = 1) +var StdNormal = NormalDist{0, 1} + +// 1/sqrt(2 * pi) +const invSqrt2Pi = 0.39894228040143267793994605993438186847585863116493465766592583 + +func (n NormalDist) PDF(x float64) float64 { + z := x - n.Mu + return math.Exp(-z*z/(2*n.Sigma*n.Sigma)) * invSqrt2Pi / n.Sigma +} + +func (n NormalDist) pdfEach(xs []float64) []float64 { + res := make([]float64, len(xs)) + if n.Mu == 0 && n.Sigma == 1 { + // Standard normal fast path + for i, x := range xs { + res[i] = math.Exp(-x*x/2) * invSqrt2Pi + } + } else { + a := -1 / (2 * n.Sigma * n.Sigma) + b := invSqrt2Pi / n.Sigma + for i, x := range xs { + z := x - n.Mu + res[i] = math.Exp(z*z*a) * b + } + } + return res +} + +func (n NormalDist) CDF(x float64) float64 { + return math.Erfc(-(x-n.Mu)/(n.Sigma*math.Sqrt2)) / 2 +} + +func (n NormalDist) cdfEach(xs []float64) []float64 { + res := make([]float64, len(xs)) + a := 1 / (n.Sigma * math.Sqrt2) + for i, x := range xs { + res[i] = math.Erfc(-(x-n.Mu)*a) / 2 + } + return res +} + +func (n NormalDist) InvCDF(p float64) (x float64) { + // This is based on Peter John Acklam's inverse normal CDF + // algorithm: http://home.online.no/~pjacklam/notes/invnorm/ + const ( + a1 = -3.969683028665376e+01 + a2 = 2.209460984245205e+02 + a3 = -2.759285104469687e+02 + a4 = 1.383577518672690e+02 + a5 = -3.066479806614716e+01 + a6 = 2.506628277459239e+00 + + b1 = -5.447609879822406e+01 + b2 = 1.615858368580409e+02 + b3 = -1.556989798598866e+02 + b4 = 6.680131188771972e+01 + b5 = -1.328068155288572e+01 + + c1 = -7.784894002430293e-03 + c2 = -3.223964580411365e-01 + c3 = -2.400758277161838e+00 + c4 = -2.549732539343734e+00 + c5 = 4.374664141464968e+00 + c6 = 2.938163982698783e+00 + + d1 = 7.784695709041462e-03 + d2 = 3.224671290700398e-01 + d3 = 2.445134137142996e+00 + d4 = 3.754408661907416e+00 + + plow = 0.02425 + phigh = 1 - plow + ) + + if p < 0 || p > 1 { + return nan + 
} else if p == 0 { + return -inf + } else if p == 1 { + return inf + } + + if p < plow { + // Rational approximation for lower region. + q := math.Sqrt(-2 * math.Log(p)) + x = (((((c1*q+c2)*q+c3)*q+c4)*q+c5)*q + c6) / + ((((d1*q+d2)*q+d3)*q+d4)*q + 1) + } else if phigh < p { + // Rational approximation for upper region. + q := math.Sqrt(-2 * math.Log(1-p)) + x = -(((((c1*q+c2)*q+c3)*q+c4)*q+c5)*q + c6) / + ((((d1*q+d2)*q+d3)*q+d4)*q + 1) + } else { + // Rational approximation for central region. + q := p - 0.5 + r := q * q + x = (((((a1*r+a2)*r+a3)*r+a4)*r+a5)*r + a6) * q / + (((((b1*r+b2)*r+b3)*r+b4)*r+b5)*r + 1) + } + + // Refine approximation. + e := 0.5*math.Erfc(-x/math.Sqrt2) - p + u := e * math.Sqrt(2*math.Pi) * math.Exp(x*x/2) + x = x - u/(1+x*u/2) + + // Adjust from standard normal. + return x*n.Sigma + n.Mu +} + +func (n NormalDist) Rand(r *rand.Rand) float64 { + var x float64 + if r == nil { + x = rand.NormFloat64() + } else { + x = r.NormFloat64() + } + return x*n.Sigma + n.Mu +} + +func (n NormalDist) Bounds() (float64, float64) { + const stddevs = 3 + return n.Mu - stddevs*n.Sigma, n.Mu + stddevs*n.Sigma +} diff --git a/internal/stat/sample.go b/internal/stat/sample.go new file mode 100644 index 0000000..8bc68d7 --- /dev/null +++ b/internal/stat/sample.go @@ -0,0 +1,336 @@ +package stat + +import ( + "math" + "sort" +) + +// Sample is a collection of possibly weighted data points. +type Sample struct { + // Xs is the slice of sample values. + Xs []float64 + + // Weights[i] is the weight of sample Xs[i]. If Weights is + // nil, all Xs have weight 1. Weights must have the same + // length of Xs and all values must be non-negative. + Weights []float64 + + // Sorted indicates that Xs is sorted in ascending order. + Sorted bool +} + +// Bounds returns the minimum and maximum values of xs. 
+func Bounds(xs []float64) (min float64, max float64) { + if len(xs) == 0 { + return math.NaN(), math.NaN() + } + min, max = xs[0], xs[0] + for _, x := range xs { + if x < min { + min = x + } + if x > max { + max = x + } + } + return +} + +// Bounds returns the minimum and maximum values of the Sample. +// +// If the Sample is weighted, this ignores samples with zero weight. +// +// This is constant time if s.Sorted and there are no zero-weighted +// values. +func (s Sample) Bounds() (min float64, max float64) { + if len(s.Xs) == 0 || (!s.Sorted && s.Weights == nil) { + return Bounds(s.Xs) + } + + if s.Sorted { + if s.Weights == nil { + return s.Xs[0], s.Xs[len(s.Xs)-1] + } + min, max = math.NaN(), math.NaN() + for i, w := range s.Weights { + if w != 0 { + min = s.Xs[i] + break + } + } + if math.IsNaN(min) { + return + } + for i := range s.Weights { + if s.Weights[len(s.Weights)-i-1] != 0 { + max = s.Xs[len(s.Weights)-i-1] + break + } + } + } else { + min, max = math.Inf(1), math.Inf(-1) + for i, x := range s.Xs { + w := s.Weights[i] + if x < min && w != 0 { + min = x + } + if x > max && w != 0 { + max = x + } + } + if math.IsInf(min, 0) { + min, max = math.NaN(), math.NaN() + } + } + return +} + +// vecSum returns the sum of xs. +func vecSum(xs []float64) float64 { + sum := 0.0 + for _, x := range xs { + sum += x + } + return sum +} + +// Sum returns the (possibly weighted) sum of the Sample. +func (s Sample) Sum() float64 { + if s.Weights == nil { + return vecSum(s.Xs) + } + sum := 0.0 + for i, x := range s.Xs { + sum += x * s.Weights[i] + } + return sum +} + +// Weight returns the total weight of the Sasmple. +func (s Sample) Weight() float64 { + if s.Weights == nil { + return float64(len(s.Xs)) + } + return vecSum(s.Weights) +} + +// Mean returns the arithmetic mean of xs. 
+func Mean(xs []float64) float64 { + if len(xs) == 0 { + return math.NaN() + } + m := 0.0 + for i, x := range xs { + m += (x - m) / float64(i+1) + } + return m +} + +// Mean returns the arithmetic mean of the Sample. +func (s Sample) Mean() float64 { + if len(s.Xs) == 0 || s.Weights == nil { + return Mean(s.Xs) + } + + m, wsum := 0.0, 0.0 + for i, x := range s.Xs { + // Use weighted incremental mean: + // m_i = (1 - w_i/wsum_i) * m_(i-1) + (w_i/wsum_i) * x_i + // = m_(i-1) + (x_i - m_(i-1)) * (w_i/wsum_i) + w := s.Weights[i] + wsum += w + m += (x - m) * w / wsum + } + return m +} + +// GeoMean returns the geometric mean of xs. xs must be positive. +func GeoMean(xs []float64) float64 { + if len(xs) == 0 { + return math.NaN() + } + m := 0.0 + for i, x := range xs { + if x <= 0 { + return math.NaN() + } + lx := math.Log(x) + m += (lx - m) / float64(i+1) + } + return math.Exp(m) +} + +// GeoMean returns the geometric mean of the Sample. All samples +// values must be positive. +func (s Sample) GeoMean() float64 { + if len(s.Xs) == 0 || s.Weights == nil { + return GeoMean(s.Xs) + } + + m, wsum := 0.0, 0.0 + for i, x := range s.Xs { + w := s.Weights[i] + wsum += w + lx := math.Log(x) + m += (lx - m) * w / wsum + } + return math.Exp(m) +} + +// Variance returns the sample variance of xs. +func Variance(xs []float64) float64 { + if len(xs) == 0 { + return math.NaN() + } else if len(xs) <= 1 { + return 0 + } + + // Based on Wikipedia's presentation of Welford 1962 + // (http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm). + // This is more numerically stable than the standard two-pass + // formula and not prone to massive cancellation. + mean, M2 := 0.0, 0.0 + for n, x := range xs { + delta := x - mean + mean += delta / float64(n+1) + M2 += delta * (x - mean) + } + return M2 / float64(len(xs)-1) +} + +// Variance returns the sample variance of xs. 
+func (s Sample) Variance() float64 { + if len(s.Xs) == 0 || s.Weights == nil { + return Variance(s.Xs) + } + // TODO(austin) + panic("Weighted Variance not implemented") +} + +// StdDev returns the sample standard deviation of xs. +func StdDev(xs []float64) float64 { + return math.Sqrt(Variance(xs)) +} + +// StdDev returns the sample standard deviation of the Sample. +func (s Sample) StdDev() float64 { + if len(s.Xs) == 0 || s.Weights == nil { + return StdDev(s.Xs) + } + // TODO(austin) + panic("Weighted StdDev not implemented") +} + +// Percentile returns the pctileth value from the Sample. This uses +// interpolation method R8 from Hyndman and Fan (1996). +// +// pctile will be capped to the range [0, 1]. If len(xs) == 0 or all +// weights are 0, returns NaN. +// +// Percentile(0.5) is the median. Percentile(0.25) and +// Percentile(0.75) are the first and third quartiles, respectively. +// +// This is constant time if s.Sorted and s.Weights == nil. +func (s Sample) Percentile(pctile float64) float64 { + if len(s.Xs) == 0 { + return math.NaN() + } else if pctile <= 0 { + min, _ := s.Bounds() + return min + } else if pctile >= 1 { + _, max := s.Bounds() + return max + } + + if !s.Sorted { + // TODO(austin) Use select algorithm instead + s = *s.Copy().Sort() + } + + if s.Weights == nil { + N := float64(len(s.Xs)) + //n := pctile * (N + 1) // R6 + n := 1/3.0 + pctile*(N+1/3.0) // R8 + kf, frac := math.Modf(n) + k := int(kf) + if k <= 0 { + return s.Xs[0] + } else if k >= len(s.Xs) { + return s.Xs[len(s.Xs)-1] + } + return s.Xs[k-1] + frac*(s.Xs[k]-s.Xs[k-1]) + } + // TODO(austin): Implement interpolation + + target := s.Weight() * pctile + + // TODO(austin) If we had cumulative weights, we could + // do this in log time. + for i, weight := range s.Weights { + target -= weight + if target < 0 { + return s.Xs[i] + } + } + return s.Xs[len(s.Xs)-1] +} + +// IQR returns the interquartile range of the Sample. 
+// +// This is constant time if s.Sorted and s.Weights == nil. +func (s Sample) IQR() float64 { + if !s.Sorted { + s = *s.Copy().Sort() + } + return s.Percentile(0.75) - s.Percentile(0.25) +} + +type sampleSorter struct { + xs []float64 + weights []float64 +} + +func (p *sampleSorter) Len() int { + return len(p.xs) +} + +func (p *sampleSorter) Less(i, j int) bool { + return p.xs[i] < p.xs[j] +} + +func (p *sampleSorter) Swap(i, j int) { + p.xs[i], p.xs[j] = p.xs[j], p.xs[i] + p.weights[i], p.weights[j] = p.weights[j], p.weights[i] +} + +// Sort sorts the samples in place in s and returns s. +// +// A sorted sample improves the performance of some algorithms. +func (s *Sample) Sort() *Sample { + if s.Sorted || sort.Float64sAreSorted(s.Xs) { + // All set + } else if s.Weights == nil { + sort.Float64s(s.Xs) + } else { + sort.Sort(&sampleSorter{s.Xs, s.Weights}) + } + s.Sorted = true + return s +} + +// Copy returns a copy of the Sample. +// +// The returned Sample shares no data with the original, so they can +// be modified (for example, sorted) independently. +func (s Sample) Copy() *Sample { + xs := make([]float64, len(s.Xs)) + copy(xs, s.Xs) + + weights := []float64(nil) + if s.Weights != nil { + weights = make([]float64, len(s.Weights)) + copy(weights, s.Weights) + } + + return &Sample{xs, weights, s.Sorted} +} diff --git a/internal/stat/scaler.go b/internal/stat/scaler.go new file mode 100644 index 0000000..62d102d --- /dev/null +++ b/internal/stat/scaler.go @@ -0,0 +1,114 @@ +package stat + +import ( + "fmt" + "strings" +) + +// A Scaler is a function that scales and formats a measurement. +// All measurements within a given table row are formatted +// using the same scaler, so that the units are consistent +// across the row. +type Scaler func(float64) string + +// NewScaler returns a Scaler appropriate for formatting +// the measurement val, which has the given unit. 
+func NewScaler(val float64, unit string) Scaler { + if hasBaseUnit(unit, "ns/op") || hasBaseUnit(unit, "ns/GC") { + return timeScaler(val) + } + + var format string + var scale float64 + var suffix string + + prescale := 1.0 + if hasBaseUnit(unit, "MB/s") { + prescale = 1e6 + } + + switch x := val * prescale; { + case x >= 99500000000000: + format, scale, suffix = "%.0f", 1e12, "T" + case x >= 9950000000000: + format, scale, suffix = "%.1f", 1e12, "T" + case x >= 995000000000: + format, scale, suffix = "%.2f", 1e12, "T" + case x >= 99500000000: + format, scale, suffix = "%.0f", 1e9, "G" + case x >= 9950000000: + format, scale, suffix = "%.1f", 1e9, "G" + case x >= 995000000: + format, scale, suffix = "%.2f", 1e9, "G" + case x >= 99500000: + format, scale, suffix = "%.0f", 1e6, "M" + case x >= 9950000: + format, scale, suffix = "%.1f", 1e6, "M" + case x >= 995000: + format, scale, suffix = "%.2f", 1e6, "M" + case x >= 99500: + format, scale, suffix = "%.0f", 1e3, "k" + case x >= 9950: + format, scale, suffix = "%.1f", 1e3, "k" + case x >= 995: + format, scale, suffix = "%.2f", 1e3, "k" + case x >= 99.5: + format, scale, suffix = "%.0f", 1, "" + case x >= 9.95: + format, scale, suffix = "%.1f", 1, "" + default: + format, scale, suffix = "%.2f", 1, "" + } + + if hasBaseUnit(unit, "B/op") || hasBaseUnit(unit, "bytes/op") || hasBaseUnit(unit, "bytes") { + suffix += "B" + } + if hasBaseUnit(unit, "MB/s") { + suffix += "B/s" + } + scale /= prescale + + return func(val float64) string { + return fmt.Sprintf(format+suffix, val/scale) + } +} + +func timeScaler(ns float64) Scaler { + var format string + var scale float64 + switch x := ns / 1e9; { + case x >= 99.5: + format, scale = "%.0fs", 1 + case x >= 9.95: + format, scale = "%.1fs", 1 + case x >= 0.995: + format, scale = "%.2fs", 1 + case x >= 0.0995: + format, scale = "%.0fms", 1000 + case x >= 0.00995: + format, scale = "%.1fms", 1000 + case x >= 0.000995: + format, scale = "%.2fms", 1000 + case x >= 0.0000995: + 
format, scale = "%.0fµs", 1000*1000 + case x >= 0.00000995: + format, scale = "%.1fµs", 1000*1000 + case x >= 0.000000995: + format, scale = "%.2fµs", 1000*1000 + case x >= 0.0000000995: + format, scale = "%.0fns", 1000*1000*1000 + case x >= 0.00000000995: + format, scale = "%.1fns", 1000*1000*1000 + default: + format, scale = "%.2fns", 1000*1000*1000 + } + return func(ns float64) string { + return fmt.Sprintf(format, ns/1e9*scale) + } +} + +// hasBaseUnit reports whether s has unit unit. +// For now, it reports whether s == unit or s ends in -unit. +func hasBaseUnit(s, unit string) bool { + return s == unit || strings.HasSuffix(s, "-"+unit) +} diff --git a/internal/stat/sort.go b/internal/stat/sort.go new file mode 100644 index 0000000..f618216 --- /dev/null +++ b/internal/stat/sort.go @@ -0,0 +1,32 @@ +package stat + +import ( + "math" + "sort" +) + +// An Order defines a sort order for a table. +// It reports whether t.Rows[i] should appear before t.Rows[j]. +type Order func(t *Table, i, j int) bool + +// ByName sorts tables by the Benchmark name column +func ByName(t *Table, i, j int) bool { + return t.Rows[i].Benchmark < t.Rows[j].Benchmark +} + +// ByDelta sorts tables by the Delta column, +// reversing the order when larger is better (for "speed" results). +func ByDelta(t *Table, i, j int) bool { + return math.Abs(t.Rows[i].PctDelta)*float64(t.Rows[i].Change) < + math.Abs(t.Rows[j].PctDelta)*float64(t.Rows[j].Change) +} + +// Reverse returns the reverse of the given order. +func Reverse(order Order) Order { + return func(t *Table, i, j int) bool { return order(t, j, i) } +} + +// Sort sorts a Table t (in place) by the given order. 
+func Sort(t *Table, order Order) { + sort.SliceStable(t.Rows, func(i, j int) bool { return order(t, i, j) }) +} diff --git a/internal/stat/table.go b/internal/stat/table.go new file mode 100644 index 0000000..06e202a --- /dev/null +++ b/internal/stat/table.go @@ -0,0 +1,208 @@ +package stat + +import ( + "fmt" + "strings" +) + +// A Table is a table for display in the benchstat output. +type Table struct { + Metric string + OldNewDelta bool // is this an old-new-delta table? + Configs []string + Groups []string + Rows []*Row +} + +// A Row is a table row for display in the benchstat output. +type Row struct { + Benchmark string // benchmark name + Group string // group name + Scaler Scaler // formatter for stats means + Metrics []*Metrics // columns of statistics + PctDelta float64 // unformatted percent change + Delta string // formatted percent change + Note string // additional information + Change int // +1 better, -1 worse, 0 unchanged +} + +// Tables returns tables comparing the benchmarks in the collection. +func (c *Collection) Tables() []*Table { + deltaTest := c.DeltaTest + if deltaTest == nil { + deltaTest = UTest + } + alpha := c.Alpha + if alpha == 0 { + alpha = 0.05 + } + + // Update statistics. + for _, m := range c.Metrics { + m.computeStats() + } + + var tables []*Table + key := Key{} + for _, key.Unit = range c.Units { + table := new(Table) + table.Configs = c.Configs + table.Groups = c.Groups + table.Metric = metricOf(key.Unit) + table.OldNewDelta = len(c.Configs) == 2 + for _, key.Group = range c.Groups { + for _, key.Benchmark = range c.Benchmarks[key.Group] { + row := &Row{Benchmark: key.Benchmark} + if len(c.Groups) > 1 { + // Show group headers if there is more than one group. 
+ row.Group = key.Group + } + + for _, key.Config = range c.Configs { + m := c.Metrics[key] + if m == nil { + row.Metrics = append(row.Metrics, new(Metrics)) + continue + } + row.Metrics = append(row.Metrics, m) + if row.Scaler == nil { + row.Scaler = NewScaler(m.Mean, m.Unit) + } + } + + // If there are only two configs being compared, add stats. + if table.OldNewDelta { + k0 := key + k0.Config = c.Configs[0] + k1 := key + k1.Config = c.Configs[1] + old := c.Metrics[k0] + new := c.Metrics[k1] + // If one is missing, omit row entirely. + // TODO: Control this better. + if old == nil || new == nil { + continue + } + pval, testerr := deltaTest(old, new) + row.PctDelta = 0.00 + row.Delta = "~" + if testerr == ErrZeroVariance { + row.Note = "(zero variance)" + } else if testerr == ErrSampleSize { + row.Note = "(too few samples)" + } else if testerr == ErrSamplesEqual { + row.Note = "(all equal)" + } else if testerr != nil { + row.Note = fmt.Sprintf("(%s)", testerr) + } else if pval < alpha { + if new.Mean == old.Mean { + row.Delta = "0.00%" + } else { + pct := ((new.Mean / old.Mean) - 1.0) * 100.0 + row.PctDelta = pct + row.Delta = fmt.Sprintf("%+.2f%%", pct) + if pct < 0 == (table.Metric != "speed") { // smaller is better, except speeds + row.Change = +1 + } else { + row.Change = -1 + } + } + } + if row.Note == "" && pval != -1 { + row.Note = fmt.Sprintf("(p=%0.3f n=%d+%d)", pval, len(old.RValues), len(new.RValues)) + } + } + + table.Rows = append(table.Rows, row) + } + } + + if len(table.Rows) > 0 { + if c.Order != nil { + Sort(table, c.Order) + } + if c.AddGeoMean { + addGeomean(c, table, key.Unit, table.OldNewDelta) + } + tables = append(tables, table) + } + } + + return tables +} + +var metricSuffix = map[string]string{ + "ns/op": "time/op", + "ns/GC": "time/GC", + "B/op": "alloc/op", + "MB/s": "speed", +} + +// metricOf returns the name of the metric with the given unit. 
+func metricOf(unit string) string { + if s := metricSuffix[unit]; s != "" { + return s + } + for s, suff := range metricSuffix { + if dashs := "-" + s; strings.HasSuffix(unit, dashs) { + prefix := strings.TrimSuffix(unit, dashs) + return prefix + "-" + suff + } + } + return unit +} + +// addGeomean adds a "geomean" row to the table, +// showing the geometric mean of all the benchmarks. +func addGeomean(c *Collection, t *Table, unit string, delta bool) { + row := &Row{Benchmark: "[Geo mean]"} + key := Key{Unit: unit} + geomeans := []float64{} + maxCount := 0 + for _, key.Config = range c.Configs { + var means []float64 + for _, key.Group = range c.Groups { + for _, key.Benchmark = range c.Benchmarks[key.Group] { + m := c.Metrics[key] + // Omit 0 values from the geomean calculation, + // as these either make the geomean undefined + // or zero (depending on who you ask). This + // typically comes up with things like + // allocation counts, where it's fine to just + // ignore the benchmark. + if m != nil && m.Mean != 0 { + means = append(means, m.Mean) + } + } + } + if len(means) > maxCount { + maxCount = len(means) + } + if len(means) == 0 { + row.Metrics = append(row.Metrics, new(Metrics)) + delta = false + } else { + geomean := GeoMean(means) + geomeans = append(geomeans, geomean) + if row.Scaler == nil { + row.Scaler = NewScaler(geomean, unit) + } + row.Metrics = append(row.Metrics, &Metrics{ + Unit: unit, + Mean: geomean, + }) + } + } + if maxCount <= 1 { + // Only one benchmark contributed to this geomean. + // Since the geomean is the same as the benchmark + // result, don't bother outputting it. 
+ return + } + if delta { + pct := ((geomeans[1] / geomeans[0]) - 1.0) * 100.0 + row.PctDelta = pct + row.Delta = fmt.Sprintf("%+.2f%%", pct) + } + t.Rows = append(t.Rows, row) +} diff --git a/internal/stat/tdist.go b/internal/stat/tdist.go new file mode 100644 index 0000000..cc5da07 --- /dev/null +++ b/internal/stat/tdist.go @@ -0,0 +1,32 @@ +package stat + +import "math" + +// A TDist is a Student's t-distribution with V degrees of freedom. +type TDist struct { + V float64 +} + +// PDF is the probability distribution function +func (t TDist) PDF(x float64) float64 { + return math.Exp(lgamma((t.V+1)/2)-lgamma(t.V/2)) / + math.Sqrt(t.V*math.Pi) * math.Pow(1+(x*x)/t.V, -(t.V+1)/2) +} + +// CDF is the cumulative distribution function +func (t TDist) CDF(x float64) float64 { + if x == 0 { + return 0.5 + } else if x > 0 { + return 1 - 0.5*mathBetaInc(t.V/(t.V+x*x), t.V/2, 0.5) + } else if x < 0 { + return 1 - t.CDF(-x) + } else { + return math.NaN() + } +} + +// Bounds ... +func (t TDist) Bounds() (float64, float64) { + return -4, 4 +} diff --git a/internal/stat/text.go b/internal/stat/text.go new file mode 100644 index 0000000..68d3a68 --- /dev/null +++ b/internal/stat/text.go @@ -0,0 +1,161 @@ +package stat + +import ( + "fmt" + "io" + "unicode/utf8" + + "golang.design/x/bench/internal/term" +) + +// FormatText appends a fixed-width text formatting of the tables to w. 
+func FormatText(w io.Writer, tables []*Table) { + var textTables [][]*textRow + for _, t := range tables { + textTables = append(textTables, toText(t, true)) + } + + var max []int + for _, table := range textTables { + for _, row := range table { + if len(row.cols) == 1 { + // Header row + continue + } + for len(max) < len(row.cols) { + max = append(max, 0) + } + for i, s := range row.cols { + n := utf8.RuneCountInString(s) + if max[i] < n { + max[i] = n + } + } + } + } + + for i, table := range textTables { + if i > 0 { + fmt.Fprintf(w, "\n") + } + + // headings + row := table[0] + for i, s := range row.cols { + switch i { + case 0: + fmt.Fprintf(w, "%-*s", max[i], s) + default: + fmt.Fprintf(w, " %-*s", max[i], s) + case len(row.cols) - 1: + fmt.Fprintf(w, " %s\n", s) + } + } + + // data + for _, row := range table[1:] { + for i, s := range row.cols { + switch { + case len(row.cols) == 1: + // Header row + fmt.Fprint(w, s) + case i == 0: + fmt.Fprintf(w, "%-*s", max[i], s) + default: + if i == len(row.cols)-1 && len(s) > 0 && s[0] == '(' { + // Left-align p value. + fmt.Fprintf(w, " %s", s) + break + } + fmt.Fprintf(w, " %*s", max[i], s) + } + } + fmt.Fprintf(w, "\n") + } + } +} + +// A textRow is a row of printed text columns. +type textRow struct { + cols []string +} + +func newTextRow(cols ...string) *textRow { + return &textRow{cols: cols} +} + +// newTextRowDelta returns a labeled row of text, with "±" inserted after +// each member of "cols" unless norange is true. 
+func newTextRowDelta(norange bool, label string, cols ...string) *textRow { + newcols := []string{} + newcols = append(newcols, label) + for _, s := range cols { + newcols = append(newcols, s) + if !norange { + newcols = append(newcols, "±") + } + } + return &textRow{cols: newcols} +} + +func (r *textRow) add(col string) { + r.cols = append(r.cols, col) +} + +func (r *textRow) trim() { + for len(r.cols) > 0 && r.cols[len(r.cols)-1] == "" { + r.cols = r.cols[:len(r.cols)-1] + } +} + +// toText converts the Table to a textual grid of cells, +// which can then be printed in fixed-width output. +func toText(t *Table, colorful bool) []*textRow { + var textRows []*textRow + switch len(t.Configs) { + case 1: + textRows = append(textRows, newTextRow("name", t.Metric)) + case 2: + textRows = append(textRows, newTextRow("name", "old "+t.Metric, "new "+t.Metric, "delta")) + default: + row := newTextRow("name \\ " + t.Metric) + row.cols = append(row.cols, t.Configs...) // TODO Should this also trim common path prefix? 
(see toCSV) + textRows = append(textRows, row) + } + + var group string + + for _, row := range t.Rows { + if row.Group != group { + group = row.Group + textRows = append(textRows, newTextRow(group)) + } + text := newTextRow(row.Benchmark) + for _, m := range row.Metrics { + text.cols = append(text.cols, m.Format(row.Scaler)) + } + if len(t.Configs) == 2 { + delta := row.Delta + if delta == "~" { + delta = "~ " + } + if colorful { + switch row.Change { + case 1: // better + delta = term.Green(delta) + case -1: // worse + delta = term.Red(delta) + default: // no change + delta = term.Gray(delta) + } + } + text.cols = append(text.cols, delta) + text.cols = append(text.cols, row.Note) + } + textRows = append(textRows, text) + } + for _, r := range textRows { + r.trim() + } + return textRows +} diff --git a/internal/stat/ttest.go b/internal/stat/ttest.go new file mode 100644 index 0000000..1f810c0 --- /dev/null +++ b/internal/stat/ttest.go @@ -0,0 +1,138 @@ +package stat + +import ( + "math" +) + +// A TTestResult is the result of a t-test. +type TTestResult struct { + // N1 and N2 are the sizes of the input samples. For a + // one-sample t-test, N2 is 0. + N1, N2 int + + // T is the value of the t-statistic for this t-test. + T float64 + + // DoF is the degrees of freedom for this t-test. + DoF float64 + + // AltHypothesis specifies the alternative hypothesis tested + // by this test against the null hypothesis that there is no + // difference in the means of the samples. + AltHypothesis LocationHypothesis + + // P is p-value for this t-test for the given null hypothesis. 
+ P float64 +} + +func newTTestResult(n1, n2 int, t, dof float64, alt LocationHypothesis) *TTestResult { + dist := TDist{dof} + var p float64 + switch alt { + case LocationDiffers: + p = 2 * (1 - dist.CDF(math.Abs(t))) + case LocationLess: + p = dist.CDF(t) + case LocationGreater: + p = 1 - dist.CDF(t) + } + return &TTestResult{N1: n1, N2: n2, T: t, DoF: dof, AltHypothesis: alt, P: p} +} + +// A TTestSample is a sample that can be used for a one or two sample +// t-test. +type TTestSample interface { + Weight() float64 + Mean() float64 + Variance() float64 +} + +var () + +// TwoSampleTTest performs a two-sample (unpaired) Student's t-test on +// samples x1 and x2. This is a test of the null hypothesis that x1 +// and x2 are drawn from populations with equal means. It assumes x1 +// and x2 are independent samples, that the distributions have equal +// variance, and that the populations are normally distributed. +func TwoSampleTTest(x1, x2 TTestSample, alt LocationHypothesis) (*TTestResult, error) { + n1, n2 := x1.Weight(), x2.Weight() + if n1 == 0 || n2 == 0 { + return nil, ErrSampleSize + } + v1, v2 := x1.Variance(), x2.Variance() + if v1 == 0 && v2 == 0 { + return nil, ErrZeroVariance + } + + dof := n1 + n2 - 2 + v12 := ((n1-1)*v1 + (n2-1)*v2) / dof + t := (x1.Mean() - x2.Mean()) / math.Sqrt(v12*(1/n1+1/n2)) + return newTTestResult(int(n1), int(n2), t, dof, alt), nil +} + +// TwoSampleWelchTTest performs a two-sample (unpaired) Welch's t-test +// on samples x1 and x2. This is like TwoSampleTTest, but does not +// assume the distributions have equal variance. +func TwoSampleWelchTTest(x1, x2 TTestSample, alt LocationHypothesis) (*TTestResult, error) { + n1, n2 := x1.Weight(), x2.Weight() + if n1 <= 1 || n2 <= 1 { + // TODO: Can we still do this with n == 1? 
+ return nil, ErrSampleSize + } + v1, v2 := x1.Variance(), x2.Variance() + if v1 == 0 && v2 == 0 { + return nil, ErrZeroVariance + } + + dof := math.Pow(v1/n1+v2/n2, 2) / + (math.Pow(v1/n1, 2)/(n1-1) + math.Pow(v2/n2, 2)/(n2-1)) + s := math.Sqrt(v1/n1 + v2/n2) + t := (x1.Mean() - x2.Mean()) / s + return newTTestResult(int(n1), int(n2), t, dof, alt), nil +} + +// PairedTTest performs a two-sample paired t-test on samples x1 and +// x2. If μ0 is non-zero, this tests if the average of the difference +// is significantly different from μ0. If x1 and x2 are identical, +// this returns nil. +func PairedTTest(x1, x2 []float64, μ0 float64, alt LocationHypothesis) (*TTestResult, error) { + if len(x1) != len(x2) { + return nil, ErrMismatchedSamples + } + if len(x1) <= 1 { + // TODO: Can we still do this with n == 1? + return nil, ErrSampleSize + } + + dof := float64(len(x1) - 1) + + diff := make([]float64, len(x1)) + for i := range x1 { + diff[i] = x1[i] - x2[i] + } + sd := StdDev(diff) + if sd == 0 { + // TODO: Can we still do the test? + return nil, ErrZeroVariance + } + t := (Mean(diff) - μ0) * math.Sqrt(float64(len(x1))) / sd + return newTTestResult(len(x1), len(x2), t, dof, alt), nil +} + +// OneSampleTTest performs a one-sample t-test on sample x. This tests +// the null hypothesis that the population mean is equal to μ0. This +// assumes the distribution of the population of sample means is +// normal. +func OneSampleTTest(x TTestSample, μ0 float64, alt LocationHypothesis) (*TTestResult, error) { + n, v := x.Weight(), x.Variance() + if n == 0 { + return nil, ErrSampleSize + } + if v == 0 { + // TODO: Can we still do the test? 
+ return nil, ErrZeroVariance + } + dof := n - 1 + t := (x.Mean() - μ0) * math.Sqrt(n) / math.Sqrt(v) + return newTTestResult(int(n), 0, t, dof, alt), nil +} diff --git a/internal/stat/udist.go b/internal/stat/udist.go new file mode 100644 index 0000000..b47d34a --- /dev/null +++ b/internal/stat/udist.go @@ -0,0 +1,381 @@ +package stat + +import "math" + +// A UDist is the discrete probability distribution of the +// Mann-Whitney U statistic for a pair of samples of sizes N1 and N2. +// +// The details of computing this distribution with no ties can be +// found in Mann, Henry B.; Whitney, Donald R. (1947). "On a Test of +// Whether one of Two Random Variables is Stochastically Larger than +// the Other". Annals of Mathematical Statistics 18 (1): 50–60. +// Computing this distribution in the presence of ties is described in +// Klotz, J. H. (1966). "The Wilcoxon, Ties, and the Computer". +// Journal of the American Statistical Association 61 (315): 772-787 +// and Cheung, Ying Kuen; Klotz, Jerome H. (1997). "The Mann Whitney +// Wilcoxon Distribution Using Linked Lists". Statistica Sinica 7: +// 805-813 (the former paper contains details that are glossed over in +// the latter paper but has mathematical typesetting issues, so it's +// easiest to get the context from the former paper and the details +// from the latter). +type UDist struct { + N1, N2 int + + // T is the count of the number of ties at each rank in the + // input distributions. T may be nil, in which case it is + // assumed there are no ties (which is equivalent to an M+N + // slice of 1s). It must be the case that Sum(T) == M+N. + T []int +} + +// hasTies returns true if d has any tied samples. +func (d UDist) hasTies() bool { + for _, t := range d.T { + if t > 1 { + return true + } + } + return false +} + +// p returns the p_{d.N1,d.N2} function defined by Mann, Whitney 1947 +// for values of U from 0 up to and including the U argument. 
+// +// This algorithm runs in Θ(N1*N2*U) = O(N1²N2²) time and is quite +// fast for small values of N1 and N2. However, it does not handle ties. +func (d UDist) p(U int) []float64 { + // This is a dynamic programming implementation of the + // recursive recurrence definition given by Mann and Whitney: + // + // p_{n,m}(U) = (n * p_{n-1,m}(U-m) + m * p_{n,m-1}(U)) / (n+m) + // p_{n,m}(U) = 0 if U < 0 + // p_{0,m}(U) = p{n,0}(U) = 1 / nCr(m+n, n) if U = 0 + // = 0 if U > 0 + // + // (Note that there is a typo in the original paper. The first + // recursive application of p should be for U-m, not U-M.) + // + // Since p{n,m} only depends on p{n-1,m} and p{n,m-1}, we only + // need to store one "plane" of the three dimensional space at + // a time. + // + // Furthermore, p_{n,m} = p_{m,n}, so we only construct values + // for n <= m and obtain the rest through symmetry. + // + // We organize the computed values of p as followed: + // + // n → N + // m * + // ↓ * * + // * * * + // * * * * + // * * * * + // M * * * * + // + // where each * is a slice indexed by U. The code below + // computes these left-to-right, top-to-bottom, so it only + // stores one row of this matrix at a time. Furthermore, + // computing an element in a given U slice only depends on the + // same and smaller values of U, so we can overwrite the U + // slice we're computing in place as long as we start with the + // largest value of U. Finally, even though the recurrence + // depends on (n,m) above the diagonal and we use symmetry to + // mirror those across the diagonal to (m,n), the mirrored + // indexes are always available in the current row, so this + // mirroring does not interfere with our ability to recycle + // state. + + N, M := d.N1, d.N2 + if N > M { + N, M = M, N + } + + memo := make([][]float64, N+1) + for n := range memo { + memo[n] = make([]float64, U+1) + } + + for m := 0; m <= M; m++ { + // Compute p_{0,m}. This is zero except for U=0. 
+ memo[0][0] = 1 + + // Compute the remainder of this row. + nlim := N + if m < nlim { + nlim = m + } + for n := 1; n <= nlim; n++ { + lp := memo[n-1] // p_{n-1,m} + var rp []float64 + if n <= m-1 { + rp = memo[n] // p_{n,m-1} + } else { + rp = memo[m-1] // p{m-1,n} and m==n + } + + // For a given n,m, U is at most n*m. + // + // TODO: Actually, it's at most ⌈n*m/2⌉, but + // then we need to use more complex symmetries + // in the inner loop below. + ulim := n * m + if U < ulim { + ulim = U + } + + out := memo[n] // p_{n,m} + nplusm := float64(n + m) + for U1 := ulim; U1 >= 0; U1-- { + l := 0.0 + if U1-m >= 0 { + l = float64(n) * lp[U1-m] + } + r := float64(m) * rp[U1] + out[U1] = (l + r) / nplusm + } + } + } + return memo[N] +} + +type ukey struct { + n1 int // size of first sample + twoU int // 2*U statistic for this permutation +} + +// This computes the cumulative counts of the Mann-Whitney U +// distribution in the presence of ties using the computation from +// Cheung, Ying Kuen; Klotz, Jerome H. (1997). "The Mann Whitney +// Wilcoxon Distribution Using Linked Lists". Statistica Sinica 7: +// 805-813, with much guidance from appendix L of Klotz, A +// Computational Approach to Statistics. +// +// makeUmemo constructs a table memo[K][ukey{n1, 2*U}], where K is the +// number of ranks (up to len(t)), n1 is the size of the first sample +// (up to the n1 argument), and U is the U statistic (up to the +// argument twoU/2). The value of an entry in the memo table is the +// number of permutations of a sample of size n1 in a ranking with tie +// vector t[:K] having a U statistic <= U. +func makeUmemo(twoU, n1 int, t []int) []map[ukey]float64 { + // Another candidate for a fast implementation is van de Wiel, + // "The split-up algorithm: a fast symbolic method for + // computing p-values of distribution-free statistics". This + // is what's used by R's coin package. 
It's a comparatively + // recent publication, so it's presumably faster (or perhaps + // just more general) than previous techniques, but I can't + // get my hands on the paper. + // + // TODO: ~40% of this function's time is spent in mapassign on + // the assignment lines in the two loops and another ~20% in + // map access and iteration. Improving map behavior or + // replacing the maps altogether with some other constant-time + // structure could double performance. + // + // TODO: The worst case for this function is when there are + // few ties. Yet the best case overall is when there are *no* + // ties. Can we get the best of both worlds? Use the fast + // algorithm for the most part when there are few ties and mix + // in the general algorithm just where we need it? That's + // certainly possible for sub-problems where t[:k] has no + // ties, but that doesn't help if t[0] has a tie but nothing + // else does. Is it possible to rearrange the ranks without + // messing up our computation of the U statistic for + // sub-problems? + + K := len(t) + + // Compute a coefficients. The a slice is indexed by k (a[0] + // is unused). + a := make([]int, K+1) + a[1] = t[0] + for k := 2; k <= K; k++ { + a[k] = a[k-1] + t[k-2] + t[k-1] + } + + // Create the memo table for the counts function, A. The A + // slice is indexed by k (A[0] is unused). + // + // In "The Mann Whitney Distribution Using Linked Lists", they + // use linked lists (*gasp*) for this, but within each K it's + // really just a memoization table, so it's faster to use a + // map. The outer structure is a slice indexed by k because we + // need to find all memo entries with certain values of k. + // + // TODO: The n1 and twoU values in the ukeys follow strict + // patterns. For each K value, the n1 values are every integer + // between two bounds. For each (K, n1) value, the twoU values + // are every integer multiple of a certain base between two + // bounds. 
It might be worth turning these into directly + // indexible slices. + A := make([]map[ukey]float64, K+1) + A[K] = map[ukey]float64{ukey{n1: n1, twoU: twoU}: 0} + + // Compute memo table (k, n1, twoU) triples from high K values + // to low K values. This drives the recurrence relation + // downward to figure out all of the needed argument triples. + // + // TODO: Is it possible to generate this table bottom-up? If + // so, this could be a pure dynamic programming algorithm and + // we could discard the K dimension. We could at least store + // the inputs in a more compact representation that replaces + // the twoU dimension with an interval and a step size (as + // suggested by Cheung, Klotz, not that they make it at all + // clear *why* they're suggesting this). + tsum := sumint(t) // always ∑ t[0:k] + for k := K - 1; k >= 2; k-- { + tsum -= t[k] + A[k] = make(map[ukey]float64) + + // Construct A[k] from A[k+1]. + for A_kplus1 := range A[k+1] { + rkLow := maxint(0, A_kplus1.n1-tsum) + rkHigh := minint(A_kplus1.n1, t[k]) + for rk := rkLow; rk <= rkHigh; rk++ { + twoU_k := A_kplus1.twoU - rk*(a[k+1]-2*A_kplus1.n1+rk) + n1_k := A_kplus1.n1 - rk + if twoUmin(n1_k, t[:k], a) <= twoU_k && twoU_k <= twoUmax(n1_k, t[:k], a) { + key := ukey{n1: n1_k, twoU: twoU_k} + A[k][key] = 0 + } + } + } + } + + // Fill counts in memo table from low K values to high K + // values. This unwinds the recurrence relation. + + // Start with K==2 base case. + // + // TODO: Later computations depend on these, but these don't + // depend on anything (including each other), so if K==2, we + // can skip the memo table altogether. + if K < 2 { + panic("K < 2") + } + N_2 := t[0] + t[1] + for A_2i := range A[2] { + Asum := 0.0 + r2Low := maxint(0, A_2i.n1-t[0]) + r2High := (A_2i.twoU - A_2i.n1*(t[0]-A_2i.n1)) / N_2 + for r2 := r2Low; r2 <= r2High; r2++ { + Asum += mathChoose(t[0], A_2i.n1-r2) * + mathChoose(t[1], r2) + } + A[2][A_2i] = Asum + } + + // Derive counts for the rest of the memo table. 
+ tsum = t[0] // always ∑ t[0:k-1] + for k := 3; k <= K; k++ { + tsum += t[k-2] + + // Compute A[k] counts from A[k-1] counts. + for A_ki := range A[k] { + Asum := 0.0 + rkLow := maxint(0, A_ki.n1-tsum) + rkHigh := minint(A_ki.n1, t[k-1]) + for rk := rkLow; rk <= rkHigh; rk++ { + twoU_kminus1 := A_ki.twoU - rk*(a[k]-2*A_ki.n1+rk) + n1_kminus1 := A_ki.n1 - rk + x, ok := A[k-1][ukey{n1: n1_kminus1, twoU: twoU_kminus1}] + if !ok && twoUmax(n1_kminus1, t[:k-1], a) < twoU_kminus1 { + x = mathChoose(tsum, n1_kminus1) + } + Asum += x * mathChoose(t[k-1], rk) + } + A[k][A_ki] = Asum + } + } + + return A +} + +func twoUmin(n1 int, t, a []int) int { + K := len(t) + twoU := -n1 * n1 + n1_k := n1 + for k := 1; k <= K; k++ { + twoU_k := minint(n1_k, t[k-1]) + twoU += twoU_k * a[k] + n1_k -= twoU_k + } + return twoU +} + +func twoUmax(n1 int, t, a []int) int { + K := len(t) + twoU := -n1 * n1 + n1_k := n1 + for k := K; k > 0; k-- { + twoU_k := minint(n1_k, t[k-1]) + twoU += twoU_k * a[k] + n1_k -= twoU_k + } + return twoU +} + +func (d UDist) PMF(U float64) float64 { + if U < 0 || U >= 0.5+float64(d.N1*d.N2) { + return 0 + } + + if d.hasTies() { + // makeUmemo computes the CDF directly. Take its + // difference to get the PMF. + p1, ok1 := makeUmemo(int(2*U)-1, d.N1, d.T)[len(d.T)][ukey{d.N1, int(2*U) - 1}] + p2, ok2 := makeUmemo(int(2*U), d.N1, d.T)[len(d.T)][ukey{d.N1, int(2 * U)}] + if !ok1 || !ok2 { + panic("makeUmemo did not return expected memoization table") + } + return (p2 - p1) / mathChoose(d.N1+d.N2, d.N1) + } + + // There are no ties. Use the fast algorithm. U must be integral. + Ui := int(math.Floor(U)) + // TODO: Use symmetry to minimize U + return d.p(Ui)[Ui] +} + +func (d UDist) CDF(U float64) float64 { + if U < 0 { + return 0 + } else if U >= float64(d.N1*d.N2) { + return 1 + } + + if d.hasTies() { + // TODO: Minimize U? 
+ p, ok := makeUmemo(int(2*U), d.N1, d.T)[len(d.T)][ukey{d.N1, int(2 * U)}] + if !ok { + panic("makeUmemo did not return expected memoization table") + } + return p / mathChoose(d.N1+d.N2, d.N1) + } + + // There are no ties. Use the fast algorithm. U must be integral. + Ui := int(math.Floor(U)) + // The distribution is symmetric around U = m * n / 2. Sum up + // whichever tail is smaller. + flip := Ui >= (d.N1*d.N2+1)/2 + if flip { + Ui = d.N1*d.N2 - Ui - 1 + } + pdfs := d.p(Ui) + p := 0.0 + for _, pdf := range pdfs[:Ui+1] { + p += pdf + } + if flip { + p = 1 - p + } + return p +} + +func (d UDist) Step() float64 { + return 0.5 +} + +func (d UDist) Bounds() (float64, float64) { + // TODO: More precise bounds when there are ties. + return 0, float64(d.N1 * d.N2) +} diff --git a/internal/stat/utest.go b/internal/stat/utest.go new file mode 100644 index 0000000..9272b69 --- /dev/null +++ b/internal/stat/utest.go @@ -0,0 +1,270 @@ +package stat + +import ( + "math" + "sort" +) + +// A LocationHypothesis specifies the alternative hypothesis of a +// location test such as a t-test or a Mann-Whitney U-test. The +// default (zero) value is to test against the alternative hypothesis +// that they differ. +type LocationHypothesis int + +//go:generate stringer -type LocationHypothesis + +const ( + // LocationLess specifies the alternative hypothesis that the + // location of the first sample is less than the second. This + // is a one-tailed test. + LocationLess LocationHypothesis = -1 + + // LocationDiffers specifies the alternative hypothesis that + // the locations of the two samples are not equal. This is a + // two-tailed test. + LocationDiffers LocationHypothesis = 0 + + // LocationGreater specifies the alternative hypothesis that + // the location of the first sample is greater than the + // second. This is a one-tailed test. + LocationGreater LocationHypothesis = 1 +) + +// A MannWhitneyUTestResult is the result of a Mann-Whitney U-test. 
+type MannWhitneyUTestResult struct { + // N1 and N2 are the sizes of the input samples. + N1, N2 int + + // U is the value of the Mann-Whitney U statistic for this + // test, generalized by counting ties as 0.5. + // + // Given the Cartesian product of the two samples, this is the + // number of pairs in which the value from the first sample is + // greater than the value of the second, plus 0.5 times the + // number of pairs where the values from the two samples are + // equal. Hence, U is always an integer multiple of 0.5 (it is + // a whole integer if there are no ties) in the range [0, N1*N2]. + // + // U statistics always come in pairs, depending on which + // sample is "first". The mirror U for the other sample can be + // calculated as N1*N2 - U. + // + // There are many equivalent statistics with slightly + // different definitions. The Wilcoxon (1945) W statistic + // (generalized for ties) is U + (N1(N1+1))/2. It is also + // common to use 2U to eliminate the half steps and Smid + // (1956) uses N1*N2 - 2U to additionally center the + // distribution. + U float64 + + // AltHypothesis specifies the alternative hypothesis tested + // by this test against the null hypothesis that there is no + // difference in the locations of the samples. + AltHypothesis LocationHypothesis + + // P is the p-value of the Mann-Whitney test for the given + // null hypothesis. + P float64 +} + +// MannWhitneyExactLimit gives the largest sample size for which the +// exact U distribution will be used for the Mann-Whitney U-test. +// +// Using the exact distribution is necessary for small sample sizes +// because the distribution is highly irregular. However, computing +// the distribution for large sample sizes is both computationally +// expensive and unnecessary because it quickly approaches a normal +// approximation. Computing the distribution for two 50 value samples +// takes a few milliseconds on a 2014 laptop. 
+var MannWhitneyExactLimit = 50 + +// MannWhitneyTiesExactLimit gives the largest sample size for which +// the exact U distribution will be used for the Mann-Whitney U-test +// in the presence of ties. +// +// Computing this distribution is more expensive than computing the +// distribution without ties, so this is set lower. Computing this +// distribution for two 25 value samples takes about ten milliseconds +// on a 2014 laptop. +var MannWhitneyTiesExactLimit = 25 + +// MannWhitneyUTest performs a Mann-Whitney U-test [1,2] of the null +// hypothesis that two samples come from the same population against +// the alternative hypothesis that one sample tends to have larger or +// smaller values than the other. +// +// This is similar to a t-test, but unlike the t-test, the +// Mann-Whitney U-test is non-parametric (it does not assume a normal +// distribution). It has very slightly lower efficiency than the +// t-test on normal distributions. +// +// Computing the exact U distribution is expensive for large sample +// sizes, so this uses a normal approximation for sample sizes larger +// than MannWhitneyExactLimit if there are no ties or +// MannWhitneyTiesExactLimit if there are ties. This normal +// approximation uses both the tie correction and the continuity +// correction. +// +// This can fail with ErrSampleSize if either sample is empty or +// ErrSamplesEqual if all sample values are equal. +// +// This is also known as a Mann-Whitney-Wilcoxon test and is +// equivalent to the Wilcoxon rank-sum test, though the Wilcoxon +// rank-sum test differs in nomenclature. +// +// [1] Mann, Henry B.; Whitney, Donald R. (1947). "On a Test of +// Whether one of Two Random Variables is Stochastically Larger than +// the Other". Annals of Mathematical Statistics 18 (1): 50–60. +// +// [2] Klotz, J. H. (1966). "The Wilcoxon, Ties, and the Computer". +// Journal of the American Statistical Association 61 (315): 772-787. 
+func MannWhitneyUTest(x1, x2 []float64, alt LocationHypothesis) (*MannWhitneyUTestResult, error) { + n1, n2 := len(x1), len(x2) + if n1 == 0 || n2 == 0 { + return nil, ErrSampleSize + } + + // Compute the U statistic and tie vector T. + x1 = append([]float64(nil), x1...) + x2 = append([]float64(nil), x2...) + sort.Float64s(x1) + sort.Float64s(x2) + merged, labels := labeledMerge(x1, x2) + + R1 := 0.0 + T, hasTies := []int{}, false + for i := 0; i < len(merged); { + rank1, nx1, v1 := i+1, 0, merged[i] + // Consume samples that tie this sample (including itself). + for ; i < len(merged) && merged[i] == v1; i++ { + if labels[i] == 1 { + nx1++ + } + } + // Assign all tied samples the average rank of the + // samples, where merged[0] has rank 1. + if nx1 != 0 { + rank := float64(i+rank1) / 2 + R1 += rank * float64(nx1) + } + T = append(T, i-rank1+1) + if i > rank1 { + hasTies = true + } + } + U1 := R1 - float64(n1*(n1+1))/2 + + // Compute the smaller of U1 and U2 + U2 := float64(n1*n2) - U1 + Usmall := math.Min(U1, U2) + + var p float64 + if !hasTies && n1 <= MannWhitneyExactLimit && n2 <= MannWhitneyExactLimit || + hasTies && n1 <= MannWhitneyTiesExactLimit && n2 <= MannWhitneyTiesExactLimit { + // Use exact U distribution. U1 will be an integer. + if len(T) == 1 { + // All values are equal. Test is meaningless. + return nil, ErrSamplesEqual + } + + dist := UDist{N1: n1, N2: n2, T: T} + switch alt { + case LocationDiffers: + if U1 == U2 { + // The distribution is symmetric about + // Usmall. Since the distribution is + // discrete, the CDF is discontinuous + // and if simply double CDF(Usmall), + // we'll double count the + // (non-infinitesimal) probability + // mass at Usmall. What we want is + // just the integral of the whole CDF, + // which is 1. 
+ p = 1 + } else { + p = dist.CDF(Usmall) * 2 + } + + case LocationLess: + p = dist.CDF(U1) + + case LocationGreater: + p = 1 - dist.CDF(U1-1) + } + } else { + // Use normal approximation (with tie and continuity + // correction). + t := tieCorrection(T) + N := float64(n1 + n2) + μU := float64(n1*n2) / 2 + σU := math.Sqrt(float64(n1*n2) * ((N + 1) - t/(N*(N-1))) / 12) + if σU == 0 { + return nil, ErrSamplesEqual + } + numer := U1 - μU + // Perform continuity correction. + switch alt { + case LocationDiffers: + numer -= mathSign(numer) * 0.5 + case LocationLess: + numer += 0.5 + case LocationGreater: + numer -= 0.5 + } + z := numer / σU + switch alt { + case LocationDiffers: + p = 2 * math.Min(StdNormal.CDF(z), 1-StdNormal.CDF(z)) + case LocationLess: + p = StdNormal.CDF(z) + case LocationGreater: + p = 1 - StdNormal.CDF(z) + } + } + + return &MannWhitneyUTestResult{N1: n1, N2: n2, U: U1, + AltHypothesis: alt, P: p}, nil +} + +// labeledMerge merges sorted lists x1 and x2 into sorted list merged. +// labels[i] is 1 or 2 depending on whether merged[i] is a value from +// x1 or x2, respectively. +func labeledMerge(x1, x2 []float64) (merged []float64, labels []byte) { + merged = make([]float64, len(x1)+len(x2)) + labels = make([]byte, len(x1)+len(x2)) + + i, j, o := 0, 0, 0 + for i < len(x1) && j < len(x2) { + if x1[i] < x2[j] { + merged[o] = x1[i] + labels[o] = 1 + i++ + } else { + merged[o] = x2[j] + labels[o] = 2 + j++ + } + o++ + } + for ; i < len(x1); i++ { + merged[o] = x1[i] + labels[o] = 1 + o++ + } + for ; j < len(x2); j++ { + merged[o] = x2[j] + labels[o] = 2 + o++ + } + return +} + +// tieCorrection computes the tie correction factor Σ_j (t_j³ - t_j) +// where t_j is the number of ties in the j'th rank. 
+func tieCorrection(ties []int) float64 { + t := 0 + for _, tie := range ties { + t += tie*tie*tie - tie + } + return float64(t) +} diff --git a/internal/term/color.go b/internal/term/color.go new file mode 100644 index 0000000..769a0c5 --- /dev/null +++ b/internal/term/color.go @@ -0,0 +1,655 @@ +// Copyright 2020 The golang.design Initiative Authors. +// All rights reserved. Use of this source code is governed +// by a GNU GPLv3 license that can be found in the LICENSE file. + +package term + +// Red turns a given string to red +func Red(in string) string { + return fgString(in, 255, 0, 0) +} + +// Orange turns a given string to orange +func Orange(in string) string { + return fgString(in, 252, 140, 3) +} + +// Green turns a given string to green +func Green(in string) string { + return fgString(in, 0, 255, 0) +} + +// Gray turns a given string to gray +func Gray(in string) string { + return fgString(in, 125, 125, 125) +} + +var ( + before = []byte("\033[") + after = []byte("m") + reset = []byte("\033[0;00m") + fgcolors = fgTermRGB[16:232] + bgcolors = bgTermRGB[16:232] +) + +func fgString(in string, r, g, b uint8) string { + return string(fgBytes([]byte(in), r, g, b)) +} + +// Bytes colorizes the foreground with the terminal color that matches +// the closest the RGB color. +func fgBytes(in []byte, r, g, b uint8) []byte { + return colorize(color(r, g, b, true), in) +} + +func colorize(color, in []byte) []byte { + return append(append(append(append(before, color...), after...), in...), reset...) +} + +func color(r, g, b uint8, foreground bool) []byte { + // if all colors are equal, it might be in the grayscale range + if r == g && g == b { + color, ok := grayscale(r, foreground) + if ok { + return color + } + } + + // the general case approximates RGB by using the closest color. 
+ r6 := ((uint16(r) * 5) / 255) + g6 := ((uint16(g) * 5) / 255) + b6 := ((uint16(b) * 5) / 255) + i := 36*r6 + 6*g6 + b6 + if foreground { + return fgcolors[i] + } + return bgcolors[i] +} + +func grayscale(scale uint8, foreground bool) ([]byte, bool) { + var source [256][]byte + + if foreground { + source = fgTermRGB + } else { + source = bgTermRGB + } + + switch scale { + case 0x08: + return source[232], true + case 0x12: + return source[233], true + case 0x1c: + return source[234], true + case 0x26: + return source[235], true + case 0x30: + return source[236], true + case 0x3a: + return source[237], true + case 0x44: + return source[238], true + case 0x4e: + return source[239], true + case 0x58: + return source[240], true + case 0x62: + return source[241], true + case 0x6c: + return source[242], true + case 0x76: + return source[243], true + case 0x80: + return source[244], true + case 0x8a: + return source[245], true + case 0x94: + return source[246], true + case 0x9e: + return source[247], true + case 0xa8: + return source[248], true + case 0xb2: + return source[249], true + case 0xbc: + return source[250], true + case 0xc6: + return source[251], true + case 0xd0: + return source[252], true + case 0xda: + return source[253], true + case 0xe4: + return source[254], true + case 0xee: + return source[255], true + } + return nil, false +} + +var ( + yellow = fgString("", 252, 140, 3) + red = fgString("", 255, 0, 0) + green = fgString("", 0, 255, 0) +) + +// \033[ + +var fgTermRGB = [...][]byte{ + []byte("38;5;0"), + []byte("38;5;1"), + []byte("38;5;2"), + []byte("38;5;3"), + []byte("38;5;4"), + []byte("38;5;5"), + []byte("38;5;6"), + []byte("38;5;7"), + []byte("38;5;8"), + []byte("38;5;9"), + []byte("38;5;10"), + []byte("38;5;11"), + []byte("38;5;12"), + []byte("38;5;13"), + []byte("38;5;14"), + []byte("38;5;15"), + []byte("38;5;16"), + []byte("38;5;17"), + []byte("38;5;18"), + []byte("38;5;19"), + []byte("38;5;20"), + []byte("38;5;21"), + []byte("38;5;22"), + 
[]byte("38;5;23"), + []byte("38;5;24"), + []byte("38;5;25"), + []byte("38;5;26"), + []byte("38;5;27"), + []byte("38;5;28"), + []byte("38;5;29"), + []byte("38;5;30"), + []byte("38;5;31"), + []byte("38;5;32"), + []byte("38;5;33"), + []byte("38;5;34"), + []byte("38;5;35"), + []byte("38;5;36"), + []byte("38;5;37"), + []byte("38;5;38"), + []byte("38;5;39"), + []byte("38;5;40"), + []byte("38;5;41"), + []byte("38;5;42"), + []byte("38;5;43"), + []byte("38;5;44"), + []byte("38;5;45"), + []byte("38;5;46"), + []byte("38;5;47"), + []byte("38;5;48"), + []byte("38;5;49"), + []byte("38;5;50"), + []byte("38;5;51"), + []byte("38;5;52"), + []byte("38;5;53"), + []byte("38;5;54"), + []byte("38;5;55"), + []byte("38;5;56"), + []byte("38;5;57"), + []byte("38;5;58"), + []byte("38;5;59"), + []byte("38;5;60"), + []byte("38;5;61"), + []byte("38;5;62"), + []byte("38;5;63"), + []byte("38;5;64"), + []byte("38;5;65"), + []byte("38;5;66"), + []byte("38;5;67"), + []byte("38;5;68"), + []byte("38;5;69"), + []byte("38;5;70"), + []byte("38;5;71"), + []byte("38;5;72"), + []byte("38;5;73"), + []byte("38;5;74"), + []byte("38;5;75"), + []byte("38;5;76"), + []byte("38;5;77"), + []byte("38;5;78"), + []byte("38;5;79"), + []byte("38;5;80"), + []byte("38;5;81"), + []byte("38;5;82"), + []byte("38;5;83"), + []byte("38;5;84"), + []byte("38;5;85"), + []byte("38;5;86"), + []byte("38;5;87"), + []byte("38;5;88"), + []byte("38;5;89"), + []byte("38;5;90"), + []byte("38;5;91"), + []byte("38;5;92"), + []byte("38;5;93"), + []byte("38;5;94"), + []byte("38;5;95"), + []byte("38;5;96"), + []byte("38;5;97"), + []byte("38;5;98"), + []byte("38;5;99"), + []byte("38;5;100"), + []byte("38;5;101"), + []byte("38;5;102"), + []byte("38;5;103"), + []byte("38;5;104"), + []byte("38;5;105"), + []byte("38;5;106"), + []byte("38;5;107"), + []byte("38;5;108"), + []byte("38;5;109"), + []byte("38;5;110"), + []byte("38;5;111"), + []byte("38;5;112"), + []byte("38;5;113"), + []byte("38;5;114"), + []byte("38;5;115"), + []byte("38;5;116"), + 
[]byte("38;5;117"), + []byte("38;5;118"), + []byte("38;5;119"), + []byte("38;5;120"), + []byte("38;5;121"), + []byte("38;5;122"), + []byte("38;5;123"), + []byte("38;5;124"), + []byte("38;5;125"), + []byte("38;5;126"), + []byte("38;5;127"), + []byte("38;5;128"), + []byte("38;5;129"), + []byte("38;5;130"), + []byte("38;5;131"), + []byte("38;5;132"), + []byte("38;5;133"), + []byte("38;5;134"), + []byte("38;5;135"), + []byte("38;5;136"), + []byte("38;5;137"), + []byte("38;5;138"), + []byte("38;5;139"), + []byte("38;5;140"), + []byte("38;5;141"), + []byte("38;5;142"), + []byte("38;5;143"), + []byte("38;5;144"), + []byte("38;5;145"), + []byte("38;5;146"), + []byte("38;5;147"), + []byte("38;5;148"), + []byte("38;5;149"), + []byte("38;5;150"), + []byte("38;5;151"), + []byte("38;5;152"), + []byte("38;5;153"), + []byte("38;5;154"), + []byte("38;5;155"), + []byte("38;5;156"), + []byte("38;5;157"), + []byte("38;5;158"), + []byte("38;5;159"), + []byte("38;5;160"), + []byte("38;5;161"), + []byte("38;5;162"), + []byte("38;5;163"), + []byte("38;5;164"), + []byte("38;5;165"), + []byte("38;5;166"), + []byte("38;5;167"), + []byte("38;5;168"), + []byte("38;5;169"), + []byte("38;5;170"), + []byte("38;5;171"), + []byte("38;5;172"), + []byte("38;5;173"), + []byte("38;5;174"), + []byte("38;5;175"), + []byte("38;5;176"), + []byte("38;5;177"), + []byte("38;5;178"), + []byte("38;5;179"), + []byte("38;5;180"), + []byte("38;5;181"), + []byte("38;5;182"), + []byte("38;5;183"), + []byte("38;5;184"), + []byte("38;5;185"), + []byte("38;5;186"), + []byte("38;5;187"), + []byte("38;5;188"), + []byte("38;5;189"), + []byte("38;5;190"), + []byte("38;5;191"), + []byte("38;5;192"), + []byte("38;5;193"), + []byte("38;5;194"), + []byte("38;5;195"), + []byte("38;5;196"), + []byte("38;5;197"), + []byte("38;5;198"), + []byte("38;5;199"), + []byte("38;5;200"), + []byte("38;5;201"), + []byte("38;5;202"), + []byte("38;5;203"), + []byte("38;5;204"), + []byte("38;5;205"), + []byte("38;5;206"), + []byte("38;5;207"), 
+ []byte("38;5;208"), + []byte("38;5;209"), + []byte("38;5;210"), + []byte("38;5;211"), + []byte("38;5;212"), + []byte("38;5;213"), + []byte("38;5;214"), + []byte("38;5;215"), + []byte("38;5;216"), + []byte("38;5;217"), + []byte("38;5;218"), + []byte("38;5;219"), + []byte("38;5;220"), + []byte("38;5;221"), + []byte("38;5;222"), + []byte("38;5;223"), + []byte("38;5;224"), + []byte("38;5;225"), + []byte("38;5;226"), + []byte("38;5;227"), + []byte("38;5;228"), + []byte("38;5;229"), + []byte("38;5;230"), + []byte("38;5;231"), + []byte("38;5;232"), + []byte("38;5;233"), + []byte("38;5;234"), + []byte("38;5;235"), + []byte("38;5;236"), + []byte("38;5;237"), + []byte("38;5;238"), + []byte("38;5;239"), + []byte("38;5;240"), + []byte("38;5;241"), + []byte("38;5;242"), + []byte("38;5;243"), + []byte("38;5;244"), + []byte("38;5;245"), + []byte("38;5;246"), + []byte("38;5;247"), + []byte("38;5;248"), + []byte("38;5;249"), + []byte("38;5;250"), + []byte("38;5;251"), + []byte("38;5;252"), + []byte("38;5;253"), + []byte("38;5;254"), + []byte("38;5;255"), +} + +var bgTermRGB = [...][]byte{ + []byte("48;5;0"), + []byte("48;5;1"), + []byte("48;5;2"), + []byte("48;5;3"), + []byte("48;5;4"), + []byte("48;5;5"), + []byte("48;5;6"), + []byte("48;5;7"), + []byte("48;5;8"), + []byte("48;5;9"), + []byte("48;5;10"), + []byte("48;5;11"), + []byte("48;5;12"), + []byte("48;5;13"), + []byte("48;5;14"), + []byte("48;5;15"), + []byte("48;5;16"), + []byte("48;5;17"), + []byte("48;5;18"), + []byte("48;5;19"), + []byte("48;5;20"), + []byte("48;5;21"), + []byte("48;5;22"), + []byte("48;5;23"), + []byte("48;5;24"), + []byte("48;5;25"), + []byte("48;5;26"), + []byte("48;5;27"), + []byte("48;5;28"), + []byte("48;5;29"), + []byte("48;5;30"), + []byte("48;5;31"), + []byte("48;5;32"), + []byte("48;5;33"), + []byte("48;5;34"), + []byte("48;5;35"), + []byte("48;5;36"), + []byte("48;5;37"), + []byte("48;5;38"), + []byte("48;5;39"), + []byte("48;5;40"), + []byte("48;5;41"), + []byte("48;5;42"), + 
[]byte("48;5;43"), + []byte("48;5;44"), + []byte("48;5;45"), + []byte("48;5;46"), + []byte("48;5;47"), + []byte("48;5;48"), + []byte("48;5;49"), + []byte("48;5;50"), + []byte("48;5;51"), + []byte("48;5;52"), + []byte("48;5;53"), + []byte("48;5;54"), + []byte("48;5;55"), + []byte("48;5;56"), + []byte("48;5;57"), + []byte("48;5;58"), + []byte("48;5;59"), + []byte("48;5;60"), + []byte("48;5;61"), + []byte("48;5;62"), + []byte("48;5;63"), + []byte("48;5;64"), + []byte("48;5;65"), + []byte("48;5;66"), + []byte("48;5;67"), + []byte("48;5;68"), + []byte("48;5;69"), + []byte("48;5;70"), + []byte("48;5;71"), + []byte("48;5;72"), + []byte("48;5;73"), + []byte("48;5;74"), + []byte("48;5;75"), + []byte("48;5;76"), + []byte("48;5;77"), + []byte("48;5;78"), + []byte("48;5;79"), + []byte("48;5;80"), + []byte("48;5;81"), + []byte("48;5;82"), + []byte("48;5;83"), + []byte("48;5;84"), + []byte("48;5;85"), + []byte("48;5;86"), + []byte("48;5;87"), + []byte("48;5;88"), + []byte("48;5;89"), + []byte("48;5;90"), + []byte("48;5;91"), + []byte("48;5;92"), + []byte("48;5;93"), + []byte("48;5;94"), + []byte("48;5;95"), + []byte("48;5;96"), + []byte("48;5;97"), + []byte("48;5;98"), + []byte("48;5;99"), + []byte("48;5;100"), + []byte("48;5;101"), + []byte("48;5;102"), + []byte("48;5;103"), + []byte("48;5;104"), + []byte("48;5;105"), + []byte("48;5;106"), + []byte("48;5;107"), + []byte("48;5;108"), + []byte("48;5;109"), + []byte("48;5;110"), + []byte("48;5;111"), + []byte("48;5;112"), + []byte("48;5;113"), + []byte("48;5;114"), + []byte("48;5;115"), + []byte("48;5;116"), + []byte("48;5;117"), + []byte("48;5;118"), + []byte("48;5;119"), + []byte("48;5;120"), + []byte("48;5;121"), + []byte("48;5;122"), + []byte("48;5;123"), + []byte("48;5;124"), + []byte("48;5;125"), + []byte("48;5;126"), + []byte("48;5;127"), + []byte("48;5;128"), + []byte("48;5;129"), + []byte("48;5;130"), + []byte("48;5;131"), + []byte("48;5;132"), + []byte("48;5;133"), + []byte("48;5;134"), + []byte("48;5;135"), + 
[]byte("48;5;136"), + []byte("48;5;137"), + []byte("48;5;138"), + []byte("48;5;139"), + []byte("48;5;140"), + []byte("48;5;141"), + []byte("48;5;142"), + []byte("48;5;143"), + []byte("48;5;144"), + []byte("48;5;145"), + []byte("48;5;146"), + []byte("48;5;147"), + []byte("48;5;148"), + []byte("48;5;149"), + []byte("48;5;150"), + []byte("48;5;151"), + []byte("48;5;152"), + []byte("48;5;153"), + []byte("48;5;154"), + []byte("48;5;155"), + []byte("48;5;156"), + []byte("48;5;157"), + []byte("48;5;158"), + []byte("48;5;159"), + []byte("48;5;160"), + []byte("48;5;161"), + []byte("48;5;162"), + []byte("48;5;163"), + []byte("48;5;164"), + []byte("48;5;165"), + []byte("48;5;166"), + []byte("48;5;167"), + []byte("48;5;168"), + []byte("48;5;169"), + []byte("48;5;170"), + []byte("48;5;171"), + []byte("48;5;172"), + []byte("48;5;173"), + []byte("48;5;174"), + []byte("48;5;175"), + []byte("48;5;176"), + []byte("48;5;177"), + []byte("48;5;178"), + []byte("48;5;179"), + []byte("48;5;180"), + []byte("48;5;181"), + []byte("48;5;182"), + []byte("48;5;183"), + []byte("48;5;184"), + []byte("48;5;185"), + []byte("48;5;186"), + []byte("48;5;187"), + []byte("48;5;188"), + []byte("48;5;189"), + []byte("48;5;190"), + []byte("48;5;191"), + []byte("48;5;192"), + []byte("48;5;193"), + []byte("48;5;194"), + []byte("48;5;195"), + []byte("48;5;196"), + []byte("48;5;197"), + []byte("48;5;198"), + []byte("48;5;199"), + []byte("48;5;200"), + []byte("48;5;201"), + []byte("48;5;202"), + []byte("48;5;203"), + []byte("48;5;204"), + []byte("48;5;205"), + []byte("48;5;206"), + []byte("48;5;207"), + []byte("48;5;208"), + []byte("48;5;209"), + []byte("48;5;210"), + []byte("48;5;211"), + []byte("48;5;212"), + []byte("48;5;213"), + []byte("48;5;214"), + []byte("48;5;215"), + []byte("48;5;216"), + []byte("48;5;217"), + []byte("48;5;218"), + []byte("48;5;219"), + []byte("48;5;220"), + []byte("48;5;221"), + []byte("48;5;222"), + []byte("48;5;223"), + []byte("48;5;224"), + []byte("48;5;225"), + []byte("48;5;226"), 
+ []byte("48;5;227"), + []byte("48;5;228"), + []byte("48;5;229"), + []byte("48;5;230"), + []byte("48;5;231"), + []byte("48;5;232"), + []byte("48;5;233"), + []byte("48;5;234"), + []byte("48;5;235"), + []byte("48;5;236"), + []byte("48;5;237"), + []byte("48;5;238"), + []byte("48;5;239"), + []byte("48;5;240"), + []byte("48;5;241"), + []byte("48;5;242"), + []byte("48;5;243"), + []byte("48;5;244"), + []byte("48;5;245"), + []byte("48;5;246"), + []byte("48;5;247"), + []byte("48;5;248"), + []byte("48;5;249"), + []byte("48;5;250"), + []byte("48;5;251"), + []byte("48;5;252"), + []byte("48;5;253"), + []byte("48;5;254"), + []byte("48;5;255"), +} diff --git a/main.go b/main.go new file mode 100644 index 0000000..16e08a3 --- /dev/null +++ b/main.go @@ -0,0 +1,380 @@ +// Copyright 2020 The golang.design Initiative Authors. +// All rights reserved. Use of this source code is governed +// by a GNU GPLv3 license that can be found in the LICENSE file. + +package main + +import ( + "bytes" + "flag" + "fmt" + "io" + "io/ioutil" + "log" + "os" + "os/exec" + "os/signal" + "strings" + "syscall" + "time" + + "golang.design/x/bench/internal/lock" + "golang.design/x/bench/internal/stat" + "golang.design/x/bench/internal/term" +) + +func usage() { + fmt.Fprintf(os.Stderr, `usage: bench [options] +options for daemon usage: + -daemon + run bench service + -list + print current and pending commands + +options for significant tests: + -delta-test test + significance test to apply to delta: utest, ttest, or none (default "utest") + -alpha α + consider change significant if p < α (default 0.05) + -geomean + print the geometric mean of each file (default false) + -split labels + split benchmarks by labels (default "pkg,goos,goarch") + -sort order + sort by order: [-]delta, [-]name, none (default "none") + +options for running benchmarks: + -v go test + the -v flag from go test, (default false) + -name go test + the -bench flag from go test (default .) 
(default ".") + -count go test + the -count flag from go test (default 10) (default 10) + -time go test + the -benchtime flag from go test (default unset) + -cpuprocs go test + the -cpu flag to go test (default unset) + +options for performance locking + -shared + acquire lock in shared mode (default exclusive mode) + -cpufreq percent + set CPU frequency to percent between the min and max (default 90) + while running command, or "none" for no adjustment +`) + os.Exit(2) +} + +var ( + flagDaemon *bool + flagList *bool + + flagDeltaTest *string + flagAlpha *float64 + flagGeomean *bool + flagSplit *string + flagSort *string + + flagShared *bool + flagCPUFreq *lock.CpufreqFlag + + flagVerbose *bool + flagName *string + flagCount *int + flagTime *string + flagCPUProcs *string +) + +func main() { + log.SetPrefix("bench: ") + log.SetFlags(0) + flag.Usage = usage + + // daemon args + flagDaemon = flag.Bool("daemon", false, "run bench service") + flagList = flag.Bool("list", false, "print current and pending commands") + + // benchstat args + flagDeltaTest = flag.String("delta-test", "utest", "significance `test` to apply to delta: utest, ttest, or none") + flagAlpha = flag.Float64("alpha", 0.05, "consider change significant if p < `α`") + flagGeomean = flag.Bool("geomean", false, "print the geometric mean of each file") + flagSplit = flag.String("split", "pkg,goos,goarch", "split benchmarks by `labels`") + flagSort = flag.String("sort", "none", "sort by `order`: [-]delta, [-]name, none") + + // perflock flags + flagShared = flag.Bool("shared", false, "acquire lock in shared mode (default exclusive mode)") + flagCPUFreq = &lock.CpufreqFlag{Percent: 90} + flag.Var(flagCPUFreq, "cpufreq", "set CPU frequency to `percent` between the min and max\n\twhile running command, or \"none\" for no adjustment") + + // go test args + flagVerbose = flag.Bool("v", false, "the -v flag from `go test`, (default false)") + flagName = flag.String("name", ".", "the -bench flag from `go test` 
(default .)") + flagCount = flag.Int("count", 10, "the -count flag from `go test` (default 10)") + flagTime = flag.String("time", "", "the -benchtime flag from `go test` (default unset)") + flagCPUProcs = flag.String("cpuprocs", "", "the -cpu flag to `go test` (default unset)") + flag.Parse() + + if *flagDaemon { + if flag.NArg() > 0 { + flag.Usage() + os.Exit(2) + } + lock.RunDaemon() + return + } + if *flagList { + if flag.NArg() > 0 { + flag.Usage() + os.Exit(2) + } + c := lock.NewClient() + if c == nil { + log.Fatal("Is the bench daemon running?") + } + list := c.List() + if len(list) == 0 { + log.Println("daemon is running but no running benchmarks.") + return + } + for _, l := range list { + log.Println(l) + } + return + } + + if flag.NArg() > 0 { + runCompare() + return + } + + // prepare go test command + args := []string{ + "go", + "test", + "-run=^$", + } + if *flagVerbose { + args = append(args, "-v") + } + args = append(args, fmt.Sprintf("-bench=%s", *flagName)) + if *flagCount <= 0 { + *flagCount = 10 + } + args = append(args, fmt.Sprintf("-count=%d", *flagCount)) + if *flagTime != "" { + args = append(args, fmt.Sprintf("-benchtime=%s", *flagTime)) + } + if *flagCPUProcs != "" { + args = append(args, fmt.Sprintf("-cpu=%s", *flagCPUProcs)) + } + + // acquire lock + c := lock.NewClient() + if c == nil { + log.Printf(term.Red("run benchmarks without performance locking...")) + } else { + if !c.Acquire(*flagShared, true, strings.Join(args, " ")) { + list := c.List() + log.Printf("Waiting for lock...\n") + for _, l := range list { + log.Println(l) + } + c.Acquire(*flagShared, false, strings.Join(args, " ")) + } + if !*flagShared && flagCPUFreq.Percent >= 0 { + c.SetCPUFreq(flagCPUFreq.Percent) + log.Print(term.Gray(fmt.Sprintf("run benchmarks under %d%% cpufreq...", flagCPUFreq.Percent))) + } + } + + // Ignore SIGINT and SIGQUIT so they pass through to the + // child. 
+ signal.Notify(make(chan os.Signal), os.Interrupt, syscall.SIGQUIT) + + // run bench + runBench(args) +} + +// shellEscape escapes a single shell token. +func shellEscape(x string) string { + if len(x) == 0 { + return "''" + } + for _, r := range x { + if 'a' <= r && r <= 'z' || 'A' <= r && r <= 'Z' || '0' <= r && r <= '9' || strings.ContainsRune("@%_-+:,./", r) { + continue + } + // Unsafe character. + return "'" + strings.Replace(x, "'", "'\"'\"'", -1) + "'" + } + return x +} + +// shellEscapeList escapes a list of shell tokens. +func shellEscapeList(xs []string) string { + out := make([]string, len(xs)) + for i, x := range xs { + out[i] = shellEscape(x) + } + return strings.Join(out, " ") +} + +var deltaTestNames = map[string]stat.DeltaTest{ + "none": stat.NoDeltaTest, + "u": stat.UTest, + "u-test": stat.UTest, + "utest": stat.UTest, + "t": stat.TTest, + "t-test": stat.TTest, + "ttest": stat.TTest, +} + +func runCompare() { + c := &stat.Collection{ + Alpha: *flagAlpha, + AddGeoMean: *flagGeomean, + DeltaTest: deltaTestNames[strings.ToLower(*flagDeltaTest)], + } + + for _, file := range flag.Args() { + f, err := os.Open(file) + if err != nil { + log.Print(err) + flag.Usage() + os.Exit(2) + } + defer f.Close() + + if err := c.AddFile(file, f); err != nil { + log.Fatal(err) + } + } + + tables := c.Tables() + var buf bytes.Buffer + stat.FormatText(&buf, tables) + os.Stdout.Write(buf.Bytes()) +} + +func runBench(args []string) { + log.Print(strings.Join(args, " ")) + cmd := exec.Command(args[0], args[1:]...) 
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		log.Fatal(err)
+	}
+	stderr, err := cmd.StderrPipe()
+	if err != nil {
+		log.Fatal(err)
+	}
+	// Mirror the child's stdout while collecting it for the later
+	// statistical analysis.
+	doneCh := make(chan []byte, 1)
+	go func() {
+		var data []byte
+		buf := make([]byte, 1024)
+		for {
+			n, err := stdout.Read(buf)
+			if n > 0 {
+				// Write raw bytes: the previous
+				// fmt.Fprintf(os.Stdout, string(buf[:n])) used the
+				// output as a format string, mangling any '%' in it.
+				os.Stdout.Write(buf[:n])
+				data = append(data, buf[:n]...)
+			}
+			if err != nil {
+				if err != io.EOF {
+					fmt.Fprintf(os.Stderr, "%v", err)
+				}
+				doneCh <- data
+				return
+			}
+		}
+	}()
+	errCh := make(chan error, 1)
+	go func() {
+		buf := make([]byte, 1024)
+		for {
+			n, err := stderr.Read(buf)
+			if n > 0 {
+				os.Stderr.Write(buf[:n])
+			}
+			if err != nil {
+				if err == io.EOF {
+					errCh <- nil
+				} else {
+					errCh <- err
+				}
+				return
+			}
+		}
+	}()
+
+	// Start only reports launch failures (e.g. binary not found);
+	// *exec.ExitError can only ever come from Wait, so the former
+	// switch on it here was dead code that swallowed real errors.
+	if err := cmd.Start(); err != nil {
+		log.Fatal(err)
+	}
+
+	// Wait for BOTH pipe readers: selecting on whichever channel is
+	// ready first raced stderr's EOF against stdout's data and could
+	// drop the collected results entirely.
+	if err := <-errCh; err != nil {
+		// reading the benchmark's stderr failed; it was interrupted.
+		os.Exit(2)
+	}
+	results := <-doneCh
+
+	// Reap the child and propagate its exit status, as the original
+	// ExitError handling intended.
+	if err := cmd.Wait(); err != nil {
+		if ee, ok := err.(*exec.ExitError); ok {
+			status := ee.Sys().(syscall.WaitStatus)
+			if status.Exited() {
+				os.Exit(status.ExitStatus())
+			}
+		}
+		log.Fatal(err)
+	}
+
+	// do nothing if no tests were ran.
+	if strings.Contains(string(results), "no Go files") ||
+		strings.Contains(string(results), "no test files") {
+		return
+	}
+
+	fname := "bench-" + time.Now().Format("2006-01-02-15:04:05") + ".txt"
+	if err := ioutil.WriteFile(fname, results, 0644); err != nil {
+		// try again, maybe the user was too fast?
+		if err := ioutil.WriteFile(fname, results, 0644); err != nil {
+			log.Fatal("cannot save benchmark result to your disk.")
+		}
+	}
+	log.Printf("results are saved to file: ./%s\n\n", fname)
+
+	computeStat(results)
+}
+
+// sortNames maps the accepted -sort spellings to row orderings; the
+// zero value (nil) for unknown names means "no sorting".
+var sortNames = map[string]stat.Order{
+	"none":  nil,
+	"name":  stat.ByName,
+	"delta": stat.ByDelta,
+}
+
+// computeStat parses raw `go test -bench` output and prints a
+// benchstat-style summary table to stdout, honoring the -sort, -split,
+// -alpha, -geomean and -delta-test flags.
+func computeStat(data []byte) {
+	sortName := *flagSort
+	reverse := false
+	if strings.HasPrefix(sortName, "-") {
+		reverse = true
+		sortName = sortName[1:]
+	}
+	// A plain lookup suffices: missing keys yield nil, same as the
+	// former comma-ok form with a discarded ok.
+	order := sortNames[sortName]
+	c := &stat.Collection{
+		Alpha:      *flagAlpha,
+		AddGeoMean: *flagGeomean,
+		DeltaTest:  deltaTestNames[strings.ToLower(*flagDeltaTest)],
+	}
+
+	if *flagSplit != "" {
+		c.SplitBy = strings.Split(*flagSplit, ",")
+	}
+	if order != nil {
+		if reverse {
+			order = stat.Reverse(order)
+		}
+		c.Order = order
+	}
+	c.AddData("", data)
+	tables := c.Tables()
+	var buf bytes.Buffer
+	stat.FormatText(&buf, tables)
+	fmt.Print(buf.String())
+}