-
Notifications
You must be signed in to change notification settings - Fork 44
/
usage_generator.py
executable file
·143 lines (120 loc) · 4.08 KB
/
usage_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/usr/bin/env python
#
# Copyright 2012 Brett Slatkin
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generates fake usage and retention cohort data for demo purposes."""
import csv
import datetime
import math
import random
import string
import sys
# Number of days of history to simulate; the generation loop emits one CSV
# row per day for every (group type, group value) pair.
duration = 300

# CSV header: three identifying columns followed by one column per 30-day
# month, labelled in descending order ('Month 9' ... 'Month 0' for the
# default duration). `range` and floor division are used instead of the
# Python 2-only `xrange` and `/` so the header is built identically on
# Python 2 and Python 3.
COLUMNS = [
    'Cohort group type',
    'Cohort group value',
    'Cohort day',
] + [
    'Month %d' % i for i in range(duration // 30 - 1, -1, -1)
]

# All output goes to stdout as CSV; the header row is written immediately.
out = csv.writer(sys.stdout)
out.writerow(COLUMNS)

# Values emitted in the 'Cohort group value' column.
FUNNEL_STATES = [
    'Born',
    'Updated profile',
    'Sent first message',
    'Unlocked first achievement',
    'Made two posts',
]

# (group type label, list of group values) pairs; both group types reuse the
# same funnel states.
group_types = [
    ('Last active day', FUNNEL_STATES),
    ('Lifespan', FUNNEL_STATES),
]

# Radians advanced per simulated day: a quarter turn (pi / 2) spread over the
# whole duration, before the per-wave period multiplier is applied.
step = math.pi / duration / 2

# Mutable state shared with do_wave() and the generation loop.
wave_start = {}    # value key -> starting phase in radians
wave_size = {}     # value key -> base amplitude
wave_period = {}   # value key -> multiplier applied to `step`
peaked = {}        # group key -> (state, x, amount) of that group's one peak
max_wave = {}      # not referenced by the code visible in this file
total_value = {}   # month column index -> running total still available to churn
def do_wave(group, value, state, i, x):
    """Return a noisy, non-negative integer sample of a cosine wave at day x.

    Wave parameters are created lazily per `value` key and cached in the
    module-level `wave_period`, `wave_start` and `wave_size` dicts. Each
    `group` key may additionally receive one random "peak" that temporarily
    boosts the amplitude for the state that triggered it.

    Args:
        group: Key into `peaked`; at most one peak is recorded per group.
        value: Key into the cached wave parameter dicts.
        state: Compared against the state stored with the group's peak; only
            a matching state is boosted.
        i: Not used by this function.
        x: Day offset along the X axis (0 .. duration - 1 in the caller).

    Returns:
        The sampled wave height as an int, clamped to be at least 0.
    """
    # Lazily create the wave parameters the first time this value is seen.
    if value not in wave_start:
        wave_period[value] = max(1, random.random() * 1.5)
        wave_start[value] = math.pi + random.random() * math.pi
        # NOTE(review): 100 * random.random() is below 100, so this appears
        # to always store 100; the draw is kept so the RNG sequence is the
        # same. Confirm whether a random size was intended.
        wave_size[value] = max(100, 100 * random.random())

    # Once past the first third of the timeline, give a group that has not
    # peaked yet a 5% chance per call of starting its single peak here.
    past_first_third = x > (1.0 * duration / 3.0)
    if group not in peaked and past_first_third and random.random() > 0.95:
        peaked[group] = (state, x, max(1, random.random() * 2))

    # Scale the amplitude while inside the peak window of the peaked state.
    size = wave_size[value]
    peak = peaked.get(group)
    if peak is not None:
        peaked_state, peaked_x, amount = peak
        peak_length = duration / 15.0
        in_window = peaked_x < x < (peaked_x + peak_length)
        if state == peaked_state and in_window:
            offset = (x - peaked_x) / peak_length
            # The cosine term swings the multiplier from 1x up to 3x and
            # back down to 1x over the length of the peak.
            size *= amount * (2 + math.cos(math.pi + 2 * math.pi * offset))

    # Stretch the X axis by this wave's period so that different waves look
    # out of phase with each other.
    radians = wave_start[value] + x * step * wave_period[value]
    base = math.cos(radians)

    # Random noise proportional to the current wave height.
    noise = base * random.random()

    level = int((1 + base + noise) * size)
    return max(0, level)
# First cohort day: `duration` days before today, so the last generated row
# is yesterday's cohort.
start = datetime.date.today() - datetime.timedelta(days=duration)

# Number of per-month value columns following the three identifying columns.
month_count = len(COLUMNS) - 3

# Emit one CSV row per (group type, group value, day). `range` replaces the
# Python 2-only `xrange` so the script runs unchanged on Python 2 and 3.
for type_number, (group_type, value_list) in enumerate(group_types):
    for value_number, group_value in enumerate(value_list):
        wave_index = (type_number * 100) + value_number
        for x in range(duration):
            cohort_day = start + datetime.timedelta(days=x)
            row = [
                group_type,
                group_value,
                cohort_day.strftime('%m/%d/%y'),
            ]
            for i in range(month_count):
                # Column i is "live" for days [30 * i, 30 * (i + 1)); after
                # that window it churns (last active day is the downward
                # ramp).
                x_start = 30 * i
                x_churn_start = 30 * (i + 1)
                if x < x_start:
                    # Not active until it's our month.
                    row.append(0)
                elif x < x_churn_start:
                    # i is "months back"; only output if this cohort is
                    # live. This works for sign-up day.
                    next_value = do_wave(
                        type_number, value_number, i, wave_index + i, x)
                    # total_value is keyed by column only, so the running
                    # total is shared across every group type and value.
                    total_value[i] = total_value.get(i, 0) + next_value
                    row.append(next_value)
                elif total_value.get(i, 0) <= 0:
                    # Nothing left in this column to churn away.
                    row.append(0)
                else:
                    # After churn we slowly ramp down for the remainder.
                    distance = float(x - x_churn_start) / duration * 0.8
                    adjustment = (1 + -math.cos(math.pi * distance)) / 2
                    level = do_wave(
                        type_number, value_number, i, wave_index + i, x)
                    # Never remove more than the column has accumulated.
                    next_value = min(int(adjustment * level), total_value[i])
                    total_value[i] -= next_value
                    row.append(-next_value)
            out.writerow(row)