Skip to content

Commit

Permalink
More accurate file size estimate
Browse files: browse the repository at this point in the history
  • Loading branch information
danielpatrickdotdev authored and gunnarmorling committed Mar 3, 2024
1 parent c923467 commit 6daa93c
Showing 1 changed file with 12 additions and 16 deletions.
28 changes: 12 additions & 16 deletions src/main/python/create_measurements.py
Original file line number | Diff line number | Diff line change
@@ -84,22 +84,18 @@ def estimate_file_size(weather_station_names, num_rows_to_create):
"""
Tries to estimate how large a file the test data will be
"""
max_string = float('-inf')
min_string = float('inf')
per_record_size = 0
record_size_unit = "bytes"

for station in weather_station_names:
if len(station) > max_string:
max_string = len(station)
if len(station) < min_string:
min_string = len(station)
per_record_size = ((max_string + min_string * 2) + len(",-123.4")) / 2

total_file_size = num_rows_to_create * per_record_size
human_file_size = convert_bytes(total_file_size)

return f"Estimated max file size is: {human_file_size}.\nTrue size is probably much smaller (around half)."
total_name_bytes = sum(len(s.encode("utf-8")) for s in weather_station_names)
avg_name_bytes = total_name_bytes / float(len(weather_station_names))

# avg_temp_bytes = sum(len(str(n / 10)) for n in range(-999, 1000)) / 1999
avg_temp_bytes = 4.400200100050025

# add 2 for separator and newline
avg_line_length = avg_name_bytes + avg_temp_bytes + 2

human_file_size = convert_bytes(num_rows_to_create * avg_line_length)

return f"Estimated max file size is: {human_file_size}."


def build_test_data(weather_station_names, num_rows_to_create):
Expand Down

0 comments on commit 6daa93c

Please sign in to comment.