-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstatics.py
More file actions
92 lines (72 loc) · 2.76 KB
/
statics.py
File metadata and controls
92 lines (72 loc) · 2.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import collections
import math
import sys
import csv
# Output sink shared by the functions in this script: they report both
# diagnostics and results by writing rows through csvWriter.
# newline='' is required by the csv module for files handed to csv.writer;
# without it the writer's own "\r\n" row terminator is translated a second
# time on platforms whose line separator is not "\n", producing blank rows.
csvFile = open('statistic.csv', 'w', newline='')
csvWriter = csv.writer(csvFile, delimiter='\t')

# Immutable record holding the four computed statistics.
Statistics = collections.namedtuple("Statistics",
                                    "mean mode median std_dev")
def main():
    """Run the script: read every file named on the command line and report.

    With no arguments, or with -h/--help as the first argument, a usage
    row is written through csvWriter and the process exits via sys.exit().
    Otherwise each argument is passed to read_data(); if any numbers were
    collected their statistics are written with print_results(), else a
    "no numbers found" row is written.
    """
    filenames = sys.argv[1:]
    if not filenames or filenames[0] in ("-h", "--help"):
        usage = "usage: {0} file1 [file2 [... fileN]]".format(sys.argv[0])
        csvWriter.writerow([usage])
        sys.exit()

    numbers = []
    frequencies = collections.defaultdict(int)
    for filename in filenames:
        read_data(filename, numbers, frequencies)

    # Guard clause: nothing parsed from any file, so there is nothing to
    # compute.
    if not numbers:
        csvWriter.writerow(["no numbers found"])
        return

    statistics = calculate_statistics(numbers, frequencies)
    print_results(len(numbers), statistics)
def read_data(filename, numbers, frequencies):
    """Collect every whitespace-separated number found in *filename*.

    Each token that parses with float() is appended to *numbers* and its
    count in *frequencies* is incremented.  Tokens that do not parse are
    reported through csvWriter (file name, line number, token and the
    float() error message) and skipped.

    Args:
        filename: Path of the text file to read; it is decoded as ASCII.
        numbers: List that receives the parsed floats, in file order.
        frequencies: Mapping from number to occurrence count.  It is
            updated with ``+= 1``, so it must supply a default for
            missing keys (e.g. ``collections.defaultdict(int)``).

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file contains non-ASCII bytes.
    """
    # The context manager closes the file even if decoding fails part-way.
    with open(filename, encoding="ascii") as source:
        for lino, line in enumerate(source, start=1):
            for token in line.split():
                # Only the conversion can raise ValueError, so only it is
                # guarded.
                try:
                    number = float(token)
                except ValueError as err:
                    csvWriter.writerow(["{0}:{1}: skipping {2}: {3}".format(
                        filename, lino, token, err)])
                    continue
                numbers.append(number)
                frequencies[number] += 1
def calculate_statistics(numbers, frequencies):
    """Bundle the mean, mode, median and standard deviation of *numbers*.

    *frequencies* maps each number to its occurrence count and is used
    only for the mode, which is limited to at most three values.  The
    result is a Statistics named tuple.
    """
    average = sum(numbers) / len(numbers)
    # Keyword arguments are evaluated left to right, so the helpers run in
    # the order mode, median, std_dev.
    return Statistics(
        mean=average,
        mode=calculate_mode(frequencies, 3),
        median=calculate_median(numbers),
        std_dev=calculate_std_dev(numbers, average),
    )
def calculate_mode(frequencies, maximum_modes):
    """Return the most frequent number(s), or None if there is no usable mode.

    Args:
        frequencies: Mapping from number to how many times it occurred.
        maximum_modes: Largest number of tied modes worth reporting.

    Returns:
        A sorted list of the numbers sharing the highest count, or None
        when *frequencies* is empty or when more than *maximum_modes*
        numbers tie for the highest count.
    """
    # max() raises ValueError on an empty sequence; an empty data set
    # simply has no mode.
    if not frequencies:
        return None

    highest_frequency = max(frequencies.values())
    # Occurrence counts are compared exactly; a floating-point tolerance
    # adds nothing when the values being compared are counts.
    modes = [number for number, frequency in frequencies.items()
             if frequency == highest_frequency]
    if not modes or len(modes) > maximum_modes:
        return None
    modes.sort()
    return modes
def calculate_median(numbers):
    """Return the median of *numbers* without modifying the input.

    For an odd count this is the middle element of the sorted values; for
    an even count it is the average of the two middle elements.
    """
    ordered = sorted(numbers)
    half, is_odd = divmod(len(ordered), 2)
    if is_odd:
        return ordered[half]
    # Even count: average the two values that straddle the midpoint.
    return (ordered[half] + ordered[half - 1]) / 2
def calculate_std_dev(numbers, mean):
    """Return the sample standard deviation of *numbers* about *mean*.

    The variance uses the ``n - 1`` denominator.

    Args:
        numbers: Sequence of numeric values.
        mean: Arithmetic mean of *numbers*, supplied by the caller.

    Returns:
        The square root of the sample variance.  With fewer than two
        values the sample deviation is undefined (the denominator would
        be zero or negative), so 0.0 is returned instead of raising
        ZeroDivisionError.
    """
    if len(numbers) < 2:
        return 0.0
    total = sum((number - mean) ** 2 for number in numbers)
    variance = total / (len(numbers) - 1)
    return math.sqrt(variance)
def print_results(count, statistics):
    """Write the computed statistics through csvWriter, one row per value.

    Rows are written in the order count, mean, median, mode (omitted when
    ``statistics.mode`` is None) and standard deviation.  Each row is a
    single cell of the form ``label = value``.

    The label text is emitted without the stray ``\\c`` / ``\\s`` escape
    sequences (invalid escapes that put literal backslashes in the output)
    and each statistic gets its own row instead of all of them being run
    together in one cell.

    Args:
        count: How many numbers the statistics were computed from.
        statistics: Object with ``mean``, ``mode``, ``median`` and
            ``std_dev`` attributes; ``mode`` is None or a list of numbers.
    """
    real = "9.2f"

    rows = [
        "count = {0:6}".format(count),
        "mean = {0:{fmt}}".format(statistics.mean, fmt=real),
        "median = {0:{fmt}}".format(statistics.median, fmt=real),
    ]

    mode = statistics.mode
    if mode is not None:
        if len(mode) == 1:
            rows.append("mode = {0:{fmt}}".format(mode[0], fmt=real))
        else:
            # Several tied modes are shown as a bracketed list.
            rows.append(
                "mode = [" + ", ".join("{0:.2f}".format(m) for m in mode) + "]")

    rows.append("std. dev. = {0:{fmt}}".format(statistics.std_dev, fmt=real))

    for row in rows:
        csvWriter.writerow([row])
# Run the program, then release the output file.  The try/finally makes the
# close happen even when main() leaves through sys.exit() (usage/help path)
# or through an uncaught exception.
# NOTE(review): this runs on import as well as on direct execution; an
# `if __name__ == "__main__":` guard would avoid that, but is left out here
# so import-time behaviour stays as it was.
try:
    main()
finally:
    csvFile.close()