|
@@ -1,21 +1,35 @@
|
|
|
-import csv
|
|
|
-import numpy
|
|
|
+from random import random
|
|
|
|
|
|
debug = True
|
|
|
chars = [str(d) for d in range(1, 10)]
|
|
|
|
|
|
-p = [1. for _ in chars]
|
|
|
-with open('letter_dist.csv', newline='') as csv_file:
|
|
|
- reader = csv.reader(csv_file, delimiter=',')
|
|
|
- sp = sum(p)
|
|
|
- for row in reader:
|
|
|
- chars.append(row[0])
|
|
|
- p.append(float(row[2]))
|
|
|
-p = numpy.array(p) / sum(p)
|
|
|
+ps = [1. for _ in chars]  # one unit of weight for each digit character '1'-'9' already in chars
|
|
|
+letter_dist = [("E", 21912, 12.02), ("T", 16587, 9.1), ("A", 14810, 8.12), ("O", 14003, 7.68), ("I", 13318, 7.31),
|
|
|
+ ("N", 12666, 6.95), ("S", 11450, 6.28), ("R", 10977, 6.02), ("H", 10795, 5.92), ("D", 7874, 4.32),
|
|
|
+ ("L", 7253, 3.98), ("U", 5246, 2.88), ("C", 4943, 2.71), ("M", 4761, 2.61), ("F", 4200, 2.3),
|
|
|
+ ("Y", 3853, 2.11), ("W", 3819, 2.09), ("G", 3693, 2.03), ("P", 3316, 1.82), ("B", 2715, 1.49),
|
|
|
+ ("V", 2019, 1.11), ("K", 1257, 0.69), ("X", 315, 0.17), ("Q", 205, 0.11), ("J", 188, 0.1),
|
|
|
+ ("Z", 128, 0.07), ]  # read below as row[0] = character, row[2] = weight; presumably (letter, count, percent) - confirm
|
|
|
+sp = sum(ps)  # total of the digit weights only (taken before the letter weights are appended); not used again in this hunk
|
|
|
+for row in letter_dist:
|
|
|
+ chars.append(row[0])
|
|
|
+ ps.append(float(row[2]))  # third field of the row is the character's weight
|
|
|
+ps = [p / sum(ps) for p in ps]  # rescale so the weights sum to 1, up to float rounding
|
|
|
+
|
|
|
+
|
|
|
+def choice(sequence, probabilities):  # weighted pick: returns sequence[i] with probability probabilities[i]
|
|
|
+    if abs(sum(probabilities) - 1) > 1e-9:  # tolerance instead of `!= 1`: normalised floats seldom sum to exactly 1.0
|
|
|
+        raise AssertionError('Probabilities must sum up to 1')
|
|
|
+    r = random()  # uniform draw in [0, 1)
|
|
|
+    for idx, c in enumerate(sequence):
|
|
|
+        r -= probabilities[idx]  # step through the cumulative distribution
|
|
|
+        if r < 0:
|
|
|
+            return c
|
|
|
+    return sequence[-1]  # rounding left r >= 0 after the last weight: the draw belongs to the final item
|
|
|
|
|
|
|
|
|
def random_chars(count):
|
|
|
- return ''.join(numpy.random.choice(chars, p=p) for _ in range(count))
|
|
|
+ return ''.join(choice(chars, probabilities=ps) for _ in range(count))  # joins `count` independent weighted draws from chars
|
|
|
|
|
|
|
|
|
def str2bool(v):
|