util.py 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. import faulthandler
  2. import functools
  3. import inspect
  4. import json
  5. import os
  6. import random
  7. import time
  8. from bisect import bisect_left
  9. from datetime import datetime, timedelta
  10. from math import floor, log10, sqrt, nan, inf
  11. import scipy.stats
  12. import tabulate
  13. from numpy.random.mtrand import binomial
  14. from lib import stack_tracer
  15. from lib.print_exc_plus import print_exc_plus
  16. chars = [str(d) for d in range(1, 10)]
  17. digits = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
  18. ps = [1. for _ in chars]
  19. # the first number is absolute and the second relative
  20. letter_dist = [("E", 21912, 12.02), ("T", 16587, 9.1), ("A", 14810, 8.12), ("O", 14003, 7.68), ("I", 13318, 7.31),
  21. ("N", 12666, 6.95), ("S", 11450, 6.28), ("R", 10977, 6.02), ("H", 10795, 5.92), ("D", 7874, 4.32),
  22. ("L", 7253, 3.98), ("U", 5246, 2.88), ("C", 4943, 2.71), ("M", 4761, 2.61), ("F", 4200, 2.3),
  23. ("Y", 3853, 2.11), ("W", 3819, 2.09), ("G", 3693, 2.03), ("P", 3316, 1.82), ("B", 2715, 1.49),
  24. ("V", 2019, 1.11), ("K", 1257, 0.69), ("X", 315, 0.17), ("Q", 205, 0.11), ("J", 188, 0.1),
  25. ("Z", 128, 0.07), ]
  26. sp = sum(ps)
  27. for row in letter_dist:
  28. chars.append(row[0])
  29. ps.append(float(row[2]))
  30. ps = [p / sum(ps) for p in ps]
  31. def choice(sequence, probabilities):
  32. # if sum(probabilities) != 1:
  33. # raise AssertionError('Probabilities must sum to 1')
  34. r = random.random()
  35. for idx, c in enumerate(sequence):
  36. r -= probabilities[idx]
  37. if r < 0:
  38. return c
  39. raise AssertionError('Probabilities must sum to 1')
  40. def multiple_choice(sequence, count):
  41. results = []
  42. num_remaining = len(sequence)
  43. for _ in range(count):
  44. idx = random.randrange(num_remaining)
  45. results.append(sequence[idx])
  46. del sequence[idx]
  47. num_remaining -= 1
  48. return results
  49. try:
  50. import winsound as win_sound
  51. def beep(*args, **kwargs):
  52. win_sound.Beep(*args, **kwargs)
  53. except ImportError:
  54. win_sound = None
  55. def beep(*_args, **_kwargs):
  56. pass
  57. def main_wrapper(f):
  58. @functools.wraps(f)
  59. def wrapper(*args, **kwargs):
  60. start = time.perf_counter()
  61. # import lib.stack_tracer
  62. import __main__
  63. # does not help much
  64. # monitoring_thread = hanging_threads.start_monitoring(seconds_frozen=180, test_interval=1000)
  65. os.makedirs('logs', exist_ok=True)
  66. stack_tracer.trace_start('logs/' + os.path.split(__main__.__file__)[-1] + '.html', interval=5)
  67. faulthandler.enable()
  68. profile_wall_time_instead_if_profiling()
  69. # noinspection PyBroadException
  70. try:
  71. f(*args, **kwargs)
  72. except Exception:
  73. print_exc_plus()
  74. exit(-1)
  75. finally:
  76. total_time = time.perf_counter() - start
  77. frequency = 2000
  78. duration = 500
  79. beep(frequency, duration)
  80. print('Total time', total_time)
  81. return wrapper
  82. def random_chars(count):
  83. return ''.join(choice(chars, probabilities=ps) for _ in range(count))
  84. def str2bool(v):
  85. v = str(v).strip().lower()
  86. if v in ["yes", 'y' "true", "t", "1"]:
  87. return True
  88. if v in ["no", 'n' "false", "f", "0", '', 'null', 'none']:
  89. return False
  90. raise ValueError('Can not convert `' + v + '` to bool')
  91. def my_tabulate(data, **params):
  92. if data == [] and 'headers' in params:
  93. data = [(None for _ in params['headers'])]
  94. tabulate.MIN_PADDING = 0
  95. return tabulate.tabulate(data, **params)
  96. def yn_dialog(msg):
  97. while True:
  98. result = input(msg + ' [y/n]: ')
  99. if result == 'y':
  100. return True
  101. if result == 'n':
  102. return False
  103. print('Type in \'y\' or \'n\'!')
  104. def round_to_closest_value(x, values):
  105. values = sorted(values)
  106. next_largest = bisect_left(values, x) # binary search
  107. if next_largest == 0:
  108. return values[0]
  109. if next_largest == len(values):
  110. return values[-1]
  111. next_smallest = next_largest - 1
  112. smaller = values[next_smallest]
  113. larger = values[next_largest]
  114. if abs(smaller - x) < abs(larger - x):
  115. return smaller
  116. else:
  117. return larger
  118. def binary_search(a, x, lo=0, hi=None):
  119. hi = hi if hi is not None else len(a) # hi defaults to len(a)
  120. pos = bisect_left(a, x, lo, hi) # find insertion position
  121. return pos if pos != hi and a[pos] == x else -1 # don't walk off the end
  122. def ceil_to_closest_value(x, values):
  123. values = sorted(values)
  124. next_largest = bisect_left(values, x) # binary search
  125. if next_largest < len(values):
  126. return values[next_largest]
  127. else:
  128. return values[-1] # if there is no larger value use the largest one
  129. def upset_binomial(mu, p, factor):
  130. if factor > 1:
  131. raise NotImplementedError()
  132. return (binomial(mu / p, p) - mu) * factor + mu
  133. def multinomial(n, bins):
  134. if bins == 0:
  135. if n > 0:
  136. raise ValueError('Cannot distribute to 0 bins.')
  137. return []
  138. remaining = n
  139. results = []
  140. for i in range(bins - 1):
  141. x = binomial(remaining, 1 / (bins - i))
  142. results.append(x)
  143. remaining -= x
  144. results.append(remaining)
  145. return results
  146. def round_to_n(x, n):
  147. return round(x, -int(floor(log10(x))) + (n - 1))
  148. def get_all_subclasses(klass):
  149. all_subclasses = []
  150. for subclass in klass.__subclasses__():
  151. all_subclasses.append(subclass)
  152. all_subclasses.extend(get_all_subclasses(subclass))
  153. return all_subclasses
  154. def latin1_json(data):
  155. return json.dumps(data, ensure_ascii=False).encode('latin-1')
  156. def l2_norm(v1, v2):
  157. if len(v1) != len(v2):
  158. raise ValueError('Both vectors must be of the same size')
  159. return sqrt(sum([(x1 - x2) * (x1 - x2) for x1, x2 in zip(v1, v2)]))
  160. def allow_additional_unused_keyword_arguments(func):
  161. @functools.wraps(func)
  162. def wrapper(*args, **kwargs):
  163. allowed_kwargs = [param.name for param in inspect.signature(func).parameters.values()]
  164. allowed_kwargs = {a: kwargs[a] for a in kwargs if a in allowed_kwargs}
  165. return func(*args, **allowed_kwargs)
  166. return wrapper
  167. def rename(new_name):
  168. def decorator(f):
  169. f.__name__ = new_name
  170. return f
  171. return decorator
  172. def mean_confidence_interval_size(data, confidence=0.95):
  173. if len(data) == 0:
  174. return nan
  175. if len(data) == 1:
  176. return inf
  177. if scipy.stats.sem(data) == 0:
  178. return 0
  179. return len(data) / sum(data) - scipy.stats.t.interval(confidence, len(data) - 1,
  180. loc=len(data) / sum(data),
  181. scale=scipy.stats.sem(data))[0]
  182. class LogicError(Exception):
  183. pass
  184. def round_time(dt=None, precision=60):
  185. """Round a datetime object to any time lapse in seconds
  186. dt : datetime.datetime object, default now.
  187. roundTo : Closest number of seconds to round to, default 1 minute.
  188. Author: Thierry Husson 2012 - Use it as you want but don't blame me.
  189. """
  190. if dt is None:
  191. dt = datetime.now()
  192. if isinstance(precision, timedelta):
  193. precision = precision.total_seconds()
  194. seconds = (dt.replace(tzinfo=None) - dt.min).seconds
  195. rounding = (seconds + precision / 2) // precision * precision
  196. return dt + timedelta(seconds=rounding - seconds,
  197. microseconds=dt.microsecond)
  198. def profile_wall_time_instead_if_profiling():
  199. try:
  200. import yappi
  201. except ModuleNotFoundError:
  202. return
  203. currently_profiling = len(yappi.get_func_stats())
  204. if currently_profiling and yappi.get_clock_type() != 'wall':
  205. print('Changing yappi clock type to wall and restarting yappi.')
  206. yappi.stop()
  207. yappi.clear_stats()
  208. yappi.set_clock_type("wall")
  209. yappi.start()
  210. def dummy_computation(_data):
  211. return
  212. def current_year_begin():
  213. return datetime(datetime.today().year, 1, 1).timestamp()
  214. def current_day_begin():
  215. return datetime.today().timestamp() // (3600 * 24) * (3600 * 24)
  216. def current_second_begin():
  217. return floor(datetime.today().timestamp())