"""
made by Eren Yilmaz
"""
import functools
import itertools
from copy import deepcopy

from goto import with_goto

from lib import util
from lib.tuned_cache import TunedMemory
from lib.progress_bar import ProgressBar
import pickle

from matplotlib.axes import Axes
import os
import random
import sqlite3
from datetime import datetime
from math import inf, nan, ceil, sqrt
from timeit import default_timer
from typing import Dict, Any, List, Callable, Optional, Tuple, Iterable, Union

import numpy
import pandas
import scipy.stats

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates

from lib.util import my_tabulate, round_to_digits, print_progress_bar, heatmap_from_points, LogicError, shorten_name

score_cache = TunedMemory(location='.cache/scores', verbose=0)
sns.set(style="whitegrid", font_scale=1.5)

# set up db for results
connection: sqlite3.Connection = sqlite3.connect('random_search_results.db')
connection.cursor().execute('PRAGMA foreign_keys = 1')
connection.cursor().execute('PRAGMA journal_mode = WAL')
connection.cursor().execute('PRAGMA synchronous = NORMAL')

Parameters = Dict[str, Any]
MetricValue = float
Metrics = Dict[str, MetricValue]
History = List[MetricValue]

suppress_intermediate_beeps = False

label = goto = object()


class Prediction:
    def __init__(self, dataset: str, y_true, y_pred, name: str):
        self.y_pred = y_pred
        self.y_true = y_true
        self.name = name
        if not isinstance(name, str):
            self.name = str(name)
        else:
            self.name = name
        if not isinstance(dataset, str):
            raise TypeError
        self.dataset = dataset

    def __str__(self):
        return str(self.__dict__)

    def __repr__(self):
        return self.__class__.__name__ + repr({k: v for k, v in self.__dict__.items() if k != 'predictions'})


class EvaluationResult:
    def __init__(self,
                 results: Dict[str, MetricValue],
                 parameters: Parameters = None,
                 predictions: Optional[List[Prediction]] = None):
        self.predictions = predictions
        if self.predictions is not None:
            self.predictions = self.predictions.copy()
        else:
            self.predictions = []
        if parameters is None:
            self.parameters = parameters
        else:
            self.parameters = parameters.copy()
        self.results = results.copy()

    def __iter__(self):
        yield self

    def __eq__(self, other):
        return (isinstance(other, EvaluationResult)
                and self.parameters == other.parameters
                and self.predictions == other.predictions
                and self.results == other.results)

    def __str__(self):
        return '{0}{1}'.format(self.__class__.__name__,
                               {k: getattr(self, k) for k in ['results', 'parameters', 'predictions']})

    def __repr__(self):
        return self.__class__.__name__ + repr(self.__dict__)


assert list(EvaluationResult({}, {})) == list([EvaluationResult({}, {})])

EvaluationFunction = Callable[[Parameters], Union[List[EvaluationResult], EvaluationResult, List[float], float]]


class Parameter:
    def __init__(self, name: str, initial_value, larger_value, smaller_value, first_try_increase=False):
        self.name = name
        self.initial_value = initial_value
        self.larger_value = larger_value
        self.smaller_value = smaller_value
        self.first_try_increase = first_try_increase

    def __repr__(self):
        return self.__class__.__name__ + repr(self.__dict__)

    def copy(self, new_name=None):
        result: Parameter = deepcopy(self)
        if new_name is not None:
            result.name = new_name
        return result


class BoundedParameter(Parameter):
    def __init__(self, name, initial_value, larger_value, smaller_value, minimum=-inf, maximum=inf,
                 first_try_increase=False):
        self.minimum = minimum
        self.maximum = maximum

        super().__init__(name,
                         initial_value,
                         lambda x: self._bounded(larger_value(x)),
                         lambda x: self._bounded(smaller_value(x)),
                         first_try_increase=first_try_increase)

        if self.initial_value < self.minimum:
            raise ValueError('Initial value is lower than minimum value.')
        if self.initial_value > self.maximum:
            raise ValueError('Initial value is larger than maximum value.')

    def _bounded(self, y):
        y = max(self.minimum, y)
        y = min(self.maximum, y)
        return y


class ConstantParameter(Parameter):
    def __init__(self, name, value):
        super().__init__(name,
                         value,
                         lambda x: value,
                         lambda x: value)


class BinaryParameter(Parameter):
    def __init__(self, name, value1, value2):
        super().__init__(name,
                         value1,
                         lambda x: value2 if x == value1 else value1,
                         lambda x: value2 if x == value1 else value1)


class BooleanParameter(Parameter):
    def __init__(self, name, initial_value: bool):
        super().__init__(name,
                         bool(initial_value),
                         lambda x: not x,
                         lambda x: not x)


class TernaryParameter(Parameter):
    def __init__(self, name, value1, value2, value3):
        self.smaller = {value1: value3, value2: value1, value3: value2}
        self.larger = {value1: value2, value2: value3, value3: value1}
        super().__init__(name,
                         value1,
                         lambda x: self.smaller[x],
                         lambda x: self.larger[x])


class ListParameter(Parameter):
    def __init__(self, name, initial_value, possible_values: List, first_try_increase=False, circle=False):
        self.possible_values = possible_values.copy()
        if initial_value not in self.possible_values:
            raise ValueError()
        if len(set(self.possible_values)) != len(self.possible_values):
            print('WARNING: It seems that there are duplicates in the list of possible values for {0}'.format(name))
        length = len(self.possible_values)
        if circle:
            smaller = lambda x: self.possible_values[(self.possible_values.index(x) + 1) % length]
            larger = lambda x: self.possible_values[(self.possible_values.index(x) - 1) % length]
        else:
            smaller = lambda x: self.possible_values[min(self.possible_values.index(x) + 1, length - 1)]
            larger = lambda x: self.possible_values[max(self.possible_values.index(x) - 1, 0)]

        super().__init__(name,
                         initial_value,
                         smaller,
                         larger,
                         first_try_increase=first_try_increase)


class ExponentialParameter(BoundedParameter):
    def __init__(self, name, initial_value, base, minimum=-inf, maximum=inf, first_try_increase=False):
        super().__init__(name,
                         initial_value,
                         lambda x: float(x * base),
                         lambda x: float(x / base),
                         minimum,
                         maximum,
                         first_try_increase=first_try_increase)
        self.plot_scale = 'log'


class ExponentialIntegerParameter(BoundedParameter):
    def __init__(self, name, initial_value, base, minimum=-inf, maximum=inf, first_try_increase=False):
        if minimum != -inf:
            minimum = round(minimum)

        if maximum != inf:
            maximum = round(maximum)

        super().__init__(name,
                         round(initial_value),
                         lambda x: round(x * base),
                         lambda x: round(x / base),
                         minimum,
                         maximum,
                         first_try_increase=first_try_increase)
        self.plot_scale = 'log'


class LinearParameter(BoundedParameter):
    def __init__(self, name, initial_value, summand, minimum=-inf, maximum=inf, first_try_increase=False):
        super().__init__(name,
                         initial_value,
                         lambda x: float(x + summand),
                         lambda x: float(x - summand),
                         minimum,
                         maximum,
                         first_try_increase=first_try_increase)


class LinearIntegerParameter(BoundedParameter):
    def __init__(self, name, initial_value, summand, minimum=-inf, maximum=inf, first_try_increase=False):
        super().__init__(name,
                         initial_value,
                         lambda x: x + summand,
                         lambda x: x - summand,
                         minimum,
                         maximum,
                         first_try_increase=first_try_increase)


class InvalidParametersError(Exception):
    def __init__(self, parameters=None):
        self.parameters = parameters


class BadParametersError(InvalidParametersError):
    pass


class InvalidReturnError(Exception):
    pass


class EmptyTableError(Exception):
    pass


EXAMPLE_PARAMS = [
    ExponentialParameter('learn_rate', 0.001, 10),
    ExponentialIntegerParameter('hidden_layer_size', 512, 2, minimum=1),
    LinearIntegerParameter('hidden_layer_count', 3, 1, minimum=0),
    ExponentialIntegerParameter('epochs', 100, 5, minimum=1),
    LinearParameter('dropout_rate', 0.5, 0.2, minimum=0, maximum=1),
]


def mean_confidence_interval_size(data, confidence=0.95, force_v: Optional[int] = None,
                                  force_sem: Optional[float] = None):
    if len(data) == 0:
        return nan
    if force_sem is None:
        if len(data) == 1:
            return inf
        sem = scipy.stats.sem(data)
    else:
        sem = force_sem
    if sem == 0:
        return 0
    if force_v is None:
        v = len(data) - 1
    else:
        v = force_v
    return numpy.mean(data) - scipy.stats.t.interval(confidence,
                                                     df=v,
                                                     loc=numpy.mean(data),
                                                     scale=sem)[0]


def try_parameters(experiment_name: str,
                   evaluate: EvaluationFunction,
                   params: Dict[str, any],
                   optimize: Optional[str] = None,
                   larger_result_is_better: bool = None, ):
    print('running experiment...')
    params = params.copy()

    if larger_result_is_better is None and optimize is not None:
        raise NotImplementedError(
            'Don\'t know how to optimize {0}. Did you specify `larger_result_is_better`?'.format(optimize))
    assert larger_result_is_better is not None or optimize is None

    worst_score = -inf if larger_result_is_better else inf

    cursor = connection.cursor()
    start = default_timer()
    try:
        result = evaluate(params)
        if not isinstance(result, Iterable):
            result = [result]
        evaluation_results: List[EvaluationResult] = list(result)
    except InvalidParametersError as e:
        if optimize is not None:
            bad_results: Dict[str, float] = {
                optimize: worst_score
            }
        else:
            bad_results = {}
        if e.parameters is None:
            evaluation_results = [EvaluationResult(
                parameters=params,
                results=bad_results
            )]
        else:
            evaluation_results = [EvaluationResult(
                parameters=e.parameters,
                results=bad_results
            )]
    finally:
        duration = default_timer() - start

    for idx in range(len(evaluation_results)):
        if isinstance(evaluation_results[idx], float):
            evaluation_results[idx] = EvaluationResult(parameters=params,
                                                       results={optimize: evaluation_results[idx]})

    p_count = 0
    for evaluation_result in evaluation_results:
        if evaluation_result.parameters is None:
            evaluation_result.parameters = params
        metric_names = sorted(evaluation_result.results.keys())
        param_names = list(sorted(evaluation_result.parameters.keys()))
        for metric_name in metric_names:
            add_metric_column(experiment_name, metric_name, verbose=1)
        for param_name in param_names:
            add_parameter_column(experiment_name, param_name, evaluation_result.parameters[param_name], verbose=1)

        if not set(param_names).isdisjoint(metric_names):
            raise RuntimeError('Metrics and parameter names should be disjoint')

        if optimize is not None and numpy.isnan(evaluation_result.results[optimize]):
            evaluation_result.results[optimize] = worst_score

        metric_values = [evaluation_result.results[metric_name] for metric_name in metric_names]
        param_names_comma_separated = ','.join('"' + param_name + '"' for param_name in param_names)
        metric_names_comma_separated = ','.join('"' + metric_name + '"' for metric_name in metric_names)
        insert_question_marks = ','.join('?' for _ in range(len(param_names) + len(metric_names)))

        cursor.execute('''
                INSERT INTO {0} ({1}) VALUES ({2})
                '''.format(experiment_name,
                           param_names_comma_separated + ',' + metric_names_comma_separated,
                           insert_question_marks), (*[evaluation_result.parameters[name] for name in param_names],
                                                    *metric_values))
        result_id = cursor.lastrowid
        assert cursor.execute(f'SELECT COUNT(*) FROM {experiment_name}_predictions WHERE result_id = ? LIMIT 1',
                              (result_id,)).fetchone()[0] == 0
        p_count += len(evaluation_result.predictions)
        dataset_names = [(prediction.dataset, prediction.name) for prediction in evaluation_result.predictions]
        if len(set(dataset_names)) != len(dataset_names):
            print('\n'.join(sorted(dsn
                                   for idx, dsn in dataset_names
                                   if dsn in dataset_names[idx:])))
            raise InvalidReturnError(
                'Every combination of name and dataset in a single evaluation result must be unique.'
                'There should be a list of duplicates printed above where the number of occurrences'
                'of an element in the list is the actual number of occurrences minus 1 '
                '(so only duplicates are listed).')
        # noinspection SqlResolve
        cursor.executemany('''
            INSERT INTO {0}_predictions (dataset, y_true, y_pred, result_id, name) 
            VALUES (?, ?, ?, ?, ?)
            '''.format(experiment_name), [(prediction.dataset,
                                           pickle.dumps(prediction.y_true),
                                           pickle.dumps(prediction.y_pred),
                                           result_id,
                                           prediction.name) for prediction in evaluation_result.predictions])

    connection.commit()
    print('saved', len(evaluation_results), 'results and', p_count, 'predictions to db')
    if not suppress_intermediate_beeps:
        util.beep(1000, 500)

    if optimize is not None:
        scores = [r.results[optimize] for r in evaluation_results]
        if larger_result_is_better:
            best_score = max(scores)
        else:
            best_score = min(scores)
        print('  finished in', duration, 'seconds, best loss/score:', best_score)

    for r in evaluation_results:
        if list(sorted(r.results.keys())) != list(sorted(metric_names)):
            raise InvalidReturnError("""
            Wrong metric names were returned by `evaluate`:
            Expected metric_names={0}
            but was {1}.
            The result was saved to database anyways, possibly with missing values.
            """.format(list(sorted(metric_names)),
                       list(sorted(r.results.keys()))))

    return evaluation_results


def config_dict_from_param_list(params: List[Parameter]):
    return {
        p.name: p.initial_value
        for p in params
    }


def evaluate_with_initial_params(experiment_name: str,
                                 params: List[Parameter],
                                 evaluate: EvaluationFunction,
                                 optimize: str,
                                 larger_result_is_better: bool,
                                 metric_names=None,
                                 num_experiments=1, ):
    random_parameter_search(experiment_name=experiment_name,
                            params=params,
                            evaluate=evaluate,
                            optimize=optimize,
                            larger_result_is_better=larger_result_is_better,
                            mutation_probability=1.,
                            no_mutations_probability=0.,
                            max_num_experiments=num_experiments,
                            metric_names=metric_names,
                            initial_experiments=num_experiments,
                            experiment_count='db_tries_initial', )


def random_parameter_search(experiment_name: str,
                            params: List[Parameter],
                            evaluate: EvaluationFunction,
                            optimize: str,
                            larger_result_is_better: bool,
                            mutation_probability: float = None,
                            no_mutations_probability: float = None,
                            allow_multiple_mutations=False,
                            max_num_experiments=inf,
                            metric_names=None,
                            initial_experiments=1,
                            runs_per_configuration=inf,
                            initial_runs=1,
                            ignore_configuration_condition='0',
                            experiment_count='tries', ):
    print('experiment name:', experiment_name)
    if metric_names is None:
        metric_names = [optimize]
    if optimize not in metric_names:
        raise ValueError('trying to optimize {0} but only metrics available are {1}'.format(optimize, metric_names))
    params = sorted(params, key=lambda p: p.name)
    validate_parameter_set(params)

    param_names = [param.name for param in params]

    cursor = connection.cursor()
    create_experiment_tables_if_not_exists(experiment_name, params, metric_names)

    def max_tries_reached(ps):
        return len(result_ids_for_parameters(experiment_name, ps)) >= runs_per_configuration

    def min_tries_reached(ps):
        return len(result_ids_for_parameters(experiment_name, ps)) >= initial_runs

    def try_(ps) -> bool:
        tried = False
        if not max_tries_reached(ps):
            try_parameters(experiment_name=experiment_name,
                           evaluate=evaluate,
                           params=ps,
                           optimize=optimize,
                           larger_result_is_better=larger_result_is_better, )
            tried = True
        else:
            print('Skipping because maximum number of tries is already reached.')
        while not min_tries_reached(ps):
            print('Repeating because minimum number of tries is not reached.')
            try_parameters(experiment_name=experiment_name,
                           evaluate=evaluate,
                           params=ps,
                           optimize=optimize,
                           larger_result_is_better=larger_result_is_better, )
            tried = True
        return tried

    if mutation_probability is None:
        mutation_probability = 1 / (len(params) + 1)
    if no_mutations_probability is None:
        no_mutations_probability = (1 - 1 / len(params)) / 4
    initial_params = {param.name: param.initial_value for param in params}
    print('initial parameters:', initial_params)

    def skip():
        best_scores, best_mean, best_std, best_conf = get_results_for_params(optimize, experiment_name,
                                                                             best_params, 0.99)
        try_scores, try_mean, try_std, try_conf = get_results_for_params(optimize, experiment_name,
                                                                         try_params, 0.99)
        if larger_result_is_better:
            if best_mean - best_conf > try_mean + try_conf:
                return True
        else:
            if best_mean + best_conf < try_mean - try_conf:
                return True
        return False

    # get best params
    initial_params = {param.name: param.initial_value for param in params}
    any_results = cursor.execute('SELECT EXISTS (SELECT * FROM {0} WHERE NOT ({1}))'.format(experiment_name,
                                                                                            ignore_configuration_condition)).fetchone()[
        0]
    if any_results:
        best_params = get_best_params(experiment_name,
                                      larger_result_is_better,
                                      optimize,
                                      param_names,
                                      additional_condition=f'NOT ({ignore_configuration_condition})')
    else:
        best_params = initial_params
    try_params = best_params.copy()

    def results_for_params(ps):
        return get_results_for_params(
            metric=optimize,
            experiment_name=experiment_name,
            parameters=ps
        )

    if experiment_count == 'tries':
        num_experiments = 0
    elif experiment_count == 'results':
        num_experiments = 0
    elif experiment_count == 'db_total':
        num_experiments = cursor.execute('SELECT COUNT(*) FROM {0} WHERE NOT ({1})'.format(experiment_name,
                                                                                           ignore_configuration_condition)).fetchone()[
            0]
    elif experiment_count == 'db_tries_best':
        num_experiments = len(result_ids_for_parameters(experiment_name, best_params))
    elif experiment_count == 'db_tries_initial':
        num_experiments = len(result_ids_for_parameters(experiment_name, initial_params))
    else:
        raise ValueError('Invalid argument for experiment_count')
    last_best_score = results_for_params(best_params)[1]
    while num_experiments < max_num_experiments:

        if num_experiments < initial_experiments:
            try_params = initial_params.copy()
        else:
            any_results = \
                cursor.execute('SELECT EXISTS (SELECT * FROM {0} WHERE NOT ({1}))'.format(experiment_name,
                                                                                          ignore_configuration_condition)).fetchone()[
                    0]
            if any_results:
                last_best_params = best_params
                best_params = get_best_params(experiment_name,
                                              larger_result_is_better,
                                              optimize,
                                              param_names,
                                              additional_condition=f'NOT ({ignore_configuration_condition})')
                best_scores, best_score, _, best_conf_size = results_for_params(best_params)
                if last_best_score is not None and best_score is not None:
                    if last_best_params != best_params:
                        if last_best_score < best_score and larger_result_is_better or last_best_score > best_score and not larger_result_is_better:
                            print(' --> Parameters were improved by this change!')
                        if last_best_score > best_score and larger_result_is_better or last_best_score < best_score and not larger_result_is_better:
                            print(' --> Actually other parameters are better...')
                last_best_score = best_score

                # print('currently best parameters:', best_params)
                changed_params = {k: v for k, v in best_params.items() if best_params[k] != initial_params[k]}
                print('currently best parameters (excluding unchanged parameters):', changed_params)
                print('currently best score:', best_score, 'conf.', best_conf_size, 'num.', len(best_scores))
            else:
                best_params = {param.name: param.initial_value for param in params}
                best_conf_size = inf
            try_params = best_params.copy()
            verbose = 1
            if best_conf_size != inf:
                if random.random() > no_mutations_probability:
                    modify_params_randomly(mutation_probability, params, try_params, verbose,
                                           allow_multiple_mutations=allow_multiple_mutations)

        if num_experiments < initial_experiments:
            try_params = initial_params.copy()
        else:
            # check if this already has a bad score
            if skip():
                print('skipping because this set of parameters is known to be worse with high probability.')
                print()
                continue

        # print('trying parameters', {k: v for k, v in try_params.items() if try_params[k] != initial_params[k]})

        results = try_(try_params)

        if experiment_count == 'tries':
            num_experiments += 1
        elif experiment_count == 'results':
            num_experiments += len(results)
        elif experiment_count == 'db_total':
            num_experiments = cursor.execute('SELECT COUNT(*) FROM {0}'.format(experiment_name)).fetchone()[0]
        elif experiment_count == 'db_tries_best':
            num_experiments = len(result_ids_for_parameters(experiment_name, best_params))
        elif experiment_count == 'db_tries_initial':
            num_experiments = len(result_ids_for_parameters(experiment_name, initial_params))
        else:
            raise LogicError('It is not possible that this is reached.')


@with_goto
def diamond_parameter_search(experiment_name: str,
                             diamond_size: int,
                             params: List[Parameter],
                             evaluate: EvaluationFunction,
                             optimize: str,
                             larger_result_is_better: bool,
                             runs_per_configuration=inf,
                             initial_runs=1,
                             metric_names=None,
                             filter_results_condition='1'):
    print('experiment name:', experiment_name)
    if metric_names is None:
        metric_names = [optimize]
    if optimize not in metric_names:
        raise ValueError('trying to optimize {0} but only metrics available are {1}'.format(optimize, metric_names))
    print('Optimizing metric', optimize)
    if runs_per_configuration > initial_runs:
        print(
            f'WARNING: You are using initial_runs={initial_runs} and runs_per_configuration={runs_per_configuration}. '
            f'This may lead to unexpected results if you dont know what you are doing.')
    params_in_original_order = params
    params = sorted(params, key=lambda p: p.name)
    validate_parameter_set(params)

    create_experiment_tables_if_not_exists(experiment_name, params, metric_names)

    initial_params = {param.name: param.initial_value for param in params}
    print('initial parameters:', initial_params)

    # get best params
    initial_params = {param.name: param.initial_value for param in params}
    try:
        best_params = get_best_params_and_compare_with_initial(experiment_name, initial_params, larger_result_is_better,
                                                               optimize,
                                                               additional_condition=filter_results_condition)
    except EmptyTableError:
        best_params = initial_params

    def max_tries_reached(ps):
        return len(result_ids_for_parameters(experiment_name, ps)) >= runs_per_configuration

    def min_tries_reached(ps):
        return len(result_ids_for_parameters(experiment_name, ps)) >= initial_runs

    def try_(ps) -> bool:
        tried = False
        if not max_tries_reached(ps):
            try_parameters(experiment_name=experiment_name,
                           evaluate=evaluate,
                           params=ps,
                           optimize=optimize,
                           larger_result_is_better=larger_result_is_better, )
            tried = True
        else:
            print('Skipping because maximum number of tries is already reached.')
        while not min_tries_reached(ps):
            print('Repeating because minimum number of tries is not reached.')
            try_parameters(experiment_name=experiment_name,
                           evaluate=evaluate,
                           params=ps,
                           optimize=optimize,
                           larger_result_is_better=larger_result_is_better, )
            tried = True
        return tried

    last_best_score = results_for_params(optimize, experiment_name, best_params)[1]
    modifications_steps = [
        {'param_name': param.name, 'direction': direction}
        for param in params_in_original_order
        for direction in ([param.larger_value, param.smaller_value] if param.first_try_increase
                          else [param.smaller_value, param.larger_value])
    ]
    label.restart
    restart_scheduled = False
    while True:  # repeatedly iterate parameters
        any_tries_done_this_iteration = False
        for num_modifications in range(diamond_size + 1):  # first try small changes, later larger changes
            modification_sets = itertools.product(*(modifications_steps for _ in range(num_modifications)))
            for modifications in modification_sets:  # which modifications to try this time
                while True:  # repeatedly modify parameters in this direction
                    improvement_found_in_this_iteration = False
                    try_params = best_params.copy()
                    for modification in modifications:
                        try_params[modification['param_name']] = modification['direction'](
                            try_params[modification['param_name']])
                    for param_name, param_value in try_params.items():
                        if best_params[param_name] != param_value:
                            print(f'Setting {param_name} = {param_value} for the next run.')
                    if try_params == best_params:
                        print('Repeating experiment with best found parameters.')

                    if try_(try_params):  # if the experiment was actually conducted
                        any_tries_done_this_iteration = True
                        best_params = get_best_params_and_compare_with_initial(experiment_name, initial_params,
                                                                               larger_result_is_better, optimize,
                                                                               filter_results_condition)

                        last_best_params = best_params
                        best_scores, best_score, _, best_conf_size = results_for_params(optimize, experiment_name,
                                                                                        best_params)

                        changed_params = {k: v for k, v in best_params.items() if best_params[k] != initial_params[k]}
                        print('currently best parameters (excluding unchanged parameters):', changed_params)
                        print('currently best score:', best_score, 'conf.', best_conf_size, 'num.', len(best_scores))
                    else:
                        last_best_params = best_params
                        _, best_score, _, best_conf_size = results_for_params(optimize, experiment_name, best_params)

                    if last_best_score is not None and best_score is not None:
                        if last_best_params != best_params:
                            if last_best_score < best_score and larger_result_is_better or last_best_score > best_score and not larger_result_is_better:
                                print(' --> Parameters were improved by this change!')
                                improvement_found_in_this_iteration = True
                                if num_modifications > 1:
                                    # two or more parameters were modified and this improved the results -> first try to modify them again in the same direction,
                                    # then restart the search from the best found configuration
                                    restart_scheduled = True
                            elif last_best_score > best_score and larger_result_is_better or last_best_score < best_score and not larger_result_is_better:
                                print(' --> Actually other parameters are better...')
                    if not improvement_found_in_this_iteration:
                        break  # stop if no improvement was found in this direction
                if restart_scheduled:
                    break
            if restart_scheduled:
                break
        if restart_scheduled:
            goto.restart
        if not any_tries_done_this_iteration:
            break  # parameter search finished (converged in some sense)


cross_parameter_search = functools.partial(diamond_parameter_search, diamond_size=1)
cross_parameter_search.__name__ = 'cross_parameter_search'


def get_best_params_and_compare_with_initial(experiment_name, initial_params, larger_result_is_better, optimize,
                                             additional_condition='1'):
    best_params = get_best_params(experiment_name, larger_result_is_better, optimize, list(initial_params),
                                  additional_condition=additional_condition)
    changed_params = {k: v for k, v in best_params.items() if best_params[k] != initial_params[k]}
    best_scores, best_score, _, best_conf_size = results_for_params(optimize, experiment_name, best_params)
    print('currently best parameters (excluding unchanged parameters):', changed_params)
    print('currently best score:', best_score, 'conf.', best_conf_size, 'num.', len(best_scores))
    return best_params


def results_for_params(optimize, experiment_name, ps):
    return get_results_for_params(
        metric=optimize,
        experiment_name=experiment_name,
        parameters=ps
    )


def modify_params_randomly(mutation_probability, params, try_params, verbose, allow_multiple_mutations=False):
    for param in params:
        while random.random() < mutation_probability:
            next_value = random.choice([param.smaller_value, param.larger_value])
            old_value = try_params[param.name]
            try:
                try_params[param.name] = round_to_digits(next_value(try_params[param.name]), 4)
            except TypeError:  # when the parameter is not a number
                try_params[param.name] = next_value(try_params[param.name])
            if verbose and try_params[param.name] != old_value:
                print('setting', param.name, '=', try_params[param.name], 'for this run')
            if not allow_multiple_mutations:
                break


def finish_experiments(experiment_name: str,
                       params: List[Parameter],
                       optimize: str,
                       larger_result_is_better: bool,
                       metric_names=None,
                       filter_results_table='1',
                       max_display_results=None,
                       print_results_table=False,
                       max_table_row_count=inf,
                       plot_metrics_by_metrics=False,
                       plot_metric_over_time=False,
                       plot_metrics_by_parameters=False, ):
    if max_display_results is inf:
        max_display_results = None
    if metric_names is None:
        metric_names = [optimize]
    # get the best parameters
    cursor = connection.cursor()
    params = sorted(params, key=lambda param: param.name)
    param_names = sorted(set(param.name for param in params))
    param_names_comma_separated = ','.join('"' + param_name + '"' for param_name in param_names)

    best_params = get_best_params(experiment_name, larger_result_is_better, optimize, param_names,
                                  additional_condition=filter_results_table, )
    best_score = get_results_for_params(
        metric=optimize,
        experiment_name=experiment_name,
        parameters=best_params
    )
    initial_params = {param.name: param.initial_value for param in params}

    # get a list of all results with mean std and conf
    if print_results_table or plot_metrics_by_parameters or plot_metrics_by_metrics:
        concatenated_metric_names = ','.join('GROUP_CONCAT("' + metric_name + '", \'@\') AS ' + metric_name
                                             for metric_name in metric_names)
        worst_score = '-1e999999' if larger_result_is_better else '1e999999'
        limit_string = f'LIMIT {max_table_row_count}' if max_table_row_count is not None and max_table_row_count < inf else ''
        # noinspection SqlAggregates
        cursor.execute('''
        SELECT {1}, {4}
        FROM {0} AS params
        WHERE ({5})
        GROUP BY {1}
        ORDER BY AVG(CASE WHEN params.{3} IS NULL THEN {6} ELSE params.{3} END) {2}
        {7}
        '''.format(experiment_name,
                   param_names_comma_separated,
                   'DESC' if larger_result_is_better else 'ASC',
                   optimize,
                   concatenated_metric_names,
                   filter_results_table,
                   worst_score,
                   limit_string))
        all_results = cursor.fetchall()

        column_description = list(cursor.description)

        for idx, row in enumerate(all_results):
            all_results[idx] = list(row)

        # prepare results table
        if print_results_table or plot_metrics_by_metrics or plot_metrics_by_parameters:
            iterations = 0
            print('Generating table of parameters')
            for column_index, column in list(enumerate(column_description))[::-1]:  # reverse
                print_progress_bar(iterations, len(metric_names))
                column_name = column[0]
                column_description[column_index] = column
                if column_name in metric_names:
                    if max_display_results > 0:
                        column_description[column_index] = column_name + ' values'
                    column_description.insert(column_index + 1, column_name + ' mean')
                    column_description.insert(column_index + 2, column_name + ' std')
                    column_description.insert(column_index + 3, column_name + ' conf')
                    # noinspection PyUnusedLocal
                    list_row: List
                    for list_row in all_results:
                        string_values: str = list_row[column_index]
                        if string_values is None:
                            metric_values: List[float] = [nan]
                        else:
                            metric_values = list(map(float, string_values.split('@')))
                        list_row[column_index] = [round_to_digits(x, 3) for x in metric_values[:max_display_results]]
                        list_row.insert(column_index + 1, numpy.mean(metric_values))
                        list_row.insert(column_index + 2, numpy.std(metric_values))
                        list_row.insert(column_index + 3, mean_confidence_interval_size(metric_values))
                    if all(len(list_row[column_index]) == 0 for list_row in all_results):
                        del column_description[column_index]
                        for list_row in all_results:
                            del list_row[column_index]
                    iterations += 1
                else:
                    column_description[column_index] = column_name
                print_progress_bar(iterations, len(metric_names))

        if print_results_table:  # actually print the table
            table = my_tabulate(all_results,
                                headers=column_description,
                                tablefmt='pipe')

            print(table)
            cursor.execute('''
            SELECT COUNT(*)
            FROM {0}
            '''.format(experiment_name))
            print('Total number of rows, experiments, cells in this table:',
                  (len(all_results), cursor.fetchone()[0], len(all_results) * len(all_results[0])))
            print('Best parameters:', best_params)
            changed_params = {k: v for k, v in best_params.items() if best_params[k] != initial_params[k]}
            print('Best parameters (excluding unchanged parameters):', changed_params)
            print('loss/score for best parameters (mean, std, conf):', best_score[1:])

        if plot_metrics_by_parameters or plot_metrics_by_metrics:
            print('Loading data...')
            df = pandas.DataFrame.from_records(all_results, columns=param_names + [x
                                                                                   for name in metric_names
                                                                                   for x in
                                                                                   [
                                                                                       name + '_values',
                                                                                       name + '_mean',
                                                                                       name + '_std',
                                                                                       name + '_conf'
                                                                                   ]])
            if plot_metrics_by_parameters:
                print('Plotting metrics by parameter...')
                plots = [
                    (param.name,
                     getattr(param, 'plot_scale', None),
                     param.smaller_value if isinstance(param, BoundedParameter) else None,
                     param.larger_value if isinstance(param, BoundedParameter) else None)
                    for param in params
                ]

                iterations = 0
                for metric_name in metric_names:
                    dirname = 'img/results/{0}/{1}/'.format(experiment_name, metric_name)
                    os.makedirs(dirname, exist_ok=True)
                    for plot, x_scale, min_mod, max_mod in plots:
                        print_progress_bar(iterations, len(metric_names) * len(plots))
                        if min_mod is None:
                            min_mod = lambda x: x
                        if max_mod is None:
                            max_mod = lambda x: x
                        if df[plot].nunique() <= 1:
                            iterations += 1
                            continue
                        grid = sns.relplot(x=plot, y=metric_name + '_mean', data=df)
                        if x_scale is not None:
                            if x_scale == 'log' and min_mod(df.min(axis=0)[plot]) <= 0:
                                x_min = None
                            else:
                                x_min = min_mod(df.min(axis=0)[plot])
                            grid.set(xscale=x_scale,
                                     xlim=(x_min,
                                           max_mod(df.max(axis=0)[plot]),))
                        plt.savefig(dirname + '{0}.png'.format(plot))
                        plt.clf()
                        plt.close()
                        iterations += 1
                        print_progress_bar(iterations, len(metric_names) * len(plots))

            if plot_metrics_by_metrics:
                print('Plotting metrics by metrics...')
                dirname = 'img/results/{0}/'.format(experiment_name)
                os.makedirs(dirname, exist_ok=True)

                # Generate some plots, metric by metric
                iterations = 0
                print('Plotting metric by metric, grouped')
                for metric_name in metric_names:
                    for metric_2 in metric_names:
                        if metric_name == metric_2:
                            iterations += 1
                            print_progress_bar(iterations, len(metric_names) ** 2)
                            continue
                        print_progress_bar(iterations, len(metric_names) ** 2)
                        sns.relplot(x=metric_name + '_mean', y=metric_2 + '_mean', data=df)
                        plt.savefig(dirname + '{0}_{1}_grouped.png'.format(metric_name, metric_2))
                        plt.clf()
                        plt.close()
                        heatmap_from_points(x=df[metric_name + '_mean'], y=df[metric_2 + '_mean'])
                        plt.xlabel(f'mean {metric_name}')
                        plt.ylabel(f'mean {metric_2}')
                        plt.savefig(dirname + '{0}_{1}_heatmap.png'.format(metric_name, metric_2))
                        plt.clf()
                        plt.close()
                        iterations += 1
                        print_progress_bar(iterations, len(metric_names) ** 2)

                df = pandas.read_sql_query('SELECT * FROM {0}'.format(experiment_name),
                                           connection)
                df['dt_created'] = pandas.to_datetime(df['dt_created'])

            if plot_metric_over_time:
                # Generate some plots, metric over time
                dirname = 'img/results/{0}/'.format(experiment_name)
                os.makedirs(dirname, exist_ok=True)
                print('Plotting metric over time')
                iterations = 0
                for metric_name in metric_names:
                    if not df[metric_name].any():
                        continue
                    print_progress_bar(iterations, len(metric_names))
                    ax = df.plot(x='dt_created', y=metric_name, style='.')
                    ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%m-%d %H:00'))
                    plt.savefig(dirname + 'dt_created_{0}.png'.format(metric_name))
                    plt.clf()
                    plt.close()
                    iterations += 1
                    print_progress_bar(iterations, len(metric_names))

                # plot optimize grouped over time
                assert df['dt_created'].is_monotonic  # sorting should not be a problem but we are lazy
                y_means = []
                df = df.drop_duplicates(subset='dt_created')
                timestamps = pandas.datetimeIndex(df.dt_created).asi8 // 10 ** 9
                iterations = 0
                print('Preparing plot {0} over time'.format(optimize))
                for x in timestamps:
                    print_progress_bar(iterations, len(timestamps))
                    not_after_x = 'CAST(strftime(\'%s\', dt_created) AS INT) <= {0}'.format(x)
                    param = get_best_params(additional_condition=not_after_x,
                                            param_names=param_names,
                                            experiment_name=experiment_name,
                                            larger_result_is_better=larger_result_is_better,
                                            optimize=optimize)
                    scores, mean, std, conf = get_results_for_params(optimize, experiment_name, param,
                                                                     additional_condition=not_after_x)
                    y_means.append(mean)
                    iterations += 1
                    print_progress_bar(iterations, len(timestamps))
                df['score'] = y_means

                ax = df.plot(x='dt_created', y='score')
                ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%m-%d %H:00'))
                plt.savefig(dirname + '{0}_over_time.png'.format(optimize))
                plt.clf()
                plt.close()

    return best_params, best_score


def predictions_for_parameters(experiment_name: str, parameters, show_progress=False):
    result_ids = result_ids_for_parameters(experiment_name, parameters)
    if not show_progress:
        return [
            predictions_for_result_id(experiment_name, result_id)
            for result_id in result_ids
        ]
    else:
        return [
            predictions_for_result_id(experiment_name, result_id)
            for result_id in ProgressBar(result_ids)
        ]


def result_ids_for_parameters(experiment_name, parameters: Dict[str, Any]):
    condition, parameters = only_specific_parameters_condition(parameters)
    cursor = connection.cursor()
    cursor.execute('''
    SELECT rowid FROM {0}
    WHERE {1}
    ORDER BY rowid
    '''.format(experiment_name, condition), parameters)
    result_ids = [row[0] for row in cursor.fetchall()]
    return result_ids


def creation_times_for_parameters(experiment_name, parameters):
    condition, parameters = only_specific_parameters_condition(parameters)
    cursor = connection.cursor()
    cursor.execute('''
    SELECT dt_created FROM {0}
    WHERE {1}
    ORDER BY rowid
    '''.format(experiment_name, condition), parameters)
    creation_times = [row[0] for row in cursor.fetchall()]
    return creation_times


def predictions_for_result_id(experiment_name: str, result_id):
    cursor = connection.cursor()
    cursor.execute('''
    SELECT name, dataset, y_pred, y_true FROM {0}_predictions
    WHERE result_id = ?
    '''.format(experiment_name, ), (result_id,))
    predictions = [{
        'name': row[0],
        'dataset': row[1],
        'y_pred': row[2],
        'y_true': row[3],
    } for row in cursor.fetchall()]

    return predictions


def list_difficult_samples(experiment_name,
                           loss_functions,
                           dataset,
                           max_losses_to_average=20,
                           additional_condition='1',
                           additional_parameters=(),
                           also_print=False):
    names = all_sample_names(dataset, experiment_name)
    cursor = connection.cursor()
    if 'epochs' in additional_condition:
        try:
            print('Creating index to fetch results faster (if not exists)...')
            cursor.execute('''
                CREATE INDEX IF NOT EXISTS {0}_by_name_epochs_dataset
                ON {0} (name, epochs, dataset)'''.format(experiment_name))
        except Exception as e:  # TODO check error type
            print(e)
            pass
    cursor = connection.cursor()
    table = []
    print('Fetching results for names...')
    for name in ProgressBar(names):
        if additional_condition == '1':
            additional_join = ''
        else:
            additional_join = 'JOIN {0} ON {0}.rowid = result_id'.format(experiment_name)
        if isinstance(max_losses_to_average, int) is not None and max_losses_to_average != inf:
            limit_string = f'LIMIT ?'
            limit_args = [max_losses_to_average]
        elif max_losses_to_average is None or max_losses_to_average == inf:
            limit_string = ''
            limit_args = []
        else:
            raise ValueError
        cursor.execute('''
            SELECT y_pred, y_true
            FROM {0}
             CROSS JOIN {0}_predictions ON {0}.rowid = result_id
            WHERE name = ? AND dataset = ? AND ({1})
            {3}'''.format(experiment_name, additional_condition, ..., limit_string), (name,
                                                                                      dataset,
                                                                                      *additional_parameters,
                                                                                      *limit_args,))
        data = cursor.fetchall()
        if len(data) > 0:
            def aggregate(xs):
                if len(set(xs)) == 1:
                    return xs[0]
                else:
                    return numpy.mean(xs)

            table.append((*[aggregate([loss_function(y_pred=y_pred, y_true=y_true, name=name)
                                       for y_pred, y_true in data])
                            for loss_function in loss_functions],
                          name, len(data)))
    print('sorting table...')
    table.sort(reverse=True)
    if also_print:
        print('stringifying table...')
        print(my_tabulate(table,
                          headers=[loss_function.__name__ for loss_function in loss_functions] + ['name', '#results'],
                          tablefmt='pipe'))
    return table


def all_sample_names(dataset, experiment_name):
    cursor = connection.cursor()
    print('Creating index to have faster queries by name (if not exists)...')
    cursor.execute('''
        CREATE INDEX IF NOT EXISTS {0}_predictions_by_name_and_dataset
        ON {0}_predictions (dataset, name)'''.format(experiment_name))
    print('Fetching all names...')
    names = []
    last_found = ''  # smaller than all other strings
    while True:
        cursor.execute('SELECT name '
                       'FROM {0}_predictions '
                       'WHERE dataset = ? AND name > ?'
                       'LIMIT 1'.format(experiment_name), (dataset, last_found))
        row = cursor.fetchone()
        if row is None:
            break
        names.append(row[0])
        last_found = row[0]
    return names


def only_specific_parameters_condition(parameters: Dict[str, Any]) -> Tuple[str, Tuple]:
    items = list(parameters.items())  # to have the correct ordering
    return '(' + ' AND '.join(f'"{name}" IS ?' for name, _ in items) + ')', \
           tuple(value for name, value in items)


def only_best_parameters_condition(experiment_name: str,
                                   larger_result_is_better: bool,
                                   optimize: str,
                                   param_names: List[str],
                                   additional_condition: str = '1') -> Tuple[str, Tuple]:
    parameters = get_best_params(experiment_name=experiment_name,
                                 larger_result_is_better=larger_result_is_better,
                                 optimize=optimize,
                                 param_names=param_names,
                                 additional_condition=additional_condition)
    return only_specific_parameters_condition(parameters)


def get_results_for_params(metric, experiment_name, parameters, confidence=0.95,
                           additional_condition='1'):
    param_names = list(parameters.keys())
    cursor = connection.cursor()
    params_equal = '\nAND '.join('"' + param_name + '" IS ?' for param_name in param_names)
    cursor.execute(
        '''
        SELECT {0}
        FROM {1}
        WHERE {2} AND ({3})
        '''.format(metric,
                   experiment_name,
                   params_equal,
                   additional_condition),
        tuple(parameters[name] for name in param_names)
    )
    # noinspection PyShadowingNames
    scores = [row[0] if row[0] is not None else nan for row in cursor.fetchall()]
    if len(scores) == 0:
        return scores, nan, nan, nan
    return scores, numpy.mean(scores), numpy.std(scores), mean_confidence_interval_size(scores, confidence)


def num_results_for_params(param_names, experiment_name, parameters,
                           additional_condition='1'):
    cursor = connection.cursor()
    params_equal = '\nAND '.join('"' + param_name + '" IS ?' for param_name in param_names)
    cursor.execute(
        '''
        SELECT COUNT(*)
        FROM {0}
        WHERE {1} AND ({2})
        '''.format(experiment_name,
                   params_equal,
                   additional_condition),
        tuple(parameters[name] for name in param_names)
    )
    return cursor.fetchone()[0]


def get_best_params(experiment_name: str,
                    larger_result_is_better: bool,
                    optimize: str,
                    param_names: List[str],
                    additional_condition='1') -> Optional[Parameters]:
    cursor = connection.cursor()
    param_names_comma_separated = ','.join('"' + param_name + '"' for param_name in param_names)
    worst_score = '-1e999999' if larger_result_is_better else '1e999999'
    # noinspection SqlAggregates
    cursor.execute('''
            SELECT * FROM {0} AS params
            WHERE ({4})
            GROUP BY {1}
            ORDER BY AVG(CASE WHEN params.{3} IS NULL THEN {5} ELSE params.{3} END) {2}, MIN(rowid) ASC
            LIMIT 1
            '''.format(experiment_name,
                       param_names_comma_separated,
                       'DESC' if larger_result_is_better else 'ASC',
                       optimize,
                       additional_condition,
                       worst_score, ))
    row = cursor.fetchone()
    if row is None:
        raise EmptyTableError()
    else:
        return params_from_row(cursor.description, row, param_names=param_names)


def params_from_row(description, row, param_names=None) -> Parameters:
    best_params = {}
    for idx, column_description in enumerate(description):
        column_name = column_description[0]
        if param_names is None or column_name in param_names:
            best_params[column_name] = row[idx]
    return best_params


def create_experiment_tables_if_not_exists(experiment_name, params, metric_names):
    cursor = connection.cursor()
    param_names = set(param.name for param in params)
    initial_params = {param.name: param.initial_value for param in params}
    cursor.execute('''
    CREATE TABLE IF NOT EXISTS {0}(
        rowid INTEGER PRIMARY KEY,
        dt_created datetime DEFAULT CURRENT_TIMESTAMP
    )
    '''.format(experiment_name))
    cursor.execute('''
    CREATE TABLE IF NOT EXISTS {0}_predictions(
        rowid INTEGER PRIMARY KEY,
        dataset TEXT NOT NULL,
        y_true BLOB,
        y_pred BLOB,
        name TEXT NOT NULL, -- used to identify the samples
        result_id INTEGER NOT NULL REFERENCES {0}(rowid),
        UNIQUE(result_id, dataset, name) -- gives additional indices
    )
    '''.format(experiment_name))
    connection.commit()

    for param_name in param_names:
        default_value = initial_params[param_name]
        add_parameter_column(experiment_name, param_name, default_value)

    for metric_name in metric_names:
        add_metric_column(experiment_name, metric_name)


def add_metric_column(experiment_name, metric_name, verbose=0):
    cursor = connection.cursor()
    try:
        cursor.execute('ALTER TABLE {0} ADD COLUMN "{1}" NUMERIC DEFAULT NULL'.format(experiment_name,
                                                                                      metric_name))
    except sqlite3.OperationalError as e:
        if 'duplicate column name' not in e.args[0]:
            raise
    else:
        if verbose:
            print(f'WARNING: created additional column {metric_name}. This may or may not be intentional')
    connection.commit()


def add_parameter_column(experiment_name, param_name, default_value, verbose=0):
    cursor = connection.cursor()
    try:
        if isinstance(default_value, str):
            default_value.replace("'", "\\'")
            default_value = "'" + default_value + "'"
        if default_value is None:
            default_value = 'NULL'
        cursor.execute('ALTER TABLE {0} ADD COLUMN "{1}" BLOB DEFAULT {2}'.format(experiment_name,
                                                                                  param_name,
                                                                                  default_value))
    except sqlite3.OperationalError as e:
        if 'duplicate column name' not in e.args[0]:
            raise
    else:
        if verbose:
            print(
                f'WARNING: created additional column {param_name} with default value {default_value}. This may or may not be intentional')
    connection.commit()


def markdown_table(all_results, sort_by):
    rows = [list(result['params'].values()) + [result['mean'], result['std'], result['conf'], result['all']] for result
            in all_results]
    rows.sort(key=sort_by)
    table = my_tabulate(rows, headers=list(all_results[0]['params'].keys()) + ['mean', 'std', 'conf', 'results'],
                        tablefmt='pipe')
    return table


def validate_parameter_set(params):
    if len(params) == 0:
        raise ValueError('Parameter set empty')
    for i, param in enumerate(params):
        # noinspection PyUnusedLocal
        other_param: Parameter
        for other_param in params[i + 1:]:
            if param.name == other_param.name and param.initial_value != other_param.initial_value:
                msg = '''
                A single parameter cant have multiple initial values. 
                Parameter "{0}" has initial values "{1}" and "{2}"
                '''.format(param.name, param.initial_value, other_param.initial_value)
                raise ValueError(msg)


def run_name(parameters=None) -> str:
    if parameters is None:
        parameters = {}
    shorter_parameters = {
        shorten_name(k): shorten_name(v)
        for k, v in parameters.items()
    }
    return ((str(datetime.now()) + str(shorter_parameters).replace(' ', ''))
            .replace("'", '')
            .replace('"', '')
            .replace(":", '⦂')
            .replace(",", '')
            .replace("_", '')
            .replace("<", '')
            .replace(">", '')
            .replace("{", '')
            .replace("}", ''))


def plot_experiment(metric_names,
                    experiment_name: str,
                    plot_name: str,
                    param_names: List[str],
                    params_list: List[Parameters],
                    evaluate: EvaluationFunction,
                    ignore: List[str] = None,
                    plot_shape=None,
                    metric_limits: Dict = None,
                    titles=None,
                    natural_metric_names: Dict[str, str] = None,
                    min_runs_per_params=0,
                    single_plot_width=6.4,
                    single_plot_height=4.8, ):
    if natural_metric_names is None:
        natural_metric_names = {}
    for parameters in params_list:
        if 'epochs' not in parameters:
            raise ValueError('`plot_experiment` needs the number of epochs to plot (`epochs`)')

    if metric_limits is None:
        metric_limits = {}
    if ignore is None:
        ignore = []
    if titles is None:
        titles = [None for _ in params_list]

    if plot_shape is None:
        width = ceil(sqrt(len(params_list)))
        plot_shape = (ceil(len(params_list) / width), width,)
    else:
        width = plot_shape[1]
    plot_shape_offset = 100 * plot_shape[0] + 10 * plot_shape[1]
    axes: Dict[int, Axes] = {}
    legend: List[str] = []
    results_dir = 'img/results/{0}/over_time/'.format(experiment_name)
    os.makedirs(results_dir, exist_ok=True)

    metric_names = sorted(metric_names, key=lambda m: (metric_limits.get(m, ()), metric_names.index(m)))
    print(metric_names)
    plotted_metric_names = []
    iterations = 0

    for plot_std in [False, True]:
        plt.figure(figsize=(single_plot_width * plot_shape[1], single_plot_height * plot_shape[0]))
        for idx, metric in enumerate(metric_names):
            print_progress_bar(iterations, 2 * (len(metric_names) - len(ignore)))
            limits = metric_limits.get(metric, None)
            try:
                next_limits = metric_limits.get(metric_names[idx + 1], None)
            except IndexError:
                next_limits = None
            if metric in ignore:
                continue
            sqlite_infinity = '1e999999'
            metric_is_finite = '{0} IS NOT NULL AND {0} != {1} AND {0} != -{1}'.format(metric, sqlite_infinity)
            for plot_idx, parameters in enumerate(params_list):
                while num_results_for_params(param_names=param_names,
                                             experiment_name=experiment_name,
                                             parameters=parameters, ) < min_runs_per_params:
                    print('Doing one of the missing experiments for the plot:')
                    print(parameters)
                    results = try_parameters(experiment_name=experiment_name,
                                             evaluate=evaluate,
                                             params=parameters, )
                    assert any(result.parameters == parameters for result in results)

                contains_avg_over = 'average_over_last_epochs' in parameters

                total_epochs = parameters['epochs']
                history = []
                lower_conf_limits = []
                upper_conf_limits = []
                for epoch_end in range(total_epochs):
                    current_parameters = parameters.copy()
                    if contains_avg_over:
                        current_parameters['average_over_last_epochs'] = None
                    current_parameters['epochs'] = epoch_end + 1
                    scores, mean, std, conf = get_results_for_params(
                        metric=metric,
                        experiment_name=experiment_name,
                        parameters=current_parameters,
                        additional_condition=metric_is_finite
                    )
                    history.append(mean)
                    if plot_std:
                        lower_conf_limits.append(mean - 1.959964 * std)
                        upper_conf_limits.append(mean + 1.959964 * std)
                    else:
                        lower_conf_limits.append(mean - conf)
                        upper_conf_limits.append(mean + conf)
                x = list(range(len(history)))
                if plot_shape_offset + plot_idx + 1 not in axes:
                    # noinspection PyTypeChecker
                    ax: Axes = plt.subplot(plot_shape_offset + plot_idx + 1)
                    assert isinstance(ax, Axes)
                    axes[plot_shape_offset + plot_idx + 1] = ax
                ax = axes[plot_shape_offset + plot_idx + 1]
                ax.plot(x, history)
                ax.fill_between(x, lower_conf_limits, upper_conf_limits, alpha=0.4)
                if titles[plot_idx] is not None:
                    ax.set_title(titles[plot_idx])
                if limits is not None:
                    ax.set_ylim(limits)
                ax.set_xlim(0, max(total_epochs, ax.get_xlim()[1]))
                current_row = plot_idx // width
                if current_row == plot_shape[0] - 1:
                    ax.set_xlabel('Epoch')
            natural_name = natural_metric_names.get(metric, metric)
            if plot_std:
                legend += ['mean ' + natural_name, '1.96σ of {0}'.format(natural_name)]
            else:
                legend += ['mean ' + natural_name, '95% conf. of mean {0}'.format(natural_name)]

            plotted_metric_names.append(metric)
            if limits is None or next_limits is None or limits != next_limits:
                legend = legend[0::2] + legend[1::2]
                for ax in axes.values():
                    ax.legend(legend)
                if plot_std:
                    plt.savefig(results_dir + plot_name + '_' + ','.join(plotted_metric_names) + '_std' + '.png')
                else:
                    plt.savefig(results_dir + plot_name + '_' + ','.join(plotted_metric_names) + '.png')
                plt.clf()
                plt.close()
                plt.figure(figsize=(single_plot_width * plot_shape[1], single_plot_height * plot_shape[0]))
                axes = {}
                plotted_metric_names = []
                legend = []
            iterations += 1
            print_progress_bar(iterations, 2 * (len(metric_names) - len(ignore)))
    plt.clf()
    plt.close()


if __name__ == '__main__':
    def evaluate(params):
        return (params['A'] - 30) ** 2 + 10 * ((params['B'] / (params['A'] + 1)) - 1) ** 2 + params['C']


    diamond_parameter_search('test',
                             diamond_size=2,
                             params=[LinearParameter('A', 10, 10),
                                     ExponentialIntegerParameter('B', 8, 2),
                                     ConstantParameter('C', 5)],
                             runs_per_configuration=1,
                             initial_runs=1,
                             evaluate=evaluate,
                             optimize='loss',
                             larger_result_is_better=False)