Experiment 2 - Multi-step problems performance

import pathlib
from typing import List, Dict

import gym
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import pandas as pd
from IPython.display import HTML
from lcs.agents.acs2 import Configuration, ACS2
from lcs.metrics import population_metrics
from lcs.strategies.action_selection import EpsilonGreedy, ActionDelay, KnowledgeArray
from myst_nb import glue
from tabulate import tabulate

from src.bayes_estimation import bayes_estimate
from src.commons import NUM_EXPERIMENTS
from src.decorators import repeat, get_from_cache_or_run
from src.metrics import parse_experiments_results, corridor_transition_knowledge, grid_transition_knowledge
from src.utils import build_plots_dir_path, build_cache_dir_path
from src.visualization import biased_exploration_colors, PLOT_DPI

COLORS = biased_exploration_colors()

plt.ioff()  # turn off interactive plotting

root_dir = pathlib.Path().cwd().parent.parent.parent
cwd_dir = pathlib.Path().cwd()

plot_dir = build_plots_dir_path(root_dir) / cwd_dir.name
cache_dir = build_cache_dir_path(root_dir) / cwd_dir.name


def run_experiment(env_provider, explore_trials, exploit_trials, **conf):
    env = env_provider()
    env.reset()

    cfg = Configuration(**conf)

    explorer = ACS2(cfg)
    metrics_explore = explorer.explore(env, explore_trials)

    exploiter = ACS2(cfg, explorer.population)
    metrics_exploit = exploiter.exploit(env, exploit_trials)

    # Parse results into DataFrame
    metrics_df = parse_experiments_results(metrics_explore, metrics_exploit, cfg.metrics_trial_frequency)

    return metrics_df


def extract(combined_list):
    env1_dfs = [result[0] for result in combined_list]
    env2_dfs = [result[1] for result in combined_list]
    env3_dfs = [result[2] for result in combined_list]
    return env1_dfs, env2_dfs, env3_dfs


def average_experiment_runs(runs_dfs: List[pd.DataFrame]) -> pd.DataFrame:
    return pd.concat(runs_dfs).groupby(['trial', 'phase']).mean().reset_index(level='phase')


def plot(epsilon_greedy_df, action_delay_df, knowledge_array_df, op_initial_df,
         env_name,
         num_explore_trials,
         first_knowledge_trials,
         first_population_trials,
         population_ylim,
         text_box_loc,
         plot_filename=None):
    fig = plt.figure(figsize=(14, 8))

    # Layout
    gs = fig.add_gridspec(2, 2, wspace=.25, hspace=.4)
    ax1 = fig.add_subplot(gs[0, 0])
    ax2 = fig.add_subplot(gs[0, 1])
    ax3 = fig.add_subplot(gs[1, :])

    # Global title
    fig.suptitle(f'Performance of [{env_name}] environment', fontsize=24)

    # Knowledge
    epsilon_greedy_df['knowledge'][:first_knowledge_trials].plot(label='Epsilon Greedy', c=COLORS['eg'], ax=ax1)
    action_delay_df['knowledge'][:first_knowledge_trials].plot(label='Action Delay', c=COLORS['ad'], ax=ax1)
    knowledge_array_df['knowledge'][:first_knowledge_trials].plot(label='Knowledge Array', c=COLORS['ka'], ax=ax1)
    op_initial_df['knowledge'][:first_knowledge_trials].plot(label='Optimistic Initial Quality', c=COLORS['oiq'], ax=ax1)

    ax1.spines['top'].set_visible(False)
    ax1.spines['right'].set_visible(False)
    ax1.set_title('Knowledge')
    ax1.set_xlabel('Trial')
    ax1.set_ylabel('Knowledge')
    ax1.axhline(y=100, color='black', linewidth=1, linestyle="--")
    ax1.yaxis.set_major_formatter(mtick.PercentFormatter())

    # Population
    epsilon_greedy_df['population'][:first_population_trials].plot(label='Epsilon Greedy', c=COLORS['eg'], ax=ax2)
    action_delay_df['population'][:first_population_trials].plot(label='Action Delay', c=COLORS['ad'], ax=ax2)
    knowledge_array_df['population'][:first_population_trials].plot(label='Knowledge Array', c=COLORS['ka'], ax=ax2)
    op_initial_df['population'][:first_population_trials].plot(label='Optimistic Initial Quality', c=COLORS['oiq'], ax=ax2)

    ax2.spines['top'].set_visible(False)
    ax2.spines['right'].set_visible(False)
    ax2.set_xlabel('Trial')
    ax2.set_ylabel('Classifiers')
    ax2.set_title('Classifiers Population')
    ax2.set_ylim(population_ylim)
    ax2.xaxis.set_major_formatter(mtick.FormatStrFormatter('%.0f'))
    ax2.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.0f'))

    # Steps in trial
    window = 3  # window for moving average
    epsilon_greedy_df['steps_in_trial'].rolling(window=window).mean().plot(label='Epsilon Greedy', c=COLORS['eg'], ax=ax3)
    action_delay_df['steps_in_trial'].rolling(window=window).mean().plot(label='Action Delay', c=COLORS['ad'], ax=ax3)
    knowledge_array_df['steps_in_trial'].rolling(window=window).mean().plot(label='Knowledge Array', c=COLORS['ka'], ax=ax3)
    op_initial_df['steps_in_trial'].rolling(window=window).mean().plot(label='Optimistic Initial Quality', c=COLORS['oiq'], ax=ax3)

    ax3.spines['top'].set_visible(False)
    ax3.spines['right'].set_visible(False)
    ax3.set_xlabel('Trial')
    ax3.set_ylabel('Steps')
    ax3.set_title('Steps in trial')
    ax3.axvline(x=num_explore_trials, color='black', linewidth=1, linestyle="--")
    ax3.text(**text_box_loc, s=f'Moving average of {window} samples', style='italic',
             bbox={'facecolor': 'red', 'alpha': 0.2, 'pad': 10})

    # Create legend
    handles, labels = ax3.get_legend_handles_labels()
    fig.legend(handles, labels, loc='lower center', ncol=4)

    if plot_filename:
        fig.savefig(plot_filename, dpi=PLOT_DPI)

    return fig

USE_RAY = True
explore_trials, exploit_trials = 60, 20

glue('41_e2_explore_trials', explore_trials, display=False)
glue('41_e2_exploit_trials', exploit_trials, display=False)

Both the Corridor and Grid multi-step environments were used to verify the biased exploration strategies. In each case the ACS2 agent starts by performing 60 explore trials with the selected strategy, followed by 20 exploit trials in which the evolved population is validated.

The following metrics are considered (a sketch of the resulting data frame follows the list):

  • knowledge - depicting the process of building an internal model of the environment,

  • population size - showing the total number of classifiers,

  • steps in a trial - in both the explore and exploit phases.
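
For orientation, each run produces one tidy metrics data frame with one row per trial. The snippet below is purely illustrative - the values are placeholders and the column names are inferred from how the frames are used later in this section (the exact output comes from parse_experiments_results):

import pandas as pd

# Hypothetical shape of a single run's metrics frame (placeholder values)
metrics_df = pd.DataFrame({
    'trial': [0, 1, 59, 60],
    'phase': ['explore', 'explore', 'explore', 'exploit'],
    'steps_in_trial': [180, 140, 25, 19],
    'reward': [0, 0, 1000, 1000],
    'perf_time': [0.02, 0.02, 0.02, 0.01],  # trial wall-clock time
    'knowledge': [5.0, 12.5, 100.0, 100.0],  # % of known transitions
    'population': [10, 18, 38, 38],  # all classifiers
    'reliable': [2, 9, 38, 38],  # reliable classifiers
})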

Figures 4.4 and 4.5 present the metric evolution for the basic versions of the Corridor and Grid problems, containing 20 and 400 distinct states respectively; the overall metrics look similar for larger instances. Additionally, for the Corridor, the cross-over capability of the agent was switched off (\(\chi=0\)) because of the unit length of the perception vector \(\sigma\).

To amplify the agent's motivation for exploring the possible options, each problem was additionally scaled up to sizes of \(n=40\) and \(n=100\). In all cases, statistics from the last trial were collected to estimate the overall performance.

Results

Corridor ACS2 parameters

\(\beta=0.2\), \(\gamma=0.95\), \(\theta_r=0.9\), \(\theta_i=0.1\), \(\epsilon=0.8\), \(\theta_{GA}=50\), \(\theta_{AS}=20\), \(\theta_{exp}=50\), \(\mu=0.03\), \(u_{max}=1\), \(\chi=0\)

import gym_corridor  # noqa: F401
from src.observation_wrappers import CorridorObservationWrapper


def corridor20_env_provider():
    import gym_corridor  # noqa: F401
    return CorridorObservationWrapper(gym.make('corridor-20-v0'))


def corridor40_env_provider():
    import gym_corridor  # noqa: F401
    return CorridorObservationWrapper(gym.make('corridor-40-v0'))


def corridor100_env_provider():
    import gym_corridor  # noqa: F401
    return CorridorObservationWrapper(gym.make('corridor-100-v0'))


# Function for calculating relevant metrics
def corridor_metrics(agent, env):
    pop = agent.population
    metrics = {
        'knowledge': corridor_transition_knowledge(pop, env)
    }
    metrics.update(population_metrics(pop, env))
    return metrics


corridor_base_params = {
    "classifier_length": 1,
    "number_of_possible_actions": 2,
    "epsilon": 0.8,
    "beta": 0.2,
    "gamma": 0.95,
    "initial_q": 0.5,
    "theta_exp": 50,
    "theta_ga": 50,
    "do_ga": True,
    "chi": 0.0,  # cross-over disabled due to the unit-length perception vector
    "mu": 0.03,
    "u_max": 1,
    "metrics_trial_frequency": 1,
    "user_metrics_collector_fcn": corridor_metrics
}

# Start experiments
@get_from_cache_or_run(cache_path=f'{cache_dir}/corridor/epsilon_greedy.dill')
@repeat(num_times=NUM_EXPERIMENTS, use_ray=USE_RAY)
def corridor_epsilon_greedy():
    corridor20 = run_experiment(corridor20_env_provider, explore_trials, exploit_trials, **(corridor_base_params | {'action_selector': EpsilonGreedy}))
    corridor40 = run_experiment(corridor40_env_provider, explore_trials, exploit_trials, **(corridor_base_params | {'action_selector': EpsilonGreedy}))
    corridor100 = run_experiment(corridor100_env_provider, explore_trials, exploit_trials, **(corridor_base_params | {'action_selector': EpsilonGreedy}))
    return corridor20, corridor40, corridor100


@get_from_cache_or_run(cache_path=f'{cache_dir}/corridor/action_delay.dill')
@repeat(num_times=NUM_EXPERIMENTS, use_ray=USE_RAY)
def corridor_action_delay():
    corridor20 = run_experiment(corridor20_env_provider, explore_trials, exploit_trials, **(corridor_base_params | {'action_selector': ActionDelay, 'biased_exploration_prob': 0.5}))
    corridor40 = run_experiment(corridor40_env_provider, explore_trials, exploit_trials, **(corridor_base_params | {'action_selector': ActionDelay, 'biased_exploration_prob': 0.5}))
    corridor100 = run_experiment(corridor100_env_provider, explore_trials, exploit_trials, **(corridor_base_params | {'action_selector': ActionDelay, 'biased_exploration_prob': 0.5}))
    return corridor20, corridor40, corridor100


@get_from_cache_or_run(cache_path=f'{cache_dir}/corridor/knowledge_array.dill')
@repeat(num_times=NUM_EXPERIMENTS, use_ray=USE_RAY)
def corridor_knowledge_array():
    corridor20 = run_experiment(corridor20_env_provider, explore_trials, exploit_trials, **(corridor_base_params | {'action_selector': KnowledgeArray, 'biased_exploration_prob': 0.5}))
    corridor40 = run_experiment(corridor40_env_provider, explore_trials, exploit_trials, **(corridor_base_params | {'action_selector': KnowledgeArray, 'biased_exploration_prob': 0.5}))
    corridor100 = run_experiment(corridor100_env_provider, explore_trials, exploit_trials, **(corridor_base_params | {'action_selector': KnowledgeArray, 'biased_exploration_prob': 0.5}))
    return corridor20, corridor40, corridor100


@get_from_cache_or_run(cache_path=f'{cache_dir}/corridor/oiq.dill')
@repeat(num_times=NUM_EXPERIMENTS, use_ray=USE_RAY)
def corridor_oiq():
    # OIQ: epsilon-greedy selection with optimistic initial classifier quality
    corridor20 = run_experiment(corridor20_env_provider, explore_trials, exploit_trials, **(corridor_base_params | {'action_selector': EpsilonGreedy, 'initial_q': 0.8}))
    corridor40 = run_experiment(corridor40_env_provider, explore_trials, exploit_trials, **(corridor_base_params | {'action_selector': EpsilonGreedy, 'initial_q': 0.8}))
    corridor100 = run_experiment(corridor100_env_provider, explore_trials, exploit_trials, **(corridor_base_params | {'action_selector': EpsilonGreedy, 'initial_q': 0.8}))
    return corridor20, corridor40, corridor100

Grid ACS2 parameters

\(\beta=0.2\), \(\gamma=0.95\), \(\theta_r=0.9\), \(\theta_i=0.1\), \(\epsilon=0.8\), \(\theta_{GA}=50\), \(\theta_{AS}=20\), \(\theta_{exp}=50\), \(\mu=0.03\), \(u_{max}=1\), \(\chi=0.8\)

# Function for calculating relevant metrics
def grid_metrics(agent, env):
    pop = agent.population
    metrics = {
        'knowledge': grid_transition_knowledge(pop, env)
    }
    metrics.update(population_metrics(pop, env))
    return metrics


def grid20_env_provider():
    import gym_grid  # noqa: F401
    return gym.make('grid-20-v0')


def grid40_env_provider():
    import gym_grid  # noqa: F401
    return gym.make('grid-40-v0')


def grid100_env_provider():
    import gym_grid  # noqa: F401
    return gym.make('grid-100-v0')


grid_base_params = {
    "classifier_length": 2,
    "number_of_possible_actions": 4,
    "epsilon": 0.8,
    "beta": 0.2,
    "gamma": 0.95,
    "initial_q": 0.5,
    "theta_exp": 50,
    "theta_ga": 50,
    "do_ga": True,
    "chi": 0.8,  # cross-over probability, stated explicitly for clarity
    "mu": 0.03,
    "u_max": 1,
    "metrics_trial_frequency": 1,
    "user_metrics_collector_fcn": grid_metrics
}

# Start experiments
@get_from_cache_or_run(cache_path=f'{cache_dir}/grid/epsilon_greedy.dill')
@repeat(num_times=NUM_EXPERIMENTS, use_ray=USE_RAY)
def grid_epsilon_greedy():
    grid20 = run_experiment(grid20_env_provider, explore_trials, exploit_trials, **(grid_base_params | {'action_selector': EpsilonGreedy}))
    grid40 = run_experiment(grid40_env_provider, explore_trials, exploit_trials, **(grid_base_params | {'action_selector': EpsilonGreedy}))
    grid100 = run_experiment(grid100_env_provider, explore_trials, exploit_trials, **(grid_base_params | {'action_selector': EpsilonGreedy}))
    return grid20, grid40, grid100


@get_from_cache_or_run(cache_path=f'{cache_dir}/grid/action_delay.dill')
@repeat(num_times=NUM_EXPERIMENTS, use_ray=USE_RAY)
def grid_action_delay():
    grid20 = run_experiment(grid20_env_provider, explore_trials, exploit_trials, **(grid_base_params | {'action_selector': ActionDelay, 'biased_exploration_prob': 0.5}))
    grid40 = run_experiment(grid40_env_provider, explore_trials, exploit_trials, **(grid_base_params | {'action_selector': ActionDelay, 'biased_exploration_prob': 0.5}))
    grid100 = run_experiment(grid100_env_provider, explore_trials, exploit_trials, **(grid_base_params | {'action_selector': ActionDelay, 'biased_exploration_prob': 0.5}))
    return grid20, grid40, grid100


@get_from_cache_or_run(cache_path=f'{cache_dir}/grid/knowledge_array.dill')
@repeat(num_times=NUM_EXPERIMENTS, use_ray=USE_RAY)
def grid_knowledge_array():
    grid20 = run_experiment(grid20_env_provider, explore_trials, exploit_trials, **(grid_base_params | {'action_selector': KnowledgeArray, 'biased_exploration_prob': 0.5}))
    grid40 = run_experiment(grid40_env_provider, explore_trials, exploit_trials, **(grid_base_params | {'action_selector': KnowledgeArray, 'biased_exploration_prob': 0.5}))
    grid100 = run_experiment(grid100_env_provider, explore_trials, exploit_trials, **(grid_base_params | {'action_selector': KnowledgeArray, 'biased_exploration_prob': 0.5}))
    return grid20, grid40, grid100


@get_from_cache_or_run(cache_path=f'{cache_dir}/grid/oiq.dill')
@repeat(num_times=NUM_EXPERIMENTS, use_ray=USE_RAY)
def grid_oiq():
    # OIQ: epsilon-greedy selection with optimistic initial classifier quality
    grid20 = run_experiment(grid20_env_provider, explore_trials, exploit_trials, **(grid_base_params | {'action_selector': EpsilonGreedy, 'initial_q': 0.8}))
    grid40 = run_experiment(grid40_env_provider, explore_trials, exploit_trials, **(grid_base_params | {'action_selector': EpsilonGreedy, 'initial_q': 0.8}))
    grid100 = run_experiment(grid100_env_provider, explore_trials, exploit_trials, **(grid_base_params | {'action_selector': EpsilonGreedy, 'initial_q': 0.8}))
    return grid20, grid40, grid100


# Execute calculations
corridor20_eg_dfs, corridor40_eg_dfs, corridor100_eg_dfs = extract(corridor_epsilon_greedy())
corridor20_ad_dfs, corridor40_ad_dfs, corridor100_ad_dfs = extract(corridor_action_delay())
corridor20_ka_dfs, corridor40_ka_dfs, corridor100_ka_dfs = extract(corridor_knowledge_array())
corridor20_oiq_dfs, corridor40_oiq_dfs, corridor100_oiq_dfs = extract(corridor_oiq())

grid20_eg_dfs, grid40_eg_dfs, grid100_eg_dfs = extract(grid_epsilon_greedy())
grid20_ad_dfs, grid40_ad_dfs, grid100_ad_dfs = extract(grid_action_delay())
grid20_ka_dfs, grid40_ka_dfs, grid100_ka_dfs = extract(grid_knowledge_array())
grid20_oiq_dfs, grid40_oiq_dfs, grid100_oiq_dfs = extract(grid_oiq())

# Plot results
glue('41-e2-corridor-fig', plot(
    average_experiment_runs(corridor20_eg_dfs),
    average_experiment_runs(corridor20_ad_dfs),
    average_experiment_runs(corridor20_ka_dfs),
    average_experiment_runs(corridor20_oiq_dfs),
    env_name='Corridor-20',
    num_explore_trials=explore_trials,
    first_knowledge_trials=30,
    first_population_trials=20,
    population_ylim=(17, 40),
    text_box_loc={"x": 63, "y": 120},
    plot_filename=f'{plot_dir}/corridor-performance.png'
), display=False)

glue('41-e2-grid-fig', plot(
    average_experiment_runs(grid20_eg_dfs),
    average_experiment_runs(grid20_ad_dfs),
    average_experiment_runs(grid20_ka_dfs),
    average_experiment_runs(grid20_oiq_dfs),
    env_name='Grid-20',
    num_explore_trials=explore_trials,
    first_knowledge_trials=10,
    first_population_trials=30,
    population_ylim=(70, 105),
    text_box_loc={"x": 63, "y": 1000},
    plot_filename=f'{plot_dir}/grid-performance.png'
), display=False)

Fig. 4.4 Performance in the Corridor-20 environment: 60 exploration and 20 exploitation trials averaged over 50 runs. Steps in a trial are plotted with a 3-sample moving average for clarity. No explicit discretizer was needed. The maximum number of steps in a trial is 200. The dashed vertical line marks the switch from the explore to the exploit phase.


Fig. 4.5 Performance in the Grid-20 environment: 60 exploration and 20 exploitation trials averaged over 50 runs. Steps in a trial are plotted with a 3-sample moving average for clarity. No explicit discretizer was needed. The maximum number of steps in a trial is 2000. The dashed vertical line marks the switch from the explore to the exploit phase.

Statistical verification

To statistically assess the population size, the posterior distribution was modelled using the 50 metric values collected in the last trial and then sampled with 100,000 draws. For the obtained reward, the average value from the exploit trials is considered representative of the algorithm's performance.
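
The posterior estimation itself is implemented in src.bayes_estimation.bayes_estimate. As a rough, illustrative sketch of the idea - assuming a simple Normal likelihood in PyMC3 and not mirroring the repository function exactly - it could look like this:

import numpy as np
import pymc3 as pm


def bayes_estimate_sketch(data: np.ndarray, draws: int = 100_000):
    # Model the observed metric values with unknown mean and spread;
    # the +1 keeps the priors valid when all observations are identical.
    with pm.Model():
        mu = pm.Normal('mu', mu=data.mean(), sigma=data.std() + 1)
        std = pm.HalfNormal('std', sigma=data.std() + 1)
        pm.Normal('obs', mu=mu, sigma=std, observed=data)
        trace = pm.sample(draws=draws, return_inferencedata=False)

    # Same access pattern as bayes_estimate: arrays of posterior samples
    return {'mu': trace['mu'], 'std': trace['std']}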

def build_models(dfs: Dict[str, pd.DataFrame], field: str, query_condition: str):
    results = {}

    for name, df in dfs.items():
        data_arr = df.query(query_condition)[field].to_numpy()
        bayes_model = bayes_estimate(data_arr)
        results[name] = (bayes_model['mu'], bayes_model['std'])

    return results


def get_average_reward(dfs: Dict[str, pd.DataFrame]):
    results = {}

    for name, df in dfs.items():
        results[name] = df.query('phase == "exploit"')['reward'].mean()

    return results


experiments_data = {
    'corridor20_eg': pd.concat(corridor20_eg_dfs),
    'corridor40_eg': pd.concat(corridor40_eg_dfs),
    'corridor100_eg': pd.concat(corridor100_eg_dfs),

    'corridor20_ad': pd.concat(corridor20_ad_dfs),
    'corridor40_ad': pd.concat(corridor40_ad_dfs),
    'corridor100_ad': pd.concat(corridor100_ad_dfs),

    'corridor20_ka': pd.concat(corridor20_ka_dfs),
    'corridor40_ka': pd.concat(corridor40_ka_dfs),
    'corridor100_ka': pd.concat(corridor100_ka_dfs),

    'corridor20_oiq': pd.concat(corridor20_oiq_dfs),
    'corridor40_oiq': pd.concat(corridor40_oiq_dfs),
    'corridor100_oiq': pd.concat(corridor100_oiq_dfs),

    'grid20_eg': pd.concat(grid20_eg_dfs),
    'grid40_eg': pd.concat(grid40_eg_dfs),
    'grid100_eg': pd.concat(grid100_eg_dfs),

    'grid20_ad': pd.concat(grid20_ad_dfs),
    'grid40_ad': pd.concat(grid40_ad_dfs),
    'grid100_ad': pd.concat(grid100_ad_dfs),

    'grid20_ka': pd.concat(grid20_ka_dfs),
    'grid40_ka': pd.concat(grid40_ka_dfs),
    'grid100_ka': pd.concat(grid100_ka_dfs),

    'grid20_oiq': pd.concat(grid20_oiq_dfs),
    'grid40_oiq': pd.concat(grid40_oiq_dfs),
    'grid100_oiq': pd.concat(grid100_oiq_dfs)
}


@get_from_cache_or_run(cache_path=f'{cache_dir}/bayes/population.dill')
def build_population_models(dfs: Dict[str, pd.DataFrame]):
    return build_models(dfs, field='population', query_condition=f'trial == {explore_trials - 1}')


@get_from_cache_or_run(cache_path=f'{cache_dir}/bayes/reliable.dill')
def build_reliable_models(dfs: Dict[str, pd.DataFrame]):
    return build_models(dfs, field='reliable', query_condition=f'trial == {explore_trials - 1}')


@get_from_cache_or_run(cache_path=f'{cache_dir}/bayes/knowledge.dill')
def build_knowledge_models(dfs: Dict[str, pd.DataFrame]):
    return build_models(dfs, field='knowledge', query_condition=f'trial == {explore_trials - 1}')


@get_from_cache_or_run(cache_path=f'{cache_dir}/bayes/perf_time.dill')
def build_perf_time_models(dfs: Dict[str, pd.DataFrame]):
    return build_models(dfs, field='perf_time', query_condition='phase == "explore"')


population_models = build_population_models(experiments_data)
reliable_models = build_reliable_models(experiments_data)
knowledge_models = build_knowledge_models(experiments_data)
perf_time_models = build_perf_time_models(experiments_data)
avg_rewards = get_average_reward(experiments_data)


def print_bayes_table(name_prefix, population_models, reliable_models, knowledge_models, perf_time_models, avg_rewards):
    def print_row(r):
        # r = (mu posterior samples, std posterior samples)
        return f'{round(r[0].mean(), 2)} ± {round(r[0].std(), 2)}'

    key_names = [name for name in experiments_data.keys() if name.startswith(name_prefix)]

    bayes_table_data = [
        ['population of classifiers'] + [print_row(v) for name, v in population_models.items() if name in key_names],
        ['reliable classifiers'] + [print_row(v) for name, v in reliable_models.items() if name in key_names],
        ['knowledge'] + [print_row(v) for name, v in knowledge_models.items() if name in key_names],
        ['trial execution time'] + [print_row(v) for name, v in perf_time_models.items() if name in key_names],
        ['average exploit reward'] + [f'{round(v, 2)}' for name, v in avg_rewards.items() if name in key_names],
    ]

    table = tabulate(bayes_table_data,
                     headers=['', 'Epsilon Greedy', 'Action Delay', 'Knowledge Array', 'Optimistic Initial Quality'],
                     tablefmt="html",
                     stralign='right')
    return HTML(table)

# Add glue for rendering output in tabs
glue("corridor20", print_bayes_table('corridor20', population_models, reliable_models, knowledge_models, perf_time_models, avg_rewards), display=False)
glue("corridor40", print_bayes_table('corridor40', population_models, reliable_models, knowledge_models, perf_time_models, avg_rewards), display=False)
glue("corridor100", print_bayes_table('corridor100', population_models, reliable_models, knowledge_models, perf_time_models, avg_rewards), display=False)
glue("grid20", print_bayes_table('grid20', population_models, reliable_models, knowledge_models, perf_time_models, avg_rewards), display=False)
glue("grid40", print_bayes_table('grid40', population_models, reliable_models, knowledge_models, perf_time_models, avg_rewards), display=False)
glue("grid100", print_bayes_table('grid100', population_models, reliable_models, knowledge_models, perf_time_models, avg_rewards), display=False)

Corridor

Corridor-20

|                           | Epsilon Greedy | Action Delay | Knowledge Array | Optimistic Initial Quality |
|---------------------------|----------------|--------------|-----------------|----------------------------|
| population of classifiers | 38.0 ± 0.0     | 38.0 ± 0.0   | 38.0 ± 0.0      | 38.0 ± 0.0                 |
| reliable classifiers      | 38.0 ± 0.0     | 38.0 ± 0.0   | 38.0 ± 0.0      | 38.0 ± 0.0                 |
| knowledge                 | 100.0 ± 0.0    | 100.0 ± 0.0  | 100.0 ± 0.0     | 100.0 ± 0.0                |
| trial execution time      | 0.02 ± 0.0     | 0.03 ± 0.0   | 0.05 ± 0.0      | 0.02 ± 0.0                 |
| average exploit reward    | 1000.0         | 1000.0       | 1000.0          | 1000.0                     |

Corridor-40

|                           | Epsilon Greedy | Action Delay | Knowledge Array | Optimistic Initial Quality |
|---------------------------|----------------|--------------|-----------------|----------------------------|
| population of classifiers | 78.0 ± 0.0     | 78.0 ± 0.0   | 78.0 ± 0.0      | 78.0 ± 0.0                 |
| reliable classifiers      | 78.0 ± 0.0     | 78.0 ± 0.0   | 78.0 ± 0.0      | 78.0 ± 0.0                 |
| knowledge                 | 100.0 ± 0.0    | 100.0 ± 0.0  | 100.0 ± 0.0     | 100.0 ± 0.0                |
| trial execution time      | 0.06 ± 0.0     | 0.07 ± 0.0   | 0.09 ± 0.0      | 0.06 ± 0.0                 |
| average exploit reward    | 949.0          | 998.0        | 1000.0          | 962.0                      |

Corridor-100

|                           | Epsilon Greedy | Action Delay  | Knowledge Array | Optimistic Initial Quality |
|---------------------------|----------------|---------------|-----------------|----------------------------|
| population of classifiers | 198.0 ± 0.0    | 198.0 ± 0.0   | 198.0 ± 0.0     | 198.0 ± 0.0                |
| reliable classifiers      | 195.25 ± 0.53  | 195.78 ± 0.38 | 196.0 ± 0.0     | 195.67 ± 0.39              |
| knowledge                 | 98.63 ± 0.27   | 98.88 ± 0.2   | 98.98 ± 0.0     | 98.83 ± 0.21               |
| trial execution time      | 0.15 ± 0.0     | 0.16 ± 0.0    | 0.17 ± 0.0      | 0.16 ± 0.0                 |
| average exploit reward    | 228.0          | 222.0         | 329.0           | 217.0                      |

Grid

Grid-20

|                           | Epsilon Greedy | Action Delay | Knowledge Array | Optimistic Initial Quality |
|---------------------------|----------------|--------------|-----------------|----------------------------|
| population of classifiers | 80.0 ± 0.0     | 80.0 ± 0.0   | 80.0 ± 0.0      | 80.0 ± 0.0                 |
| reliable classifiers      | 80.0 ± 0.0     | 80.0 ± 0.0   | 80.0 ± 0.0      | 80.0 ± 0.0                 |
| knowledge                 | 100.0 ± 0.0    | 100.0 ± 0.0  | 100.0 ± 0.0     | 100.0 ± 0.0                |
| trial execution time      | 0.73 ± 0.01    | 0.69 ± 0.01  | 1.32 ± 0.0      | 0.72 ± 0.01                |
| average exploit reward    | 459.0          | 440.0        | 377.0           | 448.0                      |

Grid-40

|                           | Epsilon Greedy | Action Delay | Knowledge Array | Optimistic Initial Quality |
|---------------------------|----------------|--------------|-----------------|----------------------------|
| population of classifiers | 160.0 ± 0.0    | 160.0 ± 0.0  | 161.67 ± 0.34   | 160.0 ± 0.0                |
| reliable classifiers      | 160.0 ± 0.0    | 160.0 ± 0.0  | 161.24 ± 0.24   | 160.0 ± 0.0                |
| knowledge                 | 100.0 ± 0.0    | 100.0 ± 0.0  | 100.0 ± 0.0     | 100.0 ± 0.0                |
| trial execution time      | 1.65 ± 0.02    | 1.63 ± 0.01  | 1.74 ± 0.01     | 1.67 ± 0.02                |
| average exploit reward    | 197.0          | 191.0        | 141.0           | 216.0                      |

Grid-100

|                           | Epsilon Greedy | Action Delay | Knowledge Array | Optimistic Initial Quality |
|---------------------------|----------------|--------------|-----------------|----------------------------|
| population of classifiers | 404.67 ± 0.54  | 401.98 ± 0.3 | 409.1 ± 0.87    | 403.7 ± 0.57               |
| reliable classifiers      | 400.0 ± 0.0    | 400.0 ± 0.0  | 401.45 ± 0.25   | 400.0 ± 0.0                |
| knowledge                 | 100.0 ± 0.0    | 100.0 ± 0.0  | 100.0 ± 0.0     | 100.0 ± 0.0                |
| trial execution time      | 3.24 ± 0.02    | 3.33 ± 0.02  | 3.35 ± 0.02     | 3.23 ± 0.02                |
| average exploit reward    | 47.0           | 45.0         | 24.0            | 17.0                       |

Observations

Corridor

Based on Figure 4.4, all methods converge to the optimal population size and, after switching to the exploitation mode, can fully utilize the gained knowledge. Regardless of the exploration technique chosen, the agent obtains complete knowledge of the environment in about 20 trials. The AD and KA techniques seem to reach this point faster than the baseline EG and its OIQ modification.

The AD and KA methods accelerate the investigation of the search space, resulting in earlier classifier creation.

Finally, after switching to the "exploit" mode the agent can fully exploit the environment, performing the minimal number of steps needed to reach the goal in each trial. It is also worth mentioning the consistent behaviour of the KA method in the explore phase: by continually updating its assumptions about all possible actions, it rarely takes the optimal one.

However, when the problem size was doubled (\(n=40\)), all strategies still obtained full knowledge of the environment, but only the KA method managed to exploit it unerringly. The difference is even more pronounced for \(n=100\), where only about a third of the KA exploit trials were successful, while the other strategies performed significantly worse.

Grid

The performance plot for the Grid of size \(n=20\) in Figure 4.5 shows that, regardless of the exploration technique chosen, the agent still obtains full knowledge of the environment (even faster than in the Corridor) and converges to the optimal number of classifiers (here, too, the KA method creates many more classifiers at the beginning of the experiment).

Interestingly, the KA obtains the worst average reward despite having the largest number of reliable classifiers for the problems with \(n=20\) and \(n=40\).

Moreover, it is interesting that the agent cannot exploit the environment optimally even though it knows the exact consequences of each action (the number of steps in the exploitation phase is non-optimal). Upon investigation, it was found that most classifiers have a very similar \(cl.r\) value, representing the expected future reward. The agent in its current form is unable to differentiate between aliased states, which prevents it from forming an optimal policy. This finding emphasizes the need for a universal metric quantifying the agent's performance - the current definition of knowledge, modelling only the encountered transitions, is inaccurate when the estimated reward is not distributed correctly amongst the participating classifiers.
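
One way to observe this effect directly is to inspect the spread of reward estimates across the final population. The snippet below is a sketch assuming the pyalcs classifier API, where each classifier cl exposes its reward prediction as cl.r:

# Illustrative: `agent` is a trained ACS2 instance as used above
rewards = sorted(cl.r for cl in agent.population)
print(f'r: min={rewards[0]:.2f}, max={rewards[-1]:.2f}, '
      f'spread={rewards[-1] - rewards[0]:.2f}')

A small spread among classifiers anticipating different states confirms that the reward signal alone cannot discriminate between them.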


Software packages used

import session_info

session_info.show()
-----
gym                 0.21.0
gym_corridor        NA
lcs                 NA
matplotlib          3.5.1
myst_nb             0.13.1
pandas              1.4.0
session_info        1.0.0
src                 (embedded book's utils module)
tabulate            0.8.9
-----
Modules imported as dependencies:
PIL                         8.4.0
arviz                       0.11.2
asttokens                   NA
attr                        21.4.0
babel                       2.9.1
backcall                    0.2.0
beta_ufunc                  NA
binom_ufunc                 NA
brotli                      NA
cachetools                  5.0.0
certifi                     2021.10.08
cffi                        1.15.0
cftime                      1.5.2
charset_normalizer          2.0.10
click                       7.1.2
cloudpickle                 2.0.0
colorama                    0.4.4
colorful                    0.5.4
colorful_orig               0.5.4
cryptography                36.0.1
cycler                      0.10.0
cython_runtime              NA
databricks_cli              NA
dateutil                    2.8.2
debugpy                     1.5.1
decorator                   5.1.1
defusedxml                  0.7.1
dill                        0.3.4
docutils                    0.16
entrypoints                 0.3
executing                   0.8.2
fastprogress                0.2.7
filelock                    3.4.2
google                      NA
greenlet                    1.1.2
grpc                        1.43.0
hiredis                     2.0.0
idna                        3.3
imagesize                   NA
importlib_metadata          NA
ipykernel                   6.7.0
ipython_genutils            0.2.0
ipywidgets                  7.6.5
jedi                        0.18.1
jinja2                      3.0.3
jsonschema                  3.2.0
jupyter_cache               0.4.3
jupyter_sphinx              0.3.2
jupyterlab_pygments         0.1.2
kiwisolver                  1.3.2
linkify_it                  1.0.3
markdown_it                 1.1.0
markupsafe                  2.0.1
matplotlib_inline           NA
mdit_py_plugins             0.2.8
mistune                     0.8.4
mlflow                      1.23.1
mpl_toolkits                NA
msgpack                     1.0.3
myst_parser                 0.15.2
nbclient                    0.5.10
nbconvert                   6.4.1
nbformat                    5.1.3
nbinom_ufunc                NA
netCDF4                     1.5.8
numpy                       1.22.1
packaging                   21.3
pandocfilters               NA
parso                       0.8.3
pexpect                     4.8.0
pickleshare                 0.7.5
pkg_resources               NA
prompt_toolkit              3.0.26
psutil                      5.9.0
ptyprocess                  0.7.0
pure_eval                   0.2.2
pvectorc                    NA
pydev_ipython               NA
pydevconsole                NA
pydevd                      2.6.0
pydevd_concurrency_analyser NA
pydevd_file_utils           NA
pydevd_plugins              NA
pydevd_tracing              NA
pygments                    2.11.2
pylab                       NA
pymc3                       3.11.4
pyparsing                   3.0.7
pyrsistent                  NA
pytz                        2021.3
ray                         1.9.2
redis                       4.1.2
requests                    2.27.1
scipy                       1.7.3
semver                      2.13.0
setproctitle                1.2.2
setuptools                  60.5.0
six                         1.16.0
socks                       1.7.1
sphinx                      4.4.0
sphinxcontrib               NA
sqlalchemy                  1.4.31
stack_data                  0.1.4
testpath                    0.5.0
theano                      1.1.2
tornado                     6.1
tqdm                        4.62.3
traitlets                   5.1.1
typing_extensions           NA
uc_micro                    1.0.1
unicodedata2                NA
urllib3                     1.26.8
wcwidth                     0.2.5
xarray                      0.21.0
yaml                        6.0
zipp                        NA
zmq                         22.3.0
-----
IPython             8.0.1
jupyter_client      7.1.2
jupyter_core        4.9.1
notebook            6.4.8
-----
Python 3.9.10 | packaged by conda-forge | (main, Feb  1 2022, 21:24:11) [GCC 9.4.0]
Linux-5.13.0-30-generic-x86_64-with-glibc2.31
-----
Session information updated at 2022-02-24 12:57