import numpy as np
import itertools
import sys
import os
import random
import copy
import scipy
import time
import pandas as pd
import sklearn as skl
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import seaborn as sns
try:
from lfig import LatexFigure
except:
from qmla.shared_functionality.latex_figure import LatexFigure
from qmla.exploration_strategies import exploration_strategy
import qmla.shared_functionality.probe_set_generation
import qmla.model_building_utilities
import qmla.shared_functionality.genetic_algorithm
# Names exported by ``from <this module> import *``: the three exploration
# strategy classes defined in this file.
__all__ = [
    "Genetic",
    "GeneticTest",
    "GeneticAlgorithmQMLAFullyConnectedLikewisePauliTerms",
]
def hamming_distance(str1, str2):
    """Count the positions at which ``str1`` and ``str2`` differ.

    The two sequences are walked in lockstep with ``zip``, so any characters
    beyond the length of the shorter sequence are not compared.
    """
    mismatches = 0
    for left, right in zip(str1, str2):
        if left != right:
            mismatches += 1
    return mismatches
[docs]class Genetic(exploration_strategy.ExplorationStrategy):
r"""
Exploration Strategy where the model search is mediated through a genetic algorithm.
Genetic algorithm is implemented through :class:`qmla.GeneticAlgorithmQMLA`.
This forms the base class for genetic algorithm applications within QMLA.
:param str exploration_rules: name of exploration strategy used
:param list genes: terms which are permitted in the model search,
which become genes in the chromosomes of the genetic algorithm
:param str true_model: name of the target model.
"""
def __init__(self, exploration_rules, genes, true_model, **kwargs):
    r"""
    Configure the exploration strategy and build the underlying genetic algorithm.

    :param str exploration_rules: name of exploration strategy used
    :param list genes: terms which are permitted in the model search
    :param str true_model: name of the target model
    :param kwargs: forwarded to the parent constructor and to
        :class:`GeneticAlgorithmQMLA`; ``log_file`` is filled in from
        ``self.log_file`` when the caller did not provide it.
    """
    # Import the submodule explicitly: this file never imports
    # ``rating_system`` at module level, so attribute access through the
    # ``qmla.shared_functionality`` package is not guaranteed to work.
    from qmla.shared_functionality import rating_system

    super().__init__(exploration_rules=exploration_rules, **kwargs)

    self.genes = genes
    self.true_model = true_model

    # Rating system used when ranking/rating models
    self.ratings_class = rating_system.ModifiedEloRating(
        initial_rating=1000, k_const=30
    )
    # NOTE: attribute name is misspelt ("stratgey") but is kept as-is because
    # code outside this file may read it under this exact name.
    self.branch_champion_selection_stratgey = "fitness"  # 'ratings'
    # Column of ``fitness_by_f_score`` handed to the genetic algorithm
    self.fitness_method = "elo_rating"
    self.prune_completed_initially = True
    self.prune_complete = True
    self.fitness_by_f_score = pd.DataFrame()
    self.fitness_df = pd.DataFrame()
    self.num_sites = qmla.model_building_utilities.get_num_qubits(self.true_model)
    self.num_probes = 50
    self.max_num_qubits = 7
    self.hypothetical_final_generation = False
    self.qhl_models = [
        "pauliSet_1J2_zJz_d3+pauliSet_1J3_yJy_d3+pauliSet_1J3_zJz_d3+pauliSet_2J3_xJx_d3+pauliSet_2J3_zJz_d3",
        "pauliSet_1J3_yJy_d3+pauliSet_1J3_zJz_d3+pauliSet_2J3_xJx_d3+pauliSet_2J3_zJz_d3",
        "pauliSet_1J2_zJz_d3+pauliSet_1J3_zJz_d3+pauliSet_2J3_xJx_d3+pauliSet_2J3_zJz_d3",
    ]
    self.spawn_step = 0  # 1st generation's ID
    self.mutation_probability = 0.1

    # The genetic algorithm logs to the same file as this strategy unless the
    # caller explicitly chose another one.
    if "log_file" not in kwargs:
        kwargs["log_file"] = self.log_file
    self.genetic_algorithm = (
        qmla.shared_functionality.genetic_algorithm.GeneticAlgorithmQMLA(
            genes=genes,
            num_sites=self.num_sites,
            true_model=self.true_model,
            mutation_probability=self.mutation_probability,
            **kwargs,
        )
    )
    self.true_chromosome = self.genetic_algorithm.true_chromosome
    self.true_chromosome_string = self.genetic_algorithm.true_chromosome_string
    # One bit per gene, so 2^(number of genes) distinct chromosomes
    self.num_possible_models = 2 ** len(self.true_chromosome)
    self.max_num_probe_qubits = self.num_sites

    # Default search size: up to 24 generations of 16 models each
    self.max_spawn_depth = 24
    self.initial_num_models = 16
    self.initial_models = self.genetic_algorithm.random_initial_models(
        num_models=self.initial_num_models
    )

    # Per-generation bookkeeping, filled in by ``analyse_generation``
    self.model_f_scores = {}
    self.model_points_at_step = {}
    self.generation_model_rankings = {}
    self.models_ranked_by_fitness = {}
    self.model_fitness_by_generation = {}
    self.fitness_correlations = {}

    self.tree_completed_initially = False
    self.max_num_models_by_shape = {
        self.num_sites: (self.initial_num_models * self.max_spawn_depth) / 10,
        "other": 0,
    }
    self.num_processes_to_parallelise_over = self.initial_num_models
    self.max_time_to_consider = 15
    self.min_param = 0.35
    self.max_param = 0.65

    # Display (LaTeX) names of the fitness quantities recorded per model in
    # ``analyse_generation``; every recorded fitness type needs an entry here.
    self.fitness_mechanism_names = {
        "f_score": r"$F_1$",
        "hamming_distance": r"$H$",
        "inverse_ll": r"$g^L$",
        "inverse_ll_sq": r"$-\frac{1}{L^2}$",
        "akaike_info_criterion": r"$\frac{1}{AIC}$",
        "aic_sq": r"$\frac{1}{AIC^2}$",
        "aicc": r"$\frac{1}{AICc}$",
        "aicc_sq": r"$g^{A}$",
        "bayesian_info_criterion": r"$\frac{1}{BIC}$",
        "bic_sq": r"$g^{B}$",
        "akaike_weight": r"$w_{A}$",
        "bayes_weight": r"$w_{B}$",
        "mean_residuals": r"$r_{\mu}$",
        "mean_residuals_sq": r"$r_{\mu}^2$",
        "rs_mean": r"$1-\overline{r}$",
        "rs_median": r"$1-\tilde{r}$",
        "rs_mean_sq": r"$g^{r}$",  # r"$(1-\overline{r})^2$",
        "rs_median_sq": r"$(1-\tilde{r})^2$",
        "bf_points": r"$g^{p}$",
        "bf_rank": r"$g^{R}$",
        "elo_rating": r"$g^{E}$",
    }
def nominate_champions(self):
    r"""
    Choose the model with the highest fitness on the final generation.

    Stores the choice on ``self.champion_model`` and returns it wrapped in a
    single-element list.
    """
    final_generation = self.spawn_step
    final_ranking = self.models_ranked_by_fitness[final_generation]
    # Rankings are stored best-first, so the champion is the first entry.
    self.champion_model = final_ranking[0]

    message = [
        "Final generation:",
        final_generation,
        "\nModel rankings on final generation:",
        final_ranking,
        "\nChampion:",
        self.champion_model,
    ]
    self.log_print(message)
    return [self.champion_model]
def analyse_generation(self, model_points, model_names_ids, **kwargs):
    r"""
    Following a complete generation of the genetic algorithm,
    perform all necessary processing to enable construction of next set of models.

    Every candidate fitness measure is recorded for each model (in
    ``self.fitness_by_f_score`` and ``self.fitness_df``); the one named by
    ``self.fitness_method`` is passed to the genetic algorithm.

    :param dict model_points: the number of Bayes factor comparisons for which each candidate
        within the generation was deemed superior against a contemporary model
    :param dict model_names_ids: mapping between models' names and their IDs from the QMLA environment;
        this enables analysing further data passed from QMLA within kwargs.
    :return: names of this generation's models, ordered from highest to lowest fitness
    :rtype: list
    """
    self.spawn_step += 1
    generation = self.spawn_step
    self.log_print(["Analysing generation at spawn step ", generation])
    self.log_print(["model names ids:", model_names_ids])
    self.model_points_at_step[generation] = model_points

    sum_wins = sum(model_points.values())
    if sum_wins == 0:
        # TODO hack to get over some times passing empty dict from
        # update_branch -- find a better way
        sum_wins = 1
    model_ids = list(model_points.keys())

    # Model rankings by number of wins
    ranked_model_list = sorted(model_points, key=model_points.get, reverse=True)
    ranked_models_by_name = [model_names_ids[m] for m in ranked_model_list]
    self.log_print(
        [
            "Ranked models:",
            ranked_model_list,
            "\n Names:",
            ranked_models_by_name,
            "\n with fitnesses:",
        ]
    )
    self.generation_model_rankings[generation] = ranked_models_by_name
    # Best model receives N points, worst receives 1; normalised to sum to 1.
    rankings = list(range(len(ranked_model_list), 0, -1))
    num_points = sum(rankings)  # number of points to distribute
    ranking_points = {
        name: r / num_points for name, r in zip(ranked_models_by_name, rankings)
    }

    # Model ratings (Elo ratings), shifted so the worst model sits at zero
    precomputed_ratings = self.ratings_class.get_ratings(list(model_points.keys()))
    original_ratings_by_name = {
        model_names_ids[m]: precomputed_ratings[m] for m in model_ids
    }
    min_rating = min(original_ratings_by_name.values())
    ratings_by_name = {
        name: rating - min_rating
        for name, rating in original_ratings_by_name.items()
    }
    self.log_print(["Rating (as fraction of starting rating):\n", ratings_by_name])
    sum_ratings = np.sum(list(ratings_by_name.values()))
    if sum_ratings == 0:
        # All models share the same rating (e.g. a single model): dividing
        # would give 0/0 = NaN fitnesses, so give every model an equal share.
        equal_share = 1 / len(ratings_by_name)
        model_elo_ratings = {name: equal_share for name in ratings_by_name}
    else:
        model_elo_ratings = {
            name: shifted / sum_ratings for name, shifted in ratings_by_name.items()
        }

    storage_instances = self.tree.model_storage_instances
    model_instances = [storage_instances[m] for m in model_ids]
    aic_values = {
        model.model_id: model.akaike_info_criterion for model in model_instances
    }
    aicc_values = {
        model.model_id: model.akaike_info_criterion_c for model in model_instances
    }
    min_aicc = min(aicc_values.values())
    self.log_print(
        ["At generation {}, AIC of models: {}".format(generation, aic_values)]
    )

    # Entries of a per-model record which are identifiers, not fitnesses
    # (hamming_distance is recorded per model but not tracked in fitness_df).
    non_fitness_keys = ("model", "model_id", "generation", "hamming_distance")
    per_model_rows = []
    per_fitness_rows = []

    # Store info on each model for analysis
    for m in model_ids:
        model_storage_instance = storage_instances[m]
        self.log_print(["Model storage instance:", model_storage_instance])
        mod = model_storage_instance.model_name

        num_terms = self.genetic_algorithm.num_terms
        hamming_dist = self.hamming_distance_model_comparison(test_model=mod)
        # Fraction of genes that agree with the true chromosome
        hamming_fitness = (num_terms - hamming_dist) / num_terms
        f_score = np.round(
            self.f_score_model_comparison(test_model=mod), 2
        )  # TODO get from model instance
        self.model_f_scores[m] = f_score
        win_ratio = model_points[m] / sum_wins

        log_likelihood = model_storage_instance.evaluation_log_likelihood
        aic = model_storage_instance.akaike_info_criterion
        aicc = model_storage_instance.akaike_info_criterion_c
        bic = model_storage_instance.bayesian_info_criterion
        mean_pr0_diff = model_storage_instance.evaluation_mean_pr0_diff
        residual_squares = model_storage_instance.evaluation_residual_squares

        # Scores stored for offline analysis.
        # When adding a new fitness fnc -- add a name in self.fitness_mechanism_names
        this_model_fitnesses = {
            "model": mod,
            "model_id": m,
            "generation": generation,
            # absolute metrics (not available in real experiments)
            "f_score": f_score,
            "hamming_distance": hamming_fitness,
            # from storage instance
            "inverse_ll": -1 / log_likelihood,
            "inverse_ll_sq": (-1 / log_likelihood) ** 2,
            "akaike_info_criterion": 1 / aic,
            "aicc": 1 / aicc,
            "aic_sq": (1 / aic) ** 2,
            "aicc_sq": (1 / aicc) ** 2,
            "bayesian_info_criterion": (1 / bic),
            "bic_sq": (1 / bic) ** 2,
            "akaike_weight": np.e ** ((min_aicc - aicc) / 2),
            "bayes_weight": np.e ** (-1 * bic / 2),
            "mean_residuals": 1 - mean_pr0_diff,
            "mean_residuals_sq": (1 - mean_pr0_diff) ** 2,
            "rs_mean": 1 - residual_squares["mean"],
            "rs_median": 1 - residual_squares["median"],
            "rs_mean_sq": (1 - residual_squares["mean"]) ** 2,
            "rs_median_sq": (1 - residual_squares["median"]) ** 2,
            # relative to other models in this branch
            "bf_points": win_ratio,
            "bf_rank": ranking_points[mod],
            "elo_rating": model_elo_ratings[mod],
        }
        per_model_rows.append(this_model_fitnesses)

        # One long-format row per fitness type, in a deterministic order
        for f, fitness_value in this_model_fitnesses.items():
            if f in non_fitness_keys:
                continue
            try:
                fitness_type_name = self.fitness_mechanism_names[f]
            except KeyError:
                self.log_print(
                    [
                        "fitness name keys:",
                        list(self.fitness_mechanism_names.keys()),
                    ]
                )
                raise
            per_fitness_rows.append(
                {
                    "generation": generation,
                    "f_score": f_score,
                    "fitness": fitness_value,
                    "fitness_type": f,
                    "fitness_type_name": fitness_type_name,
                    "active_fitness_method": self.fitness_method == f,
                }
            )

    # DataFrame.append no longer exists in pandas >= 2.0, so the generation's
    # rows are collected above and attached with a single concat.
    new_model_rows = pd.DataFrame(per_model_rows)
    if self.fitness_by_f_score.empty:
        self.fitness_by_f_score = new_model_rows
    else:
        self.fitness_by_f_score = pd.concat(
            [self.fitness_by_f_score, new_model_rows], ignore_index=True
        )
    new_fitness_rows = pd.DataFrame(per_fitness_rows)
    if self.fitness_df.empty:
        self.fitness_df = new_fitness_rows
    else:
        self.fitness_df = pd.concat(
            [self.fitness_df, new_fitness_rows], ignore_index=True
        )

    # Extract fitness specified by user (exploration strategy's fitness_method attribute)
    # to use for generating models within genetic algorithm
    this_generation = self.fitness_by_f_score[
        self.fitness_by_f_score.generation == generation
    ]
    genetic_algorithm_fitnesses = dict(
        zip(this_generation["model"], this_generation[self.fitness_method])
    )
    self.log_print(
        [
            "fitness method:{} => Fitnesses={}".format(
                self.fitness_method, genetic_algorithm_fitnesses
            )
        ]
    )
    self.models_ranked_by_fitness[generation] = sorted(
        genetic_algorithm_fitnesses,
        key=genetic_algorithm_fitnesses.get,
        reverse=True,
    )
    self.model_fitness_by_generation[generation] = genetic_algorithm_fitnesses
    self.genetic_algorithm.consolidate_generation(
        model_fitnesses=genetic_algorithm_fitnesses
    )
    return self.models_ranked_by_fitness[generation]
def generate_models(self, model_list, **kwargs):
    r"""
    Model generation using genetic algorithm.

    Follows rules of :meth:`~qmla.exploration_strategies.ExplorationStrategy.generate_models`.
    The fitnesses of the current generation must already have been stored by
    :meth:`analyse_generation`, which is called by the exploration strategy tree.

    :param model_list: accepted for interface compatibility; not read here
    :return: whatever the genetic algorithm's ``genetic_algorithm_step`` returns
    """
    current_step = self.spawn_step
    fitnesses_this_generation = self.model_fitness_by_generation[current_step]
    self.log_print(
        [
            "Spawn step:",
            current_step,
        ]
    )

    # Each sampled pair of parents proposes two chromosomes, so half as many
    # pairs as models are requested.
    pairs_required = self.initial_num_models / 2
    return self.genetic_algorithm.genetic_algorithm_step(
        model_fitnesses=fitnesses_this_generation,
        num_pairs_to_sample=pairs_required,
    )
def finalise_model_learning(self, **kwargs):
    r"""
    Hook called once model learning has finished; this strategy does nothing.

    Any keyword arguments are accepted and ignored.
    """
    return None
def hamming_distance_model_comparison(
    self,
    test_model,
    target_model=None,
):
    r"""
    Compare test_model with target_model by Hamming distance.

    :param test_model: model which is mapped to a chromosome string by the
        genetic algorithm
    :param target_model: model to compare against; if None, the true model's
        chromosome string is used
    :return: number of positions at which the two chromosome strings differ
    """
    ga = self.genetic_algorithm

    if target_model is not None:
        target_string = ga.chromosome_string(
            ga.map_model_to_chromosome(target_model)
        )
    else:
        target_string = self.true_chromosome_string
    test_string = ga.chromosome_string(ga.map_model_to_chromosome(test_model))

    distance = 0
    for test_gene, target_gene in zip(test_string, target_string):
        if test_gene != target_gene:
            distance += 1
    return distance
def f_score_model_comparison(
    self,
    test_model,
    target_model=None,
    beta=1,
):
    r"""
    Get F score of candidate model, measure of overlap between the terms of the candidate and target model.

    :param str test_model: name of candidate model
    :param str target_model: name of target model, if None, assumed that target is self.true_model
    :param float beta: relative importance of precision to sensitivity. in general this is F-beta score,
        usually beta = 1
    :return: the F-beta score, or 0 when the candidate shares no term with
        the target (precision and sensitivity are then both zero)
    """
    if target_model is None:
        target_model = self.true_model

    constituent_terms = qmla.model_building_utilities.get_constituent_names_from_name
    true_set = {self.latex_name(term) for term in constituent_terms(target_model)}
    learned_set = {self.latex_name(term) for term in constituent_terms(test_model)}

    true_positives = len(true_set & learned_set)
    if true_positives == 0:
        # No overlap: the F score is defined as 0 here. Checking explicitly
        # replaces relying on a caught ZeroDivisionError and also covers an
        # empty candidate or target term set.
        return 0

    # len(learned_set) == true positives + false positives
    precision = true_positives / len(learned_set)
    sensitivity = true_positives / len(true_set)
    return (1 + beta ** 2) * (
        (precision * sensitivity) / (beta ** 2 * precision + sensitivity)
    )
def f_score_from_chromosome_string(
    self,
    chromosome,
):
    r"""
    F1 score between chromosome and true model.

    :param str chromosome: string of digits, one per gene
    :return: F1 score of the chromosome's genes against ``self.true_chromosome``
    :raises Exception: whatever the F1 computation raises is logged and re-raised
    """
    # ``import sklearn`` alone does not guarantee the ``metrics`` submodule is
    # loaded, so import the function explicitly.
    from sklearn.metrics import f1_score

    mod = np.array([int(gene) for gene in chromosome])
    try:
        return f1_score(y_true=self.true_chromosome, y_pred=mod)
    except Exception:
        self.log_print(
            [
                "F score from chromosome {} with mod {} not working against true chrom {}".format(
                    chromosome, mod, self.true_chromosome
                )
            ]
        )
        raise
def exploration_strategy_finalise(self):
    r"""
    Genetic algorithm specific version of :meth:`qmla.ExplorationStrategy.exploration_strategy_finalise`.

    Copies the fitness records, ratings, gene pool and birth register onto
    ``self.storage`` and builds a summary of every unique chromosome considered.
    """
    # Hypothetical generation models
    if self.hypothetical_final_generation:
        # TODO this will cause a crash in QHL mode since.
        # in general this should be turned off so not worth a large fix
        self.log_print(["Running hypothetical step to get some models"])
        hypothetical_models = self.genetic_algorithm.genetic_algorithm_step(
            model_fitnesses=self.model_fitness_by_generation[self.spawn_step - 1],
            num_pairs_to_sample=self.initial_num_models
            / 2,  # for every pair, 2 chromosomes proposed
        )
        self.log_print(["hypothetical generation models:", hypothetical_models])

    objective_function_name = self.fitness_mechanism_names[self.fitness_method]

    self.storage.fitness_correlations = self.fitness_correlations
    self.storage.fitness_by_f_score = self.fitness_by_f_score
    self.storage.fitness_df = self.fitness_df
    self.storage.true_model_chromosome = self.true_chromosome_string
    self.storage.ratings_df = self.ratings_class.ratings_df

    # NOTE: the columns below are added to the genetic algorithm's own
    # dataframes (no copy is taken).
    gene_pool = self.genetic_algorithm.gene_pool
    gene_pool["objective_function"] = objective_function_name
    self.storage.gene_pool = gene_pool

    birth_register = self.genetic_algorithm.birth_register
    birth_register["objective_function"] = objective_function_name
    birth_register["max_time_considered"] = self.max_time_to_consider
    self.storage.birth_register = birth_register
    self.storage.ratings = self.ratings_class.all_ratings

    chromosomes = sorted(
        set(self.genetic_algorithm.previously_considered_chromosomes)
    )

    # Each chromosome's F score is computed exactly once and reused for both
    # the summary table and the stored list. A failure propagates (after
    # being logged by f_score_from_chromosome_string), as it did before.
    chromosome_rows = []
    f_scores = []
    for c in chromosomes:
        hamming_dist = self.hamming_distance_model_comparison(
            test_model=self.genetic_algorithm.map_chromosome_to_model(c)
        )  # for fitness use 1/H
        f_score = np.round(self.f_score_from_chromosome_string(c), 3)
        f_scores.append(f_score)
        chromosome_rows.append(
            {
                "chromosome": str(c),
                "numeric_chromosome": int(c, 2),
                "f_score": f_score,
                "num_terms": self.genetic_algorithm.num_terms,
                "hamming_distance": hamming_dist,
            }
        )
    self.unique_chromosomes = pd.DataFrame(
        chromosome_rows,
        columns=[
            "chromosome",
            "numeric_chromosome",
            "f_score",
            "num_terms",
            "hamming_distance",
        ],
    )
    self.log_print(["self.unique_chromosomes:\n", self.unique_chromosomes])
    self.storage.unique_chromosomes = self.unique_chromosomes

    # Report if a chromosome one gene longer than expected ('1' followed by
    # num_terms zeros) was ever considered.
    dud_chromosome = str("1" + "0" * self.genetic_algorithm.num_terms)
    if dud_chromosome in chromosomes:
        self.log_print(
            [
                "{} in previous chromosomes:\n{}".format(
                    dud_chromosome,
                    self.genetic_algorithm.previously_considered_chromosomes,
                )
            ]
        )

    chromosome_numbers = sorted(int(c, 2) for c in chromosomes)
    self.storage.chromosomes_tested = chromosome_numbers
    self.storage.f_score_tested_models = f_scores
def check_tree_completed(self, spawn_step, **kwargs):
    r"""
    Genetic algorithm specific version of :meth:`qmla.ExplorationStrategy.check_tree_completed`.

    The search ends when the maximum spawn depth is reached, or when the
    genetic algorithm reports that its best model is unchanged; in the latter
    case the champion is declared here.

    :param spawn_step: accepted for interface compatibility; the strategy's
        own ``self.spawn_step`` counter is what is compared
    :return: True if the search should terminate
    :rtype: bool
    """
    # ``>=`` rather than ``==`` so the search still terminates if the counter
    # ever moves past the limit (e.g. the limit was lowered mid-run).
    if self.spawn_step >= self.max_spawn_depth:
        self.log_print(["Terminating at spawn depth ", self.spawn_step])
        return True

    if self.genetic_algorithm.best_model_unchanged:
        # Elite model hasn't changed in the last N generations
        self.champion_determined = True
        self.champion_model = (
            self.genetic_algorithm.most_elite_models_by_generation[
                self.genetic_algorithm.genetic_generation - 1
            ]
        )
        self.log_print(
            [
                "Terminating search early (after {} generations) b/c elite model unchanged in {} generations.".format(
                    self.genetic_algorithm.genetic_generation,
                    self.genetic_algorithm.unchanged_elite_num_generations_cutoff,
                ),
                "\nDeclaring champion:",
                self.champion_model,
            ]
        )
        return True

    self.log_print(["Elite models changed recently; continuing search."])
    return False
def check_tree_pruned(self, **kwargs):
    r"""
    Genetic algorithm specific version of :meth:`qmla.ExplorationStrategy.check_tree_pruned`.

    There is no pruning stage for the genetic algorithm (the winner is the
    champion of the final branch), so the tree always counts as pruned.
    """
    pruning_finished = True
    return pruning_finished
def set_specific_plots(self, **kwargs):
    r"""
    Genetic algorithm specific version of :meth:`qmla.ExplorationStrategy.set_specific_plots`.

    Registers the plotting methods available at each plot level and, for
    ``plot_level >= 2``, immediately draws the rating plots which need
    arguments. Plot failures are logged, never raised.

    :param champion_model_id: (keyword, optional) ID of the champion model;
        when absent the single-model rating plots are skipped
    :param true_model_id: (keyword, optional) ID of the true model, ``-1``
        if it was not among the models considered
    """
    self.plot_methods_by_level = {
        1: [],
        2: [
            self._plot_correlation_fitness_with_f_score,
            self._plot_fitness_v_fscore_by_generation,
            self.__plot_gene_pool_progression,
        ],
        3: [
            self._plot_fitness_v_fscore,
            self._plot_fitness_v_generation,
        ],
        4: [
            self._plot_model_ratings,
            self._plot_gene_pool,
        ],
        5: [self.plot_generational_metrics, self._plot_selection_probabilities],
        6: [],
    }

    # Plots that need arguments so are called individually
    if self.plot_level < 2:
        return

    try:
        self.ratings_class.plot_models_ratings_against_generation(
            f_scores=self.model_f_scores,
            save_directory=self.save_directory,
            f_score_cmap=self.f_score_cmap,
            figure_format=self.figure_format,
        )
    except Exception as e:
        self.log_print(
            [
                "plot failed plot_models_ratings_against_generation with error ",
                e,
            ]
        )

    # These IDs were previously read from names which are not defined in
    # this method, so the plots below could never be produced; they are now
    # taken from the keyword arguments.
    champion_model_id = kwargs.get("champion_model_id")
    true_model_id = kwargs.get("true_model_id", -1)
    if champion_model_id is None:
        self.log_print(
            [
                "champion_model_id not provided; skipping plot_rating_progress_single_model"
            ]
        )
        return

    try:
        self.ratings_class.plot_rating_progress_single_model(
            target_model_id=champion_model_id,
            save_to_file=os.path.join(
                self.save_directory, "ratings_progress_champion.png"
            ),
        )
        if true_model_id != -1 and true_model_id != champion_model_id:
            self.ratings_class.plot_rating_progress_single_model(
                target_model_id=true_model_id,
                save_to_file=os.path.join(
                    self.save_directory, "ratings_progress_true_model.png"
                ),
            )
    except Exception as e:
        self.log_print(
            ["plot failed plot_rating_progress_single_model with error ", e]
        )
def _plot_correlation_fitness_with_f_score(
    self,
    save_to_file=None,
):
    r"""
    Show how the fitness of models at each generation progress in terms of F score.

    For every recorded fitness type (other than the F score itself) and every
    generation, the correlation and covariance between fitness and F score are
    computed; the result is kept on ``self.fitness_correlations`` and the
    correlations are plotted against generation.

    :param str save_to_file: path to save the figure to; defaults to
        ``correlations_bw_fitness_and_f_score.png`` in ``self.save_directory``
    """
    plt.clf()
    fitness_types_to_ignore = ["f_score", "hamming_distance"]

    # Rows are collected in a list and turned into a dataframe once:
    # DataFrame.append no longer exists in pandas >= 2.0.
    correlation_rows = []
    for t in self.fitness_df.fitness_type.unique():
        if t in fitness_types_to_ignore:
            continue
        this_fitness_type = self.fitness_df[self.fitness_df["fitness_type"] == t]
        for g in this_fitness_type.generation.unique():
            this_type_this_gen = this_fitness_type[
                this_fitness_type.generation == g
            ]
            f_scores = this_type_this_gen["f_score"]
            fitnesses = this_type_this_gen["fitness"]
            correlation_rows.append(
                {
                    "Generation": g,
                    "Method": self.fitness_mechanism_names[t],
                    "Correlation": f_scores.corr(fitnesses),
                    "Covariance": f_scores.cov(fitnesses),
                }
            )
    correlations = pd.DataFrame(
        correlation_rows,
        columns=["Generation", "Method", "Correlation", "Covariance"],
    )

    self.fitness_correlations = correlations
    self.log_print(["fitness correlations:\n", self.fitness_correlations])

    fig, ax = plt.subplots(figsize=(15, 10))
    if len(correlations.Generation.unique()) == 1:
        # A single generation cannot be joined by lines; draw points instead
        sns.scatterplot(
            y="Correlation",
            x="Generation",
            hue="Method",
            data=correlations,
            ax=ax,
        )
    else:
        sns.lineplot(
            y="Correlation",
            x="Generation",
            hue="Method",
            data=correlations,
            ax=ax,
            markers=["*", "X", "<", "^"],
        )
    ax.axhline(0, ls="--", c="k")

    if save_to_file is None:
        save_to_file = os.path.join(
            self.save_directory,
            "correlations_bw_fitness_and_f_score.png",
        )
    plt.savefig(save_to_file)
def _plot_fitness_v_generation(self, save_to_file=None):
    r"""
    Plot progression of fitness against generations of the genetic algorithm.

    Only the rows of ``self.fitness_df`` flagged as the active fitness method
    are shown, as one box per generation.

    :param str save_to_file: path to save the figure to; defaults to
        ``fitness_v_generation.<figure_format>`` in ``self.save_directory``
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    plt.clf()
    fig, ax = plt.subplots()
    sns.set(rc={"figure.figsize": (11.7, 8.27)})
    # Palette is constructed but not currently passed to the plot
    palette = sns.cubehelix_palette(dark=0.3, light=0.8, as_cmap=True)

    # Element-wise comparison on the column, hence ``== True``
    uses_active_method = self.fitness_df["active_fitness_method"] == True  # noqa: E712
    active_fitnesses = self.fitness_df[uses_active_method]
    sns.boxplot(
        x="generation",
        y="fitness",
        data=active_fitnesses,
        ax=ax,
    )

    ax.legend(loc="lower right")
    ax.set_xlabel("Generation")
    ax.set_ylabel("Fitness")
    ax.set_title("Fitness method: {}".format(self.fitness_method))

    if save_to_file is None:
        filename = "fitness_v_generation.{}".format(self.figure_format)
        save_to_file = os.path.join(self.save_directory, filename)
    plt.savefig(save_to_file)
def _plot_fitness_v_fscore_by_generation(
    self,
):
    r"""
    Plot fitness vs f score throughout generations of the genetic algorithm.

    One row of the figure per generation; Elo rating, Bayes-factor rank and
    Bayes-factor points are drawn with distinct markers. The figure is saved
    as ``fitness_types.<figure_format>`` in ``self.save_directory``.
    """
    plt.clf()

    # Names must match the fitness types recorded in ``self.fitness_df``:
    # ranking points are stored as "bf_rank" and win ratio as "bf_points".
    plotted_fitness_types = ["elo_rating", "bf_rank", "bf_points"]
    candidate_fitnesses = self.fitness_df[
        self.fitness_df["fitness_type"].isin(plotted_fitness_types)
    ]

    g = sns.FacetGrid(
        candidate_fitnesses,
        row="generation",
        hue="fitness_type",
        hue_kws=dict(marker=["x", "+", "*"]),
        xlim=(-0.1, 1.1),
        # ``size`` was renamed ``height`` in seaborn and later removed
        height=4,
        aspect=2,
    )
    g = g.map(plt.scatter, "f_score", "fitness").add_legend()

    save_to_file = os.path.join(
        self.save_directory, "fitness_types.{}".format(self.figure_format)
    )
    plt.savefig(save_to_file)
def _plot_model_ratings(
    self,
):
    r"""
    Plot ratings of models on all generations, as determined by the RatingSystem.

    One axis per generation, one line per model, coloured by the model's
    F score. Saved as ``ratings`` in ``self.save_directory`` using
    ``self.figure_format``.
    """
    plt.clf()
    all_ratings = self.ratings_class.all_ratings
    generation_ids = [int(gen) for gen in all_ratings.generation.unique()]
    figure = LatexFigure(
        use_gridspec=True, gridspec_layout=(len(generation_ids), 1)
    )

    # TODO : unique linestyle and colour combo for each model ID and tracks across subplots
    # NOTE: this adds a display column to the rating system's own dataframe
    all_ratings["Model ID"] = all_ratings["model_id"]

    for generation in generation_ids:
        ax = figure.new_axis()
        generation_ratings = all_ratings[all_ratings.generation == generation]

        # Colour of each model's line is set by its F score
        model_colours = {}
        for model_id in generation_ratings["model_id"]:
            model_colours[model_id] = self.f_score_cmap(
                self.model_f_scores[model_id]
            )

        sns.lineplot(
            x="idx",
            y="rating",
            hue=r"Model ID",
            hue_order=sorted(generation_ratings.model_id.unique()),
            data=generation_ratings,
            ax=ax,
            legend="full",
            palette=model_colours,
        )
        ax.set_title("Generation {}".format(generation), pad=-15)
        ax.set_xlabel("")
        ax.set_ylabel("Elo rating")
        ax.legend(bbox_to_anchor=(1, 1))

    target_path = os.path.join(
        self.save_directory, "ratings".format(self.qmla_id)
    )
    figure.save(target_path, file_format=self.figure_format)
def _plot_fitness_v_fscore(self):
    r"""
    Plot fitness against f score.

    Elo rating and win ratio of every recorded model are drawn against its
    F score. Saved as ``fitness_v_fscore.png`` in ``self.save_directory``.
    """
    plt.clf()
    fig, ax = plt.subplots()
    sns.set(rc={"figure.figsize": (11.7, 8.27)})

    sns.scatterplot(
        x="f_score",
        y="elo_rating",
        label="Rating",
        data=self.fitness_by_f_score,
        ax=ax,
    )
    sns.scatterplot(
        x="f_score",
        # The win ratio is recorded under "bf_points"; there is no
        # "win_ratio" column in fitness_by_f_score.
        y="bf_points",
        label="Win ratio",
        data=self.fitness_by_f_score,
        ax=ax,
    )

    ax.legend(loc="lower right")
    ax.set_xlabel("F score")
    ax.set_ylabel("Fitness (as probability)")
    ax.set_xlim((-0.05, 1.05))

    save_to_file = os.path.join(self.save_directory, "fitness_v_fscore.png")
    ax.figure.savefig(save_to_file)
def _plot_gene_pool(self):
    r"""
    Show the F scores of all models in all generations.

    Draws, per generation, a stacked bar of the total selection probability
    held by models of each F score (coloured by F score), with a horizontal
    colour bar underneath. Saved as ``gene_pool.<figure_format>`` in
    ``self.save_directory``.
    """
    ga = self.genetic_algorithm
    plt.clf()

    # Create a bare figure and attach the grid to it; ``plt.subplots`` would
    # add an unused default axes underneath the two gridspec axes.
    fig = plt.figure(
        figsize=(10, 8),
        constrained_layout=True,
    )
    gs = fig.add_gridspec(nrows=2, ncols=1, height_ratios=[7, 1])
    label_fontsize = 10
    f_score_cmap = self.f_score_cmap

    # Bar plots for probability of gene being selected, coloured by f score
    ax = fig.add_subplot(gs[0, 0])
    generations = sorted(ga.gene_pool.generation.unique())
    sorted_f_scores = sorted(ga.gene_pool.f_score.unique())
    probability_grouped_by_f_by_generation = {
        g: {
            f: ga.gene_pool[
                (ga.gene_pool.f_score == f) & (ga.gene_pool.generation == g)
            ].probability.sum()
            for f in sorted_f_scores
        }
        for g in generations
    }
    # Rows: generations; columns: F scores
    probability_grouped_by_f_by_generation = pd.DataFrame(
        probability_grouped_by_f_by_generation
    ).T

    # Stack the bars: each F score's bar starts where the previous ones ended
    below = [0] * len(generations)
    for f in sorted_f_scores:
        probs_this_f = list(probability_grouped_by_f_by_generation[f])
        ax.bar(
            generations,
            probs_this_f,
            color=f_score_cmap(f),
            bottom=below,
            edgecolor=["black"] * len(generations),
        )
        below = [b + p for b, p in zip(below, probs_this_f)]

    ax.set_xticks(generations)
    ax.set_ylabel("Probability", fontsize=label_fontsize)
    ax.set_xlabel("Generation", fontsize=label_fontsize)
    ax.set_title("Gene pool", fontsize=label_fontsize)

    # Colour bar
    ax = fig.add_subplot(gs[1, 0])
    sm = plt.cm.ScalarMappable(
        cmap=f_score_cmap, norm=plt.Normalize(vmin=0, vmax=1)
    )
    sm.set_array(np.linspace(0, 1, 100))
    plt.colorbar(sm, cax=ax, orientation="horizontal")
    ax.set_xlabel("F-score", fontsize=label_fontsize)

    # Save figure
    save_to_file = os.path.join(
        self.save_directory, "gene_pool.{}".format(self.figure_format)
    )
    fig.savefig(save_to_file)
def _plot_selection_probabilities(self):
    r"""
    Plot pie charts of the selection probabilities of prospective parents at each generation.

    Models are signified by their F score. Saved as
    ``selection_probabilities`` in ``self.save_directory`` using
    ``self.figure_format``.
    """
    gene_pool = self.genetic_algorithm.gene_pool
    generations = sorted(gene_pool.generation.unique())
    self.log_print(["[_plot_selection_probabilities] generations:", generations])

    lf = LatexFigure(auto_gridspec=len(generations))
    for g in generations:
        ax = lf.new_axis()
        this_gen_genes = gene_pool[gene_pool.generation == g]
        colours = [self.f_score_cmap(f) for f in this_gen_genes.f_score]
        ax.pie(
            this_gen_genes.probability,
            colors=colours,
            radius=2,
        )

    save_to_file = os.path.join(self.save_directory, "selection_probabilities")
    # Keyword is ``file_format``, as in the other LatexFigure.save calls in
    # this class (it was ``figure_format`` here).
    lf.save(save_to_file, file_format=self.figure_format)
def plot_generational_metrics(self):
    r"""
    Show various metrics across all generations.

    Box plots, per generation, of the models' F scores (top) and evaluation
    log-likelihoods (bottom). Saved as ``generation_progress.png`` in
    ``self.save_directory``.
    """
    # Create a bare figure and attach the grid to it; ``plt.subplots`` would
    # add an unused default axes underneath the two gridspec axes.
    fig = plt.figure(figsize=(15, 10), constrained_layout=True)
    gs = fig.add_gridspec(
        nrows=2,
        ncols=1,
    )

    metrics = self.fitness_by_f_score
    if "log_likelihood" not in metrics.columns and "inverse_ll" in metrics.columns:
        # The log-likelihood itself is not recorded per model; recover it from
        # the stored fitness ``inverse_ll = -1 / log_likelihood``. ``assign``
        # returns a copy, leaving the stored dataframe untouched.
        metrics = metrics.assign(log_likelihood=-1 / metrics["inverse_ll"])

    ax = fig.add_subplot(gs[0, 0])
    sns.boxplot(y="f_score", x="generation", data=metrics, ax=ax)
    ax.set_ylabel("F-score")
    ax.set_xlabel("Generation")
    ax.set_title("F score")
    ax.set_ylim(0, 1)
    ax.legend()

    ax = fig.add_subplot(gs[1, 0])
    sns.boxplot(y="log_likelihood", x="generation", data=metrics, ax=ax)
    ax.set_ylabel("log-likelihood")
    ax.set_xlabel("Generation")
    ax.set_title("Evaluation log likeihood")
    ax.legend()

    # Save figure
    save_to_file = os.path.join(self.save_directory, "generation_progress.png")
    fig.savefig(save_to_file)
def __plot_gene_pool_progression(
    self,
):
    r"""
    Succinct representation of the progression of gene pool with respect to F score.

    Draws :meth:`gene_pool_progression` on a fresh figure and saves it as
    ``gene_pool_progression`` in ``self.save_directory``.
    """
    figure = LatexFigure()
    axis = figure.new_axis()

    pool = self.genetic_algorithm.gene_pool
    # Sorted in place (highest F score first), i.e. the genetic algorithm's
    # own dataframe is reordered.
    pool.sort_values("f_score", inplace=True, ascending=False)

    self.gene_pool_progression(
        gene_pool=pool,
        ax=axis,
        f_score_cmap=self.f_score_cmap,
    )

    output_path = os.path.join(self.save_directory, "gene_pool_progression")
    figure.save(
        save_to_file=output_path,
        file_format=self.figure_format,
    )
@staticmethod
def gene_pool_progression(
    gene_pool, ax, f_score_cmap=None, draw_cbar=True, cbar_ax=None
):
    r"""
    Method for plotting succinct summary of progression of gene pool with respect to F score.

    Draws a heatmap on ``ax`` with one column per generation and one cell per
    model, coloured by F score. The number of rows is the number of models
    recorded for generation 1; generations are assumed to be numbered from 1.

    :param gene_pool: dataframe with ``generation`` and ``f_score`` columns
    :param ax: matplotlib axis to draw the heatmap on
    :param f_score_cmap: colour map for F scores; defaults to ``matplotlib.cm.RdBu``
    :param bool draw_cbar: whether to draw a colour bar
    :param cbar_ax: optional axis in which to draw the colour bar
    """
    if f_score_cmap is None:
        f_score_cmap = matplotlib.cm.RdBu

    num_models_per_generation = len(gene_pool[gene_pool.generation == 1])
    num_generations = gene_pool.generation.nunique()

    # NaN-initialised so a generation with no data appears blank rather than
    # showing whatever happened to be in uninitialised memory.
    f_scores_of_gene_pool = np.full(
        (num_models_per_generation, num_generations), np.nan
    )
    for g in gene_pool.generation.unique():
        f_scores_by_gen = gene_pool[gene_pool.generation == g].f_score
        # Generation labels may be stored as floats; array indices must be ints
        f_scores_of_gene_pool[:, int(g) - 1] = f_scores_by_gen

    sns.heatmap(
        f_scores_of_gene_pool,
        cmap=f_score_cmap,
        vmin=0,
        vmax=1,
        ax=ax,
        cbar=draw_cbar,
        # Honour the caller's colour-bar axis (previously accepted but unused)
        cbar_ax=cbar_ax,
        cbar_kws=dict(
            label=r"$F_1$-score",
            aspect=25,
            ticks=[0, 0.5, 1],
        ),
    )
    ax.set_yticks([])
    # Label every fifth generation, centred on its column
    xtick_pos = range(5, num_generations + 1, 5)
    ax.set_xticks([g - 0.5 for g in xtick_pos])
    ax.set_xticklabels(xtick_pos)
    ax.set_xlabel("Generation")

    # Only restyle the colour bar when one was actually drawn
    if draw_cbar and cbar_ax is not None:
        cbar = ax.collections[0].colorbar
        cbar.ax.set_ylabel(r"$F_1$", rotation=0, labelpad=10)  # if F horizontal
        cbar.ax.yaxis.set_label_position(
            "right",
        )
        cbar.ax.tick_params(labelleft=True, labelright=False)
class GeneticTest(Genetic):
    r"""
    Exactly as the genetic exploration strategy, but small depth to test quickly.

    The target is a fixed 4-site model of zJz couplings; the available genes
    are the xJx, yJy and zJz couplings between every pair of sites.
    """

    def __init__(self, exploration_rules, **kwargs):
        target = "pauliSet_1J2_zJz_d4+pauliSet_1J3_zJz_d4+pauliSet_2J3_zJz_d4+pauliSet_2J4_zJz_d4+pauliSet_3J4_zJz_d4"
        self.true_model = qmla.model_building_utilities.alph(target)
        site_count = qmla.model_building_utilities.get_num_qubits(target)

        # Every pair of sites (i < j), each with every likewise Pauli coupling
        site_pairs = itertools.combinations(range(1, 1 + site_count), 2)
        available_terms = [
            "pauliSet_{i}J{j}_{o}J{o}_d{N}".format(
                i=first_site,
                j=second_site,
                o=pauli,
                N=site_count,
            )
            for first_site, second_site in site_pairs
            for pauli in ["x", "y", "z"]
        ]

        super().__init__(
            exploration_rules=exploration_rules,
            genes=available_terms,
            true_model=self.true_model,
            **kwargs,
        )

        # Shrink the search set up by the parent: 2 generations of 6 models
        self.max_spawn_depth = 2
        self.max_num_probe_qubits = self.num_sites
        self.initial_num_models = 6
        self.initial_models = self.genetic_algorithm.random_initial_models(
            num_models=self.initial_num_models
        )
        self.tree_completed_initially = False
        models_allowed = (self.initial_num_models * self.max_spawn_depth) / 10
        self.max_num_models_by_shape = {
            self.num_sites: models_allowed,
            "other": 0,
        }
        self.num_processes_to_parallelise_over = self.initial_num_models
class GeneticAlgorithmQMLAFullyConnectedLikewisePauliTerms(Genetic):
    r"""
    Exact structure of :class:`~qmla.Genetic`, where the available terms
    are assumed to follow conventional pauliSet format, and all sites are connected.
    e.g. terms of the form:
    pauliSet_1J2_xJx_d2, pauliSet_1J2_yJy_d2, pauliSet_1J2_zJz_d2,

    :param str exploration_rules: name of exploration strategy used
    :param str true_model: name of the target model
    :param int num_sites: number of sites; if None, taken from ``true_model``
    :param base_terms: single-site operators ``o``; one ``oJo`` coupling is
        made available for every pair of sites
    """

    def __init__(
        self,
        exploration_rules,
        true_model,
        num_sites=None,
        # Immutable default: a list default would be shared between calls
        base_terms=("x", "y", "z"),
        **kwargs
    ):
        if num_sites is None:
            num_sites = qmla.model_building_utilities.get_num_qubits(true_model)

        # One gene per (site pair, base term), pairs ordered with i < j
        terms = []
        for i in range(1, 1 + num_sites):
            for j in range(i + 1, 1 + num_sites):
                for t in base_terms:
                    new_term = "pauliSet_{i}J{j}_{o}J{o}_d{N}".format(
                        i=i,
                        j=j,
                        o=t,
                        N=num_sites,
                    )
                    terms.append(new_term)

        super().__init__(
            exploration_rules=exploration_rules,
            genes=terms,
            true_model=true_model,
            **kwargs,
        )