import numpy as np
import itertools
import sys
import os
import random
import copy
import scipy
import time
import pandas as pd
import sklearn as skl
sys.path.append("/home/bf16951/QMD")
import qmla
import qmla.model_building_utilities
__all__ = ["GeneticAlgorithmQMLA", "GeneticAlgorithmFullyConnectedLikewisePauliTerms"]
[docs]class GeneticAlgorithmQMLA:
r"""
Standalone genetic algorithm implementation for integration with :class:`qmla.QuantumModelLearningAgent`.
This class works with the :class:`~qmla.exploration_strategies.ExplorationStrategy`
to construct models according to the genetic strategy.
:param list genes: individual terms which can be combined to form chromosomes
:param int num_sites: maximum dimension permitted in model search
:param str true_model: target model. if None, set at random from space of valid models.
:param list base_terms: deprecated TODO remove
:param str selection_method: mechanism through which to select chromosomes as parents.
Currently only 'roulette' available, but the framework should facilitate
alternatives.
:param str crossover_method: mechanism through which parent chromosomes are combined
to form offspring.
Currently only 'one_point' available, but the framework should facilitate
alternatives.
:param str mutation_method: mechanism through which to perform chromosome mutation
Currently only 'element_wise' available, but the framework should facilitate
alternatives.
:param float mutation_probability: rate with which the mutation mechanism incurs mutation.
:param float selection_truncation_rate: fraction of models to retain as viable parents
to the subsequent generation; the lower-rated other models are discarded.
:param int num_protected_elite_models: number of models to automatically admit to the
subsequent generation.
:param int unchanged_elite_num_generations_cutoff: after this number of generations,
if the top model has not changed, the model search is terminated.
:param str log_file: path of QMLA instance's log file.
"""
def __init__(
self,
genes,
num_sites,
true_model=None,
base_terms=["x", "y", "z"],
selection_method="roulette",
crossover_method="one_point",
mutation_method="element_wise",
mutation_probability=0.1,
selection_truncation_rate=0.5,
num_protected_elite_models=2,
unchanged_elite_num_generations_cutoff=5,
log_file=None,
**kwargs
):
self.num_sites = num_sites
self.base_terms = base_terms
self.genes = list(sorted(genes))
self.get_base_chromosome()
if true_model is None:
r = random.randint(1, 2 ** self.num_terms - 1)
r = format(r, "0{}b".format(self.num_terms))
self.true_model = self.map_chromosome_to_model(r)
else:
self.true_model = true_model
self.true_chromosome = self.map_model_to_chromosome(self.true_model)
self.true_chromosome_string = self.chromosome_string(self.true_chromosome)
self.all_zero_chromosome_string = "0" * self.num_terms
self.addition_str = "+"
self.mutation_probability = mutation_probability
self.mutation_count = 0
self.previously_considered_chromosomes = []
self.chromosomes_at_generation = {}
self.delta_f_by_generation = {}
self.genetic_generation = 1
self.log_file = log_file
self.f_score_change_by_generation = {}
self.fitness_at_generation = {}
self.models_ranked_by_fitness = {}
self.most_elite_models_by_generation = {}
self.num_protected_elite_models = num_protected_elite_models
self.terminate_early_if_top_model_unchanged = True
self.best_model_unchanged = False
self.unchanged_elite_num_generations_cutoff = (
unchanged_elite_num_generations_cutoff
)
self.selection_truncation_rate = selection_truncation_rate
self.gene_pool = pd.DataFrame(
columns=["model", "chromosome", "f_score", "probability", "generation"]
)
self.elite_models = pd.DataFrame(
columns=["model", "chromosome", "f_score", "generation", "elite_position"]
)
# specifying which functionality to use
self.selection_method = self.select_from_pair_df_remove_selected
self.mutation_method = self.element_wise_mutation
self.crossover_method = self.one_point_crossover
available_selection_methods = {
"roulette": self.select_from_pair_df_remove_selected,
}
available_mutation_methods = {"element_wise": self.element_wise_mutation}
available_crossover_methods = {"one_point": self.one_point_crossover}
self.selection_method = available_selection_methods[selection_method]
self.mutation_method = available_mutation_methods[mutation_method]
self.crossover_method = available_crossover_methods[crossover_method]
[docs] def get_base_chromosome(self):
r"""
Creates basic chromosome, i.e. with all genes set to 0.
"""
self.num_terms = len(self.genes)
self.basic_chromosome = np.array([0] * self.num_terms)
self.chromosome_description = self.genes
self.chromosome_description_array = np.array(self.genes)
[docs] def map_chromosome_to_model(
self,
chromosome,
):
r"""
Given a chromosome, get the corresponding model.
:param np.array chromosome: chromosome representing a candidate model
:returns str model_string: name of the corresponding model
"""
if isinstance(chromosome, str):
chromosome = list(chromosome)
chromosome = np.array([int(i) for i in chromosome])
assert (
len(chromosome) == self.num_terms
), "Chromosome must be of length {}".format(self.num_terms)
nonzero_postions = chromosome.nonzero()
present_terms = list(self.chromosome_description_array[nonzero_postions])
model_string = "+".join(present_terms)
return model_string
[docs] def map_model_to_chromosome(self, model):
r"""
Given a model, get the corresponding chromosome.
:param str model: name of candidate model
:returns np.array chromosome: array of ones and zeros indicating which genes are active in the model
"""
terms = qmla.model_building_utilities.get_constituent_names_from_name(model)
assert np.all(
[t in self.chromosome_description for t in terms]
), "Cannot map some term(s) to any available gene. Terms: {} \n Genes".format(
terms, self.chromosome_description
)
locs = [self.chromosome_description.index(t) for t in terms]
chromosome = copy.copy(self.basic_chromosome)
chromosome[np.array(locs)] = 1
return chromosome
[docs] def model_f_score(self, model_name):
r"""
Get the F score of a candidate model.
:param str model_name: name of candidate model
:returns float f_score: F score, between 0 and 1, indicating how many terms overlap
between the candidate and target models.
"""
model_as_chromosome = self.map_model_to_chromosome(model_name)
return self.chromosome_f_score(model_as_chromosome)
[docs] def chromosome_string(self, c):
r"""Map a chromosome array to a string."""
b = [str(i) for i in c]
s = "".join(b)
if s == "1000000000":
# TODO generaalise
# 1 followed by num_terms 0's can be generated and is not permitted
self.log_print(["Unallowed chromosome string {} for {}".format(b, c)])
return s
[docs] def chromosome_f_score(
self,
chromosome,
):
r"""
Get the F score of a candidate model from its chromosome representation.
:param np.array chromosome: representation of candidate model
:returns float f_score: F score, between 0 and 1, indicating how many terms overlap
between the candidate and target models.
"""
if not isinstance(chromosome, np.ndarray):
chromosome = np.array([int(a) for a in list(chromosome)])
return skl.metrics.f1_score(chromosome, self.true_chromosome)
[docs] def log_print(self, to_print_list):
r"""Wrapper for :func:`~qmla.print_to_log`"""
qmla.logging.print_to_log(
to_print_list=to_print_list,
log_file=self.log_file,
log_identifier="GA gen {}".format(self.genetic_generation),
)
[docs] def random_initial_models(self, num_models=5):
r"""
Generate random models from the space of valid candidates.
:param int num_models: number of candidates to generate
:returns list new_models: the randomly generated model names
"""
if num_models > 2 ** self.num_terms:
self.log_print(
[
"Number of models requested > number of possible models ({})".format(
2 ** self.num_terms
),
"Reducing by half until < half available",
]
)
while num_models > (2 ** self.num_terms) / 2:
num_models = int(num_models / 2)
new_models = []
self.initial_number_models = num_models
self.chromosomes_at_generation[0] = []
self.previously_considered_chromosomes = []
self.birth_register = pd.DataFrame(
columns=[
"child",
"chromosome_child",
"parent_a",
"parent_b",
"chromosome_parent_a",
"chromosome_parent_b",
"generation",
"f_score",
]
) # TODO this is awful - this stuff shouldn't be initialised in this function
while len(new_models) < num_models:
# generate random number and
# format as binary string, i.e. chromosome
r = random.randint(1, 2 ** self.num_terms - 1)
r = format(r, "0{}b".format(self.num_terms))
if self.chromosome_string(r) not in self.previously_considered_chromosomes:
r = list(r)
r = np.array([int(i) for i in r])
mod = self.map_chromosome_to_model(r)
chrom = self.chromosome_string(r)
f = self.chromosome_f_score(chrom)
self.previously_considered_chromosomes.append(chrom)
self.chromosomes_at_generation[0].append(chrom)
new_models.append(mod)
birth = pd.Series(
{
"child": mod,
"chromosome_child": chrom,
"generation": 1,
"f_score": f,
}
)
self.birth_register.loc[len(self.birth_register)] = birth
return new_models
[docs] def rand_model_f(self):
r"""
Generate a random model chromosome and evaluate its F score.
"""
r = 0
while r == 0:
r = np.random.randint(2 ** self.num_terms)
b = bin(r)[2:].zfill(self.num_terms)
b_array = np.array([int(i) for i in list(b)])
f = skl.metrics.f1_score(b_array, self.true_chromosome)
return f, b_array
[docs] def random_models_sorted_by_f_score(
self,
num_models=14,
):
r"""
Generate a set of random models and sort them by F score.
"""
n_runs = 1e3 # first sample ~1000 random numbers
some_models = [self.rand_model_f() for _ in range(int(n_runs))]
f_scores = np.array(some_models)[:, 0]
chromosomes = np.array(some_models)[:, 1]
# then choose from those randomly generated models
random_chroms = np.random.choice(chromosomes, num_models)
random_models = [self.map_chromosome_to_model(c) for c in random_chroms]
models_w_f = list(
zip(random_models, [self.model_f_score(m) for m in random_models])
)
sorted_by_f = sorted(models_w_f, key=lambda x: x[1])
sorted_models = np.array(sorted_by_f)[:, 0]
sorted_models = list(sorted_models)
just_f = np.array(models_w_f)[:, 1]
just_f = [float(a) for a in just_f]
return sorted_models
######################
# Selection functions
######################
[docs] def selection(self, **kwargs):
r"""
Wrapper for user's selected selection method.
Whatever method is called must return
* prescribed_chromosomes
* chromosomes_for_crossover - pairs
"""
return self.selection_method(**kwargs)
def select_from_pair_df_remove_selected(self, **kwargs):
# normalise so pairs' probabilities sum to 1
self.chrom_pair_df.probability = self.chrom_pair_df.probability.astype(float)
self.chrom_pair_df.probability = (
self.chrom_pair_df.probability / self.chrom_pair_df.probability.sum()
)
pair_ids = list(self.chrom_pair_df.index)
pair_probs = [self.chrom_pair_df.loc[i].probability for i in pair_ids]
self.log_print(["Number available pairs:", len(pair_ids)])
# randomly select a pair from list of pairs
selected_id = np.random.choice(a=pair_ids, p=pair_probs)
selected_entry = self.chrom_pair_df.loc[selected_id]
# Drop so it can't be chosen again
self.chrom_pair_df.drop(selected_id, inplace=True)
self.log_print(
["chrom pair df has {} options remaining".format(len(self.chrom_pair_df))]
)
selection = {
"chromosome_1": selected_entry["c1"],
"chromosome_2": selected_entry["c2"],
"other_data": {
"cut": int(selected_entry["cut1"]),
"force_mutation": bool(selected_entry["force_mutation"]),
},
}
return selection
[docs] def basic_pair_selection(self, chromosome_selection_probabilities, **kwargs):
r"""
Mechanism for selecting two models from the database of potential parents.
:param pd.DataFrame chromosome_selection_probabilities:
database indicating the probability that every valid pair of
parents should be selected.
:return tuple selected_chromosomes: two models
"""
chromosomes = list(chromosome_selection_probabilities.keys())
probabilities = [chromosome_selection_probabilities[c] for c in chromosomes]
selected_chromosomes = np.random.choice(
chromosomes, size=2, p=probabilities, replace=False
)
return selected_chromosomes
######################
# Crossover functions
######################
[docs] def crossover(self, **kwargs):
r"""
Wrapper for crossover mechanism.
This method assumes only 2 chromosomes to crossover
and passes them to the method set as self.crossover_method, which can be easily replaced
to facilitate alternative crossover schemes.
"""
return self.crossover_method(**kwargs)
[docs] def one_point_crossover(self, **kwargs):
r"""
Crossover two chromosomes about a single gene.
Input two chromosomes, and selection (a dict) in kwargs.
selection contains ``chromosome_1`` and ``chromosome_2``,
as well as a dict called ``other_data`` containing ``cut``,
which is the position about which to crossover the two chromosomes.
"""
selection = kwargs["selection"]
c1 = np.array(list(selection["chromosome_1"]))
c2 = np.array(list(selection["chromosome_2"]))
x = selection["other_data"]["cut"]
tmp = c2[:x].copy()
c2[:x], c1[:x] = c1[:x], tmp
return c1, c2
######################
# Mutation functions
######################
[docs] def mutation(self, **kwargs):
r"""
Wrapper for mutation mechanism.
All input arguments to the mutation method are passed directly to
the nominated mutation function, set as self.mutation_method.
"""
return self.mutation_method(**kwargs)
[docs] def element_wise_mutation(self, **kwargs):
r"""
Probabilistically mutate each gene independently.
"""
chromosomes = kwargs["chromosomes"]
force_mutation = kwargs["force_mutation"]
copy_chromosomes = copy.copy(chromosomes)
mutated_chromosomes = []
for c in copy_chromosomes:
try:
if np.all(c == 0):
self.log_print(
[
"Input chomosome {} has no interactions -- forcing mutation".format(
c
)
]
)
mutation_probability = 1.0
else:
mutation_probability = self.mutation_probability
except:
self.log_print(["Can't compare all w/ 0 :", c])
mutation_probability = self.mutation_probability
if np.random.rand() < mutation_probability or force_mutation:
num_mutations_to_perform = max(1, force_mutation)
self.mutation_count += 1
idx = np.random.choice(range(len(c)))
# print("Flipping idx {}".format(idx))
if int(c[idx]) == 0:
c[idx] = "1"
elif int(c[idx]) == 1:
c[idx] = "0"
mutated_chromosomes.append(c)
return mutated_chromosomes
######################
# Elitism functions
######################
[docs] def get_elite_models(self, **kwargs):
r"""
Wrapper for elite model selection method,
here set to self.elite_ranking_top_n_models.
"""
return self.elite_ranking_top_n_models(**kwargs)
[docs] def elite_ranking_top_n_models(self, model_fitnesses, **kwargs):
r"""
Get the top N models, and store info on the elite models to date.
:param dict model_fitnesses: the fitness of each model in this generation according to the
chosen objective function.
"""
elite_models = self.models_ranked_by_fitness[self.genetic_generation][
: self.num_protected_elite_models
]
self.log_print(
[
"Elite models at generation {}: {}".format(
self.genetic_generation, elite_models
)
]
)
for m in elite_models:
self.elite_models = self.elite_models.append(
pd.Series(
{
"model": m,
"generation": self.genetic_generation,
"elite_position": elite_models.index(m) + 1,
"chromosome": self.map_model_to_chromosome(m),
"f_score": self.model_f_score(m),
}
),
ignore_index=True,
)
self.most_elite_models_by_generation[
self.genetic_generation
] = self.models_ranked_by_fitness[self.genetic_generation][0]
if self.genetic_generation > self.unchanged_elite_num_generations_cutoff + 2:
gen = self.genetic_generation
recent_generations = list(
range(
max(0, gen - self.unchanged_elite_num_generations_cutoff), gen + 1
)
)
recent_elite_models = [
self.most_elite_models_by_generation[g] for g in recent_generations
]
unchanged = np.all(
np.array(recent_elite_models)
== self.most_elite_models_by_generation[gen]
)
if unchanged and self.terminate_early_if_top_model_unchanged:
# TODO this allows for unusual case where top model unchanged in 5 generations,
# but is improved upon in the subsequent generation.
# but since 5 generations are unchanged, termination is triggered and the new generation champion is winner
self.best_model_unchanged = True
self.log_print(
[
"Setting best_model_unchanged to {}".format(
self.best_model_unchanged
)
]
)
self.log_print(
[
"Elite model unchanged in last {} generations: {}. \nCurrently: {} with f-score {}".format(
self.unchanged_elite_num_generations_cutoff,
self.best_model_unchanged,
self.most_elite_models_by_generation[gen],
self.chromosome_f_score(
self.map_model_to_chromosome(
self.most_elite_models_by_generation[gen]
)
),
)
]
)
return elite_models
######################
# Processing given fitness to
# selection probabilities
######################
[docs] def get_selection_probabilities(self, **kwargs):
r"""
Wrapper for parent selection function, here set to self.truncate_to_top_half.
"""
return self.truncate_to_top_half(**kwargs)
[docs] def truncate_to_top_half(self, model_fitnesses, **kwargs):
r"""
Retain only the top-performing half of models considered at this generation,
for consideration as parents to offspring on the subsequent generation.
:param dict model_fitnesses: the fitness of each model in this generation according to the
chosen objective function.
"""
ranked_models = sorted(model_fitnesses, key=model_fitnesses.get, reverse=True)
num_models = len(ranked_models)
self.log_print(
[
"Considering truncation for {} models. Truncation rate = {}".format(
num_models, self.selection_truncation_rate
),
]
)
for m in ranked_models:
self.log_print(["fitness = {} \t Model={} ".format(model_fitnesses[m], m)])
truncation_cutoff = max(
int(num_models * self.selection_truncation_rate), 4
) # either consider top half, or top 4 if too small
truncation_cutoff = min(truncation_cutoff, num_models)
truncated_model_list = ranked_models[:truncation_cutoff]
truncated_model_fitnesses = {
mod: model_fitnesses[mod] for mod in truncated_model_list
}
# keep the others with zero fitness, so the gene pool reflect them
for m in ranked_models[truncation_cutoff:]:
self.log_print(
[
"Setting fitness to 0 for {} as it is {}th in rankings".format(
m, ranked_models.index(m)
)
]
)
truncated_model_fitnesses[m] = 0
sum_fitnesses = np.sum(list(truncated_model_fitnesses.values()))
self.log_print(
[
"Truncated model list:\n",
truncated_model_list,
"\nTruncated model fitnesses:\n",
truncated_model_fitnesses,
"\nsum fitnesses:",
sum_fitnesses,
]
)
model_probabilities = {
self.chromosome_string(self.map_model_to_chromosome(mod)): (
truncated_model_fitnesses[mod] / sum_fitnesses
)
for mod in truncated_model_fitnesses.keys()
}
self.log_print(["Chromosome Selection probabilities:\n", model_probabilities])
return model_probabilities
[docs] def prepare_chromosome_pair_dataframe(
self,
chromosome_probabilities,
force_mutation=False,
):
r"""
Given a set of individual chromosome fitnesses, generate database of pairs of
parent chromosomes, with probability proportional to the fitness of both parents.
"""
self.log_print(
[
"Setting up chromosome pair dataframe with initial probabilities",
chromosome_probabilities,
]
)
if len(chromosome_probabilities) == 1:
self.log_print(
["There is only one chromosome; not constructing selection database."]
)
return
# Register gene pool
for c in chromosome_probabilities:
model = self.map_chromosome_to_model(c)
gene_probability = pd.Series(
{
"model": model,
"chromosome": c,
"f_score": self.model_f_score(model),
"probability": chromosome_probabilities[c],
"generation": self.genetic_generation,
}
)
self.gene_pool.loc[len(self.gene_pool)] = gene_probability
# Construct df of pairs of chromosomes from the gene pool, where the probability of that
# pair being selected is the product of their individual fitnesses
t2 = time.time()
chromosome_combinations = list(
itertools.combinations(list(chromosome_probabilities.keys()), 2)
)
eg_combo = chromosome_combinations[0]
min_cut_pt = int(len(eg_combo[0]) * 0.25)
max_cut_pt = int(len(eg_combo[0]) * 0.75) + 1
self.log_print(
[
"example chrom combination : {}. \n min/max cut locations = {}/{}".format(
eg_combo, min_cut_pt, max_cut_pt
)
]
)
pair_data = []
count_good_pairs = 0
for c1, c2 in chromosome_combinations:
pair_prob = (
chromosome_probabilities[c1] * chromosome_probabilities[c2]
) # TODO better way to get pair prob?
# for cut1 in range(1, len(c1)-2):
if pair_prob > 0:
count_good_pairs += 1
self.log_print(
["Nonzero prob pair: {} & {}, prob = {}".format(c1, c2, pair_prob)]
)
for cut1 in range(min_cut_pt, max_cut_pt):
this_pair_df = {
"c1": c1,
"c2": c2,
"probability": pair_prob, # np.round(pair_prob, 2),
"cut1": cut1,
"c1_prob": chromosome_probabilities[c1],
"c2_prob": chromosome_probabilities[c2],
"force_mutation": force_mutation,
}
pair_data.append(this_pair_df)
self.chrom_pair_df = pd.DataFrame.from_dict(pair_data)
# normalise probabilities
try:
self.chrom_pair_df.probability = self.chrom_pair_df.probability.astype(
float
)
self.chrom_pair_df.probability = (
self.chrom_pair_df.probability / self.chrom_pair_df.probability.sum()
)
except:
self.log_print(
["Failing at final generation. chrom pair df:", self.chrom_pair_df]
)
self.log_print(
[
"starting chromosome pair dataframe setup. {} combinations in total from {} non-zero prob pairs. took {} sec and has len {}".format(
len(chromosome_combinations),
count_good_pairs,
np.round(time.time() - t2, 3),
len(self.chrom_pair_df),
)
]
)
self.log_print(
[
"Probs after preparing df:",
self.chrom_pair_df[["c1", "c2", "probability"]],
]
)
[docs] def get_pair_selection_order(self):
r"""
Use the probabilities of parental selection to define the order in which to generate offspring.
It is cheaper to perform this once than call the database repeatedly.
:return list pair_selection_order: list of tuples of the order in which to pass
the model pairs to the crossover mechanism to generate offspring
"""
pair_idx = self.chrom_pair_df.index.values
probabilities = self.chrom_pair_df.probability.values
# only keep nonzero probs
pair_idx = pair_idx[probabilities > 0]
probabilities = probabilities[probabilities > 0]
self.log_print(
[
"get_pair_selection_order probabilities: ",
probabilities,
"\n {} distinct".format(len(probabilities)),
"\n sum:",
np.sum(probabilities),
]
)
probabilities /= np.sum(probabilities)
n_samples = len(probabilities)
self.log_print(
["Getting {} samples from chromosome probabilities".format(n_samples)]
)
t1 = time.time()
pair_selection_order = np.random.choice(
a=pair_idx, size=n_samples, p=probabilities, replace=False
)
self.log_print(
[
"after {} s, pair_selection_order has {} elements ({} unique): \n {}".format(
np.round(time.time() - t1, 3),
len(pair_selection_order),
len(set(pair_selection_order)),
repr(pair_selection_order),
)
]
)
return pair_selection_order
######################
# Implement entire genetic algorithm iteration
######################
[docs] def consolidate_generation(self, model_fitnesses, **kwargs):
r"""
Following the training of all models on a generation, consolidate that generation.
This involves determining the strongest models from the generation,
and constructing the database of parent-pairs and their associated selection probabilities.
:param dict model_fitnesses: the fitness of each model in this generation according to the
chosen objective function.
"""
self.fitness_at_generation[self.genetic_generation] = model_fitnesses
self.models_ranked_by_fitness[self.genetic_generation] = sorted(
model_fitnesses, key=model_fitnesses.get, reverse=True
)
self.log_print(
[
"GA step. model ranked by fitness:",
self.models_ranked_by_fitness[self.genetic_generation],
]
)
self.get_elite_models(
model_fitnesses=model_fitnesses, num_protected_elite_models=2
)
self.chromosome_selection_probabilities = self.get_selection_probabilities(
model_fitnesses=model_fitnesses,
)
t_init = time.time()
self.prepare_chromosome_pair_dataframe(
chromosome_probabilities=self.chromosome_selection_probabilities
)
[docs] def genetic_algorithm_step(self, model_fitnesses, **kwargs):
r"""
Perform a complete step of the genetic algorithm, assuming all of the required steps have been performed.
That is, the database for parent selection must already be available.
:param dict model_fitnesses: the fitness of each model in this generation according to the
chosen objective function.
:returns list new_models: set of models to place on the next generation.
"""
# get the order to iterate through chromosome pairs
self.log_print(["Genetic algorithm step {}".format(self.genetic_generation)])
pair_selection_order = self.get_pair_selection_order()
init_num_chrom_pairs = len(pair_selection_order)
pair_selection_order = iter(pair_selection_order)
elite_models = list(
self.elite_models[
self.elite_models.generation == self.genetic_generation
].model
)
self.log_print(["elite models to start off with:", elite_models])
proposed_chromosomes = [
self.chromosome_string(self.map_model_to_chromosome(mod))
for mod in elite_models
] # list of chromosome strings to return
input_models = list(model_fitnesses.keys())
num_models_for_next_generation = len(input_models)
self.log_print(
["Num models reqd for generation:", num_models_for_next_generation]
)
num_loops_to_find_new_chromosome = 0
force_mutation = False
num_genes_to_force_mutate = 0
t_init = time.time()
while len(proposed_chromosomes) < num_models_for_next_generation:
# selection = self.selection()
try:
selected_id = next(pair_selection_order)
except:
self.log_print(["no pairs remaining."]) # TODO now what?
raise
selected_entry = self.chrom_pair_df.loc[selected_id]
selection = {
"chromosome_1": selected_entry["c1"],
"chromosome_2": selected_entry["c2"],
"other_data": {
"cut": int(selected_entry["cut1"]),
"force_mutation": bool(selected_entry["force_mutation"]),
},
}
suggested_chromosomes = self.crossover(selection=selection)
suggested_chromosomes = self.mutation(
chromosomes=suggested_chromosomes,
force_mutation=selection["other_data"]["force_mutation"],
)
c0_str = self.chromosome_string(suggested_chromosomes[0])
c1_str = self.chromosome_string(suggested_chromosomes[1])
for c in [c0_str, c1_str]:
if (
c not in proposed_chromosomes
and c != self.all_zero_chromosome_string
):
proposed_chromosomes.append(c)
self.log_print(
[
"num proposed chromosome now: {} of {}".format(
len(proposed_chromosomes),
num_models_for_next_generation,
),
"new chromosome:",
c,
]
)
birth = pd.Series(
{
"child": self.map_chromosome_to_model(c),
"chromosome_child": c,
"chromosome_parent_a": selection["chromosome_1"],
"chromosome_parent_b": selection["chromosome_2"],
"parent_a": self.map_chromosome_to_model(
selection["chromosome_1"]
),
"parent_b": self.map_chromosome_to_model(
selection["chromosome_2"]
),
"generation": self.genetic_generation,
"f_score": self.chromosome_f_score(c),
}
)
self.birth_register.loc[len(self.birth_register)] = birth
self.log_print(["Registering birth"])
if len(self.chrom_pair_df) == 0:
# already tried every available pair
num_genes_to_force_mutate += 1 # TODO increase number of genes to flip to diversify population when repetitive
self.log_print(
[
"Redrawing chromosome pair selection dataframe, enforcing mutation on {} genes".format(
num_genes_to_force_mutate
)
]
)
self.prepare_chromosome_pair_dataframe(
chromosome_probabilities=self.chromosome_selection_probabilities,
force_mutation=True
# force_mutation=num_genes_to_force_mutate
)
# chop extra chromosomes if generated
proposed_chromosomes = proposed_chromosomes[:num_models_for_next_generation]
self.previously_considered_chromosomes.extend(
[self.chromosome_string(r) for r in proposed_chromosomes]
)
# self.delta_f_by_generation[self.genetic_generation] = delta_f_score
self.chromosomes_at_generation[self.genetic_generation] = [
self.chromosome_string(r) for r in proposed_chromosomes
]
new_models = [self.map_chromosome_to_model(mod) for mod in proposed_chromosomes]
self.log_print(
[
"Genetic alg num new models:{}".format(len(new_models)),
"({} unique)".format(len(set(list(new_models)))),
]
)
self.genetic_generation += 1
return new_models
class GeneticAlgorithmFullyConnectedLikewisePauliTerms(GeneticAlgorithmQMLA):
r"""
Exact structure of :class:`~qmla.GeneticAlgorithmQMLA`, where the avaiable terms
are assumed to follow conventional pauliSet format,
and all sites are connected.
e.g. terms of the form
pauliSet_1J2_xJx_d2, pauliSet_1J2_yJy_d2, pauliSet_1J2_zJz_d2,
:param int num_sites: dimension to permit model search within
:param list base_terms: terms to use with pauliSet-type terms
"""
def __init__(self, num_sites, base_terms=["x", "y", "z"], **kwargs):
terms = []
for i in range(1, 1 + num_sites):
for j in range(i + 1, 1 + num_sites):
for t in base_terms:
new_term = "pauliSet_{i}J{j}_{o}J{o}_d{N}".format(
i=i,
j=j,
o=t,
N=num_sites,
)
terms.append(new_term)
super().__init__(genes=terms, num_sites=num_sites, **kwargs)
def multidimensional_shifting(num_samples, sample_size, elements, probabilities):
# replicate probabilities as many times as `num_samples`
replicated_probabilities = np.tile(probabilities, (num_samples, 1))
# get random shifting numbers & scale them correctly
random_shifts = np.random.random(replicated_probabilities.shape)
random_shifts /= random_shifts.sum(axis=1)[:, np.newaxis]
# shift by numbers & find largest (by finding the smallest of the negative)
shifted_probabilities = random_shifts - replicated_probabilities
return np.argpartition(shifted_probabilities, sample_size, axis=1)[:, :sample_size]