# Source code for qmla.shared_functionality.genetic_algorithm

import numpy as np
import itertools
import sys
import os
import random
import copy
import scipy
import time
import pandas as pd
import sklearn as skl
sys.path.append("/home/bf16951/QMD")
import qmla

import qmla.model_building_utilities

__all__ = ["GeneticAlgorithmQMLA", "GeneticAlgorithmFullyConnectedLikewisePauliTerms"]


[docs]class GeneticAlgorithmQMLA:
    r"""
    Standalone genetic algorithm implementation for integration with :class:`qmla.QuantumModelLearningAgent`.

    This class works with the :class:`~qmla.exploration_strategies.ExplorationStrategy`
    to construct models according to the genetic strategy.

    :param list genes: individual terms which can be combined to form chromosomes
    :param int num_sites: maximum dimension permitted in model search
    :param str true_model: target model. if None, set at random from space of valid models.
    :param list base_terms: deprecated TODO remove
    :param str selection_method: mechanism through which to select chromosomes as parents.
        Currently only 'roulette' available, but the framework should facilitate
        alternatives.
    :param str crossover_method: mechanism through which parent chromosomes are combined
        to form offspring.
        Currently only 'one_point' available, but the framework should facilitate
        alternatives.
    :param str mutation_method: mechanism through which to perform chromosome mutation
        Currently only 'element_wise' available, but the framework should facilitate
        alternatives.
    :param float mutation_probability: rate with which the mutation mechanism incurs mutation.
    :param float selection_truncation_rate: fraction of models to retain as viable parents
        to the subsequent generation; the lower-rated other models are discarded.
    :param int num_protected_elite_models: number of models to automatically admit to the
        subsequent generation.
    :param int unchanged_elite_num_generations_cutoff: after this number of generations,
        if the top model has not changed, the model search is terminated.
    :param str log_file: path of QMLA instance's log file.

    """

    def __init__(
        self,
        genes,
        num_sites,
        true_model=None,
        base_terms=["x", "y", "z"],
        selection_method="roulette",
        crossover_method="one_point",
        mutation_method="element_wise",
        mutation_probability=0.1,
        selection_truncation_rate=0.5,
        num_protected_elite_models=2,
        unchanged_elite_num_generations_cutoff=5,
        log_file=None,
        **kwargs
    ):
        r"""
        Set up the gene space, the target chromosome, the bookkeeping containers
        and the selection/crossover/mutation operators.

        The meaning of each argument is given in the class docstring.
        Any additional keyword arguments are accepted and ignored.

        :raises KeyError: if ``selection_method``, ``mutation_method`` or
            ``crossover_method`` is not one of the available mechanisms.
        """

        self.num_sites = num_sites
        # Copy so that instances never share (and cannot corrupt) the default list object.
        self.base_terms = list(base_terms)
        self.genes = list(sorted(genes))
        # Defines num_terms and the gene lookup tables needed by everything below.
        self.get_base_chromosome()

        if true_model is None:
            # Draw a random non-zero integer and read its binary form as a chromosome.
            r = random.randint(1, 2 ** self.num_terms - 1)
            r = format(r, "0{}b".format(self.num_terms))
            self.true_model = self.map_chromosome_to_model(r)
        else:
            self.true_model = true_model

        self.true_chromosome = self.map_model_to_chromosome(self.true_model)
        self.true_chromosome_string = self.chromosome_string(self.true_chromosome)
        self.all_zero_chromosome_string = "0" * self.num_terms
        self.addition_str = "+"
        self.mutation_probability = mutation_probability
        self.mutation_count = 0
        self.previously_considered_chromosomes = []
        self.chromosomes_at_generation = {}
        self.delta_f_by_generation = {}
        self.genetic_generation = 1
        self.log_file = log_file
        self.f_score_change_by_generation = {}
        self.fitness_at_generation = {}
        self.models_ranked_by_fitness = {}
        self.most_elite_models_by_generation = {}
        self.num_protected_elite_models = num_protected_elite_models
        self.terminate_early_if_top_model_unchanged = True
        self.best_model_unchanged = False
        self.unchanged_elite_num_generations_cutoff = (
            unchanged_elite_num_generations_cutoff
        )
        self.selection_truncation_rate = selection_truncation_rate
        self.gene_pool = pd.DataFrame(
            columns=["model", "chromosome", "f_score", "probability", "generation"]
        )
        self.elite_models = pd.DataFrame(
            columns=["model", "chromosome", "f_score", "generation", "elite_position"]
        )

        # Map the user-facing mechanism names onto the methods implementing them;
        # new mechanisms are made available by adding an entry here.
        available_selection_methods = {
            "roulette": self.select_from_pair_df_remove_selected,
        }
        available_mutation_methods = {"element_wise": self.element_wise_mutation}
        available_crossover_methods = {"one_point": self.one_point_crossover}

        self.selection_method = available_selection_methods[selection_method]
        self.mutation_method = available_mutation_methods[mutation_method]
        self.crossover_method = available_crossover_methods[crossover_method]

[docs]    def get_base_chromosome(self):
        r"""
        Creates basic chromosome, i.e. with all genes set to 0.
        """

        self.num_terms = len(self.genes)
        self.basic_chromosome = np.array([0] * self.num_terms)
        self.chromosome_description = self.genes
        self.chromosome_description_array = np.array(self.genes)

[docs]    def map_chromosome_to_model(
        self,
        chromosome,
    ):
        r"""
        Given a chromosome, get the corresponding model.

        :param np.array chromosome: chromosome representing a candidate model
        :returns str model_string: name of the corresponding model
        """

        if isinstance(chromosome, str):
            chromosome = list(chromosome)
            chromosome = np.array([int(i) for i in chromosome])
        assert (
            len(chromosome) == self.num_terms
        ), "Chromosome must be of length {}".format(self.num_terms)

        nonzero_postions = chromosome.nonzero()
        present_terms = list(self.chromosome_description_array[nonzero_postions])

        model_string = "+".join(present_terms)
        return model_string

[docs]    def map_model_to_chromosome(self, model):
        r"""
        Given a model, get the corresponding chromosome.

        :param str model: name of candidate model
        :returns np.array chromosome: array of ones and zeros indicating which genes are active in the model
        """

        terms = qmla.model_building_utilities.get_constituent_names_from_name(model)
        assert np.all(
            [t in self.chromosome_description for t in terms]
        ), "Cannot map some term(s) to any available gene. Terms: {} \n Genes".format(
            terms, self.chromosome_description
        )

        locs = [self.chromosome_description.index(t) for t in terms]
        chromosome = copy.copy(self.basic_chromosome)
        chromosome[np.array(locs)] = 1
        return chromosome

[docs]    def model_f_score(self, model_name):
        r"""
        Get the F score of a candidate model.

        :param str model_name: name of candidate model
        :returns float f_score: F score, between 0 and 1, indicating how many terms overlap
            between the candidate and target models.
        """

        model_as_chromosome = self.map_model_to_chromosome(model_name)
        return self.chromosome_f_score(model_as_chromosome)

[docs]    def chromosome_string(self, c):
        r"""Map a chromosome array to a string."""

        b = [str(i) for i in c]
        s = "".join(b)
        if s == "1000000000":
            # TODO generaalise
            # 1 followed by num_terms 0's can be generated and is not permitted
            self.log_print(["Unallowed chromosome string {} for {}".format(b, c)])
        return s

[docs]    def chromosome_f_score(
        self,
        chromosome,
    ):
        r"""
        Get the F score of a candidate model from its chromosome representation.

        :param np.array chromosome: representation of candidate model
        :returns float f_score: F score, between 0 and 1, indicating how many terms overlap
            between the candidate and target models.

        """

        if not isinstance(chromosome, np.ndarray):
            chromosome = np.array([int(a) for a in list(chromosome)])

        return skl.metrics.f1_score(chromosome, self.true_chromosome)

[docs]    def log_print(self, to_print_list):
        r"""Wrapper for :func:`~qmla.print_to_log`"""
        qmla.logging.print_to_log(
            to_print_list=to_print_list,
            log_file=self.log_file,
            log_identifier="GA gen {}".format(self.genetic_generation),
        )

[docs]    def random_initial_models(self, num_models=5):
        r"""
        Generate random models from the space of valid candidates.

        :param int num_models: number of candidates to generate
        :returns list new_models: the randomly generated model names
        """

        if num_models > 2 ** self.num_terms:
            self.log_print(
                [
                    "Number of models requested > number of possible models ({})".format(
                        2 ** self.num_terms
                    ),
                    "Reducing by half until < half available",
                ]
            )

            while num_models > (2 ** self.num_terms) / 2:
                num_models = int(num_models / 2)
        new_models = []
        self.initial_number_models = num_models
        self.chromosomes_at_generation[0] = []
        self.previously_considered_chromosomes = []
        self.birth_register = pd.DataFrame(
            columns=[
                "child",
                "chromosome_child",
                "parent_a",
                "parent_b",
                "chromosome_parent_a",
                "chromosome_parent_b",
                "generation",
                "f_score",
            ]
        )  # TODO this is awful - this stuff shouldn't be initialised in this function

        while len(new_models) < num_models:
            # generate random number and
            # format as binary string, i.e. chromosome
            r = random.randint(1, 2 ** self.num_terms - 1)
            r = format(r, "0{}b".format(self.num_terms))

            if self.chromosome_string(r) not in self.previously_considered_chromosomes:
                r = list(r)
                r = np.array([int(i) for i in r])
                mod = self.map_chromosome_to_model(r)
                chrom = self.chromosome_string(r)
                f = self.chromosome_f_score(chrom)
                self.previously_considered_chromosomes.append(chrom)
                self.chromosomes_at_generation[0].append(chrom)
                new_models.append(mod)

                birth = pd.Series(
                    {
                        "child": mod,
                        "chromosome_child": chrom,
                        "generation": 1,
                        "f_score": f,
                    }
                )
                self.birth_register.loc[len(self.birth_register)] = birth

        return new_models

[docs]    def rand_model_f(self):
        r"""
        Generate a random model chromosome and evaluate its F score.
        """

        r = 0
        while r == 0:
            r = np.random.randint(2 ** self.num_terms)

        b = bin(r)[2:].zfill(self.num_terms)
        b_array = np.array([int(i) for i in list(b)])
        f = skl.metrics.f1_score(b_array, self.true_chromosome)
        return f, b_array

[docs]    def random_models_sorted_by_f_score(
        self,
        num_models=14,
    ):
        r"""
        Generate a set of random models and sort them by F score.
        """

        n_runs = 1e3  # first sample ~1000 random numbers
        some_models = [self.rand_model_f() for _ in range(int(n_runs))]
        f_scores = np.array(some_models)[:, 0]
        chromosomes = np.array(some_models)[:, 1]

        # then choose from those randomly generated models
        random_chroms = np.random.choice(chromosomes, num_models)
        random_models = [self.map_chromosome_to_model(c) for c in random_chroms]
        models_w_f = list(
            zip(random_models, [self.model_f_score(m) for m in random_models])
        )
        sorted_by_f = sorted(models_w_f, key=lambda x: x[1])
        sorted_models = np.array(sorted_by_f)[:, 0]
        sorted_models = list(sorted_models)
        just_f = np.array(models_w_f)[:, 1]
        just_f = [float(a) for a in just_f]

        return sorted_models

    ######################
    # Selection functions
    ######################

[docs]    def selection(self, **kwargs):
        r"""
        Wrapper for user's selected selection method.

        Whatever method is called must return
            * prescribed_chromosomes
            * chromosomes_for_crossover - pairs
        """

        return self.selection_method(**kwargs)

    def select_from_pair_df_remove_selected(self, **kwargs):
        r"""
        Roulette selection of one parent pair from ``self.chrom_pair_df``.

        The pair probabilities are renormalised, one row is drawn at random
        according to them, and that row is dropped so it cannot be drawn again.

        :returns dict selection: the two chromosomes, plus ``other_data``
            holding the crossover ``cut`` and the ``force_mutation`` flag
        """
        pairs = self.chrom_pair_df

        # normalise so pairs' probabilities sum to 1
        as_float = pairs.probability.astype(float)
        pairs.probability = as_float / as_float.sum()

        pair_ids = list(pairs.index)
        pair_probs = pairs.probability.tolist()
        self.log_print(["Number available pairs:", len(pair_ids)])

        # randomly select a pair from list of pairs
        selected_id = np.random.choice(a=pair_ids, p=pair_probs)
        selected_entry = pairs.loc[selected_id]
        # Drop so it can't be chosen again
        pairs.drop(selected_id, inplace=True)
        self.log_print(
            ["chrom pair df has {} options remaining".format(len(self.chrom_pair_df))]
        )

        other_data = {
            "cut": int(selected_entry["cut1"]),
            "force_mutation": bool(selected_entry["force_mutation"]),
        }
        return {
            "chromosome_1": selected_entry["c1"],
            "chromosome_2": selected_entry["c2"],
            "other_data": other_data,
        }

[docs]    def basic_pair_selection(self, chromosome_selection_probabilities, **kwargs):
        r"""
        Mechanism for selecting two models from the database of potential parents.

        :param pd.DataFrame chromosome_selection_probabilities:
            database indicating the probability that every valid pair of
            parents should be selected.
        :return tuple selected_chromosomes: two models
        """

        chromosomes = list(chromosome_selection_probabilities.keys())
        probabilities = [chromosome_selection_probabilities[c] for c in chromosomes]
        selected_chromosomes = np.random.choice(
            chromosomes, size=2, p=probabilities, replace=False
        )

        return selected_chromosomes

    ######################
    # Crossover functions
    ######################

[docs]    def crossover(self, **kwargs):
        r"""
        Wrapper for crossover mechanism.

        This method assumes only 2 chromosomes to crossover
        and passes them to the method set as self.crossover_method, which can be easily replaced
        to facilitate alternative crossover schemes.
        """

        return self.crossover_method(**kwargs)

[docs]    def one_point_crossover(self, **kwargs):
        r"""
        Crossover two chromosomes about a single gene.

        Input two chromosomes, and selection (a dict) in kwargs.
        selection contains ``chromosome_1`` and ``chromosome_2``,
        as well as a dict called  ``other_data`` containing ``cut``,
        which is the position about which to crossover the two chromosomes.
        """

        selection = kwargs["selection"]
        c1 = np.array(list(selection["chromosome_1"]))
        c2 = np.array(list(selection["chromosome_2"]))
        x = selection["other_data"]["cut"]
        tmp = c2[:x].copy()
        c2[:x], c1[:x] = c1[:x], tmp

        return c1, c2

    ######################
    # Mutation functions
    ######################

[docs]    def mutation(self, **kwargs):
        r"""
        Wrapper for mutation mechanism.
        All input arguments to the mutation method are passed directly to
        the nominated mutation function, set as self.mutation_method.
        """

        return self.mutation_method(**kwargs)

[docs]    def element_wise_mutation(self, **kwargs):
        r"""
        Probabilistically mutate each gene independently.
        """

        chromosomes = kwargs["chromosomes"]
        force_mutation = kwargs["force_mutation"]

        copy_chromosomes = copy.copy(chromosomes)
        mutated_chromosomes = []
        for c in copy_chromosomes:
            try:
                if np.all(c == 0):
                    self.log_print(
                        [
                            "Input chomosome {} has no interactions -- forcing mutation".format(
                                c
                            )
                        ]
                    )
                    mutation_probability = 1.0
                else:
                    mutation_probability = self.mutation_probability
            except:
                self.log_print(["Can't compare all w/ 0 :", c])
                mutation_probability = self.mutation_probability

            if np.random.rand() < mutation_probability or force_mutation:
                num_mutations_to_perform = max(1, force_mutation)
                self.mutation_count += 1
                idx = np.random.choice(range(len(c)))
                # print("Flipping idx {}".format(idx))
                if int(c[idx]) == 0:
                    c[idx] = "1"
                elif int(c[idx]) == 1:
                    c[idx] = "0"
            mutated_chromosomes.append(c)
        return mutated_chromosomes

    ######################
    # Elitism functions
    ######################

[docs]    def get_elite_models(self, **kwargs):
        r"""
        Wrapper for elite model selection method,
            here set to self.elite_ranking_top_n_models.
        """

        return self.elite_ranking_top_n_models(**kwargs)

[docs]    def elite_ranking_top_n_models(self, model_fitnesses, **kwargs):
        r"""
        Get the top N models, and store info on the elite models to date.

        :param dict model_fitnesses: the fitness of each model in this generation according to the
            chosen objective function.

        """

        elite_models = self.models_ranked_by_fitness[self.genetic_generation][
            : self.num_protected_elite_models
        ]
        self.log_print(
            [
                "Elite models at generation {}: {}".format(
                    self.genetic_generation, elite_models
                )
            ]
        )
        for m in elite_models:
            self.elite_models = self.elite_models.append(
                pd.Series(
                    {
                        "model": m,
                        "generation": self.genetic_generation,
                        "elite_position": elite_models.index(m) + 1,
                        "chromosome": self.map_model_to_chromosome(m),
                        "f_score": self.model_f_score(m),
                    }
                ),
                ignore_index=True,
            )
        self.most_elite_models_by_generation[
            self.genetic_generation
        ] = self.models_ranked_by_fitness[self.genetic_generation][0]

        if self.genetic_generation > self.unchanged_elite_num_generations_cutoff + 2:
            gen = self.genetic_generation
            recent_generations = list(
                range(
                    max(0, gen - self.unchanged_elite_num_generations_cutoff), gen + 1
                )
            )
            recent_elite_models = [
                self.most_elite_models_by_generation[g] for g in recent_generations
            ]
            unchanged = np.all(
                np.array(recent_elite_models)
                == self.most_elite_models_by_generation[gen]
            )
            if unchanged and self.terminate_early_if_top_model_unchanged:
                # TODO this allows for unusual case where top model unchanged in 5 generations,
                # but is improved upon in the subsequent generation.
                # but since 5 generations are unchanged, termination is triggered and the new generation champion is winner
                self.best_model_unchanged = True
                self.log_print(
                    [
                        "Setting best_model_unchanged to {}".format(
                            self.best_model_unchanged
                        )
                    ]
                )
            self.log_print(
                [
                    "Elite model unchanged in last {} generations: {}. \nCurrently: {} with f-score {}".format(
                        self.unchanged_elite_num_generations_cutoff,
                        self.best_model_unchanged,
                        self.most_elite_models_by_generation[gen],
                        self.chromosome_f_score(
                            self.map_model_to_chromosome(
                                self.most_elite_models_by_generation[gen]
                            )
                        ),
                    )
                ]
            )
        return elite_models

    ######################
    # Processing given fitness to
    # selection probabilities
    ######################

[docs]    def get_selection_probabilities(self, **kwargs):
        r"""
        Wrapper for parent selection function, here set to self.truncate_to_top_half.
        """
        return self.truncate_to_top_half(**kwargs)

[docs]    def truncate_to_top_half(self, model_fitnesses, **kwargs):
        r"""
        Retain only the top-performing half of models considered at this generation,
        for consideration as parents to offspring on the subsequent generation.

        :param dict model_fitnesses: the fitness of each model in this generation according to the
            chosen objective function.

        """

        ranked_models = sorted(model_fitnesses, key=model_fitnesses.get, reverse=True)
        num_models = len(ranked_models)
        self.log_print(
            [
                "Considering truncation for {} models. Truncation rate = {}".format(
                    num_models, self.selection_truncation_rate
                ),
            ]
        )
        for m in ranked_models:
            self.log_print(["fitness = {} \t Model={} ".format(model_fitnesses[m], m)])

        truncation_cutoff = max(
            int(num_models * self.selection_truncation_rate), 4
        )  # either consider top half, or top 4 if too small
        truncation_cutoff = min(truncation_cutoff, num_models)
        truncated_model_list = ranked_models[:truncation_cutoff]

        truncated_model_fitnesses = {
            mod: model_fitnesses[mod] for mod in truncated_model_list
        }

        # keep the others with zero fitness, so the gene pool reflect them
        for m in ranked_models[truncation_cutoff:]:
            self.log_print(
                [
                    "Setting fitness to 0 for {} as it is {}th in rankings".format(
                        m, ranked_models.index(m)
                    )
                ]
            )
            truncated_model_fitnesses[m] = 0

        sum_fitnesses = np.sum(list(truncated_model_fitnesses.values()))
        self.log_print(
            [
                "Truncated model list:\n",
                truncated_model_list,
                "\nTruncated model fitnesses:\n",
                truncated_model_fitnesses,
                "\nsum fitnesses:",
                sum_fitnesses,
            ]
        )
        model_probabilities = {
            self.chromosome_string(self.map_model_to_chromosome(mod)): (
                truncated_model_fitnesses[mod] / sum_fitnesses
            )
            for mod in truncated_model_fitnesses.keys()
        }
        self.log_print(["Chromosome Selection probabilities:\n", model_probabilities])
        return model_probabilities

[docs]    def prepare_chromosome_pair_dataframe(
        self,
        chromosome_probabilities,
        force_mutation=False,
    ):
        r"""
        Given a set of individual chromosome fitnesses, generate database of pairs of
        parent chromosomes, with probability proportional to the fitness of both parents.

        """

        self.log_print(
            [
                "Setting up chromosome pair dataframe with initial probabilities",
                chromosome_probabilities,
            ]
        )
        if len(chromosome_probabilities) == 1:
            self.log_print(
                ["There is only one chromosome; not constructing selection database."]
            )
            return

        # Register gene pool
        for c in chromosome_probabilities:
            model = self.map_chromosome_to_model(c)
            gene_probability = pd.Series(
                {
                    "model": model,
                    "chromosome": c,
                    "f_score": self.model_f_score(model),
                    "probability": chromosome_probabilities[c],
                    "generation": self.genetic_generation,
                }
            )
            self.gene_pool.loc[len(self.gene_pool)] = gene_probability

        # Construct df of pairs of chromosomes from the gene pool, where the probability of that
        # pair being selected is the product of their individual fitnesses
        t2 = time.time()
        chromosome_combinations = list(
            itertools.combinations(list(chromosome_probabilities.keys()), 2)
        )
        eg_combo = chromosome_combinations[0]
        min_cut_pt = int(len(eg_combo[0]) * 0.25)
        max_cut_pt = int(len(eg_combo[0]) * 0.75) + 1
        self.log_print(
            [
                "example chrom combination : {}. \n min/max cut locations = {}/{}".format(
                    eg_combo, min_cut_pt, max_cut_pt
                )
            ]
        )

        pair_data = []
        count_good_pairs = 0
        for c1, c2 in chromosome_combinations:
            pair_prob = (
                chromosome_probabilities[c1] * chromosome_probabilities[c2]
            )  # TODO better way to get pair prob?
            # for cut1 in range(1, len(c1)-2):
            if pair_prob > 0:
                count_good_pairs += 1
                self.log_print(
                    ["Nonzero prob pair: {} & {}, prob = {}".format(c1, c2, pair_prob)]
                )
                for cut1 in range(min_cut_pt, max_cut_pt):
                    this_pair_df = {
                        "c1": c1,
                        "c2": c2,
                        "probability": pair_prob,  # np.round(pair_prob, 2),
                        "cut1": cut1,
                        "c1_prob": chromosome_probabilities[c1],
                        "c2_prob": chromosome_probabilities[c2],
                        "force_mutation": force_mutation,
                    }
                    pair_data.append(this_pair_df)
        self.chrom_pair_df = pd.DataFrame.from_dict(pair_data)

        # normalise probabilities
        try:
            self.chrom_pair_df.probability = self.chrom_pair_df.probability.astype(
                float
            )
            self.chrom_pair_df.probability = (
                self.chrom_pair_df.probability / self.chrom_pair_df.probability.sum()
            )
        except:
            self.log_print(
                ["Failing at final generation. chrom pair df:", self.chrom_pair_df]
            )

        self.log_print(
            [
                "starting chromosome pair dataframe setup. {} combinations in total from {} non-zero prob pairs. took {} sec and has len {}".format(
                    len(chromosome_combinations),
                    count_good_pairs,
                    np.round(time.time() - t2, 3),
                    len(self.chrom_pair_df),
                )
            ]
        )
        self.log_print(
            [
                "Probs after preparing df:",
                self.chrom_pair_df[["c1", "c2", "probability"]],
            ]
        )

[docs]    def get_pair_selection_order(self):
        r"""
        Use the probabilities of parental selection to define the order in which to generate offspring.
        It is cheaper to perform this once than call the database repeatedly.

        :return list pair_selection_order: list of tuples of the order in which to pass
            the model pairs to the crossover mechanism to generate offspring
        """

        pair_idx = self.chrom_pair_df.index.values
        probabilities = self.chrom_pair_df.probability.values
        # only keep nonzero probs
        pair_idx = pair_idx[probabilities > 0]
        probabilities = probabilities[probabilities > 0]
        self.log_print(
            [
                "get_pair_selection_order probabilities: ",
                probabilities,
                "\n {} distinct".format(len(probabilities)),
                "\n sum:",
                np.sum(probabilities),
            ]
        )
        probabilities /= np.sum(probabilities)

        n_samples = len(probabilities)
        self.log_print(
            ["Getting {} samples from chromosome probabilities".format(n_samples)]
        )
        t1 = time.time()
        pair_selection_order = np.random.choice(
            a=pair_idx, size=n_samples, p=probabilities, replace=False
        )
        self.log_print(
            [
                "after {} s, pair_selection_order has {} elements ({} unique): \n {}".format(
                    np.round(time.time() - t1, 3),
                    len(pair_selection_order),
                    len(set(pair_selection_order)),
                    repr(pair_selection_order),
                )
            ]
        )
        return pair_selection_order

    ######################
    # Implement entire genetic algorithm iteration
    ######################

[docs]    def consolidate_generation(self, model_fitnesses, **kwargs):
        r"""
        Following the training of all models on a generation, consolidate that generation.

        This involves determining the strongest models from the generation,
        and constructing the database of parent-pairs and their associated selection probabilities.

        :param dict model_fitnesses: the fitness of each model in this generation according to the
            chosen objective function.
        """

        self.fitness_at_generation[self.genetic_generation] = model_fitnesses
        self.models_ranked_by_fitness[self.genetic_generation] = sorted(
            model_fitnesses, key=model_fitnesses.get, reverse=True
        )
        self.log_print(
            [
                "GA step. model ranked by fitness:",
                self.models_ranked_by_fitness[self.genetic_generation],
            ]
        )

        self.get_elite_models(
            model_fitnesses=model_fitnesses, num_protected_elite_models=2
        )

        self.chromosome_selection_probabilities = self.get_selection_probabilities(
            model_fitnesses=model_fitnesses,
        )
        t_init = time.time()
        self.prepare_chromosome_pair_dataframe(
            chromosome_probabilities=self.chromosome_selection_probabilities
        )

[docs]    def genetic_algorithm_step(self, model_fitnesses, **kwargs):
        r"""
        Perform a complete step of the genetic algorithm, assuming all of the required steps have been performed.
        That is, the database for parent selection must already be available.

        :param dict model_fitnesses: the fitness of each model in this generation according to the
            chosen objective function.
        :returns list new_models: set of models to place on the next generation.
        """

        # get the order to iterate through chromosome pairs
        self.log_print(["Genetic algorithm step {}".format(self.genetic_generation)])
        pair_selection_order = self.get_pair_selection_order()
        init_num_chrom_pairs = len(pair_selection_order)
        pair_selection_order = iter(pair_selection_order)

        elite_models = list(
            self.elite_models[
                self.elite_models.generation == self.genetic_generation
            ].model
        )
        self.log_print(["elite models to start off with:", elite_models])
        proposed_chromosomes = [
            self.chromosome_string(self.map_model_to_chromosome(mod))
            for mod in elite_models
        ]  # list of chromosome strings to return

        input_models = list(model_fitnesses.keys())
        num_models_for_next_generation = len(input_models)
        self.log_print(
            ["Num models reqd for generation:", num_models_for_next_generation]
        )

        num_loops_to_find_new_chromosome = 0
        force_mutation = False
        num_genes_to_force_mutate = 0
        t_init = time.time()
        while len(proposed_chromosomes) < num_models_for_next_generation:
            # selection = self.selection()
            try:
                selected_id = next(pair_selection_order)
            except:
                self.log_print(["no pairs remaining."])  #  TODO now what?
                raise
            selected_entry = self.chrom_pair_df.loc[selected_id]
            selection = {
                "chromosome_1": selected_entry["c1"],
                "chromosome_2": selected_entry["c2"],
                "other_data": {
                    "cut": int(selected_entry["cut1"]),
                    "force_mutation": bool(selected_entry["force_mutation"]),
                },
            }

            suggested_chromosomes = self.crossover(selection=selection)
            suggested_chromosomes = self.mutation(
                chromosomes=suggested_chromosomes,
                force_mutation=selection["other_data"]["force_mutation"],
            )
            c0_str = self.chromosome_string(suggested_chromosomes[0])
            c1_str = self.chromosome_string(suggested_chromosomes[1])

            for c in [c0_str, c1_str]:
                if (
                    c not in proposed_chromosomes
                    and c != self.all_zero_chromosome_string
                ):
                    proposed_chromosomes.append(c)
                    self.log_print(
                        [
                            "num proposed chromosome now: {} of {}".format(
                                len(proposed_chromosomes),
                                num_models_for_next_generation,
                            ),
                            "new chromosome:",
                            c,
                        ]
                    )
                    birth = pd.Series(
                        {
                            "child": self.map_chromosome_to_model(c),
                            "chromosome_child": c,
                            "chromosome_parent_a": selection["chromosome_1"],
                            "chromosome_parent_b": selection["chromosome_2"],
                            "parent_a": self.map_chromosome_to_model(
                                selection["chromosome_1"]
                            ),
                            "parent_b": self.map_chromosome_to_model(
                                selection["chromosome_2"]
                            ),
                            "generation": self.genetic_generation,
                            "f_score": self.chromosome_f_score(c),
                        }
                    )
                    self.birth_register.loc[len(self.birth_register)] = birth
                    self.log_print(["Registering birth"])

            if len(self.chrom_pair_df) == 0:
                # already tried every available pair
                num_genes_to_force_mutate += 1  # TODO increase number of genes to flip to diversify population when repetitive
                self.log_print(
                    [
                        "Redrawing chromosome pair selection dataframe, enforcing mutation on {} genes".format(
                            num_genes_to_force_mutate
                        )
                    ]
                )
                self.prepare_chromosome_pair_dataframe(
                    chromosome_probabilities=self.chromosome_selection_probabilities,
                    force_mutation=True
                    # force_mutation=num_genes_to_force_mutate
                )

        # chop extra chromosomes if generated
        proposed_chromosomes = proposed_chromosomes[:num_models_for_next_generation]
        self.previously_considered_chromosomes.extend(
            [self.chromosome_string(r) for r in proposed_chromosomes]
        )

        # self.delta_f_by_generation[self.genetic_generation] = delta_f_score
        self.chromosomes_at_generation[self.genetic_generation] = [
            self.chromosome_string(r) for r in proposed_chromosomes
        ]
        new_models = [self.map_chromosome_to_model(mod) for mod in proposed_chromosomes]
        self.log_print(
            [
                "Genetic alg num new models:{}".format(len(new_models)),
                "({} unique)".format(len(set(list(new_models)))),
            ]
        )

        self.genetic_generation += 1
        return new_models


class GeneticAlgorithmFullyConnectedLikewisePauliTerms(GeneticAlgorithmQMLA):
    r"""
    Exact structure of :class:`~qmla.GeneticAlgorithmQMLA`, where the available terms
    are assumed to follow conventional pauliSet format,
    and all sites are connected.
    e.g. terms of the form
    pauliSet_1J2_xJx_d2, pauliSet_1J2_yJy_d2, pauliSet_1J2_zJz_d2,

    One gene is generated for every pair of sites ``i < j`` (sites are numbered from 1)
    and every entry of ``base_terms``; the resulting list is passed to the parent
    constructor as ``genes``.

    :param int num_sites: dimension to permit model search within
    :param base_terms: sequence of terms to use with pauliSet-type terms.
        It is only used to build the genes; it is not forwarded to the parent constructor.
    :param kwargs: forwarded unchanged to the parent constructor.
    """

    def __init__(self, num_sites, base_terms=("x", "y", "z"), **kwargs):
        # site pairs in lexicographic order, base terms varying fastest
        terms = [
            "pauliSet_{i}J{j}_{o}J{o}_d{N}".format(i=i, j=j, o=t, N=num_sites)
            for i, j in itertools.combinations(range(1, 1 + num_sites), 2)
            for t in base_terms
        ]

        super().__init__(genes=terms, num_sites=num_sites, **kwargs)


def multidimensional_shifting(num_samples, sample_size, elements, probabilities):
    """
    Draw several weighted samples of distinct indices in one vectorised pass.

    For each of the ``num_samples`` rows, a random non-negative shift (normalised so
    that each row of shifts sums to 1) is drawn for every entry of ``probabilities``
    and the probability is subtracted from it. The ``sample_size`` positions with the
    smallest resulting values are returned, so entries with larger probability are
    favoured and no index appears twice within a row.

    :param int num_samples: number of independent samples (rows) to draw.
    :param int sample_size: number of distinct indices per sample; must not exceed
        the number of entries in ``probabilities``.
    :param elements: unused; kept so that existing callers keep working.
        Indices, not elements, are returned.
    :param probabilities: weight of each index (assumed to be a 1-D sequence).
    :returns: integer array of shape ``(num_samples, sample_size)`` holding indices
        into ``probabilities``. The order of indices within a row is not meaningful.
    :raises ValueError: if ``sample_size`` exceeds the number of entries.
    """
    # replicate probabilities as many times as `num_samples`
    replicated_probabilities = np.tile(probabilities, (num_samples, 1))
    num_elements = replicated_probabilities.shape[1]
    if sample_size > num_elements:
        raise ValueError(
            "sample_size ({}) exceeds the number of elements ({})".format(
                sample_size, num_elements
            )
        )
    # get random shifting numbers & scale them correctly
    random_shifts = np.random.random(replicated_probabilities.shape)
    random_shifts /= random_shifts.sum(axis=1)[:, np.newaxis]
    # shift by numbers & find largest (by finding the smallest of the negative)
    shifted_probabilities = random_shifts - replicated_probabilities
    # argpartition requires kth < num_elements; clamping keeps the call valid when
    # every element is requested, and is a no-op for smaller sample sizes
    kth = min(sample_size, num_elements - 1)
    return np.argpartition(shifted_probabilities, kth, axis=1)[:, :sample_size]
Source code for qmla.shared_functionality.genetic_algorithm

Quantum Model Learning Agent

Navigation

Related Topics