#!/usr/bin/perl -w
use strict;
use vars qw ($TARGET_DIGITS $OP_REGEXP $MUTATION_RATE $CROSSOVER_RATE $GENERATION_TICKER $VERSION
$MUTATE_EXP_RATE $NUM_RUNS $TOURNAMENT_SIZE $POP_SIZE_MULT $GENOME_SIZE_MULT $NUM_GENERATIONS);
########################################################################
# To run this program you will need to save it and change the ending
# from .txt to .pl (I have to call it .txt to stop the web server
# trying to execute it rather than deliver it). Then you need to use a
# perl interpreter to run it. These come installed with Linux or
# there is a free one available for Windows here:
#
# http://www.activestate.com/ActivePerl
#
# After you have installed that just double click on the file.
#
# The aim of the program is to use a genetic algorithm to generate an
# expression which evaluates to a target number. For example:
#
# 5+5+3-5^3+8^1*6*4^5+1^5^5-4*5-2-1^4-5*8+5+7 = 48990
#
# This is done by generating a population of expression holders called
# "beasties", each of which can produce their expression on
# demand. The use of a genetic algorithm encourages the beastie
# expressions to converge on the target over many generations.
#
# Each generation of beasties is generated from the previous generation by
# mating pairs of beasties then mutating some of the offspring. The
# choosing of pairs is done using a variant of tournament selection, a
# small number of random beasties are compared and the top two get to
# mate to generate one child beastie. This means that "fit" beasties
# get to mate more often. Lucky beasties.
#
# Each beastie consists of a list of expression "domains"
# (e.g. 9+(4*2)^3) and a list of arithmetic operators which will join
# those expressions together. It's done this way so that each beastie
# has an identical genome length, which makes crossover during mating
# much simpler.
#
# By default the program is set up to generate a 5 digit target and
# try 3 times to find the best solutions within 100 generations. If
# you want to change these settings then read the globals section
# below and the &main subroutine.
#
# NOTE: Leave food out near the computer to tempt back any beasties
# that escape. They are particularly fond of chili chocolate. Yum!
#
########################################################################
# This top section contains globals that you may want to tweak.
########################################################################
# The default number of generations to try, if not given
$NUM_GENERATIONS = 100;
# Default number of digits in our random target number, if not given
$TARGET_DIGITS = 5;
# The number of complete simulation runs to do for a particular target.
$NUM_RUNS = 3;
########################################################################
# You probably don't want to tweak the values below but if you do then
# go for it, the current values are empirically derived.
#
# Note that it is certainly possible to break the algorithm by picking
# silly values. There is no validation.
########################################################################
# The percentage chance that a new child beastie might be mutated
$MUTATION_RATE = 60;
# The percentage chance that an individual expression will be mutated,
# rather than a complete expression being deleted, generated, duplicated etc.
$MUTATE_EXP_RATE = 40;
# The percentage chance of flipping between parent genomes when
# generating a child.
$CROSSOVER_RATE = 20;
# The number of beasties in each tournament pool. See
# &generate_beastie_child for details. Changing this changes the
# selection pressure but make it too big and you risk not being able
# to get out of local maxima.
#
# A tournament size of 2 turns off selection *completely* because
# there are only two contestants in the pool. Not much of a contest,
# and it turns the program into a random walk.
$TOURNAMENT_SIZE = 10;
# Regular expression to identify operators in expressions
$OP_REGEXP = '[\/\^\+\-\*]';
# The multiplier on the number of digits in the target for the population size
$POP_SIZE_MULT = 50;
# The multiplier on the number of digits in the target for the genome size
$GENOME_SIZE_MULT = 3;
# How often do we want the generations to report? 0 for never.
$GENERATION_TICKER = 10;
# The version banner for this program
$VERSION = "beastly_maths.pl (Version: Aardvark.Buffalo) by Sapient Fridge";
########################################################################
# Known limitations and possible improvements:
#
# There is no validation so a population size of 1, target size of 500
# digits or a tournament pool size of 2 etc. will probably break the
# algorithm. I haven't investigated where the limits are.
#
# The program seems to mess up Perl's garbage collection. Multiple
# large generation runs searching for multi-billion numbers get
# exponentially longer for some reason. I guess there is a memory leak
# in perl somewhere and I'm provoking it. Bizarrely when that
# happens, if you are running in a Linux VMware virtual machine, it
# messes something up to the point where windows can't talk to Samba
# any more (until you reboot the virtual machine).
#
# It would be nice if there was a way to track ancestry so you could
# see where an expression came from. Even nicer would be a web page
# to explore ancestry and show the nested hierarchies.
#
# The genome size, mutation rate etc. could be a part of the beastie
# genome as well e.g. as the third list (after the expressions and
# operators). That way the code could optimise itself rather than
# using hardcoded values.
########################################################################
########################################################################
# And on to the main program of the night...
########################################################################
sub main {
## Run one or more population simulations to generate an
## expression which evaluates to a given (or random) number. This
## is done by generating a population of random expression then
## mating and mutating them over multiple generations until the
## target number is reached.
##
## See the various globals above which control aspects of the
## simulation.
##
## Arguments:
##
## target : The target value to aim for. If this is not given
## then one will be generated randomly.
##
## num_generations : The number of generations to run each
## simulation for. If this number of runs is done and no solution
## is found then the simulation will continue until the target
## value is hit, then it will end.
##
## num_runs : The number of simulation runs to do.
##
## Nothing returned.
my ($target, $num_generations, $num_runs) = @_;
my (@top_beasties, $beastie_l, $count, $genome_size, $population_size, $waste);
print "$VERSION\n";
# Generate a random number with the right number of digits
if (not $target) {
$target = &random_digit();
for (1..$TARGET_DIGITS-1) { $target .= &random_char("0123456789")};
}
# How many generations will we try?
if (not $num_generations) { $num_generations = $NUM_GENERATIONS }
# And how many simulation runs?
if (not $num_runs) { $num_runs = $NUM_RUNS }
# Make the population size suitable for the number of digits
$population_size = (length $target) * $POP_SIZE_MULT;
$genome_size = (length $target) * $GENOME_SIZE_MULT;
print "\nTarget : $target\n";
print "Population : $population_size\n";
print "Genome size : $genome_size\n";
print "Max Generations : $num_generations\n";
# Generate the simulation runs and keep track of the winner from each
for (1..$num_runs) {
($beastie_l, $count) = &do_run($num_generations, $target, $genome_size, $population_size);
if ($beastie_l) {
push @top_beasties, $beastie_l;
}
}
# Show our list of winners
print "========================================================================\n";
print "The final best beasties from $num_runs simulation runs are as follows:\n\n";
&show_population(\@top_beasties, $target);
print "========================================================================\n";
# If we are in a command shell we will vanish without warning if we don't wait for the user
print "\nPress return to exit.\n";
$waste = ;
}
sub do_run {
## Actually go ahead and do a simulation run. This will generate a
## population from scratch then mate and mutate it looking for the
## target value to be generated.
##
## Arguments:
##
## num_generations : The maximum number of generations to run. If
## it is not given then it will stop when the target value is
## reached. See the &main subroutine for more details.
##
## target : The target value to aim for.
##
## genome_size : The number of sub-expression domains in the
## genome. The larger this is then the more variation but the
## weaker the length selection.
##
## population_size : The number of beasties we are running at each
## generation. The higher this is the more variety their is, but
## the slower things run.
##
## Returned:
##
## alltime_best_beastie_l : Reference to the list of expressions
## and operators which represents to the best beastie from all the
## generations
##
## total_count : The total number of beasties generated by the
## simulation.
my ($num_generations, $target, $genome_size, $population_size) = @_;
my ($population_l, $val, $score, $count, $total_count, $best_beastie_l, $best_score, $generation, $alltime_best_beastie_l);
$generation = $total_count = 0;
# Loop forever, or until we hit the maximum number of generations
until ($num_generations and $generation == $num_generations) {
$generation++;
($population_l, $count) = &generate_population($population_l, $target, $genome_size, $population_size);
$total_count += $count;
# This is the best beastie in this particular generation
($best_beastie_l, $val, $score) = &find_best_beastie($population_l, $target);
# If we hit the value then keep track of the winner
if ($val == $target and (not $best_score or $score < $best_score)) {
$alltime_best_beastie_l = $best_beastie_l;
$best_score = $score;
if (not $num_generations) { last }
}
# Show the user that we are making progress
if ($GENERATION_TICKER and not ($generation % $GENERATION_TICKER)) {
print "Generation: $generation : ";
if ($alltime_best_beastie_l) {
print "Best success : ";
&show_beastie($alltime_best_beastie_l, $target, "short");
} else {
print "Best attempt : ";
&show_beastie($best_beastie_l, $target, "short");
}
}
# Rats, we are meant to finish but didn't find *any* match.
if ($generation == $num_generations and not $best_score) {
print "Sorry mate but I couldn't find a single good beastie in the entire population of little runts :-(\nDon't worry, I'll soldier on until I find one!\n";
# Change so we only finish when we find one
$num_generations= 0;
}
}
# Print out some stats. Note that the count will include some beasties generated after the winner
print "Beastie count : $total_count\n";
print "Generations : $generation\n";
print "Best beastie : ";
&show_beastie($alltime_best_beastie_l, $target, "short");
print "\n";
return $alltime_best_beastie_l, $total_count;
}
sub find_best_beastie {
## Search the population of beasties for the highest scoring one
## and return it.
##
## Arguments:
##
## population_l : Reference to the list of beasties in the population
##
## target : The target value the simulation is aiming at. This is
## needed to score the beasties.
##
## Returned:
##
## best_beastie_l : Reference to the list of expressions and
## operators which make up the best beastie.
##
## val : The value that the beasties expression gave when evaluated
##
## score : The fitness score of the best beastie
my ($population_l, $target) = @_;
my ($best_beastie_l, $exp, $val, $score, $best_score, $beastie_l);
# Find the best beastie by scoring them all
foreach $beastie_l (@$population_l) {
($exp, $val, $score) = &calculate_score($beastie_l, $target);
# Keep track of the best beastie
if (not $best_score or $score < $best_score) {
$best_score = $score;
$best_beastie_l = $beastie_l;
}
}
# Get the full stats of the best beastie
($exp, $val, $score) = &calculate_score($best_beastie_l, $target);
return $best_beastie_l, $val, $score;
}
sub build_full_expression {
## Return the full expression for a beastie. This is built from
## the sub-expressions and operators that the beastie contains.
##
##
## Arguments:
##
## beastie_l : The beastie is a reference to a list of two lists.
## The first list is the expressions and the second list is the
## list of operators which go before each of those expressions
## (the first operator is not used).
##
## Returned:
##
## full_exp : The expression built from the component parts within
## the beastie.
##
## val : The value of the expression when evaluated.
my ($beastie_l) = @_;
my ($expressions_l, $operators_l, $full_exp, $exp, $val, $op, $genome_size);
($expressions_l, $operators_l) = @$beastie_l;
# We get the first expression by hand as it has no operator
$full_exp = $expressions_l->[0];
$genome_size = @$expressions_l;
# Add the expression domains together to make the full expression
for (1..$genome_size-1) {
if ($exp = $expressions_l->[$_]) {
if ($full_exp) {
$op = $operators_l->[$_];
$full_exp .= $op . $exp;
} else {
$full_exp = $exp;
}
}
}
# So, what is the result?
$val = &calculate_value($full_exp);
return $full_exp, $val;
}
sub show_population {
## Print out the entire population of beasties in fitness order (fittest first)
##
## Arguments:
##
## population_l : Reference to a list of beasties which make up the population.
##
## target: The target value the simulation is trying to reach.
##
## Nothing returned.
my ($population_l, $target) = @_;
my ($beastie_l, @sorted_beasties, $exp1, $val1, $score1, $exp2, $val2, $score2);
@sorted_beasties = sort {
($exp1, $val1, $score1) = &calculate_score($a, $target);
($exp2, $val2, $score2) = &calculate_score($b, $target);
return $score1 <=> $score2;
} @$population_l;
foreach $beastie_l (@sorted_beasties) {
&show_beastie($beastie_l, $target, "short");
}
}
sub show_beastie {
## Print out the details of a beastie, including the expression,
## the value and the fitness score. The full genome can
## optionally be printed for debugging.
##
## Arguments:
##
## beastie_l : Reference to the list of expressions and operators
## that make up the beastie.
##
## target : The target value that the simulation is aiming for
##
## short_q : Pass true for a short, one line description or false
## to see the sequences and operators that make up the beasties
## genome.
my ($beastie_l, $target, $short_q) = @_;
my ($expressions_l, $operators_l, $exp, $op, $val, $score, $subexp, $genome_size);
($exp, $val) = &build_full_expression($beastie_l);
# Nothing there
if (not $exp or not $val) {
print "DEAD BEASTIE\n";
return;
};
($exp, $val, $score) = &calculate_score($beastie_l, $target);
$genome_size = @{$beastie_l -> [0]};
if ($short_q) {
print "$exp = $val (Score = $score)\n";
} else {
($expressions_l, $operators_l) = @$beastie_l;
for (0..$genome_size-1) {
$op = $operators_l -> [$_];
$subexp = $expressions_l -> [$_];
print "$op : $subexp\n";
}
print "Expression : $exp\n";
print "Value : $val\n";
print "Score : $score\n\n";
}
}
sub mutate_beastie {
## Mutate a beastie in situ. This can change either the number of
## expression domains at the top level or it can mutate the
## expression itself. Note that the mutation may not happen or may
## not be visible e.g. changing the operator for a blank domain.
##
## The mutation types are:
##
## :mutate_expression Mutate the expression itself. See &mutate_expression for details.
## :delete_expression Delete a random expression completely
## :create_expression Fill an empty domain with a new expression
## :duplicate_expression Copy a expression to an empty domain
## :mutate_operator Mutate the operator between two expression domains
##
## Arguments:
##
## beastie_l : Reference to list of expression and operators that
## make up the simulation creature. See &build_full_expression
## for details.
##
## Nothing returned.
my ($beastie_l) = @_;
my ($mutation_type, $expressions_l, $operators_l, @exp_indices, @blank_indices, $exp, $index, $genome_size);
($expressions_l, $operators_l) = @$beastie_l;
$genome_size = @{$beastie_l -> [0]};
if (&percent_chance_q($MUTATE_EXP_RATE)) {
# Mutate the expression itself.
$mutation_type = ":mutate_expression";
} else {
$mutation_type = &random_value([":delete_expression", ":create_expression", ":duplicate_expression", ":mutate_operator"]);
}
# Record the blank and non-blank expressions
for (0..$genome_size-1) {
if ($exp = $expressions_l -> [$_]) {
push @exp_indices, $_;
} else {
push @blank_indices, $_;
}
}
# He's dead Jim
if (not @exp_indices) { return }
# Pick a target expression as most mutation need one
$index = &random_value(\@exp_indices);
if ($mutation_type eq ":delete_expression") {
# Delete a random expression completely
$expressions_l -> [$index] = "";
} elsif ($mutation_type eq ":create_expression" and @blank_indices) {
# Fill an empty domain with a new expression
$index = &random_value(\@blank_indices);
$expressions_l -> [$index] = &generate_expression();
} elsif ($mutation_type eq ":mutate_expression") {
# Mutate the expression itself
$exp = $expressions_l -> [$index];
$expressions_l -> [$index] = &mutate_expression($exp);
} elsif ($mutation_type eq ":duplicate_expression" and @blank_indices) {
#Copy a expression to an empty domain
$exp = $expressions_l -> [$index];
$index = &random_value(\@blank_indices);
$expressions_l -> [$index] = $exp;
} elsif ($mutation_type eq ":mutate_operator") {
# Mutate the operator between two expression domains
$index = &random_index($operators_l);
$operators_l -> [$index] = &random_operator();
}
}
sub generate_population {
## Generate a population of beasties either from scratch or from
## an old population.
##
## Arguments:
##
## old_population_l : The reference to the list of beasties which
## make up the current population. These will be used as breeding
## stock for the next generation. If it isn't given then a new
## random population is created.
##
## target : The target value the simulation is trying to
## reach. This is used to score the fitness of the beasties so the
## fitter ones can mate more often.
##
## genome_size : The number of sub-expression domains in the
## genome. The larger this is then the more variation but the
## weaker the length selection.
##
## population_size : The number of beasties we are running at each
## generation. The higher this is the more variety their is, but
## the slower things run.
##
## Returned:
##
## population_l : Reference to the list of beasties which make up
## the new population.
##
## count : The number of beasties that were generated.
my ($old_population_l, $target, $genome_size, $population_size) = @_;
my (@population, $beastie_l, $exp, $val, $score, $count);
$count = 0;
if ($old_population_l) {
# Use the old population as breeding stock. Each beastie gets at least one chance to breed!
for (0 .. $population_size-1) {
# Generate a child, the beastie we are looking at gets a free pass
push @population, &generate_beastie_child($old_population_l, $target, $_);
$count++;
}
} else {
# Make a brand new population of random beasties
until (@population == $population_size) {
$beastie_l = &generate_random_beastie($genome_size);
($exp, $val, $score) = &calculate_score($beastie_l, $target);
$count++;
# If the beastie died (non-integral) then make another one to replace it.
if ($val) { push @population, $beastie_l };
}
}
return \@population, $count;
}
sub generate_random_beastie {
## Generate a new beastie completely at random. This is used to
## seed the population.
##
## No arguments.
##
## Returned:
##
## beastie_l : Reference to the list of expressions and operators
## that make up the new beastie.
my ($genome_size) = @_;
my (@expressions, @operators, $exp);
until (@expressions == $genome_size) {
$exp = &generate_expression();
if (&calculate_value($exp)) {
push @expressions, $exp;
} else {
push @expressions, "";
}
push @operators, &random_operator();
}
return [\@expressions, \@operators];
}
sub generate_beastie_child {
## This subroutine is the meat of the program. It loops over every
## beastie in the population and puts it in a pool with a number
## of other, randomly selected, beasties. The top two beasties in
## the pool then get to mate.
##
## This is pretty similar to tournament selection and has the
## benefit that higher ranking beasties automatically breed more
## without having to mess around with assigning probabilities.
## The only individuals with absolutely no chance of breeding are
## the very lowest ranking ones.
##
## Note that the child beastie may be mutated before being
## returned.
##
## Arguments:
##
## population_l : Reference to a list of beasties which make up
## the population.
##
## target : The value that the simulation is trying to reach
##
## index : The individual who gets a free pass into the pool. It
## still has to outrank the others in the pool to actually breed.
## The index doesn't have to be given.
##
## child_l : Reference to the list of expressions and operators
## which make up the child beastie which was created.
my ($population_l, $target, $index) = @_;
my (@parent_indices, @sorted_indices, $parent1_l,
$parent2_l, $parent1_index, $parent2_index, $child_l,
$loser_index, $loser_l);
# Put in the potential parent index if it was passed in
if ($index) { push @parent_indices, $index }
# Add some more
until (@parent_indices > 1 and @parent_indices >= $TOURNAMENT_SIZE) {
push @parent_indices, &random_index($population_l);
}
# Sort the indices by the score of the beastie they point to
@sorted_indices = sort { &calculate_indexed_score($population_l, $a, $target) <=> &calculate_indexed_score($population_l, $b, $target) } @parent_indices;
# Find the parents by taking the two highest ranking candidates
$parent1_index = shift @sorted_indices;
$parent2_index = shift @sorted_indices;
$parent1_l = $population_l -> [$parent1_index];
$parent2_l = $population_l -> [$parent2_index];
# Generate a child beastie from the parents
$child_l = &mate_beasties($parent1_l, $parent2_l);
# # Find the worst one
# $loser_index = pop @sorted_indices;
# $loser_l = $population_l -> [$loser_index];
#
# print "PARENT 1\n";
# &show_beastie($parent1_l, $target);
#
# print "\nPARENT 2\n";
# &show_beastie($parent2_l, $target);
#
# print "\nLOSER\n";
# &show_beastie($loser_l, $target);
#
# print "\nCHILD\n";
# &show_beastie($child_l, $target);
if (&percent_chance_q($MUTATION_RATE)) {
&mutate_beastie($child_l);
# print "\nMUTANT CHILD\n";
# &show_beastie($child_l, $target);
}
return $child_l;
}
sub mate_beasties {
## Take two beasties and generate a child beastie from them using
## crossover to combine the sequences.
##
## Arguments:
##
## beastie1_l, beastie2_l : References to the lists of expressions
## and operators for the proud parent beasties.
##
## Returned:
##
## beastie_child_l : Reference to the list of expressions and
## operators which make up the new child beastie.
my ($beastie1_l, $beastie2_l) = @_;
my ($beastie1_q, @child_exps, @child_ops, $exp, $op, $genome_size);
$genome_size = @{$beastie1_l -> [0]};
# Randomly choose which parent we start on
$beastie1_q = &random_value([0,1]);
# Loop over the entire sequence, taking the expression and
# operator from one parent or the other
for (0..$genome_size-1) {
if ($beastie1_q) {
$exp = $beastie1_l -> [0] -> [$_];
$op = $beastie1_l -> [1] -> [$_];
} else {
$exp = $beastie2_l -> [0] -> [$_];
$op = $beastie2_l -> [1] -> [$_];
}
# build the child
push @child_exps, $exp;
push @child_ops, $op;
# Maybe crossover which parent we are reading from?
if (&percent_chance_q($CROSSOVER_RATE)) { $beastie1_q = not $beastie1_q }
}
# Beasties are list reference to two list references, the first
# list is the expressions and the second is the inter-expression operators.
return [\@child_exps, \@child_ops];
}
sub calculate_indexed_score {
## This is used to calculate the score of a beastie within a
## population and is used when sorting a population.
##
## Arguments:
##
## population_l : Reference to the list of beasties which make up
## the population.
##
## index : The index of the beastie we want the score of, 0 indexed
##
## target : Numerical value that the simulation is aiming for
##
## Returned:
##
## score : Fitness score for the beastie. See &calculate_score()
my ($population_l, $index, $target) = @_;
my ($exp, $val, $score, $beastie_l);
$beastie_l = $population_l -> [$index];
($exp, $val, $score) = &calculate_score($beastie_l, $target);
return $score;
}
sub calculate_score {
## Calculate the expression, value and score for a beastie in the simulation.
##
## The closer to the target number the better, but once it's very close
## then the shorter the better. This has the desired result:
##
## (difference ^ 2) + length
##
## If an exact match hasn't been found then a penalty of 100 is
## added. This is to stop short but inaccurate sequences beating
## long, accurate ones.
##
## Invalid formula and non-integral are not viable so it returns a
## *really* big number for them.
##
## Arguments:
##
## beastie_l : Reference to list of expression and operators that
## make up the simulation creature. See &build_full_expression.
##
## target : The target value that we are aiming for.
##
## Returned:
##
## exp : The expression generated from the beastie.
##
## val : Value that the expression yielded when evaluated.
##
## score : The fitness score, the lower the better.
my ($beastie_l, $target) = @_;
my ($exp, $val, $length, $diff, $score);
($exp, $val) = &build_full_expression($beastie_l);
# If it failed or wasn't integral then score it very badly. Perl
# doesn't like generating infinity so it can't be used directly.
if ($val eq "") { return "", 0, 1e1000000 }
$length = length $exp;
$diff = abs ($target - $val);
$score = ($diff + 1) ** 2 + $length;
# Add the penalty for not hitting th target
if ($val ne $target) { $score += 100 };
return $exp, $val, $score;
}
sub calculate_value {
## Evaluate an expression and return the value.
##
## Arguments:
##
## exp : The expression to evaluate
##
## Returned:
##
## result : The result value after running the expression through
## the compiler. This will be "" if the compilation failed or the
## result was non-integral
my ($exp) = @_;
my ($result);
# Replace ^ with ** for the compiler
$exp =~ s/\^/**/g;
# Let the compiler evaluate it
eval("\$result=$exp");
if (not defined($result)) {
# Compiler barfed, this should only be from infinity in the
# result as all the expressions should be compileable.
return "";
} elsif (int $result != $result) {
# Kill the non-integral results
$result = "";
}
return $result;
}
sub mutate_expression {
## Repeatedly try to mutate a string representing a single
## expression until it changes, then return the mutated string.
##
## See &int_mutate_expression for details on the possible
## mutations.
##
## Arguments:
##
## string : The string to mutate
##
## Returned:
##
## new_string : The string after it has been mutated.
my ($string) = @_;
my ($new_string);
$new_string = $string;
# Loop until something changes
until ($new_string ne $string) {
$new_string = &int_mutate_expression($string);
}
return $new_string;
}
sub int_mutate_expression {
## Try to mutate a string representing a single expression and
## return the mutated string. Some mutations will fail or result
## in nothing changing (e.g. replacing a "+" character with a "+")
##
## Use &mutate_expression to guarantee a mutation.
##
## The possible mutations are:
##
## :delete_brackets Delete a random pair of brackets
## :insert_expression Add a expression within or at an end of the string
## :change_digit Change a single digit to another
## :change_operator Change a random operator to a new random operator
## :delete_digit Delete a random digit and an operator that goes with it
##
## Arguments:
##
## string : The string to mutate
##
## Returned:
##
## new_string : The mutated string, it may be the same as the original
my ($string) = @_;
my ($mutation_type, $new_string, $end_string, $part1, $part2, $part3, $part4, $ops);
$new_string = $string;
$mutation_type = &random_value([":delete_brackets", ":insert_expression", ":change_digit", ":change_operator", ":delete_digit"]);
if ($mutation_type eq ":change_operator") {
# Change a random operator to a new random operator
($part1, $part2) = &split_string($string, $OP_REGEXP);
if ($part1 or $part2) {
$new_string = $part1 . &random_operator . $part2;
}
} elsif ($mutation_type eq ":delete_digit") {
# Delete a random digit and an operator that goes with it
($part1, $part2) = &split_string($string, '\d');
if ($part1 or $part2) {
# Random which operator we keep, left or right
$ops = "";
if ($part1 =~ m/$OP_REGEXP$/) {
$ops .= substr($part1, (length $part1)-1, 1);
$part1 =~ s/$OP_REGEXP$//;
}
if ($part2 =~ m/^$OP_REGEXP/) {
$ops .= substr($part2, 0, 1);
$part2 =~ s/^$OP_REGEXP//;
}
if (length $ops == 2) {
$new_string = $part1 . &random_char($ops) . $part2;
} else {
$new_string = $part1 . $part2;
}
}
} elsif ($mutation_type eq ":change_digit") {
# Change a single digit to another
($part1, $part2) = &split_string($string, '\d');
if ($part1 or $part2) {
$new_string = $part1 . &random_digit . $part2;
}
} elsif ($mutation_type eq ":insert_expression" and &percent_chance_q(50)) {
# Add a expression to the start or end of the string
if (&percent_chance_q(50)) {
$new_string = &generate_expression($string);
} else {
$new_string = &generate_expression() . &random_operator . $string;
}
} elsif ($mutation_type eq ":insert_expression") {
# Insert an expression, using an existing digit as the first value
($part1, $part2, $part3) = &split_string($string, '\d');
if ($part1 or $part2) {
$new_string = $part1 . &generate_expression($part3) . $part2;
}
} elsif ($mutation_type eq ":delete_brackets") {
# Delete a random pair of brackets
($part1, $part2) = &split_string($string, '\(');
if ($part1 or $part2) {
($part3, $part4) = &split_string($part2, '\)');
$new_string = $part1 . $part3 . $part4;
}
}
# Get rid of brackets with no contents as the compiler won't accept that.
# Brackets around a single digit are also removed.
while ($new_string =~ s/\((\d*)\)/$1/g) {}
return $new_string;
}
sub split_string {
## Split a string at a random point which matches a given
## character regular expression. The string is split to either
## side of the character and the matching character and the
## strings to either side of it are returned.
##
## For example splitting 9^(3*7+(8/2)) by "\d" might give:
##
## "9^(3*", "+(8/2))", "7"
##
## This is done to find suitable insertion/deletion location for
## mutations. If no matches are found then empty strings are
## returned.
##
## Arguments:
##
## string : The string to chop
##
## regexp : The character regular expression to loop for
##
## Returned:
##
## start_string : The section of the string to the left of the character
##
## end_string : The string to the right
##
## char : The matching character itself
my ($string, $regexp) = @_;
my (@indices, $char, $index, $start_string, $end_string, $last);
# Find the indices of the characters that match the regexp
$last = (length $string) - 1;
for (0..$last) {
$char = substr($string, $_, 1);
if ($char =~ m/$regexp/) {
push @indices, $_;
}
}
# Nothing suitable found
if (not @indices) { return "","","" }
# pick one at random
$index = &random_value(\@indices);
$char = substr($string, $index, 1);
# And split off the strings to both sides
$start_string = substr($string, 0, $index);
if ($index == $last) {
$end_string = "";
} else {
$end_string = substr($string, $index+1, $last-$index);
}
return $start_string, $end_string, $char;
}
sub generate_expression {
## Generate a random expression in the form
## where is either a digit or an
## expression and is one of +-*/^ e.g.
##
## 9^(3*7+(8/2))
##
## Arguments:
##
## Val1 : The left hand value. If this isn't given then one will
## be generated.
##
## Returned
##
## expression : The randomly generated expression.
my ($val1) = @_;
my ($val2, $op);
if (not $val1) {
if (&percent_chance_q(10)) {
# Recurse! Recurse!
$val1 = "(" . &generate_expression . ")";
} else {
$val1 = &random_digit();
}
}
if (&percent_chance_q(10)) {
# Recurse! Recurse!
$val2 = "(" . &generate_expression . ")";
} else {
$val2 = &random_digit();
}
$op = &random_operator();
return "$val1$op$val2";
}
sub random_operator {
## Return a random operator e.g. +-^* or /
##
## Note that / appears at low frequency as we want to avoid
## non-integral expressions.
##
## No arguments
##
## Returned:
##
## op : The chosen operator.
return &random_char("/+-*^+-*^+-*^");
}
sub random_digit {
## Return a random digit between 1 and 9
##
## No arguments
##
## Returned:
##
## digit : The chosen digit.
return &random_char("123456789");
}
sub random_char {
## Return a random character from a string
##
## Arguments:
##
## string : The string to choose a character from
##
## Returned:
##
## char : The chosen character.
my ($string) = @_;
my ($index);
$index = int ((rand) * length $string);
return substr($string, $index, 1);
}
sub random_value {
## Pick a random value out of a list of values
##
## Arguments:
##
## values_l : Reference to a list of values
##
## Returned:
##
## value : The value that was chosen
my ($values_l) = @_;
my ($value);
$value = $values_l -> [&random_index($values_l)];
return $value
}
sub random_index {
## Return a random index into a list of values
##
## Arguments:
##
## values_l : Reference to a list of values
##
## Returned:
##
## index : The index that was chosen
my ($values_l) = @_;
my ($index, $count);
$count = @$values_l;
$index = int ((rand) * $count);
return $index;
}
sub percent_chance_q {
## Returns true a certain percentage amount of the time.
##
## Arguments:
##
## percentage : The percentage time we want true returned
##
## Returned:
##
## true_q : True or false, randomly but at a frequency controlled
## by the percentage argument.
my ($percentage) = @_;
return $percentage > ((rand) * 100);
}
## Launch the simulator. The arguments are summarised here but see
## the &main subroutine for details.
##
## The first is the target value if you don't want it randomly
## generated. $TARGET_DIGITS controls the number of digits in a random
## target.
##
## The second number is the maximum number of generations (default to
## the global $NUM_GENERATIONS if not given).
##
## The third is the number of simulations to run (each are completely
## isolated from the others). Defaults to the global $NUM_RUNS.
&main(0, 0, 0);