#!/usr/bin/perl -w use strict; use vars qw ($TARGET_DIGITS $OP_REGEXP $MUTATION_RATE $CROSSOVER_RATE $GENERATION_TICKER $VERSION $MUTATE_EXP_RATE $NUM_RUNS $TOURNAMENT_SIZE $POP_SIZE_MULT $GENOME_SIZE_MULT $NUM_GENERATIONS); ######################################################################## # To run this program you will need to save it and change the ending # from .txt to .pl (I have to call it .txt to stop the web server # trying to execute it rather than deliver it). Then you need to use a # perl interpreter to run it. These come installed with Linux or # there is a free one available for Windows here: # # http://www.activestate.com/ActivePerl # # After you have installed that just double click on the file. # # The aim of the program is to use a genetic algorithm to generate an # expression which evaluates to a target number. For example: # # 5+5+3-5^3+8^1*6*4^5+1^5^5-4*5-2-1^4-5*8+5+7 = 48990 # # This is done by generating a population of expression holders called # "beasties", each of which can produce their expression on # demand. The use of a genetic algorithm encourages the beastie # expressions to converge on the target over many generations. # # Each generation of beasties is generated from the previous generation by # mating pairs of beasties then mutating some of the offspring. The # choosing of pairs is done using a variant of tournament selection, a # small number of random beasties are compared and the top two get to # mate to generate one child beastie. This means that "fit" beasties # get to mate more often. Lucky beasties. # # Each beastie consists of a list of expression "domains" # (e.g. 9+(4*2)^3) and a list of arithmetic operators which will join # those expressions together. It's done this way so that each beastie # has an identical genome length, which makes crossover during mating # much simpler. # # By default the program is set up to generate a 5 digit target and # try 3 times to find the best solutions within 100 generations. If # you want to change these settings then read the globals section # below and the &main subroutine. # # NOTE: Leave food out near the computer to tempt back any beasties # that escape. They are particularly fond of chili chocolate. Yum! # ######################################################################## # This top section contains globals that you may want to tweak. ######################################################################## # The default number of generations to try, if not given $NUM_GENERATIONS = 100; # Default number of digits in our random target number, if not given $TARGET_DIGITS = 5; # The number of complete simulation runs to do for a particular target. $NUM_RUNS = 3; ######################################################################## # You probably don't want to tweak the values below but if you do then # go for it, the current values are empirically derived. # # Note that it is certainly possible to break the algorithm by picking # silly values. There is no validation. ######################################################################## # The percentage chance that a new child beastie might be mutated $MUTATION_RATE = 60; # The percentage chance that an individual expression will be mutated, # rather than a complete expression being deleted, generated, duplicated etc. $MUTATE_EXP_RATE = 40; # The percentage chance of flipping between parent genomes when # generating a child. $CROSSOVER_RATE = 20; # The number of beasties in each tournament pool. See # &generate_beastie_child for details. Changing this changes the # selection pressure but make it too big and you risk not being able # to get out of local maxima. # # A tournament size of 2 turns off selection *completely* because # there are only two contestants in the pool. Not much of a contest, # and it turns the program into a random walk. $TOURNAMENT_SIZE = 10; # Regular expression to identify operators in expressions $OP_REGEXP = '[\/\^\+\-\*]'; # The multiplier on the number of digits in the target for the population size $POP_SIZE_MULT = 50; # The multiplier on the number of digits in the target for the genome size $GENOME_SIZE_MULT = 3; # How often do we want the generations to report? 0 for never. $GENERATION_TICKER = 10; # The version banner for this program $VERSION = "beastly_maths.pl (Version: Aardvark.Buffalo) by Sapient Fridge"; ######################################################################## # Known limitations and possible improvements: # # There is no validation so a population size of 1, target size of 500 # digits or a tournament pool size of 2 etc. will probably break the # algorithm. I haven't investigated where the limits are. # # The program seems to mess up Perl's garbage collection. Multiple # large generation runs searching for multi-billion numbers get # exponentially longer for some reason. I guess there is a memory leak # in perl somewhere and I'm provoking it. Bizarrely when that # happens, if you are running in a Linux VMware virtual machine, it # messes something up to the point where windows can't talk to Samba # any more (until you reboot the virtual machine). # # It would be nice if there was a way to track ancestry so you could # see where an expression came from. Even nicer would be a web page # to explore ancestry and show the nested hierarchies. # # The genome size, mutation rate etc. could be a part of the beastie # genome as well e.g. as the third list (after the expressions and # operators). That way the code could optimise itself rather than # using hardcoded values. ######################################################################## ######################################################################## # And on to the main program of the night... ######################################################################## sub main { ## Run one or more population simulations to generate an ## expression which evaluates to a given (or random) number. This ## is done by generating a population of random expression then ## mating and mutating them over multiple generations until the ## target number is reached. ## ## See the various globals above which control aspects of the ## simulation. ## ## Arguments: ## ## target : The target value to aim for. If this is not given ## then one will be generated randomly. ## ## num_generations : The number of generations to run each ## simulation for. If this number of runs is done and no solution ## is found then the simulation will continue until the target ## value is hit, then it will end. ## ## num_runs : The number of simulation runs to do. ## ## Nothing returned. my ($target, $num_generations, $num_runs) = @_; my (@top_beasties, $beastie_l, $count, $genome_size, $population_size, $waste); print "$VERSION\n"; # Generate a random number with the right number of digits if (not $target) { $target = &random_digit(); for (1..$TARGET_DIGITS-1) { $target .= &random_char("0123456789")}; } # How many generations will we try? if (not $num_generations) { $num_generations = $NUM_GENERATIONS } # And how many simulation runs? if (not $num_runs) { $num_runs = $NUM_RUNS } # Make the population size suitable for the number of digits $population_size = (length $target) * $POP_SIZE_MULT; $genome_size = (length $target) * $GENOME_SIZE_MULT; print "\nTarget : $target\n"; print "Population : $population_size\n"; print "Genome size : $genome_size\n"; print "Max Generations : $num_generations\n"; # Generate the simulation runs and keep track of the winner from each for (1..$num_runs) { ($beastie_l, $count) = &do_run($num_generations, $target, $genome_size, $population_size); if ($beastie_l) { push @top_beasties, $beastie_l; } } # Show our list of winners print "========================================================================\n"; print "The final best beasties from $num_runs simulation runs are as follows:\n\n"; &show_population(\@top_beasties, $target); print "========================================================================\n"; # If we are in a command shell we will vanish without warning if we don't wait for the user print "\nPress return to exit.\n"; $waste = ; } sub do_run { ## Actually go ahead and do a simulation run. This will generate a ## population from scratch then mate and mutate it looking for the ## target value to be generated. ## ## Arguments: ## ## num_generations : The maximum number of generations to run. If ## it is not given then it will stop when the target value is ## reached. See the &main subroutine for more details. ## ## target : The target value to aim for. ## ## genome_size : The number of sub-expression domains in the ## genome. The larger this is then the more variation but the ## weaker the length selection. ## ## population_size : The number of beasties we are running at each ## generation. The higher this is the more variety their is, but ## the slower things run. ## ## Returned: ## ## alltime_best_beastie_l : Reference to the list of expressions ## and operators which represents to the best beastie from all the ## generations ## ## total_count : The total number of beasties generated by the ## simulation. my ($num_generations, $target, $genome_size, $population_size) = @_; my ($population_l, $val, $score, $count, $total_count, $best_beastie_l, $best_score, $generation, $alltime_best_beastie_l); $generation = $total_count = 0; # Loop forever, or until we hit the maximum number of generations until ($num_generations and $generation == $num_generations) { $generation++; ($population_l, $count) = &generate_population($population_l, $target, $genome_size, $population_size); $total_count += $count; # This is the best beastie in this particular generation ($best_beastie_l, $val, $score) = &find_best_beastie($population_l, $target); # If we hit the value then keep track of the winner if ($val == $target and (not $best_score or $score < $best_score)) { $alltime_best_beastie_l = $best_beastie_l; $best_score = $score; if (not $num_generations) { last } } # Show the user that we are making progress if ($GENERATION_TICKER and not ($generation % $GENERATION_TICKER)) { print "Generation: $generation : "; if ($alltime_best_beastie_l) { print "Best success : "; &show_beastie($alltime_best_beastie_l, $target, "short"); } else { print "Best attempt : "; &show_beastie($best_beastie_l, $target, "short"); } } # Rats, we are meant to finish but didn't find *any* match. if ($generation == $num_generations and not $best_score) { print "Sorry mate but I couldn't find a single good beastie in the entire population of little runts :-(\nDon't worry, I'll soldier on until I find one!\n"; # Change so we only finish when we find one $num_generations= 0; } } # Print out some stats. Note that the count will include some beasties generated after the winner print "Beastie count : $total_count\n"; print "Generations : $generation\n"; print "Best beastie : "; &show_beastie($alltime_best_beastie_l, $target, "short"); print "\n"; return $alltime_best_beastie_l, $total_count; } sub find_best_beastie { ## Search the population of beasties for the highest scoring one ## and return it. ## ## Arguments: ## ## population_l : Reference to the list of beasties in the population ## ## target : The target value the simulation is aiming at. This is ## needed to score the beasties. ## ## Returned: ## ## best_beastie_l : Reference to the list of expressions and ## operators which make up the best beastie. ## ## val : The value that the beasties expression gave when evaluated ## ## score : The fitness score of the best beastie my ($population_l, $target) = @_; my ($best_beastie_l, $exp, $val, $score, $best_score, $beastie_l); # Find the best beastie by scoring them all foreach $beastie_l (@$population_l) { ($exp, $val, $score) = &calculate_score($beastie_l, $target); # Keep track of the best beastie if (not $best_score or $score < $best_score) { $best_score = $score; $best_beastie_l = $beastie_l; } } # Get the full stats of the best beastie ($exp, $val, $score) = &calculate_score($best_beastie_l, $target); return $best_beastie_l, $val, $score; } sub build_full_expression { ## Return the full expression for a beastie. This is built from ## the sub-expressions and operators that the beastie contains. ## ## ## Arguments: ## ## beastie_l : The beastie is a reference to a list of two lists. ## The first list is the expressions and the second list is the ## list of operators which go before each of those expressions ## (the first operator is not used). ## ## Returned: ## ## full_exp : The expression built from the component parts within ## the beastie. ## ## val : The value of the expression when evaluated. my ($beastie_l) = @_; my ($expressions_l, $operators_l, $full_exp, $exp, $val, $op, $genome_size); ($expressions_l, $operators_l) = @$beastie_l; # We get the first expression by hand as it has no operator $full_exp = $expressions_l->[0]; $genome_size = @$expressions_l; # Add the expression domains together to make the full expression for (1..$genome_size-1) { if ($exp = $expressions_l->[$_]) { if ($full_exp) { $op = $operators_l->[$_]; $full_exp .= $op . $exp; } else { $full_exp = $exp; } } } # So, what is the result? $val = &calculate_value($full_exp); return $full_exp, $val; } sub show_population { ## Print out the entire population of beasties in fitness order (fittest first) ## ## Arguments: ## ## population_l : Reference to a list of beasties which make up the population. ## ## target: The target value the simulation is trying to reach. ## ## Nothing returned. my ($population_l, $target) = @_; my ($beastie_l, @sorted_beasties, $exp1, $val1, $score1, $exp2, $val2, $score2); @sorted_beasties = sort { ($exp1, $val1, $score1) = &calculate_score($a, $target); ($exp2, $val2, $score2) = &calculate_score($b, $target); return $score1 <=> $score2; } @$population_l; foreach $beastie_l (@sorted_beasties) { &show_beastie($beastie_l, $target, "short"); } } sub show_beastie { ## Print out the details of a beastie, including the expression, ## the value and the fitness score. The full genome can ## optionally be printed for debugging. ## ## Arguments: ## ## beastie_l : Reference to the list of expressions and operators ## that make up the beastie. ## ## target : The target value that the simulation is aiming for ## ## short_q : Pass true for a short, one line description or false ## to see the sequences and operators that make up the beasties ## genome. my ($beastie_l, $target, $short_q) = @_; my ($expressions_l, $operators_l, $exp, $op, $val, $score, $subexp, $genome_size); ($exp, $val) = &build_full_expression($beastie_l); # Nothing there if (not $exp or not $val) { print "DEAD BEASTIE\n"; return; }; ($exp, $val, $score) = &calculate_score($beastie_l, $target); $genome_size = @{$beastie_l -> [0]}; if ($short_q) { print "$exp = $val (Score = $score)\n"; } else { ($expressions_l, $operators_l) = @$beastie_l; for (0..$genome_size-1) { $op = $operators_l -> [$_]; $subexp = $expressions_l -> [$_]; print "$op : $subexp\n"; } print "Expression : $exp\n"; print "Value : $val\n"; print "Score : $score\n\n"; } } sub mutate_beastie { ## Mutate a beastie in situ. This can change either the number of ## expression domains at the top level or it can mutate the ## expression itself. Note that the mutation may not happen or may ## not be visible e.g. changing the operator for a blank domain. ## ## The mutation types are: ## ## :mutate_expression Mutate the expression itself. See &mutate_expression for details. ## :delete_expression Delete a random expression completely ## :create_expression Fill an empty domain with a new expression ## :duplicate_expression Copy a expression to an empty domain ## :mutate_operator Mutate the operator between two expression domains ## ## Arguments: ## ## beastie_l : Reference to list of expression and operators that ## make up the simulation creature. See &build_full_expression ## for details. ## ## Nothing returned. my ($beastie_l) = @_; my ($mutation_type, $expressions_l, $operators_l, @exp_indices, @blank_indices, $exp, $index, $genome_size); ($expressions_l, $operators_l) = @$beastie_l; $genome_size = @{$beastie_l -> [0]}; if (&percent_chance_q($MUTATE_EXP_RATE)) { # Mutate the expression itself. $mutation_type = ":mutate_expression"; } else { $mutation_type = &random_value([":delete_expression", ":create_expression", ":duplicate_expression", ":mutate_operator"]); } # Record the blank and non-blank expressions for (0..$genome_size-1) { if ($exp = $expressions_l -> [$_]) { push @exp_indices, $_; } else { push @blank_indices, $_; } } # He's dead Jim if (not @exp_indices) { return } # Pick a target expression as most mutation need one $index = &random_value(\@exp_indices); if ($mutation_type eq ":delete_expression") { # Delete a random expression completely $expressions_l -> [$index] = ""; } elsif ($mutation_type eq ":create_expression" and @blank_indices) { # Fill an empty domain with a new expression $index = &random_value(\@blank_indices); $expressions_l -> [$index] = &generate_expression(); } elsif ($mutation_type eq ":mutate_expression") { # Mutate the expression itself $exp = $expressions_l -> [$index]; $expressions_l -> [$index] = &mutate_expression($exp); } elsif ($mutation_type eq ":duplicate_expression" and @blank_indices) { #Copy a expression to an empty domain $exp = $expressions_l -> [$index]; $index = &random_value(\@blank_indices); $expressions_l -> [$index] = $exp; } elsif ($mutation_type eq ":mutate_operator") { # Mutate the operator between two expression domains $index = &random_index($operators_l); $operators_l -> [$index] = &random_operator(); } } sub generate_population { ## Generate a population of beasties either from scratch or from ## an old population. ## ## Arguments: ## ## old_population_l : The reference to the list of beasties which ## make up the current population. These will be used as breeding ## stock for the next generation. If it isn't given then a new ## random population is created. ## ## target : The target value the simulation is trying to ## reach. This is used to score the fitness of the beasties so the ## fitter ones can mate more often. ## ## genome_size : The number of sub-expression domains in the ## genome. The larger this is then the more variation but the ## weaker the length selection. ## ## population_size : The number of beasties we are running at each ## generation. The higher this is the more variety their is, but ## the slower things run. ## ## Returned: ## ## population_l : Reference to the list of beasties which make up ## the new population. ## ## count : The number of beasties that were generated. my ($old_population_l, $target, $genome_size, $population_size) = @_; my (@population, $beastie_l, $exp, $val, $score, $count); $count = 0; if ($old_population_l) { # Use the old population as breeding stock. Each beastie gets at least one chance to breed! for (0 .. $population_size-1) { # Generate a child, the beastie we are looking at gets a free pass push @population, &generate_beastie_child($old_population_l, $target, $_); $count++; } } else { # Make a brand new population of random beasties until (@population == $population_size) { $beastie_l = &generate_random_beastie($genome_size); ($exp, $val, $score) = &calculate_score($beastie_l, $target); $count++; # If the beastie died (non-integral) then make another one to replace it. if ($val) { push @population, $beastie_l }; } } return \@population, $count; } sub generate_random_beastie { ## Generate a new beastie completely at random. This is used to ## seed the population. ## ## No arguments. ## ## Returned: ## ## beastie_l : Reference to the list of expressions and operators ## that make up the new beastie. my ($genome_size) = @_; my (@expressions, @operators, $exp); until (@expressions == $genome_size) { $exp = &generate_expression(); if (&calculate_value($exp)) { push @expressions, $exp; } else { push @expressions, ""; } push @operators, &random_operator(); } return [\@expressions, \@operators]; } sub generate_beastie_child { ## This subroutine is the meat of the program. It loops over every ## beastie in the population and puts it in a pool with a number ## of other, randomly selected, beasties. The top two beasties in ## the pool then get to mate. ## ## This is pretty similar to tournament selection and has the ## benefit that higher ranking beasties automatically breed more ## without having to mess around with assigning probabilities. ## The only individuals with absolutely no chance of breeding are ## the very lowest ranking ones. ## ## Note that the child beastie may be mutated before being ## returned. ## ## Arguments: ## ## population_l : Reference to a list of beasties which make up ## the population. ## ## target : The value that the simulation is trying to reach ## ## index : The individual who gets a free pass into the pool. It ## still has to outrank the others in the pool to actually breed. ## The index doesn't have to be given. ## ## child_l : Reference to the list of expressions and operators ## which make up the child beastie which was created. my ($population_l, $target, $index) = @_; my (@parent_indices, @sorted_indices, $parent1_l, $parent2_l, $parent1_index, $parent2_index, $child_l, $loser_index, $loser_l); # Put in the potential parent index if it was passed in if ($index) { push @parent_indices, $index } # Add some more until (@parent_indices > 1 and @parent_indices >= $TOURNAMENT_SIZE) { push @parent_indices, &random_index($population_l); } # Sort the indices by the score of the beastie they point to @sorted_indices = sort { &calculate_indexed_score($population_l, $a, $target) <=> &calculate_indexed_score($population_l, $b, $target) } @parent_indices; # Find the parents by taking the two highest ranking candidates $parent1_index = shift @sorted_indices; $parent2_index = shift @sorted_indices; $parent1_l = $population_l -> [$parent1_index]; $parent2_l = $population_l -> [$parent2_index]; # Generate a child beastie from the parents $child_l = &mate_beasties($parent1_l, $parent2_l); # # Find the worst one # $loser_index = pop @sorted_indices; # $loser_l = $population_l -> [$loser_index]; # # print "PARENT 1\n"; # &show_beastie($parent1_l, $target); # # print "\nPARENT 2\n"; # &show_beastie($parent2_l, $target); # # print "\nLOSER\n"; # &show_beastie($loser_l, $target); # # print "\nCHILD\n"; # &show_beastie($child_l, $target); if (&percent_chance_q($MUTATION_RATE)) { &mutate_beastie($child_l); # print "\nMUTANT CHILD\n"; # &show_beastie($child_l, $target); } return $child_l; } sub mate_beasties { ## Take two beasties and generate a child beastie from them using ## crossover to combine the sequences. ## ## Arguments: ## ## beastie1_l, beastie2_l : References to the lists of expressions ## and operators for the proud parent beasties. ## ## Returned: ## ## beastie_child_l : Reference to the list of expressions and ## operators which make up the new child beastie. my ($beastie1_l, $beastie2_l) = @_; my ($beastie1_q, @child_exps, @child_ops, $exp, $op, $genome_size); $genome_size = @{$beastie1_l -> [0]}; # Randomly choose which parent we start on $beastie1_q = &random_value([0,1]); # Loop over the entire sequence, taking the expression and # operator from one parent or the other for (0..$genome_size-1) { if ($beastie1_q) { $exp = $beastie1_l -> [0] -> [$_]; $op = $beastie1_l -> [1] -> [$_]; } else { $exp = $beastie2_l -> [0] -> [$_]; $op = $beastie2_l -> [1] -> [$_]; } # build the child push @child_exps, $exp; push @child_ops, $op; # Maybe crossover which parent we are reading from? if (&percent_chance_q($CROSSOVER_RATE)) { $beastie1_q = not $beastie1_q } } # Beasties are list reference to two list references, the first # list is the expressions and the second is the inter-expression operators. return [\@child_exps, \@child_ops]; } sub calculate_indexed_score { ## This is used to calculate the score of a beastie within a ## population and is used when sorting a population. ## ## Arguments: ## ## population_l : Reference to the list of beasties which make up ## the population. ## ## index : The index of the beastie we want the score of, 0 indexed ## ## target : Numerical value that the simulation is aiming for ## ## Returned: ## ## score : Fitness score for the beastie. See &calculate_score() my ($population_l, $index, $target) = @_; my ($exp, $val, $score, $beastie_l); $beastie_l = $population_l -> [$index]; ($exp, $val, $score) = &calculate_score($beastie_l, $target); return $score; } sub calculate_score { ## Calculate the expression, value and score for a beastie in the simulation. ## ## The closer to the target number the better, but once it's very close ## then the shorter the better. This has the desired result: ## ## (difference ^ 2) + length ## ## If an exact match hasn't been found then a penalty of 100 is ## added. This is to stop short but inaccurate sequences beating ## long, accurate ones. ## ## Invalid formula and non-integral are not viable so it returns a ## *really* big number for them. ## ## Arguments: ## ## beastie_l : Reference to list of expression and operators that ## make up the simulation creature. See &build_full_expression. ## ## target : The target value that we are aiming for. ## ## Returned: ## ## exp : The expression generated from the beastie. ## ## val : Value that the expression yielded when evaluated. ## ## score : The fitness score, the lower the better. my ($beastie_l, $target) = @_; my ($exp, $val, $length, $diff, $score); ($exp, $val) = &build_full_expression($beastie_l); # If it failed or wasn't integral then score it very badly. Perl # doesn't like generating infinity so it can't be used directly. if ($val eq "") { return "", 0, 1e1000000 } $length = length $exp; $diff = abs ($target - $val); $score = ($diff + 1) ** 2 + $length; # Add the penalty for not hitting th target if ($val ne $target) { $score += 100 }; return $exp, $val, $score; } sub calculate_value { ## Evaluate an expression and return the value. ## ## Arguments: ## ## exp : The expression to evaluate ## ## Returned: ## ## result : The result value after running the expression through ## the compiler. This will be "" if the compilation failed or the ## result was non-integral my ($exp) = @_; my ($result); # Replace ^ with ** for the compiler $exp =~ s/\^/**/g; # Let the compiler evaluate it eval("\$result=$exp"); if (not defined($result)) { # Compiler barfed, this should only be from infinity in the # result as all the expressions should be compileable. return ""; } elsif (int $result != $result) { # Kill the non-integral results $result = ""; } return $result; } sub mutate_expression { ## Repeatedly try to mutate a string representing a single ## expression until it changes, then return the mutated string. ## ## See &int_mutate_expression for details on the possible ## mutations. ## ## Arguments: ## ## string : The string to mutate ## ## Returned: ## ## new_string : The string after it has been mutated. my ($string) = @_; my ($new_string); $new_string = $string; # Loop until something changes until ($new_string ne $string) { $new_string = &int_mutate_expression($string); } return $new_string; } sub int_mutate_expression { ## Try to mutate a string representing a single expression and ## return the mutated string. Some mutations will fail or result ## in nothing changing (e.g. replacing a "+" character with a "+") ## ## Use &mutate_expression to guarantee a mutation. ## ## The possible mutations are: ## ## :delete_brackets Delete a random pair of brackets ## :insert_expression Add a expression within or at an end of the string ## :change_digit Change a single digit to another ## :change_operator Change a random operator to a new random operator ## :delete_digit Delete a random digit and an operator that goes with it ## ## Arguments: ## ## string : The string to mutate ## ## Returned: ## ## new_string : The mutated string, it may be the same as the original my ($string) = @_; my ($mutation_type, $new_string, $end_string, $part1, $part2, $part3, $part4, $ops); $new_string = $string; $mutation_type = &random_value([":delete_brackets", ":insert_expression", ":change_digit", ":change_operator", ":delete_digit"]); if ($mutation_type eq ":change_operator") { # Change a random operator to a new random operator ($part1, $part2) = &split_string($string, $OP_REGEXP); if ($part1 or $part2) { $new_string = $part1 . &random_operator . $part2; } } elsif ($mutation_type eq ":delete_digit") { # Delete a random digit and an operator that goes with it ($part1, $part2) = &split_string($string, '\d'); if ($part1 or $part2) { # Random which operator we keep, left or right $ops = ""; if ($part1 =~ m/$OP_REGEXP$/) { $ops .= substr($part1, (length $part1)-1, 1); $part1 =~ s/$OP_REGEXP$//; } if ($part2 =~ m/^$OP_REGEXP/) { $ops .= substr($part2, 0, 1); $part2 =~ s/^$OP_REGEXP//; } if (length $ops == 2) { $new_string = $part1 . &random_char($ops) . $part2; } else { $new_string = $part1 . $part2; } } } elsif ($mutation_type eq ":change_digit") { # Change a single digit to another ($part1, $part2) = &split_string($string, '\d'); if ($part1 or $part2) { $new_string = $part1 . &random_digit . $part2; } } elsif ($mutation_type eq ":insert_expression" and &percent_chance_q(50)) { # Add a expression to the start or end of the string if (&percent_chance_q(50)) { $new_string = &generate_expression($string); } else { $new_string = &generate_expression() . &random_operator . $string; } } elsif ($mutation_type eq ":insert_expression") { # Insert an expression, using an existing digit as the first value ($part1, $part2, $part3) = &split_string($string, '\d'); if ($part1 or $part2) { $new_string = $part1 . &generate_expression($part3) . $part2; } } elsif ($mutation_type eq ":delete_brackets") { # Delete a random pair of brackets ($part1, $part2) = &split_string($string, '\('); if ($part1 or $part2) { ($part3, $part4) = &split_string($part2, '\)'); $new_string = $part1 . $part3 . $part4; } } # Get rid of brackets with no contents as the compiler won't accept that. # Brackets around a single digit are also removed. while ($new_string =~ s/\((\d*)\)/$1/g) {} return $new_string; } sub split_string { ## Split a string at a random point which matches a given ## character regular expression. The string is split to either ## side of the character and the matching character and the ## strings to either side of it are returned. ## ## For example splitting 9^(3*7+(8/2)) by "\d" might give: ## ## "9^(3*", "+(8/2))", "7" ## ## This is done to find suitable insertion/deletion location for ## mutations. If no matches are found then empty strings are ## returned. ## ## Arguments: ## ## string : The string to chop ## ## regexp : The character regular expression to loop for ## ## Returned: ## ## start_string : The section of the string to the left of the character ## ## end_string : The string to the right ## ## char : The matching character itself my ($string, $regexp) = @_; my (@indices, $char, $index, $start_string, $end_string, $last); # Find the indices of the characters that match the regexp $last = (length $string) - 1; for (0..$last) { $char = substr($string, $_, 1); if ($char =~ m/$regexp/) { push @indices, $_; } } # Nothing suitable found if (not @indices) { return "","","" } # pick one at random $index = &random_value(\@indices); $char = substr($string, $index, 1); # And split off the strings to both sides $start_string = substr($string, 0, $index); if ($index == $last) { $end_string = ""; } else { $end_string = substr($string, $index+1, $last-$index); } return $start_string, $end_string, $char; } sub generate_expression { ## Generate a random expression in the form ## where is either a digit or an ## expression and is one of +-*/^ e.g. ## ## 9^(3*7+(8/2)) ## ## Arguments: ## ## Val1 : The left hand value. If this isn't given then one will ## be generated. ## ## Returned ## ## expression : The randomly generated expression. my ($val1) = @_; my ($val2, $op); if (not $val1) { if (&percent_chance_q(10)) { # Recurse! Recurse! $val1 = "(" . &generate_expression . ")"; } else { $val1 = &random_digit(); } } if (&percent_chance_q(10)) { # Recurse! Recurse! $val2 = "(" . &generate_expression . ")"; } else { $val2 = &random_digit(); } $op = &random_operator(); return "$val1$op$val2"; } sub random_operator { ## Return a random operator e.g. +-^* or / ## ## Note that / appears at low frequency as we want to avoid ## non-integral expressions. ## ## No arguments ## ## Returned: ## ## op : The chosen operator. return &random_char("/+-*^+-*^+-*^"); } sub random_digit { ## Return a random digit between 1 and 9 ## ## No arguments ## ## Returned: ## ## digit : The chosen digit. return &random_char("123456789"); } sub random_char { ## Return a random character from a string ## ## Arguments: ## ## string : The string to choose a character from ## ## Returned: ## ## char : The chosen character. my ($string) = @_; my ($index); $index = int ((rand) * length $string); return substr($string, $index, 1); } sub random_value { ## Pick a random value out of a list of values ## ## Arguments: ## ## values_l : Reference to a list of values ## ## Returned: ## ## value : The value that was chosen my ($values_l) = @_; my ($value); $value = $values_l -> [&random_index($values_l)]; return $value } sub random_index { ## Return a random index into a list of values ## ## Arguments: ## ## values_l : Reference to a list of values ## ## Returned: ## ## index : The index that was chosen my ($values_l) = @_; my ($index, $count); $count = @$values_l; $index = int ((rand) * $count); return $index; } sub percent_chance_q { ## Returns true a certain percentage amount of the time. ## ## Arguments: ## ## percentage : The percentage time we want true returned ## ## Returned: ## ## true_q : True or false, randomly but at a frequency controlled ## by the percentage argument. my ($percentage) = @_; return $percentage > ((rand) * 100); } ## Launch the simulator. The arguments are summarised here but see ## the &main subroutine for details. ## ## The first is the target value if you don't want it randomly ## generated. $TARGET_DIGITS controls the number of digits in a random ## target. ## ## The second number is the maximum number of generations (default to ## the global $NUM_GENERATIONS if not given). ## ## The third is the number of simulations to run (each are completely ## isolated from the others). Defaults to the global $NUM_RUNS. &main(0, 0, 0);