#!/usr/bin/perl -w
########################################################################
# Rename this file to chromosome_44_sim.pl after download
########################################################################
# This program models the change in chromosome frequency in a
# population when different reproductive advantages are assigned. The
# program was written to model the spread of fused chromosomes but the
# model would apply to other mutations.
#
# Three numbers can be passed in, the first is the number of
# simulations to run (defaults to 100).
#
# The second is the percentage advantage for a fused chromosomes,
# which can be negative (defaults to 1). This percentage is the
# percentage chance of an extra child at each mating.
#
# Finally the third number is the report frequency i.e. how many
# generations pass between reports. If this is 0 (the default) it
# only reports at the end of each simulation run.
#
# Example use:
#
# chromosome_44_sim.perl
# chromosome_44_sim.perl 50 2 100
# chromosome_44_sim.perl 200 -1 50
#
# If you want to see a graph then load the output numbers into Excel.
#
# Note: There are *two* sets of data produced, these generate a graph
# for fuse wins and unfuse wins. Which is output after each run
# depends on who won (the program says which is which above the
# numbers).
#
# See the "main" subroutine for more details.
########################################################################
use strict;
use vars qw ( $SAMPLES $MAX_POP $NUM_CHILDREN $DEFAULT_NUM_RUNS
$DEFAULT_PERCENTAGE_ADVANTAGE $DEFAULT_REPORT_FREQ
@STARTING_POPULATION);
########################################################################
# You might want to try changing the numbers in this section #
########################################################################
# The number of samples for the graphs. The number of generations per
# run changes so the generation numbers for each run are sampled at
# regular intervals so the graphs can be superimposed. This results in
# one overall graph which represents the average shape of all the
# individual runs. See the "update_by_gen" subroutine.
$SAMPLES = 50;
# The number of individuals in the population
$MAX_POP = 1000;
# The base range of children at each mating is zero up to this
# number. A child may be added or removed afterwards depending on the
# advantage/disadvantage of having a fused chromosome parent.
$NUM_CHILDREN = 3;
# The default percentage advantage for fused chromosomes if none is passed
$DEFAULT_PERCENTAGE_ADVANTAGE = 1;
# The default report frequency. Zero means only one report is
# produced for each simulation run (at the end)
$DEFAULT_REPORT_FREQ = 0;
#The default number of simulation runs to execute
$DEFAULT_NUM_RUNS = 100;
# Founding population. The numbers represent the numbers of chromosomes
# the founders have. Two of them have a fused chromosome pair
# (e.g. 14 & 21) so have a lower chromosome count.
@STARTING_POPULATION = (46, 46, 45, 45);
########################################################################
sub main {
## Run a number of population simulations modelling the spread of
## fused chromosomes in a population where 50% of the founders
## carried a fused chromosome.
##
## The advantage (or disadvantage) of having a fused chromosome is
## modelled by a percentage chance of having an additional child
## when pairs mate (or losing a child if the chance is negative)
##
## Reports can be generated at intervals to show how the
## population changes over the generations (see the "report"
## method for details)
##
## After each run is completed a report is generated which gives
## how many runs were completed, how many were won by fused and
## unfused populations, the average number of generations the win
## took. Finally numbers representing the population change over
## time is generated for both fused and unfused wins. These can
## be imported into Excel to generate graphs (see the
## "update_by_gen" method for details)
##
## Arguments:
##
## num_runs : The number of simulation runs to execute
##
## percent_advantage: This control the advantage (or disadvantage)
## of having a fused chromosome. If it is positive then it gives
## the percentage chance of a mating producing an additional
## child, if it is negative then it is the percentage chance of
## having one child less.
##
## report_freq : How many generations need to pass between
## reports, give zero if you don't want any reports within each
## simulation run. You will still get one at the end.
my ($num_runs, $percent_advantage, $report_freq) = @ARGV;
$num_runs = $num_runs||$DEFAULT_NUM_RUNS;
$percent_advantage = $percent_advantage||$DEFAULT_PERCENTAGE_ADVANTAGE;
$report_freq = $report_freq||$DEFAULT_REPORT_FREQ;
my (@fusewin_44_by_gen, @fusewin_45_by_gen, @fusewin_46_by_gen,
@unfusewin_44_by_gen, @unfusewin_45_by_gen, @unfusewin_46_by_gen,
$gen_44_l, $gen_45_l, $gen_46_l,
$runs, $tot_fused_wins, $tot_unfused_wins, $tot_fused_win_gens, $tot_unfused_win_gens, $fuse_win_q);
$tot_fused_wins = $tot_unfused_wins = $tot_fused_win_gens = $tot_unfused_win_gens = 0;
for ($runs=1;$runs<=$num_runs;$runs++) {
print "------------------------------------------------------------------------\n";
print "Simulation run # $runs\n\n";
($fuse_win_q, $gen_44_l, $gen_45_l, $gen_46_l) = &run_simulation($percent_advantage, $report_freq, $runs);
if ($fuse_win_q) {
$tot_fused_wins++;
$tot_fused_win_gens+=scalar @$gen_44_l;
} else {
$tot_unfused_wins++;
$tot_unfused_win_gens+=scalar @$gen_44_l;
}
print "Summary result of all $runs simulation runs completed so far:\n";
print "Percentage fuse win : " . int (($tot_fused_wins / $runs) * 100) . "%\n";
print "Percentage unfuse win : " . int (($tot_unfused_wins / $runs) * 100) . "%\n";
print "Fused win gen average : " . ($tot_fused_wins ? int ($tot_fused_win_gens / $tot_fused_wins) : "N/A") . "\n";
print "Unfused win gen average: " . ($tot_unfused_wins ? int ($tot_unfused_win_gens / $tot_unfused_wins) : "N/A") . "\n";
if ($fuse_win_q) {
print "Fuse win graph:\n";
&update_by_gen(\@fusewin_44_by_gen, $gen_44_l, $tot_fused_wins, "44 chromosomes: ");
&update_by_gen(\@fusewin_45_by_gen, $gen_45_l, $tot_fused_wins, "45 chromosomes: ");
&update_by_gen(\@fusewin_46_by_gen, $gen_46_l, $tot_fused_wins, "46 chromosomes: ");
} else {
print "Unfuse win graph:\n";
&update_by_gen(\@unfusewin_44_by_gen, $gen_44_l, $tot_unfused_wins, "44 chromosomes: ");
&update_by_gen(\@unfusewin_45_by_gen, $gen_45_l, $tot_unfused_wins, "45 chromosomes: ");
&update_by_gen(\@unfusewin_46_by_gen, $gen_46_l, $tot_unfused_wins, "46 chromosomes: ");
}
}
print "------------------------------------------------------------------------\n\n";
}
sub update_by_gen {
## Tricky method to sample the number of individuals at each
## generation in a simulation run and combine that with previous
## runs. This is so we can generate a single graph showing what
## the cumulative population change over time looks like.
##
## This is non-trivial because the number of generations in each
## simulation run will be different, so the algorithm take the same
## number of evenly spaced samples from each run (regardless of
## how many generations there are) and adds it to the number of
## individuals at that same sample position in previous runs.
##
## For example if there were 3 samples needed then for a 5
## generation run we would take values at generation 1,3 and 5.
## For a 15 generation run we would take generations 1, 8 and
## 15. This allows values from multiple runs to be combined into
## one graph.
##
## Dividing the number at any sample position by the total number
## of runs gives the average number of individuals at that sample
## position, and that is the number that is printed out.
##
## The number of samples taken is controlled by the SAMPLES
## global.
##
## Arguments:
##
## all_by_gen_l : Reference to the list of total sampled
## populations numbers for all simulation runs for a single
## chromosome number.
##
## by_gen_l : Reference to the list of population numbers at each
## generation for a particular chromosome number generated by a
## single simulation run. This list will be sampled to combine it
## with previous runs.
##
## runs : The number of simulation runs represented by each sample
## position. Dividing the number at a sample position by the
## number of runs gives the average population at that sample
## position, and that is what is printed out.
##
## title : A title to print out so which chromosome count is
## represented by the line of numbers being printed out.
my ($all_by_gen_l, $by_gen_l, $runs, $title) = @_;
my ($interval, $i, $sample_i, $sample_pop, $pop, $first_run_q, $tot_pop);
print $title;
# Work out the interval between our samples for this particular run
$interval = ((@$by_gen_l -1)/ ($SAMPLES-1));
# For the first run we just copy in the population numbers as they are
$first_run_q = not @$all_by_gen_l;
# Loop over our sample indices and total up the populations at those positions
for ($i = 0;$i < $SAMPLES; $i ++) {
$sample_i = int (($i * $interval) + 0.5);
$sample_pop = $by_gen_l -> [$sample_i];
if ($first_run_q) {
$pop = $all_by_gen_l->[$i] = $sample_pop;
} else {
$tot_pop = $all_by_gen_l->[$i] += $sample_pop;
$pop = int (($tot_pop / $runs)+0.5);
}
print "$pop,";
}
print "\n";
}
sub run_simulation {
## Run a simulation of the generations until either the fused
## chromosomes win, or become extinct.
##
## Arguments:
##
## percent_advantage: This control the advantage (or disadvantage)
## of having a fused chromosome by supplying the probability of a
## pair having an extra child. For example 2% advantage means
## that a couple who have at least 1 fused chromosome between have
## a 2% chance of having 1 extra child. If it is negative then it
## is the percentage chance of having one child fewer when a
## parent has a fused chromosome. No bonus is given for both
## parents having a fused chromosome.
##
## report_freq : The number of generations between population
## reports being printed to the screen. If this is zero then no
## reports are printed until the end of each run.
##
## run_number : The count of the simulation runs that have been
## done so far.
##
## Returned:
##
## fused_win_q : True if the fused chromosomes won once either
## fused or unfused chromosomes take over the population, false
## otherwise (it's actually the number of 44 chromosome
## individuals in the population, non-zero=true)
##
## gen_44_l,gen_44_l and gen_44_l : References to the lists of the
## counts of each chromosome number at each generation.
my ($percent_advantage, $report_freq, $run_number) = @_;
my ($population_l, $generation, $tot_44, $tot_45,$tot_46,
@gen_44, @gen_45, @gen_46);
# Re-seed random numbers
srand(time() ^ ($$ + ($$ << 15 )));
# The starting population of two normal 46 chromosomes individuals
# and two fused chromosome carriers (45 chromosomes)
$population_l = [ @STARTING_POPULATION ];
# Run the simulation until a chromosome number becomes extinct
while (1) {
($population_l, $tot_44, $tot_45, $tot_46) = &next_generation($population_l, $percent_advantage);
# Track numbers at each generation
push @gen_44, $tot_44;
push @gen_45, $tot_45;
push @gen_46, $tot_46;
# Check if either fused or unfused chromosomes types are extinct
if (not $tot_45 and (not $tot_44 or not $tot_46 )) { last }
# Find out what generation we are on and generate the report
$generation = @gen_44;
if ($report_freq and not ($generation % $report_freq )) { &report($generation, $population_l) }
}
# Always report on the last generation
print "\nFinal report for simulation run #$run_number\n";
&report($generation, $population_l);
return $tot_44, \@gen_44,\@gen_45,\@gen_46;
}
sub next_generation {
## Generate the next generation by mating individuals chosen
## from the previous generation
##
## Arguments:
##
## population_l : Reference to the list of individuals in the
## previous generation. Each individual is represented by a
## number giving the number of chromosomes e.g. 44, 45 or 46.
##
## percent_advantage: This controls the advantage (or disadvantage
## if negative) of having a fused chromosome. See the
## "run_simulation" method for details.
##
## Returned:
##
## new_population_l : Reference to the list of individuals in the
## next generation
##
## tot_44, tot_45, tot_46 : The numbers of individuals with 44, 45
## and 46 chromosomes in the new population.
my ($population_l, $percent_advantage) = @_;
my ($parent1, $parent2, @new_population, $num_children,
$parent_has_fused_q, $tot_44, $tot_45, $tot_46, $num_44, $num_45,
$num_46, $percent_disadvantage);
$percent_disadvantage = $tot_44 = $tot_45 = $tot_46 = 0;
# Check if the advantage is negative, and make it a disadvantage if it is
if ($percent_advantage < 0) {
$percent_disadvantage = abs $percent_advantage;
$percent_advantage = 0;
}
# Generate the next population
while (@new_population < $MAX_POP) {
$parent1 = &pick_value($population_l);
$parent2 = &pick_value($population_l);
$num_children = (int ((rand) * ($NUM_CHILDREN+1)));
# Apply our reproductive success modifiers
$parent_has_fused_q = ($parent1 < 46 or $parent2 < 46);
if ($parent_has_fused_q and $percent_advantage and &percentage_chance_q($percent_advantage)) {$num_children++};
if ($parent_has_fused_q and $percent_disadvantage and &percentage_chance_q($percent_disadvantage)) {$num_children--};
($num_44, $num_45, $num_46) = &gen_children(\@new_population, $num_children, $parent1, $parent2);
$tot_44 += $num_44;
$tot_45 += $num_45;
$tot_46 += $num_46;
}
return \@new_population, $tot_44, $tot_45, $tot_46;
}
sub gen_children {
## Generate children for a pair of individuals in the population
##
## Arguments:
##
## new_population_l : Reference to the list holding the new
## generation. Each individual is represented by a single number
## giving how many chromosomes they have i.e. 44, 45 or 46. The
## new children will be added to the list.
##
## num_children : The number of children to generate (can be < 1)
##
## parent1 & parent2 : The parents of the children. These are
## just numbers representing the number of chromosomes.
##
## Returned:
##
## num44, num45, num46 : The number of children of each chromosome
## count that were generated
my ($new_population_l, $num_children, $parent1, $parent2) = @_;
my ($child, $num_fused, $num_unfused, $num_44, $num_45, $num_46);
$num_44 = $num_45 = $num_46 = 0;
# Sort the chromosomes numbers so the lowest is parent1
if ($parent1 > $parent2) { ($parent1, $parent2) = ($parent2, $parent1) }
while ($num_children > 0 and @$new_population_l < $MAX_POP) {
$num_children--;
if ($parent1 == 44 and $parent2 == 44) {
# Breeds true, all children have 44 chromosomes
$child = 44;
} elsif ($parent1 == 44 and $parent2 == 45) {
# 50/50 whether the child has 44 or 45 chromosomes
$child = &pick_value([44, 45]);
} elsif ($parent1 == 44 and $parent2 == 46) {
# All children have 45 chromosomes
$child = 45;
} elsif ($parent1 == 45 and $parent2 == 45) {
# 25% of 44, 50% 45 and 25% of 46 chromosomes
$child = &pick_value([44, 45, 45, 46]);
} elsif ($parent1 == 45 and $parent2 == 46) {
# 50/50 whether the child has 45 or 46 chromosomes
$child = &pick_value([45, 46]);
} elsif ($parent1 == 46 and $parent2 == 46) {
# Breeds true, all children have 46 chromosomes
$child = 46;
} else {
# This should never happen
print "*** Unknown chromosome numbers: $parent1 $parent2\n";
}
# Keep a tally of the children chromosome totals
if ($child == 44) {
$num_44 ++;
} elsif ($child == 45) {
$num_45 ++;
} else {
$num_46 ++;
}
push @$new_population_l, $child;
}
return $num_44, $num_45, $num_46;
}
sub report {
## Print out a brief report on the current population This gives
## the total population size, the percentage of each chromosome
## numbered individuals and the total number of fused and unfused
## chromosomes in the population (remember that each individual
## has a pair of fused or unfused chromosomes.)
##
## Arguments:
##
## generation : Integer given which generation this is
##
## population_l : Reference to a list holding the integer
## representing the number of chromosomes in each
## individual in the population i.e. a list of 44, 45 or 46
##
## Nothing returned
my ($generation, $population_l) = @_;
my ($pop44, $pop45, $pop46, $chrom_number, $pop, $citizen_l, $total_pop, $num_fused, $num_unfused);
$pop44 = $pop45 = $pop46 = $num_fused = $num_unfused = 0;
$pop = @$population_l;
foreach $chrom_number (@$population_l) {
if ($chrom_number == 44) {
$pop44++;
$num_fused += 2
} elsif ($chrom_number == 45) {
$pop45++;
$num_fused++;
$num_unfused++;
} else {
$pop46++;
$num_unfused += 2
}
}
# Find the number of individuals are in the population
$total_pop = @$population_l;
# Print out the results for this population
print "Generations : $generation\n";
print "Population size : $pop\n";
print "44 chromosomes : " . &calc_percentage($pop44, $total_pop) . "%\n";
print "45 chromosomes : " . &calc_percentage($pop45, $total_pop) . "%\n";
print "46 chromosomes : " . &calc_percentage($pop46, $total_pop) . "%\n";
print "Fused chromosomes : $num_fused\n";
print "UnFused chromosomes: $num_unfused\n\n";
}
sub calc_percentage {
## Calculate and return a percentage i.e. (value/total) * 100
##
## Arguments:
##
## value : The number representing the sample we are interested in
##
## total : The size of the sample the value is a fraction of
##
## Returned:
##
## percentage : Rounded string representing the percentage
my ($value, $total) = @_;
return sprintf "%.2f", ($value / $total) * 100;
}
sub pick_value {
## Pick a random value from a list of value
##
## Arguments:
##
## values_l : Reference to a list of values
##
## Returned:
##
## value : The chosen random value from the list
my ($values_l) = @_;
my ($value, $index, $count);
$count = @$values_l;
$index = int ((rand) * $count);
return $values_l -> [$index];
}
sub percentage_chance_q {
## This method true a given percentage of the times called
##
## Arguments:
##
## percentage : The percentage chance of true being returned
##
## Returned:
##
## result_q : Either true or false
my ($percentage) = @_;
return $percentage > ((rand) * 100);
}
&main;