#! /usr/bin/perl -w use strict; use Statistics::Distributions; use Getopt::Long; my $step = 0; my $count_by_a_yes_by_b_yes = {}; my $next_count_by_a_yes_by_b_yes; my $correct = 0; my $incorrect = 0; my $outstanding = 100; my $stop_at_portion_outstanding = 0.1; my $p_a_yes = 0.5; my $p_b_yes = 0.51; my $threshold_prob = 0.01; # You can set all of the parameters on the command line. # For the talk, I set total to 100000, and a_success, # b_success and cutoff to whatever was appropriate for # the simulation. # # It outputs a header, then the simulation results. The # fields in those results are: # # o Steps: The number of people in the test is actually # double this because we add one person to A and one # person to B at each step. # o Outstanding: How many tests are still running. Because # of the way I avoid tracking individual tests as long as I # can, you can have a fractional number outstanding. # o Correct: How many tests finished with B better than A. # o Incorrect: How many tests finished with A better than B. # # Be warned, some of the simulations took me over a day to run. # GetOptions( "a_success=f" => \$p_a_yes, "b_success=f" => \$p_b_yes, "cutoff=f" => \$threshold_prob, "total=i" => \$outstanding, "end_at=f" => \$stop_at_portion_outstanding, ) or die "Cannot process options"; my $threshold_chi_square = Statistics::Distributions::chisqrdistr(1, $threshold_prob); $count_by_a_yes_by_b_yes->{0}{0} = $outstanding; my $stop_at_x_outstanding = $outstanding * $stop_at_portion_outstanding; # Due to quantization errors, we may be slightly off. So we correct # for that. my $real_outstanding = $outstanding; my $last_outstanding = 0; #print "threshold: $threshold_chi_square\n"; print "trials: $outstanding\n"; printf "prob(a): %.02f\n", $p_a_yes; printf "prob(b): %.02f\n", $p_b_yes; printf "threshold: %.03f\n", $threshold_prob; print join "\t", "Step", "Outstanding", "Correct", "Incorrect"; print "\n"; my $quantizer = get_quantizer(); while ($outstanding > $stop_at_x_outstanding) { $next_count_by_a_yes_by_b_yes = {}; # Distribute them to the next. my $correction_ratio = $outstanding/$real_outstanding; for my $b_yes (keys %$count_by_a_yes_by_b_yes) { my $count_by_a_yes = $count_by_a_yes_by_b_yes->{$b_yes}; my @a_yeses = keys %$count_by_a_yes; for my $a_yes (keys %$count_by_a_yes) { my $count = $count_by_a_yes->{$a_yes} * $correction_ratio; my $a_success = $count * $p_a_yes; my $a_fail = $count - $a_success; my $both_success = $a_success * $p_b_yes; my $a_only_success = $a_success - $both_success; my $b_only_success = $a_fail * $p_b_yes; my $no_success = $a_fail - $b_only_success; $next_count_by_a_yes_by_b_yes->{$b_yes}{$a_yes} += $no_success; $next_count_by_a_yes_by_b_yes->{$b_yes + 1}{$a_yes} += $b_only_success; $next_count_by_a_yes_by_b_yes->{$b_yes}{$a_yes + 1} += $a_only_success; $next_count_by_a_yes_by_b_yes->{$b_yes + 1}{$a_yes + 1} += $both_success; } } $step++; $real_outstanding = 0; # Now pass through, figure out who we've decided on, and quantize. my @b_yeses = sort {$a <=> $b} keys %$next_count_by_a_yes_by_b_yes; for my $b_yes (@b_yeses) { my $count_by_a_yes = $next_count_by_a_yes_by_b_yes->{$b_yes}; my @a_yeses = sort {$a <=> $b} keys %$count_by_a_yes; filter_out_bottom_decisions( $b_yes, $count_by_a_yes, @a_yeses, ); # And reversing filters at the top. :-) filter_out_bottom_decisions( $b_yes, $count_by_a_yes, reverse @a_yeses, ); # Quantize to avoid fractional data points. for my $a_yes (@a_yeses) { my $count = delete $count_by_a_yes->{$a_yes}; my $new_count = $quantizer->($count); if ($new_count) { $real_outstanding += $new_count; $count_by_a_yes->{$a_yes} = $new_count; } } } $count_by_a_yes_by_b_yes = $next_count_by_a_yes_by_b_yes; if (sprintf("%.1f", $outstanding) ne sprintf("%.1f", $last_outstanding)) { printf("%i\t%.1f\t%.1f\t%.1f\n" , $step, $outstanding, $correct, $incorrect ); } $last_outstanding = $outstanding; } # If we keep track of all of the fractional bits we'll have too much # data to track. So fractional bits will be rounded to 0 or 1 at # random. But we do it carefully, in every "1" of entries with # fractional bits, 1 gets rounded to 1, the rest to 0. sub get_quantizer { my $choice = rand(1); my $last_cutoff = 0; return sub { my $x = shift; if (not $x) { return; } elsif ($x >= 1) { # Since we'll keep the entry, keep the fraction for accuracy return $x; } elsif ($x < 0) { die "Oops?"; } else { my $this_cutoff = $last_cutoff + $x; my $n = 0; if ($last_cutoff <= $choice and $choice < $this_cutoff) { $n++; } if (1 <= $this_cutoff) { $this_cutoff -= 1; $choice = rand(1); # Note that we potentially will round a fractional bit to # 2. Doing so is counter-intuitive, but is necessary to avoid # introducing a consistent bias. if ($choice < $this_cutoff) { $n++; } } $last_cutoff = $this_cutoff; return $n; } }; } sub calculate_chi_square { my ($a_yes, $a_no, $b_yes, $b_no) = @_; my $a = $a_yes + $a_no; my $b = $b_yes + $b_no; my $yes = $a_yes + $b_yes; my $no = $a_no + $b_no; my $total = $a + $b; my $e_a_yes = $a * $yes / $total; my $e_a_no = $a * $no / $total; my $e_b_yes = $b * $yes / $total; my $e_b_no = $b * $no / $total; if ( $e_a_yes < 10 or $e_a_no < 10 or $e_b_yes < 10 or $e_b_no < 10 ) { # print "@_ n/a\n"; return "n/a"; } my $chi_square = ($e_a_yes - $a_yes)**2/$e_a_yes + ($e_a_no - $a_no)**2/ $e_a_no + ($e_b_yes - $b_yes)**2/$e_b_yes + ($e_b_no - $b_no)**2/ $e_b_no; # print "@_ $chi_square\n"; } sub filter_out_bottom_decisions { my $b_yes = shift; my $count_by_a_yes = shift; my @a_yeses = @_; for my $a_yes (@a_yeses) { my $chi_square = calculate_chi_square( $a_yes, $step - $a_yes, $b_yes, $step - $b_yes, ); next if $chi_square eq "n/a"; if ($chi_square > $threshold_chi_square) { my $count = delete $count_by_a_yes->{$a_yes} || 0; $outstanding -= $count; if ($a_yes > $b_yes) { $incorrect += $count; } else { $correct += $count; } } else { # We're into insignificant territory now return; } } }