#! /usr/bin/perl -w use strict; use Statistics::Distributions qw(uprob udistr); use Carp qw(confess); print <) { chomp($line); my @row = split /\s+/, $line; if (2 != @row) { print "WARNING: wrong number of columns at line $.\n"; next; } if ($row[0] eq 'A' or $row[0] eq 'a') { push @a, $row[1]; } elsif ($row[0] eq 'B' or $row[0] eq 'b') { push @b, $row[1]; } else { print "WARNING: one line $. '$row[0]' should have been A or B\n"; } } if (not @a) { die "No data found for version A\n"; } elsif (not @b) { die "No data found for version B\n"; } elsif (@a + @b < 3) { die "Insufficient data to calculate (need at least 3 data points)\n"; } my $var = variance(@a, @b); my @c = sort {$a <=> $b} @a, @b; pop @c; my $var_c = variance(@c); if (0 == $var_c) { die "Insufficient variation (need at least 3 different values)\n"; } # Sanity check my $w = $var/$var_c - 1; my $a_weight = $w * (@a + @b)/@a; my $b_weight = $w * (@a + @b)/@b; if ($a_weight >= 0.1) { print "WARNING: The normal approximation may not fit A yet\n"; printf " (a_weight = %.2f > 0.1)\n", $a_weight; } if ($b_weight >= 0.1) { print "WARNING: The normal approximation may not fit B yet\n"; printf " (b_weight = %.2f > 0.1)\n", $b_weight; } my $m_a = mean(@a); printf "Mean of A: %.3f\n", $m_a; my $m_b = mean(@b); printf "Mean of B: %.3f\n", $m_b; my $diff = $m_a - $m_b; printf "A is %.3f better than B\n", $diff; my $m_var = $var * (1/@a + 1/@b); my $p = 2 * uprob( abs($diff) / sqrt($m_var) ); printf "p = %.3f\n", $p; for my $c (qw(80 90 95 99)) { my $p = 1 - $c/100; # We're calculating the upper bound of a $c% confidence interval for # a normal distribution with mean 0 and variance $m_var. # # The 2 is because it is 2-tailed. (Half the left out stuff is in # one tail, half in the other.) # my $b = udistr( $p/2 ) * sqrt($m_var); printf "$c%% Confidence for A - B: (%.3f, %.3f)\n", $diff - $b, $diff + $b; } # Calculates the arithmetic average of a list of numbers. sub mean { if (not @_) { confess("No data found, cannot take mean"); } return sum(@_)/@_; } # Calculates the sum of a list of numbers. sub sum { my $s = shift; $s += $_ for @_; return $s; } # Estimates the variance of a list of numbers. sub variance { if (@_ < 2) { confess("Not enough data to calculate a variance"); } my $m = mean(@_); return sum( map {($_ - $m)**2} @_ ) / $#_; }