Benchmarking files for comparing performance of two different proposed database

schemas for MARC storage.  See IRC log at

    http://www.haz.cmsd.bc.ca/cgi-bin/kohalog.pl

for information on the two proposals.
This commit is contained in:
tonnesen 2002-05-17 22:03:14 +00:00
parent 86c0d5ce87
commit 63474fc22f
7 changed files with 217 additions and 0 deletions

View file

@ -0,0 +1,6 @@
Benchmark results from Steve Tonnesen
getdata-steve: 15.73 13.26 14.47 12.73 11.90
getdata-paul: 21.56 14.11 12.58 12.49 13.18
getdata-paul-regex: 14.32 11.53 13.51 10.15 10.99

View file

@ -0,0 +1,22 @@
CREATE TABLE marc_0XX_tag_table (
bibcode bigint(20) NOT NULL default '0',
tagnumber char(3) NOT NULL default '',
tagorder tinyint(4) NOT NULL default '0',
tagvalue varchar(255) default NULL,
valuebloblink bigint(20) default NULL,
PRIMARY KEY (bibcode,tagnumber,tagorder)
) TYPE=MyISAM;
CREATE TABLE marc_2XX_subfield_table (
subfieldid bigint(20) unsigned NOT NULL auto_increment,
tagid bigint(20) NOT NULL default '0',
tag char(3) NOT NULL default '',
bibid bigint(20) NOT NULL default '0',
subfieldorder tinyint(4) NOT NULL default '0',
subfieldcode char(1) NOT NULL default '',
subfieldvalue varchar(255) default NULL,
valuebloblink bigint(20) default NULL,
PRIMARY KEY (subfieldid),
KEY (bibid,tagid,tag,subfieldcode)
) TYPE=MyISAM;

View file

@ -0,0 +1,73 @@
#!/usr/bin/perl
#
# This script generates 80,000 random records in the kohabenchmark database for
# the purposes of comparing two different marc storage schemas. It requires
# the presence of a word list for populating the data. Mine is in
# /usr/share/dict/words. Change that if necessary. You'll also need to change
# your userid and password for the dbi->connect line.
use DBI;
my $dbh=DBI->connect("dbi:mysql:kohabenchmark", 'youruserid', 'yourpassword');
@subfields = ( 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n');
open (W, "/usr/share/dict/words");
while (<W>) {
chomp;
push @words, $_;
}
my $tagcounter=0;
my $subfieldcounter=0;
srand($$|time);
for ($bibid=1; $bibid<80000; $bibid++) {
my $numtags=int(rand(10)+5);
my $localtagcounter=0;
for ($i=1; $i<$numtags; $i++) {
$localtagcounter++;
$tagcounter++;
my $tag=$i*40+100;
my $numsubfields=int(rand(10)+1);
my $subfieldsused;
my $localsubfieldcounter=0;
my $tagvalue='';
for ($j=1; $j<=$numsubfields; $j++) {
my $code='';
until ($code) {
my $codepicker=int(rand($#subfields));
if ($subfieldsused->{$subfields[$codepicker]}==0) {
$subfieldsused->{$subfields[$codepicker]}=1;
$code=$subfields[$codepicker];
}
}
$subfieldcounter++;
$localsubfieldcounter++;
my $word=$words[int(rand($#words))];
$tagvalue.="\$$code $word\0";
my $sth=$dbh->prepare("insert into marc_2XX_subfield_table (subfieldid, tagid, tag, bibid, subfieldorder, subfieldcode, subfieldvalue) values (?,?,?,?,?,?,?)");
my $error=1;
while ($error) {
$sth->execute($subfieldcounter, $tagcounter, $tag, $bibid, $localsubfieldcounter, $code, $word);
$error=$dbh->err;
if ($error) {
sleep 1;
print "ERROR: $error\n";
}
$sth->finish;
}
}
$tagvalue=~s/\0$//;
my $error=1;
my $sth=$dbh->prepare("insert into marc_0XX_tag_table (bibcode, tagnumber, tagorder, tagvalue) values (?, ?, ?, ?)");
while ($error) {
$sth->execute($bibid, $tag, $localtagcounter, $tagvalue);
$error=$dbh->err;
if ($error) {
sleep 1;
print "ERROR: $error\n";
}
$sth->finish;
}
}
}

View file

@ -0,0 +1,32 @@
#!/usr/bin/perl
#
#
# Benchmark script for Paul's marc db schema using split() to separate subfield
# code from subfield value
use DBI;
my $dbh=DBI->connect("dbi:mysql:kohabenchmark", 'root', 'testpass');
my $count=$ARGV[0];
my $print=$ARGV[1];
my $bibid=$ARGV[2];
for ($i=0; $i<$count; $i++) {
($bibid) || ($bibid=int(rand(79998))+1);
($print) && (print "BIBID: $bibid\n");
my $sth=$dbh->prepare("select tagnumber,tagvalue from marc_0XX_tag_table where bibcode=$bibid order by tagorder");
$sth->execute;
while (my ($tagnumber, $tagvalue) = $sth->fetchrow) {
($print) && (print " Tag: $tagnumber\n");
foreach (split(/\0/, $tagvalue)) {
my ($code, $value) = split(/\s/, $_, 2);
($print) && (print " $code $value\n");
}
}
$bibid=0;
}

View file

@ -0,0 +1,33 @@
#!/usr/bin/perl
#
#
# Benchmark script for Paul's marc db schema using regex to separate subfield
# code from subfield value
use DBI;
my $dbh=DBI->connect("dbi:mysql:kohabenchmark", 'root', 'testpass');
my $count=$ARGV[0];
my $print=$ARGV[1];
my $bibid=$ARGV[2];
for ($i=0; $i<$count; $i++) {
($bibid) || ($bibid=int(rand(79998))+1);
($print) && (print "BIBID: $bibid\n");
my $sth=$dbh->prepare("select tagnumber,tagvalue from marc_0XX_tag_table where bibcode=$bibid order by tagorder");
$sth->execute;
while (my ($tagnumber, $tagvalue) = $sth->fetchrow) {
($print) && (print " Tag: $tagnumber\n");
foreach (split(/\0/, $tagvalue)) {
m#$(.) (.*)#;
my ($code, $value) = ($1, $2);
($print) && (print " $code $value\n");
}
}
$bibid=0;
}

View file

@ -0,0 +1,32 @@
#!/usr/bin/perl
#
#
# Benchmark script for Steve's marc db schema
use DBI;
my $dbh=DBI->connect("dbi:mysql:kohabenchmark", 'root', 'testpass');
my $count=$ARGV[0];
my $print=$ARGV[1];
my $bibid=$ARGV[2];
for ($i=0; $i<$count; $i++) {
($bibid) || ($bibid=int(rand(79998))+1);
($print) && (print "BIBID: $bibid\n");
my $sth=$dbh->prepare("select tagid,tag,subfieldcode,subfieldvalue from marc_2XX_subfield_table where bibid=$bibid order by tagid,subfieldorder");
$sth->execute;
my $lasttag='';
while (my ($tagid,$tag,$subfieldcode,$subfieldvalue) = $sth->fetchrow) {
if ($tag ne $lasttag) {
($print) && (print " Tag: $tag\n");
$lasttag=$tag;
}
($print) && (print " $subfieldcode $subfieldvalue\n");
}
$bibid=0;
}

View file

@ -0,0 +1,19 @@
#!/usr/bin/perl
#
#
# This script will iterate through each benchmark 5 times, looking up 500
# random records each time. Results will be printed to STDOUT.
my @benchmarks=('getdata-steve', 'getdata-paul', 'getdata-paul-regex');
my $iterations=5;
foreach (@benchmarks) {
print "$_:\t";
for ($i=1; $i<=$iterations; $i++) {
my $timer=`/usr/bin/time -f "%E" perl $_ 500 2>&1`;
chomp $timer;
print "$timer\t";
}
print "\n";
}