Benchmarking files for comparing performance of two different proposed database
schemas for MARC storage. See IRC log at http://www.haz.cmsd.bc.ca/cgi-bin/kohalog.pl for information on the two proposals.
This commit is contained in:
parent
86c0d5ce87
commit
63474fc22f
7 changed files with 217 additions and 0 deletions
6
marc/benchmarks/benchmarkresults
Normal file
6
marc/benchmarks/benchmarkresults
Normal file
|
@ -0,0 +1,6 @@
|
|||
Benchmark results from Steve Tonnesen
|
||||
|
||||
getdata-steve: 15.73 13.26 14.47 12.73 11.90
|
||||
getdata-paul: 21.56 14.11 12.58 12.49 13.18
|
||||
getdata-paul-regex: 14.32 11.53 13.51 10.15 10.99
|
||||
|
22
marc/benchmarks/benchmarkschema
Normal file
22
marc/benchmarks/benchmarkschema
Normal file
|
@ -0,0 +1,22 @@
|
|||
-- Tag-level storage: one row per MARC tag of a bibliographic record.
-- generaterandomdata fills it as follows:
--   bibcode        record number the tag belongs to
--   tagnumber      three-character MARC tag number
--   tagorder       position of the tag within its record (starting at 1)
--   tagvalue       every subfield of the tag packed into one string, each
--                  written as "$<code> <value>" and separated by NUL bytes
--   valuebloblink  never written by the benchmark scripts; presumably a link
--                  to out-of-row storage for oversized values -- TODO confirm
-- ENGINE= replaces the TYPE= table option, which MySQL 5.5 and later reject.
CREATE TABLE marc_0XX_tag_table (
    bibcode bigint(20) NOT NULL default '0',
    tagnumber char(3) NOT NULL default '',
    tagorder tinyint(4) NOT NULL default '0',
    tagvalue varchar(255) default NULL,
    valuebloblink bigint(20) default NULL,
    PRIMARY KEY (bibcode,tagnumber,tagorder)
) ENGINE=MyISAM;
|
||||
|
||||
-- Subfield-level storage: one row per MARC subfield.
-- generaterandomdata fills it as follows:
--   subfieldid     running number over all subfields (set explicitly by the
--                  generator, although the column is auto_increment)
--   tagid          running number over all tags, shared by the subfields of
--                  one tag
--   tag            three-character MARC tag number
--   bibid          record number the subfield belongs to
--   subfieldorder  position of the subfield within its tag (starting at 1)
--   subfieldcode   single-character subfield code
--   subfieldvalue  the subfield's text
--   valuebloblink  never written by the benchmark scripts; presumably a link
--                  to out-of-row storage for oversized values -- TODO confirm
-- ENGINE= replaces the TYPE= table option, which MySQL 5.5 and later reject.
CREATE TABLE marc_2XX_subfield_table (
    subfieldid bigint(20) unsigned NOT NULL auto_increment,
    tagid bigint(20) NOT NULL default '0',
    tag char(3) NOT NULL default '',
    bibid bigint(20) NOT NULL default '0',
    subfieldorder tinyint(4) NOT NULL default '0',
    subfieldcode char(1) NOT NULL default '',
    subfieldvalue varchar(255) default NULL,
    valuebloblink bigint(20) default NULL,
    PRIMARY KEY (subfieldid),
    KEY (bibid,tagid,tag,subfieldcode)
) ENGINE=MyISAM;
|
||||
|
73
marc/benchmarks/generaterandomdata
Normal file
73
marc/benchmarks/generaterandomdata
Normal file
|
@ -0,0 +1,73 @@
|
|||
#!/usr/bin/perl
#
# This script generates 80,000 random records in the kohabenchmark database for
# the purposes of comparing two different marc storage schemas.  It requires
# the presence of a word list for populating the data.  Mine is in
# /usr/share/dict/words.  Change that if necessary.  You'll also need to change
# your userid and password for the DBI->connect line.
#
# Every tag is stored twice so both schemas hold the same data:
#   * marc_2XX_subfield_table gets one row per subfield;
#   * marc_0XX_tag_table gets one row per tag, with the subfields packed into
#     a single string of "$<code> <word>" entries separated by NUL bytes.

use strict;
use warnings;

use DBI;
use List::Util qw(shuffle);

my $WORD_LIST    = '/usr/share/dict/words';
my $RECORD_COUNT = 80_000;
my $MAX_ATTEMPTS = 5;            # tries per insert before giving up
my @SUBFIELDS    = ('a' .. 'n');

my $dbh = DBI->connect("dbi:mysql:kohabenchmark", 'youruserid', 'yourpassword')
    or die "Cannot connect to kohabenchmark: $DBI::errstr\n";

open(my $wordfh, '<', $WORD_LIST) or die "Cannot open $WORD_LIST: $!\n";
chomp(my @words = <$wordfh>);
close($wordfh);
die "$WORD_LIST contains no words\n" unless @words;

# Prepare both inserts once; they are executed millions of times below.
my $subfield_sth = $dbh->prepare("insert into marc_2XX_subfield_table (subfieldid, tagid, tag, bibid, subfieldorder, subfieldcode, subfieldvalue) values (?,?,?,?,?,?,?)")
    or die "Cannot prepare subfield insert: " . $dbh->errstr . "\n";
my $tag_sth = $dbh->prepare("insert into marc_0XX_tag_table (bibcode, tagnumber, tagorder, tagvalue) values (?, ?, ?, ?)")
    or die "Cannot prepare tag insert: " . $dbh->errstr . "\n";

# Execute a prepared insert, retrying after a one second pause when it fails.
# Dies after $MAX_ATTEMPTS failures so that a permanent error (for example a
# duplicate key when the tables are already populated) cannot loop forever.
sub insert_row {
    my ($sth, @values) = @_;
    for my $attempt (1 .. $MAX_ATTEMPTS) {
        return if $sth->execute(@values);
        print "ERROR: ", $sth->err, "\n";
        sleep 1;
    }
    die "Giving up after $MAX_ATTEMPTS attempts: " . $sth->errstr . "\n";
}

# Running ids across the whole data set (tagid / subfieldid columns).
my $tagcounter      = 0;
my $subfieldcounter = 0;

# rand() seeds itself on first use, so no explicit srand() call is needed.
for my $bibid (1 .. $RECORD_COUNT) {
    my $numtags = int(rand(10) + 5);

    # As in the original loop, this runs $numtags - 1 times (4 to 13 tags).
    for my $tagorder (1 .. $numtags - 1) {
        $tagcounter++;
        my $tag          = $tagorder * 40 + 100;
        my $numsubfields = int(rand(10) + 1);

        # A random selection of distinct subfield codes, in random order.
        my @codes = (shuffle @SUBFIELDS)[0 .. $numsubfields - 1];

        my @packed;
        my $subfieldorder = 0;
        for my $code (@codes) {
            $subfieldcounter++;
            $subfieldorder++;
            my $word = $words[int(rand(scalar @words))];
            push @packed, "\$$code $word";
            insert_row($subfield_sth, $subfieldcounter, $tagcounter, $tag,
                       $bibid, $subfieldorder, $code, $word);
        }

        insert_row($tag_sth, $bibid, $tag, $tagorder, join("\0", @packed));
    }
}

$dbh->disconnect;
|
32
marc/benchmarks/getdata-paul
Normal file
32
marc/benchmarks/getdata-paul
Normal file
|
@ -0,0 +1,32 @@
|
|||
#!/usr/bin/perl
#
#
# Benchmark script for Paul's marc db schema using split() to separate subfield
# code from subfield value
#
# Arguments, in order:
#   number of lookups to perform
#   print flag: when true, every fetched tag and subfield is printed
#   bibid for the first lookup (optional); all other lookups are random

use DBI;

my $dbh = DBI->connect("dbi:mysql:kohabenchmark", 'root', 'testpass');

my ($count, $print, $bibid) = @ARGV;

my $done = 0;
while ($done < $count) {
    # A false bibid means "pick a random record".
    $bibid = int(rand(79998)) + 1 unless $bibid;

    print "BIBID: $bibid\n" if $print;

    my $sth = $dbh->prepare("select tagnumber,tagvalue from marc_0XX_tag_table where bibcode=$bibid order by tagorder");
    $sth->execute;

    while (my ($tagnumber, $tagvalue) = $sth->fetchrow) {
        print " Tag: $tagnumber\n" if $print;

        # Subfields are NUL-separated; the split is done even when nothing
        # is printed because it is the work being benchmarked.
        for my $subfield (split(/\0/, $tagvalue)) {
            my ($code, $value) = split(/\s/, $subfield, 2);
            print " $code $value\n" if $print;
        }
    }

    # Forget the bibid so the next lookup is random.
    $bibid = 0;
    $done++;
}
|
33
marc/benchmarks/getdata-paul-regex
Normal file
33
marc/benchmarks/getdata-paul-regex
Normal file
|
@ -0,0 +1,33 @@
|
|||
#!/usr/bin/perl
#
#
# Benchmark script for Paul's marc db schema using regex to separate subfield
# code from subfield value
#
# Arguments, in order:
#   number of lookups to perform
#   print flag: when true, every fetched tag and subfield is printed
#   bibid for the first lookup (optional); all other lookups are random

use strict;
use warnings;

use DBI;

my $dbh = DBI->connect("dbi:mysql:kohabenchmark", 'root', 'testpass')
    or die "Cannot connect to kohabenchmark: $DBI::errstr\n";

my ($count, $print, $bibid) = @ARGV;
$count = 0 unless defined $count;

for (my $lookup = 0; $lookup < $count; $lookup++) {
    # A false bibid means "pick a random record".
    $bibid = int(rand(79998)) + 1 unless $bibid;

    print "BIBID: $bibid\n" if $print;

    # Prepared per lookup on purpose: the sibling benchmark scripts do the
    # same, so the timings stay comparable.
    my $sth = $dbh->prepare("select tagnumber,tagvalue from marc_0XX_tag_table where bibcode=$bibid order by tagorder");
    $sth->execute;

    while (my ($tagnumber, $tagvalue) = $sth->fetchrow_array) {
        print " Tag: $tagnumber\n" if $print;
        next unless defined $tagvalue;

        foreach my $subfield (split(/\0/, $tagvalue)) {
            # Each subfield is stored as "$<code> <value>".  The dollar sign
            # must be escaped: a bare "$" in front of "(" is an end-of-string
            # anchor, which made the previous pattern fail on every input.
            next unless $subfield =~ m{\A\$(.) (.*)\z}s;
            my ($code, $value) = ($1, $2);
            print " $code $value\n" if $print;
        }
    }

    # Forget the bibid so the next lookup is random.
    $bibid = 0;
}
|
32
marc/benchmarks/getdata-steve
Normal file
32
marc/benchmarks/getdata-steve
Normal file
|
@ -0,0 +1,32 @@
|
|||
#!/usr/bin/perl
#
#
# Benchmark script for Steve's marc db schema
#
# Arguments, in order:
#   number of lookups to perform
#   print flag: when true, every fetched tag and subfield is printed
#   bibid for the first lookup (optional); all other lookups are random

use DBI;

my $dbh = DBI->connect("dbi:mysql:kohabenchmark", 'root', 'testpass');

my ($count, $print, $bibid) = @ARGV;

my $done = 0;
while ($done < $count) {
    # A false bibid means "pick a random record".
    $bibid = int(rand(79998)) + 1 unless $bibid;

    print "BIBID: $bibid\n" if $print;

    my $sth = $dbh->prepare("select tagid,tag,subfieldcode,subfieldvalue from marc_2XX_subfield_table where bibid=$bibid order by tagid,subfieldorder");
    $sth->execute;

    # Rows arrive one subfield at a time; a tag heading is emitted whenever
    # the tag number differs from the one on the previous row.
    my $lasttag = '';
    while (my ($tagid, $tag, $code, $value) = $sth->fetchrow) {
        unless ($tag eq $lasttag) {
            print " Tag: $tag\n" if $print;
            $lasttag = $tag;
        }
        print " $code $value\n" if $print;
    }

    # Forget the bibid so the next lookup is random.
    $bibid = 0;
    $done++;
}
|
19
marc/benchmarks/runbenchmark
Normal file
19
marc/benchmarks/runbenchmark
Normal file
|
@ -0,0 +1,19 @@
|
|||
#!/usr/bin/perl
#
#
# This script will iterate through each benchmark 5 times, looking up 500
# random records each time.  Results will be printed to STDOUT: one line per
# benchmark holding the tab-separated elapsed wall-clock time of every run,
# in seconds.
#
# The benchmark scripts are expected to sit in the same directory as this
# script and are run with the same perl interpreter.

use strict;
use warnings;

use FindBin qw($Bin);
use Time::HiRes qw(time);

my @benchmarks=('getdata-steve', 'getdata-paul', 'getdata-paul-regex');

my $iterations=5;
my $lookups=500;

$| = 1;    # show each timing as soon as the run has finished

foreach my $benchmark (@benchmarks) {
    my $script = "$Bin/$benchmark";
    print "$benchmark:\t";
    for my $run (1 .. $iterations) {
        # Timed in-process instead of through /usr/bin/time, so that anything
        # the benchmark writes to STDERR cannot end up in the reported time.
        my $start   = time;
        my $status  = system($^X, $script, $lookups);
        my $elapsed = time - $start;

        # A run that could not start or exited non-zero has no valid timing.
        if ($status != 0) {
            print "FAILED\t";
            next;
        }
        printf "%.2f\t", $elapsed;
    }
    print "\n";
}
|
Loading…
Reference in a new issue