From 63474fc22fb317dc3ba5e541b6b3984e92422d6f Mon Sep 17 00:00:00 2001 From: tonnesen Date: Fri, 17 May 2002 22:03:14 +0000 Subject: [PATCH] Benchmarking files for comparing performance of two different proposed database schemas for MARC storage. See IRC log at http://www.haz.cmsd.bc.ca/cgi-bin/kohalog.pl for information on the two proposals. --- marc/benchmarks/benchmarkresults | 6 +++ marc/benchmarks/benchmarkschema | 22 +++++++++ marc/benchmarks/generaterandomdata | 73 ++++++++++++++++++++++++++++++ marc/benchmarks/getdata-paul | 32 +++++++++++++ marc/benchmarks/getdata-paul-regex | 33 ++++++++++++++ marc/benchmarks/getdata-steve | 32 +++++++++++++ marc/benchmarks/runbenchmark | 19 ++++++++ 7 files changed, 217 insertions(+) create mode 100644 marc/benchmarks/benchmarkresults create mode 100644 marc/benchmarks/benchmarkschema create mode 100644 marc/benchmarks/generaterandomdata create mode 100644 marc/benchmarks/getdata-paul create mode 100644 marc/benchmarks/getdata-paul-regex create mode 100644 marc/benchmarks/getdata-steve create mode 100644 marc/benchmarks/runbenchmark diff --git a/marc/benchmarks/benchmarkresults b/marc/benchmarks/benchmarkresults new file mode 100644 index 0000000000..4191d587a0 --- /dev/null +++ b/marc/benchmarks/benchmarkresults @@ -0,0 +1,6 @@ +Benchmark results from Steve Tonnesen + +getdata-steve: 15.73 13.26 14.47 12.73 11.90 +getdata-paul: 21.56 14.11 12.58 12.49 13.18 +getdata-paul-regex: 14.32 11.53 13.51 10.15 10.99 + diff --git a/marc/benchmarks/benchmarkschema b/marc/benchmarks/benchmarkschema new file mode 100644 index 0000000000..7c10aeb42d --- /dev/null +++ b/marc/benchmarks/benchmarkschema @@ -0,0 +1,22 @@ +CREATE TABLE marc_0XX_tag_table ( + bibcode bigint(20) NOT NULL default '0', + tagnumber char(3) NOT NULL default '', + tagorder tinyint(4) NOT NULL default '0', + tagvalue varchar(255) default NULL, + valuebloblink bigint(20) default NULL, + PRIMARY KEY (bibcode,tagnumber,tagorder) + ) TYPE=MyISAM; + +CREATE TABLE marc_2XX_subfield_table ( + subfieldid bigint(20) unsigned NOT NULL auto_increment, + tagid bigint(20) NOT NULL default '0', + tag char(3) NOT NULL default '', + bibid bigint(20) NOT NULL default '0', + subfieldorder tinyint(4) NOT NULL default '0', + subfieldcode char(1) NOT NULL default '', + subfieldvalue varchar(255) default NULL, + valuebloblink bigint(20) default NULL, + PRIMARY KEY (subfieldid), + KEY (bibid,tagid,tag,subfieldcode) + ) TYPE=MyISAM; + diff --git a/marc/benchmarks/generaterandomdata b/marc/benchmarks/generaterandomdata new file mode 100644 index 0000000000..2c6455b3e1 --- /dev/null +++ b/marc/benchmarks/generaterandomdata @@ -0,0 +1,73 @@ +#!/usr/bin/perl +# +# This script generates 80,000 random records in the kohabenchmark database for +# the purposes of comparing two different marc storage schemas. It requires +# the presence of a word list for populating the data. Mine is in +# /usr/share/dict/words. Change that if necessary. You'll also need to change +# your userid and password for the dbi->connect line. + +use DBI; + +my $dbh=DBI->connect("dbi:mysql:kohabenchmark", 'youruserid', 'yourpassword'); +@subfields = ( 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n'); + + +open (W, "/usr/share/dict/words"); +while () { + chomp; + push @words, $_; +} + +my $tagcounter=0; +my $subfieldcounter=0; +srand($$|time); +for ($bibid=1; $bibid<80000; $bibid++) { + my $numtags=int(rand(10)+5); + my $localtagcounter=0; + for ($i=1; $i<$numtags; $i++) { + $localtagcounter++; + $tagcounter++; + my $tag=$i*40+100; + my $numsubfields=int(rand(10)+1); + my $subfieldsused; + my $localsubfieldcounter=0; + my $tagvalue=''; + for ($j=1; $j<=$numsubfields; $j++) { + my $code=''; + until ($code) { + my $codepicker=int(rand($#subfields)); + if ($subfieldsused->{$subfields[$codepicker]}==0) { + $subfieldsused->{$subfields[$codepicker]}=1; + $code=$subfields[$codepicker]; + } + } + $subfieldcounter++; + $localsubfieldcounter++; + my $word=$words[int(rand($#words))]; + $tagvalue.="\$$code $word\0"; + my $sth=$dbh->prepare("insert into marc_2XX_subfield_table (subfieldid, tagid, tag, bibid, subfieldorder, subfieldcode, subfieldvalue) values (?,?,?,?,?,?,?)"); + my $error=1; + while ($error) { + $sth->execute($subfieldcounter, $tagcounter, $tag, $bibid, $localsubfieldcounter, $code, $word); + $error=$dbh->err; + if ($error) { + sleep 1; + print "ERROR: $error\n"; + } + $sth->finish; + } + } + $tagvalue=~s/\0$//; + my $error=1; + my $sth=$dbh->prepare("insert into marc_0XX_tag_table (bibcode, tagnumber, tagorder, tagvalue) values (?, ?, ?, ?)"); + while ($error) { + $sth->execute($bibid, $tag, $localtagcounter, $tagvalue); + $error=$dbh->err; + if ($error) { + sleep 1; + print "ERROR: $error\n"; + } + $sth->finish; + } + } +} diff --git a/marc/benchmarks/getdata-paul b/marc/benchmarks/getdata-paul new file mode 100644 index 0000000000..04849250a4 --- /dev/null +++ b/marc/benchmarks/getdata-paul @@ -0,0 +1,32 @@ +#!/usr/bin/perl +# +# +# Benchmark script for Paul's marc db schema using split() to separate subfield +# code from subfield value + +use DBI; + + +my $dbh=DBI->connect("dbi:mysql:kohabenchmark", 'root', 'testpass'); + +my $count=$ARGV[0]; +my $print=$ARGV[1]; +my $bibid=$ARGV[2]; + + + +for ($i=0; $i<$count; $i++) { + ($bibid) || ($bibid=int(rand(79998))+1); + + ($print) && (print "BIBID: $bibid\n"); + my $sth=$dbh->prepare("select tagnumber,tagvalue from marc_0XX_tag_table where bibcode=$bibid order by tagorder"); + $sth->execute; + while (my ($tagnumber, $tagvalue) = $sth->fetchrow) { + ($print) && (print " Tag: $tagnumber\n"); + foreach (split(/\0/, $tagvalue)) { + my ($code, $value) = split(/\s/, $_, 2); + ($print) && (print " $code $value\n"); + } + } + $bibid=0; +} diff --git a/marc/benchmarks/getdata-paul-regex b/marc/benchmarks/getdata-paul-regex new file mode 100644 index 0000000000..edd33a8dbf --- /dev/null +++ b/marc/benchmarks/getdata-paul-regex @@ -0,0 +1,33 @@ +#!/usr/bin/perl +# +# +# Benchmark script for Paul's marc db schema using regex to separate subfield +# code from subfield value + +use DBI; + + +my $dbh=DBI->connect("dbi:mysql:kohabenchmark", 'root', 'testpass'); + +my $count=$ARGV[0]; +my $print=$ARGV[1]; +my $bibid=$ARGV[2]; + + + +for ($i=0; $i<$count; $i++) { + ($bibid) || ($bibid=int(rand(79998))+1); + + ($print) && (print "BIBID: $bibid\n"); + my $sth=$dbh->prepare("select tagnumber,tagvalue from marc_0XX_tag_table where bibcode=$bibid order by tagorder"); + $sth->execute; + while (my ($tagnumber, $tagvalue) = $sth->fetchrow) { + ($print) && (print " Tag: $tagnumber\n"); + foreach (split(/\0/, $tagvalue)) { + m#$(.) (.*)#; + my ($code, $value) = ($1, $2); + ($print) && (print " $code $value\n"); + } + } + $bibid=0; +} diff --git a/marc/benchmarks/getdata-steve b/marc/benchmarks/getdata-steve new file mode 100644 index 0000000000..7bdc3acbe7 --- /dev/null +++ b/marc/benchmarks/getdata-steve @@ -0,0 +1,32 @@ +#!/usr/bin/perl +# +# +# Benchmark script for Steve's marc db schema + + +use DBI; + + +my $dbh=DBI->connect("dbi:mysql:kohabenchmark", 'root', 'testpass'); + +my $count=$ARGV[0]; +my $print=$ARGV[1]; +my $bibid=$ARGV[2]; + + +for ($i=0; $i<$count; $i++) { + ($bibid) || ($bibid=int(rand(79998))+1); + + ($print) && (print "BIBID: $bibid\n"); + my $sth=$dbh->prepare("select tagid,tag,subfieldcode,subfieldvalue from marc_2XX_subfield_table where bibid=$bibid order by tagid,subfieldorder"); + $sth->execute; + my $lasttag=''; + while (my ($tagid,$tag,$subfieldcode,$subfieldvalue) = $sth->fetchrow) { + if ($tag ne $lasttag) { + ($print) && (print " Tag: $tag\n"); + $lasttag=$tag; + } + ($print) && (print " $subfieldcode $subfieldvalue\n"); + } + $bibid=0; +} diff --git a/marc/benchmarks/runbenchmark b/marc/benchmarks/runbenchmark new file mode 100644 index 0000000000..566ab06e3b --- /dev/null +++ b/marc/benchmarks/runbenchmark @@ -0,0 +1,19 @@ +#!/usr/bin/perl +# +# +# This script will iterate through each benchmark 5 times, looking up 500 +# random records each time. Results will be printed to STDOUT. + +my @benchmarks=('getdata-steve', 'getdata-paul', 'getdata-paul-regex'); + +my $iterations=5; + +foreach (@benchmarks) { + print "$_:\t"; + for ($i=1; $i<=$iterations; $i++) { + my $timer=`/usr/bin/time -f "%E" perl $_ 500 2>&1`; + chomp $timer; + print "$timer\t"; + } + print "\n"; +} -- 2.39.2