From 44f4e9baf37af9eb9a51aa90c33fed619a16ecfa Mon Sep 17 00:00:00 2001
From: Galen Charlton
Date: Fri, 16 Nov 2007 18:49:30 -0600
Subject: [PATCH] added editor for MARC matching rules

Signed-off-by: Chris Cormack
Signed-off-by: Joshua Ferraro
---
 C4/Matcher.pm                                 |  86 ++-
 admin/matching-rules.pl                       | 291 +++++++
 .../prog/en/includes/admin-menu.inc           |   1 +
 .../prog/en/modules/admin/admin-home.tmpl     |   2 +
 .../prog/en/modules/admin/matching-rules.tmpl | 696 ++++++++++++++++++
 5 files changed, 1073 insertions(+), 3 deletions(-)
 create mode 100755 admin/matching-rules.pl
 create mode 100644 koha-tmpl/intranet-tmpl/prog/en/modules/admin/matching-rules.tmpl

diff --git a/C4/Matcher.pm b/C4/Matcher.pm
index 505ff27f36..3bd242bd3d 100644
--- a/C4/Matcher.pm
+++ b/C4/Matcher.pm
@@ -62,6 +62,8 @@ foreach $match (@matches) {
 
 }
 
+my $matcher_description = $matcher->dump();
+
 =back
 
 =head1 FUNCTIONS
@@ -365,6 +367,29 @@ sub _store_matchpoint {
 
     return $matchpoint_id;
 }
+
+=head2 delete
+
+=over 4
+
+C4::Matcher->delete($id);
+
+=back
+
+Deletes the matcher of the specified ID
+from the database.
+
+=cut
+
+sub delete {
+    my $class = shift;
+    my $matcher_id = shift;
+
+    my $dbh = C4::Context->dbh;
+    my $sth = $dbh->prepare("DELETE FROM marc_matchers WHERE matcher_id = ?");
+    $sth->execute($matcher_id); # relying on cascading deletes to clean up everything
+}
+
 =head2 threshold
 
 =over 4
@@ -383,6 +408,26 @@ sub threshold {
     @_ ? $self->{'threshold'} = shift : $self->{'threshold'};
 }
 
+=head2 _id
+
+=over 4
+
+$matcher->_id(123);
+my $id = $matcher->_id();
+
+=back
+
+Accessor method. Note that using this method
+to set the DB ID of the matcher should not be
+done outside of the editing CGI.
+
+=cut
+
+sub _id {
+    my $self = shift;
+    @_ ? $self->{'id'} = shift : $self->{'id'};
+}
+
 =head2 code
 
 =over 4
@@ -483,7 +528,7 @@ sub add_simple_matchpoint {
 
     $self->add_matchpoint($index, $score, [
                           { tag => $source_tag, subfields => $source_subfields,
-                            offset => $source_offset, length => $source_length,
+                            offset => $source_offset, 'length' => $source_length,
                             norms => [ $source_normalizer ]
                           }
                          ]);
@@ -565,9 +610,9 @@ sub add_simple_required_check {
         $target_tag, $target_subfields, $target_offset, $target_length, $target_normalizer) = @_;
 
     $self->add_required_check(
-      [ { tag => $source_tag, subfields => $source_subfields, offset => $source_offset, length => $source_length,
+      [ { tag => $source_tag, subfields => $source_subfields, offset => $source_offset, 'length' => $source_length,
           norms => [ $source_normalizer ] } ],
-      [ { tag => $target_tag, subfields => $target_subfields, offset => $target_offset, length => $target_length,
+      [ { tag => $target_tag, subfields => $target_subfields, offset => $target_offset, 'length' => $target_length,
           norms => [ $target_normalizer ] } ]
     );
 }
@@ -646,6 +691,41 @@ sub get_matches {
 
 }
 
+=head2 dump
+
+=over 4
+
+$description = $matcher->dump();
+
+=back
+
+Returns a reference to a structure containing all of the information
+in the matcher object. This is mainly a convenience method to
+aid setting up an HTML editing form.
+
+=cut
+
+sub dump {
+    my $self = shift;
+
+    my $result = {};
+
+    $result->{'matcher_id'} = $self->{'id'};
+    $result->{'code'} = $self->{'code'};
+    $result->{'description'} = $self->{'description'};
+
+    $result->{'matchpoints'} = [];
+    foreach my $matchpoint (@{ $self->{'matchpoints'} }) {
+        push @{ $result->{'matchpoints'} }, $matchpoint;
+    }
+    $result->{'matchchecks'} = [];
+    foreach my $matchcheck (@{ $self->{'required_checks'} }) {
+        push @{ $result->{'matchchecks'} }, $matchcheck;
+    }
+
+    return $result;
+}
+
 sub _passes_required_checks {
     my ($source_record, $target_blob, $matchchecks) = @_;
     my $target_record = MARC::Record->new_from_usmarc($target_blob); # FIXME -- need to avoid parsing record twice
diff --git a/admin/matching-rules.pl b/admin/matching-rules.pl
new file mode 100755
index 0000000000..a7d45d25f2
--- /dev/null
+++ b/admin/matching-rules.pl
@@ -0,0 +1,291 @@
+#! /usr/bin/perl
+#
+# Copyright 2007 LibLime
+#
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+# Suite 330, Boston, MA 02111-1307 USA
+#
+
+use strict;
+use CGI;
+use C4::Auth;
+use C4::Context;
+use C4::Output;
+use C4::Koha;
+use C4::Matcher;
+
+my $script_name = "/cgi-bin/koha/admin/matching-rules.pl";
+
+my $input = CGI->new;
+my $op = $input->param('op');
+
+
+my ($template, $loggedinuser, $cookie)
+    = get_template_and_user({template_name => "admin/matching-rules.tmpl",
+                 query => $input,
+                 type => "intranet",
+                 authnotrequired => 0,
+                 flagsrequired => {parameters => 1},
+                 debug => 1,
+                 });
+
+$template->param(script_name => $script_name);
+
+my $matcher_id = $input->param("matcher_id");
+
+$template->param(max_matchpoint => 0);
+$template->param(max_matchcheck => 0);
+my $display_list = 0;
+if ($op eq "edit_matching_rule") {
+    edit_matching_rule_form($template, $matcher_id);
+} elsif ($op eq "edit_matching_rule_confirmed") {
+    add_update_matching_rule($template, $matcher_id);
+    $display_list = 1;
+} elsif ($op eq "add_matching_rule") {
+    add_matching_rule_form($template);
+} elsif ($op eq "add_matching_rule_confirmed") {
+    add_update_matching_rule($template, $matcher_id);
+    $display_list = 1;
+} elsif ($op eq "delete_matching_rule") {
+    delete_matching_rule_form($template, $matcher_id);
+} elsif ($op eq "delete_matching_rule_confirmed") {
+    delete_matching_rule($template, $matcher_id);
+    $display_list = 1;
+} else {
+    $display_list = 1;
+}
+
+if ($display_list) {
+    matching_rule_list($template);
+}
+
+output_html_with_http_headers $input, $cookie, $template->output;
+
+exit 0;
+
+# Set template parameters to display an empty form for adding a matching rule.
+sub add_matching_rule_form {
+    my $template = shift;
+
+    $template->param(
+        matching_rule_form => 1,
+        confirm_op => 'add_matching_rule_confirmed',
+        max_matchpoint => 1,
+        max_matchcheck => 1
+    );
+
+}
+
+# Build a C4::Matcher from the submitted form parameters (mp_* fields are
+# matchpoints, mc_* fields are required match checks) and store it.  A numeric
+# $matcher_id marks the rule as an edit of an existing matcher.
+sub add_update_matching_rule {
+    my $template = shift;
+    my $matcher_id = shift;
+
+    # do parsing
+    my $matcher = C4::Matcher->new('biblio', 1000); # FIXME biblio only for now
+    $matcher->code($input->param('code'));
+    $matcher->description($input->param('description'));
+    $matcher->threshold($input->param('threshold'));
+
+    # matchpoints (numbers are sorted numerically so that 10 follows 9, not 1)
+    my @mp_nums = sort { $a <=> $b } map { /^mp_(\d+)_search_index/ ? int($1): () } $input->param;
+    foreach my $mp_num (@mp_nums) {
+        my $index = $input->param("mp_${mp_num}_search_index");
+        my $score = $input->param("mp_${mp_num}_score");
+        # components
+        my $components = [];
+        my @comp_nums = sort { $a <=> $b } map { /^mp_${mp_num}_c_(\d+)_tag/ ? int($1): () } $input->param;
+        foreach my $comp_num (@comp_nums) {
+            my $component = {};
+            $component->{'tag'} = $input->param("mp_${mp_num}_c_${comp_num}_tag");
+            $component->{'subfields'} = $input->param("mp_${mp_num}_c_${comp_num}_subfields");
+            $component->{'offset'} = $input->param("mp_${mp_num}_c_${comp_num}_offset");
+            $component->{'length'} = $input->param("mp_${mp_num}_c_${comp_num}_length");
+            # norms
+            $component->{'norms'} = [];
+            my @norm_nums = sort { $a <=> $b } map { /^mp_${mp_num}_c_${comp_num}_n_(\d+)_norm/ ? int($1): () } $input->param;
+            foreach my $norm_num (@norm_nums) {
+                push @{ $component->{'norms'} }, $input->param("mp_${mp_num}_c_${comp_num}_n_${norm_num}_norm");
+            }
+            push @$components, $component;
+        }
+        $matcher->add_matchpoint($index, $score, $components);
+    }
+
+    # match checks
+    my @mc_nums = sort { $a <=> $b } map { /^mc_(\d+)_id/ ? int($1): () } $input->param;
+    foreach my $mc_num (@mc_nums) {    # iterate the match check numbers, not the matchpoint numbers
+        # source components
+        my $src_components = [];
+        my @src_comp_nums = sort { $a <=> $b } map { /^mc_${mc_num}_src_c_(\d+)_tag/ ? int($1): () } $input->param;
+        foreach my $comp_num (@src_comp_nums) {
+            my $component = {};
+            $component->{'tag'} = $input->param("mc_${mc_num}_src_c_${comp_num}_tag");
+            $component->{'subfields'} = $input->param("mc_${mc_num}_src_c_${comp_num}_subfields");
+            $component->{'offset'} = $input->param("mc_${mc_num}_src_c_${comp_num}_offset");
+            $component->{'length'} = $input->param("mc_${mc_num}_src_c_${comp_num}_length");
+            # norms
+            $component->{'norms'} = [];
+            my @norm_nums = sort { $a <=> $b } map { /^mc_${mc_num}_src_c_${comp_num}_n_(\d+)_norm/ ? int($1): () } $input->param;
+            foreach my $norm_num (@norm_nums) {
+                push @{ $component->{'norms'} }, $input->param("mc_${mc_num}_src_c_${comp_num}_n_${norm_num}_norm");
+            }
+            push @$src_components, $component;
+        }
+        # target components
+        my $tgt_components = [];
+        my @tgt_comp_nums = sort { $a <=> $b } map { /^mc_${mc_num}_tgt_c_(\d+)_tag/ ? int($1): () } $input->param;
+        foreach my $comp_num (@tgt_comp_nums) {
+            my $component = {};
+            $component->{'tag'} = $input->param("mc_${mc_num}_tgt_c_${comp_num}_tag");
+            $component->{'subfields'} = $input->param("mc_${mc_num}_tgt_c_${comp_num}_subfields");
+            $component->{'offset'} = $input->param("mc_${mc_num}_tgt_c_${comp_num}_offset");
+            $component->{'length'} = $input->param("mc_${mc_num}_tgt_c_${comp_num}_length");
+            # norms
+            $component->{'norms'} = [];
+            my @norm_nums = sort { $a <=> $b } map { /^mc_${mc_num}_tgt_c_${comp_num}_n_(\d+)_norm/ ? int($1): () } $input->param;
+            foreach my $norm_num (@norm_nums) {
+                push @{ $component->{'norms'} }, $input->param("mc_${mc_num}_tgt_c_${comp_num}_n_${norm_num}_norm");
+            }
+            push @$tgt_components, $component;
+        }
+        $matcher->add_required_check($src_components, $tgt_components);
+    }
+
+    if (defined $matcher_id and $matcher_id =~ /^\d+$/) {    # anchored: the whole value must be numeric
+        $matcher->_id($matcher_id);
+        $template->param(edited_matching_rule => $matcher->code());
+    } else {
+        $template->param(added_matching_rule => $matcher->code());
+    }
+    $matcher_id = $matcher->store();
+}
+
+# Set template parameters to display the delete-confirmation form for a rule.
+sub delete_matching_rule_form {
+    my $template = shift;
+    my $matcher_id = shift;
+
+    my $matcher = C4::Matcher->fetch($matcher_id);
+    $template->param(
+        delete_matching_rule_form => 1,
+        confirm_op => "delete_matching_rule_confirmed",
+        matcher_id => $matcher_id,
+        code => $matcher->code(),
+        description => $matcher->description(),
+    );
+}
+
+# Delete the matching rule, passing its code to the template for the notice.
+sub delete_matching_rule {
+    my $template = shift;
+    my $matcher_id = shift;
+
+    my $matcher = C4::Matcher->fetch($matcher_id);
+    $template->param(deleted_matching_rule => $matcher->code(),
+                    );
+    C4::Matcher->delete($matcher_id);
+}
+
+# Load an existing matcher and pass its matchpoints and required checks to the
+# template so the editing form is shown pre-filled.
+sub edit_matching_rule_form {
+    my $template = shift;
+    my $matcher_id = shift;
+
+    my $matcher = C4::Matcher->fetch($matcher_id);
+
+    $template->param(matcher_id => $matcher_id);
+    $template->param(code => $matcher->code());
+    $template->param(description => $matcher->description());
+    $template->param(threshold => $matcher->threshold());
+
+    my $matcher_info = $matcher->dump();
+    my @matchpoints = ();
+    my $mp_num = 0;
+    foreach my $matchpoint (@{ $matcher_info->{'matchpoints'} }) {
+        $mp_num++;
+        my @components = _parse_components($matchpoint->{'components'});
+        push @matchpoints, {
+            mp_num => $mp_num,
+            index => $matchpoint->{'index'},
+            score => $matchpoint->{'score'},
+            components => \@components
+        };
+    }
+    $template->param(matchpoints => \@matchpoints);
+
+    my $mc_num = 0;
+    my @matchchecks = ();
+    foreach my $matchcheck (@{ $matcher_info->{'matchchecks'} }) {
+        $mc_num++;
+        my @src_components = _parse_components($matchcheck->{'source_matchpoint'}->{'components'});
+        my @tgt_components = _parse_components($matchcheck->{'target_matchpoint'}->{'components'});
+        push @matchchecks, {
+            mc_num => $mc_num,
+            src_components => \@src_components,
+            tgt_components => \@tgt_components
+        };
+    }
+    $template->param(matchchecks => \@matchchecks);
+
+    $template->param(
+        matching_rule_form => 1,
+        edit_matching_rule => 1,
+        confirm_op => 'edit_matching_rule_confirmed',
+        max_matchpoint => $mp_num,
+        max_matchcheck => $mc_num
+    );
+
+}
+
+# Convert a matcher component list into numbered hashes for the template;
+# the subfields hash of each component is flattened to a sorted string of keys.
+sub _parse_components {
+    my $components_ref = shift;
+    my @components = ();
+
+    my $comp_num = 0;
+    foreach my $component (@{ $components_ref }) {
+        $comp_num++;
+        my $norm_num = 0;
+        my @norms;
+        foreach my $norm (@{ $component->{'norms'} }) {
+            $norm_num++;
+            push @norms, { norm_num => $norm_num, norm => $norm };
+        }
+        push @components, {
+            comp_num => $comp_num,
+            tag => $component->{'tag'},
+            subfields => join("", sort keys %{ $component->{'subfields'} }),
+            offset => $component->{'offset'},
+            'length' => $component->{'length'},
+            norms => \@norms
+        };
+    }
+
+    return @components;
+}
+
+# Pass the list of available matching rules to the template for display.
+sub matching_rule_list {
+    my $template = shift;
+
+    my @matching_rules = C4::Matcher::GetMatcherList();
+    $template->param(available_matching_rules => \@matching_rules);
+    $template->param(display_list => 1);
+}
diff --git a/koha-tmpl/intranet-tmpl/prog/en/includes/admin-menu.inc b/koha-tmpl/intranet-tmpl/prog/en/includes/admin-menu.inc
index 0da665f7cc..17b66d0205 100644
--- a/koha-tmpl/intranet-tmpl/prog/en/includes/admin-menu.inc
+++ b/koha-tmpl/intranet-tmpl/prog/en/includes/admin-menu.inc
@@ -28,6 +28,7 @@
  • MARC Bibliographic framework test
  • MARC Authorities framework
  • Classification sources
  • +
  • Record matching rules
  • Additional parameters
    diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/admin-home.tmpl b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/admin-home.tmpl index f64d4d13b9..a793a29360 100644 --- a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/admin-home.tmpl +++ b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/admin-home.tmpl @@ -65,6 +65,8 @@
    Create and manage Authorities frameworks that define the characteristics of your MARC Records (field and subfield definitions).
    Classification sources
    Define classification sources (i.e., call number schemes) used by your collection. Also define filing rules used for sorting call numbers.
    +
    Record matching rules
    +
    Manage rules for automatically matching MARC records during record imports.

    Additional parameters

    diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/matching-rules.tmpl b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/matching-rules.tmpl new file mode 100644 index 0000000000..ffdf4330de --- /dev/null +++ b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/matching-rules.tmpl @@ -0,0 +1,696 @@ + +Koha › Administration › Record Matching Rules +<!-- TMPL_IF name="matching_rule_form" --> + <!-- TMPL_IF name="edit_matching_rule" --> + › Modify record matching rule + <!-- TMPL_ELSE --> + › Add record matching rule + <!-- /TMPL_IF --> +<!-- /TMPL_IF --> +<!-- TMPL_IF name="delete_matching_rule_form" --> + › Confirm deletion of record matching rule "<!-- TMPL_VAR name="code" -->" +<!-- /TMPL_IF --> + + + + + + + + + + + +
    + +
    +
    +
    + + + +

    Modify record matching rule

    + +

    Add record matching rule

    + +
    " name="Aform" method="post"> + " /> +
    +
      +
    1. + + Matching rule code: + " /> + " /> + + + + + +
    2. +
    3. + " /> +
    4. +
    5. + " /> +
    6. +
    +
    +
    + Match points Add matchpoint + + +
    "> +
    + Remove this matchpoint +
      +
    1. + + _search_index" + name="mp__search_index" size="20" + value="" + maxlength="30" escape="HTML" /> +
    2. +
    3. + + _score" + name="mp__score" size="5" + value="" + maxlength="5" escape="HTML" /> +
    4. + +
      _c_"> +
      +
        +
      1. + + _c__tag" + name="mp__c__tag" + value="" + size="3" maxlength="3" escape="HTML" /> +
      2. +
      3. + + _c__subfields" + name="mp__c__subfields" + value="" + size="10" maxlength="40" escape="HTML" /> +
      4. +
      5. + + _c__offset" + name="mp__c__offset" + value="" + size="5" maxlength="5" escape="HTML" /> +
      6. +
      7. + + _c__length" + name="mp__c__length" + value="" + size="5" maxlength="5" escape="HTML" /> +
      8. + +
        _c__n_"> +
      9. + + _c__n___norm" + name="mp__c__n__norm" + value="" + size="20" maxlength="50" escape="HTML" /> +
      10. +
        + +
      +
      +
      + +
    +
    +
    + + +
    +
    + Remove this matchpoint +
      +
    1. + + +
    2. +
    3. + + +
    4. +
      +
      +
        +
      1. + + +
      2. +
      3. + + +
      4. +
      5. + + +
      6. +
      7. + + +
      8. +
        +
      9. + + +
      10. +
        +
      +
      +
      +
    +
    +
    + +
    +
    + Required match checks Add match check + + +
    "> +
    + Remove this match check + _id" name="mc__id" value="1" /> +
      + +
      _src_c_"> +
      + Source (incoming) record check field +
        +
      1. + + _src_c__tag" + name="mc__src_c__tag" + value="" + size="3" maxlength="3" escape="HTML" /> +
      2. +
      3. + + _src_c__subfields" + name="mc__src_c__subfields" + value="" + size="10" maxlength="40" escape="HTML" /> +
      4. +
      5. + + _src_c__offset" + name="mc__src_c__offset" + value="" + size="5" maxlength="5" escape="HTML" /> +
      6. +
      7. + + _src_c__length" + name="mc__src_c__length" + value="" + size="5" maxlength="5" escape="HTML" /> +
      8. + +
        _src_c__n_"> +
      9. + + _src_c__n__norm" + name="mc__src_c__n__norm" + value="" + size="20" maxlength="50" escape="HTML" /> +
      10. +
        + +
      +
      +
      + + +
      _tgt_c_"> +
      + Target (database) record check field +
        +
      1. + + _tgt_c__tag" + name="mc__tgt_c__tag" + value="" + size="3" maxlength="3" escape="HTML" /> +
      2. +
      3. + + _tgt_c__subfields" + name="mc__tgt_c__subfields" + value="" + size="10" maxlength="40" escape="HTML" /> +
      4. +
      5. + + _tgt_c__offset" + name="mc__tgt_c__offset" + value="" + size="5" maxlength="5" escape="HTML" /> +
      6. +
      7. + + _tgt_c__length" + name="mc__tgt_c__length" + value="" + size="5" maxlength="5" escape="HTML" /> +
      8. + +
        _tgt_c__n_"> +
      9. + + _tgt_c__n__norm" + name="mc__tgt_c__n__norm" + value="" + size="20" maxlength="50" escape="HTML" /> +
      10. +
        + +
      +
      +
      + +
    +
    +
    + + +
    +
    + Remove this match check + +
      +
      +
      + Source (incoming) record check field +
        +
      1. + + +
      2. +
      3. + + +
      4. +
      5. + + +
      6. +
      7. + + +
      8. +
        +
      9. + + +
      10. +
        +
      +
      +
      +
      +
      + Target (database) record check field +
        +
      1. + + +
      2. +
      3. + + +
      4. +
      5. + + +
      6. +
      7. + + +
      8. +
        +
      9. + + +
      10. +
        +
      +
      +
      +
    +
    +
    + +
    +
    + + + + + + Cancel +
    +
    + + + +

    Confirm deletion of record matching rule "" ()?

    +
    " name="Aform" method="post"> + " /> + " /> +

    + + +

    +
    + + + + +
    + +
    + +

    Record Matching Rules

    + +Added record matching rule "" + + +Modified record matching rule "" + + +Deleted record matching rule "" + + + + + + + + + + + + + + + + +
    #CodeDescriptionActions
    + ?op=edit_matching_rule&matcher_id=">Edit + ?op=delete_matching_rule&matcher_id=">Delete +
    + +
    + + + + + + + + +
    +
    +
    + +
    +
    + -- 2.39.5