4 # Copyright 2008 Tamil s.a.r.l.
6 # This software is placed under the gnu General Public License, v2
7 # (http://www.gnu.org/licenses/gpl.html)
24 'verbose' => \$verbose,
30 pod2usage( -verbose => 2 );
34 usage() if $help || !$conf;
38 print "Reading configuration file: $conf\n" if $verbose;
40 @clouds = LoadFile( $conf );
42 croak "Unable to read configuration file: $conf\n" if $@;
44 for my $cloud ( @clouds ) {
45 print "Create a cloud\n",
46 " Koha conf file: ", $cloud->{KohaConf} ? $cloud->{KohaConf} : "default", "\n",
47 " Zebra Index: ", $cloud->{ZebraIndex}, "\n",
48 " Koha Keyword: ", $cloud->{KohaIndex}, "\n",
49 " Count: ", $cloud->{Count}, "\n",
50 " Withcss: ", $cloud->{Withcss}, "\n",
51 " Output: ", $cloud->{Output}, "\n",
54 # Set Koha context if KohaConf is present
55 my $set_new_context = 0;
56 if ( $cloud->{KohaConf} ) {
57 if ( -e $cloud->{KohaConf} ) {
58 my $context = C4::Context->new( $cloud->{KohaConf} );
59 $context->set_context();
63 carp "Koha conf file doesn't exist: ", $cloud->{KohaConf}, " ; use KOHA_CONF\n";
67 my $index = new ZebraIndex( $cloud->{ZebraIndex} );
68 $index->scan( $cloud->{Count} );
70 open my $fh, ">", $cloud->{Output}
71 or croak "Unable to create file ", $cloud->{Output};
73 my $withcss = $cloud->{Withcss};
75 print $fh $index->html_cloud( $cloud->{KohaIndex}, $withcss );
77 $set_new_context && restore_context C4::Context;
92 $self->{ zebra_index } = shift;
93 $self->{ top_terms } = undef;
94 $self->{ levels_cloud } = 24;
98 my $zbiblio = C4::Context->Zconn( "biblioserver" );
100 my $ss = $zbiblio->scan_pqf(
101 '@attr 1=' . $self->{ zebra_index } . ' @attr 4=1 @attr 6=3 "a"'
104 croak "Invalid Zebra index: ", $self->{ zebra_index } if $@;
112 # Scan zebra index and populate an array of top terms
115 # $max_terms Max number of top terms
118 # An array of 4-element arrays in $self->{top_terms}
120 # [1] term number of occurrences
121 # [2] term proportional (linear) relative weight within the terms set, in [0, 1]
122 # [3] term logarithmic relative weight, in [0, levels_cloud]
124 # This array is sorted alphabetically by terms ([0])
125 # It can be easily sorted by occurrences:
126 # @t = sort { $a->[1] <=> $b->[1] } @{$self->{top_terms}};
130 my $index_name = $self->{ zebra_index };
131 my $max_terms = shift;
133 my $MAX_OCCURENCE = 1000000000;
135 my $zbiblio = C4::Context->Zconn( "biblioserver" );
136 my $number_of_terms = 0;
137 my @terms; # 2 dimensions array
138 my $min_occurence_index = -1;
145 print "$from\n" if $verbose;
146 $from =~ s/\"/\\\"/g;
147 my $query = '@attr 1=' . $index_name . ' @attr 4=1 @attr 6=3 "'
149 $ss = $zbiblio->scan_pqf( $query );
155 $ss->option( rpnCharset => 'UTF-8' );
156 last if $ss->size() == 0;
159 for my $index ( 0..$ss->size()-1 ) {
160 ($term, $occ) = $ss->display_term($index);
161 #print "$term:$occ\n";
162 if ( $number_of_terms < $max_terms ) {
163 push( @terms, [ $term, $occ ] );
165 if ( $number_of_terms == $max_terms ) {
166 $min_occurence = $MAX_OCCURENCE;
167 for (0..$number_of_terms-1) {
168 my @term = @{ $terms[$_] };
169 if ( $term[1] <= $min_occurence ) {
170 $min_occurence = $term[1];
171 $min_occurence_index = $_;
177 if ( $occ > $min_occurence) {
178 @{ $terms[$min_occurence_index] }[0] = $term;
179 @{ $terms[$min_occurence_index] }[1] = $occ;
180 $min_occurence = $MAX_OCCURENCE;
181 for (0..$max_terms-1) {
182 my @term = @{ $terms[$_] };
183 if ( $term[1] <= $min_occurence ) {
184 $min_occurence = $term[1];
185 $min_occurence_index = $_;
194 # Sort the array of arrays by number of occurrences (ascending)
195 @terms = sort { @{$a}[1] <=> @{$b}[1] } @terms;
197 # A weight relative to the other terms of the set is added to each term
198 my $min = $terms[0][1];
199 my $log_min = log( $min );
200 my $max = $terms[$#terms][1];
201 my $log_max = log( $max );
202 my $delta = $max - $min;
203 $delta = 1 if $delta == 0; # Very unlikely
205 if ($log_max - $log_min == 0) {
206 $log_min = $log_min - $self->{levels_cloud};
210 $factor = $self->{levels_cloud} / ($log_max - $log_min);
213 foreach (0..$#terms) {
214 my $count = @{ $terms[$_] }[1];
215 my $weight = ( $count - $min ) / $delta;
216 my $log_weight = int( (log($count) - $log_min) * $factor);
217 push( @{ $terms[$_] }, $weight );
218 push( @{ $terms[$_] }, $log_weight );
220 $self->{ top_terms } = \@terms;
222 # Sort the array of arrays alphabetically by term
223 @terms = sort { @{$a}[0] cmp @{$b}[0] } @terms;
228 # Returns an HTML version of index top terms formatted
233 my $koha_index = shift;
235 my @terms = @{ $self->{top_terms} };
248 font-weight: lighter;
249 text-decoration: none;
251 span.tagcloud0 { font-size: 12px;}
252 span.tagcloud1 { font-size: 13px;}
253 span.tagcloud2 { font-size: 14px;}
254 span.tagcloud3 { font-size: 15px;}
255 span.tagcloud4 { font-size: 16px;}
256 span.tagcloud5 { font-size: 17px;}
257 span.tagcloud6 { font-size: 18px;}
258 span.tagcloud7 { font-size: 19px;}
259 span.tagcloud8 { font-size: 20px;}
260 span.tagcloud9 { font-size: 21px;}
261 span.tagcloud10 { font-size: 22px;}
262 span.tagcloud11 { font-size: 23px;}
263 span.tagcloud12 { font-size: 24px;}
264 span.tagcloud13 { font-size: 25px;}
265 span.tagcloud14 { font-size: 26px;}
266 span.tagcloud15 { font-size: 27px;}
267 span.tagcloud16 { font-size: 28px;}
268 span.tagcloud17 { font-size: 29px;}
269 span.tagcloud18 { font-size: 30px;}
270 span.tagcloud19 { font-size: 31px;}
271 span.tagcloud20 { font-size: 32px;}
272 span.tagcloud21 { font-size: 33px;}
273 span.tagcloud22 { font-size: 34px;}
274 span.tagcloud23 { font-size: 35px;}
275 span.tagcloud24 { font-size: 36px;}
277 <div class="subjectcloud">
281 my @term = @{ $terms[$_] };
284 #print " 0=", $term[0]," - 1=", $term[1], " - 2=", $term[2], " - 3=", $term[3],"\n";
286 . '<span class="tagcloud'
289 . '<a href="/cgi-bin/koha/opac-search.pl?q='
304 cloud-kw.pl - Creates HTML keywords clouds from Koha Zebra Indexes
310 =item cloud-kw.pl [--verbose|--help] --conf=F<cloud.conf>
312 Creates multiple HTML files, each containing a keyword cloud with top terms sorted
313 by their logarithmic weight.
314 F<cloud.conf> is a YAML configuration file driving cloud generation
323 =item B<--conf=configuration file>
325 Specify configuration file name
327 =item B<--verbose|-v>
329 Enable script verbose mode.
333 Print this help page.
339 The configuration file looks like this:
342 # Koha configuration file for a specific installation
343 # If not present, defaults to KOHA_CONF
344 KohaConf: /home/koha/mylibrary/etc/koha-conf.xml
345 # Zebra index to scan
347 # Koha index used to link found keywords with an OPAC search URL
349 # Number of top keywords to use for the cloud
351 # Include CSS style directives with the cloud
352 # This could be used as a model and then CSS directives are
353 # put in the appropriate CSS file directly.
355 # HTML file where to output the cloud
356 Output: /home/koha/mylibrary/koharoot/koha-tmpl/cloud-author.html
358 KohaConf: /home/koha/yourlibrary/etc/koha-conf.xml
363 Output: /home/koha/yourlibrary/koharoot/koha-tmpl/cloud-subject.html
367 Generated top terms carry more information than what is output for
368 the time being. Some parameters could easily be added to improve
375 Output each term together with the number of occurrences
376 Zebra has found for it in the Koha catalogue.
380 Number of levels in the cloud. Currently 24 levels are hardcoded.
384 Weighting method used to distribute terms in the cloud. Two values could
385 be supported: Logarithmic and Linear. Currently it is Logarithmic by default.
389 Currently terms are output in lexical order. They could be sorted