4 # Copyright 2008 Tamil s.a.r.l.
6 # This software is placed under the gnu General Public License, v2
7 # (http://www.gnu.org/licenses/gpl.html)
24 'verbose' => \$verbose,
30 pod2usage( -verbose => 2 );
34 usage() if $help || !$conf;
38 print "Reading configuration file: $conf\n" if $verbose;
40 @clouds = LoadFile( $conf );
42 croak "Unable to read configuration file: $conf\n" if $@;
44 for my $cloud ( @clouds ) {
45 print "Create a cloud\n",
46 " Koha conf file: ", $cloud->{KohaConf}, "\n",
47 " Zebra Index: ", $cloud->{ZebraIndex}, "\n",
48 " Koha Keyword: ", $cloud->{KohaIndex}, "\n",
49 " Count: ", $cloud->{Count}, "\n",
50 " Withcss: ", $cloud->{Withcss}, "\n",
51 " Output: ", $cloud->{Output}, "\n",
55 my $context = new C4::Context( $cloud->{KohaConf} );
56 $context->set_context();
58 my $index = new ZebraIndex( $cloud->{ZebraIndex} );
59 $index->scan( $cloud->{Count} );
61 open my $fh, ">", $cloud->{Output}
62 or croak "Unable to create file ", $cloud->{Output};
64 my $withcss = $cloud->{Withcss};
66 print $fh $index->html_cloud( $cloud->{KohaIndex}, $withcss );
78 $self->{ zebra_index } = shift;
79 $self->{ top_terms } = undef;
80 $self->{ levels_cloud } = 24;
88 # Scan zebra index and populate an array of top terms
91 # $max_terms Max number of top terms
94 # An array of 4-element arrays in $self->{top_terms}
96 # [1] term number of occurrences
97 # [2] term proportional relative weight within the term set, in [0, 1]
98 # [3] term logarithmic relative weight, in [0, levels_cloud]
100 # This array is sorted alphabetically by term ([0]).
101 # It can easily be sorted by occurrences:
102 # @t = sort { $a->[1] <=> $b->[1] } @{$self->{top_terms}};
106 my $index_name = $self->{ zebra_index };
107 my $max_terms = shift;
109 my $MAX_OCCURENCE = 1000000000;
111 my $zbiblio = C4::Context->Zconn( "biblioserver" );
112 my $number_of_terms = 0;
113 my @terms; # 2 dimensions array
114 my $min_occurence_index = -1;
120 print "$from\n" if $verbose;
121 $from =~ s/\"/\\\"/g;
122 my $query = '@attr 1=' . $index_name . ' @attr 4=1 @attr 6=3 "'
124 $ss = $zbiblio->scan_pqf( $query );
130 $ss->option( rpnCharset => 'UTF-8' );
131 last if $ss->size() == 0;
134 for my $index ( 0..$ss->size()-1 ) {
135 ($term, $occ) = $ss->display_term($index);
136 #print "$term:$occ\n";
137 if ( $number_of_terms < $max_terms ) {
138 push( @terms, [ $term, $occ ] );
140 if ( $number_of_terms == $max_terms ) {
141 $min_occurence = $MAX_OCCURENCE;
142 for (0..$number_of_terms-1) {
143 my @term = @{ $terms[$_] };
144 if ( $term[1] <= $min_occurence ) {
145 $min_occurence = $term[1];
146 $min_occurence_index = $_;
152 if ( $occ > $min_occurence) {
153 @{ $terms[$min_occurence_index] }[0] = $term;
154 @{ $terms[$min_occurence_index] }[1] = $occ;
155 $min_occurence = $MAX_OCCURENCE;
156 for (0..$max_terms-1) {
157 my @term = @{ $terms[$_] };
158 if ( $term[1] <= $min_occurence ) {
159 $min_occurence = $term[1];
160 $min_occurence_index = $_;
169 # Sort the array of arrays by term occurrence count (ascending)
170 @terms = sort { @{$a}[1] <=> @{$b}[1] } @terms;
172 # A weight relative to the other terms in the set is added to each term
173 my $min = $terms[0][1];
174 my $log_min = log( $min );
175 my $max = $terms[$#terms-1][1];
176 my $log_max = log( $max );
177 my $delta = $max - $min;
179 if ($log_max - $log_min == 0) {
180 $log_min = $log_min - $self->{levels_cloud};
184 $factor = $self->{levels_cloud} / ($log_max - $log_min);
187 foreach (0..$#terms-1) {
188 my $count = @{ $terms[$_] }[1];
189 my $weight = ( $count - $min ) / $delta;
190 my $log_weight = int( (log($count) - $log_min) * $factor);
191 push( @{ $terms[$_] }, $weight );
192 push( @{ $terms[$_] }, $log_weight );
194 $self->{ top_terms } = \@terms;
196 # Sort the array of arrays alphabetically by term
197 @terms = sort { @{$a}[0] cmp @{$b}[0] } @terms;
202 # Returns an HTML version of index top terms formatted
207 my $koha_index = shift;
209 my @terms = @{ $self->{top_terms} };
220 font-weight: lighter;
221 text-decoration: none;
223 span.tagcloud0 { font-size: 12px;}
224 span.tagcloud1 { font-size: 13px;}
225 span.tagcloud2 { font-size: 14px;}
226 span.tagcloud3 { font-size: 15px;}
227 span.tagcloud4 { font-size: 16px;}
228 span.tagcloud5 { font-size: 17px;}
229 span.tagcloud6 { font-size: 18px;}
230 span.tagcloud7 { font-size: 19px;}
231 span.tagcloud8 { font-size: 20px;}
232 span.tagcloud9 { font-size: 21px;}
233 span.tagcloud10 { font-size: 22px;}
234 span.tagcloud11 { font-size: 23px;}
235 span.tagcloud12 { font-size: 24px;}
236 span.tagcloud13 { font-size: 25px;}
237 span.tagcloud14 { font-size: 26px;}
238 span.tagcloud15 { font-size: 27px;}
239 span.tagcloud16 { font-size: 28px;}
240 span.tagcloud17 { font-size: 29px;}
241 span.tagcloud18 { font-size: 30px;}
242 span.tagcloud19 { font-size: 31px;}
243 span.tagcloud20 { font-size: 32px;}
244 span.tagcloud21 { font-size: 33px;}
245 span.tagcloud22 { font-size: 34px;}
246 span.tagcloud23 { font-size: 35px;}
247 span.tagcloud24 { font-size: 36px;}
249 <div class="subjectcloud">
252 my @term = @{ $terms[$_] };
256 . '<span class="tagcloud'
259 . '<a href="/cgi-bin/koha/opac-search.pl?q='
274 cloud-kw.pl - Creates HTML keyword clouds from Koha Zebra indexes
280 =item cloud-kw.pl [--verbose|--help] --conf=F<cloud.conf>
282 Creates multiple HTML files, each containing a keyword cloud.
283 F<cloud.conf> is a YAML configuration file driving cloud generation
292 =item B<--conf=configuration file>
294 Specify configuration file name
296 =item B<--verbose|-v>
298 Enable script verbose mode.
302 Print this help page.
308 The configuration file looks like this:
311 # Koha configuration file for a specific installation
312 KohaConf: /home/koha/mylibrary/etc/koha-conf.xml
313 # Zebra index to scan
315 # Koha index used to link found keywords with an OPAC search URL
317 # Number of top keywords to use for the cloud
319 # Include CSS style directives with the cloud
321 # HTML file where to output the cloud
322 Output: /home/koha/mylibrary/koharoot/koha-tmpl/cloud-author.html
324 KohaConf: /home/koha/yourlibrary/etc/koha-conf.xml
329 Output: /home/koha/yourlibrary/koharoot/koha-tmpl/cloud-subject.html