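Adds support for the "scan indexes" action in advanced search when Elasticsearch is the search engine. The scan is implemented as a terms aggregation (facet) restricted by a case-insensitive prefix filter on the search term. The scanned field must be configured as facetable; otherwise nothing will be found.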
Test plan:
1. Apply patch
2. Go to advanced search and click "More options"
3. Select author as the search field, enter a last name and check "Scan indexes"
4. Perform search and observe the result list resembling scan results
Signed-off-by: Michal Denar <black23@gmail.com>
Signed-off-by: Séverine QUEUNE <severine.queune@bulac.fr>
Signed-off-by: Katrin Fischer <katrin.fischer.83@web.de>
Signed-off-by: Martin Renvoize <martin.renvoize@ptfs-europe.com>
return $res;
}
-=head2 build_browse_query
-
- my $browse_query = $builder->build_browse_query($field, $query);
-
-This performs a "starts with" style query on a particular field. The field
-to be searched must have been indexed with an appropriate mapping as a
-"phrase" subfield, which pretty much everything has.
-
-=cut
-
-# XXX this isn't really a browse query like we want in the end
-sub build_browse_query {
- my ( $self, $field, $query ) = @_;
-
- my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
-
- return { query => '*' } if !defined $query;
-
- # TODO this should come from Koha::SearchEngine::Elasticsearch
- my %field_whitelist = (
- title => 1,
- author => 1,
- );
- $field = 'title' if !exists $field_whitelist{$field};
- my $sort = $self->_sort_field($field);
- my $res = {
- query => {
- match_phrase_prefix => {
- "$field.phrase" => {
- query => $query,
- operator => 'or',
- fuzziness => $fuzzy_enabled ? 'auto' : '0',
- }
- }
- },
- sort => [ { $sort => { order => "asc" } } ],
- };
-}
-
=head2 build_query_compat
my (
$lang, $params )
= @_;
-#die Dumper ( $self, $operators, $operands, $indexes, $orig_limits, $sort_by, $scan, $lang );
- my @sort_params = $self->_convert_sort_fields(@$sort_by);
- my @index_params = $self->_convert_index_fields(@$indexes);
- my $limits = $self->_fix_limit_special_cases($orig_limits);
- if ( $params->{suppress} ) { push @$limits, "suppress:0"; }
- # Merge the indexes in with the search terms and the operands so that
- # each search thing is a handy unit.
- unshift @$operators, undef; # The first one can't have an op
- my @search_params;
- my $truncate = C4::Context->preference("QueryAutoTruncate") || 0;
- my $ea = each_array( @$operands, @$operators, @index_params );
- while ( my ( $oand, $otor, $index ) = $ea->() ) {
- next if ( !defined($oand) || $oand eq '' );
- $oand = $self->_clean_search_term($oand);
- $oand = $self->_truncate_terms($oand) if ($truncate);
- push @search_params, {
- operand => $oand, # the search terms
- operator => defined($otor) ? uc $otor : undef, # AND and so on
- $index ? %$index : (),
- };
- }
+ my $query;
+ my $query_str = '';
+ my $search_param_query_str = '';
+ my $limits = ();
+ if ( $scan ) {
+ ($query, $query_str) = $self->_build_scan_query( $operands, $indexes );
+ $search_param_query_str = $query_str;
+ } else {
+ my @sort_params = $self->_convert_sort_fields(@$sort_by);
+ my @index_params = $self->_convert_index_fields(@$indexes);
+ my $limits = $self->_fix_limit_special_cases($orig_limits);
+ if ( $params->{suppress} ) { push @$limits, "suppress:0"; }
+ # Merge the indexes in with the search terms and the operands so that
+ # each search thing is a handy unit.
+ unshift @$operators, undef; # The first one can't have an op
+ my @search_params;
+ my $truncate = C4::Context->preference("QueryAutoTruncate") || 0;
+ my $ea = each_array( @$operands, @$operators, @index_params );
+ while ( my ( $oand, $otor, $index ) = $ea->() ) {
+ next if ( !defined($oand) || $oand eq '' );
+ $oand = $self->_clean_search_term($oand);
+ $oand = $self->_truncate_terms($oand) if ($truncate);
+ push @search_params, {
+ operand => $oand, # the search terms
+ operator => defined($otor) ? uc $otor : undef, # AND and so on
+ $index ? %$index : (),
+ };
+ }
- # We build a string query from limits and the queries. An alternative
- # would be to pass them separately into build_query and let it build
- # them into a structured ES query itself. Maybe later, though that'd be
- # more robust.
- my $search_param_query_str = join( ' ', $self->_create_query_string(@search_params) );
- my $query_str = join( ' AND ',
- $search_param_query_str || (),
- $self->_join_queries( $self->_convert_index_strings(@$limits) ) || () );
-
- # If there's no query on the left, let's remove the junk left behind
- $query_str =~ s/^ AND //;
- my %options;
- $options{sort} = \@sort_params;
- $options{is_opac} = $params->{is_opac};
- $options{weighted_fields} = $params->{weighted_fields};
- $options{whole_record} = $params->{whole_record};
- my $query = $self->build_query( $query_str, %options );
+ # We build a string query from limits and the queries. An alternative
+ # would be to pass them separately into build_query and let it build
+ # them into a structured ES query itself. Maybe later, though that'd be
+ # more robust.
+ $search_param_query_str = join( ' ', $self->_create_query_string(@search_params) );
+ $query_str = join( ' AND ',
+ $search_param_query_str || (),
+ $self->_join_queries( $self->_convert_index_strings(@$limits) ) || () );
+
+ # If there's no query on the left, let's remove the junk left behind
+ $query_str =~ s/^ AND //;
+ my %options;
+ $options{sort} = \@sort_params;
+ $options{is_opac} = $params->{is_opac};
+ $options{weighted_fields} = $params->{weighted_fields};
+ $options{whole_record} = $params->{whole_record};
+ $query = $self->build_query( $query_str, %options );
+ }
# We roughly emulate the CGI parameters of the zebra query builder
my $query_cgi = '';
shift @$operators; # Shift out the one we unshifted before
- $ea = each_array( @$operands, @$operators, @$indexes );
+ my $ea = each_array( @$operands, @$operators, @$indexes );
while ( my ( $oand, $otor, $index ) = $ea->() ) {
$query_cgi .= '&' if $query_cgi;
$query_cgi .= 'idx=' . uri_escape_utf8( $index // '') . '&q=' . uri_escape_utf8( $oand );
return $query;
}
+=head2 _build_scan_query
+
+    my ($query, $query_str) = $builder->_build_scan_query(\@operands, \@indexes)
+
+This will build an aggregation scan query that can be issued to elasticsearch from
+the provided string input.
+
+Only the first operand (the scan term) and the first index are used. The index
+may carry a modifier after a comma (for example C<au,phr>); the modifier is
+ignored. Short index codes listed in C<%scan_field_convert> are translated to
+field names, any other code is used as the field name unchanged. When no index
+is supplied, or the supplied index is empty, C<subject> is scanned.
+
+Returns a two-element list:
+
+=over 4
+
+=item * a hashref holding a match-all C<query_string> query and a single terms
+aggregation, keyed by the field name, on C<< <field>__facet >>, ordered by
+term ascending and restricted to values starting (case-insensitively) with
+the cleaned scan term
+
+=item * the scan term as passed in (an empty string when there was none)
+
+=back
+
+=cut
+
+# Short index codes accepted by the scan form, mapped to the field names
+# whose "__facet" sub-field is aggregated on.
+our %scan_field_convert = (
+    'ti' => 'title',
+    'au' => 'author',
+    'su' => 'subject',
+    'se' => 'title-series',
+    'pb' => 'publisher',
+);
+
+sub _build_scan_query {
+    my ( $self, $operands, $indexes ) = @_;
+
+    # A missing or undefined operand scans from the start of the index.
+    my $term = $operands->[0] // '';
+
+    # Keep only the index code, dropping any ",modifier" suffix.
+    my ($f) = split( /,/, $indexes->[0] // '' );
+
+    # An empty code would otherwise produce an aggregation on the field
+    # "__facet", so fall back to the same default used for an empty list.
+    $f = 'subject' if !defined $f || $f eq '';
+    my $index = $scan_field_convert{$f} || $f;
+
+    my $res;
+    $res->{query} = {
+        query_string => {
+            query => '*'
+        }
+    };
+    $res->{aggregations} = {
+        $index => {
+            terms => {
+                field   => $index . '__facet',
+                order   => { '_term' => 'asc' },
+                # Prefix match: the escaped, case-insensitive term followed by anything
+                include => $self->_create_regex_filter($self->_clean_search_term($term)) . '.*'
+            }
+        }
+    };
+    return ($res, $term);
+}
+
+=head2 _create_regex_filter
+
+    my $filter = $builder->_create_regex_filter('term')
+
+Turns a plain search term into a regular expression fragment suitable for the
+C<include> filter of an aggregation query. The term is first escaped with
+C<quotemeta>; every resulting character that has distinct lower- and upper-case
+forms is then replaced by a two-member character class, so C<new> becomes
+C<[nN][eE][wW]>. All other characters are kept as they are.
+
+=cut
+
+sub _create_regex_filter {
+    my ($self, $term) = @_;
+
+    my @pieces = map {
+        my $lower = lc($_);
+        my $upper = uc($_);
+
+        # Characters without case (digits, escapes, punctuation) pass through
+        $lower eq $upper ? $_ : join( '', '[', $lower, $upper, ']' );
+    } split( //, quotemeta($term) );
+
+    return join( '', @pieces );
+}
+
=head2 _convert_sort_fields
my @sort_params = _convert_sort_fields(@sort_by)
return $str unless $str; # Empty or undef, we can't use it.
$str .= '*' if $type eq 'right-truncate';
- $str = '"' . $str . '"' if $type eq 'phrase';
+ $str = '"' . $str . '"' if $type eq 'phrase' && $str !~ /^".*"$/;
if ($type eq 'st-year') {
if ($str =~ /^(.*)-(.*)$/) {
my $from = $1 || '*';
my $params = $self->get_elasticsearch_params();
# 20 is the default number of results per page
- $query->{size} = $count || 20;
+ $query->{size} = $count // 20;
# ES doesn't want pages, it wants a record to start from.
if (exists $options{offset}) {
$query->{from} = $options{offset};
my ( $error, $results, $facets ) = $search->search_compat(
$query, $simple_query, \@sort_by, \@servers,
- $results_per_page, $offset, $branches, $query_type,
- $scan
+ $results_per_page, $offset, undef, $item_types,
+ $query_type, $scan
)
A search interface somewhat compatible with L<C4::Search->getRecords>. Anything
sub search_compat {
my (
- $self, $query, $simple_query, $sort_by,
- $servers, $results_per_page, $offset, $branches,
- $query_type, $scan
+ $self, $query, $simple_query, $sort_by,
+ $servers, $results_per_page, $offset, $branches,
+ $item_types, $query_type, $scan
) = @_;
+
+ if ( $scan ) {
+ return $self->_aggregation_scan( $query, $results_per_page, $offset );
+ }
+
my %options;
if ( !defined $offset or $offset < 0 ) {
$offset = 0;
return \@facets;
}
+=head2 _aggregation_scan
+
+    my ( $error, $results, $facets ) = $self->_aggregation_scan( $query, 10, 0 );
+
+Perform an aggregation request for scan purposes.
+
+C<$query> is expected to carry a single entry under C<aggregations>; its
+buckets are turned into one page of pseudo records. C<$results_per_page> is
+the page size (20 when undefined) and C<$offset> the zero-based position of
+the first bucket to return (0 when undefined or negative).
+
+Returns a three-element list C<(undef, \%result, undef)>. C<%result> has the
+single key C<biblioserver>, holding C<hits> (the number of buckets returned by
+the search engine, at most 1000) and C<RECORDS>, an arrayref in which the
+requested page is stored at indexes C<$offset> onwards. Each record is a
+serialised MARC record carrying the bucket's document count and term:
+
+=over 4
+
+=item * MARC21: 100$a (count) and 245$a (term)
+
+=item * UNIMARC: 200$f (count) and 200$a (term)
+
+=back
+
+When the query has no aggregations, C<hits> is 0 and C<RECORDS> is undef.
+
+=cut
+
+sub _aggregation_scan {
+    my ($self, $query, $results_per_page, $offset) = @_;
+
+    if (!scalar(keys %{$query->{aggregations}})) {
+        # A list, not an anonymous hash: assigning { ... } to %result would
+        # create one key named after the stringified reference.
+        my %result = (
+            biblioserver => {
+                hits => 0,
+                RECORDS => undef
+            }
+        );
+        return (undef, \%result, undef);
+    }
+
+    # This sub is reached before search_compat normalises its arguments, so
+    # guard against a missing/negative offset and a missing page size here.
+    # 20 matches the default number of results per page used by search().
+    $offset = 0 if !defined $offset || $offset < 0;
+    $results_per_page //= 20;
+
+    my ($field) = keys %{$query->{aggregations}};
+    # Upper bound on the number of distinct terms fetched for the scan
+    $query->{aggregations}{$field}{terms}{size} = 1000;
+    my $results = $self->search($query, 1, 0);
+
+    # Treat a response without buckets as an empty scan rather than dying
+    my $buckets = $results->{aggregations}{$field}{buckets} // [];
+    my $count = scalar(@$buckets);
+
+    # Convert each bucket of the requested page into a MARC::Record.
+    # opac-search expects results to be put in the
+    # right place in the array, according to $offset
+    my @records;
+    for (my $index = $offset; $index - $offset < $results_per_page && $index < $count; $index++) {
+        my $bucket = $buckets->[$index];
+        # Scan values are expressed as:
+        # - MARC21: 100a (count) and 245a (term)
+        # - UNIMARC: 200f (count) and 200a (term)
+        my $marc = MARC::Record->new;
+        $marc->encoding('UTF-8');
+        if (C4::Context->preference('marcflavour') eq 'UNIMARC') {
+            $marc->append_fields(
+                MARC::Field->new((200, ' ', ' ', 'f' => $bucket->{doc_count}))
+            );
+            $marc->append_fields(
+                MARC::Field->new((200, ' ', ' ', 'a' => $bucket->{key}))
+            );
+        } else {
+            $marc->append_fields(
+                MARC::Field->new((100, ' ', ' ', 'a' => $bucket->{doc_count}))
+            );
+            $marc->append_fields(
+                MARC::Field->new((245, ' ', ' ', 'a' => $bucket->{key}))
+            );
+        }
+        $records[$index] = $marc->as_usmarc();
+    }
+    # consumers of this expect a namespaced result, we provide the default
+    # configuration.
+    my %result;
+    $result{biblioserver}{hits} = $count;
+    $result{biblioserver}{RECORDS} = \@records;
+    return (undef, \%result, undef);
+}
+
1;
my $input_value = $2;
push @query_inputs, { input_name => $input_name, input_value => Encode::decode_utf8( uri_unescape( $input_value ) ) };
if ($input_name eq 'idx') {
- $scan_index_to_use = $input_value; # unless $scan_index_to_use;
+ # The form contains multiple fields, so take the first value as the scan index
+ $scan_index_to_use = $input_value unless $scan_index_to_use;
}
- if ($input_name eq 'q') {
+ if (!defined $scan_search_term_to_use && $input_name eq 'q') {
$scan_search_term_to_use = Encode::decode_utf8( uri_unescape( $input_value ));
}
}
$template->param( EnableSearchHistory => 1 );
}
- ## If there's just one result, redirect to the detail page
- if ($total == 1) {
+ ## If there's just one result, redirect to the detail page unless doing an index scan
+ if ($total == 1 && !$scan) {
my $biblionumber = $newresults[0]->{biblionumber};
my $defaultview = C4::Context->preference('IntranetBiblioDefaultView');
my $views = { C4::Search::enabled_staff_search_views };
[% ELSE %]<option value="ti">Title</option>[% END %]
[% IF ( ms_ticommaphr ) %]<option selected="selected" value="ti,phr">Title phrase</option>
[% ELSE %]<option value="ti,phr">Title phrase</option>[% END %]
- [% IF ( ms_aucommaphr ) %]<option selected="selected" value="au,phr">Author</option>
+ [% IF ( ms_au || ms_aucommaphr ) %]<option selected="selected" value="au,phr">Author</option>
[% ELSE %]<option value="au,phr">Author</option>[% END %]
[% IF ( ms_su ) %]<option selected="selected" value="su">Subject</option>
[% ELSE %]<option value="su">Subject</option>[% END %]
[% ELSE %]<option value="ss">ISSN</option>[% END %]
</select>
<input type="hidden" name="scan" value="1" />
+ <input class="submit" type="submit" value="Submit" />
</td>
</tr>
</table>
type => 'text'
},
subject => {
- type => 'text'
+ type => 'text',
+ facet => 1
},
itemnumber => {
type => 'integer'
};
subtest 'build_query tests' => sub {
- plan tests => 40;
+ plan tests => 48;
my $qb;
);
is($query_cgi, 'idx=&q=title%3A%22donald%20duck%22', 'query cgi');
is($query_desc, 'title:"donald duck"', 'query desc ok');
+
+ # Scan queries
+ ( undef, $query, $simple_query, $query_cgi, $query_desc ) = $qb->build_query_compat( undef, ['new'], ['au'], undef, undef, 1 );
+ is(
+ $query->{query}{query_string}{query},
+ '*',
+ "scan query is properly formed"
+ );
+ is_deeply(
+ $query->{aggregations}{'author'}{'terms'},
+ {
+ field => 'author__facet',
+ order => { '_term' => 'asc' },
+ include => '[nN][eE][wW].*'
+ },
+ "scan aggregation request is properly formed"
+ );
+ is($query_cgi, 'idx=au&q=new&scan=1', 'query cgi');
+ is($query_desc, 'new', 'query desc ok');
+
+ ( undef, $query, $simple_query, $query_cgi, $query_desc ) = $qb->build_query_compat( undef, ['new'], [], undef, undef, 1 );
+ is(
+ $query->{query}{query_string}{query},
+ '*',
+ "scan query is properly formed"
+ );
+ is_deeply(
+ $query->{aggregations}{'subject'}{'terms'},
+ {
+ field => 'subject__facet',
+ order => { '_term' => 'asc' },
+ include => '[nN][eE][wW].*'
+ },
+ "scan aggregation request is properly formed"
+ );
+ is($query_cgi, 'idx=&q=new&scan=1', 'query cgi');
+ is($query_desc, 'new', 'query desc ok');
};
use Modern::Perl;
-use Test::More tests => 11;
+use Test::More tests => 13;
use t::lib::Mocks;
use Koha::SearchEngine::Elasticsearch::QueryBuilder;
ok ($results = $searcher->search_compat( $query ), 'Test search_compat' );
+ my ( undef, $scan_query ) = $builder->build_query_compat( undef, ['easy'], [], undef, undef, 1 );
+ ok ((undef, $results) = $searcher->search_compat( $scan_query, undef, [], [], 20, 0, undef, undef, undef, 1 ), 'Test search_compat scan query' );
+ my $expected = {
+ biblioserver => {
+ hits => 0,
+ RECORDS => []
+ }
+ };
+ is_deeply($results, $expected, 'Scan query results ok');
+
ok (($results,$count) = $searcher->search_auth_compat ( $query ), 'Test search_auth_compat' );
is ( $count = $searcher->count_auth_use($searcher,1), 0, 'Testing count_auth_use');