| | 1 | package Wiki::Toolkit::Search::KinoSearch; |
| | 2 | use strict; |
| | 3 | our $VERSION = '0.01'; |
| | 4 | |
| | 5 | use base 'Wiki::Toolkit::Search::Base'; |
| | 6 | |
| | 7 | #use File::Spec::Functions qw(catfile); |
| | 8 | use File::Spec; |
| | 9 | use KinoSearch::InvIndexer; |
| | 10 | use KinoSearch::Analysis::PolyAnalyzer; |
| | 11 | |
| | 12 | =head1 NAME |
| | 13 | |
| | 14 | Wiki::Toolkit::Search::KinoSearch - Use KinoSearch to search your Wiki::Toolkit wiki. |
| | 15 | |
| | 16 | =head1 SYNOPSIS |
| | 17 | |
| | 18 | my $search = Wiki::Toolkit::Search::KinoSearch->new( path => "/var/KinoSearch/wiki" ); |
| | 19 | my %wombat_nodes = $search->search_nodes("wombat"); |
| | 20 | |
| | 21 | Provides search-related methods for L<Wiki::Toolkit>. |
| | 22 | |
| | 23 | =cut |
| | 24 | |
| | 25 | =head1 METHODS |
| | 26 | |
| | 27 | =over 4 |
| | 28 | |
| | 29 | =item B<new> |
| | 30 | |
| | 31 | my $search = Wiki::Toolkit::Search::KinoSearch->new( path => "/var/KinoSearch/wiki" ); |
| | 32 | |
| | 33 | Takes only one parameter, which is mandatory. C<path> must be a directory |
| | 34 | for storing the indexed data. It should exist and be writeable. |
| | 35 | |
| | 36 | =cut |
| | 37 | |
# Initialiser hook: remembers where the invindex lives.
#
# Named arguments:
#   path - directory holding the index data; stored verbatim in
#          $self->{_dir} without any validation.
#
# Returns the object itself.
sub _init {
    my $self   = shift;
    my %config = @_;

    $self->{_dir} = $config{path};

    return $self;
}
| | 43 | |
# Read-only accessor for the index directory recorded in $self->{_dir}.
sub _dir {
    my ($self) = @_;
    return $self->{_dir};
}
| | 45 | |
# Builds a fresh PolyAnalyzer configured with language => 'en'.
# A new analyzer object is constructed on every call; nothing is cached.
sub _analyzer {
    my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' );
    return $analyzer;
}
| | 49 | |
# Builds a KinoSearch::InvIndexer for the invindex stored in $self->_dir
# and declares the two fields this backend writes: 'title' and the
# vectorized 'body_text'.
#
# The invindex is created only when the directory does not hold one yet.
# Asking for create => 1 on every call would rebuild the index from
# scratch each time a node is indexed or deleted, discarding every
# previously indexed node.
#
# Returns the configured indexer; the caller is responsible for calling
# finish() on it.
sub _indexer {
    my ($self) = @_;
    my $dir = $self->_dir;

    # An existing invindex is recognised by its "segments" file.  If that
    # file is missing we fall back to creating the index, which is what
    # this method always did before.
    # NOTE(review): confirm the marker file name against the installed
    # KinoSearch version.
    my $segments_file = File::Spec->catfile( $dir, 'segments' );
    my $create = -e $segments_file ? 0 : 1;

    my $indexer = KinoSearch::InvIndexer->new(
        analyzer => $self->_analyzer,
        invindex => $dir,
        create   => $create,
    );

    # Fields are declared on every indexer instance, new index or not.
    $indexer->spec_field( name => 'title' );
    $indexer->spec_field(
        name       => 'body_text',
        vectorized => 1,
    );

    return $indexer;
}
| | 64 | |
# Adds one node to the search index.
#
# Arguments:
#   $node    - node name, stored in the 'title' field
#   $content - node text, stored in the 'body_text' field
#
# A new indexer is obtained from $self->_indexer for each call and is
# finished immediately, passing $self->optimize as the optimize flag.
# Returns whatever the indexer's finish() returns.
sub index_node {
    my $self = shift;
    my ( $node, $content ) = @_;

    my $indexer  = $self->_indexer;
    my $document = $indexer->new_doc;

    # Field order is kept as title first, body second.
    for my $pair ( [ title => $node ], [ body_text => $content ] ) {
        $document->set_value( @{$pair} );
    }

    $indexer->add_doc($document);

    return $indexer->finish( optimize => $self->optimize );
}
| | 74 | |
# Builds a KinoSearch::Searcher over the invindex in $self->_dir, using
# an analyzer from $self->_analyzer.
#
# Returns a new searcher object on every call.
sub _searcher {
    my ($self) = @_;

    # Nothing else in this file loads the Searcher class, so load it here;
    # otherwise the constructor call below dies because the package has
    # no new() method.
    require KinoSearch::Searcher;

    return KinoSearch::Searcher->new(
        invindex => $self->_dir,
        analyzer => $self->_analyzer,
    );
}
| | 82 | |
# Runs $query through a searcher obtained from $self->_searcher and
# returns whatever that searcher's search() method returns.
sub _search_nodes {
    my $self  = shift;
    my $query = shift;

    my $searcher = $self->_searcher;
    return $searcher->search($query);
}
| | 87 | |
# Searches the index and returns the matches as a flat hash.
#
# All arguments after $self are handed unchanged to _search_nodes.
#
# Returns a list of (title => score) pairs built from each hit's 'title'
# and 'score' entries.  If several hits share a title, the last one
# fetched wins.  An empty list is returned when there are no hits.
sub search_nodes {
    my ( $self, @args ) = @_;
    my $hits = $self->_search_nodes(@args);

    my %score_for;

    # $hit must be a lexical: under "use strict" an undeclared $hit is a
    # compile-time error that prevents the whole module from loading.
    while ( my $hit = $hits->fetch_hit_hashref ) {
        $score_for{ $hit->{title} } = $hit->{score};
    }

    return %score_for;
}
| | 97 | |
| | 98 | # sub _fuzzy_match { |
| | 99 | # my ( $self, $string, $canonical ) = @_; |
| | 100 | # return |
| | 101 | # map { $_ => ( $_ eq $string ? 2 : 1 ) } |
| | 102 | # $self->_search_nodes("fuzzy:$canonical"); |
| | 103 | # } |
| | 104 | |
| | 105 | # sub indexed { |
| | 106 | # my ( $self, $id ) = @_; |
| | 107 | # my $term = Plucene::Index::Term->new( { field => 'id', text => $id } ); |
| | 108 | # return $self->_reader->doc_freq($term); |
| | 109 | # } |
| | 110 | |
# Flag handed to the indexer's finish() call by index_node and
# delete_node.  Always true here; subclasses may override it.
sub optimize {
    return 1;
}
| | 112 | |
# Removes a node from the search index.
#
# Arguments:
#   $id - node name; documents whose 'title' field matches this term are
#         deleted.
#
# A new indexer is obtained from $self->_indexer and finished right away,
# passing $self->optimize as the optimize flag.  Returns whatever the
# indexer's finish() returns.
#
# NOTE(review): 'title' is declared in _indexer without disabling
# analysis, so a raw multi-word or mixed-case $id may not match the
# indexed terms -- confirm against the KinoSearch field defaults.
sub delete_node {
    my ( $self, $id ) = @_;

    # Nothing else in this file loads the Term class, so load it here;
    # otherwise the constructor call below dies because the package has
    # no new() method.
    require KinoSearch::Index::Term;

    my $term    = KinoSearch::Index::Term->new( title => $id );
    my $indexer = $self->_indexer;
    $indexer->delete_docs_by_term($term);

    return $indexer->finish( optimize => $self->optimize );
}
| | 120 | |
# Capability flag: this backend reports no phrase-search support.
sub supports_phrase_searches {
    return 0;
}
# Capability flag: this backend reports no fuzzy-search support.
sub supports_fuzzy_searches {
    return 0;
}
| | 123 | |
| | 124 | 1; |
| | 125 | __END__ |
| | 126 | |
| | 127 | =back |
| | 128 | |
| | 129 | =head1 TODO |
| | 130 | |
| | 131 | =over 4 |
| | 132 | |
=item Phrase Searching

=item Fuzzy Matching

=back
| | 135 | |
| | 136 | =head1 SEE ALSO |
| | 137 | |
| | 138 | L<Wiki::Toolkit>, L<Wiki::Toolkit::Search::Base>. |
| | 139 | |
| | 140 | =cut |
| | 141 | |