Ticket #39: 0001-add-KinoSearch.patch

File 0001-add-KinoSearch.patch, 5.5 KB (added by perigrin, 11 years ago)

KinoSearch patch

  • Makefile.PL

    From 90ac1d0becf551a28ab49f4d4ee31c369da98fe1 Mon Sep 17 00:00:00 2001
    From: Chris Prather <chris@prather.org>
    Date: Sat, 26 Jul 2008 17:57:02 -0400
    Subject: [Patch][Build] add KinoSearch
    
    ---
     Makefile.PL                           |   11 +++
     lib/Wiki/Toolkit/Search/KinoSearch.pm |  141 +++++++++++++++++++++++++++++++++
     lib/Wiki/Toolkit/TestLib.pm           |   10 +++
     3 files changed, 162 insertions(+), 0 deletions(-)
     create mode 100644 lib/Wiki/Toolkit/Search/KinoSearch.pm
    
    diff --git a/Makefile.PL b/Makefile.PL
    index 9867d89..f7587ea 100644
    a b if ( $plucene_inst ) { 
    170170    $Wiki::Toolkit::TestConfig::config{plucene} = undef;
    171171}
    172172
     173# If we have Plucene installed, we can test that without asking questions.
     174eval { require KinoSearch; };
     175my $plucene_inst = $@ ? 0 : 1;
     176if ( $plucene_inst ) {
     177    print "You have KinoSearch installed, so will test with that...\n\n";
     178    $Wiki::Toolkit::TestConfig::config{kinosearch} = 1;
     179} else {
     180    print "KinoSearch not installed; skipping test...\n\n";
     181    $Wiki::Toolkit::TestConfig::config{kinosearch} = undef;
     182}
     183
    173184# Write out the config for next run.
    174185open OUT, ">lib/Wiki/Toolkit/TestConfig.pm"
    175186    or die "Couldn't open lib/Wiki/Toolkit/TestConfig.pm for writing: $!";
  • new file lib/Wiki/Toolkit/Search/KinoSearch.pm

    diff --git a/lib/Wiki/Toolkit/Search/KinoSearch.pm b/lib/Wiki/Toolkit/Search/KinoSearch.pm
    new file mode 100644
    index 0000000..5993ae4
    - +  
     1package Wiki::Toolkit::Search::KinoSearch;
     2use strict;
     3our $VERSION = '0.01';
     4
     5use base 'Wiki::Toolkit::Search::Base';
     6
     7#use File::Spec::Functions qw(catfile);
     8use File::Spec;
     9use KinoSearch::InvIndexer;
     10use KinoSearch::Analysis::PolyAnalyzer;
     11
     12=head1 NAME
     13
     14Wiki::Toolkit::Search::KinoSearch - Use KinoSearch to search your Wiki::Toolkit wiki.
     15
     16=head1 SYNOPSIS
     17
     18  my $search = Wiki::Toolkit::Search::KinoSearch->new( path => "/var/KinoSearch/wiki" );
     19  my %wombat_nodes = $search->search_nodes("wombat");
     20
     21Provides search-related methods for L<Wiki::Toolkit>.
     22
     23=cut
     24
     25=head1 METHODS
     26
     27=over 4
     28
     29=item B<new>
     30
     31  my $search = Wiki::Toolkit::Search::KinoSearch->new( path => "/var/KinoSearch/wiki" );
     32
     33Takes only one parameter, which is mandatory. C<path> must be a directory
     34for storing the indexed data.  It should exist and be writeable.
     35
     36=cut
     37
     38sub _init {
     39    my ( $self, %args ) = @_;
     40    $self->{_dir} = $args{path};
     41    return $self;
     42}
     43
     44sub _dir { shift->{_dir} }
     45
     46sub _analyzer {
     47    KinoSearch::Analysis::PolyAnalyzer->new( language => 'en', );
     48}
     49
     50sub _indexer {
     51    my ($self) = @_;
     52    my $indexer = KinoSearch::InvIndexer->new(
     53        analyzer => $self->_analyzer,
     54        invindex => $self->_dir,
     55        create   => 1,
     56    );
     57    $indexer->spec_field( name => 'title' );
     58    $indexer->spec_field(
     59        name       => 'body_text',
     60        vectorized => 1,
     61    );
     62    return $indexer;
     63}
     64
     65sub index_node {
     66    my ( $self, $node, $content ) = @_;
     67    my $indexer = $self->_indexer;
     68    my $doc     = $indexer->new_doc;
     69    $doc->set_value( title     => $node );
     70    $doc->set_value( body_text => $content );
     71    $indexer->add_doc($doc);
     72    $indexer->finish( optimize => $self->optimize );
     73}
     74
     75sub _searcher {
     76    my ($self) = @_;
     77    KinoSearch::Searcher->new(
     78        invindex => $self->_dir,
     79        analyzer => $self->_analyzer,
     80    );
     81}
     82
     83sub _search_nodes {
     84    my ( $self, $query ) = @_;
     85    $self->_searcher->search($query);
     86}
     87
     88sub search_nodes {
     89    my ( $self, @args ) = @_;
     90    my $hits    = $self->_search_nodes(@args);
     91    my $results = {};
     92    while ( $hit = $hits->fetch_hit_hashref ) {
     93        $results->{ $hit->{title} } = $hit->{score};
     94    }
     95    return %$results;
     96}
     97
     98# sub _fuzzy_match {
     99#     my ( $self, $string, $canonical ) = @_;
     100#     return
     101#       map { $_ => ( $_ eq $string ? 2 : 1 ) }
     102#       $self->_search_nodes("fuzzy:$canonical");
     103# }
     104
     105# sub indexed {
     106#     my ( $self, $id ) = @_;
     107#     my $term = Plucene::Index::Term->new( { field => 'id', text => $id } );
     108#     return $self->_reader->doc_freq($term);
     109# }
     110
     111sub optimize { 1 }
     112
     113sub delete_node {
     114    my ( $self, $id ) = @_;
     115    my $term = KinoSearch::Index::Term->new( title => $id );
     116    my $indexer = $self->_indexer;
     117    $indexer->delete_docs_by_term($term);
     118    $indexer->finish( optimize => $self->optimize );
     119}
     120
     121sub supports_phrase_searches { return 0; }
     122sub supports_fuzzy_searches  { return 0; }
     123
     1241;
     125__END__
     126
     127=back
     128
     129=head1 TODO
     130
     131=over 4
     132
     =item Phrase Searching

     =item Fuzzy Matching

     =back
     135
     136=head1 SEE ALSO
     137
     138L<Wiki::Toolkit>, L<Wiki::Toolkit::Search::Base>.
     139
     140=cut
     141
  • lib/Wiki/Toolkit/TestLib.pm

    diff --git a/lib/Wiki/Toolkit/TestLib.pm b/lib/Wiki/Toolkit/TestLib.pm
    index aa3b71d..e1b437f 100644
    a b if ( $configured{plucene} ) { 
    145145    $plucene_path = "t/plucene";
    146146}
    147147
     148my $kino_path;
     149# Test with KinoSearch if it is enabled in the test configuration; $kino_path stays undefined otherwise.
     150if ( $configured{kinosearch} ) {
     151    $kino_path = "t/kinosearch";
     152}
     153
    148154# @wiki_info describes which searches work with which stores.
    149155
    150156# Database-specific searchers.
    foreach my $dbtype ( qw( MySQL Pg SQLite ) ) { 
    169175        if ( $datastore_info{$dbtype} and $plucene_path );
    170176    push @wiki_info, { datastore_info => $datastore_info{$dbtype} }
    171177        if $datastore_info{$dbtype};
     178       
     # Register one more configuration for this datastore when a KinoSearch
     # index path has been set.
     # NOTE(review): the path is handed over under the plucene_path key, so
     # this presumably exercises the Plucene backend rather than KinoSearch -
     # confirm which key the test setup expects for KinoSearch.
     179    push @wiki_info, { datastore_info => $datastore_info{$dbtype},
     180                       plucene_path   => $kino_path }
     181        if ( $datastore_info{$dbtype} and $kino_path );
    172182}
    173183
    174184=head1 METHODS