| Summary | Package variables | Synopsis | General documentation | Methods |
| Summary | Top |
| Clair::Nutch::Search - A class for performing simple Nutch searches. |
| Package variables | Top |
| |
| $SEARCH_CLASS = "edu.umich.si.clair.nutch.SimpleSearch" |
| Included modules | Top |
| Clair::Cluster |
| Clair::Document |
| Synopsis | Top |
#!/usr/bin/perl -w
use strict;
use Clair::Nutch::Search;

my $search = Clair::Nutch::Search->new(
    nutch_home => "/path/to/nutch",
    index_path => "/path/to/index"
);

# Returns a list of hits, where each hit is a hashref
my @results = $search->query("cat rabies", 20);
foreach my $hit (@results) {
    # Iterate over the keys only: a bare %$hit in list context flattens to
    # key, value, key, value, ... and would treat every value as a key too.
    foreach my $key (sort keys %$hit) {
        print "$key => $hit->{$key}\n";
    }
}
| Description | Top |
| Methods | Top |
| new | Description | Code |
| query | Description | Code |
| query_cluster | Description | Code |
| new | code | next | Top |
| Takes two required parameters: "nutch_home" (the path to nutch) and "index_path" (the path to a Nutch index directory [it will contain db and segments]). |
| query | code | prev | next | Top |
$search->query($query, $numhits) Queries Nutch with the given query (required) and returns a list of at most $numhits hits ($numhits is optional and defaults to 10). Each hit is a hashref. |
| query_cluster | code | prev | next | Top |
$search->query_cluster($query, $numhits) Queries Nutch with the given query (required) and returns at most $numhits hits in a Clair::Cluster. The id of each document is set to the query followed by the index of the hit. |
| new | description | prev | next | Top |
# Constructor.
#
#   my $search = Clair::Nutch::Search->new(
#       nutch_home => "/path/to/nutch",
#       index_path => "/path/to/index",
#   );
#
# Takes a list of key/value pairs. "nutch_home" and "index_path" must both be
# defined, otherwise the constructor dies. All pairs passed in (including any
# extra ones) are stored as fields of the returned object.
sub new {
    my ($class, %args) = @_;

    # Checked in this order so a call missing both reports 'nutch_home' first.
    for my $field (qw(nutch_home index_path)) {
        die "'$field' is a required field"
            unless defined $args{$field};
    }

    return bless \%args, $class;
}
| query | description | prev | next | Top |
# Runs a Nutch search and returns the hits as a list of hashrefs.
#
#   my @hits = $search->query($query, $numhits);
#
# $query   - the query string (required); passed to Nutch as one argument.
# $numhits - maximum number of hits; any false value (undef, 0, "") means 10.
#
# The command
#   <nutch_home>/bin/nutch $SEARCH_CLASS <index_path> <query> <numhits>
# is run through the shell with backticks. Its stderr is sent to /dev/null
# unless the object has a defined "verbose" field (any defined value,
# including 0, keeps stderr attached).
#
# Every line of output becomes one hit: the line is split on tabs and the
# fields are read as alternating key/value pairs. Keys that are empty or
# whitespace-only are dropped. A line with no usable pairs still yields an
# (empty) hashref, so the position of a hit in the returned list matches its
# output line.
sub query {
    my $self  = shift;
    my $query = shift;
    my $hits  = shift;
    $hits ||= 10;

    # Quote a value for the shell: wrap it in single quotes and rewrite each
    # embedded single quote as '\'' so it can neither end the quoting early
    # nor inject extra shell syntax.
    my $shell_quote = sub {
        my $arg = shift;
        $arg =~ s/'/'\\''/g;
        return "'$arg'";
    };

    # nutch_home and index_path come from the constructor and are interpolated
    # as before; only the per-call arguments are quoted.
    my $script  = "$self->{nutch_home}/bin/nutch";
    my $command = join ' ',
        $script, $SEARCH_CLASS, $self->{index_path},
        $shell_quote->($query), $shell_quote->($hits);
    $command .= " 2>/dev/null" unless defined $self->{verbose};

    my @result;
    foreach my $line (`$command`) {
        # Strip the line terminator so it does not end up inside the last
        # value (or become a bogus key after a trailing tab).
        chomp $line;

        my @pairs = split /\t/, $line;

        # A dangling key gets an explicit undef value instead of triggering
        # an "odd number of elements" warning.
        push @pairs, undef if @pairs % 2;

        my %hit = @pairs;
        delete @hit{ grep { /^\s*$/ } keys %hit };
        push @result, \%hit;
    }
    return @result;
}
| query_cluster | description | prev | next | Top |
# Runs a Nutch query and collects the hits that have content into a
# Clair::Cluster.
#
#   my $cluster = $search->query_cluster($query, $numhits);
#
# $query and $numhits are handed unchanged to query(). Every hit with a
# defined "content" field is wrapped in a Clair::Document of type "text" and
# inserted into the cluster. The document id is the query, with each
# whitespace character replaced by "_", followed by the 1-based position of
# the hit in the result list. Hits without content are skipped but still
# counted, so ids can have gaps.
#
# Returns the Clair::Cluster (empty when no hit has content).
sub query_cluster {
    my ($self, $query, $hits) = @_;

    my @hits    = $self->query($query, $hits);
    my $cluster = Clair::Cluster->new();

    # /g is required: without it only the first whitespace character of a
    # multi-word query would be replaced.
    (my $clean_query = $query) =~ s/\s/_/g;

    my $i = 1;
    foreach my $hit (@hits) {
        if (defined $hit->{content}) {
            my $id  = "$clean_query$i";
            my $doc = Clair::Document->new(
                string => $hit->{content},
                type   => "text",
                id     => $id,
            );
            $cluster->insert($id, $doc);
        }
        $i++;    # advance for every hit, with or without content
    }
    return $cluster;
}
| VERSION | Top |
| Version 0.01 |