| Summary | Package variables | Synopsis | Description | General documentation | Methods |
| Summary | Top |
| Idf |
| Package variables | Top |
| No package variables defined. |
| Included modules | Top |
| Clair::Config |
| File::Spec |
| Lingua::Stem |
| Synopsis | Top |
$idfref = Idf->new(corpusname => "myidf"); |
| Description | Top |
| The IDF object is an open database. Once the constructor is called, the database remains open until the reference ($idfref) goes out of scope. At that point, the database is closed automatically. The point is that when the IDF is opened, a significant portion of the database is read into a hash. The Idf object makes it possible to access the IDF multiple times without rereading the hash. |
| Methods | Top |
| DESTROY | Description | Code |
| getIdfForWord | Description | Code |
| getIdfs | Description | Code |
| new | Description | Code |
| DESTROY | code | next | Top |
| (called automatically) Closes IDF database |
| getIdfForWord | code | prev | next | Top |
| getIdfForWord($word) Returns IDF value for word or 3 if word is not found |
| getIdfs | code | prev | next | Top |
| getIdfs() Returns all the IDF values as a hash of word -> value |
| new | code | prev | next | Top |
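| $idfref = Idf->new(rootdir => $rootdir, corpusname => $corpusname, stemmed => $stemmed); Opens IDF database. Arguments are passed by name; corpusname is required, rootdir and stemmed are optional. |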
| DESTROY | description | prev | next | Top |
# Destructor; Perl invokes it automatically when the object is released.
# Closes the DBM binding of the hash referenced by the object's 'idfref'
# entry and hands back whatever dbmclose reports.
sub DESTROY {
    my ($self) = @_;
    return dbmclose( %{ $self->{idfref} } );
}
| getIdfForWord | description | prev | next | Top |
# getIdfForWord($word)
#
# Looks up the IDF value stored for $word in the hash kept under the
# object's 'idfref' entry.  When the object's 'stemmed' flag is true the
# word is first reduced to its stem with Lingua::Stem (EN-US locale), so
# that it can match the keys of the stemmed database.
#
# Returns the stored IDF value, or the default of 3 when the word is not
# found (or no word was supplied).
#
# NOTE(review): the previous body broke off right after the commented-out
# ale_stemsome() call, leaving the sub unterminated and without a return
# value.  The stem() call and the lookup below are reconstructed from the
# documented contract ("Returns IDF value for word or 3 if word is not
# found") -- confirm against the upstream module.
sub getIdfForWord {
    my ( $self, $word ) = @_;
    my $default_idf = 3;

    return $default_idf unless defined $word;

    if ( $self->{stemmed} ) {
        my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
        $stemmer->stem_caching( { -level => 2 } );

        # stem() hands back an array reference with one stem per input word.
        my $stems = $stemmer->stem($word);
        if ( ref $stems && @{$stems} && defined $stems->[0] ) {
            $word = $stems->[0];
        }
    }

    # A plain read of the key does not create it in the database hash.
    my $idf = $self->{idfref}{$word};
    return defined $idf ? $idf : $default_idf;
}
| getIdfs | description | prev | next | Top |
# getIdfs()
#
# Returns every word/IDF pair held in the hash under the object's 'idfref'
# entry as a flat key/value list, ready to be assigned to a hash:
#
#     my %idfs = $idf->getIdfs();
#
# The pairs are copied into a fresh hash first, so changes the caller makes
# to the result do not touch the object's own hash.  Any extra arguments
# are ignored.
sub getIdfs {
    my ($self) = @_;
    my %idfs = %{ $self->{idfref} };
    return %idfs;
}
| new | description | prev | next | Top |
# new(%args)
#
# Constructor.  Arguments are passed by name:
#   corpusname - (required) name of the corpus whose IDF database is opened
#   rootdir    - (optional) base directory; defaults to /data0/projects/tfidf
#   stemmed    - (optional) true selects the "-idf-s" database; defaults to 0
#
# The database path is
#   $rootdir/corpus-data/$corpusname/$corpusname-idf      (stemmed false)
#   $rootdir/corpus-data/$corpusname/$corpusname-idf-s    (stemmed true)
# It is bound to a hash with dbmopen, and a reference to that hash is kept
# in the object under 'idfref'.
#
# Returns the new object.  When corpusname is missing or empty, a warning
# is emitted on STDERR and nothing is returned.  Dies when the database
# cannot be opened; the message includes the system error.
sub new {
    my ( $class, %args ) = @_;

    # Test for defined/non-empty rather than truth so a name of "0" works.
    my $corpusname = $args{corpusname};
    if ( !defined $corpusname || $corpusname eq '' ) {
        warn "Corpus name must be specified\n";
        return;
    }

    my $rootdir =
        ( defined $args{rootdir} && $args{rootdir} ne '' )
        ? $args{rootdir}
        : "/data0/projects/tfidf";
    my $stemmed = defined $args{stemmed} ? $args{stemmed} : 0;

    my $dbmname = "$rootdir/corpus-data/$corpusname/$corpusname-idf";
    $dbmname .= "-s" if $stemmed;

    # NOTE(review): per perldoc, a non-zero mode lets dbmopen create the
    # database when it does not exist yet -- confirm that is wanted for a
    # class that only reads IDF values.
    my %idf;
    dbmopen( %idf, $dbmname, 0666 )
        or die "Can't open idf: $dbmname: $!\n";

    return bless {
        rootdir    => $rootdir,
        corpusname => $corpusname,
        stemmed    => $stemmed,
        idfref     => \%idf,
    }, $class;
}