| Summary | Package variables | Synopsis | General documentation | Methods |
| Summary | Top |
| Clair::Network - Network Class for the CLAIR Library. VERSION: Version 0.01 |
| Package variables | Top |
| No package variables defined. |
| Included modules | Top |
| BerkeleyDB |
| Clair::Config |
| Clair::Document |
| Clair::GraphWrapper |
| Clair::Network::Reader::Edgelist |
| Clair::Network::Reader::Pajek |
| Clair::Network::Writer::Edgelist |
| Clair::Network::Writer::Pajek |
| Clair::Statistics::Distributions::TDist |
| Clair::Util |
| File::Temp qw /tempfile tempdir/ |
| Graph::Directed |
| Graph::Undirected |
| MEAD::SimRoutines |
| MLDBM qw ( DB_File Storable ) |
| Math::MatrixReal |
| Storable qw /dclone/ |
| lib " $MEAD_HOME /lib " |
| Synopsis | Top |
| The Network Class is one of the core modules for the CLAIR library. A Network describes a structure of relationships between nodes, and has operations for performing many typical graph functions, such as finding the diameter of the graph, adding and removing normal and external nodes, and creating edges. |
| Description | Top |
| Methods | Top |
| DESTROY | No description | Code |
| Watts_Strogatz_clus_coeff | Description | Code |
| Watts_Strogatz_local_clus_coeff | Description | Code |
| add_edge | Description | Code |
| add_node | Description | Code |
| add_weighted_edge | Description | Code |
| average_cosines | Description | Code |
| average_shortest_path | Description | Code |
| avg_in_degree | Description | Code |
| avg_out_degree | Description | Code |
| avg_total_degree | Description | Code |
| compute_cohesion | Description | Code |
| compute_in_link_histogram | Description | Code |
| compute_out_link_histogram | Description | Code |
| compute_pagerank | Description | Code |
| compute_random_walk_step | No description | Code |
| compute_rank_result | No description | Code |
| compute_rank_step | No description | Code |
| compute_stationary_distribution | Description | Code |
| compute_total_link_histogram | Description | Code |
| cosine_histograms | Description | Code |
| count_lines | No description | Code |
| create_cluster_from_lexrank | Description | Code |
| create_cosine_dat_files | Description | Code |
| create_network_from_lexrank | Description | Code |
| create_subset_network | Description | Code |
| create_subset_network_from_file | Description | Code |
| create_uniform_vector | No description | Code |
| dfs_visit_1 | Description | Code |
| dfs_visit_2 | Description | Code |
| diameter | Description | Code |
| export_to_Pajek | Description | Code |
| find_largest_component | Description | Code |
| find_largest_component_size | No description | Code |
| find_path | Description | Code |
| find_scc | Description | Code |
| get_current_probability_distribution | Description | Code |
| get_current_probability_matrix | No description | Code |
| get_dat_stats | Description | Code |
| get_edge_attribute | Description | Code |
| get_edge_weight | Description | Code |
| get_edges | Description | Code |
| get_histogram_as_string | No description | Code |
| get_index | Description | Code |
| get_node_weight | Description | Code |
| get_predecessor_matrix | Description | Code |
| get_property_hash | No description | Code |
| get_property_matrix | No description | Code |
| get_property_vector | No description | Code |
| get_scc | No description | Code |
| get_shortest_path | Description | Code |
| get_transition_probability_matrix | No description | Code |
| get_undirected_graph | Description | Code |
| get_vertex_attribute | Description | Code |
| get_vertices | Description | Code |
| harmonic_mean_geodesic_distance | Description | Code |
| has_edge | Description | Code |
| has_edge_attribute | Description | Code |
| has_node | Description | Code |
| has_vertex_attribute | Description | Code |
| is_vector_change_within_tolerance | No description | Code |
| iterative_dfs_visit_1 | Description | Code |
| iterative_dfs_visit_1_v2 | Description | Code |
| iterative_dfs_visit_2 | Description | Code |
| iterative_dfs_visit_2_v2 | Description | Code |
| make_matrix_stochastic | No description | Code |
| make_transitions_stochastic | No description | Code |
| new | Description | Code |
| new_average_shorest_path | No description | Code |
| new_hyperlink_network | No description | Code |
| newman_power_law_exponent | Description | Code |
| num_documents | Description | Code |
| num_links | Description | Code |
| num_nodes | Description | Code |
| num_pairs | Description | Code |
| perform_next_random_walk_step | No description | Code |
| perform_next_rank_step | No description | Code |
| power_law_exponent | Description | Code |
| power_law_in_link_distribution | Description | Code |
| power_law_out_link_distribution | Description | Code |
| power_law_total_link_distribution | Description | Code |
| print_current_lexrank_distribution | Description | Code |
| print_current_pagerank_distribution | Description | Code |
| print_current_probability_distribution | Description | Code |
| print_db | No description | Code |
| print_edges_with_property | No description | Code |
| print_hyperlink_edges | Description | Code |
| print_property_distribution | No description | Code |
| read_initial_probability_distribution | Description | Code |
| read_matrix_property_from_file | No description | Code |
| read_pagerank_initial_distribution | Description | Code |
| read_pagerank_personalization | Description | Code |
| read_pagerank_probabilities_from_file | Description | Code |
| read_property_from_file | No description | Code |
| read_transition_probabilities_from_file | No description | Code |
| remove_edge | Description | Code |
| remove_node | Description | Code |
| save_current_pagerank_distribution | Description | Code |
| save_current_probability_distribution | No description | Code |
| save_edges_with_property_to_file | No description | Code |
| save_hyperlink_edges_to_file | Description | Code |
| save_matrix_property_to_file | No description | Code |
| save_pagerank_probabilities_to_file | Description | Code |
| save_property_distribution | No description | Code |
| save_transition_probabilities_to_file | No description | Code |
| scale_to_unit_interval | No description | Code |
| set_current_probability_matrix | No description | Code |
| set_edge_attribute | Description | Code |
| set_edge_weight | Description | Code |
| set_node_weight | Description | Code |
| set_properties_from_matrix | No description | Code |
| set_property_matrix | No description | Code |
| set_transition_probability_from_matrix | No description | Code |
| set_vertex_attribute | Description | Code |
| write_db | Description | Code |
| write_histogram_matlab | Description | Code |
| write_link_dist | Description | Code |
| write_link_matlab | Description | Code |
| write_links | Description | Code |
| write_nodes | Description | Code |
| Watts_Strogatz_clus_coeff | code | next | Top |
| Watts_Strogatz_clus_coeff(filename => $filename) Computes the Watts Strogatz clustering coefficient. If a filename is provided, intermediate output is written to the file. |
| Watts_Strogatz_local_clus_coeff | code | prev | next | Top |
| Get the local clustering coefficient for each vertex. This is only computed for vertices with more than one neighbor (at least 2 edges). |
| add_edge | code | prev | next | Top |
| add_edge($id1, $id2); Creates an edge between the two vertices specified. If a vertex is not already part of the graph, it is automatically added. |
| add_node | code | prev | next | Top |
| add_node($id, $text) Adds a vertex to the graph. Vertex has attribute text set to $text |
| add_weighted_edge | code | prev | next | Top |
| add_weighted_edge($id1, $id2, $w); Creates an edge between the two vertices specified. If a vertex is not already part of the graph, it is automatically added. |
| average_cosines | code | prev | next | Top |
| ($linked_avg, $not_linked_avg) = average_cosines($cosine_matrix_reference) Returns the average of the cosines between documents that are connected in the matrix and between documents that are not connected. The averages are returned in an array. |
| average_shortest_path | code | prev | next | Top |
| average_shortest_path() Finds the average shortest path of a graph. The average shortest path is the average of all of the shortest paths between pairs of vertices. To compute this, we loop through each vertex, computing the shortest paths to all vertices that vertex reaches. The average of that vertex is then computed. This is repeated for all vertices with greater than zero out-degree in the graph, and the average of that is returned. |
| avg_in_degree | code | prev | next | Top |
| avg_in_degree() Returns the average number of inlinks per node in the network |
| avg_out_degree | code | prev | next | Top |
| avg_out_degree() Returns the average number of outlinks per node in the network |
| avg_total_degree | code | prev | next | Top |
| avg_total_degree() Returns the average number of links (both out and in) per node in the network |
| compute_cohesion | code | prev | next | Top |
| compute_cohesion Computes the cohesion of the documents in the network. |
| compute_in_link_histogram | code | prev | next | Top |
| compute_in_link_histogram() Returns a histogram of the number of inlinks per node in the network |
| compute_out_link_histogram | code | prev | next | Top |
| compute_out_link_histogram() Returns a histogram of the number of outlinks per node in the network |
| compute_pagerank | code | prev | next | Top |
| compute_pagerank(pagerank_value => 'pagerank_value', pagerank_transition => 'pagerank_transition', pagerank_bias => 'pagerank_bias', jump => 0.15, tolerance => 0.0001, max_iterations => 200) Computes the pagerank for the network. The property given for pagerank_value is used for the initial value, and for pagerank_transition for the transition probabilities. The pagerank_bias property is used to set the bias. If the network does not have any values for that property (or they are all zero) then the unbiased pagerank is computed. All parameters are optional, the defaults for the properties are given. Passing zero for any numerical parameter (or not specifying that parameter) will cause the default value to be used. The result is saved as the pagerank_value property of each node. |
| compute_stationary_distribution | code | prev | next | Top |
| compute_stationary_distribution Computes the stationary distribution from a random walk. This uses the values from the probability distribution and the transition probabilities. |
| compute_total_link_histogram | code | prev | next | Top |
| compute_total_link_histogram() Returns a histogram of the number of links (both out and in) per node in the network |
| cosine_histograms | code | prev | next | Top |
| cosine_histograms($cosine_matrix_reference) Returns histograms for cosines that are linked in the graph and for cosines that are not. |
| create_cluster_from_lexrank | code | prev | next | Top |
| create_cluster_from_lexrank($threshold, attribute_name => 'document', parent_document => 0) Creates a cluster with any documents that currently have a lexrank value above the threshold. The optional attribute_name parameter specifies what attribute of the node contains the document. 'document', the default, is the attribute that will be used if the network was created from a cluster. Setting the optional parent_document parameter to 1 will create the cluster out of the parent document of each document, rather than the document itself. |
| create_cosine_dat_files | code | prev | next | Top |
| create_cosine_dat_files($domain, $cosine_matrix_reference, directory => $directory) Creates dat files with information from the cosine matrix, based on randomly selected cosines |
| create_network_from_lexrank | code | prev | next | Top |
| create_network_from_lexrank Creates a network with any nodes that currently have a lexrank value above the threshold. |
| create_subset_network | code | prev | next | Top |
| create_subset_network($@subset_vertices); Creates a network with just the nodes in the array provided as the first parameter. Edges from the original network are carried across to the network if they are between two nodes that are in the new network. |
| create_subset_network_from_file | code | prev | next | Top |
| create_subset_network_from_file($filename) Creates a network with just the nodes listed in the file, one per line. Edges from the original network are carried across to the new network if they are between two nodes that are in the new network. |
| dfs_visit_1 | code | prev | next | Top |
| An internal function used by find_scc |
| dfs_visit_2 | code | prev | next | Top |
| An internal function used by find_scc |
| diameter | code | prev | next | Top |
| diameter(filename => $filename, directed => 1, max => 0) Returns the diameter of the network. If the parameter 'directed' is 1 or not specified, this is the diameter of the directed network. If it is 0 or the parameter 'undirected' is 1, then this is the diameter of the undirected network. If max is 1 or not specified, then this is the maximum diameter. If max is 0 or avg is 1, then this is the average diameter. A filename may also be specified to produce debugging information while the diameter is being determined. |
| export_to_Pajek | code | prev | next | Top |
| export_to_Pajek($networkName, $filename) Write the network to the file $filename in Pajek form, giving it the specified network name. |
| find_largest_component | code | prev | next | Top |
| find_largest_component($type) type is the type of component, either "weakly" or "strongly" Finds the largest component in a graph, returning a network made up of that component. |
| find_path | code | prev | next | Top |
| @path = find_path($s, $v) Finds the shortest path from $s to $v using the Floyd-Warshall algorithm. |
| find_scc | code | prev | next | Top |
| find_scc($dbfile, $xpfile, $finfile) $dbfile should be the filename of a db file of the links that will be used by find_scc (the file can be produced with write_db). $xpfile should be the filename of a db file of the links transposed. $finfile is the location where a temporary file should go that will be used by find_scc and the helper functions. find_scc finds a strongly connected subgraph from the graph of the network. It needs two input files, a db file of the links and a db file of the transposed links. |
| get_current_probability_distribution | code | prev | next | Top |
| get_current_probability_distribution Returns a hash with the current probability values (the values used for the random walk) |
| get_dat_stats | code | prev | next | Top |
| get_dat_stats($domain, $links_file, $cosine_file) Returns a string with statistics obtained from analyzing the dat files created by create_cosine_dat_files |
| get_edge_attribute | code | prev | next | Top |
| get_edge_attribute($u, $v, $attribute_name) Returns the value of the attribute on the given edge |
| get_edge_weight | code | prev | next | Top |
| get_edge_weight($u, $v) Returns the weight of the given edge. |
| get_edges | code | prev | next | Top |
| get_edges Returns the edges of the network |
| get_index | code | prev | next | Top |
| An internal function used by cosine_histograms. Used to determine what bin a cosine value should go into. |
| get_node_weight | code | prev | next | Top |
| get_node_weight($id) Returns the weight of the specified vertex. |
| get_predecessor_matrix | code | prev | next | Top |
| $matrix = get_predecessor_matrix() Gets the predecessor matrix of the shortest paths in the network, using the BFS algorithm. Each entry of the matrix is the predecessor of a node on a shortest path, e.g. $matrix->{$i}->{$j} denotes the predecessor of node $j on the shortest path from $i to $j. To get the whole shortest path from $i to $j, use the function get_shortest_path. |
| get_shortest_path | code | prev | next | Top |
| $path = get_shortest_path($start, $end) Get the shortest path from $start to $end. |
| get_undirected_graph | code | prev | next | Top |
| get_undirected_graph($graph) Takes a graph and returns its undirected equivalent. This maintains the weight on each edge and vertex. |
| get_vertex_attribute | code | prev | next | Top |
| get_vertex_attribute($u, $attribute_name) Returns the value of the attribute on the given vertex |
| get_vertices | code | prev | next | Top |
| get_vertices Returns the array of vertices (nodes) in the network |
| harmonic_mean_geodesic_distance | code | prev | next | Top |
| Compute the harmonic mean geodesic distance |
| has_edge | code | prev | next | Top |
| has_edge($u, $v) Returns true if an edge exists in the network, false otherwise |
| has_edge_attribute | code | prev | next | Top |
| has_edge_attribute($u, $v, $attribute_name) Returns true if the attribute has been set on the given edge and false otherwise. |
| has_node | code | prev | next | Top |
| has_node($u) Returns true if the node is in the network |
| has_vertex_attribute | code | prev | next | Top |
| has_vertex_attribute($u, $attribute_name) Returns true if the attribute has been set on the given vertex and false otherwise. |
| iterative_dfs_visit_1 | code | prev | next | Top |
| An internal function used by find_scc |
| iterative_dfs_visit_1_v2 | code | prev | next | Top |
| An internal function used by find_scc |
| iterative_dfs_visit_2 | code | prev | next | Top |
| An internal function used by find_scc |
| iterative_dfs_visit_2_v2 | code | prev | next | Top |
| An internal function used by find_scc |
| new | code | prev | next | Top |
| $network = new Clair::Network(); Creates a new, empty network |
| newman_power_law_exponent | code | prev | next | Top |
| newman_power_law_exponent($histogram_reference, $x_cutoff) Computes the power law exponent on the histogram passed in by reference. This uses the method described in Newman's "Power laws, Pareto distributions and Zipf's law", formulas 5 and 6. The return value is an array containing two items: the power law exponent and a measure of the statistical error. |
| num_documents | code | prev | next | Top |
| num_documents Returns the number of documents in the network |
| num_links | code | prev | next | Top |
| num_links Returns the number of links (edges) in the network. If the parameter external is specified and set to 1 (or internal is specified and set to 0), the number of external links is returned, otherwise the number of internal links is given. If the parameter unique is specified and equal to 1, only unique links will be counted. |
| num_nodes | code | prev | next | Top |
| num_nodes Returns the number of nodes in the network |
| num_pairs | code | prev | next | Top |
| num_pairs Returns the number of pairs of documents, defined as nd * (nd - 1) / 2 where nd is the number of documents. |
| power_law_exponent | code | prev | next | Top |
| power_law_exponent($histogram_reference) Computes the power law coefficient on the histogram passed in by reference. This uses linear regression on the logs of the data points to find both the coefficient and the exponent. The return value is a string of the form "y = a x^b" where a is the coefficient and b is the exponent. |
| power_law_in_link_distribution | code | prev | next | Top |
| power_law_in_link_distribution() Returns the power law formula from the in link distribution |
| power_law_out_link_distribution | code | prev | next | Top |
| power_law_out_link_distribution() Returns the power law formula from the out link distribution |
| power_law_total_link_distribution | code | prev | next | Top |
| power_law_total_link_distribution() Returns the power law formula from the total link distribution (both in and out links) |
| print_current_lexrank_distribution | code | prev | next | Top |
| print_current_lexrank_distribution Prints the current lexrank values. If the lexrank has been calculated, these are the results, otherwise this may be the initial or intermediate values. |
| print_current_pagerank_distribution | code | prev | next | Top |
| print_current_pagerank_distribution Prints the current pagerank values. If the pagerank has been calculated, these are the results, otherwise this may be the initial or intermediate values. |
| print_current_probability_distribution | code | prev | next | Top |
| print_current_probability_distribution Prints the current probability values from the random walk. If the stationary distribution has been calculated, these are the results, otherwise these may be the initial or intermediate values |
| print_hyperlink_edges | code | prev | next | Top |
| print_hyperlink_edges Prints all edges with the 'pagerank_transition' property set. In the case of networks built from hyperlinks from clusters, these edges are the edges that had a hyperlink between them. The edges are listed as source, then destination. |
| read_initial_probability_distribution | code | prev | next | Top |
| read_initial_probability_distribution($filename) Reads the initial probabilities for the random walk from the specified file. |
| read_pagerank_initial_distribution | code | prev | next | Top |
| read_pagerank_initial_distribution($filename) Reads the initial pagerank values from the specified file |
| read_pagerank_personalization | code | prev | next | Top |
| read_pagerank_personalization($filename) Reads the pagerank personalization values (bias) from the specified file |
| read_pagerank_probabilities_from_file | code | prev | next | Top |
| read_pagerank_probabilities_from_file($filename) Read the pagerank transition probabilities from the specified file |
| remove_edge | code | prev | next | Top |
| remove_edge($u, $v) Removes the edge from $u to $v from the graph. |
| remove_node | code | prev | next | Top |
| remove_node($id) Removes the vertex with id $id from the graph |
| save_current_pagerank_distribution | code | prev | next | Top |
| save_current_pagerank_distribution($filename) Saves the current pagerank values to a file. If pagerank has been calculated, then these are the results, otherwise these could be initial or intermediate values. |
| save_hyperlink_edges_to_file | code | prev | next | Top |
| save_hyperlink_edges_to_file($filename) Saves all edges with the 'pagerank_transition' property set to the specified file. In the case of networks built from hyperlinks from clusters, these edges are the edges that had a hyperlink between them. The edges are listed as source, then destination. |
| save_pagerank_probabilities_to_file | code | prev | next | Top |
| save_pagerank_probabilities_to_file Saves the transition probabilities used in pagerank to the specified file. |
| set_edge_attribute | code | prev | next | Top |
| set_edge_attribute($u, $v, $attribute_name, $value) Sets the attribute for the given edge to the given value |
| set_edge_weight | code | prev | next | Top |
| set_edge_weight($u, $v, $weight) Sets the weight of the given edge. |
| set_node_weight | code | prev | next | Top |
| set_node_weight($id, $weight) Set the weight of node $id. |
| set_vertex_attribute | code | prev | next | Top |
| set_vertex_attribute($u, $attribute_name, $value) Sets the attribute for the vertex to the given value |
| write_db | code | prev | next | Top |
| write_db($filename, transpose => 1) Writes the graph's links to a db file. Links are written transposed if the parameter transpose is provided and equal to 1. |
| write_histogram_matlab | code | prev | next | Top |
| write_histogram_matlab($linked_histogram_reference, $not_linked_histogram_reference, $filename_base) Writes matlab files for linked, linked cumulative, and not linked histograms based on the histogram distributions given. |
| write_link_dist | code | prev | next | Top |
| write_link_dist($histogram_reference, $filename) Writes a link distribution file for the histogram that is passed in by reference |
| write_link_matlab | code | prev | next | Top |
| write_link_matlab($histogram_reference, $filename, $dependency) Writes a Matlab file for the histogram. $histogram_reference should be a reference to the histogram that should be written to the Matlab file. $dependency is the names of any dependencies that the Matlab file should have. |
| write_links | code | prev | next | Top |
| write_links($filename, skip_duplicates => 1, transpose => 1, weights => 0) Writes the network links to a file. If the parameter skip_duplicates is specified as 1, duplicate edges are skipped. If the parameter transpose is 1, the links are written transposed. |
| write_nodes | code | prev | next | Top |
| write_nodes($filename) Writes the list of nodes in the network to a file. |
| DESTROY | description | prev | next | Top |
# Destructor: releases the tied matrices and removes their backing files.
#
# For the adjacency matrix and for the path-length matrix, this unties the
# hash stored on the object (when defined) and unlinks the file whose name
# is recorded on the object (when defined).
#
# A file that cannot be removed is reported with a warning rather than an
# exception: dying here would abort the destructor and leave the remaining
# matrix tied and its file on disk.
sub DESTROY {
    my $self = shift;

    # Do not let cleanup clobber the error/status variables of the code
    # that happened to trigger destruction (see perlobj, "Destructors").
    local ($., $@, $!, $^E, $?);

    if (defined $self->{adjacency_matrix}) {
        untie %{ $self->{adjacency_matrix} };
    }
    if (defined $self->{adjacency_matrix_file}) {
        unlink $self->{adjacency_matrix_file}
            or warn "Couldn't delete "
                . $self->{adjacency_matrix_file} . ": $!\n";
    }

    if (defined $self->{path_length_matrix}) {
        untie %{ $self->{path_length_matrix} };
    }
    if (defined $self->{path_length_matrix_filename}) {
        unlink $self->{path_length_matrix_filename}
            or warn "Couldn't delete "
                . $self->{path_length_matrix_filename} . ": $!\n";
    }

    return;
}
| Watts_Strogatz_clus_coeff | description | prev | next | Top |
sub Watts_Strogatz_clus_coeff
{
my $self = shift;
my $graph = $self->{graph};
my %parameters = @_;
my $write_to_file = 0;
my $filename = "";
if (exists $parameters{filename}) {
$write_to_file = 1;
$filename = $parameters{filename};
open(WATT, "> $filename") or die("Could not open file: $filename\n");
}
my %link_hash;
if ($write_to_file == 1) {
print WATT "reading the input...\n";
}
%link_hash = $self->get_adjacency_matrix(undirected => 1);
if ($write_to_file == 1) {
print WATT "done!\n";
}
my $sum = 0;
my $count = 0;
my $skipped = 0;
foreach my $v (keys %link_hash) {
my $c = 0;
my $connected = 0;
my %nn;
my @neighbors;
if (exists $link_hash{$v}) {
@neighbors = keys %{$link_hash{$v}};
if (@neighbors > 1) {
if (@neighbors > 5000) {
$skipped++;
next;
}
foreach my $n1 (0..$#neighbors) {
foreach my $n2 ($n1+1..$#neighbors) {
if (exists $link_hash{$neighbors[$n1]}{$neighbors[$n2]}) {
$connected++;
}
}
}
$c = 2 * $connected / (@neighbors * (@neighbors-1));} |
| Watts_Strogatz_local_clus_coeff | description | prev | next | Top |
sub Watts_Strogatz_local_clus_coeff
{
my $self = shift;
my $graph = $self->{graph};
my %parameters = @_;
my $write_to_file = 0;
my $filename = "";
if (exists $parameters{filename}) {
$write_to_file = 1;
$filename = $parameters{filename};
open(WATT, "> $filename") or die("Could not open file: $filename\n");
}
my %link_hash;
if ($write_to_file == 1) {
print WATT "reading the input...\n";
}
%link_hash = $self->get_adjacency_matrix();
my $skipped = 0;
my %local_cc = ();
foreach my $v (keys %link_hash) {
my $c = 0;
my $connected = 0;
my %nn;
my @neighbors;
if (exists $link_hash{$v}) {
@neighbors = keys %{$link_hash{$v}};
if (@neighbors > 1) {
if (@neighbors > 5000) {
$skipped++;
next;
}
foreach my $n1 (0..$#neighbors) {
foreach my $n2 ($n1+1..$#neighbors) {
if (exists $link_hash{$neighbors[$n1]}{$neighbors[$n2]}) {
$connected++;
}
}
}
$c = 2 * $connected / (@neighbors * (@neighbors-1));} |
| add_edge | description | prev | next | Top |
# add_edge($u, $v)
#
# Adds an edge from $u to $v to the underlying graph object, then calls
# clear_cache() on the network because the graph has changed.
# Returns whatever clear_cache() returns.
sub add_edge {
    my ($self, $u, $v) = @_;

    $self->{graph}->add_edge($u, $v);

    return $self->clear_cache();
}
| add_node | description | prev | next | Top |
sub add_node
{ my $self = shift;
my $node = shift;
my @remaining_args = @_;
# my $text = shift;} |
| add_weighted_edge | description | prev | next | Top |
# add_weighted_edge($u, $v, $w)
#
# Adds an edge from $u to $v with weight $w to the underlying graph object,
# then calls clear_cache() on the network because the graph has changed.
# Returns whatever clear_cache() returns.
sub add_weighted_edge {
    my ($self, $u, $v, $w) = @_;

    $self->{graph}->add_weighted_edge($u, $v, $w);

    return $self->clear_cache();
}
| average_cosines | description | prev | next | Top |
sub average_cosines
{
my $self = shift;
my $graph = $self->{graph};
my $cm = shift;
my %cos_matrix = %$cm;
my $tot_link_cos = 0;
my $link_count = 0;
my $tot_nl_cos = 0;
my $nl_count = 0;
foreach my $doc1 (keys %cos_matrix) {
foreach my $doc2 (keys %{ $cos_matrix{$doc1} }) {
if ($graph->has_edge($doc1, $doc2)) {
$tot_link_cos += $cos_matrix{$doc1}{$doc2};
$link_count++;
} else {
$tot_nl_cos += $cos_matrix{$doc1}{$doc2};
$nl_count++;
}
}
}
my $link_avg = 0;
if ($link_count > 0) {
$link_avg = $tot_link_cos/$link_count;} |
| average_shortest_path | description | prev | next | Top |
sub average_shortest_path
{
my $self = shift;
my $graph = $self->{graph};
my $directed = $self->{directed};
my $asp_matrix = $self->get_shortest_path_matrix(directed => $directed);
my %avg = ();
my $total_cnt = 0;
foreach my $v1 (keys %{$asp_matrix}) {
my $sum = 0;
my $cnt = 0;
foreach my $v2 (keys %{$asp_matrix->{$v1}}) {
my $len = $asp_matrix->{$v1}{$v2};
if ($len >= 0) {
$sum += $len;
$cnt++;
}
}
if ($cnt > 1) {
$avg{$v1} = $sum / $cnt;} |
| avg_in_degree | description | prev | next | Top |
sub avg_in_degree
{
my $self = shift;
my %histogram = $self->compute_in_link_histogram();
my $total_in = 0;
my $num_nodes = scalar $self->get_vertices();
foreach my $value (keys %histogram)
{
#skip nodes that have no links} |
| avg_out_degree | description | prev | next | Top |
sub avg_out_degree
{
my $self = shift;
my %histogram = $self->compute_out_link_histogram();
my $total_out = 0;
my $num_nodes = scalar $self->get_vertices();
foreach my $value (keys %histogram)
{
#skip nodes that have no links} |
| avg_total_degree | description | prev | next | Top |
sub avg_total_degree
{
my $self = shift;
my %histogram = $self->compute_total_link_histogram();
if (!$self->{directed}) {
return $self->{graph}->average_degree();
}
my $total = 0;
my $num_nodes = 0;
foreach my $value (keys %histogram)
{
#skip nodes that have no links} |
| compute_cohesion | description | prev | next | Top |
# compute_cohesion()
#
# Computes the pairwise lexical similarity (GetLexSim) between the texts of
# the vertices of the network, reading each text from $self->{text_of}.
#
# Each unordered pair is evaluated once, for the ordering $u < $v.  The
# comparison is numeric, so vertex ids are assumed to be numbers
# (NOTE(review): confirm against callers that use non-numeric ids).
#
# Every computed pair is also printed to STDOUT as "$u $v, $cosine".
#
# Returns a hash (as a list) mapping $u to a hash reference { $v => cosine }
# for every $u that has at least one partner $v > $u.
sub compute_cohesion {
    my $self     = shift;
    my $text_of  = $self->{text_of};
    my $graph    = $self->{graph};
    my @vertices = $graph->vertices;

    my %cosine_of;
    foreach my $u (@vertices) {
        my %cosines_of_u;
        foreach my $v (@vertices) {
            next unless $u < $v;

            my $cosine = GetLexSim($text_of->{$u}, $text_of->{$v});
            print "$u $v, $cosine\n";
            $cosines_of_u{$v} = $cosine;
        }

        # Store only vertices that produced at least one pair.  The original
        # tested a hash that was never filled, so nothing was ever stored.
        if (scalar(keys(%cosines_of_u)) > 0) {
            $cosine_of{$u} = \%cosines_of_u;
        }
    }

    return %cosine_of;
}
| compute_in_link_histogram | description | prev | next | Top |
# compute_in_link_histogram()
#
# Builds a histogram over the vertices of the graph, keyed by the value that
# $graph->predecessors($vertex) yields in scalar context (presumably the
# in-link count of the vertex -- verify against the Graph version in use).
# The value stored under each key is the number of vertices that produced it.
#
# Returns the histogram as a hash (list of key/value pairs).
sub compute_in_link_histogram {
    my ($self) = @_;
    my $graph = $self->{graph};

    my %histogram;
    for my $vertex ($graph->vertices) {
        # Scalar assignment on purpose: keeps the scalar-context call.
        my $in_degree = $graph->predecessors($vertex);
        $histogram{$in_degree}++;
    }

    return %histogram;
}
| compute_out_link_histogram | description | prev | next | Top |
# compute_out_link_histogram()
#
# Builds a histogram over the vertices of the graph, keyed by the value that
# $graph->successors($vertex) yields in scalar context (presumably the
# out-link count of the vertex -- verify against the Graph version in use).
# The value stored under each key is the number of vertices that produced it.
#
# Returns the histogram as a hash (list of key/value pairs).
sub compute_out_link_histogram {
    my ($self) = @_;
    my $graph = $self->{graph};

    my %histogram;
    for my $vertex ($graph->vertices) {
        # Scalar assignment on purpose: keeps the scalar-context call.
        my $out_degree = $graph->successors($vertex);
        $histogram{$out_degree}++;
    }

    return %histogram;
}
| compute_pagerank | description | prev | next | Top |
# compute_pagerank(pagerank_value      => 'pagerank_value',
#                  pagerank_transition => 'pagerank_transition',
#                  pagerank_bias       => 'pagerank_bias',
#                  jump => ..., tolerance => ..., max_iterations => ...)
#
# Collects the PageRank options and delegates to compute_rank_result().
#
# All parameters are optional.  The three property names default to the
# strings shown above.  jump, tolerance and max_iterations are forwarded
# as 0 when not supplied; the handling of 0 is left to compute_rank_result().
#
# Returns whatever compute_rank_result() returns.
sub compute_pagerank {
    my $self   = shift;
    my %params = @_;

    # `exists` (not definedness) decides whether a default applies, so an
    # explicitly passed value is always forwarded unchanged.
    my $pagerank_value = exists $params{pagerank_value}
        ? $params{pagerank_value}
        : 'pagerank_value';
    my $pagerank_transition = exists $params{pagerank_transition}
        ? $params{pagerank_transition}
        : 'pagerank_transition';

    # Read from %params: the original looked in the unrelated %param hash,
    # so a caller-supplied bias property was always ignored.
    my $pagerank_bias = exists $params{pagerank_bias}
        ? $params{pagerank_bias}
        : 'pagerank_bias';

    my $jump           = exists $params{jump}           ? $params{jump}           : 0;
    my $tolerance      = exists $params{tolerance}      ? $params{tolerance}      : 0;
    my $max_iterations = exists $params{max_iterations} ? $params{max_iterations} : 0;

    return $self->compute_rank_result(
        $pagerank_value, $pagerank_transition, $jump, $pagerank_bias,
        tolerance      => $tolerance,
        max_iterations => $max_iterations,
    );
}
| compute_random_walk_step | description | prev | next | Top |
sub compute_random_walk_step
{ my $self = shift;
my $graph = $self->{graph};
my $cur_prob = shift;
my $trans_matrix = shift;
my $result = $trans_matrix->multiply($cur_prob)< |