Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Experiments varying $\alpha$ on the londonaq dataset #63

Merged
merged 12 commits into from
Mar 27, 2024
12 changes: 12 additions & 0 deletions include/pctsp/graph.hh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#ifndef __PCTSP_GRAPH__
#define __PCTSP_GRAPH__
#include <iostream>
#include <boost/bimap.hpp>
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/filtered_graph.hpp>
Expand Down Expand Up @@ -150,6 +151,17 @@ std::vector<SCIP_VAR*> getEdgeVariables(
std::vector<PCTSPedge>& edges
);

/** Print every edge in [first, last) to stdout, one per line, as "source, target".
 *
 * Note: the iterators are taken by reference, so @p first is advanced to
 * @p last as a side effect of the call.
 *
 * @param graph  Graph the edge descriptors belong to.
 * @param first  Iterator to the first edge to print (mutated).
 * @param last   Past-the-end edge iterator.
 */
template <typename TGraph, typename EdgeIt>
void printEdges(TGraph& graph, EdgeIt& first, EdgeIt& last) {
    // Pre-increment: avoids the redundant iterator copy made by post-increment.
    for (; first != last; ++first) {
        auto edge = *first;
        std::cout << boost::source(edge, graph) << ", " << boost::target(edge, graph) << std::endl;
    }
}

void printEdges(std::vector<std::pair<PCTSPvertex, PCTSPvertex>>& edges);

template <typename TGraph, typename EdgeIt>
std::vector<typename boost::graph_traits< TGraph >::vertex_descriptor> getVerticesOfEdges(
TGraph& graph,
Expand Down
1 change: 1 addition & 0 deletions include/pctsp/logger.hh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <boost/log/core.hpp>
#include <boost/log/sources/severity_logger.hpp>
#include <boost/log/expressions.hpp>
#include <boost/log/utility/setup/common_attributes.hpp>

namespace logging = boost::log;

Expand Down
173 changes: 126 additions & 47 deletions pctsp/apps/dataset_app.py
Original file line number Diff line number Diff line change
@@ -1,99 +1,178 @@
"""Dataset app"""

from typing import Any, Dict, List
import itertools
from typing import Dict, List
from pathlib import Path
import networkx as nx
import pandas as pd
import typer
from tspwplib import (
BaseTSP,
EdgeWeightType,
Generation,
ProfitsProblem,
asymmetric_from_undirected,
biggest_vertex_id_from_graph,
build_path_to_londonaq_yaml,
build_path_to_oplib_instance,
metricness,
mst_cost,
rename_edge_attributes,
rename_node_attributes,
sparsify_uid,
split_head,
total_cost,
total_prize,
)
from ..preprocessing import remove_one_connected_components
from ..compare import params
from ..preprocessing import (
remove_one_connected_components,
undirected_vertex_disjoint_paths_map,
vertex_disjoint_cost_map,
)
from ..suurballe import suurballe_shortest_vertex_disjoint_paths
from ..utils import get_pctsp_logger
from ..vial import DatasetName

from .options import LondonaqRootOption, OPLibRootOption
from ..compare import params

dataset_app = typer.Typer(name="dataset", help="Making and summarizing datasets")


@dataset_app.command(name="metricness")
def metricness_of_dataset(
@dataset_app.command(name="stats")
def stats_of_dataset(
dataset: DatasetName,
londonaq_root: Path = LondonaqRootOption,
oplib_root: Path = OPLibRootOption,
) -> pd.DataFrame:
"""Create a pandas dataframe of the metricness and write to CSV"""
dataset_stats: Dict[str, List[Any]] = {
"num_nodes": [],
"num_edges": [],
"total_cost": [],
"total_prize": [],
"metricness": [],
}
logger = get_pctsp_logger("dataset-stats")
dataset_stats: List[Dict[str, float]] = []
names = []
index = None
if dataset == DatasetName.londonaq:
names = params.LONDONAQ_GRAPH_NAME_LIST
elif dataset == DatasetName.tspwplib:
names = params.TSPLIB_GRAPH_NAME_LIST
for graph_name in names:
# load the graph
if dataset == DatasetName.londonaq:
logger.info("Calculating stats for londonaq dataset.")
for graph_name in params.LONDONAQ_GRAPH_NAME_LIST:
logger.info("Loading %s", graph_name.value)
problem_path = build_path_to_londonaq_yaml(londonaq_root, graph_name)
tsp = BaseTSP.from_yaml(problem_path)
elif dataset == DatasetName.tspwplib:
graph = tsp.get_graph()
rename_edge_attributes(graph, {"weight": "cost"}, del_old_attr=True)
rename_node_attributes(graph, {"demand": "prize"}, del_old_attr=True)
logger.info("Calculating stats for %s", graph_name.value)
dataset_stats.append(get_graph_stats(graph, tsp.depots[0]))
names.append(graph_name.value)
index = pd.Index(names, name="graph_name")

elif dataset == DatasetName.tspwplib:
for graph_name, gen, cost, kappa in itertools.product(
params.TSPLIB_GRAPH_NAME_LIST,
Generation,
params.TSPLIB_COST_FUNCTIONS,
params.TSPLIB_KAPPA_LIST,
):
logger.info(
"Loading %s on generation %s with cost %s and kappa %s",
graph_name.value,
gen.value,
cost.value,
kappa,
)
problem_path = build_path_to_oplib_instance(
oplib_root,
Generation.gen3,
gen,
graph_name,
)
# load the problem from file
problem = ProfitsProblem().load(problem_path)
tsp = BaseTSP.from_tsplib95(problem)
# get the graph in networkx
graph = tsp.get_graph()
rename_edge_attributes(graph, {"weight": "cost"}, del_old_attr=True)
try: # londonaq dataset
rename_node_attributes(graph, {"demand": "prize"}, del_old_attr=True)
except KeyError: # tsplib dataset
graph = tsp.get_graph()
nx.set_node_attributes(graph, problem.get_node_score(), name="prize")

# if removing edges
if dataset == DatasetName.tspwplib:
graph = sparsify_uid(graph, 5)
new_cost = mst_cost(graph, cost_attr="cost")
nx.set_edge_attributes(graph, new_cost, name="cost")

# preprocessing
graph = remove_one_connected_components(graph, tsp.depots[0])

# count the number of edges, vertices, total prize, total cost and the metricness
dataset_stats["num_nodes"].append(graph.number_of_nodes())
dataset_stats["num_edges"].append(graph.number_of_edges())
dataset_stats["total_cost"].append(
total_cost(nx.get_edge_attributes(graph, "cost"), list(graph.edges()))
)
dataset_stats["total_prize"].append(
total_prize(nx.get_node_attributes(graph, "prize"), list(graph.nodes()))
rename_edge_attributes(graph, {"weight": "cost"}, del_old_attr=True)
graph = sparsify_uid(graph, kappa)
if cost == EdgeWeightType.MST:
new_cost = mst_cost(graph, cost_attr="cost")
nx.set_edge_attributes(graph, new_cost, name="cost")
logger.info("Calculating stats for %s", graph_name.value)
dataset_stats.append(get_graph_stats(graph, tsp.depots[0]))
names.append((graph_name.value, gen.value, cost.value, kappa))
index = pd.MultiIndex.from_tuples(
names, names=["graph_name", "generation", "cost_function", "kappa"]
)
dataset_stats["metricness"].append(metricness(graph))
df = pd.DataFrame(dataset_stats, index=names)

logger.info("Creating dataframe from dataset stats.")
df = pd.DataFrame(dataset_stats, index=index)
print(df)
if dataset == DatasetName.londonaq:
filepath = londonaq_root / "londonaq_dataset.csv"
elif dataset == DatasetName.tspwplib:
filepath = oplib_root / "tsplib_dataset.csv"
df.index = df.index.rename("graph_name")
logger.info("Writing dataframe to CSV at %s", filepath)
df.to_csv(filepath, index=True)
return df


def get_graph_stats(graph: nx.Graph, root_vertex: int) -> Dict[str, float]:
    """Calculate features of a graph instance before and after preprocessing.

    Features include the number of edges and vertices, the total prize and
    total cost, the metricness, and prize/cost statistics of the least-cost
    vertex-disjoint paths from the root to every other vertex.

    Args:
        graph: Undirected graph with a "cost" attribute on every edge and a
            "prize" attribute on every node.
        root_vertex: Vertex the disjoint paths start from (the depot).

    Returns:
        Mapping from the name of each statistic to its value.
    """
    instance_stats = {}
    instance_stats["num_nodes"] = graph.number_of_nodes()
    instance_stats["num_edges"] = graph.number_of_edges()
    instance_stats["total_cost"] = total_cost(
        nx.get_edge_attributes(graph, "cost"), list(graph.edges())
    )
    og_prize = total_prize(nx.get_node_attributes(graph, "prize"), list(graph.nodes()))
    instance_stats["total_prize"] = og_prize
    try:
        instance_stats["metricness"] = metricness(graph)
    except nx.exception.NetworkXException:  # NOTE change to NotConnectedException
        # metricness needs a connected graph: fall back to the largest component
        largest_component_graph = graph.subgraph(
            max(nx.connected_components(graph), key=len)
        )
        instance_stats["metricness"] = metricness(largest_component_graph)

    # evaluate the largest prize of any least-cost vertex-disjoint paths
    biggest_vertex = biggest_vertex_id_from_graph(graph)
    asymmetric_graph = asymmetric_from_undirected(graph)
    tree = suurballe_shortest_vertex_disjoint_paths(
        asymmetric_graph,
        split_head(biggest_vertex, root_vertex),
        weight="cost",
    )
    vertex_disjoint_paths_map = undirected_vertex_disjoint_paths_map(
        tree, biggest_vertex
    )
    # BUG FIX: the original rebound ``biggest_vertex`` inside this loop
    # (tracking the vertex with the largest disjoint-paths prize), which
    # clobbered the vertex id that vertex_disjoint_cost_map below must be
    # called with — and left it None whenever no pair of paths had positive
    # prize. Only the prize needs tracking here.
    biggest_prize = 0
    prize_map = nx.get_node_attributes(graph, "prize")
    for u, (path1, path2) in vertex_disjoint_paths_map.items():
        # the endpoints u and root appear in both paths, so subtract each once
        prize = (
            total_prize(prize_map, path1)
            + total_prize(prize_map, path2)
            - prize_map[u]
            - prize_map[root_vertex]
        )
        biggest_prize = max(biggest_prize, prize)
    instance_stats["biggest_disjoint_prize"] = biggest_prize
    # NOTE(review): raises ZeroDivisionError if the total prize is zero — the
    # datasets used here always carry positive prize; confirm before reuse.
    instance_stats["disjoint_prize_ratio"] = float(biggest_prize) / float(og_prize)
    instance_stats["max_disjoint_paths_cost"] = max(
        vertex_disjoint_cost_map(tree, biggest_vertex).values()
    )

    # preprocessing: drop vertices that a tour from the root can never visit
    graph = remove_one_connected_components(graph, root_vertex)

    # re-evaluate stats after preprocessing
    instance_stats["preprocessed_num_nodes"] = graph.number_of_nodes()
    instance_stats["preprocessed_num_edges"] = graph.number_of_edges()
    instance_stats["preprocessed_total_cost"] = total_cost(
        nx.get_edge_attributes(graph, "cost"), list(graph.edges())
    )
    pp_prize = total_prize(nx.get_node_attributes(graph, "prize"), list(graph.nodes()))
    instance_stats["preprocessed_total_prize"] = pp_prize
    instance_stats["preprocessed_metricness"] = metricness(graph)
    instance_stats["preprocessed_prize_ratio"] = float(pp_prize) / float(og_prize)
    return instance_stats
4 changes: 4 additions & 0 deletions pctsp/apps/main_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
dryrun,
compare_heuristics,
disjoint_tours_vs_heuristics,
cc_londonaq_alpha,
londonaq_alpha,
simple_branch_cut,
tailing_off,
)
Expand Down Expand Up @@ -95,7 +97,9 @@
ExperimentName.baseline: baseline,
ExperimentName.compare_heuristics: compare_heuristics,
ExperimentName.cost_cover: cost_cover,
ExperimentName.cc_londonaq_alpha: cc_londonaq_alpha,
ExperimentName.dryrun: dryrun,
ExperimentName.londonaq_alpha: londonaq_alpha,
ExperimentName.tailing_off: tailing_off,
}

Expand Down
20 changes: 18 additions & 2 deletions pctsp/apps/plot_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from ..vial import DatasetName, ShortAlgorithmName
from .options import LabDirOption
from .tables_app import get_heuristics_df
from ..utils import get_pctsp_logger

plot_app = typer.Typer(name="plot", help="Plotting results")

Expand All @@ -29,8 +30,11 @@ def plot_heuristics_figure(
lab_dir: Path = LabDirOption,
) -> None:
"""Plot a figure showing the performance of heuristics on a dataset"""
logger = get_pctsp_logger("plot-heuristics")
figures_dir.mkdir(exist_ok=True, parents=False)
logger.info("Reading heuristics results from %s.", lab_dir)
tspwplib_df = get_heuristics_df(DatasetName.tspwplib, lab_dir)
logger.info("TSPLIB heuristics dataframe has %s rows.", len(tspwplib_df))
londonaq_df = get_heuristics_df(DatasetName.londonaq, lab_dir)

# give short names to algorithms
Expand Down Expand Up @@ -86,6 +90,12 @@ def plot_heuristics_figure(
kappa_df = kappa_df.iloc[
kappa_df.index.get_level_values("cost_function") == cost_function
]
logger.info(
"Plotting %s points for cost function %s and kappa %s.",
len(kappa_df),
cost_function.value,
kappa,
)
for algorithm in [
ShortAlgorithmName.bfs_extension_collapse,
ShortAlgorithmName.bfs_path_extension_collapse,
Expand Down Expand Up @@ -113,9 +123,15 @@ def plot_heuristics_figure(
"x": 1,
},
)
bottom_fig.write_image(
str(figures_dir / f"{DatasetName.tspwplib}_{cost_function}_heuristics.pdf")
figure_path = (
figures_dir / f"{DatasetName.tspwplib}_{cost_function}_heuristics.pdf"
)
logger.info(
"Writing figure for cost function %s to %s",
cost_function.value,
figure_path,
)
bottom_fig.write_image(str(figure_path))


def add_traces_heuristic(
Expand Down
Loading
Loading