From 94209049f273b58e8f52ac111afe53a9137d1711 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Wed, 11 Sep 2024 09:56:53 +1000 Subject: [PATCH 01/10] update token in lifecycle submission --- .github/workflows/lifecycle-submission.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lifecycle-submission.yml b/.github/workflows/lifecycle-submission.yml index 263e7efd..e5123e67 100644 --- a/.github/workflows/lifecycle-submission.yml +++ b/.github/workflows/lifecycle-submission.yml @@ -63,7 +63,7 @@ jobs: - name: Run lifecycle processing script env: - GH_TOKEN: ${{ secrets.GH_DANGERBOT_TOKEN_LIMITED }} + GH_TOKEN: ${{ secrets.QCA_DATASET_SUBMISSION_PAT }} QCA_USER: ${{ secrets.QCA_USER }} QCA_KEY: ${{ secrets.QCA_KEY }} run: | From 7f74fda5ec47f97d977eb69d61fe9b062630b283 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Wed, 11 Sep 2024 10:25:46 +1000 Subject: [PATCH 02/10] tmp test --- management/lifecycle.py | 19 +++++++++++++++---- management/projectsv2.py | 31 ++++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/management/lifecycle.py b/management/lifecycle.py index a035220b..861d2c48 100755 --- a/management/lifecycle.py +++ b/management/lifecycle.py @@ -124,8 +124,15 @@ def _get_column(repo, column): return [col for col in cols if col.name == column][0] def set_backlog(self): - backlog = self._get_column(self.repo, "Backlog") - backlog.create_card(content_id=self.pr.id, content_type="PullRequest") + import projectsv2 + project = projectsv2._get_project() + project._create_card_from_content_id( + content_id=self.pr.id, + column_name="Backlog" + ) + + # backlog = self._get_column(self.repo, "Backlog") + # backlog.create_card(content_id=self.pr.id, content_type="PullRequest") def execute_state(self, board=None, states=None, reset_errors=False, set_priority=False, @@ -133,8 +140,11 @@ def execute_state(self, board=None, states=None, """Based on current state of the PR, perform appropriate actions. 
""" + import projectsv2 + if board is None: - board = _get_full_board(self.repo) + board = projectsv2._get_full_board() + # board = _get_full_board(self.repo) pr_card, pr_state = self._get_board_card_state(board, self.pr) @@ -143,7 +153,8 @@ def execute_state(self, board=None, states=None, pr_state = self.set_backlog() # reload board, since we just added this card - board = _get_full_board(self.repo) + board = projectsv2._get_full_board() + # board = _get_full_board(self.repo) pr_card, pr_state = self._get_board_card_state(board, self.pr) # exit early if states specified, and this PR is not diff --git a/management/projectsv2.py b/management/projectsv2.py index d6ce1380..3fafe215 100644 --- a/management/projectsv2.py +++ b/management/projectsv2.py @@ -63,11 +63,35 @@ def _get_project_data(self, project_node_id): } """ % project_node_id + data = self._post_query(query) + return data + + def _post_query(self, query): headers = {"Authorization": f"Bearer {os.environ['GH_TOKEN']}"} response = requests.post('https://github.com/graphql', json={'query': query}, headers=headers) data = response.json() return data + + def _get_column_id(self, column_name: str): + for column in self.columns.values(): + if column.column_name == column_name: + return column.column_node_id + + def _create_card_from_content_id(self, content_id, column_name: str): + column_id = self._get_column_id(column_name) + query = """ + mutation { + addProjectCard(input: {contentId: "%s", projectColumnId: "%s"}) { + cardEdge { + node { + id + } + } + """ % (content_id, column_id) + + data = self._post_query(query) + return data class ProjectV2Column: @@ -90,8 +114,13 @@ def move(position, column): pass -def _get_full_board(): +def _get_project(): proj = ProjectV2Project("PVT_kwDOARrkss4Am84U") + return proj + + +def _get_full_board(): + proj = _get_project() board = {col.column_name: [card for card in col.cards] for col in proj.columns.values()} for col, cards in board.items(): From 5ab1c50b15e43cc8904870c62bedfb5ca4480a74 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Wed, 11 Sep 2024 12:54:18 +1000 Subject: [PATCH 03/10] try full graphql --- .github/workflows/lifecycle-submission.yml | 2 +- management/lifecycle-v2.py | 832 +++++++++++++++++++++ management/submittable.py | 528 +++++++++++++ 3 files changed, 1361 insertions(+), 1 deletion(-) create mode 100644 management/lifecycle-v2.py create mode 100644 management/submittable.py diff --git a/.github/workflows/lifecycle-submission.yml b/.github/workflows/lifecycle-submission.yml index e5123e67..22091014 100644 --- a/.github/workflows/lifecycle-submission.yml +++ b/.github/workflows/lifecycle-submission.yml @@ -67,4 +67,4 @@ jobs: QCA_USER: ${{ secrets.QCA_USER }} QCA_KEY: ${{ secrets.QCA_KEY }} run: | - python ./management/lifecycle.py --states "Queued for Submission" + python ./management/lifecycle-v2.py --states "Queued for Submission" diff --git a/management/lifecycle-v2.py b/management/lifecycle-v2.py new file mode 100644 index 00000000..7b5057b8 --- /dev/null +++ b/management/lifecycle-v2.py @@ -0,0 +1,832 @@ + +#!/usr/bin/env python + +"""Lifecycle management for QCArchive datasets using GraphQL interface""" + +import os +import pathlib +import requests +import textwrap + + +DATASET_GLOB = "dataset*.json*" +COMPUTE_GLOB = "compute*.json*" + +PRIORITIES = {'priority-low': 0, 'priority-normal': 1, 'priority-high': 2} + + +def _post_query(query, variables=None): + """Post a query to the GitHub GraphQL API""" + headers = {"Authorization": f"Bearer 
{os.environ['GH_TOKEN']}"} + json_dict = {"query": query} + if variables: + json_dict["variables"] = variables + response = requests.post('https://github.com/graphql', json=json_dict, headers=headers) + + data = response.json() + return data + + +class PullRequest: + """ + A single pull request on a repository. + + Parameters + ---------- + repo : Repo + The repository where the PR is located + id : str + The node ID of the PR + number : int + The PR number + title : str + The PR title + url : str + The URL of the PR + merged : bool, optional + Whether the PR has been merged + """ + def __init__(self, repo, id: str, number: int, title: str, url: str, merged=None): + self.repo = repo + self.id = id + self.number = number + self.title = title + self.url = url + self.merged = merged + + @classmethod + def from_node_item(cls, repo, node_item): + return cls( + repo, + node_item["id"], + node_item["number"], + node_item.get("title"), + node_item.get("url"), + node_item.get("merged"), + ) + + def get_label_names(self) -> list[str]: + query = """ + query { + repository(owner: "$owner", name: "$name") { + pullRequest(number: $number) { + labels(first: 10) { + nodes { + name + } + } + } + } + } + """ + variables = {"owner": self.repo.owner, "name": self.repo.name, "number": self.number} + data = _post_query(query, variables) + label_names = [] + for node in data["data"]["repository"]["pullRequest"]["labels"]["nodes"]: + label_names.append(node["name"]) + return label_names + + def add_to_labels(self, label: str): + label_id = self.repo.get_label_id(label) + query = """ + mutation { + addLabelsToLabelable(input: {labelableId: "$id", labelIds: ["$label_id"]}) { + labelable { + id + } + } + } + """ + variables = {"id": self.id, "label_id": label_id} + return _post_query(query, variables) + + def remove_from_labels(self, label: str): + label_id = self.repo.get_label_id(label) + query = """ + mutation { + removeLabelsFromLabelable(input: {labelableId: "$id", labelIds: ["$label_id"]}) { + labelable { + id + } + } + } + """ + variables = {"id": self.id, "label_id": label_id} + return _post_query(query, variables) + + def add_issue_comment(self, body: str): + query = """ + mutation { + addComment(input: {subjectId: "$id", body: "$body"}) { + commentEdge { + node { + id + } + } + } + } + """ + variables = {"id": self.id, "body": body} + return _post_query(query, variables) + + def get_file_paths(self) -> list[pathlib.Path]: + query = """ + query { + repository(owner: "$owner", name: "$name") { + pullRequest(number: $number) { + files(first: 100) { + nodes { + path + } + } + } + } + } + """ + variables = {"owner": self.repo.owner, "name": self.repo.name, "number": self.number} + data = _post_query(query, variables) + files = [] + for node in data["data"]["repository"]["pullRequest"]["files"]["nodes"]: + files.append(pathlib.Path(node["path"])) + return files + + +class Repo: + def __init__( + self, + name: str = "qca-dataset-submission", + owner: str = "openforcefield", + ): + self.name = name + self.owner = owner + self.repo_name = f"{owner}/{name}" + + def get_label_id(self, label: str): + query = """ + query { + repository(owner: "$owner", name: "$name") { + label(name: "$label") { + id + } + } + } + """ + variables = {"owner": self.owner, "name": self.name, "label": label} + data = _post_query(query, variables) + return data["data"]["repository"]["label"]["id"] + + def get_tracking_pull_requests(self) -> list[PullRequest]: + """Get pull requests with the 'tracking' label""" + + query = """ + 
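+        # Each page returns at most 100 PRs; 'after: $cursor' together with
+        # the pageInfo block below lets the caller walk pages until
+        # hasNextPage is false.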
query { + repository(owner: "$owner", name: "$name") { + pullRequests(first: 100, labels: ["tracking"], after: $cursor) { + pageInfo { + hasNextPage + endCursor + } + nodes { + id + number + title + url + } + } + } + } + """ + + variables = {"owner": self.owner, "name": self.name, "cursor": None} + data = _post_query(query, variables) + has_next_page = data["data"]["repository"]["pullRequests"]["pageInfo"]["hasNextPage"] + + prs = [] + for node_item in data["data"]["repository"]["pullRequests"]["nodes"]: + pr = PullRequest.from_node_item(self, node_item) + prs.append(pr) + + while has_next_page: + cursor = data["data"]["repository"]["pullRequests"]["pageInfo"]["endCursor"] + variables["cursor"] = cursor + data = _post_query(query, variables) + has_next_page = data["data"]["repository"]["pullRequests"]["pageInfo"]["hasNextPage"] + for node_item in data["data"]["repository"]["pullRequests"]["nodes"]: + pr = PullRequest.from_node_item(self, node_item) + prs.append(pr) + return prs + + def get_pull_request(self, number: int) -> PullRequest: + """ + Get a pull request by number + + Parameters + ---------- + number : int + The PR number + + Returns + ------- + PullRequest + + """ + query = """ + query { + repository(owner: "$owner", name: "$name") { + pullRequest(number: $number) { + id + title + url + merged + } + } + } + """ + variables = {"owner": self.owner, "name": self.name, "number": number} + data = _post_query(query, variables) + return PullRequest.from_node_item(self, data["data"]["repository"]["pullRequest"]) + + +class ProjectV2PRCard: + """ + A single card on a project board, corresponding to a single PR. + + Parameters + ---------- + project : Project + The project board where the card is located + column : ProjectV2Column + The column where the card is located + card_node_id : str + The node ID of the card + card_url : str + The URL of the card + card_name : str + The name of the card + number : int + The PR number + """ + def __init__(self, project, column, card_node_id, card_url, card_name, number): + self.project = project + self.card_node_id = card_node_id + self.card_url = card_url + self.card_name = card_name + self.column = column + self.number = number + + def get_item(self) -> PullRequest: + """Retrieve the PR associated with the card""" + repo = self.project.repo + return repo.get_pull_request(self.number) + +class ProjectV2Column: + """ + A single column on a project board. + + Parameters + ---------- + project : Project + The project board where the column is located + column_node_id : str + The node ID of the column + column_name : str + The name of the column + + + Attributes + ---------- + cards : list[ProjectV2PRCard] + The cards in the column + """ + def __init__(self, project, column_node_id, column_name): + self.project = project + self.column_node_id = column_node_id + self.column_name = column_name + self.cards = list() + + def add_card(self, item: PullRequest): + """Add a card to the top of the specified column""" + query = """ + mutation { + addProjectCard(input: {contentId: "$content_id", projectColumnId: "$column_id"}) { + cardEdge { + node { + id + content { + __typename + ... on Issue { + title + url + } + ... 
on PullRequest { + title + url + } + } + } + } + } + } + """ + variables = { + "content_id": item.id, + "column_id": self.column_node_id + } + data = _post_query(query, variables) + return self._add_card_to_self_from_content( + data["data"]["addProjectCard"]["cardEdge"]["node"] + ) + + + def _add_card_to_self(self, card_node_id, card_url, card_name, card_number): + """Updates self with card information""" + card = ProjectV2PRCard(self.project, self, card_node_id, card_url, card_name, card_number) + self.cards.append(card) + self.project.cards_by_id[card_node_id] = card + return card + + def _add_card_to_self_from_content(self, content): + """Updates self with card information from content""" + card_id = content['id'] + card_name = content['content']['title'] + card_url = content['content']['url'] + card_number = content['content'].get("number") + return self._add_card_to_self( + card_id, card_url, card_name, card_number + ) + + +class Project: + """ + A single project board, corresponding to a single repository. + + Many assumptions are made as this is created for the Dataset Tracking + board. This is not a general-purpose project board class. + + Parameters + ---------- + repo : Repo + The repository where the project board is located + project_node_id : str + The node ID of the project board + """ + @classmethod + def from_repo(cls, repo: Repo, project_number: int = 2): + query = """ + query { + organization(login: "$owner") { + projectV2(number: $project_number) { + id + } + } + } + """ + variables = { + "owner": repo.owner, + "project_number": project_number + } + data = _post_query(query, variables) + project_node_id = data["data"]["repository"]["project"]["id"] + return cls(repo, project_node_id) + + + def _get_item_card(self, item: PullRequest): + """ + Retrieve the card associated with an issue or PR. 
Currently only PRs supported + """ + for card in self.cards_by_id.values(): + if card.number == item.number: + return card + + + def __init__(self, repo, node_id: str): + self.repo = repo + self.project_node_id = node_id + self._reinitialize() + + + def _reinitialize(self): + self.columns_by_name = {} + self.columns_by_id = {} + self.cards_by_id = {} + + # set up project board + project_data = self._get_project_data() + # this is the card item + for node_item in project_data: + for field in node_item['fieldValues']['nodes']: + if "name" in field: # this is the column item + column_name = field['name'] + column_node_id = field['id'] + column = self.__create_or_retrieve_column(column_name, column_node_id) + column._add_card_to_self_from_content(node_item) + + def _create_or_retrieve_column( + self, + column_name: str, + column_node_id: str, + ): + if column_name in self.columns_by_name: + assert column_node_id in self.columns_by_id + return self.columns_by_name[column_name] + column = ProjectV2Column(self, column_node_id, column_name) + self.columns_by_name[column_name] = column + self.columns_by_id[column_node_id] = column + return column + + + + + + def move_card_to_column(self, card, column: str): + """Moves card to the top of the specified column""" + if isinstance(card, str): + card = self.cards_by_id[card] + + query = """ + mutation { + moveProjectCard(input: {cardId: "$card_id", columnId: "$column_id"}) { + cardEdge { + node { + id + } + } + } + } + """ + variables = { + "card_id": card.card_node_id, + "column_id": self.columns_by_name[column].column_node_id + } + return _post_query(query, variables) + + + + def _get_project_data(self): + query = """ + query { + node(id: "$project_node_id") { + ... on ProjectV2 { + items(first: 100, after: $cursor) { + nodes { + id + content { + __typename + ... on Issue { + title + url + } + ... on PullRequest { + title + url + } + } + fieldValues(first: 10) { + nodes { + ... on ProjectV2ItemFieldSingleSelectValue { + name + id + } + } + } + } + pageInfo { + hasNextPage + endCursor + } + } + } + } + } + """ + variables = {"project_node_id": self.project_node_id, "cursor": None} + data = _post_query(query, variables) + + output_data = list(data['data']['node']['items']['nodes']) + has_next_page = data["data"]["node"]["items"]["pageInfo"]["hasNextPage"] + while has_next_page: + cursor = data["data"]["node"]["items"]["pageInfo"]["endCursor"] + variables["cursor"] = cursor + data = _post_query(query, variables) + output_data.extend(data['data']['node']['items']['nodes']) + has_next_page = data["data"]["node"]["items"]["pageInfo"]["hasNextPage"] + return output_data + + + +class Submission: + """A submission, corresponding to a single PR, possibly multiple datasets. + + A submission has a lifecycle with well-defined states. + This class represents the current state of a submission, + and provides the machinery for execution of lifecycle processes based on that state. + + All lifecycle state is stored on Github in the original PR for the submission, + mapped onto states in the "Dataset Tracking" project board. + + Parameters + ---------- + project : Project + The project board where the submission is tracked + item : PullRequest + The PR corresponding to the submission + priority : int, optional + Priority to use for the dataset if set by method calls; + one of 0, 1, or 2, in increasing-priority order. 
+ computetag : str, optional + Compute tag to use for the dataset if set by method calls; + tasks with a given compute tag will only be computed by managers + configured to service that tag. + + """ + def __init__(self, project, item, priority=1, computetag="openff"): + self.project = project + self.item = item + self.priority = priority + self.computetag = computetag + + files = item.get_file_paths() + self.computes = self._get_computes(files) + self.datasets = self._get_datasets(files) + + + def _get_datasets(self, files): + datasets = [] + for file in files: + if file.exists() and file.is_file() and file.match(DATASET_GLOB): + datasets.append(str(file.resolve())) + return datasets + + def _get_computes(self, files): + computes = [] + for file in files: + if file.exists() and file.is_file() and file.match(COMPUTE_GLOB): + computes.append(str(file.resolve())) + return computes + + + + def execute_state(self, states=None): + card = self.project._get_item_card(self.item) + # if card not on board, then it starts in the Backlog + if card is None: + column = self.project.columns_by_name["Backlog"] + card = column.add_card(self.item) + + # reload board, since we just added this card + self.project._reinitialize() + + # exit early if states specified, and this PR is not + # in one of those + if states is not None: + if card.column.column_name not in states: + return + + ACTIONS = { + "Backlog": self.execute_backlog, + "Queued for Submission": self.execute_queued_submit, + "Error Cycling": self.execute_errorcycle, + "Requires Scientific Review": self.execute_requires_scientific_review, + "End of Life": self.execute_end_of_life, + "Archived/Complete": self.execute_archived_complete, + } + if card.column.column_name in ACTIONS: + return ACTIONS[card.column.column_name](card) + + + def execute_backlog(self, card): + item = card.get_item() + if item.merged: + comment = textwrap.dedent( + """\ + ## Lifecycle - Backlog + + Merged dataset moved from "Backlog" to "Queued for Submission". + + """ + ) + item.add_issue_comment(comment) + self.project.move_card_to_column(card, "Queued for Submission") + + def resolve_new_state(self, dataset_results) -> str: + """If new state agreed upon by dataset results, that state is returned. + Otherwise, returns `None`. + """ + # get unique states recommended by datasets for this PR + # may not always be the same, say, if e.g. 
submission fails for one + # of many datasets in this submission + new_state = None + new_card_state = set(res["new_state"] for res in dataset_results) + + # if all datasets agree on the new card state, we change to that state + if len(new_card_state) == 1: + new_state = list(new_card_state)[0] + return new_state + + def execute_queued_submit(self, card): + from submittable import DataSet, Compute + + results = [] + for dataset in self.datasets: + print(f"Processing dataset '{dataset}'") + ds = DataSet(dataset, self) + results.append(ds.submit()) + + for compute in self.computes: + print(f"Processing compute '{compute}'") + ct = Compute(compute, self) + results.append(ct.submit()) + + new_state = self.resolve_new_state(results) + if new_state is not None: + self.project.move_card_to_column(card, new_state) + + def execute_errorcycle(self, card, reset_errors=False, + set_priority=False, + set_computetag=False): + from submittable import DataSet, Compute + + results = [] + for dataset in self.datasets: + print(f"Processing dataset '{dataset}'") + ds = DataSet( + dataset, + self, + priority=self.priority, + computetag=self.computetag + ) + results.append(ds.execute_errorcycle(reset_errors=reset_errors, + set_priority=set_priority, + set_computetag=set_computetag)) + + for compute in self.computes: + print(f"Processing compute '{compute}'") + ct = Compute( + compute, + self, + priority=self.priority, + computetag=self.computetag + ) + results.append(ct.execute_errorcycle(reset_errors=reset_errors, + set_priority=set_priority, + set_computetag=set_computetag)) + + new_state = self.resolve_new_state(results) + if new_state is not None: + self.project.move_card_to_column(card, new_state) + + if new_state == "Archived/Complete": + for dataset in self.datasets: + ds = DataSet(dataset, self) + ds.comment_archived_complete() + + def execute_requires_scientific_review(self, card): + # add `scientific-review` label + # remove `end-of-life`, `complete` label if present + labels = self.item.get_label_names() + + add_label = "scientific-review" + + if add_label not in labels: + self.item.add_to_labels(add_label) + + for label in ("end-of-life", "complete"): + if label in labels: + self.item.remove_from_labels(label) + + + def execute_end_of_life(self, card): + # add `end-of-life` label + # remove `scientific-review`, `complete` label if present + labels = self.item.get_label_names() + + add_label = "end-of-life" + + if add_label not in labels: + self.item.add_to_labels(add_label) + + for label in ("scientific-review", "complete"): + if label in labels: + self.item.remove_from_labels(label) + + def execute_archived_complete(self, card): + # add `complete` label + # remove `scientific-review`, `end-of-life` label if present + labels = self.item.get_label_names() + + add_label = "complete" + + if add_label not in labels: + self.item.add_to_labels(add_label) + + for label in ("scientific-review", "end-of-life"): + if label in labels: + self.item.remove_from_labels(label) + + + + +def main(): + import argparse + import gc + + parser = argparse.ArgumentParser( + description="Process PRs according to dataset lifecycle" + ) + parser.add_argument( + "--states", + type=str, + nargs="*", + help="states to limit processing to; if not provided, use all states", + ) + parser.add_argument( + "--prs", + type=int, + nargs="*", + help="PR numbers to limit processing to; if not provided, all labeled PRs processed", + ) + parser.add_argument( + "--set-priority", + action='store_true', + help="Triggers priority (re)setting 
based on Github PR label", + ) + parser.add_argument( + "--set-computetag", + action='store_true', + help="Triggers compute tag (re)setting based on Github PR label", + ) + parser.add_argument( + "--reset-errors", + action='store_true', + help="Whether to reset errored cases", + ) + + args = parser.parse_args() + states = args.states if args.states else None + prnums = args.prs if args.prs else None + + repo = Repo() + + # gather up all PRs with the `tracking` label + prs = repo.get_tracking_pull_requests() + if prnums is not None: + prs = [ + pr.number + for pr in prs + if pr.number in prnums + ] + + print(f"Found {len(prs)} with the 'tracking' label") + # grab the full project board state once so we don't have to hammer the API + project = Project.from_repo(repo, project_number=2) + + # for each PR, we examine the changes to find files used for the submission + # this is where the mapping is made between the PR and the submission files + for pr in prs: + print(f"Processing PR #{pr.number}") + + labels = pr.get_label_names() + + # if we are setting priority, check for priority label(s) on PR + # take highest one and set priority downstream + # if no priority label(s), DO NOT set priority at all for this PR + set_priority = False + selected_priority = 1 + if args.set_priority: + priorities = set(PRIORITIES.keys()).intersection(labels) + + if priorities: + set_priority = True + selected_priority = max([PRIORITIES[p] for p in priorities]) + print(f"Setting priority to {selected_priority} for PR #{pr.number}") + + set_computetag = False + selected_computetag = "openff" + if args.set_computetag: + prefix = "compute-" + n_prefix = len(prefix) + computetags = [ + label[n_prefix:] + for label in labels + if label.startswith(prefix) + ] + + if computetags: + set_computetag = True + # if multiple compute tags on the PR, we choose the first one lexically + selected_computetag = sorted(computetags)[0] + print(f"Setting compute tag to {selected_computetag} for PR #{pr.number}") + + submission = Submission( + project, + pr, + priority=selected_priority, + computetag=selected_computetag + ) + submission.execute_state( + states=states, + reset_errors=args.reset_errors, + set_priority=set_priority, + set_computetag=set_computetag + ) + \ No newline at end of file diff --git a/management/submittable.py b/management/submittable.py new file mode 100644 index 00000000..86aa29bc --- /dev/null +++ b/management/submittable.py @@ -0,0 +1,528 @@ +import os +from pprint import pformat +import traceback +from itertools import chain +from collections import defaultdict, Counter +from datetime import datetime + +QCFRACTAL_URL = "https://api.qcarchive.molssi.org:443/" + + +DATASET_TYPES = { + 'dataset': 'singlepoint', + 'optimizationdataset': 'optimization', + 'torsiondrivedataset': 'torsiondrive' +} + +def get_version_info(): + """ + Get the version info for the packages used to validate the submission. 
+ """ + import importlib + import pandas as pd + + report = {} + # list the core packages here + packages = ["openff.qcsubmit", "openff.toolkit", "basis_set_exchange", "qcelemental"] + for package in packages: + module = importlib.import_module(package) + report[package] = pd.Series({"version": module.__version__}) + + # now try openeye else use rdkit + try: + import openeye + + report["openeye"] = pd.Series({"version": openeye.__version__}) + except ImportError: + import rdkit + + report["rdkit"] = pd.Series({"version": rdkit.__version__}) + + return pd.DataFrame(report).transpose() + + +def create_dataset(dataset_data): + from openff.qcsubmit.datasets import BasicDataset, OptimizationDataset, TorsiondriveDataset + + datasets = { + "dataset": BasicDataset, + "optimizationdataset": OptimizationDataset, + "torsiondrivedataset": TorsiondriveDataset, + } + + if "type" in dataset_data: + dataset_type = dataset_data["type"].lower() + elif "dataset_type" in dataset_data: + dataset_type = dataset_data["dataset_type"].lower() + + dataset_class = datasets.get(dataset_type, None) + if dataset_class is not None: + return dataset_class.parse_obj(dataset_data) + else: + raise RuntimeError(f"The dataset type {dataset_type} is not supported.") + + +class SubmittableBase: + def __init__(self, submittable, submission, + priority=1, computetag='openff'): + """Create new Submittable instance linking a submission dataset to its PR. + + Parameters + ---------- + submittable : path-like + Path to submission file. + submission : Submission + Submission instance corresponding to the dataset submission. + repo : str + Github repo where datasets are tracked. + priority : int + Priority to use for the dataset if set by method calls; + one of 0, 1, or 2, in increasing-priority order. + computetag : str + Compute tag to use for the dataset if set by method calls; + tasks with a given compute tag will only be computed by managers + configured to service that tag. + + """ + self.submittable = submittable + self.submission = submission + self.item = submission.item + self.priority = priority + self.computetag = computetag + + + def _parse_spec(self): + spec = self._load_submittable() + dataset_name = spec["dataset_name"] + if "type" in spec: + dataset_type = DATASET_TYPES[spec["type"].lower()] + elif "dataset_type" in spec: + dataset_type = DATASET_TYPES[spec["dataset_type"].lower()] + dataset_specs = spec.get("qc_specifications", None) + return dataset_name, dataset_type, dataset_specs + + def _load_submittable(self): + from openff.qcsubmit.serializers import deserialize + + spec = deserialize(self.submittable) + return spec + + def _get_qca_client(self): + import qcportal as ptl + + client = ptl.PortalClient( + address=QCFRACTAL_URL, + username=os.environ["QCA_USER"], + password=os.environ["QCA_KEY"] + ) + return client + + def _get_meta(self): + import pandas as pd + + datehr = datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC") + dataset_name, dataset_type, dataset_specs = self._parse_spec() + + meta = { + "**Dataset Name**": dataset_name, + "**Dataset Type**": dataset_type, + "**UTC Datetime**": datehr, + } + + return pd.DataFrame(pd.Series(meta, name="")) + + def _version_info_report(self): + version = get_version_info() + + comment = f""" +
+        <details>
+ QCSubmit version information(click to expand) + + + {version.to_markdown()} +
+        </details>
+ """ + + return comment + + def execute_queued_submit(self, max_retries=3): + """Submit, perhaps with some retry logic. + + """ + client = self._get_qca_client() + + # load dataset into QCSubmit class + ds = self._load_submittable() + dataset_qcs = create_dataset(ds) + + try: + # submit to QCArchive + output = self.submit(dataset_qcs, client) + self._queued_submit_report(output, success=True) + except: + self._queued_submit_report(traceback.format_exc(), success=False) + return {"new_state": "Queued for Submission"} + else: + return {"new_state": "Error Cycling"} + + def _queued_submit_report(self, output, success): + success_text = "**SUCCESS**" if success else "**FAILED**" + + comment = f""" + ## Lifecycle - QCSubmit Submission Report : {success_text} + + {self._get_meta().to_markdown()} + + Response from public QCArchive: + + ``` + {output} + ``` + + ---------- + {self._version_info_report()} + + """ + + # postprocess due to raw spacing above + comment = "\n".join([substr.strip() for substr in comment.split("\n")]) + + # submit comment + self.item.add_issue_comment(comment) + + def execute_errorcycle(self, + reset_errors=False, + set_priority=False, + set_computetag=False): + """Obtain complete, incomplete, error stats for submittable and report. + + For suspected random errors, we perform restarts. + + If submittable complete, recommend state "Archived/Complete". + + """ + client = self._get_qca_client() + + dataset_name, dataset_type, dataset_specs = self._parse_spec() + ds = client.get_dataset(dataset_type, dataset_name) + + if dataset_type == "torsiondrive": + complete = self._errorcycle_torsiondrive( + ds, client, dataset_specs, + reset_errors=reset_errors, set_priority=set_priority, + set_computetag=set_computetag) + + elif dataset_type == "optimization": + complete = self._errorcycle_dataset( + ds, client, dataset_specs, + self._errorcycle_optimization_report, + reset_errors=reset_errors, set_priority=set_priority, + set_computetag=set_computetag) + + elif dataset_type == "singlepoint": + complete = self._errorcycle_dataset( + ds, client, dataset_specs, + self._errorcycle_dataset_report, + reset_errors=reset_errors, set_priority=set_priority, + set_computetag=set_computetag) + + if complete: + return {"new_state": "Archived/Complete"} + else: + return {"new_state": "Error Cycling"} + + def comment_archived_complete(self): + comment = f""" + ## Lifecycle - Archived/Complete + + {self._get_meta().to_markdown()} + + **Dataset Complete!** + + """ + + # postprocess due to raw spacing above + comment = "\n".join([substr.strip() for substr in comment.split("\n")]) + + # submit comment + self.item.add_issue_comment(comment) + + @staticmethod + def count_unique_error_messages(errors_in, pretty_print=False): + errors = defaultdict(set) + + for id, error in errors_in.items(): + errors["\n".join([error[i] for i in ['error_type', 'error_message']])].add(id) + + errors = dict(errors) + + content = "" + if pretty_print: + for count, key, value in sorted([(len(value), key, value) for key, value in errors.items()], reverse=True): + content += '-------------------------------------\n' + content += f"count : {count}\n" + content += '\n' + content += f'{key}\n' + content += '\n' + content += 'ids : \n' + content += f'{pformat(value, width=80, compact=True)}\n' + content += '-------------------------------------\n' + return content + else: + return errors + + def _errorcycle_torsiondrive(self, ds, client, dataset_specs, + reset_errors=False, set_priority=False, set_computetag=False): + import pandas 
as pd + from qcportal.record_models import RecordStatusEnum + + if dataset_specs is None: + dataset_specs = ds.specification_names + + df_status = self._errorcycle_get_status(ds, dataset_specs) + + if reset_errors: + erred_rec_ids = [] + erred_opts = {} + status_counts = {} + for ds_spec in dataset_specs: + recs = ds.iterate_records( + specification_names=[ds_spec], + #status='error' + ) + + # build up optimization statuses and errors, if present + erred_opts[ds_spec] = [] + status_counts[ds_spec] = Counter({status.value.upper(): 0 for status in list(RecordStatusEnum)}) + for entry, spec, rec in recs: + if rec.status == 'error': + erred_rec_ids.append(rec.id) + for opt in chain.from_iterable(rec.optimizations.values()): + status_counts[ds_spec][opt.status.value.upper()] += 1 + + if opt.status == 'error': + erred_opts[ds_spec].append((opt.id, opt.error)) + + # create status counts dataframe + df_opt_status = pd.DataFrame(status_counts).transpose() + df_opt_status = df_opt_status[['COMPLETE', 'RUNNING', 'WAITING', 'ERROR', 'CANCELLED', 'INVALID', 'DELETED']] + df_opt_status.index.name = 'specification' + + # aggregate all errors to get single set of counts for error messages + errors = {} + for ds_spec in erred_opts: + errors.update({r[0]: r[1] for r in erred_opts[ds_spec]}) + + error_counts = self.count_unique_error_messages(errors, pretty_print=True) + + self._errorcycle_torsiondrive_report(df_status, df_opt_status, error_counts) + + if df_status[["WAITING", "RUNNING", "ERROR"]].sum().sum() == 0: + complete = True + else: + if reset_errors: + client.reset_records(erred_rec_ids) + if set_priority: + ds.modify_records(specification_names=list(dataset_specs), + new_priority=self.priority) + if set_computetag: + ds.modify_records(specification_names=list(dataset_specs), + new_tag=self.computetag) + complete = False + + return complete + + def _errorcycle_torsiondrive_report(self, df_tdr, df_tdr_opt, opt_error_counts): + + if len(opt_error_counts) > 60000: + opt_error_counts = opt_error_counts[:60000] + opt_error_counts += "\n--- Too many errors; truncated here ---\n" + + comment = f""" + ## Lifecycle - Error Cycling Report + + {self._get_meta().to_markdown()} + + All errored tasks and services will be restarted. + Errored states prior to restart reported below. + + ### `TorsionDriveRecord` current status + + {df_tdr.to_markdown()} + + ### `OptimizationRecord` current status + + {df_tdr_opt.to_markdown()} + + #### `OptimizationRecord` Error Tracebacks: + +
+        <details>
+ Tracebacks (click to expand) + + + ``` + {opt_error_counts} + ``` +
+        </details>
+ + ---------- + {self._version_info_report()} + + """ + + # postprocess due to raw spacing above + comment = "\n".join([substr.strip() for substr in comment.split("\n")]) + + # submit comment + self.item.add_issue_comment(comment) + + def _errorcycle_get_status(self, ds, dataset_specs): + import pandas as pd + from qcportal.record_models import RecordStatusEnum + + if dataset_specs is None: + dataset_specs = ds.specification_names + + status = ds.status() + status_ = {key: {status.value.upper(): counts.get(status, 0) + for status in list(RecordStatusEnum)} + for key, counts in status.items() if key in dataset_specs.keys()} + + df = pd.DataFrame(status_).transpose() + df = df[['COMPLETE', 'RUNNING', 'WAITING', 'ERROR', 'CANCELLED', 'INVALID', 'DELETED']] + df.index.name = 'specification' + + return df + + def _errorcycle_optimization_report(self, df_status, opt_error_counts): + + if len(opt_error_counts) > 60000: + opt_error_counts = opt_error_counts[:60000] + opt_error_counts += "\n--- Too many errors; truncated here ---\n" + + comment = f""" + ## Lifecycle - Error Cycling Report + + {self._get_meta().to_markdown()} + + All errored tasks will be restarted. + Errored states prior to restart reported below. + + ### `OptimizationRecord` current status + + {df_status.to_markdown()} + + #### `OptimizationRecord` Error Tracebacks: + +
+        <details>
+ Tracebacks (click to expand) + + + ``` + {opt_error_counts} + ``` +
+        </details>
+ + ---------- + {self._version_info_report()} + + """ + + # postprocess due to raw spacing above + comment = "\n".join([substr.strip() for substr in comment.split("\n")]) + + # submit comment + self.item.add_issue_comment(comment) + + def _errorcycle_dataset(self, ds, client, dataset_specs, report_method, + reset_errors=False, set_priority=False, set_computetag=False): + + if dataset_specs is None: + dataset_specs = ds.specification_names + + df_status = self._errorcycle_get_status(ds, dataset_specs) + + if reset_errors: + erred_recs = ds.iterate_records( + specification_names=list(dataset_specs), + status='error') + + errors = {r.id: r.error for entry, spec, r in erred_recs} + error_counts = self.count_unique_error_messages(errors, pretty_print=True) + + report_method(df_status, error_counts) + + if df_status[["WAITING", "RUNNING", "ERROR"]].sum().sum() == 0: + complete = True + else: + if reset_errors: + client.reset_records(list(errors)) + if set_priority: + ds.modify_records(specification_names=list(dataset_specs), + new_priority=self.priority) + if set_computetag: + ds.modify_records(specification_names=list(dataset_specs), + new_tag=self.computetag) + complete = False + + return complete + + def _errorcycle_dataset_report(self, df_res, res_error_counts): + + if len(res_error_counts) > 60000: + res_error_counts = res_error_counts[:60000] + res_error_counts += "\n--- Too many errors; truncated here ---\n" + + comment = f""" + ## Lifecycle - Error Cycling Report + + {self._get_meta().to_markdown()} + + All errored tasks will be restarted. + Errored states prior to restart reported below. + + ### `ResultRecord` current status + + {df_res.to_markdown()} + + #### `ResultRecord` Error Tracebacks: + +
+        <details>
+ Tracebacks (click to expand) + + + ``` + {res_error_counts} + ``` +
+        </details>
+ + ---------- + {self._version_info_report()} + + """ + + # postprocess due to raw spacing above + comment = "\n".join([substr.strip() for substr in comment.split("\n")]) + + # submit comment + self.item.add_issue_comment(comment) + + def submit(self, dataset_qcs, client): + return dataset_qcs.submit(client=client, ignore_errors=True) + + +class DataSet(SubmittableBase): + """A dataset submitted to QCArchive. + + A dataset has a lifecycle with well-defined states. + The state of a dataset is the state of its submission PR. + + """ + ... + + +class Compute(SubmittableBase): + """Supplemental compute submitted to QCArchive. + + """ + ... From 49f73b52a29e44076d057d78bb57bd162fa8b95e Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Wed, 11 Sep 2024 12:56:52 +1000 Subject: [PATCH 04/10] add main call --- .../lifecycle-set-priority-computetag.yml | 2 +- .github/workflows/test-lifecycle.yaml | 68 ++++ management/lifecycle-v2.py | 351 +++++++++++++----- 3 files changed, 336 insertions(+), 85 deletions(-) create mode 100644 .github/workflows/test-lifecycle.yaml diff --git a/.github/workflows/lifecycle-set-priority-computetag.yml b/.github/workflows/lifecycle-set-priority-computetag.yml index 8831c73d..a9dc0eb7 100644 --- a/.github/workflows/lifecycle-set-priority-computetag.yml +++ b/.github/workflows/lifecycle-set-priority-computetag.yml @@ -48,7 +48,7 @@ jobs: - name: Run lifecycle processing script env: - GH_TOKEN: ${{ secrets.GH_DANGERBOT_TOKEN_LIMITED }} + GH_TOKEN: ${{ secrets.QCA_DATASET_SUBMISSION_PAT }} QCA_USER: ${{ secrets.QCA_USER }} QCA_KEY: ${{ secrets.QCA_KEY }} run: | diff --git a/.github/workflows/test-lifecycle.yaml b/.github/workflows/test-lifecycle.yaml new file mode 100644 index 00000000..1f580a7e --- /dev/null +++ b/.github/workflows/test-lifecycle.yaml @@ -0,0 +1,68 @@ +--- +# test lifecycle processing. 
This only executes if +# the PR has a "test-ci" label + +name: Test lifecycle + +on: + push: + branches: + - master + pull_request: + branches: + - master + workflow_dispatch: + +defaults: + run: + shell: bash -l {0} + +jobs: + test-lifecycle: + if: contains(github.event.pull_request.labels.*.name, 'test-ci') + runs-on: ubuntu-latest + env: + OE_LICENSE: ${{ github.workspace }}/oe_license.txt + steps: + - name: Checkout code + uses: nschloe/action-cached-lfs-checkout@v1 + + - name: ensure we only have one instance running + uses: softprops/turnstyle@master + env: + GITHUB_TOKEN: ${{ secrets.GH_DANGERBOT_TOKEN_LIMITED }} + with: + abort-after-seconds: 60 + + - name: Additional info about the build + run: | + uname -a + df -h + ulimit -a + + - name: Install environment + uses: mamba-org/setup-micromamba@v1 + with: + environment-file: devtools/conda-envs/queued-submit.yaml + create-args: >- + python=3.11 + cache-environment: true + + - name: Environment Information + run: | + conda info + conda list + + - name: Make oe_license.txt file from GH org secret "OE_LICENSE" + env: + OE_LICENSE_TEXT: ${{ secrets.OE_LICENSE }} + run: | + echo "${OE_LICENSE_TEXT}" > ${OE_LICENSE} + + - name: Run lifecycle processing script + env: + GH_TOKEN: ${{ secrets.QCA_DATASET_SUBMISSION_PAT }} + QCA_USER: ${{ secrets.QCA_USER }} + QCA_KEY: ${{ secrets.QCA_KEY }} + run: | + python ./management/lifecycle-v2.py --run-tests diff --git a/management/lifecycle-v2.py b/management/lifecycle-v2.py index 7b5057b8..4a69dce3 100644 --- a/management/lifecycle-v2.py +++ b/management/lifecycle-v2.py @@ -3,6 +3,7 @@ """Lifecycle management for QCArchive datasets using GraphQL interface""" +import json import os import pathlib import requests @@ -46,6 +47,9 @@ class PullRequest: merged : bool, optional Whether the PR has been merged """ + def __repr__(self): + return f"PullRequest({self.repo.repo_name}, {self.number}, {self.title})" + def __init__(self, repo, id: str, number: int, title: str, url: str, merged=None): self.repo = repo self.id = id @@ -66,11 +70,12 @@ def from_node_item(cls, repo, node_item): ) def get_label_names(self) -> list[str]: + """Get the names of the labels on the PR""" query = """ - query { - repository(owner: "$owner", name: "$name") { + query($owner: String!, $name: String!, $number: Int!) { + repository(owner: $owner, name: $name) { pullRequest(number: $number) { - labels(first: 10) { + labels(first: 100) { nodes { name } @@ -87,12 +92,18 @@ def get_label_names(self) -> list[str]: return label_names def add_to_labels(self, label: str): + """Add a label to the PR by name + + Note: labels must already exist in the repository + """ label_id = self.repo.get_label_id(label) query = """ - mutation { - addLabelsToLabelable(input: {labelableId: "$id", labelIds: ["$label_id"]}) { + mutation($id: ID!, $label_id: ID!) { + addLabelsToLabelable(input: { labelableId: $id, labelIds: [$label_id] }) { labelable { - id + labels { + nodes { name } + } } } } @@ -101,12 +112,18 @@ def add_to_labels(self, label: str): return _post_query(query, variables) def remove_from_labels(self, label: str): + """Remove a label from the PR by name + + Note: labels must already exist in the repository + """ label_id = self.repo.get_label_id(label) query = """ - mutation { - removeLabelsFromLabelable(input: {labelableId: "$id", labelIds: ["$label_id"]}) { + mutation($id: ID!, $label_id: ID!) 
{ + removeLabelsFromLabelable(input: { labelableId: $id, labelIds: [$label_id]}) { labelable { - id + labels { + nodes { name } + } } } } @@ -115,9 +132,10 @@ def remove_from_labels(self, label: str): return _post_query(query, variables) def add_issue_comment(self, body: str): + """Add a comment to the PR""" query = """ - mutation { - addComment(input: {subjectId: "$id", body: "$body"}) { + mutation($id: ID!, $body: String!) { + addComment(input: { subjectId: $id, body: $body }) { commentEdge { node { id @@ -131,8 +149,8 @@ def add_issue_comment(self, body: str): def get_file_paths(self) -> list[pathlib.Path]: query = """ - query { - repository(owner: "$owner", name: "$name") { + query($owner: String!, $name: String!, $number: Int!) { + repository(owner: $owner, name: $name) { pullRequest(number: $number) { files(first: 100) { nodes { @@ -152,6 +170,16 @@ def get_file_paths(self) -> list[pathlib.Path]: class Repo: + """A single repository on GitHub. + + Parameters + ---------- + name : str, optional + The name of the repository + owner : str, optional + The owner of the repository + + """ def __init__( self, name: str = "qca-dataset-submission", @@ -161,11 +189,12 @@ def __init__( self.owner = owner self.repo_name = f"{owner}/{name}" - def get_label_id(self, label: str): + def get_label_id(self, label: str) -> str: + """Get the node ID of a label""" query = """ - query { - repository(owner: "$owner", name: "$name") { - label(name: "$label") { + query($owner: String!, $name: String!, $label: String!) { + repository(owner: $owner, name: $name) { + label(name: $label) { id } } @@ -178,10 +207,10 @@ def get_label_id(self, label: str): def get_tracking_pull_requests(self) -> list[PullRequest]: """Get pull requests with the 'tracking' label""" - query = """ - query { - repository(owner: "$owner", name: "$name") { - pullRequests(first: 100, labels: ["tracking"], after: $cursor) { + query_base = """ + query($owner: String!, $name: String! %s) { + repository(owner: $owner, name: $name) { + pullRequests(first: 100, labels: ["tracking"] %s) { pageInfo { hasNextPage endCursor @@ -196,8 +225,8 @@ def get_tracking_pull_requests(self) -> list[PullRequest]: } } """ - - variables = {"owner": self.owner, "name": self.name, "cursor": None} + query = query_base % ("", "") + variables = {"owner": self.owner, "name": self.name} data = _post_query(query, variables) has_next_page = data["data"]["repository"]["pullRequests"]["pageInfo"]["hasNextPage"] @@ -207,6 +236,7 @@ def get_tracking_pull_requests(self) -> list[PullRequest]: prs.append(pr) while has_next_page: + query = query_base % (", $cursor: String", ", after: $cursor") cursor = data["data"]["repository"]["pullRequests"]["pageInfo"]["endCursor"] variables["cursor"] = cursor data = _post_query(query, variables) @@ -231,8 +261,8 @@ def get_pull_request(self, number: int) -> PullRequest: """ query = """ - query { - repository(owner: "$owner", name: "$name") { + query($owner: String!, $name: String!, $number: Int!) 
{ + repository(owner: $owner, name: $name) { pullRequest(number: $number) { id title @@ -266,6 +296,9 @@ class ProjectV2PRCard: number : int The PR number """ + def __repr__(self): + return f"ProjectV2PRCard({self.project}, {self.column}, {self.card_node_id}, {self.card_name}, {self.number})" + def __init__(self, project, column, card_node_id, card_url, card_name, number): self.project = project self.card_node_id = card_node_id @@ -287,8 +320,8 @@ class ProjectV2Column: ---------- project : Project The project board where the column is located - column_node_id : str - The node ID of the column + column_option_id : str + The option ID of the column column_name : str The name of the column @@ -298,45 +331,43 @@ class ProjectV2Column: cards : list[ProjectV2PRCard] The cards in the column """ - def __init__(self, project, column_node_id, column_name): + def __repr__(self): + return f"ProjectV2Column({self.project}, {self.column_option_id}, {self.column_name})" + + def __init__(self, project, column_option_id, column_name): self.project = project - self.column_node_id = column_node_id + self.column_option_id = column_option_id self.column_name = column_name self.cards = list() def add_card(self, item: PullRequest): """Add a card to the top of the specified column""" - query = """ - mutation { - addProjectCard(input: {contentId: "$content_id", projectColumnId: "$column_id"}) { - cardEdge { - node { - id - content { - __typename - ... on Issue { - title - url - } - ... on PullRequest { - title - url + add_card_query = """ + mutation($project_id: ID!, $content_id: ID!) { + addProjectV2ItemById(input: { projectId: $project_id, contentId: $content_id }) { + item { + id } - } - } + } } - } - } """ + variables = { - "content_id": item.id, - "column_id": self.column_node_id + "project_id": self.project.project_node_id, + "content_id": item.id } - data = _post_query(query, variables) - return self._add_card_to_self_from_content( - data["data"]["addProjectCard"]["cardEdge"]["node"] + + data = _post_query(add_card_query, variables) + card_id = data["data"]["addProjectV2ItemById"]["item"]["id"] + + card = self._add_card_to_self( + card_id, + item.url, + item.title, + item.number ) - + self.project.move_card_to_column(card, self.column_name) + def _add_card_to_self(self, card_node_id, card_url, card_name, card_number): """Updates self with card information""" @@ -373,8 +404,8 @@ class Project: @classmethod def from_repo(cls, repo: Repo, project_number: int = 2): query = """ - query { - organization(login: "$owner") { + query($owner: String!, $project_number: Int!) 
{ + organization(login: $owner) { projectV2(number: $project_number) { id } @@ -386,9 +417,15 @@ def from_repo(cls, repo: Repo, project_number: int = 2): "project_number": project_number } data = _post_query(query, variables) - project_node_id = data["data"]["repository"]["project"]["id"] + project_node_id = data["data"]["organization"]["projectV2"]["id"] return cls(repo, project_node_id) + def add_item_to_column(self, item: PullRequest, column: str): + if isinstance(column, str): + column = self.columns_by_name[column] + + return column.add_card(item) + def _get_item_card(self, item: PullRequest): """ @@ -402,7 +439,10 @@ def _get_item_card(self, item: PullRequest): def __init__(self, repo, node_id: str): self.repo = repo self.project_node_id = node_id + # The _column_status_id is the fieldId needed to label the Status + self._column_status_id = "" self._reinitialize() + def _reinitialize(self): @@ -411,63 +451,101 @@ def _reinitialize(self): self.cards_by_id = {} # set up project board + # first get all columns and create them initially + column_data = self._get_all_columns() + for item in column_data: + if item.get("name") == "Status": + self._column_status_id = item["id"] + + for option in item["options"]: + self._create_or_retrieve_column(option["name"], option["id"]) + project_data = self._get_project_data() # this is the card item for node_item in project_data: for field in node_item['fieldValues']['nodes']: - if "name" in field: # this is the column item + if "name" in field and field["field"]["name"] == "Status": # this is the column item column_name = field['name'] - column_node_id = field['id'] - column = self.__create_or_retrieve_column(column_name, column_node_id) + column_option_id = field['optionId'] + column = self._create_or_retrieve_column(column_name, column_option_id) column._add_card_to_self_from_content(node_item) + def _create_or_retrieve_column( self, column_name: str, - column_node_id: str, + column_option_id: str, ): if column_name in self.columns_by_name: - assert column_node_id in self.columns_by_id + assert column_option_id in self.columns_by_id return self.columns_by_name[column_name] - column = ProjectV2Column(self, column_node_id, column_name) + column = ProjectV2Column(self, column_option_id, column_name) self.columns_by_name[column_name] = column - self.columns_by_id[column_node_id] = column + self.columns_by_id[column_option_id] = column return column - - - def move_card_to_column(self, card, column: str): """Moves card to the top of the specified column""" if isinstance(card, str): card = self.cards_by_id[card] - + query = """ - mutation { - moveProjectCard(input: {cardId: "$card_id", columnId: "$column_id"}) { - cardEdge { - node { - id - } - } + mutation($card_id: ID!, $column_status_id: ID!, $project_id: ID!, $new_column_id: String!) { + updateProjectV2ItemFieldValue(input: { + itemId: $card_id, + fieldId: $column_status_id, + projectId: $project_id, + value: { singleSelectOptionId: $new_column_id } + }) { + projectV2Item { id } } } """ + column = self.columns_by_name[column] variables = { "card_id": card.card_node_id, - "column_id": self.columns_by_name[column].column_node_id + "column_status_id": self._column_status_id, + "project_id": self.project_node_id, + "new_column_id": column.column_option_id } return _post_query(query, variables) + def _get_all_columns(self): + """Get all columns, even if they're empty with no cards""" + # 100 should be more. 
We can include pagination later if necessary - def _get_project_data(self): query = """ - query { - node(id: "$project_node_id") { + query($project_node_id: ID!) { + node(id: $project_node_id) { + ... on ProjectV2 { + fields(first: 100) { + nodes { + ... on ProjectV2SingleSelectField { + name + id + options { + id + name + } + } + } + } + } + } + } + """ + variables = {"project_node_id": self.project_node_id} + data = _post_query(query, variables) + return data["data"]["node"]["fields"]["nodes"] + + def _get_project_data(self): + query_base = """ + query($project_node_id: ID! %s) { + node(id: $project_node_id) { ... on ProjectV2 { - items(first: 100, after: $cursor) { + items(first: 100 %s) { nodes { id content { @@ -475,17 +553,26 @@ def _get_project_data(self): ... on Issue { title url + number } ... on PullRequest { title url + number } } fieldValues(first: 10) { nodes { ... on ProjectV2ItemFieldSingleSelectValue { + field { + ... on ProjectV2SingleSelectField { + name + id + } + } name - id + optionId + } } } @@ -499,12 +586,14 @@ def _get_project_data(self): } } """ - variables = {"project_node_id": self.project_node_id, "cursor": None} + query = query_base % ("", "") + variables = {"project_node_id": self.project_node_id} data = _post_query(query, variables) output_data = list(data['data']['node']['items']['nodes']) has_next_page = data["data"]["node"]["items"]["pageInfo"]["hasNextPage"] while has_next_page: + query = query_base % (", $cursor: String", ", after: $cursor") cursor = data["data"]["node"]["items"]["pageInfo"]["endCursor"] variables["cursor"] = cursor data = _post_query(query, variables) @@ -565,7 +654,6 @@ def _get_computes(self, files): return computes - def execute_state(self, states=None): card = self.project._get_item_card(self.item) # if card not on board, then it starts in the Backlog @@ -624,6 +712,7 @@ def resolve_new_state(self, dataset_results) -> str: return new_state def execute_queued_submit(self, card): + """Process a PR in the 'Queued for Submission' state""" from submittable import DataSet, Compute results = [] @@ -644,6 +733,7 @@ def execute_queued_submit(self, card): def execute_errorcycle(self, card, reset_errors=False, set_priority=False, set_computetag=False): + """Process a PR in the 'Error Cycling' state""" from submittable import DataSet, Compute results = [] @@ -681,6 +771,7 @@ def execute_errorcycle(self, card, reset_errors=False, ds.comment_archived_complete() def execute_requires_scientific_review(self, card): + """Process a PR in the 'Requires Scientific Review' state""" # add `scientific-review` label # remove `end-of-life`, `complete` label if present labels = self.item.get_label_names() @@ -696,6 +787,7 @@ def execute_requires_scientific_review(self, card): def execute_end_of_life(self, card): + """Process a PR in the 'End of Life' state""" # add `end-of-life` label # remove `scientific-review`, `complete` label if present labels = self.item.get_label_names() @@ -710,6 +802,7 @@ def execute_end_of_life(self, card): self.item.remove_from_labels(label) def execute_archived_complete(self, card): + """Process a PR in the 'Archived/Complete' state""" # add `complete` label # remove `scientific-review`, `end-of-life` label if present labels = self.item.get_label_names() @@ -724,6 +817,81 @@ def execute_archived_complete(self, card): self.item.remove_from_labels(label) +def run_tests(): + repo = Repo() + + # gather up all PRs with the `tracking` label + prs = repo.get_tracking_pull_requests() + print(f"Found {len(prs)} with the 'tracking' label") 
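+    # Dump the collected PR objects so the CI log records exactly which
+    # pull requests this test run will exercise.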
+    print(prs)
+
+    print("Creating project")
+    project = Project.from_repo(repo, project_number=2)
+    print(f"Project {project.project_node_id}")
+    print(f"Columns: {project.columns_by_name.keys()}")
+    for column_name, column in project.columns_by_name.items():
+        print("===")
+        print(column_name, column.column_option_id)
+        for card in column.cards:
+            print(card.card_name, card.card_node_id)
+
+    print("===")
+    print("Cards")
+    for card in project.cards_by_id.values():
+        print(card)
+    print("")
+
+    # try test on latest PR
+    pr = sorted(prs, key=lambda pr: pr.number)[-1]
+    print(f"Processing PR #{pr.number}")
+    labels = pr.get_label_names()
+
+    card = project._get_item_card(pr)
+    previous_column = None
+    if card:
+        previous_column = card.column.column_name
+        print(f"PR #{pr.number} is in column {previous_column}")
+
+    # print files
+    files = pr.get_file_paths()
+    print("Files:")
+    print(files)
+    assert len(files) > 0
+
+    # temporarily move to "Backlog"
+    if card:
+        print("Moving card to backlog")
+        data = project.move_card_to_column(card, "Backlog")
+    else:
+        print("Adding card to backlog")
+        data = project.add_item_to_column(pr, "Backlog")
+    print(data)
+    project._reinitialize()
+    card = project._get_item_card(pr)
+    assert card.column.column_name == "Backlog"
+
+    # move back to original column
+    if previous_column:
+        project.move_card_to_column(card, previous_column)
+        project._reinitialize()
+        card = project._get_item_card(pr)
+        assert card.column.column_name == previous_column
+
+
+    # temporarily add label
+    data = pr.add_to_labels("test-label")
+    print(data)
+    label_names = pr.get_label_names()
+    assert "test-label" in label_names
+
+    data = pr.remove_from_labels("test-label")
+    print(data)
+    labels = pr.get_label_names()
+    assert "test-label" not in labels
+
+    # add comment
+    pr.add_issue_comment("This is a test comment from running CI.
Please ignore") + def main(): @@ -760,8 +928,18 @@ def main(): action='store_true', help="Whether to reset errored cases", ) + parser.add_argument( + "--run-tests", + action='store_true', + help="Ignores everything else and runs a test function", + ) args = parser.parse_args() + if args.run_tests: + run_tests() + return + + states = args.states if args.states else None prnums = args.prs if args.prs else None @@ -829,4 +1007,9 @@ def main(): set_priority=set_priority, set_computetag=set_computetag ) - \ No newline at end of file + gc.collect() + + + +if __name__ == "__main__": + main() From 4e57403b20533d6d8b748ac585c10309c499955c Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Wed, 11 Sep 2024 17:18:11 +1000 Subject: [PATCH 05/10] reset previous scripts --- management/lifecycle.py | 19 ++++--------------- management/projectsv2.py | 31 +------------------------------ 2 files changed, 5 insertions(+), 45 deletions(-) diff --git a/management/lifecycle.py b/management/lifecycle.py index 861d2c48..a035220b 100755 --- a/management/lifecycle.py +++ b/management/lifecycle.py @@ -124,15 +124,8 @@ def _get_column(repo, column): return [col for col in cols if col.name == column][0] def set_backlog(self): - import projectsv2 - project = projectsv2._get_project() - project._create_card_from_content_id( - content_id=self.pr.id, - column_name="Backlog" - ) - - # backlog = self._get_column(self.repo, "Backlog") - # backlog.create_card(content_id=self.pr.id, content_type="PullRequest") + backlog = self._get_column(self.repo, "Backlog") + backlog.create_card(content_id=self.pr.id, content_type="PullRequest") def execute_state(self, board=None, states=None, reset_errors=False, set_priority=False, @@ -140,11 +133,8 @@ def execute_state(self, board=None, states=None, """Based on current state of the PR, perform appropriate actions. 
""" - import projectsv2 - if board is None: - board = projectsv2._get_full_board() - # board = _get_full_board(self.repo) + board = _get_full_board(self.repo) pr_card, pr_state = self._get_board_card_state(board, self.pr) @@ -153,8 +143,7 @@ def execute_state(self, board=None, states=None, pr_state = self.set_backlog() # reload board, since we just added this card - board = projectsv2._get_full_board() - # board = _get_full_board(self.repo) + board = _get_full_board(self.repo) pr_card, pr_state = self._get_board_card_state(board, self.pr) # exit early if states specified, and this PR is not diff --git a/management/projectsv2.py b/management/projectsv2.py index 3fafe215..d6ce1380 100644 --- a/management/projectsv2.py +++ b/management/projectsv2.py @@ -63,35 +63,11 @@ def _get_project_data(self, project_node_id): } """ % project_node_id - data = self._post_query(query) - return data - - def _post_query(self, query): headers = {"Authorization": f"Bearer {os.environ['GH_TOKEN']}"} response = requests.post('https://github.com/graphql', json={'query': query}, headers=headers) data = response.json() return data - - def _get_column_id(self, column_name: str): - for column in self.columns.values(): - if column.column_name == column_name: - return column.column_node_id - - def _create_card_from_content_id(self, content_id, column_name: str): - column_id = self._get_column_id(column_name) - query = """ - mutation { - addProjectCard(input: {contentId: "%s", projectColumnId: "%s"}) { - cardEdge { - node { - id - } - } - """ % (content_id, column_id) - - data = self._post_query(query) - return data class ProjectV2Column: @@ -114,13 +90,8 @@ def move(position, column): pass -def _get_project(): - proj = ProjectV2Project("PVT_kwDOARrkss4Am84U") - return proj - - def _get_full_board(): - proj = _get_project() + proj = ProjectV2Project("PVT_kwDOARrkss4Am84U") board = {col.column_name: [card for card in col.cards] for col in proj.columns.values()} for col, cards in board.items(): From 704e96a1262f3a078ea0c1598bd129cc3961715e Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Wed, 11 Sep 2024 17:23:26 +1000 Subject: [PATCH 06/10] update ci to use v2 --- .github/workflows/lifecycle-backlog.yml | 2 +- .github/workflows/lifecycle-error-cycle.yml | 2 +- .github/workflows/lifecycle-set-priority-computetag.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/lifecycle-backlog.yml b/.github/workflows/lifecycle-backlog.yml index 8dfb5755..0c21b0fd 100644 --- a/.github/workflows/lifecycle-backlog.yml +++ b/.github/workflows/lifecycle-backlog.yml @@ -57,4 +57,4 @@ jobs: QCA_USER: ${{ secrets.QCA_USER }} QCA_KEY: ${{ secrets.QCA_KEY }} run: | - python ./management/lifecycle.py --states "Backlog" "Requires Scientific Review" "End of Life" "Archived/Complete" + python ./management/lifecycle-v2.py --states "Backlog" "Requires Scientific Review" "End of Life" "Archived/Complete" diff --git a/.github/workflows/lifecycle-error-cycle.yml b/.github/workflows/lifecycle-error-cycle.yml index 8739ffac..6a7d33a1 100644 --- a/.github/workflows/lifecycle-error-cycle.yml +++ b/.github/workflows/lifecycle-error-cycle.yml @@ -52,4 +52,4 @@ jobs: QCA_USER: ${{ secrets.QCA_USER }} QCA_KEY: ${{ secrets.QCA_KEY }} run: | - python ./management/lifecycle.py --states "Error Cycling" --reset-errors --set-priority --set-computetag + python ./management/lifecycle-v2.py --states "Error Cycling" --reset-errors --set-priority --set-computetag diff --git 
a/.github/workflows/lifecycle-set-priority-computetag.yml b/.github/workflows/lifecycle-set-priority-computetag.yml index a9dc0eb7..f034662b 100644 --- a/.github/workflows/lifecycle-set-priority-computetag.yml +++ b/.github/workflows/lifecycle-set-priority-computetag.yml @@ -52,4 +52,4 @@ jobs: QCA_USER: ${{ secrets.QCA_USER }} QCA_KEY: ${{ secrets.QCA_KEY }} run: | - python ./management/lifecycle.py --states "Error Cycling" --set-priority --set-computetag + python ./management/lifecycle-v2.py --states "Error Cycling" --set-priority --set-computetag From 6f1abc322a2b148a261a25120cc32b110327bf49 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Wed, 11 Sep 2024 17:45:43 +1000 Subject: [PATCH 07/10] update token --- .github/workflows/lifecycle-backlog.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lifecycle-backlog.yml b/.github/workflows/lifecycle-backlog.yml index 0c21b0fd..5852f4f4 100644 --- a/.github/workflows/lifecycle-backlog.yml +++ b/.github/workflows/lifecycle-backlog.yml @@ -53,7 +53,7 @@ jobs: - name: Run lifecycle processing script env: - GH_TOKEN: ${{ secrets.GH_DANGERBOT_TOKEN_LIMITED }} + GH_TOKEN: ${{ secrets.QCA_DATASET_SUBMISSION_PAT }} QCA_USER: ${{ secrets.QCA_USER }} QCA_KEY: ${{ secrets.QCA_KEY }} run: | From 1ccfb79fbd71bbd50c3019ca568bfc79be288c27 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Wed, 11 Sep 2024 17:51:17 +1000 Subject: [PATCH 08/10] pass errors, kwargs through --- management/lifecycle-v2.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/management/lifecycle-v2.py b/management/lifecycle-v2.py index 4a69dce3..60fd3ef0 100644 --- a/management/lifecycle-v2.py +++ b/management/lifecycle-v2.py @@ -654,7 +654,8 @@ def _get_computes(self, files): return computes - def execute_state(self, states=None): + def execute_state(self, states=None, reset_errors=False, set_priority=False, + set_computetag=False): card = self.project._get_item_card(self.item) # if card not on board, then it starts in the Backlog if card is None: @@ -679,6 +680,11 @@ def execute_state(self, states=None): "Archived/Complete": self.execute_archived_complete, } if card.column.column_name in ACTIONS: + if card.column.column_name == "Error Cycling": + return ACTIONS[card.column.column_name](card, + reset_errors=reset_errors, + set_priority=set_priority, + set_computetag=set_computetag) return ACTIONS[card.column.column_name](card) From 28d7046aeb03ae930e6578b1ee9abbe748bd9588 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Wed, 11 Sep 2024 18:04:19 +1000 Subject: [PATCH 09/10] return card --- management/lifecycle-v2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/management/lifecycle-v2.py b/management/lifecycle-v2.py index 60fd3ef0..0942a2a8 100644 --- a/management/lifecycle-v2.py +++ b/management/lifecycle-v2.py @@ -367,6 +367,7 @@ def add_card(self, item: PullRequest): item.number ) self.project.move_card_to_column(card, self.column_name) + return card def _add_card_to_self(self, card_node_id, card_url, card_name, card_number): From 050a43b081afe8ed138ca82f22a4c2a3eb60eeb5 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Wed, 11 Sep 2024 18:10:03 +1000 Subject: [PATCH 10/10] pass in number --- management/lifecycle-v2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/management/lifecycle-v2.py b/management/lifecycle-v2.py index 0942a2a8..5e6126aa 100644 --- a/management/lifecycle-v2.py +++ b/management/lifecycle-v2.py @@ -59,11 +59,11 @@ def __init__(self, repo, id: str, number: int, title: str, url: 
str, merged=None self.merged = merged @classmethod - def from_node_item(cls, repo, node_item): + def from_node_item(cls, repo, node_item, number=None): return cls( repo, node_item["id"], - node_item["number"], + node_item.get("number", number), node_item.get("title"), node_item.get("url"), node_item.get("merged"),
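
---

For reference, the cursor pagination that the earlier patches build into
_get_project_data reduces to the sketch below. This is an illustrative
reconstruction rather than code from the patches: fetch_all_items, the trimmed
field selection, and the exact _post_query payload are assumptions. It shows
the same two-stage query rendering, where the first request omits $cursor
entirely and each later request re-renders the query with the cursor declared
and supplied, looping until pageInfo.hasNextPage is false. GH_TOKEN is assumed
to be exported, as in the workflows above.

    #!/usr/bin/env python
    """Illustrative sketch of the cursor pagination in _get_project_data."""

    import os

    import requests

    # The two %s slots take an extra variable declaration and an extra
    # argument to items(); both are rendered empty on the first request.
    QUERY_BASE = """
    query($project_node_id: ID! %s) {
      node(id: $project_node_id) {
        ... on ProjectV2 {
          items(first: 100 %s) {
            nodes { id }
            pageInfo { hasNextPage endCursor }
          }
        }
      }
    }
    """


    def _post_query(query, variables=None):
        # Bearer-token POST against GitHub's GraphQL endpoint, mirroring
        # the helper defined in lifecycle-v2.py.
        headers = {"Authorization": f"Bearer {os.environ['GH_TOKEN']}"}
        response = requests.post(
            "https://github.com/graphql",
            json={"query": query, "variables": variables or {}},
            headers=headers,
        )
        return response.json()


    def fetch_all_items(project_node_id):
        # First page: no cursor variable declared, none passed.
        query = QUERY_BASE % ("", "")
        variables = {"project_node_id": project_node_id}
        data = _post_query(query, variables)
        items = list(data["data"]["node"]["items"]["nodes"])
        page_info = data["data"]["node"]["items"]["pageInfo"]

        # Later pages: re-render the query with the cursor declared, then
        # walk endCursor until the server reports no further pages.
        while page_info["hasNextPage"]:
            query = QUERY_BASE % (", $cursor: String", ", after: $cursor")
            variables["cursor"] = page_info["endCursor"]
            data = _post_query(query, variables)
            items.extend(data["data"]["node"]["items"]["nodes"])
            page_info = data["data"]["node"]["items"]["pageInfo"]

        return items

Splitting the rendering this way means the first request never mentions
$cursor at all, so no null cursor value has to be sent, which matches the
choice made in the patched _get_project_data.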