AMON Text conversion #15

Open · wants to merge 2 commits into main
34 changes: 32 additions & 2 deletions gcn_classic_text_to_json/conversion.py
@@ -21,6 +21,32 @@
invalid_trigger_dates = ["(yy-mm-dd)", "(yy/mm/dd)", "(yyyy/mm/dd)"]


def parse_notice(text):
    """Convert the text of an email body to a dictionary.

    Parameters
    ----------
    text: string
        The email body.

    Returns
    -------
    dict
        The dictionary equivalent of the text."""
    output = {}
    text_list = text.split("\n")
    for line in text_list:
        line_data = line.split()
        if not line_data:
            # Skip blank lines so the indexing below cannot fail.
            continue
        key = line_data[0][:-1]
        value = " ".join(line_data[1:])
        if key in output:
            # Repeated keywords (e.g. COMMENTS) are concatenated with newlines.
            output[key] += "\n" + value
        else:
            output[key] = value

    return output


def parse_trigger_links(link, prefix, regex_string):
"""Returns a list of trigger_links present in `link`.

@@ -123,14 +149,18 @@ def text_to_json(notice, keywords_dict):
        notice_ra = keywords_dict["standard"]["ra"]
        ra_data = notice[notice_ra].split()

        if ra_data[0] == "Undefined":
            output["ra"] = None
        else:
            output["ra"] = float(ra_data[0][:-1])

    if "dec" in keywords_dict["standard"]:
        notice_dec = keywords_dict["standard"]["dec"]
        dec_data = notice[notice_dec].split()

        if dec_data[0] == "Undefined":
            output["dec"] = None
        else:
            output["dec"] = float(dec_data[0][:-1])

    if "additional" in keywords_dict:
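For context on the new `parse_notice` helper above: it splits each line of the email body on whitespace, treats the first token (minus its trailing colon) as the key, and concatenates repeated keys with newlines. A minimal sketch of how it behaves, using a made-up notice fragment (the field values are illustrative, not taken from a real AMON notice):

```python
from gcn_classic_text_to_json import conversion

# Hypothetical, abbreviated notice body; real notices carry many more fields.
body = (
    "TITLE:    GCN/AMON NOTICE\n"
    "SRC_RA:   77.43d {+05h 09m 43s} (J2000)\n"
    "COMMENTS: AMON NuEM coincidence event.\n"
    "COMMENTS: The position error is statistical only."
)

notice = conversion.parse_notice(body)
# -> {'TITLE': 'GCN/AMON NOTICE',
#     'SRC_RA': '77.43d {+05h 09m 43s} (J2000)',
#     'COMMENTS': 'AMON NuEM coincidence event.\nThe position error is statistical only.'}
```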
28 changes: 28 additions & 0 deletions gcn_classic_text_to_json/notices/amon/README.md
@@ -0,0 +1,28 @@
# AMON Text Conversion

Parses all webpages with AMON text notices and creates JSONs with GCN schema keywords. Creates an `amon_jsons` directory inside an `output` directory and saves the JSONs as `AMON_{serial_number}_{record_number}.json`, where `serial_number` is an incrementing number with no association to the notices and `record_number` is the revision number of the notice (REVISION + 1).

### Uses the following fields from the core schema for text notice fields
- `id` → EVENT_NUM, RUN_NUM
- `ra` → SRC_RA
- `dec` → SRC_DEC
- `ra_dec_error` → SRC_ERROR
- `alert_datetime` → NOTICE_DATE
- `trigger_time` → DISCOVERY_DATE, DISCOVERY_TIME
- `event_name` → EVENT_NAME
- `record_number` → REVISION
- `far` → FAR

### Defines the following new fields for the text notice fields
- `notice_type` → NOTICE_TYPE
- `n_events` → N_EVENTS
- `delta_time` → DELTA_T
- `sigma_time` → SIGMA_T
- `false_positive` → FALSE_POS
- `charge` → CHARGE
- `signalness` → SIGNALNESS, SIGNAL_TRACKNESS
- `coincidence_with` → COINC_PAIR

## Caveats
- The notices have a field called STREAM, but it seems to be degenerate with NOTICE_TYPE, so I have not added it to the JSONs
- SKYMAP_FITS_URL is a field in the Burst notices, but it is not available for any of the notices, so I have not included it in the JSONs
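
Based on the `__main__.py` added in this PR, the conversion can be run as a module from the repository root; a minimal usage sketch (the invocation details beyond what `__main__.py` shows are assumptions):

```python
# Equivalent to running: python -m gcn_classic_text_to_json.notices.amon
from gcn_classic_text_to_json.notices.amon import conversion

# Creates ./output/amon_jsons/ and writes AMON_{serial_number}_{record_number}.json files.
conversion.create_all_amon_jsons()
```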
Empty file.
4 changes: 4 additions & 0 deletions gcn_classic_text_to_json/notices/amon/__main__.py
@@ -0,0 +1,4 @@
from . import conversion

if __name__ == "__main__":
    conversion.create_all_amon_jsons()
287 changes: 287 additions & 0 deletions gcn_classic_text_to_json/notices/amon/conversion.py
@@ -0,0 +1,287 @@
import email
import json
import os

import requests

from ... import conversion

input_gold_bronze = {
    "standard": {
        "alert_datetime": "NOTICE_DATE",
        "trigger_time": ["DISCOVERY_DATE", "DISCOVERY_TIME"],
        "ra": "SRC_RA",
        "dec": "SRC_DEC",
    },
    "additional": {
        "ra_dec_error": ("SRC_ERROR", "float"),
        "ra_dec_error_50": ("SRC_ERROR50", "float"),
        "record_number": ("REVISION", "int"),
        "energy": ("ENERGY", "float"),
        "signalness": ("SIGNALNESS", "float"),
        "far": ("FAR", "float"),
    },
}

input_burst = {
    "standard": {
        "alert_datetime": "NOTICE_DATE",
        "trigger_time": ["DISCOVERY_DATE", "DISCOVERY_TIME"],
        "ra": "SRC_RA",
        "dec": "SRC_DEC",
    },
    "additional": {
        "ra_dec_error": ("SRC_ERROR", "float"),
        "record_number": ("REVISION", "int"),
        "far": ("FAR", "float"),
        "delta_time": ("delta_T", "float"),
        "p_value": ("Pvalue", "float"),
    },
}

input_coincidence = {
    "standard": {
        "alert_datetime": "NOTICE_DATE",
        "trigger_time": ["DISCOVERY_DATE", "DISCOVERY_TIME"],
        "ra": "SRC_RA",
        "dec": "SRC_DEC",
    },
    "additional": {
        "ra_dec_error": ("SRC_ERROR", "float"),
        "ra_dec_error_50": ("SRC_ERROR50", "float"),
        "record_number": ("REVISION", "int"),
        "delta_time": ("delta_T", "float"),
        "far": ("FAR", "float"),
        "event_date": ("EVENT_DATE", "string"),
    },
}

input_cascade = {
    "standard": {
        "alert_datetime": "NOTICE_DATE",
        "trigger_time": ["DISCOVERY_DATE", "DISCOVERY_TIME"],
        "ra": "SRC_RA",
        "dec": "SRC_DEC",
    },
    "additional": {
        "ra_dec_error": ("SRC_ERROR", "float"),
        "ra_dec_error_50": ("SRC_ERROR50", "float"),
        "record_number": ("REVISION", "int"),
        "energy": ("ENERGY", "float"),
        "signalness": ("SIGNALNESS", "float"),
        "far": ("FAR", "float"),
    },
}

input_ehe = {
    "standard": {
        "alert_datetime": "NOTICE_DATE",
        "trigger_time": ["DISCOVERY_DATE", "DISCOVERY_TIME"],
        "ra": "SRC_RA",
        "dec": "SRC_DEC",
    },
    "additional": {
        "ra_dec_error": ("SRC_ERROR", "float"),
        "record_number": ("REVISION", "int"),
        "energy": ("ENERGY", "float"),
        "signalness": ("SIGNALNESS", "float"),
        "n_events": ("N_EVENTS", "int"),
        "delta_time": ("DELTA_T", "float"),
        "sigma_time": ("SIGMA_T", "float"),
        "charge": ("CHARGE", "float"),
    },
}

input_hese = {
    "standard": {
        "alert_datetime": "NOTICE_DATE",
        "trigger_time": ["DISCOVERY_DATE", "DISCOVERY_TIME"],
        "ra": "SRC_RA",
        "dec": "SRC_DEC",
    },
    "additional": {
        "ra_dec_error": ("SRC_ERROR", "float"),
        "ra_dec_error_50": ("SRC_ERROR50", "float"),
        "record_number": ("REVISION", "int"),
        "signalness": ("SIGNAL_TRACKNESS", "float"),
        "n_events": ("N_EVENTS", "int"),
        "delta_time": ("DELTA_T", "float"),
        "sigma_time": ("SIGMA_T", "float"),
        "charge": ("CHARGE", "float"),
        "p_value": ("PVALUE", "float"),
        "false_positive": ("FALSE_POS", "float"),
    },
}


def text_to_json_amon(notice, input, notice_type):
    """Calls text_to_json and then adds fields that cannot be dealt with by the general function.

    Parameters
    ----------
    notice: dict
        The text notice that is being parsed.
    input: dict
        The mapping between text notice keywords and GCN schema keywords.
    notice_type: string
        The type of AMON notice.

    Returns
    -------
    dict
        A dictionary compliant with the associated schema for the mission."""
    output_dict = conversion.text_to_json(notice, input)

    output_dict["$schema"] = (
        "https://gcn.nasa.gov/schema/main/gcn/notices/classic/amon/alert.schema.json"
    )
    output_dict["mission"] = "AMON"
    output_dict["notice_type"] = notice_type

    # record_number is 1-based.
    output_dict["record_number"] += 1

    # Convert the FAR from a per-year rate to a per-second rate
    # (the EHE and HESE mappings carry no FAR).
    if notice_type != "HESE" and notice_type != "EHE":
        output_dict["far"] /= 365 * 24 * 60 * 60

    output_dict["id"] = [
        f"{notice["RUN_NUM"].split()[0]}_{notice["EVENT_NUM"].split()[0]}"
    ]

    # Convert the error radius from arcminutes to degrees; Cascade notices are excluded.
    if notice_type != "Cascade":
        output_dict["ra_dec_error"] /= 60

    if notice_type == "Astrotrack Gold" or notice_type == "Astrotrack Bronze":
        output_dict["energy"] *= 1e9
        output_dict["ra_dec_error_50"] /= 60
    elif notice_type == "Neutrino-EM Coincidence":
        output_dict["coincidence_with"] = [notice["COINC_PAIR"].split()[1]]
        output_dict["ra_dec_error_50"] /= 60
    elif notice_type == "Cascade" or notice_type == "EHE" or notice_type == "HESE":
        output_dict["systematic_included"] = True
        if notice_type == "Cascade":
            output_dict["event_name"] = [notice["EVENT_NAME"].split()[0]]
        if notice_type == "EHE":
            output_dict["containment_probability"] = 0.5
        if notice_type == "HESE":
            output_dict["ra_dec_error_50"] /= 60

    return output_dict


def create_amon_jsons_one_webpage(link, search_string, output_path, sernum):
    """Parse through all the triggers in `link` and convert them to JSONs.

    Parameters
    ----------
    link: string
        The webpage with the table of triggers.
    search_string: string
        The search string for finding trigger links.
    output_path: string
        The path to save the JSONs to.
    sernum: int
        An incrementing serial number with no relation to the data in the JSONs.

    Returns
    -------
    sernum: int
        The updated serial number for the next function call."""
    prefix = "https://gcn.gsfc.nasa.gov/"
    links_set = conversion.parse_trigger_links(link, prefix, search_string)
    links_list = list(links_set)

    for link in links_list:
        data = requests.get(link).text

        start_idx = data.find("\n") + 1
        while True:
            end_idx = data.find("\n \n ", start_idx)
            if end_idx == -1:
                break

            notice_message = email.message_from_string(data[start_idx:end_idx].strip())
            if "\n\n" in notice_message.as_string():
                # Fall back to the line-based parser when the email parser
                # does not read every line of the notice as a header.
                notice_string = data[start_idx:end_idx].strip()
                notice_dict = conversion.parse_notice(notice_string)
            else:
                comment_list = notice_message.get_all("COMMENTS")
                comment_list = [item for item in comment_list if item]
                comment = "\n".join(comment_list)
                notice_dict = dict(notice_message)
                notice_dict["COMMENTS"] = comment

            notice_type = notice_message["NOTICE_TYPE"].split()[1]
            if notice_type == "Astrotrack":
                notice_type = f"Astrotrack {notice_message["NOTICE_TYPE"].split()[2]}"
                output = text_to_json_amon(notice_dict, input_gold_bronze, notice_type)
            elif notice_type == "Burst":
                output = text_to_json_amon(notice_dict, input_burst, notice_type)
            elif notice_type == "Neutrino-EM":
                notice_type = f"Neutrino-EM {notice_message["NOTICE_TYPE"].split()[2]}"
                output = text_to_json_amon(notice_dict, input_coincidence, notice_type)
            elif notice_type == "Cascade":
                output = text_to_json_amon(notice_dict, input_cascade, notice_type)
            elif notice_type == "ICECUBE":
                notice_type = notice_message["NOTICE_TYPE"].split()[2]
                if notice_type == "EHE":
                    output = text_to_json_amon(notice_dict, input_ehe, notice_type)
                elif notice_type == "HESE":
                    output = text_to_json_amon(notice_dict, input_hese, notice_type)

            with open(
                f"{output_path}AMON_{sernum}_{output["record_number"]}.json", "w"
            ) as f:
                json.dump(output, f)

            sernum += 1
            # Advance past the "///////////" separator that delimits notices.
            temp_start_idx = data.find("///////////", end_idx)
            if temp_start_idx == -1:
                break
            start_idx = data.find("\n", temp_start_idx)

    return sernum


def create_all_amon_jsons():
    """Creates an `amon_jsons` directory and fills it with the JSONs for all AMON triggers."""
    output_path = "./output/amon_jsons/"
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    sernum = 1
    sernum = create_amon_jsons_one_webpage(
        "https://gcn.gsfc.nasa.gov/amon_icecube_gold_bronze_events.html",
        "notices_amon_g_b/.*amon",
        output_path,
        sernum,
    )
    sernum = create_amon_jsons_one_webpage(
        "https://gcn.gsfc.nasa.gov/amon_hawc_events.html",
        "notices_amon_hawc/.*amon",
        output_path,
        sernum,
    )
    sernum = create_amon_jsons_one_webpage(
        "https://gcn.gsfc.nasa.gov/amon_nu_em_coinc_events.html",
        "notices_amon_nu_em/.*amon",
        output_path,
        sernum,
    )
    sernum = create_amon_jsons_one_webpage(
        "https://gcn.gsfc.nasa.gov/amon_icecube_cascade_events.html",
        "notices_amon_icecube_cascade/.*amon",
        output_path,
        sernum,
    )
    sernum = create_amon_jsons_one_webpage(
        "https://gcn.gsfc.nasa.gov/amon_ehe_events.html",
        "notices_amon/.*amon",
        output_path,
        sernum,
    )
    sernum = create_amon_jsons_one_webpage(
        "https://gcn.gsfc.nasa.gov/amon_hese_events.html",
        "notices_amon/.*amon",
        output_path,
        sernum,
    )
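
To make the unit handling in `text_to_json_amon` concrete, the post-processing it applies on top of the generic `text_to_json` output amounts to the arithmetic below. The sample values are made up, and the unit interpretations are inferred from the divisors rather than stated in the code:

```python
SECONDS_PER_YEAR = 365 * 24 * 60 * 60  # 31,536,000

# FAR: divided by the number of seconds in a year for all but EHE/HESE notices,
# i.e. a per-year rate becomes a per-second rate.
far_per_year = 1.2
far_per_second = far_per_year / SECONDS_PER_YEAR  # ~3.8e-8

# Localisation errors: divided by 60 for all but Cascade notices,
# consistent with arcminutes being converted to degrees.
error_arcmin = 30.0
error_deg = error_arcmin / 60  # 0.5

# record_number: REVISION from the text notice plus one,
# so the JSON record numbering starts at 1.
revision = 0
record_number = revision + 1
```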