From 0f01d0c22f74a9b46f3dfba266560931faf34263 Mon Sep 17 00:00:00 2001 From: Athish Thiruvengadam Date: Thu, 19 Sep 2024 17:22:34 -0500 Subject: [PATCH 1/2] A series of bug fixes removed utils.py small fix updated trigger_date parsing and calet jsons creation editted conversion.py to deal with inputs that have no 'additional' dictionary Fixed ra/dec parsing to be seperate Updated alexis conversion updated poetry updated all modules for search_string Added \n to additional_info in snews and sk_sn added all current conversions to main.py updated id changed url to lightcurve_url; added field Added field to all conversions --- gcn_classic_text_to_json/conversion.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/gcn_classic_text_to_json/conversion.py b/gcn_classic_text_to_json/conversion.py index fbb7eca..da16417 100644 --- a/gcn_classic_text_to_json/conversion.py +++ b/gcn_classic_text_to_json/conversion.py @@ -123,14 +123,18 @@ def text_to_json(notice, keywords_dict): notice_ra = keywords_dict["standard"]["ra"] ra_data = notice[notice_ra].split() - if ra_data[0] != "Undefined": + if ra_data[0] == "Undefined": + output["ra"] = None + else: output["ra"] = float(ra_data[0][:-1]) if "dec" in keywords_dict["standard"]: notice_dec = keywords_dict["standard"]["dec"] dec_data = notice[notice_dec].split() - if dec_data[0] != "Undefined": + if dec_data[0] == "Undefined": + output["dec"] = None + else: output["dec"] = float(dec_data[0][:-1]) if "additional" in keywords_dict: From 3be35023a536c07a25105c26263594050d9f520e Mon Sep 17 00:00:00 2001 From: Athish Thiruvengadam Date: Tue, 1 Oct 2024 23:51:18 -0500 Subject: [PATCH 2/2] AMON Text Conversion fixed ra_dec_error and ra_dec_error_50 removed comments --- gcn_classic_text_to_json/conversion.py | 26 ++ .../notices/amon/README.md | 28 ++ .../notices/amon/__init__.py | 0 .../notices/amon/__main__.py | 4 + .../notices/amon/conversion.py | 287 ++++++++++++++++++ 5 files changed, 345 insertions(+) create 
def parse_notice(text):
    """Convert the text of an email body to a dictionary

    Each non-blank line is split on whitespace. The first token, minus its
    final character (the trailing ``:`` of a ``KEY:`` field name), becomes the
    key; the remaining tokens, re-joined with single spaces, become the value.
    When a key occurs more than once, the values are concatenated in order of
    appearance, separated by newlines.

    Parameters
    ----------
    text: string
        the email body

    Returns
    --------
    dict
        The dictionary equivalent of the text. Blank and whitespace-only
        lines are ignored, so an empty `text` gives an empty dictionary."""
    output = {}
    for line in text.split("\n"):
        line_data = line.split()
        if not line_data:
            # Blank separator line: there is no key token to read, and
            # indexing line_data[0] would raise IndexError.
            continue

        key = line_data[0][:-1]
        value = " ".join(line_data[1:])
        if key in output:
            output[key] += "\n" + value
        else:
            output[key] = value

    return output
+ +### Uses the following fields from the core schema for text notice fields +- `id` → EVENT_NUM, RUN_NUM +- `ra` → SRC_RA +- `dec` → SRC_DEC +- `ra_dec_error` → SRC_ERROR +- `alert_datetime` → NOTICE_DATE +- `trigger_time` → DISCOVERY_DATE, DISCOVERY_TIME +- `event_name` → EVENT_NAME +- `record_number` → REVISION +- `far` → FAR + +### Defines the following new fields for the text notice fields +- `notice_type` → NOTICE_TYPE +- `n_events` → N_EVENTS +- `delta_time` → DELTA_T +- `sigma_time` → SIGMA_T +- `false_positive` → FALSE_POS +- `charge` → CHARGE +- `signalness` → SIGNALNESS, SIGNAL_TRACKNESS +- `coincidence_with` → COINC_PAIR + +## Caveats +- The notices have a field called STREAM, but this seems to be degenerate with NOTICE_TYPE and so I've not added it to the JSONs +- SKYMAP_FITS_URL is a field for the Burst notice but it is not available for any of the notices. Hence, I've not included it in the JSONs diff --git a/gcn_classic_text_to_json/notices/amon/__init__.py b/gcn_classic_text_to_json/notices/amon/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/gcn_classic_text_to_json/notices/amon/__main__.py b/gcn_classic_text_to_json/notices/amon/__main__.py new file mode 100644 index 0000000..206a604 --- /dev/null +++ b/gcn_classic_text_to_json/notices/amon/__main__.py @@ -0,0 +1,4 @@ +from . import conversion + +if __name__ == "__main__": + conversion.create_all_amon_jsons() diff --git a/gcn_classic_text_to_json/notices/amon/conversion.py b/gcn_classic_text_to_json/notices/amon/conversion.py new file mode 100644 index 0000000..eed8704 --- /dev/null +++ b/gcn_classic_text_to_json/notices/amon/conversion.py @@ -0,0 +1,287 @@ +import email +import json +import os + +import requests + +from ... 
# Keyword tables for each AMON notice family. Every table has a "standard"
# section (GCN schema keyword -> text notice keyword(s)) and an "additional"
# section (GCN schema keyword -> (text notice keyword, type name)).
# NOTE(review): the type name is presumably the conversion applied by the
# general text_to_json function -- confirm against that function.


def _standard_keywords():
    """Return a new copy of the "standard" section used by every AMON table.

    A fresh dictionary (and a fresh `trigger_time` list) is built on every
    call so that no two tables share mutable state.
    """
    return {
        "alert_datetime": "NOTICE_DATE",
        "trigger_time": ["DISCOVERY_DATE", "DISCOVERY_TIME"],
        "ra": "SRC_RA",
        "dec": "SRC_DEC",
    }


input_gold_bronze = {
    "standard": _standard_keywords(),
    "additional": dict(
        ra_dec_error=("SRC_ERROR", "float"),
        ra_dec_error_50=("SRC_ERROR50", "float"),
        record_number=("REVISION", "int"),
        energy=("ENERGY", "float"),
        signalness=("SIGNALNESS", "float"),
        far=("FAR", "float"),
    ),
}

input_burst = {
    "standard": _standard_keywords(),
    "additional": dict(
        ra_dec_error=("SRC_ERROR", "float"),
        record_number=("REVISION", "int"),
        far=("FAR", "float"),
        delta_time=("delta_T", "float"),
        p_value=("Pvalue", "float"),
    ),
}

input_coincidence = {
    "standard": _standard_keywords(),
    "additional": dict(
        ra_dec_error=("SRC_ERROR", "float"),
        ra_dec_error_50=("SRC_ERROR50", "float"),
        record_number=("REVISION", "int"),
        delta_time=("delta_T", "float"),
        far=("FAR", "float"),
        event_date=("EVENT_DATE", "string"),
    ),
}

# Same keywords as input_gold_bronze, kept as an independent table.
input_cascade = {
    "standard": _standard_keywords(),
    "additional": dict(
        ra_dec_error=("SRC_ERROR", "float"),
        ra_dec_error_50=("SRC_ERROR50", "float"),
        record_number=("REVISION", "int"),
        energy=("ENERGY", "float"),
        signalness=("SIGNALNESS", "float"),
        far=("FAR", "float"),
    ),
}

input_ehe = {
    "standard": _standard_keywords(),
    "additional": dict(
        ra_dec_error=("SRC_ERROR", "float"),
        record_number=("REVISION", "int"),
        energy=("ENERGY", "float"),
        signalness=("SIGNALNESS", "float"),
        n_events=("N_EVENTS", "int"),
        delta_time=("DELTA_T", "float"),
        sigma_time=("SIGMA_T", "float"),
        charge=("CHARGE", "float"),
    ),
}

input_hese = {
    "standard": _standard_keywords(),
    "additional": dict(
        ra_dec_error=("SRC_ERROR", "float"),
        ra_dec_error_50=("SRC_ERROR50", "float"),
        record_number=("REVISION", "int"),
        signalness=("SIGNAL_TRACKNESS", "float"),
        n_events=("N_EVENTS", "int"),
        delta_time=("DELTA_T", "float"),
        sigma_time=("SIGMA_T", "float"),
        charge=("CHARGE", "float"),
        p_value=("PVALUE", "float"),
        false_positive=("FALSE_POS", "float"),
    ),
}
def text_to_json_amon(notice, input, notice_type):
    """Convert one AMON text notice into a GCN-schema dictionary.

    Calls the general ``conversion.text_to_json`` and then adds or rescales
    the fields which cannot be dealt with by the general function.

    Parameters
    -----------
    notice: dict
        The text notice that is being parsed.
    input: dict
        The mapping between text notice keywords and GCN schema keywords.
        (The name shadows the builtin; it is kept so existing callers that
        pass it by keyword keep working.)
    notice_type: string
        The type of AMON notice. The values given special handling here are
        "Astrotrack Gold", "Astrotrack Bronze", "Neutrino-EM Coincidence",
        "Cascade", "EHE" and "HESE".

    Returns
    -------
    dictionary
        A dictionary compliant with the associated schema for the mission.

    Raises
    ------
    KeyError
        If a field needed for `notice_type` (for example ``RUN_NUM`` in
        `notice`, or ``far`` in the converted output) is missing."""
    output_dict = conversion.text_to_json(notice, input)

    output_dict["$schema"] = (
        "https://gcn.nasa.gov/schema/main/gcn/notices/classic/amon/alert.schema.json"
    )
    output_dict["mission"] = "AMON"
    output_dict["notice_type"] = notice_type

    # The stored record number is one higher than the value parsed from the
    # notice.
    output_dict["record_number"] += 1

    if notice_type not in ("HESE", "EHE"):
        # Divide by the number of seconds in a 365-day year.
        # NOTE(review): presumably converts a per-year rate to per-second --
        # confirm the unit of FAR in the text notices.
        output_dict["far"] /= 365 * 24 * 60 * 60

    # The pieces are extracted before formatting: reusing double quotes inside
    # a double-quoted f-string is a SyntaxError before Python 3.12.
    run_num = notice["RUN_NUM"].split()[0]
    event_num = notice["EVENT_NUM"].split()[0]
    output_dict["id"] = [f"{run_num}_{event_num}"]

    if notice_type != "Cascade":
        # NOTE(review): the division by 60 presumably converts arcminutes to
        # degrees -- confirm against the notice format.
        output_dict["ra_dec_error"] /= 60

    if notice_type in ("Astrotrack Gold", "Astrotrack Bronze"):
        # NOTE(review): factor 1e9 is presumably a unit conversion (e.g. TeV
        # to keV) -- confirm.
        output_dict["energy"] *= 1e9
        output_dict["ra_dec_error_50"] /= 60
    elif notice_type == "Neutrino-EM Coincidence":
        # Only the second whitespace-separated token of COINC_PAIR is kept.
        output_dict["coincidence_with"] = [notice["COINC_PAIR"].split()[1]]
        output_dict["ra_dec_error_50"] /= 60
    elif notice_type in ("Cascade", "EHE", "HESE"):
        output_dict["systematic_included"] = True
        if notice_type == "Cascade":
            output_dict["event_name"] = [notice["EVENT_NAME"].split()[0]]
        if notice_type == "EHE":
            output_dict["containment_probability"] = 0.5
        if notice_type == "HESE":
            output_dict["ra_dec_error_50"] /= 60

    return output_dict
def _select_amon_keywords(notice_type_field):
    """Map a NOTICE_TYPE field value to ``(notice_type, keywords_dict)``.

    The second whitespace-separated token selects the notice family; for the
    "Astrotrack", "Neutrino-EM" and "ICECUBE" families the third token refines
    it. Returns None when the family (or ICECUBE sub-type) is not recognised.
    """
    tokens = notice_type_field.split()
    family = tokens[1]
    if family == "Astrotrack":
        return f"Astrotrack {tokens[2]}", input_gold_bronze
    if family == "Burst":
        return family, input_burst
    if family == "Neutrino-EM":
        return f"Neutrino-EM {tokens[2]}", input_coincidence
    if family == "Cascade":
        return family, input_cascade
    if family == "ICECUBE":
        subtype = tokens[2]
        if subtype == "EHE":
            return subtype, input_ehe
        if subtype == "HESE":
            return subtype, input_hese
    return None


def create_amon_jsons_one_webpage(link, search_string, output_path, sernum):
    """Parse through all the triggers in `link` and convert them to JSONs

    Every trigger page found through `link` is downloaded and split into
    individual notices; each recognised notice is converted with
    ``text_to_json_amon`` and written to
    ``AMON_{sernum}_{record_number}.json`` inside `output_path`.

    Parameters
    ----------
    link: string
        The webpage with the table of triggers
    search_string: string
        The search string for finding trigger links
    output_path: string
        The path to save the JSONs to
    sernum: int
        The random iterating number with no relations to the data in the JSONs

    Returns
    --------
    sernum: int
        returns sernum for the next function call"""
    import warnings

    prefix = "https://gcn.gsfc.nasa.gov/"
    trigger_links = conversion.parse_trigger_links(link, prefix, search_string)

    # A separate loop name keeps the `link` parameter intact.
    for trigger_link in list(trigger_links):
        data = requests.get(trigger_link).text

        # The first line of the page is skipped.
        start_idx = data.find("\n") + 1
        while True:
            # "\n \n " is treated as the end marker of one notice.
            end_idx = data.find("\n \n ", start_idx)
            if end_idx == -1:
                break

            notice_string = data[start_idx:end_idx].strip()
            notice_message = email.message_from_string(notice_string)
            if "\n\n" in notice_message.as_string():
                notice_dict = conversion.parse_notice(notice_string)
            else:
                notice_dict = dict(notice_message)
                # get_all returns None when the notice has no COMMENTS field.
                comments = notice_message.get_all("COMMENTS")
                if comments is not None:
                    notice_dict["COMMENTS"] = "\n".join(
                        item for item in comments if item
                    )

            selection = _select_amon_keywords(notice_message["NOTICE_TYPE"])
            if selection is None:
                # Nothing was converted for this notice, so nothing is
                # written; otherwise the previous notice's JSON would be
                # saved again under a new serial number.
                warnings.warn(
                    "Skipping notice with unrecognised NOTICE_TYPE "
                    f"{notice_message['NOTICE_TYPE']!r} in {trigger_link}"
                )
            else:
                notice_type, keywords = selection
                output = text_to_json_amon(notice_dict, keywords, notice_type)

                file_name = f"AMON_{sernum}_{output['record_number']}.json"
                with open(
                    os.path.join(output_path, file_name), "w", encoding="utf-8"
                ) as f:
                    json.dump(output, f)

                sernum += 1

            # The next notice starts on the line after the slash separator.
            separator_idx = data.find("///////////", end_idx)
            if separator_idx == -1:
                break
            start_idx = data.find("\n", separator_idx)

    return sernum


def create_all_amon_jsons():
    """Creates an `amon_jsons` directory and fills it with the json for all AMON triggers.

    The six AMON event webpages are processed in a fixed order and share one
    running serial number, so file names stay unique across pages."""
    output_path = "./output/amon_jsons/"
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_path, exist_ok=True)

    # (webpage with the table of triggers, search string for trigger links)
    webpages = (
        (
            "https://gcn.gsfc.nasa.gov/amon_icecube_gold_bronze_events.html",
            "notices_amon_g_b/.*amon",
        ),
        (
            "https://gcn.gsfc.nasa.gov/amon_hawc_events.html",
            "notices_amon_hawc/.*amon",
        ),
        (
            "https://gcn.gsfc.nasa.gov/amon_nu_em_coinc_events.html",
            "notices_amon_nu_em/.*amon",
        ),
        (
            "https://gcn.gsfc.nasa.gov/amon_icecube_cascade_events.html",
            "notices_amon_icecube_cascade/.*amon",
        ),
        (
            "https://gcn.gsfc.nasa.gov/amon_ehe_events.html",
            "notices_amon/.*amon",
        ),
        (
            "https://gcn.gsfc.nasa.gov/amon_hese_events.html",
            "notices_amon/.*amon",
        ),
    )

    sernum = 1
    for link, search_string in webpages:
        sernum = create_amon_jsons_one_webpage(
            link, search_string, output_path, sernum
        )