-
Notifications
You must be signed in to change notification settings - Fork 0
/
log.py
66 lines (53 loc) · 2.09 KB
/
log.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
import subprocess
import pandas
# Releases already present as columns in the logs; the last entry is the
# baseline the current run is compared against.
RELEASES = ["v2023-07", "v2024-04"]

# Fail early with a clear message when the bucket is not configured, instead
# of a TypeError from concatenating None into the URL.
_bucket = os.environ.get("OBJECTSTORE_BUCKET")
if not _bucket:
    raise RuntimeError(
        "OBJECTSTORE_BUCKET environment variable must be set to locate the logs"
    )
S3 = f"s3://{_bucket}/dss_projects_2024/harvest_restrictions"

# column header comes from git tag (output of `git describe --tags`, run in
# the current working directory)
tag = subprocess.check_output(["git", "describe", "--tags"]).decode("ascii").strip()
# read data: the running logs come from the object store, the current
# summaries from CSV files in the working directory.
# The S3 URLs are built with an explicit "/" - os.path.join would insert the
# platform path separator, which is "\" on Windows and yields an invalid URL.
d_log = pandas.read_csv(f"{S3}/log_land_designations.csv")
d_summary = pandas.read_csv("current_land_designations.csv")
h_log = pandas.read_csv(f"{S3}/log_harvest_restrictions.csv")
h_summary = pandas.read_csv("current_harvest_restrictions.csv")
def _append_release(
    log: pandas.DataFrame,
    summary: pandas.DataFrame,
    key: str,
    previous_tag: str,
    tag: str,
) -> pandas.DataFrame:
    """Add the current release's area to a log and compare it to the baseline.

    Args:
        log: Log table holding ``key`` and one area column per past release,
            including ``previous_tag``.
        summary: Current totals, holding ``key`` and ``area_ha``.
        key: Column used to join the two tables; becomes the result's index.
        previous_tag: Name of the baseline release column in ``log``.
        tag: Name given to the current ``area_ha`` column.

    Returns:
        The outer join of ``log`` and ``summary`` indexed by ``key``, with
        ``area_ha`` renamed to ``tag`` and ``diff`` / ``pct_diff`` columns added.
    """
    # Outer join keeps rows that exist in only one input. fillna(0) then puts
    # 0 in every missing cell - areas, but also any name/code columns.
    merged = log.merge(summary, how="outer", on=key).fillna(0)
    # use current tag as new column name
    merged = merged.rename(columns={"area_ha": tag})
    # diff is baseline minus current, so an increase in area shows as negative
    merged["diff"] = merged[previous_tag] - merged[tag]
    # NOTE: rows whose baseline area is 0 divide by zero here, which pandas
    # reports as inf/NaN rather than raising.
    merged["pct_diff"] = (merged["diff"] / merged[previous_tag]) * 100
    # clean up
    return merged.round({tag: 0, "diff": 0, "pct_diff": 2}).set_index(key)


# The most recent recorded release is the baseline for diff / pct_diff.
previous_tag = RELEASES[-1]
# Renaming area_ha to a tag that is already a log column would create two
# columns with the same name and fail later with an obscure pandas error.
if tag in RELEASES:
    raise ValueError(
        f"tag {tag!r} is already listed in RELEASES; cannot add it as a new column"
    )

# log columns - retain only the categories and area_ha of previous releases
d_columns = [
    "land_designation_type_rank",
    "harvest_restriction_class_rank",
    "harvest_restriction_class_name",
    "land_designation_type_code",
    "land_designation_type_name",
] + RELEASES
h_columns = [
    "harvest_restriction_class_rank",
    "harvest_restriction_class_name",
] + RELEASES
d_log = d_log[d_columns]
h_log = h_log[h_columns]

# summary columns - drop everything but keys and current area totals
d_summary = d_summary[["land_designation_type_rank", "area_ha"]]
h_summary = h_summary[["harvest_restriction_class_rank", "area_ha"]]

# join each log to its latest summary and calculate diff and pct diff
d = _append_release(
    d_log, d_summary, "land_designation_type_rank", previous_tag, tag
)
h = _append_release(
    h_log, h_summary, "harvest_restriction_class_rank", previous_tag, tag
)
# dump results back to s3, overwriting the logs that were read at the start.
# The URLs are built with an explicit "/" - os.path.join would insert the
# platform path separator, which is "\" on Windows and yields an invalid URL.
d.to_csv(f"{S3}/log_land_designations.csv")
h.to_csv(f"{S3}/log_harvest_restrictions.csv")