-
Notifications
You must be signed in to change notification settings - Fork 0
/
PB20061254.py
58 lines (48 loc) · 2.05 KB
/
PB20061254.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python3
import os
import json
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score
DATA_PATH = "./data/test_data_sample.json"
## For the TA's test:
## rename the class to your student ID and implement the predict function, which
## reads the .json file, calls your trained model, and returns the prediction
## results as an ndarray.
class PB20061254():
    """Grader-facing wrapper that runs the trained model ensemble on a JSON file."""

    @staticmethod
    def _majority_vote(predictions):
        """Return the most frequent label per sample across several model outputs.

        Args:
            predictions: Sequence holding one array-like of predicted labels per
                model; every entry must have the same number of samples.

        Returns:
            1-D ndarray with one label per sample. When several labels are
            equally frequent for a sample, the smallest label is chosen, so the
            result is deterministic.

        Raises:
            ValueError: If ``predictions`` is empty or its entries differ in
                length, i.e. no (n_models, n_samples) stack can be formed.
        """
        # Rows are models, columns are samples.
        stacked = np.asarray([np.ravel(p) for p in predictions])
        if stacked.ndim != 2:
            raise ValueError(
                "predictions must be a non-empty sequence of equal-length label arrays"
            )
        if stacked.shape[1] == 0:
            # No samples: nothing to vote on.
            return stacked[0]
        labels = np.unique(stacked)  # sorted, so argmax below breaks ties low
        # counts[i, j] = number of models that predicted labels[i] for sample j.
        counts = np.stack([(stacked == label).sum(axis=0) for label in labels])
        return labels[counts.argmax(axis=0)]

    def predict(self, data_path):
        """Predict a label for every record of the JSON file at ``data_path``.

        The preprocessor and the list of trained models are loaded from
        ``./models``; the data is cleansed and transformed by the preprocessor,
        each model predicts on the resulting feature matrix, and the per-sample
        majority vote is returned.

        Args:
            data_path: Path to a JSON file readable by ``pandas.read_json``. The
                transformed frame must contain a ``'fit'`` column, which is
                dropped to obtain the feature matrix.

        Returns:
            ndarray with one predicted label per sample, shifted by +1 to undo
            the 0-based label encoding used during training.
        """
        #### This function is generated by `scripts/generate.py`
        import inspect
        import sys

        # NOTE(review): presumably the pickled artifacts reference project
        # modules that live under ./src - confirm. Guarded so that repeated
        # calls do not keep growing sys.path.
        if './src' not in sys.path:
            sys.path.append('./src')

        in_dir = './models'

        #### Predict function begins here
        from torch import load

        # SECURITY: torch.load unpickles arbitrary Python objects; only load
        # artifacts from a trusted source.
        # Newer PyTorch releases default to weights_only=True, which rejects
        # pickled non-tensor objects, so request full unpickling explicitly
        # whenever the installed version knows the keyword.
        load_kwargs = {}
        if 'weights_only' in inspect.signature(load).parameters:
            load_kwargs['weights_only'] = False
        prep = load(f'{in_dir}/preprocessor.pt', **load_kwargs)
        models = load(f'{in_dir}/models.pt', **load_kwargs)

        # Load data from disk
        test_df = pd.read_json(data_path)

        # Cleanse and transform data
        test_df = prep.cleanse(test_df)
        test_df_prep = prep.transform(test_df)

        # Feature matrix only; the target column is not needed for prediction.
        X = test_df_prep.drop('fit', axis=1).values

        # To tackle class imbalance, the majority class (True to Size) was split
        # into 3 folds and one model was trained per fold. At prediction time the
        # outputs of all trained models are aggregated by majority vote.
        y_pred = self._majority_vote([model.predict(X) for model in models])

        # Our OrdinalEncoder maps the labels to 0, 1, 2, so we need to plus 1
        return y_pred + 1
## for local validation
if __name__ == '__main__':
    # Read the ground-truth labels straight from the sample file. The encoding
    # is pinned to UTF-8 so the labels are decoded the same way on every
    # platform, matching the default used by pandas.read_json in predict().
    with open(DATA_PATH, "r", encoding="utf-8") as f:
        test_data_list = json.load(f)
    true = np.array([int(data["fit"]) for data in test_data_list])

    # Run the submitted predictor on the same file.
    bot = PB20061254()
    pred = bot.predict(DATA_PATH)

    # Report the macro-averaged F1 score of the predictions.
    macro_f1 = f1_score(y_true=true, y_pred=pred, average="macro")
    print(macro_f1)