generated from KSUDS/p4_machinelearning
-
Notifications
You must be signed in to change notification settings - Fork 0
/
explain.py
77 lines (64 loc) · 2.27 KB
/
explain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# %%
# Install shap with pip, using the interpreter that is running this cell
# (sys.executable) so the package lands in the active environment.
# NOTE: the leading "!" is an IPython/Jupyter shell escape. This cell only
# works when executed interactively as a "# %%" cell; plain `python` rejects
# the line as a syntax error.
import sys
!{sys.executable} -m pip install shap
# %%
import pandas as pd
import numpy as np
import dalex as dx
import matplotlib.pyplot as plt
import shap
import joblib
from dalex._explainer.yhat import yhat_proba_default
from sklearn.model_selection import train_test_split
# %%
# load models and data
# NOTE: joblib/pickle files can execute arbitrary code when loaded; only load
# artifacts produced by this project.
clfNB = joblib.load('models/clfNB.pkl')
clfGB = joblib.load('models/clfGB.pkl')
clfGB_reduced = joblib.load('models/clfGB_final.pkl')
compVars = pd.read_pickle('models/compVars.pkl').tolist()
dat_ml = pd.read_pickle('dat_ml.pkl')

# Target column, the full predictor set, and the predictor set restricted to
# the columns named in compVars.
y_pred = dat_ml.before1980
X_pred = dat_ml.drop(['yrbuilt', 'before1980'], axis=1)
X_pred_reduced = dat_ml.filter(compVars, axis=1)

# Split all three objects in ONE call. train_test_split applies the same
# shuffled row indices to every array it is given, so the full and reduced
# feature sets are guaranteed to share rows with y_train / y_test, and the
# target is no longer split (and overwritten) a second time.
# Return order is train/test per input, in the order the inputs are passed.
(X_train, X_test,
 X_train_reduced, X_test_reduced,
 y_train, y_test) = train_test_split(
    X_pred, X_pred_reduced, y_pred, test_size=.34, random_state=76)
# %%
# Wrap the reduced model and its test split in a dalex Explainer, then plot
# the model_parts() result (variable importance), showing at most 15 variables.
expReduced = dx.Explainer(
    model=clfGB_reduced,
    data=X_test_reduced,
    y=y_test,
)
explanationReduced = expReduced.model_parts()
explanationReduced.plot(max_vars=15)
# %%
# Model performance: compute the classification measures for the explainer,
# print the result table, then draw the ROC curve.
mpReduced = expReduced.model_performance(model_type="classification")
print(mpReduced.result)
mpReduced.plot(geom="roc")
# %%
# Variable-level explanations for every variable in compVars, computed two
# ways (type 'partial' labelled "pdp", type 'accumulated' labelled "ale") and
# drawn together in a single plot for comparison.
pdp_num_red = expReduced.model_profile(
    type="partial",
    label="pdp",
    variables=compVars,
)
ale_num_red = expReduced.model_profile(
    type="accumulated",
    label="ale",
    variables=compVars,
)
pdp_num_red.plot(ale_num_red)
# %%
# Observation-level explanation: Shapley-value attributions (type 'shap') for
# the first row of the reduced test set, plotted with at most 12 variables.
first_row_attributions = expReduced.predict_parts(
    X_test_reduced.iloc[0],
    type="shap",
    label="first observation",
)
sh = first_row_attributions
sh.plot(max_vars=12)
# %%
# Build a shap explainer for the reduced model and evaluate it on the reduced
# test set; shap_values is reused by every shap plot cell below.
explainerShap = shap.Explainer(clfGB_reduced)
shap_values = explainerShap(X_test_reduced)
# %%
# Variable importance based on the shap values (bar chart)
shap.plots.bar(shap_values)
# %%
# Beeswarm plot of the shap values. Background reading on these charts:
# https://medium.com/dataman-in-ai/the-shap-with-more-elegant-charts-bc3e73fa1c0c
shap.plots.beeswarm(shap_values)
# %%
# Beeswarm of the absolute shap values, drawn with the "shap_red" colour;
# comparable to the bar plot
shap.plots.beeswarm(shap_values.abs, color="shap_red")
# %%
# Heatmap of the shap values for the first 1000 explained rows (the slice is
# simply shorter if fewer rows exist), with max_display set to 13.
shap.plots.heatmap(shap_values[:1000], max_display=13)