forked from mmajewsk/velo_dimred
-
Notifications
You must be signed in to change notification settings - Fork 1
/
PCA.py
184 lines (123 loc) · 4.85 KB
/
PCA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# ---
# jupyter:
# jupytext:
# formats: ipynb,py:light
# text_representation:
# extension: .py
# format_name: light
# format_version: '1.5'
# jupytext_version: 1.10.2
# kernelspec:
# display_name: Python 3
# language: python
# name: python3
# ---
# ### Installation
#
# requirements.txt can be found in the repository. To install all necessary packages, run:
#
# pip install -r requirements.txt
#
# Adding the Calina library to PYTHONPATH is also necessary. If you are using Anaconda, you can just type:
#
# conda develop Calina_path
# ### Importing required modules
import pandas as pd
# %load_ext autoreload
# %autoreload 2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
# ### Loading data into dataset
# +
from calibration_dataset import Tell1Dataset
class MyDS(Tell1Dataset):
    """Tell1Dataset variant for calibration files named by date, e.g. ``2012-07-30.csv``.

    NOTE(review): how Tell1Dataset consumes these two attributes is not
    visible in this file; the descriptions below follow their names and values.
    """

    # Date layout of the file name (presumably a strptime pattern — confirm in Tell1Dataset).
    filename_format = '%Y-%m-%d'
    # Regex for accepted file names. The dot is escaped so that only a literal
    # ".csv" suffix matches; unescaped, it would accept any character there.
    filename_regex_format = r'\d{4}-\d{2}-\d{2}\.csv'
# Directory holding the calibration files (relative to the working directory).
datapath = "data/calibrations/"
# Ask the dataset class for the file paths under datapath.
data_list = MyDS.get_filepaths_from_dir(datapath)
# Build the dataset from those paths; read=True presumably loads the files
# immediately — confirm in Tell1Dataset.
mds = MyDS(data_list, read=True)
# -
# ### Separating data
data = {
    'hit threshold': mds.dfh.df,  # table taken from mds.dfh
    'pedestal': mds.dfp.df,  # table taken from mds.dfp
    'low threshold': mds.dfl.df,  # table taken from mds.dfl
}
# ### Clearing data
for key in data:
    print(key)
    # Remove the bookkeeping columns that are not used in the analysis.
    data[key] = data[key].drop(
        columns=['Zmod', 'slot_label', 'mod_nr', 'sensor_number', 'type', 'datetime']
    )
    print(data[key].sensor_type.unique())
    # Replace the table with one sub-table per sensor type ('phi' rows and 'R'
    # rows); the two columns that were only needed for the split are dropped.
    data[key] = {
        typ: data[key][data[key]['sensor_type'] == label].drop(
            columns=['mod_type', 'sensor_type']
        )
        for typ, label in (('phi', 'phi'), ('r_phi', 'R'))
    }
# ### Importing modules needed for PCA
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import random
# ### Sets color map for plot, can be changed by editing
# ##### cm = plt.get_cmap(new color map)
#
def set_color(plot, num_colors=30, cmap_name='jet'):
    """Install a scrambled colour cycle on *plot*.

    ``num_colors`` colours are sampled evenly from the colormap and then
    reordered with a fixed permutation, so every call produces the same cycle.

    Parameters:
        plot: object exposing ``set_prop_cycle`` (a matplotlib Axes in this file).
        num_colors: number of colours in the cycle.
        cmap_name: name of the matplotlib colormap to sample from.
    """
    cm = plt.get_cmap(cmap_name)
    cNorm = mpl.colors.Normalize(vmin=0, vmax=num_colors - 1)
    scalarMap = mpl.cm.ScalarMappable(norm=cNorm, cmap=cm)
    colors = [scalarMap.to_rgba(i) for i in range(num_colors)]
    # random.shuffle(colors, lambda: 0.1) cannot be used any more: the second
    # argument was deprecated in Python 3.9 and removed in 3.11 (TypeError).
    # This loop performs exactly the swaps that call made with a constant 0.1,
    # so the resulting colour order is unchanged.
    for i in reversed(range(1, len(colors))):
        j = int(0.1 * (i + 1))
        colors[i], colors[j] = colors[j], colors[i]
    plot.set_prop_cycle("color", colors)
# ### Data normalization and PCA
def full_pca(data, percent):
    """Standardise *data*, transpose it and project it with PCA.

    Each column of *data* is standardised with StandardScaler, the table is
    transposed so that every original column becomes one sample, and PCA is
    fitted on those samples.

    Parameters:
        data: 2-D table accepted by ``StandardScaler.fit_transform``.
        percent: forwarded unchanged as PCA's ``n_components``.

    Returns:
        DataFrame with one row per original column ("Channel 0", ...) and one
        column per retained component ("Principal component 1", ...).
    """
    standardized = StandardScaler().fit_transform(data)
    reducer = PCA(n_components=percent, svd_solver='auto')
    projected = reducer.fit_transform(standardized.transpose())
    n_channels, n_components = projected.shape
    return pd.DataFrame(
        projected,
        columns=[f"Principal component {i+1}" for i in range(n_components)],
        index=[f"Channel {i}" for i in range(n_channels)],
    )
# ### Setting plot size and number of principal components
plt.rcParams['figure.figsize'] = [16, 9*15]  # default size for new figures: width 16, height 9*15 = 135
procentage_or_num_of_comp = 2  # handed to full_pca as PCA's n_components; the scatter plots read the first two components
# ### Runs PCA on each sensor's data and scatters the result on the current plot
# #### alpha - sets transparency of the points
def scatter_data(single_data):
    """Run PCA on every entry of *single_data* and scatter its first two components.

    Parameters:
        single_data: mapping from a legend label to the table handed to
            ``full_pca``.

    Draws on the current matplotlib axes and adds a legend; returns nothing.
    """
    point_opacity = 0.4
    for label, table in single_data.items():
        projected = full_pca(table, procentage_or_num_of_comp)
        plt.scatter(
            projected.iloc[:, 0],
            projected.iloc[:, 1],
            edgecolor='none',
            alpha=point_opacity,
            label=label,
        )
    plt.legend(title="Module nr.")
# ### Separating module type data into single module
def draw_a_plot(sensor, mod_key):
    """Draw one PCA scatter subplot for every entry of *sensor*.

    Parameters:
        sensor: mapping from a sensor-type key to a table that has a
            'sensor' column.
        mod_key: label used as the first part of each subplot title.

    The subplot slot is read from the function attribute
    ``draw_a_plot.position``, which the caller must set before the first
    call; it is advanced by one after every subplot.
    """
    for sensor_key, table in sensor.items():
        axes = plt.subplot(draw_a_plot.position, title=f'{mod_key} - {sensor_key}')
        set_color(axes)
        # One table per sensor id, without the grouping column itself.
        per_sensor = {
            sensor_id: rows.drop('sensor', axis=1)
            for sensor_id, rows in table.groupby('sensor')
        }
        scatter_data(per_sensor)
        plt.xlabel('Principal component 1')
        plt.ylabel('Principal component 2')
        draw_a_plot.position += 1
# ### Separates the data by module type and then plots it
def do_a_pca_and_draw_a_plot(data):
    """Plot the PCA scatter subplots for every entry of *data*.

    Parameters:
        data: mapping from a label to the per-sensor-type mapping expected by
            ``draw_a_plot``.

    Resets the subplot counter to 911 (matplotlib's three-digit shorthand for
    a 9x1 grid, first slot), sets the figure title, draws all subplots and
    finally tightens the layout.
    """
    draw_a_plot.position = 911
    plt.suptitle("PCA results", fontsize=16)
    for mod_key, sensors in data.items():
        draw_a_plot(sensors, mod_key)
    plt.tight_layout()
import plotly.express as px
d = data['pedestal']['r_phi']
# One table per sensor id, without the grouping column itself.
single_data = {k: v.drop(columns='sensor') for k, v in d.groupby('sensor')}
alpha = 0.4  # not read in this cell; kept so the name stays defined
# PCA result for every sensor, tagged with the sensor id as a string so it can
# serve as a categorical colour key.
thisrun = [
    full_pca(sensor_table, procentage_or_num_of_comp).assign(sensor=str(int(sensor_id)))
    for sensor_id, sensor_table in single_data.items()
]
alldat = pd.concat(thisrun)
# + pycharm={"name": "#%%\n"}
# Interactive scatter of the first two principal components; points are
# coloured by the 'sensor' column of alldat and drawn half-transparent.
fig = px.scatter(alldat, x="Principal component 1", y="Principal component 2", color='sensor', opacity=0.5)
# Display the figure with plotly's "notebook" renderer.
fig.show(renderer="notebook")
# Also save the figure as an HTML file named PCA.html (relative path).
fig.write_html("PCA.html")
# +