-
Notifications
You must be signed in to change notification settings - Fork 1
/
process_enzyme_data.R
executable file
·304 lines (263 loc) · 13.4 KB
/
process_enzyme_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
library(openxlsx)
library(tidyverse)
library(lubridate)
# To run this script, download the extracellular_enzyme_data folder from the WTFProject Drive
# Make a list of the file names
enzyme_files <- list.files(path = "extracellular_enzyme_data/raw_data",pattern = ".txt",full.names = TRUE, recursive = TRUE)
# Read in plate data
enzymes <- data.frame()
for(x in 1:length(enzyme_files)) {
file <- as.data.frame(read_table(enzyme_files[x],col_names=FALSE))
names(file) <- c("Plate","Well","Read")
enzymes <- rbind(enzymes,file)
}
# Parse out plate information
enzymes2 <- enzymes %>%
separate(Plate, c("assay_date","PlateType","SampleID","extraID"), convert=T, sep="_") %>%
unite(SampleID,extraID, col="SampleID",sep="_",na.rm=TRUE) %>%
mutate(assay_date = ymd(assay_date),
Read = as.numeric(Read))
# Correct naming issues, select correct samples, remove samples that need to be redone
# Some samples have multiple reads, the correct read is marked by "_reread"
enzyme_redo <- enzymes2 %>%
separate(SampleID,c("SampleID","extraID"),sep="_") %>%
filter(extraID == "reread") %>%
mutate(is_redo = TRUE) %>%
select(assay_date,PlateType,SampleID,is_redo) %>%
unique()
enzymes3 <- enzymes2 %>%
left_join(enzyme_redo,by=c("assay_date","PlateType","SampleID")) %>%
mutate(is_redo = replace(is_redo,is.na(is_redo),FALSE)) %>%
filter(is_redo == FALSE) %>%
mutate(SampleID = gsub("_reread","",SampleID)) %>%
select(-is_redo)
# Remove other samples with errors
enzymes4 <- enzymes3 %>%
separate(SampleID,c("SampleID","extraID"),sep="_") %>%
filter(is.na(extraID)) %>%
filter(!SampleID%in%c("test","a")) %>%
mutate(SampleID = as.numeric(SampleID)) %>% # fixes blank IDs for buffer plates caused by unite
select(-extraID)
# Merge plate layout info
Sample.Info <-read.xlsx("extracellular_enzyme_data/EnzymePlateInfo.xlsx",1)
enzymes5 <- left_join(enzymes4,Sample.Info, by=c("PlateType","Well"))
# Clean clear plate readings by deleting samples with reads that could interfere with the MM fit
enzymes6 <- enzymes5 %>%
separate(Well,into = c("Row", "Col"),sep ="(?<=[A-Za-z])(?=[0-9])",remove = FALSE) %>%
mutate(Remove = "No",
Col = as.numeric(Col))
enzymes_P <- enzymes6 %>%
filter(PlateType == "P") %>%
arrange(SampleID,Col)
# Input desired threshold here
# 1.65 is currently the lowest threshold we can use for clear plates without removing valid points
threshold <- 1.65
# Counter helps keep track of when we finish reading every plate
well_counter <- 0
for (row in 1:nrow(enzymes_P)) {
well_counter <- well_counter + 1
# Corrects for row A, the top row of the plate
if (enzymes_P[row,]$Row == "A") {
if (enzymes_P[row, 'Read'] > threshold*enzymes_P[row+1, 'Read']) {
enzymes_P[row, 'Remove'] = 'Yes'
}
}
# Corrects for row H, the bottom row of the plate
else if (enzymes_P[row,]$Row == "H") {
if (enzymes_P[row, 'Read'] > threshold*enzymes_P[row-1, 'Read']) {
enzymes_P[row, 'Remove'] = 'Yes'
}
}
# Corrects for rows in middle
else {
if (enzymes_P[row, 'Read'] > threshold*(enzymes_P[row+1, 'Read']+threshold*enzymes_P[row-1, 'Read'])/2) {
enzymes_P[row, 'Remove'] = 'Yes'
}
}
# Indicates when we have finished reading a well
if (well_counter == 96) {
# Loops through letters A-H
for (platerow in LETTERS[1:8]) {
# Gives subset of data that has only specific Sample ID and plate row
enzymes_P_platerow <- enzymes_P[enzymes_P$SampleID == enzymes_P[row,'SampleID'] & enzymes_P$Row == platerow,]
# Counts the number of Yes's from the specific Sample ID and plate row
# If more than 4 in the row are Yes, we change entire row to Yes
if (length(which(enzymes_P_platerow$Remove=="Yes")) >= 4) {
enzymes_P[enzymes_P$SampleID == enzymes_P[row,'SampleID'] & enzymes_P$Row == platerow, 'Remove'] = 'Wipe'
}
}
# Resets well_counter to 0 for the new plate
well_counter = 0
}
}
enzymes_clean <- enzymes_P %>%
# Remove transfer errors that affected entire rows
filter(Remove!="Wipe") %>%
# Indicate buffer errors and change read to 0.05 when error is present
mutate(Remove = ifelse(Col%in%c(1:4) & Remove=="Yes","buffer_error",Remove),
Read = ifelse(Remove=="buffer_error",0.05,Read)) %>%
# Filter out sample points that were marked as errors
filter(Remove!="Yes") %>%
rbind(filter(enzymes6,PlateType!="P"))
#################################################
# Calculate activity from fluorescence/absorbance readings
# Group together similar well types and calculate averages
Enz <- enzymes_clean %>%
group_by(assay_date,SampleID,AssayGroup,WellType,Variate,Concen) %>%
summarize(Read.Mean = mean(Read,na.rm=TRUE)) %>%
ungroup()
# Organize the well types for LAP assay
AMC <- filter(Enz,AssayGroup %in% c("AMC","AMCMUB")) %>% select(-AssayGroup)
AMC.Stan <- filter(AMC,WellType %in% c("Stan","Buffer")) %>% select(-SampleID,-Variate,-Concen) %>% spread(WellType,Read.Mean)
AMC.Quench <- filter(AMC,WellType %in% c("Quench","HomBlank")) %>% select(-Variate,-Concen) %>% spread(WellType,Read.Mean)
AMC.Assay <- filter(AMC,WellType=="Assay") %>% spread(WellType,Read.Mean)
AMC.SubCon <- filter(AMC,WellType=="SubCon") %>% select(-SampleID) %>% spread(WellType,Read.Mean)
# Organize the well types for other hydrolase assays
MUB <- filter(Enz,AssayGroup %in% c("MUB","AMCMUB")) %>% select(-AssayGroup)
MUB.Stan <- filter(MUB,WellType %in% c("Stan","Buffer")) %>% select(-SampleID,-Variate,-Concen) %>% spread(WellType,Read.Mean)
MUB.Quench <- filter(MUB,WellType %in% c("Quench","HomBlank")) %>% select(-Variate,-Concen) %>% spread(WellType,Read.Mean)
MUB.Assay <- filter(MUB,WellType=="Assay") %>% spread(WellType,Read.Mean)
MUB.SubCon <- filter(MUB,WellType=="SubCon") %>% select(-SampleID) %>% spread(WellType,Read.Mean)
# Read in and process dry weight data
Weights <-rbind(read.xlsx("extracellular_enzyme_data/enzyme_weights.xlsx",sheet="Natives"),
read.xlsx("extracellular_enzyme_data/enzyme_weights.xlsx",sheet="Pines")) %>%
select(sample_id,envelope_weight,envelope_fresh,envelope_dry,assay_weight)
names(Weights) <- c("SampleID","Env","EnvFresh","EnvDry","AssayWt")
# Calculate dry weight of assay sample
Weights <- filter(select(mutate(Weights,DryWt = AssayWt*(EnvDry - Env)/(EnvFresh - Env)),SampleID,DryWt),!is.na(DryWt))
# Merge hydrolase data with dry weights
AMC.MUB <- left_join(rbind(left_join(left_join(left_join(AMC.Assay,AMC.SubCon),AMC.Stan),AMC.Quench),
left_join(left_join(left_join(MUB.Assay,MUB.SubCon),MUB.Stan),MUB.Quench)),Weights)
# Calculate hydrolase activity in umol/h/g units based on 3.125 nmol standard and 0.125 ml homogenate, 4 hours, DryWt per 100 ml
Hydro <- mutate(AMC.MUB, Quench.coef = Quench/Stan) %>%
mutate(Activity = ((Assay-HomBlank-SubCon*Quench.coef)/(Stan*Quench.coef/3.125))/(4*DryWt/100*0.125)/1000) %>%
select(assay_date,SampleID,Concen,Variate,Activity)
# Organize the well types for oxidase assays
OX <- filter(Enz,AssayGroup=="Oxidase") %>% select(-AssayGroup)
OX.Blank <- filter(OX,WellType %in% c("Buffer","HomBlank")) %>% select(-Concen) %>% spread(WellType,Read.Mean)
OX.Assay <- filter(OX,WellType %in% c("Assay","SubCon")) %>% spread(WellType,Read.Mean)
# Calculate oxidase activity in umol/h/g units based on LDOPA EC of 7.9*125/300 (125 ml supernatant transferred), 0.125 ml homogenate, 24 hours, DryWt per 100 ml buffer
Oxi <- mutate(left_join(left_join(OX.Assay,OX.Blank),Weights), Activity = ((Assay-HomBlank-(SubCon-Buffer))*100)/(3.29*0.125*24*DryWt))
PER <- rename(select(filter(Oxi,Variate=="PER"),assay_date,SampleID,Concen,Activity),PER = Activity)
PPO <- rename(select(filter(Oxi,Variate=="PPO"),assay_date,SampleID,Concen,Activity),PPO = Activity)
Oxidase <- gather(mutate(full_join(PPO,PER),PER = PER - PPO),key=Variate,value=Activity,-assay_date,-SampleID,-Concen)
# Combine hydrolase and oxidase data
Enz_all <- rbind(Hydro,Oxidase) %>%
filter(is.finite(Activity))
#################################################
# Fit data to Michaelis Menten curve
library(nlme)
library(nlstools)
# MM fit function
attach(Enz_all)
mmfit <- function(i){
nls.m <- try(nls(y~Vmax*x/(Km+x),data=list(y=Activity[i],x=Concen[i]),
start=list(Vmax=max(Activity[i],na.rm=T),Km=max(Concen[i]/4,na.rm=T))))
Vmax <- try(coef(nls.m)[[1]],silent=T)
Km <- try(coef(nls.m)[[2]],silent=T)
lowerV <- try(confint2(nls.m)[[1]],silent=T)
upperV <- try(confint2(nls.m)[[3]],silent=T)
lowerK <- try(confint2(nls.m)[[2]],silent=T)
upperK <- try(confint2(nls.m)[[4]],silent=T)
fit <- try(100*(upperV - lowerV)/Vmax,silent=T)
model.pts <- try(Vmax*Concen[i]/(Km+Concen[i]),silent=T)
plot(Activity[i]~Concen[i],main=paste("Sample ID:",SampleID[i][1],"Enzyme:",Variate[i][1],sep=" "))
try(lines(model.pts~Concen[i],col="red"),silent=T)
text(Concen[i],Activity[i],Concen[i],pos=4)
mtext(paste("Vmax=",ifelse(is.numeric(Vmax),round(Vmax,3),NA)," Km=",ifelse(is.numeric(Km),round(Km,3),NA),
" Fit=",ifelse(is.numeric(fit),round(fit,3),NA),sep=""))
c(Vmax,Km,lowerV,upperV,lowerK,upperK)
}
# Apply function to values
pdf("fits.pdf")
x.seq <- seq(len=dim(Enz_all)[1])
fit <- tapply(x.seq, list(Enz_all$assay_date, Enz_all$SampleID, Enz_all$Variate),
mmfit)
dev.off()
Enz_fit <- as.data.frame.table(tapply(x.seq, list(Enz_all$assay_date, Enz_all$SampleID, Enz_all$Variate),function(i)1))
names(Enz_fit) <- c("assay_date","SampleID","Variate","Vmax")
detach(Enz_all)
# Extract successful Vmax and Km values
Enz_fit$Vmax <- as.numeric(unlist(sapply(sapply(fit,'[',1),'[[',1) %>% replace_na()))
Enz_fit$Km <- as.numeric(unlist(sapply(sapply(fit,'[',2),'[[',1) %>% replace_na()))
Enz_fit$lowerV <- as.numeric(unlist(sapply(sapply(fit,'[',3),'[[',1) %>% replace_na()))
Enz_fit$upperV <- as.numeric(unlist(sapply(sapply(fit,'[',4),'[[',1) %>% replace_na()))
Enz_fit$lowerK <- as.numeric(unlist(sapply(sapply(fit,'[',5),'[[',1) %>% replace_na()))
Enz_fit$upperK <- as.numeric(unlist(sapply(sapply(fit,'[',6),'[[',1) %>% replace_na()))
# Reorganize dataset and add back samples that failed the fit
# Full list samples
Enz_all2 <- Enz_all %>%
select(assay_date,SampleID, Variate) %>%
unique()
Enz_fit2 <- Enz_fit %>%
filter(!is.na(Vmax)) %>%
mutate(SampleID = as.numeric(as.character(SampleID)),
Variate = as.character(Variate),
assay_date = ymd(as.character(assay_date))) %>%
right_join(Enz_all2,by=c("assay_date","SampleID","Variate"))
# Set Vmaxs that are negative to 0 and their Kms to NA
Enz_fit3 <- Enz_fit2 %>%
mutate(Vmax = ifelse(Vmax<0,0,Vmax),
lowerV = ifelse(Vmax==0,0,lowerV),
upperV = ifelse(Vmax==0,0,upperV),
Km = ifelse(Vmax==0,NA,Km),
lowerK = ifelse(Vmax==0,NA,lowerK),
upperK = ifelse(Vmax==0,NA,upperK))
# Export file
write_csv(Enz_fit3, "fitted_enzyme_data.csv")
#################################################
# The next section of code visualizes the samples that are still having fit issues
# Check samples that failed fit
Enz_fit_fail <- Enz_fit2 %>%
filter(is.na(Vmax)) %>%
select(assay_date,SampleID,Variate) %>%
unique() %>%
# Add points that were used in MM fit
left_join(Enz_all,by=c("assay_date","SampleID","Variate"))
# Determine which of the fit fails were just due to there being no detectable activity
Enz_fit_fail_0 <- Enz_fit_fail %>%
# Take the average of the highest 3-4 concentration points
filter(Concen > 60) %>%
group_by(assay_date,SampleID,Variate) %>%
summarize(Avg_activity = mean(Activity,na.rm=TRUE)) %>%
# When averages are less than 0.05 for BX and BG and 0.2 for the remaining enzymes, the plots look like noise
filter(Avg_activity < 0.2 & Variate%in%c("AG","AP","BG","CBH","LAP","PER","PPO") |
Avg_activity < 0.05 & Variate%in%c("BX","NAG")) %>%
ungroup() %>%
select(-Avg_activity) %>%
unique()
# Vmax can be set to 0 for these samples and enzymes
# Use the Enz0 if you want to visualize the plots
Enz_0 <- Enz_fit_fail_0 %>%
left_join(Enz_all,by=c("assay_date","SampleID","Variate"))
# Determine which of the fit fails had detectable activity and need to be cleaned again for proper fits
Enz_fit_fail_1 <- Enz_fit_fail %>%
filter(Concen > 60) %>%
group_by(assay_date,SampleID,Variate) %>%
summarize(Avg_activity = mean(Activity,na.rm=TRUE)) %>%
filter(Avg_activity >= 0.2 & Variate%in%c("AG","AP","BG","CBH","LAP","PER","PPO") |
Avg_activity >= 0.05 & Variate%in%c("BX","NAG")) %>%
ungroup() %>%
select(-Avg_activity) %>%
unique()
Enz_reclean <- Enz_fit_fail_1 %>%
left_join(Enz_all,by=c("assay_date","SampleID","Variate"))
# Visualize
ggplot(filter(Enz_reclean,Variate=="AG"),aes(Concen,Activity)) +
geom_point() + facet_wrap(SampleID~.)
ggplot(filter(Enz_reclean,Variate=="AP"),aes(Concen,Activity)) +
geom_point() + facet_wrap(SampleID~.)
ggplot(filter(Enz_reclean,Variate=="BG"),aes(Concen,Activity)) +
geom_point() + facet_wrap(SampleID~.)
ggplot(filter(Enz_reclean,Variate=="BX"),aes(Concen,Activity)) +
geom_point() + facet_wrap(SampleID~.)
ggplot(filter(Enz_reclean,Variate=="CBH"),aes(Concen,Activity)) +
geom_point() + facet_wrap(SampleID~.)
ggplot(filter(Enz_reclean,Variate=="LAP"),aes(Concen,Activity)) +
geom_point() + facet_wrap(SampleID~.)
ggplot(filter(Enz_reclean,Variate=="NAG"),aes(Concen,Activity)) +
geom_point() + facet_wrap(SampleID~.)
ggplot(filter(Enz_reclean,Variate=="PER"),aes(Concen,Activity)) +
geom_point() + facet_wrap(SampleID~.)
ggplot(filter(Enz_reclean,Variate=="PPO"),aes(Concen,Activity)) +
geom_point() + facet_wrap(SampleID~.)