-
Notifications
You must be signed in to change notification settings - Fork 2
/
database_interface.py
1295 lines (1102 loc) · 62.3 KB
/
database_interface.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Copyright 2017. All rights reserved. See AUTHORS.txt
# Licensed under the Apache License, Version 2.0 which is in LICENSE.txt
"""
Defines several functions to finish processing EIA data and upload to the
Switch-WECC database. Some functions may be used for other purposes.
"""
import os, sys
import pandas as pd
import numpy as np
import getpass
from IPython import embed
from ggplot import *
from utils import connect_to_db_and_run_query, append_historic_output_to_csv, connect_to_db_and_push_df
coal_codes = ['ANT','BIT','LIG','SGC','SUB','WC','RC']
outputs_directory = 'processed_data'
# Disable false positive warnings from pandas
pd.options.mode.chained_assignment = None
def pull_generation_projects_data(gen_scenario_id):
    """
    Return generation plant data for a specific existing-and-planned scenario id.

    Queries the switch_wecc database, joining generation_plant with
    generation_plant_existing_and_planned on generation_plant_id, filtered by
    the given scenario id. For now, only used to compare the old AMPL dataset
    with new heat rates.

    Parameters:
        gen_scenario_id: value matched against
            generation_plant_existing_and_planned_scenario_id in the DB.

    Returns:
        DataFrame with one row per generation plant in the scenario.
    """
    # NOTE: print is written in parenthesized form so the module works under
    # both Python 2 and Python 3.
    print("Reading in existing and planned generation project data from database...")
    query = "SELECT * \
        FROM generation_plant JOIN generation_plant_existing_and_planned \
        USING (generation_plant_id) \
        WHERE generation_plant_existing_and_planned_scenario_id = {}".format(gen_scenario_id)
    db_gens = connect_to_db_and_run_query(query=query, database='switch_wecc')
    print("=======")
    print("Read in {} projects from the database for id {}, with {:.0f} GW of capacity".format(
        len(db_gens), gen_scenario_id, db_gens['capacity'].sum()/1000.0))
    # Only plants with a positive heat rate are thermal; use them for the
    # capacity-weighted average reported below.
    thermal_db_gens = db_gens[db_gens['full_load_heat_rate'] > 0]
    print("Weighted average of heat rate: {:.3f} MMBTU/MWh".format(
        thermal_db_gens['capacity'].dot(thermal_db_gens['full_load_heat_rate'])/thermal_db_gens['capacity'].sum()))
    print("=======")
    return db_gens
def filter_plants_by_region_id(region_id, year, host='localhost', area=0.5):
    """
    Filter generation plant data by NERC Region, according to the provided id.

    Generation plants w/o Region get assigned to the NERC Region with which more
    than a certain percentage of its County area intersects (by default, 50%).
    A list is saved with Counties and States belonging to the specified Region.
    Both County and State are necessary to correctly assign plants (some County
    names exist in multiple States).

    Parameters:
        region_id: gid of the region in the ventyx_nerc_reg_region GIS table.
        year: vintage of the generation_projects_{year}.tab input file.
        host: database host used for the GIS queries.
        area: minimum fraction (0-1) of a county's area that must fall inside
            the region for the county's plants to be assigned to it.

    Returns:
        DataFrame with the filtered generator data (existing and proposed).
    """
    # Map full state names (as returned by the GIS query) to the 2-letter
    # codes used in the EIA data files.
    state_dict = {
        'Alabama':'AL',
        'Alaska':'AK',
        'Arizona':'AZ',
        'Arkansas':'AR',
        'California':'CA',
        'Colorado':'CO',
        'Connecticut':'CT',
        'Delaware':'DE',
        'Florida':'FL',
        'Georgia':'GA',
        'Hawaii':'HI',
        'Idaho':'ID',
        'Illinois':'IL',
        'Indiana':'IN',
        'Iowa':'IA',
        'Kansas':'KS',
        'Kentucky':'KY',
        'Louisiana':'LA',
        'Maine':'ME',
        'Maryland':'MD',
        'Massachusetts':'MA',
        'Michigan':'MI',
        'Minnesota':'MN',
        'Mississippi':'MS',
        'Missouri':'MO',
        'Montana':'MT',
        'Nebraska':'NE',
        'Nevada':'NV',
        'New Hampshire':'NH',
        'New Jersey':'NJ',
        'New Mexico':'NM',
        'New York':'NY',
        'North Carolina':'NC',
        'North Dakota':'ND',
        'Ohio':'OH',
        'Oklahoma':'OK',
        'Oregon':'OR',
        'Pennsylvania':'PA',
        'Rhode Island':'RI',
        'South Carolina':'SC',
        'South Dakota':'SD',
        'Tennessee':'TN',
        'Texas':'TX',
        'Utah':'UT',
        'Vermont':'VT',
        'Virginia':'VA',
        'Washington':'WA',
        'West Virginia':'WV',
        'Wisconsin':'WI',
        'Wyoming':'WY'
    }
    print("Getting region name from database...")
    query = "SELECT regionabr FROM ventyx_nerc_reg_region WHERE gid={}".format(
        region_id)
    region_name = connect_to_db_and_run_query(query=query,
        database='switch_gis', host=host)['regionabr'][0]
    counties_path = os.path.join('other_data', '{}_counties.tab'.format(region_name))
    # The county list is cached on disk; only hit the GIS database on a miss.
    if not os.path.exists(counties_path):
        # assign county if (area)% or more of its area falls in the region
        query = "SELECT name, state\
            FROM ventyx_nerc_reg_region regions CROSS JOIN us_counties cts\
            JOIN (SELECT DISTINCT state, state_fips FROM us_states) sts \
            ON (sts.state_fips=cts.statefp) \
            WHERE regions.gid={} AND\
            ST_Area(ST_Intersection(cts.the_geom, regions.the_geom))/\
            ST_Area(cts.the_geom)>={}".format(region_id, area)
        print("\nGetting counties and states for the region from database...")
        region_counties = pd.DataFrame(connect_to_db_and_run_query(query=query,
            database='switch_gis', host=host)).rename(columns={'name':'County','state':'State'})
        region_counties.replace(state_dict, inplace=True)
        region_counties.to_csv(counties_path, sep='\t', index=False)
    else:
        print("Reading counties from .tab file...")
        region_counties = pd.read_csv(counties_path, sep='\t', index_col=None)
    generators = pd.read_csv(
        os.path.join('processed_data','generation_projects_{}.tab'.format(year)), sep='\t')
    # Title-case counties so they match the capitalization in the cached list.
    generators.loc[:,'County'] = generators['County'].map(lambda c: str(c).title())
    print("\nRead in data for {} generators, of which:".format(len(generators)))
    print("--{} are existing".format(len(generators[generators['Operational Status']=='Operable'])))
    print("--{} are proposed".format(len(generators[generators['Operational Status']=='Proposed'])))
    # Keep plants already tagged with this region, plus untagged plants whose
    # (County, State) pair belongs to the region.
    generators_with_assigned_region = generators.loc[generators['Nerc Region'] == region_name]
    generators = generators[generators['Nerc Region'].isnull()]
    generators_without_assigned_region = pd.merge(generators, region_counties, how='inner', on=['County','State'])
    generators = pd.concat([
        generators_with_assigned_region,
        generators_without_assigned_region],
        axis=0)
    # Lump every coal fuel code into the single 'COAL' energy source.
    generators.replace(
        to_replace={'Energy Source':coal_codes, 'Energy Source 2':coal_codes,
        'Energy Source 3':coal_codes}, value='COAL', inplace=True)
    existing_gens = generators[generators['Operational Status']=='Operable']
    proposed_gens = generators[generators['Operational Status']=='Proposed']
    print("=======")
    print("Filtered to {} projects in the {} region, of which:".format(
        len(generators), region_name))
    print("--{} are existing with {:.0f} GW of capacity".format(
        len(existing_gens), existing_gens['Nameplate Capacity (MW)'].sum()/1000.0))
    print("--{} are proposed with {:.0f} GW of capacity".format(
        len(proposed_gens), proposed_gens['Nameplate Capacity (MW)'].sum()/1000.0))
    print("=======")
    return generators
def compare_eia_heat_rates_to_ampl_projs(year):
    """
    Compare calculated 'Best Heat Rates' for EIA plants with full load heat
    rates of previously stored Switch AMPL data (generation scenario id 1) in
    the database.

    ToDo: Only EIA860 data is merged with existing AMPL data, so no 'Best Heat
    Rate' column is present. Need to also merge with EIA923 processed data
    (historic_heat_rates_WIDE.tab file).

    Parameters:
        year: vintage of the EIA data files to read.

    Returns:
        The comparison DataFrame (also printed to a tab file in
        processed_data/heat_rate_comparison.tab).
    """
    db_gen_projects = pull_generation_projects_data(gen_scenario_id=1).rename(
        columns={'name':'Plant Name', 'gen_tech':'Prime Mover'})
    # Map AMPL technology names onto EIA prime mover codes so the two
    # datasets can be merged on ('Plant Name', 'Prime Mover').
    db_gen_projects.loc[:,'Prime Mover'].replace(
        {
            'Coal_Steam_Turbine':'ST',
            'Gas_Steam_Turbine':'ST',
            'Gas_Combustion_Turbine':'GT',
            'Gas_Combustion_Turbine_Cogen':'GT',
            'CCGT':'CC',
            'DistillateFuelOil_Combustion_Turbine':'GT',
            'DistillateFuelOil_Internal_Combustion_Engine':'IC',
            'Geothermal':'ST',
            'Gas_Internal_Combustion_Engine':'IC',
            'Bio_Gas_Internal_Combustion_Engine':'IC',
            'Bio_Gas_Steam_Turbine':'ST'
        },
        inplace=True)
    # Region 13 is WECC (hardcoded elsewhere in this module as well).
    eia_gen_projects = filter_plants_by_region_id(13, year)
    df = pd.merge(db_gen_projects, eia_gen_projects,
        on=['Plant Name','Prime Mover'], how='left').loc[:,[
        'Plant Name','gen_tech','energy_source','full_load_heat_rate',
        'Best Heat Rate','Prime Mover','Energy Source','Energy Source 2','Operating Year']]
    # Only keep projects that actually have a heat rate in the DB.
    df = df[df['full_load_heat_rate']>0]
    print("\nPrinting intersection of DB and EIA generation projects that have a specified heat rate to heat_rate_comparison.tab")
    fpath = os.path.join('processed_data','heat_rate_comparison.tab')
    # to_csv accepts a path directly; no need to manage a file handle.
    df.to_csv(fpath, sep='\t', header=True, index=False)
    return df
def assign_heat_rates_to_projects(generators, year):
    """
    Assign calculated heat rates based on EIA923 data to plants parsed from
    EIA860 data.

    Coal plants with better heat rates than 8.607 MMBTU/MWh (still need to add
    the reference to this best historic heat rate of 2015) and other thermal
    plants with heat rate better (lower) than 6.711 MMBTU/MWh are ignored and
    get assigned an average heat rate, since we assume a report error has taken
    place. The top and bottom outliers get replaced by the heat rate at the
    corresponding percentile, to remove unrealistic values caused by reporting
    errors.

    Heat rate averages used to replace unrealistic values and to be assigned to
    projects without heat rate are calculated as the average heat rate of plants
    with the same technology, energy source and vintage (see
    calculate_avg_heat_rate below). The same treatment is applied to proposed
    (new) projects, using the most recent years as the vintage window.

    Heat rate distributions per technology and energy source are plotted and
    printed to a PDF file in order to visually inspect them.

    Parameters:
        generators: DataFrame with all generators (existing and proposed).
        year: data year, used to filter historic heat rates and as the vintage
            for proposed projects.

    Returns:
        The original DataFrame with a 'Best Heat Rate' column added.
    """
    # Map EIA fuel codes to the fuel names used in the Switch database.
    fuels = {
        'LFG':'Bio_Gas',
        'OBG':'Bio_Gas',
        'AB':'Bio_Solid',
        'BLQ':'Bio_Liquid',
        'NG':'Gas',
        'OG':'Gas',
        'PG':'Gas',
        'DFO':'DistillateFuelOil',
        'JF':'ResidualFuelOil',
        'COAL':'Coal',
        'GEO':'Geothermal',
        'NUC':'Uranium',
        'PC':'Coal',
        'SUN':'Solar',
        'WDL':'Bio_Liquid',
        'WDS':'Bio_Solid',
        'MSW':'Bio_Solid',
        'PUR':'Purchased_Steam',
        'WH':'Waste_Heat',
        'OTH':'Other',
        'WAT':'Water',
        'MWH':'Electricity',
        'WND':'Wind'
    }
    generators = generators.replace({'Energy Source':fuels})
    existing_gens = generators[generators['Operational Status']=='Operable']
    print("-------------------------------------")
    print("There are {} existing thermal projects that sum up to {:.1f} GW.".format(
        len(existing_gens[existing_gens['Prime Mover'].isin(['CC','GT','IC','ST'])]),
        existing_gens[existing_gens['Prime Mover'].isin(['CC','GT','IC','ST'])][
            'Nameplate Capacity (MW)'].sum()/1000))
    heat_rate_data = pd.read_csv(
        os.path.join('processed_data','historic_heat_rates_WIDE.tab'), sep='\t').rename(
        columns={'Plant Code':'EIA Plant Code'})
    heat_rate_data = heat_rate_data[heat_rate_data['Year']==year]
    heat_rate_data = heat_rate_data.replace({'Energy Source':fuels})
    # NOTE(review): suffixes=('','') would make pandas raise on overlapping
    # non-key columns; it only works because the selected columns don't
    # overlap. Left as-is to preserve behavior.
    thermal_gens = pd.merge(
        existing_gens, heat_rate_data[['EIA Plant Code','Prime Mover','Energy Source','Best Heat Rate']],
        how='left', suffixes=('',''),
        on=['EIA Plant Code','Prime Mover','Energy Source']).drop_duplicates()
    thermal_gens = thermal_gens[thermal_gens['Prime Mover'].isin(['CC','GT','IC','ST'])]
    # Replace null and unrealistic heat rates by average values per technology,
    # fuel, and vintage. Also, set HR of top and bottom outliers to max and min
    null_heat_rates = thermal_gens['Best Heat Rate'].isnull()
    unrealistic_heat_rates = (((thermal_gens['Energy Source'] == 'Coal') &
        (thermal_gens['Best Heat Rate'] < 8.607)) |
        ((thermal_gens['Energy Source'] != 'Coal') &
        (thermal_gens['Best Heat Rate'] < 6.711)))
    print("{} generators don't have heat rate data specified ({:.1f} GW of capacity)".format(
        len(thermal_gens[null_heat_rates]), thermal_gens[null_heat_rates]['Nameplate Capacity (MW)'].sum()/1000.0))
    print("{} generators have better heat rate than the best historical records ({} GW of capacity)".format(
        len(thermal_gens[unrealistic_heat_rates]), thermal_gens[unrealistic_heat_rates]['Nameplate Capacity (MW)'].sum()/1000.0))
    thermal_gens_w_hr = thermal_gens[~null_heat_rates & ~unrealistic_heat_rates]
    thermal_gens_wo_hr = thermal_gens[null_heat_rates | unrealistic_heat_rates]
    print("-------------------------------------")
    print("Assigning max/min heat rates per technology and fuel to top .5% / bottom .5%, respectively:")
    # NOTE(review): 0.008 trims ~0.8% per tail, not the 0.5% the message and
    # docstring claim — confirm which fraction is intended before changing.
    n_outliers = int(len(thermal_gens_w_hr)*0.008)
    thermal_gens_w_hr = thermal_gens_w_hr.sort_values('Best Heat Rate')
    min_hr = thermal_gens_w_hr.loc[thermal_gens_w_hr.index[n_outliers],'Best Heat Rate']
    max_hr = thermal_gens_w_hr.loc[thermal_gens_w_hr.index[-1-n_outliers],'Best Heat Rate']
    print("(Total capacity of these plants is {:.1f} GW)".format(
        thermal_gens_w_hr[thermal_gens_w_hr['Best Heat Rate'] < min_hr]['Nameplate Capacity (MW)'].sum()/1000.0 +
        thermal_gens_w_hr[thermal_gens_w_hr['Best Heat Rate'] > max_hr]['Nameplate Capacity (MW)'].sum()/1000.0))
    print("Minimum heat rate is {:.3f}".format(min_hr))
    print("Maximum heat rate is {:.3f}".format(max_hr))
    # Clamp the n_outliers smallest/largest heat rates to min_hr/max_hr.
    for i in range(n_outliers):
        thermal_gens_w_hr.loc[thermal_gens_w_hr.index[i],'Best Heat Rate'] = min_hr
        thermal_gens_w_hr.loc[thermal_gens_w_hr.index[-1-i],'Best Heat Rate'] = max_hr
    def calculate_avg_heat_rate(thermal_gens_df, prime_mover, energy_source, vintage, window=2):
        # Average heat rate of plants with the same technology and fuel whose
        # vintage falls within +-window years; the window is widened by 2 years
        # at a time until at least 4 similar plants are found.
        similar_generators = thermal_gens_df[
            (thermal_gens_df['Prime Mover']==prime_mover) &
            (thermal_gens_df['Energy Source']==energy_source) &
            (thermal_gens_df['Operating Year']>=vintage-window) &
            (thermal_gens_df['Operating Year']<=vintage+window)]
        while len(similar_generators) < 4:
            window += 2
            similar_generators = thermal_gens_df[
                (thermal_gens_df['Prime Mover']==prime_mover) &
                (thermal_gens_df['Energy Source']==energy_source) &
                (thermal_gens_df['Operating Year']>=vintage-window) &
                (thermal_gens_df['Operating Year']<=vintage+window)]
            # Gens span from 1925 to 2015, so a window of 90 years is the maximum
            if window >= 90:
                break
        if len(similar_generators) > 0:
            return similar_generators['Best Heat Rate'].mean()
        else:
            # If no other similar projects exist, return average of technology
            return thermal_gens_df[thermal_gens_df['Prime Mover']==prime_mover]['Best Heat Rate'].mean()
    print("-------------------------------------")
    print("Assigning average heat rates per technology, fuel, and vintage to projects w/o heat rate...")
    for idx in thermal_gens_wo_hr.index:
        pm = thermal_gens_wo_hr.loc[idx,'Prime Mover']
        es = thermal_gens_wo_hr.loc[idx,'Energy Source']
        v = thermal_gens_wo_hr.loc[idx,'Operating Year']
        thermal_gens_wo_hr.loc[idx,'Best Heat Rate'] = calculate_avg_heat_rate(
            thermal_gens_w_hr, pm, es, v)
    thermal_gens = pd.concat([thermal_gens_w_hr, thermal_gens_wo_hr], axis=0)
    # Attach the new 'Best Heat Rate' column back onto the full existing set.
    existing_gens = pd.merge(existing_gens, thermal_gens, on=list(existing_gens.columns), how='left')
    # Plot histograms for resulting heat rates per technology and fuel
    thermal_gens["Technology"] = thermal_gens["Energy Source"].map(str) + ' ' + thermal_gens["Prime Mover"]
    p = ggplot(aes(x='Best Heat Rate',fill='Technology'), data=thermal_gens) + geom_histogram(binwidth=0.5) + facet_wrap("Technology") + ylim(0,30)
    p.save(os.path.join(outputs_directory,'heat_rate_distributions.pdf'))
    proposed_gens = generators[generators['Operational Status']=='Proposed']
    thermal_proposed_gens = proposed_gens[proposed_gens['Prime Mover'].isin(['CC','GT','IC','ST'])]
    other_proposed_gens = proposed_gens[~proposed_gens['Prime Mover'].isin(['CC','GT','IC','ST'])]
    print("There are {} proposed thermal projects that sum up to {:.2f} GW.".format(
        len(thermal_proposed_gens), thermal_proposed_gens['Nameplate Capacity (MW)'].sum()/1000))
    print("Assigning average heat rate of technology and fuel of most recent years...")
    # Proposed plants have no operating history; use the data year as vintage.
    for idx in thermal_proposed_gens.index:
        pm = thermal_proposed_gens.loc[idx,'Prime Mover']
        es = thermal_proposed_gens.loc[idx,'Energy Source']
        thermal_proposed_gens.loc[idx,'Best Heat Rate'] = calculate_avg_heat_rate(
            thermal_gens_w_hr, pm, es, year)
    # Non-thermal proposed plants get no heat rate.
    other_proposed_gens['Best Heat Rate'] = float('nan')
    proposed_gens = pd.concat([thermal_proposed_gens,other_proposed_gens], axis=0)
    return pd.concat([existing_gens, proposed_gens], axis=0)
def finish_project_processing(year):
    """
    Process the scraped EIA data for a given year using the functions above.

    First, plants are read in from the generation_projects_YEAR.tab file, which
    come from the EIA860 form, and filtered by region. For now, region 13
    (WECC) is hardcoded.

    Second, plants are assigned heat rates from the historic_heat_rates_WIDE.tab
    file, which come from the EIA923 form. Plants with missing heat rates are
    assigned averages, and unrealistic heat rate values are replaced by
    reasonable parameters.

    Prints out 3 tab files with resulting data:
        existing_generation_projects_YEAR.tab
        new_generation_projects_YEAR.tab
        uprates_to_generation_projects_YEAR.tab
    These files are later post-processed and pushed into the Switch-WECC
    database of RAEL (UC Berkeley), though data is formatted in a
    general-purpose manner, so it could be used for any other purpose.

    Parameters:
        year: vintage of the EIA data files to process.
    """
    generators = filter_plants_by_region_id(13, year)
    generators = assign_heat_rates_to_projects(generators, year)
    existing_gens = generators[generators['Operational Status']=='Operable']
    proposed_gens = generators[generators['Operational Status']=='Proposed']
    fname = 'existing_generation_projects_{}.tab'.format(year)
    # to_csv accepts a path directly; no need to manage file handles.
    existing_gens.to_csv(os.path.join(outputs_directory, fname),
        sep='\t', encoding='utf-8', index=False)
    uprates = pd.DataFrame()
    new_gens = pd.DataFrame()
    # A proposed unit that matches an existing plant's (plant code, prime
    # mover, energy source) is an uprate of that plant, not a new project.
    for idx in proposed_gens.index:
        pc = proposed_gens.loc[idx,'EIA Plant Code']
        pm = proposed_gens.loc[idx,'Prime Mover']
        es = proposed_gens.loc[idx,'Energy Source']
        existing_units_for_proposed_gen = existing_gens[
            (existing_gens['EIA Plant Code'] == pc) &
            (existing_gens['Prime Mover'] == pm) &
            (existing_gens['Energy Source'] == es)]
        if len(existing_units_for_proposed_gen) == 0:
            new_gens = pd.concat([new_gens, pd.DataFrame(proposed_gens.loc[idx,:]).T], axis=0)
        elif len(existing_units_for_proposed_gen) == 1:
            uprates = pd.concat([uprates, pd.DataFrame(proposed_gens.loc[idx,:]).T], axis=0)
        else:
            # Ambiguous match: more than one existing unit could be uprated.
            print("There is more than one option for uprating plant id {}, prime mover {} and energy source {}".format(int(pc), pm, es))
    fname = 'new_generation_projects_{}.tab'.format(year)
    new_gens.to_csv(os.path.join(outputs_directory, fname),
        sep='\t', encoding='utf-8', index=False)
    fname = 'uprates_to_generation_projects_{}.tab'.format(year)
    uprates.to_csv(os.path.join(outputs_directory, fname),
        sep='\t', encoding='utf-8', index=False)
def upload_generation_projects(year):
"""
Reads existing and new project data previously processed from the EIA forms
in order to upload it to the Switch-WECC database of RAEL, at UC Berkeley.
First, generation project data is read in from the processed tab files.
Projects using Electricity or Purchased Steam as their energy source are
dropped from the generator set.
Projects using Other as their energy source are assigned Gas as default.
Capacity limits are set as total existing and projected capacity for each
project (e.g. no additional capacity additions will be allowed for
predetermined projects in Switch).
Plant-level heat rates are calculated by doing a capacity-weighted average
over the individual heat rates of each unit in the plant that have the same
technology and use the same energy source. This allows obtaining a single
heat rate for plants with units that have different vintages.
Baseload flags are set for all plants that use Nuclear, Coal, or Geothermal
as their energy source.
Variable flags are set for all plants that use Hydro, Photovoltaic, or Wind
Turbine technologies.
Cogen flags are set for all plants that declared being Cogen.
Columns are renamed to match the PSQL database column definitions.
Resulting generation plant data is uploaded to the database with generation
plant scenario id 2. A subsequent aggregated set per technology, energy source,
and load zone is uploaded with id 3.
WARNING: The upload process will clean the database from all previous projects
with ids 2 and 3. This includes:
Hydro capacity factors
Plant cost
Plant build years
Plant scenario members
Plant level data
But not variable capacity factor data (that was uploaded after finishing
this part of the code, so its still in the todo list).
After uploading generation plant data, the geom column is populated with
the geometric object representing the location of the project, for those
projects with latitude and longitude defined.
Then, plants are assigned to load zones:
Plants with geom data are assgined to zones into which their location
falls in.
Plants without lat and long data are assigned to the load zone in which
their County's centroid falls in.
Plants with coordinates out of the WECC region (only a few) are assigned
to the closest WECC load zone if they are within a 100 mile radius from
its boundary. Otherwise, they are dropped from the data set (for now,
only a couple of cases in the East Coast, which must have a reporting
mistake).
Maximum age, outage rates, and variable O&M costs are assigned as
technology-default values.
The Diablo Canyon nuclear power plant is set a maximum age of 40 years.
Uploaded plants are assigned to generation plant scenario id 2.
The uploaded generation plant ids are recovered, so that build year data
can be uploaded for existing and new projects.
Fixed and investment costs are assigned a default value of 0 to all plants.
Hydro capacity factors are uploaded for each hydro plant, according to
nameplate capacity. Minimum flows are set to a default of 0.5 times the
average flow. The hydro scenario id is set to 2.
The plant dataset is then aggregated by technology, energy source, and load
zone, considering heat rate windows of 1 MMBTU/MWh (so that plants with
significantly different heat rates are not lumped in together). Heat rates
are averaged by weighting the capacity of each plant. Other properties,
such as capacity limit, are simply summed.
The dataset is uploaded with id 3, and build years, hydro capacity factors,
and all other data is processed in the same way as for id 2.
"""
user = getpass.getpass('Enter username for the database:')
password = getpass.getpass('Enter database password for user {}:'.format(user))
def read_output_csv(fname):
try:
return pd.read_csv(os.path.join(outputs_directory,fname), sep='\t', index_col=None)
except:
print "Failed to read file {}. It will be considered to be empty.".format(fname)
return None
existing_gens = read_output_csv('existing_generation_projects_{}.tab'.format(year))
new_gens = read_output_csv('new_generation_projects_{}.tab'.format(year))
uprates = read_output_csv('uprates_to_generation_projects_{}.tab'.format(year))
if uprates is not None:
print "Read data for {} existing projects, {} new projects, and {} uprates".format(
len(existing_gens), len(new_gens), len(uprates))
print "Existing capacity: {:.2f} GW".format(existing_gens['Nameplate Capacity (MW)'].sum()/1000.0)
print "Proposed capacity: {:.2f} GW".format(new_gens['Nameplate Capacity (MW)'].sum()/1000.0)
print "Capacity uprates: {:.2f} GW".format(uprates['Nameplate Capacity (MW)'].sum()/1000.0)
else:
print "Read data for {} existing projects and {} new projects".format(
len(existing_gens), len(new_gens))
print "Existing capacity: {:.2f} GW".format(existing_gens['Nameplate Capacity (MW)'].sum()/1000.0)
print "Proposed capacity: {:.2f} GW".format(new_gens['Nameplate Capacity (MW)'].sum()/1000.0)
generators = pd.concat([existing_gens, new_gens], axis=0)
ignore_energy_sources = ['Purchased_Steam','Electricity']
print ("Dropping projects that use Batteries or Purchased Steam, since these"
" are not modeled in Switch, totalizing {:.2f} GW of capacity").format(
generators[generators['Energy Source'].isin(
ignore_energy_sources)]['Nameplate Capacity (MW)'].sum()/1000.0)
print "Replacing 'Other' for 'Gas' as energy source for {:.2f} GW of capacity".format(
generators[generators['Energy Source'] == 'Other'][
'Nameplate Capacity (MW)'].sum()/1000.0)
generators.drop(generators[generators['Energy Source'].isin(
ignore_energy_sources)].index, inplace=True)
generators.replace({'Energy Source':{'Other':'Gas'}}, inplace=True)
def weighted_avg(group, avg_name, weight_name):
    """Return the average of group[avg_name] weighted by group[weight_name].

    Falls back to the unweighted mean when the weights sum to zero.
    http://stackoverflow.com/questions/10951341/pandas-dataframe-aggregate-function-using-multiple-columns
    """
    d = group[avg_name]
    w = group[weight_name]
    total_weight = w.sum()
    # NumPy/pandas division by zero yields NaN/inf rather than raising
    # ZeroDivisionError, so the original try/except fallback never ran.
    # Test the weight sum explicitly instead.
    if total_weight == 0:
        return d.mean()
    return (d * w).sum() / total_weight
# Projects are keyed by plant, prime mover and fuel throughout this script.
index_cols = ['EIA Plant Code','Prime Mover','Energy Source']
print "Calculating capacity-weighted average heat rates per plant, technology and energy source..."
# The groupby/apply yields an unnamed column (label 0) holding the
# capacity-weighted heat rate; zeros are treated as missing. The right
# merge replaces per-unit heat rates with the per-group average.
generators = pd.merge(generators,
    pd.DataFrame(generators.groupby(index_cols).apply(weighted_avg, 'Best Heat Rate',
        'Nameplate Capacity (MW)')).reset_index().replace(0, float('nan')),
    how='right',
    on=index_cols).drop('Best Heat Rate', axis=1)
print "Calculating maximum capacity limits per plant, technology and energy source..."
# Sum nameplate capacity within each group to obtain the group's capacity
# limit; every other column takes its group maximum.
gb = generators.groupby(index_cols)
agg_generators = gb.agg({col:sum if col == 'Nameplate Capacity (MW)' else 'max'
    for col in generators.columns}).rename(columns=
    {'Nameplate Capacity (MW)':'capacity_limit_mw'})
generators = pd.merge(generators, agg_generators[index_cols+['capacity_limit_mw']],
    on=index_cols, how='right')
print "Assigning baseload, variable and cogen flags..."
# Simple fuel / prime-mover heuristics for the Switch operational flags.
generators.loc[:,'is_baseload'] = np.where(generators['Energy Source'].isin(
    ['Nuclear','Coal','Geothermal']),True,False)
generators.loc[:,'is_variable'] = np.where(generators['Prime Mover'].isin(
    ['HY','PV','WT']),True,False)
if 'Cogen' not in generators.columns:
    generators.loc[:,'is_cogen'] = False
else:
    generators.loc[:,'is_cogen'] = np.where(generators['Cogen'] == 'Y',True,False)
# Map EIA spreadsheet column names onto the DB schema column names. The
# key 0 is the unnamed weighted-heat-rate column created by the
# groupby/apply merge above.
database_column_renaming_dict = {
    'EIA Plant Code':'eia_plant_code',
    'Plant Name':'name',
    'Prime Mover':'gen_tech',
    'Energy Source':'energy_source',
    0:'full_load_heat_rate',
    'Operating Year':'build_year',
    'Nameplate Capacity (MW)':'capacity'
    }
generators.rename(columns=database_column_renaming_dict, inplace=True)
# Blank cells become NaN so they end up as NULL in the database.
generators.replace(' ',float('nan'), inplace=True)
# Require explicit operator confirmation before the destructive DB steps.
# NOTE(review): getpass hides the typed answer; a plain visible prompt
# would be a more natural fit for a y/n question.
carry_on = getpass.getpass('WARNING: In order to push projects into the DB,'
    'all projects currently in the generation_plant table that are'
    'not present in the generation_plant_scenario_member table will be'
    'removed. Continue? [y/n]')
while carry_on not in ['y','n']:
    carry_on = getpass.getpass('WARNING: In order to push projects into the DB,'
        'all projects currently in the generation_plant table that are'
        'not present in the generation_plant_scenario_member table will be'
        'removed. Continue? [y/n]')
if carry_on == 'n':
    sys.exit()
print "\n-----------------------------"
print "Pushing generation plants to the DB:\n"
# Make sure the "switch" schema is on the search path
# Drop NOT NULL constraint for load_zone_id & max_age cols to avoid raising error
query = 'ALTER TABLE "generation_plant" ALTER "load_zone_id" DROP NOT NULL;'
connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True)
query = 'ALTER TABLE "generation_plant" ALTER "max_age" DROP NOT NULL;'
connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True)
# First, delete previously stored projects for the EIA scenario id
gen_scenario_id = 2.0
query = 'DELETE FROM hydro_historical_monthly_capacity_factors\
WHERE hydro_simple_scenario_id = {}'.format(gen_scenario_id)
connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True)
query = 'DELETE FROM generation_plant_scenario_member\
WHERE generation_plant_scenario_id = {}'.format(gen_scenario_id)
connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True)
query = 'DELETE FROM generation_plant_cost\
WHERE generation_plant_cost_scenario_id = {}'.format(gen_scenario_id)
connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True)
query = 'DELETE FROM generation_plant_existing_and_planned\
WHERE generation_plant_existing_and_planned_scenario_id = {}'.format(gen_scenario_id)
connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True)
# It is necessary to temporarily disable triggers when deleting from
# generation_plant table, because of multiple fkey constraints
query = 'SET session_replication_role = replica;\
DELETE FROM generation_plant\
WHERE generation_plant_id NOT IN\
(SELECT generation_plant_id FROM generation_plant_scenario_member);\
SET session_replication_role = DEFAULT;'
connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True)
print "Deleted previously stored projects for the EIA dataset (id 2). Pushing data..."
query = 'SELECT last_value FROM generation_plant_id_seq'
first_gen_id = connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True).iloc[0,0] + 1
generators_to_db = generators[['name','gen_tech','capacity_limit_mw',
'full_load_heat_rate','is_variable','is_baseload','is_cogen',
'energy_source','eia_plant_code', 'Latitude','Longitude','County',
'State']].drop_duplicates()
connect_to_db_and_push_df(df=generators_to_db,
col_formats=("(DEFAULT,%s,%s,NULL,NULL,%s,NULL,NULL,NULL,%s,NULL,NULL,"
"NULL,%s,%s,%s,%s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,%s,%s,%s,%s,%s,NULL)"),
table='generation_plant',
database='switch_wecc', user=user, password=password, quiet=True)
print "Successfully pushed generation plants!"
query = 'SELECT last_value FROM generation_plant_id_seq'
last_gen_id = connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True).iloc[0,0]
# Populate geometry column for GIS work
query = "UPDATE generation_plant\
SET geom = ST_SetSRID(ST_MakePoint(longitude, latitude), 4326)\
WHERE longitude IS NOT NULL AND latitude IS NOT NULL AND\
generation_plant_id BETWEEN {} AND {}".format(first_gen_id, last_gen_id)
connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True)
print "\nAssigning load zones..."
query = "UPDATE generation_plant SET load_zone_id = z.load_zone_id\
FROM load_zone z\
WHERE ST_contains(boundary, geom) AND\
generation_plant_id BETWEEN {} AND {}".format(first_gen_id, last_gen_id)
connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True)
n_plants_assigned_by_lat_long = connect_to_db_and_run_query("SELECT count(*)\
FROM generation_plant WHERE load_zone_id IS NOT NULL AND\
generation_plant_id BETWEEN {} AND {}".format(first_gen_id, last_gen_id),
database='switch_wecc', user=user, password=password, quiet=True).iloc[0,0]
print "--Assigned load zone according to lat & long to {} plants".format(
n_plants_assigned_by_lat_long)
query = "UPDATE generation_plant g SET load_zone_id = z.load_zone_id\
FROM us_counties c\
JOIN load_zone z ON ST_contains(z.boundary, ST_centroid(c.the_geom))\
WHERE g.load_zone_id IS NULL AND g.state = c.state_name AND g.county = c.name\
AND generation_plant_id BETWEEN {} AND {}".format(first_gen_id, last_gen_id)
connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True)
n_plants_assigned_by_county_state = connect_to_db_and_run_query("SELECT count(*)\
FROM generation_plant WHERE load_zone_id IS NOT NULL AND\
generation_plant_id BETWEEN {} AND {}".format(first_gen_id, last_gen_id),
database='switch_wecc', user=user, password=password, quiet=True
).iloc[0,0] - n_plants_assigned_by_lat_long
print "--Assigned load zone according to county & state to {} plants".format(
n_plants_assigned_by_county_state)
# Plants that are located outside of the WECC region boundary get assigned
# to the nearest load zone, ONLY if they are located less than 100 miles
# out of the boundary
query = "UPDATE generation_plant AS g1 SET load_zone_id = lz1.load_zone_id\
FROM load_zone lz1\
WHERE g1.load_zone_id is NULL AND g1.geom IS NOT NULL\
AND g1.generation_plant_id between {} AND {}\
AND ST_Distance(g1.geom::geography,lz1.boundary::geography)/1609 < 100\
AND ST_Distance(g1.geom::geography,lz1.boundary::geography)/1609 = \
(SELECT min(ST_Distance(g2.geom::geography,lz2.boundary::geography)/1609)\
FROM generation_plant g2\
CROSS JOIN load_zone lz2\
WHERE g2.load_zone_id is NULL AND g2.geom IS NOT NULL\
AND g2.generation_plant_id = g1.generation_plant_id)".format(first_gen_id, last_gen_id)
connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True)
n_plants_assigned_to_nearest_lz = connect_to_db_and_run_query("SELECT count(*)\
FROM generation_plant WHERE load_zone_id IS NOT NULL AND\
generation_plant_id BETWEEN {} AND {}".format(first_gen_id, last_gen_id),
database='switch_wecc', user=user, password=password, quiet=True
).iloc[0,0] - n_plants_assigned_by_lat_long - n_plants_assigned_by_county_state
print "--Assigned load zone according to nearest load zone to {} plants".format(
n_plants_assigned_to_nearest_lz)
plants_wo_load_zone_count_and_cap = connect_to_db_and_run_query("SELECT count(*),\
sum(capacity_limit_mw) FROM generation_plant WHERE load_zone_id IS NULL\
AND generation_plant_id BETWEEN {} AND {}".format(first_gen_id, last_gen_id),
database='switch_wecc', user=user, password=password, quiet=True)
if plants_wo_load_zone_count_and_cap.iloc[0,0] > 0:
print ("--WARNING: There are {:.0f} plants with a total of {:.2f} GW of capacity"
" w/o an assigned load zone. These will be removed.").format(
plants_wo_load_zone_count_and_cap.iloc[0,0],
plants_wo_load_zone_count_and_cap.iloc[0,1]/1000.0)
connect_to_db_and_run_query("DELETE FROM generation_plant\
WHERE load_zone_id IS NULL AND generation_plant_id BETWEEN {}\
AND {}".format(first_gen_id, last_gen_id),
database='switch_wecc', user=user, password=password, quiet=True)
# Assign default technology values
# (per energy source + gen tech, from the generation_plant_technologies table).
print "\nAssigning default technology parameter values..."
for param in ['max_age','forced_outage_rate','scheduled_outage_rate', 'variable_o_m']:
    query = "UPDATE generation_plant g SET {} = t.{}\
        FROM generation_plant_technologies t\
        WHERE g.energy_source = t.energy_source AND\
        g.gen_tech = t.gen_tech AND generation_plant_id BETWEEN {} AND\
        {}".format(param, param, first_gen_id, last_gen_id)
    connect_to_db_and_run_query(query,
        database='switch_wecc', user=user, password=password, quiet=True)
    print "--Assigned {}".format(param)
# Manually assign maximum age for diablo canyon
query = "UPDATE generation_plant SET max_age = 40 WHERE name = 'Diablo Canyon'"
connect_to_db_and_run_query(query,
    database='switch_wecc', user=user, password=password, quiet=True)
# Now, create scenario and assign ids for scenario #2
# Get the actual list of ids in the table, since some rows were deleted
# because no load zone could be assigned to those projects
print "\nAssigning all individual plants to scenario id {}...".format(gen_scenario_id)
query = 'SELECT generation_plant_id FROM generation_plant\
    WHERE generation_plant_id BETWEEN {} AND {}'.format(first_gen_id, last_gen_id)
gen_plant_ids = connect_to_db_and_run_query(query,
    database='switch_wecc', user=user, password=password, quiet=True)
gen_plant_ids['generation_plant_scenario_id'] = gen_scenario_id
connect_to_db_and_push_df(df=gen_plant_ids[['generation_plant_scenario_id','generation_plant_id']],
    col_formats="(%s,%s)", table='generation_plant_scenario_member',
    database='switch_wecc', user=user, password=password, quiet=True)
print "Successfully assigned pushed generation plants to a scenario!"
# Recover original NOT NULL constraint
query = 'ALTER TABLE "generation_plant" ALTER "load_zone_id" SET NOT NULL;'
connect_to_db_and_run_query(query,
    database='switch_wecc', user=user, password=password, quiet=True)
query = 'ALTER TABLE "generation_plant" ALTER "max_age" SET NOT NULL;'
connect_to_db_and_run_query(query,
    database='switch_wecc', user=user, password=password, quiet=True)
# Get the list of indexes of plants actually uploaded
print "\nAssigning build years to generation plants..."
query = 'SELECT * FROM generation_plant\
    JOIN generation_plant_scenario_member USING (generation_plant_id)\
    WHERE generation_plant_scenario_id = {}'.format(gen_scenario_id)
gens_in_db = connect_to_db_and_run_query(query,
    database='switch_wecc', user=user, password=password, quiet=True)
gen_indexes_in_db = gens_in_db[['generation_plant_id','eia_plant_code','energy_source','gen_tech']]
# Create the df and upload it
# (join DB-assigned plant ids back onto the local generators frame).
build_years_df = pd.merge(generators, gen_indexes_in_db,
    on=['eia_plant_code','energy_source','gen_tech'])[['generation_plant_id',
    'build_year','capacity']]
build_years_df['generation_plant_existing_and_planned_scenario_id'] = gen_scenario_id
build_years_df = build_years_df[[
    'generation_plant_existing_and_planned_scenario_id','generation_plant_id',
    'build_year','capacity']]
connect_to_db_and_push_df(df=build_years_df,
    col_formats="(%s,%s,%s,%s)", table='generation_plant_existing_and_planned',
    database='switch_wecc', user=user, password=password, quiet=True)
print "Successfully uploaded build years!"
print "\nAssigning fixed and investment costs to generation plants..."
cost_df = build_years_df.rename(columns={
'generation_plant_existing_and_planned_scenario_id':
'generation_plant_cost_scenario_id'}).drop('capacity', axis=1)
cost_df['fixed_o_m'] = 0
cost_df['overnight_cost'] = 0
connect_to_db_and_push_df(df=cost_df,
col_formats="(%s,%s,%s,%s,%s)", table='generation_plant_cost',
database='switch_wecc', user=user, password=password, quiet=True)
print "Successfully uploaded fixed and capital costs!"
# Read hydro capacity factor data, merge with generators in the database, and upload
print "\nUploading hydro capacity factors..."
hydro_cf = read_output_csv('historic_hydro_capacity_factors_NARROW.tab').rename(
columns={'Plant Code':'eia_plant_code','Prime Mover':'gen_tech'})
hydro_cf = pd.merge(hydro_cf,gen_indexes_in_db[['generation_plant_id','eia_plant_code','gen_tech']],
on=['eia_plant_code','gen_tech'], how='inner')
hydro_cf.rename(columns={'Month':'month','Year':'year'}, inplace=True)
hydro_cf.loc[:,'hydro_avg_flow_mw'] = hydro_cf.loc[:,'Capacity Factor'] * hydro_cf.loc[:,'Nameplate Capacity (MW)']
hydro_cf.loc[:,'hydro_min_flow_mw'] = hydro_cf.loc[:,'hydro_avg_flow_mw'] / 2
hydro_cf.loc[:,'hydro_simple_scenario_id'] = gen_scenario_id
hydro_cf = hydro_cf[['hydro_simple_scenario_id','generation_plant_id',
'year','month','hydro_min_flow_mw','hydro_avg_flow_mw']]
hydro_cf = hydro_cf.fillna(0.01)
connect_to_db_and_push_df(df=hydro_cf,
col_formats="(%s,%s,%s,%s,%s,%s)", table='hydro_historical_monthly_capacity_factors',
database='switch_wecc', user=user, password=password, quiet=True)
print "Successfully uploaded hydro capacity factors!"
print "\n-----------------------------"
print "Aggregating projects by load zone..."
# First, group by load zone, gen tech, energy source and heat rate
# (while calculating a capacity-weighted average heat rate)
gens_in_db['hr_group'] = gens_in_db['full_load_heat_rate'].fillna(0).round()
gens_in_db['full_load_heat_rate'] *= gens_in_db['capacity_limit_mw']
gens_in_db_cols = gens_in_db.columns
gb = gens_in_db.groupby(['gen_tech','load_zone_id','energy_source',
'hr_group'])
aggregated_gens = gb.agg(
{col:(sum if col in ['capacity_limit_mw','full_load_heat_rate']
else 'max') for col in gens_in_db.columns}).reset_index(drop=True)
aggregated_gens['full_load_heat_rate'] /= aggregated_gens['capacity_limit_mw']
aggregated_gens = aggregated_gens[gens_in_db_cols]
# Now, clean up columns
aggregated_gens['name'] = ('LZ_' + aggregated_gens['load_zone_id'].map(str) + '_' +
aggregated_gens['gen_tech'] + '_' + aggregated_gens['energy_source'] + '_HR_' +
aggregated_gens['hr_group'].map(int).map(str))
aggregated_gens.drop(['generation_plant_id','generation_plant_scenario_id',
'eia_plant_code','latitude','longitude','county','state'],
axis=1, inplace=True)
print "Aggregated into {} projects.".format(len(aggregated_gens))
# First, delete previously stored projects for the aggregated plants
gen_scenario_id = 3.0
query = 'DELETE FROM generation_plant_scenario_member\
WHERE generation_plant_scenario_id = {}'.format(gen_scenario_id)
connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True)
query = 'DELETE FROM generation_plant_existing_and_planned\
WHERE generation_plant_existing_and_planned_scenario_id = {}'.format(gen_scenario_id)
connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True)
query = 'DELETE FROM generation_plant_cost\
WHERE generation_plant_cost_scenario_id = {}'.format(gen_scenario_id)
connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True)
query = 'DELETE FROM hydro_historical_monthly_capacity_factors\
WHERE hydro_simple_scenario_id = {}'.format(gen_scenario_id)
connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True)
# It is necessary to temporarily disable triggers when deleting from
# generation_plant table, because of multiple fkey constraints
query = 'SET session_replication_role = replica;\
DELETE FROM generation_plant\
WHERE generation_plant_id NOT IN\
(SELECT generation_plant_id FROM generation_plant_scenario_member);\
SET session_replication_role = DEFAULT;'
connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True)
print "\nDeleted previously stored projects for the load zone-aggregated EIA dataset (id 3). Pushing data..."
query = 'SELECT last_value FROM generation_plant_id_seq'
first_gen_id = connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True).iloc[0,0] + 1
connect_to_db_and_push_df(df=aggregated_gens.drop(['hr_group','geom'], axis=1),
col_formats=("(DEFAULT,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
"%s,%s,%s,%s,%s,%s,%s,%s,%s,NULL,NULL,NULL,NULL,NULL,NULL)"),
table='generation_plant',
database='switch_wecc', user=user, password=password, quiet=True)
print "Successfully pushed aggregated project data!"
query = 'SELECT last_value FROM generation_plant_id_seq'
last_gen_id = connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True).iloc[0,0]
print "\nAssigning all aggregated plants to scenario id {}...".format(gen_scenario_id)
query = 'INSERT INTO generation_plant_scenario_member\
(SELECT {}, generation_plant_id FROM generation_plant\
WHERE generation_plant_id BETWEEN {} AND {})'.format(
gen_scenario_id,first_gen_id, last_gen_id)
connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True)
print "Successfully assigned pushed generation plants to a scenario!"
query = 'SELECT last_value FROM generation_plant_id_seq'
last_gen_id = connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True)
print "\nAssigning build years to generation plants..."
query = 'SELECT * FROM generation_plant\
JOIN generation_plant_scenario_member USING (generation_plant_id)\
WHERE generation_plant_scenario_id = {}'.format(gen_scenario_id)
aggregated_gens_in_db = connect_to_db_and_run_query(query,
database='switch_wecc', user=user, password=password, quiet=True)
aggregated_gens_in_db['hr_group'] = aggregated_gens_in_db['full_load_heat_rate'].fillna(0).round()
aggregated_gens_in_db['generation_plant_existing_and_planned_scenario_id'] = gen_scenario_id
gens_in_db = pd.merge(gens_in_db, generators[['eia_plant_code','energy_source',
'gen_tech','capacity','build_year']],
on=['eia_plant_code','energy_source','gen_tech'], suffixes=('','_y'))
aggregated_gens_bld_yrs = pd.merge(aggregated_gens_in_db, gens_in_db,
on=['load_zone_id','energy_source','gen_tech','hr_group'], suffixes=('','_y'))[[
'generation_plant_existing_and_planned_scenario_id',
'generation_plant_id','build_year','capacity']]
aggregated_gens_bld_yrs_cols = list(aggregated_gens_bld_yrs.columns)
gb = aggregated_gens_bld_yrs.groupby(aggregated_gens_bld_yrs_cols[:-1])
aggregated_gens_bld_yrs = gb.agg(
{col:(sum if col=='capacity' else 'max')
for col in aggregated_gens_bld_yrs.columns}).reset_index(drop=True)
aggregated_gens_bld_yrs = aggregated_gens_bld_yrs[aggregated_gens_bld_yrs_cols]
connect_to_db_and_push_df(df=aggregated_gens_bld_yrs,
col_formats="(%s,%s,%s,%s)",