diff --git a/cycle_2020/utils/loader.py b/cycle_2020/utils/loader.py
index 7ea84eb..d16c3ac 100644
--- a/cycle_2020/utils/loader.py
+++ b/cycle_2020/utils/loader.py
@@ -462,7 +462,7 @@ def load_filing(filing, filename, filing_fieldnames):
 
     #filing does not exist or it failed previously
     try:
-        filing_dict = process_filing.process_electronic_filing(filename)
+        filing_dict = process_filing.process_electronic_filing(filename, dump_full=False)
     except Exception as e:
         logging.log(title="fec2json failed",
                     text="fec2json failed {} {}".format(filing, e),
@@ -591,17 +591,34 @@ def load_filing(filing, filename, filing_fieldnames):
         schb_count = 0
         sche_count = 0
         if 'itemizations' in filing_dict:
-            if 'SchA' in filing_dict['itemizations']:
-                scha_count = load_itemizations(ScheduleA, filing_dict['itemizations']['SchA'])
-            if 'SchB' in filing_dict['itemizations']:
-                schb_count = load_itemizations(ScheduleB, filing_dict['itemizations']['SchB'])
-            if 'SchE' in filing_dict['itemizations']:
-                sche_count = load_itemizations(ScheduleE, filing_dict['itemizations']['SchE'])
-            if 'F57' in filing_dict['itemizations']:
-                sche_count += load_itemizations(ScheduleE, filing_dict['itemizations']['F57'])
-        sys.stdout.write("inserted {} schedule A's\n".format(scha_count))
-        sys.stdout.write("inserted {} schedule B's\n".format(schb_count))
-        sys.stdout.write("inserted {} schedule E's\n".format(sche_count))
+            # itemizations are consumed with next(), so load them in bounded
+            # chunks rather than collecting every row before inserting
+            load_chunk_size = 20000
+            complete = False
+            while not complete:
+                itemization_dict = {}
+                # range() restarts the row budget for every chunk
+                for _ in range(load_chunk_size):
+                    try:
+                        line = next(filing_dict['itemizations'])
+                    except StopIteration:
+                        complete = True
+                        break
+                    itemization_type = process_filing.get_itemization_type(line.get('form_type'))
+                    itemization_dict.setdefault(itemization_type, []).append(line)
+
+                # add to the running totals so they cover all chunks
+                if 'SchA' in itemization_dict:
+                    scha_count += load_itemizations(ScheduleA, itemization_dict['SchA'])
+                if 'SchB' in itemization_dict:
+                    schb_count += load_itemizations(ScheduleB, itemization_dict['SchB'])
+                if 'SchE' in itemization_dict:
+                    sche_count += load_itemizations(ScheduleE, itemization_dict['SchE'])
+                if 'F57' in itemization_dict:
+                    sche_count += load_itemizations(ScheduleE, itemization_dict['F57'])
+            sys.stdout.write("inserted {} schedule A's\n".format(scha_count))
+            sys.stdout.write("inserted {} schedule B's\n".format(schb_count))
+            sys.stdout.write("inserted {} schedule E's\n".format(sche_count))
 
     except:
         #something failed in the transaction loading, keep the filing as failed
diff --git a/requirements.txt b/requirements.txt
index 342e579..d55398e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,7 +8,7 @@ django-storages
 djangorestframework
 django-pure-pagination
 django-localflavor
--e git+https://github.com/newsdev/fec2json@4655e710a3c74b7991fb963becbcdc71f65ad560#egg=fec2json
+-e git+https://github.com/newsdev/fec2json@5bf66570c248e994086c256b9abc49abc16f2b8c#egg=fec2json
 lxml==4.2.1
 requests
 