forked from oscarsuen/beige-book
-
Notifications
You must be signed in to change notification settings - Fork 0
/
files.py
50 lines (43 loc) · 1.69 KB
/
files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import os
import pandas as pd
from tools import * # pylint: disable=wildcard-import, unused-wildcard-import
def filesizes():
    """Record the on-disk size of every existing text file in a CSV report.

    Iterates over the ``(year, month, region)`` triples yielded by
    ``gen(False)``, resolves each triple to a path with ``get_txt_file`` and,
    for every path that exists, appends one ``year,month,region,filesize``
    row (month zero-padded to two digits, size in bytes as reported by
    ``os.path.getsize``) to ``out/csv/filesizes.csv``. Triples whose path
    does not exist are skipped. Any previous report is overwritten.
    """
    # The context manager guarantees the report is flushed and closed even
    # if gen(), get_txt_file() or os.path.getsize() raises part-way through.
    with open("out/csv/filesizes.csv", "w") as report:
        report.write("year,month,region,filesize\n")
        for year, month, region in gen(False):
            filename = get_txt_file((year, month, region))
            if os.path.exists(filename):
                report.write(f"{year},{month:02d},{region},{os.path.getsize(filename)}\n")
def missings():
    """List every expected text file that is absent from disk in a CSV report.

    Iterates over the ``(year, month, region)`` triples yielded by
    ``gen(False)``, resolves each triple to a path with ``get_txt_file`` and,
    for every path that does NOT exist, appends one ``year,month,region`` row
    (month zero-padded to two digits) to ``out/csv/missing.csv``. Any
    previous report is overwritten.
    """
    # The context manager guarantees the report is flushed and closed even
    # if gen() or get_txt_file() raises part-way through the scan.
    with open("out/csv/missing.csv", "w") as report:
        report.write("year,month,region\n")
        for year, month, region in gen(False):
            filename = get_txt_file((year, month, region))
            if not os.path.exists(filename):
                report.write(f"{year},{month:02d},{region}\n")
def analyze_missing(printing=True, writing=True, n_regions=13):
    """Split the missing-file report into fully and partially missing releases.

    Reads ``out/csv/missing.csv`` (columns ``year``, ``month``, ``region``)
    and groups its rows by ``(year, month)``:

    * groups with exactly ``n_regions`` rows are reported as releases with
      no files at all; only their ``year, month`` keys are written to
      ``out/csv/norelease.csv``;
    * the rows of every other group are written unchanged to
      ``out/csv/incomplete.csv``.

    Args:
        printing: When true, print each result frame to stdout.
        writing: When true, write each result frame to its CSV file.
        n_regions: Row count that marks a ``(year, month)`` group as
            entirely missing. Defaults to 13.
            NOTE(review): presumably the number of regions ``gen`` yields
            per release -- confirm against ``tools``.
    """
    missing = pd.read_csv("out/csv/missing.csv")

    per_release = missing.groupby(["year", "month"]).aggregate(count=("region", "size"))
    empty = per_release[per_release["count"] == n_regions]
    if printing:
        print(empty)
    if writing:
        # columns=[] drops the helper ``count`` column so that only the
        # (year, month) index ends up in the file.
        empty.to_csv("out/csv/norelease.csv", columns=[])

    incomplete = missing.groupby(["year", "month"]).filter(
        lambda group: len(group) != n_regions
    )
    if printing:
        print(incomplete)
    if writing:
        incomplete.to_csv("out/csv/incomplete.csv", index=False)
def analyze_filesize(printing=True, writing=True, min_size=1024):
    """Report text files whose recorded size is below a threshold.

    Reads ``out/csv/filesizes.csv`` (columns ``year``, ``month``, ``region``,
    ``filesize``) and selects the rows whose ``filesize`` is strictly less
    than ``min_size``.

    Args:
        printing: When true, print the selected rows to stdout.
        writing: When true, write the ``year``, ``month`` and ``region``
            columns of the selected rows to ``out/csv/smallfiles.csv``.
        min_size: Exclusive size threshold, in the same unit as the
            ``filesize`` column. Defaults to 1024.
    """
    sizes = pd.read_csv("out/csv/filesizes.csv")
    smallfiles = sizes[sizes["filesize"] < min_size]
    if printing:
        print(smallfiles)
    if writing:
        smallfiles.to_csv(
            "out/csv/smallfiles.csv",
            columns=["year", "month", "region"],
            index=False,
        )
if __name__ == "__main__":
    # Order matters: missings() and filesizes() write the CSV reports that
    # analyze_missing() and analyze_filesize() subsequently read back.
    for step in (missings, filesizes, analyze_missing, analyze_filesize):
        step()