-
Notifications
You must be signed in to change notification settings - Fork 0
/
column_selector.py
executable file
·70 lines (47 loc) · 2.17 KB
/
column_selector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
This script selects data from an input TSV file based on column names and
creates a new file with the selected data/columns.
"""
import argparse
import pandas as pd
def main(input_file, output_file, columns, order, names):
    """Copy selected columns of a TSV file into a new TSV file.

    Cell values are read as plain text with NA detection disabled, so they
    are written back exactly as they appear in the input (e.g. ``007`` is
    not turned into ``7`` and ``NA`` is not blanked out).

    Args:
        input_file: Path to the tab-separated input file.
        output_file: Path of the tab-separated file to create.
        columns: Names of the columns to select from the input.
        order: Optional list of column names giving the output column
            order; ``None`` keeps the order given in ``columns``.
        names: Optional list of new column names, applied positionally to
            the output columns (after any reordering). If fewer names than
            columns are given, only the leading columns are renamed.

    Raises:
        ValueError: If ``names`` holds more entries than there are
            output columns.
    """
    # Read every cell as text and skip NA conversion: this tool only moves
    # columns around, so pandas must not re-type or normalise the values.
    data_table = pd.read_csv(
        input_file, sep='\t', dtype=str, keep_default_na=False)

    # Select columns
    selected_data = data_table[columns]

    # Reorder columns if provided
    if order is not None:
        selected_data = selected_data[order]

    # Rename columns if provided
    if names is not None:
        current_columns = list(selected_data.columns)
        # Fail with a clear message instead of an IndexError deep inside
        # the mapping construction.
        if len(names) > len(current_columns):
            raise ValueError(
                'Received {} new names but only {} columns are selected.'
                .format(len(names), len(current_columns)))
        # zip pairs names with columns by position; extra columns (if any)
        # keep their current name.
        names_mapper = dict(zip(current_columns, names))
        selected_data = selected_data.rename(columns=names_mapper)

    # Save dataframe with selected data
    selected_data.to_csv(output_file, sep='\t', index=False)
def parse_arguments():
    """Build the command-line interface and parse ``sys.argv``.

    Returns:
        argparse.Namespace with the attributes ``input_file``,
        ``output_file``, ``columns``, ``order`` and ``names``. The last
        two are ``None`` when the corresponding option is not given.
    """
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # Mandatory file paths.
    cli.add_argument(
        '-i', '--input-file',
        dest='input_file',
        type=str,
        required=True,
        help='Path to the input TSV file.',
    )
    cli.add_argument(
        '-o', '--output-file',
        dest='output_file',
        type=str,
        required=True,
        help='Path to the output file created to store the selected data.',
    )

    # Column selection (mandatory) followed by the optional reordering and
    # renaming lists; each accepts one or more values.
    cli.add_argument(
        '--columns',
        dest='columns',
        type=str,
        nargs='+',
        required=True,
        help='Names/identifiers of the columns to select.',
    )
    cli.add_argument(
        '--order',
        dest='order',
        type=str,
        nargs='+',
        help='Order of the names/identifiers of the columns in the output file.',
    )
    cli.add_argument(
        '--names',
        dest='names',
        type=str,
        nargs='+',
        help='New names/identifiers to attribute to the selected columns '
             '(Must respect the order provided to `--order`).',
    )

    return cli.parse_args()
if __name__ == '__main__':
    # Script entry point: parse the command line, then hand each parsed
    # option to main() under its matching parameter name.
    args = parse_arguments()
    main(
        input_file=args.input_file,
        output_file=args.output_file,
        columns=args.columns,
        order=args.order,
        names=args.names,
    )