-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_rank_direct.py
94 lines (75 loc) · 4.07 KB
/
run_rank_direct.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import os
from configparser import ConfigParser
import yaml

# Ranking Task
# Load the two configuration files driving the learn-to-rank experiments:
# an INI-style WMT experiment config and a YAML learner (logistic) config.

# Main WMT experiment configuration, resolved relative to the working dir.
config_path = os.path.join(os.getcwd(), 'config', 'wmt.cfg')
config = ConfigParser()
# ConfigParser.read() replaces the deprecated readfp()/open() pair and
# closes the file itself (the original leaked the handle). read() silently
# skips missing files, so fail fast explicitly to keep the original
# behaviour of raising when the config is absent.
if not config.read(config_path):
    raise FileNotFoundError(config_path)

# Learner hyper-parameter configuration (YAML).
config_path_learning = os.path.join(os.getcwd(), 'config', 'learner', 'logistic.cfg')
with open(config_path_learning, 'r') as cfg_file:
    # safe_load: yaml.load() without an explicit Loader is deprecated and
    # can execute arbitrary Python tags; a config file never needs that.
    config_learning = yaml.safe_load(cfg_file)
# ranking_task = RankingTask(config_path)
# ranking_task.prepare_feature_files()
# Training set for learn to rank
# data_structure2 = prepare_wmt.get_data_structure2(config)
# f_judgements = config.get('WMT', 'human_ranking')
# human_rankings = HumanRanking()
# human_rankings.add_human_data(f_judgements, config)
# dataset_for_all = config.get('WMT', 'dataset')
# feature_set_name = os.path.basename(config.get('Features', 'feature_set')).replace(".txt", "")
# feature_values = read_features_file(os.path.expanduser(config.get('WMT', 'output_dir')) + '/' + 'x_' + dataset_for_all + '.' + feature_set_name + '.' + 'all' + '.tsv', "\t")
# ranking_task.training_set_for_learn_to_rank(data_structure2, human_rankings, feature_values)
# ranking_task.train_save(config_learning, config)
# ranking_task.load_get_coefficients(config_learning, config)
# Test learn to rank
# predictions = ranking_task.test_learn_to_rank_coefficients(config_learning, config)
# data_structure = prepare_wmt.get_data_structure(config)
# prepare_wmt.wmt_format(config, feature_set_name, dataset_for_all, predictions, data_structure)
# # ranking_task.training_set_for_learn_to_rank_from_feature_file(config_learning, config)
#
# ranking_task.train_save(config_learning, config)
# predictions = ranking_task.test_learn_to_rank(config_learning)
#
# data_structure = prepare_wmt.get_data_structure2(config)
# prepare_wmt.wmt_format(config, "test", config.get("WMT", "dataset"), predictions, data_structure)
# ranking_task.training_set_for_rank_direct(data_structure, human_rankings, feature_values)
# human_ranking = HumanRanking()
# human_ranking.add_human_data(config.get("WMT", "human_ranking"), config)
# ranking_task.clean_dataset(config_learning, human_ranking)
# input_x = os.getcwd() + '/' + 'test' + '/' + 'x_newstest2014.tsv'
# input_y = os.getcwd() + '/' + 'test' + '/' + 'y_newstest2014.tsv'
# output_dir = os.getcwd() + '/' + 'test'
# split_dataset(input_x, input_y, output_dir)
# gold_labels = read_reference_file(config_learning.get("y_test", None), "\t")
# ranking_task.train_save(config_learning, config)
# predicted = ranking_task.train_predict(config_path_learning)
# predicted = ranking_task.load_predict(config_learning, config)
# ranking_task.evaluate_predicted(predicted, gold_labels)
# ranking_task.train_save(config_learning, config)
# ranking_task.load_get_coefficients(config_learning, config)
# ranking_task.recursive_feature_elimination(config_learning, config)
# Scoring Task
# config_path_learning = os.getcwd() + '/' + 'config/learner/svr.cfg'
# with open(config_path_learning, 'r') as cfg_file:
# config_learning = yaml.load(cfg_file.read())
#
# config_path = os.getcwd() + '/' + 'config' + '/' + 'absolute.cfg'
# config = ConfigParser()
# config.readfp(open(config_path))
#
# # evaluate_feature_scoring(config, ['meteor'], 'eamt2009', 'es-en', 'system')
#
# scoring_task = ScoringTask(config_path)
# # scoring_task.prepare_wmt16('parse')
# feature_values, human_scores = scoring_task.get_data()
# scoring_task.save_data(feature_values, human_scores)
# input_x = os.path.expanduser('~/Dropbox/informative_features_for_evaluation/data/absolute_scoring/x_mtc4.meteor_comb_min_fluency_features_alignment_quest.tsv')
# input_y = os.path.expanduser('~/Dropbox/informative_features_for_evaluation/data/absolute_scoring/y_mtc4.fluency_features_alignment_quest.tsv')
# input_y = os.path.expanduser('~/Dropbox/workspace/dataSets/mtc4-manual-evaluation/avg_mean.txt')
# output_dir = os.getcwd() + '/' + 'test'
# split_dataset_repeated_segments(input_x, input_y, output_dir, 919)
# #
# gold_labels = read_reference_file(config_learning.get("y_test", None), "\t")
# predicted = scoring_task.train_predict(config_path_learning)
# scoring_task.evaluate_predicted(predicted, gold_labels)
# #