Skip to content

Commit

Permalink
Make the yaml importer load multiple languages. (#1253)
Browse files Browse the repository at this point in the history
Co-authored-by: William Di Luigi <williamdiluigi@gmail.com>
  • Loading branch information
veluca93 and wil93 authored Oct 5, 2024
1 parent da34a29 commit 1ad05b7
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 81 deletions.
63 changes: 63 additions & 0 deletions cmscontrib/loaders/base_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,69 @@

from abc import ABCMeta, abstractmethod

# Lookup table from a lowercase English language name to the short language
# code used to identify that language. Most codes are two letters
# ("filipino" -> "fil" is the exception), and the catch-all entry "other"
# maps to itself.
LANGUAGE_MAP = {
    "afrikaans": "af",
    "arabic": "ar",
    "armenian": "hy",
    "azerbaijani": "az",
    "belarusian": "be",
    "bengali": "bn",
    "bosnian": "bs",
    "bulgarian": "bg",
    "catalan": "ca",
    "chinese": "zh",
    "croatian": "hr",
    "czech": "cs",
    "danish": "da",
    "dutch": "nl",
    "english": "en",
    "estonian": "et",
    "filipino": "fil",
    "finnish": "fi",
    "french": "fr",
    "georgian": "ka",
    "german": "de",
    "greek": "el",
    "hebrew": "he",
    "hindi": "hi",
    "hungarian": "hu",
    "icelandic": "is",
    "indonesian": "id",
    "irish": "ga",
    "italian": "it",
    "japanese": "ja",
    "kazakh": "kk",
    "korean": "ko",
    "kyrgyz": "ky",
    "latvian": "lv",
    "lithuanian": "lt",
    "macedonian": "mk",
    "malay": "ms",
    "mongolian": "mn",
    "norwegian": "no",
    "persian": "fa",
    "polish": "pl",
    "portuguese": "pt",
    "romanian": "ro",
    "russian": "ru",
    "serbian": "sr",
    "sinhala": "si",
    "slovak": "sk",
    "slovene": "sl",
    "spanish": "es",
    "swedish": "sv",
    "tajik": "tg",
    "tamil": "ta",
    "thai": "th",
    "turkish": "tr",
    "turkmen": "tk",
    "ukrainian": "uk",
    "urdu": "ur",
    "uzbek": "uz",
    "vietnamese": "vi",
    "other": "other",
}


class BaseLoader(metaclass=ABCMeta):
"""Base class for deriving loaders.
Expand Down
79 changes: 63 additions & 16 deletions cmscontrib/loaders/italy_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
from cmscommon.crypto import build_password
from cmscommon.datetime import make_datetime
from cmscontrib import touch
from .base_loader import ContestLoader, TaskLoader, UserLoader, TeamLoader
from .base_loader import ContestLoader, TaskLoader, UserLoader, TeamLoader, LANGUAGE_MAP


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -356,24 +356,68 @@ def get_task(self, get_statement=True):
logger.info("Loading parameters for task %s.", name)

if get_statement:
# The language of testo.pdf / statement.pdf, defaulting to 'it'
primary_language = load(conf, None, "primary_language")
if primary_language is None:
primary_language = 'it'
paths = [os.path.join(self.path, "statement", "statement.pdf"),
os.path.join(self.path, "testo", "testo.pdf")]
for path in paths:
primary_language = "it"

statement = None
for localized_statement in ["statement", "testo"]:
if os.path.exists(os.path.join(self.path, localized_statement)):
# Ensure that only one folder exists: either testo/ or statement/
if statement is not None:
logger.critical(
"Both testo/ and statement/ are present. This is likely an error."
)
sys.exit(1)
statement = localized_statement

if statement is None:
logger.critical("Statement folder not found.")
sys.exit(1)

single_statement_path = os.path.join(
self.path, statement, "%s.pdf" % statement)
if not os.path.exists(single_statement_path):
single_statement_path = None

multi_statement_paths = {}
for lang, lang_code in LANGUAGE_MAP.items():
path = os.path.join(self.path, statement, "%s.pdf" % lang)
if os.path.exists(path):
digest = self.file_cacher.put_file_from_path(
path,
"Statement for task %s (lang: %s)" %
(name, primary_language))
break
multi_statement_paths[lang_code] = path

if len(multi_statement_paths) > 0:
# Ensure that either a statement.pdf or testo.pdf is specified,
# or a list of <lang>.pdf files are specified, but not both,
# unless statement.pdf or testo.pdf is a symlink, in which case
# we let it slide.
if single_statement_path is not None and not os.path.islink(
single_statement_path
):
logger.warning(
f"A statement (not a symlink!) is present at {single_statement_path} "
f"but {len(multi_statement_paths)} more multi-language statements "
"were found. This is likely an error. Proceeding with "
"importing the multi-language files only."
)
statements_to_import = multi_statement_paths
else:
logger.critical("Couldn't find any task statement, aborting.")
statements_to_import = {
primary_language: single_statement_path}

if primary_language not in statements_to_import.keys():
logger.critical(
"Couldn't find statement for primary language %s, aborting." % primary_language)
sys.exit(1)
args["statements"] = {
primary_language: Statement(primary_language, digest)
}

args["statements"] = dict()
for lang_code, statement_path in statements_to_import.items():
digest = self.file_cacher.put_file_from_path(
statement_path,
"Statement for task %s (lang: %s)" % (name, lang_code),
)
args["statements"][lang_code] = Statement(lang_code, digest)

args["primary_statements"] = [primary_language]

Expand Down Expand Up @@ -563,7 +607,7 @@ def get_task(self, get_statement=True):
if subtask_detected:
# Close the previous subtask
if points is None:
assert(testcases == 0)
assert testcases == 0
else:
subtasks.append([points, testcases])
# Open the new one
Expand All @@ -582,7 +626,7 @@ def get_task(self, get_statement=True):
args["score_type_parameters"] = input_value
else:
subtasks.append([points, testcases])
assert(100 == sum([int(st[0]) for st in subtasks]))
assert 100 == sum([int(st[0]) for st in subtasks])
n_input = sum([int(st[1]) for st in subtasks])
args["score_type"] = "GroupMin"
args["score_type_parameters"] = subtasks
Expand Down Expand Up @@ -799,6 +843,9 @@ def task_has_changed(self):
# Statement
files.append(os.path.join(self.path, "statement", "statement.pdf"))
files.append(os.path.join(self.path, "testo", "testo.pdf"))
for lang in LANGUAGE_MAP:
files.append(os.path.join(self.path, "statement", "%s.pdf" % lang))
files.append(os.path.join(self.path, "testo", "%s.pdf" % lang))

# Managers
files.append(os.path.join(self.path, "check", "checker"))
Expand Down
66 changes: 1 addition & 65 deletions cmscontrib/loaders/polygon.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from cms.db import Contest, User, Task, Statement, Dataset, Manager, Testcase
from cmscommon.crypto import build_password
from cmscontrib import touch
from .base_loader import ContestLoader, TaskLoader, UserLoader
from .base_loader import ContestLoader, TaskLoader, UserLoader, LANGUAGE_MAP


logger = logging.getLogger(__name__)
Expand All @@ -41,70 +41,6 @@ def make_timedelta(t):
return timedelta(seconds=t)


# Lookup table from a lowercase English language name to a short language
# code; the catch-all entry 'other' maps to itself.
# NOTE(review): in this view the same name is also imported from
# .base_loader, which holds an identical table -- confirm that only one
# definition is meant to remain in this module.
LANGUAGE_MAP = {
'afrikaans': 'af',
'arabic': 'ar',
'armenian': 'hy',
'azerbaijani': 'az',
'belarusian': 'be',
'bengali': 'bn',
'bosnian': 'bs',
'bulgarian': 'bg',
'catalan': 'ca',
'chinese': 'zh',
'croatian': 'hr',
'czech': 'cs',
'danish': 'da',
'dutch': 'nl',
'english': 'en',
'estonian': 'et',
'filipino': 'fil',
'finnish': 'fi',
'french': 'fr',
'georgian': 'ka',
'german': 'de',
'greek': 'el',
'hebrew': 'he',
'hindi': 'hi',
'hungarian': 'hu',
'icelandic': 'is',
'indonesian': 'id',
'irish': 'ga',
'italian': 'it',
'japanese': 'ja',
'kazakh': 'kk',
'korean': 'ko',
'kyrgyz': 'ky',
'latvian': 'lv',
'lithuanian': 'lt',
'macedonian': 'mk',
'malay': 'ms',
'mongolian': 'mn',
'norwegian': 'no',
'persian': 'fa',
'polish': 'pl',
'portuguese': 'pt',
'romanian': 'ro',
'russian': 'ru',
'serbian': 'sr',
'sinhala': 'si',
'slovak': 'sk',
'slovene': 'sl',
'spanish': 'es',
'swedish': 'sv',
'tajik': 'tg',
'tamil': 'ta',
'thai': 'th',
'turkish': 'tr',
'turkmen': 'tk',
'ukrainian': 'uk',
'urdu': 'ur',
'uzbek': 'uz',
'vietnamese': 'vi',
'other': 'other',
}


class PolygonTaskLoader(TaskLoader):
"""Load a task stored using the Codeforces Polygon format.
Expand Down

0 comments on commit 1ad05b7

Please sign in to comment.