diff --git a/arxiv_latex_cleaner/arxiv_latex_cleaner.py b/arxiv_latex_cleaner/arxiv_latex_cleaner.py index f3d8f83..bb3855e 100644 --- a/arxiv_latex_cleaner/arxiv_latex_cleaner.py +++ b/arxiv_latex_cleaner/arxiv_latex_cleaner.py @@ -177,28 +177,93 @@ def _remove_environment(text, environment): ) -def _remove_iffalse_block(text): - r"""Removes possibly nested '\iffalse*\fi' blocks from 'text'.""" - p = regex.compile(r'\\if\s*(\w+)|\\fi(?!\w)') - level = -1 - positions_to_delete = [] - start, end = 0, 0 +def _simplify_conditional_blocks(text): + r"""Simplify possibly nested conditional blocks from 'text'. + + For example, `\iffalse TEST1\else TEST2\fi` is simplified to `TEST2`, + and `\iftrue TEST1\else TEST2\fi` is simplified to `TEST1`. + Unknown conditionals are left untouched. + + Raises a ValueError if an unmatched \if or \else is found. + """ + p = regex.compile(r'(?!(?<=\\newif\s*))\\if\s*(\w+)|\\else(?!\w)|\\fi(?!\w)') + toplevel_tree = { "left": [], "kind": "toplevel", "parent": None } + + tree = toplevel_tree + + def new_subtree(kind): + return { "kind": kind, "left": [], "right": [] } + + def add_subtree(tree, subtree): + if "else" not in tree: + tree["left"].append(subtree) + else: + tree["right"].append(subtree) + subtree["parent"] = tree + for m in p.finditer(text): - if ( - m.group().replace(' ', '') == r'\iffalse' - or m.group().replace(' ', '') == r'\if0' - ) and level == -1: - level += 1 - start = m.start() - elif m.group().startswith(r'\if') and level >= 0: - level += 1 - elif m.group() == r'\fi' and level >= 0: - if level == 0: - end = m.end() - positions_to_delete.append((start, end)) - level -= 1 + m_no_space = m.group().replace(' ', '') + if (m_no_space == r'\iffalse' or m_no_space == r'\if0'): + subtree = new_subtree("iffalse") + subtree["start"] = m + add_subtree(tree, subtree) + tree = subtree + elif (m_no_space == r'\iftrue' or m_no_space == r'\if1'): + subtree = new_subtree("iftrue") + subtree["start"] = m + add_subtree(tree, subtree) + tree = subtree + elif m_no_space.startswith(r'\if'): + subtree = new_subtree("unknown") + subtree["start"] = m + add_subtree(tree, subtree) + tree = subtree + elif (m_no_space == r'\else'): + if tree["parent"] is None: + os.sys.stderr.write('Warning: Ignoring unmatched \\else!\n') + continue + tree['else'] = m + elif m.group() == r'\fi': + if tree["parent"] is None: + os.sys.stderr.write('Warning: Ignoring unmatched \\fi!\n') + continue + tree["end"] = m + tree = tree["parent"] + else: + raise ValueError('Unreachable!') + if tree["parent"] is not None: + raise ValueError(f"Unmatched {tree['start'].group()}") + + positions_to_delete = [] + + def traverse_tree(tree): + if tree["kind"] == "iffalse": + if "else" in tree: + positions_to_delete.append((tree["start"].start(), tree["else"].end())) + for subtree in tree["right"]: + traverse_tree(subtree) + positions_to_delete.append((tree["end"].start(), tree["end"].end())) + else: + positions_to_delete.append((tree["start"].start(), tree["end"].end())) + elif tree["kind"] == "iftrue": + if "else" in tree: + positions_to_delete.append((tree["start"].start(), tree["start"].end())) + for subtree in tree["left"]: + traverse_tree(subtree) + positions_to_delete.append((tree["else"].start(), tree["end"].end())) + else: + positions_to_delete.append((tree["start"].start(), tree["start"].end())) + positions_to_delete.append((tree["end"].start(), tree["end"].end())) + elif tree["kind"] == "unknown": + for subtree in tree["left"]: + traverse_tree(subtree) + for subtree in tree["right"]: + traverse_tree(subtree) else: - pass + raise ValueError('Unreachable!') + + for tree in toplevel_tree["left"]: + traverse_tree(tree) for start, end in reversed(positions_to_delete): if end < len(text) and text[end].isspace(): @@ -288,7 +353,7 @@ def _remove_comments_and_commands_to_delete(content, parameters): """Erases all LaTeX comments in the content, and writes it.""" content = [_remove_comments_inline(line) for line in content] content = _remove_environment(''.join(content), 'comment') - content = _remove_iffalse_block(content) + content = _simplify_conditional_blocks(content) for environment in parameters.get('environments_to_delete', []): content = _remove_environment(content, environment) for command in parameters.get('commands_only_to_delete', []): diff --git a/arxiv_latex_cleaner/tests/arxiv_latex_cleaner_test.py b/arxiv_latex_cleaner/tests/arxiv_latex_cleaner_test.py index 0bedc8d..5062446 100644 --- a/arxiv_latex_cleaner/tests/arxiv_latex_cleaner_test.py +++ b/arxiv_latex_cleaner/tests/arxiv_latex_cleaner_test.py @@ -476,10 +476,35 @@ def test_remove_environment(self, text_in, true_output): 'text_in': '\\newcommand\\figref[1]{Figure~\\ref{fig:\#1}}', 'true_output': '\\newcommand\\figref[1]{Figure~\\ref{fig:\#1}}', }, + { + 'testcase_name': 'iffalse_else_sustained', + 'text_in': '\\iffalse not there\\else here\\fi', + 'true_output': 'here', + }, + { + 'testcase_name': 'iftrue_else_removed', + 'text_in': '\\iftrue expected\\else not expected\\fi', + 'true_output': 'expected', + }, + { + 'testcase_name': 'if0_removed', + 'text_in': '\\if0 to be removed\\fi', + 'true_output': '', + }, + { + 'testcase_name': 'if1 works', + 'text_in': '\\if 1 expected\\fi', + 'true_output': 'expected', + }, + { + 'testcase_name': 'new_if_ignored', + 'text_in': '\\newif \\ifvar \\ifvar\iffalse test\\fi\\fi', + 'true_output': '\\newif \\ifvar \\ifvar\\fi', + }, ) - def test_remove_iffalse_block(self, text_in, true_output): + def test_simplify_conditional_blocks(self, text_in, true_output): self.assertEqual( - arxiv_latex_cleaner._remove_iffalse_block(text_in), true_output + arxiv_latex_cleaner._simplify_conditional_blocks(text_in), true_output ) @parameterized.named_parameters( @@ -899,7 +924,7 @@ def test_complete(self, input_dir): # Checks the set of files is the same as in the true folder. out_files = set(arxiv_latex_cleaner._list_all_files(self.out_path)) out_files_true = set(arxiv_latex_cleaner._list_all_files(out_path_true)) - self.assertEqual(out_files, out_files_true) + self.assertSetEqual(out_files, out_files_true) # Compares the contents of each file against the true value. for f1 in out_files: diff --git a/tex.zip b/tex.zip index b1d3fce..0df87ff 100644 Binary files a/tex.zip and b/tex.zip differ diff --git a/tex/main.tex b/tex/main.tex index 9dae5c9..5d37470 100644 --- a/tex/main.tex +++ b/tex/main.tex @@ -36,6 +36,9 @@ \end{mynote} \newif\ifvar +\newif \ifvarII + +\ifvarII asdf \fi \ifvar \if false @@ -51,6 +54,11 @@ \fi \fi +\iffalse I shall be gone (iffalse block)!\else Expect me (else block of iffalse)!\fi + +\iftrue Expect me (iftrue block)!\else I shall be gone (else block of iftrue)!\fi + + \newcommand{\red}[1]{{\color{red} #1}} hello test \red{hello test \red{hello}} diff --git a/tex_arXiv_true/main.tex b/tex_arXiv_true/main.tex index 8de0ae5..aa5d948 100644 --- a/tex_arXiv_true/main.tex +++ b/tex_arXiv_true/main.tex @@ -22,10 +22,16 @@ \newif\ifvar +\newif \ifvarII + +\ifvarII asdf \fi \ifvar \fi +Expect me (else block of iffalse)! +Expect me (iftrue block)! + \newcommand{\red}[1]{{\color{red} #1}} hello test hello test hello