From f03b69d8dd1bec5cc4f038c06b432c85e23fcb47 Mon Sep 17 00:00:00 2001 From: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com> Date: Tue, 7 May 2024 12:58:16 +0200 Subject: [PATCH] Fix #40, in some cases, the presence of a "\\" would send the library in an infinite loop --- pyproject.toml | 2 +- src/json_repair/json_repair.py | 26 +++++++++++--------------- tests/test_json_repair.py | 4 ++-- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4edcd62..4becb3b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"] build-backend = "setuptools.build_meta" [project] name = "json_repair" -version = "0.17.1" +version = "0.17.2" license = {file = "LICENSE"} authors = [ { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" }, diff --git a/src/json_repair/json_repair.py b/src/json_repair/json_repair.py index da77f6d..1b12982 100644 --- a/src/json_repair/json_repair.py +++ b/src/json_repair/json_repair.py @@ -302,16 +302,6 @@ def parse_string(self) -> str: string_acc += char self.index += 1 char = self.get_char_at() - # If the string contains an escaped character we should respect that or remove the escape - if self.get_char_at(-1) == "\\": - if char in [rstring_delimiter, "t", "n", "r", "b", "\\"]: - string_acc += char - self.index += 1 - char = self.get_char_at() - else: - # Remove this character from the final output - string_acc = string_acc[:-2] + string_acc[-1:] - self.index -= 1 # ChatGPT sometimes forget to quote stuff in html tags or markdown, so we do this whole thing here if char == rstring_delimiter: # Special case here, in case of double quotes one after another @@ -495,11 +485,17 @@ def log(self, text: str, level: str) -> None: context = self.json_fd.read(self.logger["window"] * 2) self.json_fd.seek(self.index) else: - context = self.json_str[ - self.index - - self.logger["window"] : self.index - + self.logger["window"] - ] + start = ( + self.index - self.logger["window"] + if (self.index - self.logger["window"]) >= 0 + else 0 + ) + end = ( + self.index + self.logger["window"] + if (self.index + self.logger["window"]) <= len(self.json_str) + else len(self.json_str) + ) + context = self.json_str[start:end] self.logger["log"].append( { "text": text, diff --git a/tests/test_json_repair.py b/tests/test_json_repair.py index c8582e0..15d146f 100644 --- a/tests/test_json_repair.py +++ b/tests/test_json_repair.py @@ -7,7 +7,7 @@ def test_repair_json(): assert repair_json("[{]") == "[]" assert repair_json(" { } ") == "{}" assert repair_json("\"") == '""' - assert repair_json("\n") == '""' + assert repair_json("\n") == '""' assert repair_json(' {"key": true, "key2": false, "key3": null}') == '{"key": true, "key2": false, "key3": null}' assert repair_json('{"key": TRUE, "key2": FALSE, "key3": Null} ') == '{"key": true, "key2": false, "key3": null}' assert repair_json("{'key': 'string', 'key2': false, \"key3\": null, \"key4\": unquoted}") == '{"key": "string", "key2": false, "key3": null, "key4": "unquoted"}' @@ -56,7 +56,7 @@ def test_repair_json(): assert repair_json('{"') == '{}' assert repair_json('["') == '[]' assert repair_json("'\"'") == '"\\\""' - assert repair_json("'string\"") == '"string\\\""' + assert repair_json("'string\"\n\t\le") == '"string\\\"\\n\\t\\\\le"' assert repair_json('{foo: [}') == '{"foo": []}' assert repair_json('''{ "a": "{ b": {} }" }''') == '{"a": "{ b"}' assert repair_json('{"key": "value:value"}') == '{"key": "value:value"}'