From 2c37811c34758642d1549c9d68273f59fae36e98 Mon Sep 17 00:00:00 2001 From: India Kerle Date: Mon, 19 Feb 2024 12:39:40 +0000 Subject: [PATCH 01/12] add coordination ruler --- spacy/pipeline/__init__.py | 2 + spacy/pipeline/coordinationruler.py | 321 ++++++++++++++++++ .../tests/pipeline/test_coordinationruler.py | 66 ++++ 3 files changed, 389 insertions(+) create mode 100644 spacy/pipeline/coordinationruler.py create mode 100644 spacy/tests/pipeline/test_coordinationruler.py diff --git a/spacy/pipeline/__init__.py b/spacy/pipeline/__init__.py index 2c4a5a8a87f..02c900310b5 100644 --- a/spacy/pipeline/__init__.py +++ b/spacy/pipeline/__init__.py @@ -1,4 +1,5 @@ from .attributeruler import AttributeRuler +from .coordinationruler import CoordinationSplitter from .dep_parser import DependencyParser from .edit_tree_lemmatizer import EditTreeLemmatizer from .entity_linker import EntityLinker @@ -21,6 +22,7 @@ __all__ = [ "AttributeRuler", + "CoordinationSplitter", "DependencyParser", "EditTreeLemmatizer", "EntityLinker", diff --git a/spacy/pipeline/coordinationruler.py b/spacy/pipeline/coordinationruler.py new file mode 100644 index 00000000000..f2b62ac85c0 --- /dev/null +++ b/spacy/pipeline/coordinationruler.py @@ -0,0 +1,321 @@ +from typing import List, Callable, Optional, Union +from pydantic import BaseModel, validator +import re +import en_core_web_sm + +from ..tokens import Doc +from ..language import Language +from ..vocab import Vocab +from .pipe import Pipe + +########### DEFAULT COORDINATION SPLITTING RULES ############## + + +def _split_duplicate_object(doc: Doc) -> Union[List[str], None]: + """Split a text with 2 verbs and 1 object (and optionally a subject) into + 2 texts each with 1 verb, the shared object (and its modifiers), and the subject if present. + + i.e. 'I use and provide clinical supervision' --> + ['I use clinical supervision', 'I provide clinical supervision'] + + Args: + doc (Doc): The spaCy Doc object. + + Returns: + List[str]: The split texts. + """ + sentences = [] + + for token in doc: + if token.pos_ == "VERB" and (token.dep_ == "ROOT" or token.dep_ == "conj"): + + has_AND = False + has_second_verb = False + has_dobj = False + subject = None + + # Find the subject if it exists + for possible_subject in token.head.children: + if possible_subject.dep_ in ["nsubj", "nsubjpass"]: + subject = possible_subject + break + + for child in token.children: + + if child.pos_ == "CCONJ" and child.lemma_ == "and": + has_AND = True + + if child.pos_ == "VERB" and child.dep_ == "conj": + has_second_verb = True + second_verb = child + first_verb = token.head if token.dep_ == "conj" else token + + for descendant in second_verb.subtree: + if descendant.dep_ == "dobj": + has_dobj = True + # Collect the full noun phrase for the direct object + dobj_span = doc[ + descendant.left_edge.i : descendant.right_edge.i + 1 + ] + dobj = dobj_span.text + + if has_AND and has_second_verb and has_dobj: + subject_text = subject.text + " " if subject else "" + first_text = "{}{} {}".format(subject_text, first_verb, dobj) + second_text = "{}{} {}".format(subject_text, second_verb, dobj) + + sentences.extend([first_text, second_text]) + + return sentences if sentences else None + + +def _split_on_and(text: str) -> List[str]: + """Split a text on 'and' and return a list of the split texts. + + Args: + text (str): The text to split. + + Returns: + List[str]: The split texts. 
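+
+    Example (illustrative):
+        >>> _split_on_and("written, oral and visual communication")
+        ['written', 'oral', 'visual communication']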
+ """ + text = re.sub(r"\s\s+", " ", text) + + replacements = { + ";": ",", + ", and ,": " and ", + ", and,": " and ", + ",and ,": " and ", + ", and ": " and ", + " and ,": " and ", + ",and,": " and ", + " and,": " and ", + ",and ": " and ", + } + for old, new in replacements.items(): + text = text.replace(old, new) + + return [t.strip() for t in re.split(r",| and ", text)] + + +def _split_duplicate_verb(doc: Doc) -> Union[List[str], None]: + """Split a text with 1 verb and 2 objects. + + i.e. 'I love using smartphones and apps' --> + ['I love using smartphones', 'I love using apps'] + + Args: + doc (Doc): The spaCy Doc object. + + Returns: + List[str]: The split texts. + """ + + for token in doc: + + if token.pos_ == "VERB" and token.dep_ == "ROOT": + + has_AND = False + has_dobj = False + has_sec_obj = False + subject = "" + + for child in token.children: + + if child.dep_ == "dobj": + has_dobj = True + + subject = child.text if child.dep_ == "nsubj" else subject + + objects = " ".join( + [ + c.text + for c in token.subtree + if c.text != token.text and c.dep_ != "nsubj" + ] + ) + + split_objects = _split_on_and(objects) + + object_list = [] + for split in split_objects: + object_list.append(split) + + for subchild in child.children: + + if subchild.pos_ == "CCONJ" and subchild.lemma_ == "and": + has_AND = True + + if subchild.dep_ == "conj": + has_sec_obj = True + + if has_AND and has_dobj and has_sec_obj: + text_list = [ + f"{subject} {token.text} {split}.".strip() + for split in object_list + ] + return [text.replace(" ..", ".") for text in text_list] + + return None + + +def _split_skill_mentions(doc: Doc) -> Union[List[str], None]: + """Split a text with 2 skills into 2 texts with 1 skill. + + i.e. 'written and oral communication skills' --> + ['written communication skills', 'oral communication skills'] + + Args: + text (str): The text to split. + + Returns: + List[str]: The split texts. + """ + for token in doc: + if ( + token.pos_ == "NOUN" + and token.lemma_ == "skill" + and token.idx == doc[-1].idx + ): + + has_AND = False + + root = [token for token in doc if token.dep_ == "ROOT"] + if root: + root = root[0] + + for child in root.subtree: + + if child.pos_ == "CCONJ" and child.lemma_ == "and": + has_AND = True + + if has_AND: + skill_def = " ".join( + [c.text for c in root.subtree if c.text != token.text] + ) + + split_skills = _split_on_and(skill_def) + + skill_lists = [] + for split_skill in split_skills: + skill_lists.append("{} {}".format(split_skill, token.text)) + + return skill_lists + return None + + +class SplittingRule(BaseModel): + function: Callable[[Doc], Union[List[str], None]] + + @validator("function") + def check_return_type(cls, v): + nlp = en_core_web_sm.load() + dummy_doc = nlp("This is a dummy sentence.") + result = v(dummy_doc) + if result is not None: + if not isinstance(result, List): + raise ValueError( + "The custom splitting rule must return None or a list." + ) + elif not all(isinstance(item, str) for item in result): + raise ValueError( + "The custom splitting rule must return None or a list of strings." + ) + return v + + +@Language.factory( + "coordination_splitter", requires=["token.dep", "token.tag", "token.pos"] +) +def make_coordination_splitter(nlp: Language, name: str): + """Make a CoordinationSplitter component. 
+
+    the default splitting rules include:
+
+    - _split_duplicate_object: Split a text with 2 verbs and 1 object (and optionally a subject) into two texts each with 1 verb, the shared object (and its modifiers), and the subject if present.
+    - _split_duplicate_verb: Split a text with 1 verb and 2 objects into two texts each with 1 verb and 1 object.
+    - _split_skill_mentions: Split a text with 2 skills into 2 texts with 1 skill (the phrase must end with 'skills' and the skills must be separated by 'and')
+
+
+    Args:
+        nlp (Language): The spaCy Language object.
+        name (str): The name of the component.
+
+    RETURNS The CoordinationSplitter component.
+
+    DOCS: xxx
+    """
+
+    return CoordinationSplitter(nlp.vocab, name=name)
+
+
+class CoordinationSplitter(Pipe):
+    def __init__(
+        self,
+        vocab: Vocab,
+        name: str = "coordination_splitter",
+        rules: Optional[List[SplittingRule]] = None,
+    ) -> None:
+        self.name = name
+        self.vocab = vocab
+        if rules is None:
+            default_rules = [
+                _split_duplicate_object,
+                _split_duplicate_verb,
+                _split_skill_mentions,
+            ]
+            self.rules = [SplittingRule(function=rule) for rule in default_rules]
+        else:
+            # Ensure provided rules are wrapped in SplittingRule instances
+            self.rules = [
+                rule
+                if isinstance(rule, SplittingRule)
+                else SplittingRule(function=rule)
+                for rule in rules
+            ]
+
+    def clear_rules(self) -> None:
+        """Clear the default splitting rules."""
+        self.rules = []
+
+    def add_default_rules(self) -> List[SplittingRule]:
+        """Reset the default splitting rules."""
+        default_rules = [
+            _split_duplicate_object,
+            _split_duplicate_verb,
+            _split_skill_mentions,
+        ]
+        self.rules = [SplittingRule(function=rule) for rule in default_rules]
+
+    def add_rule(self, rule: Callable[[Doc], Union[List[str], None]]) -> None:
+        """Add a single splitting rule to the default rules."""
+        validated_rule = SplittingRule(function=rule)
+        self.rules.append(validated_rule)
+
+    def add_rules(self, rules: List[Callable[[Doc], Union[List[str], None]]]) -> None:
+        """Add a list of splitting rules to the default rules.
+
+        Args:
+            rules (List[Callable[[Doc], Union[List[str], None]]]): A list of functions to be added as splitting rules.
+        """
+        for rule in rules:
+            # Wrap each rule in a SplittingRule instance to ensure it's validated
+            validated_rule = SplittingRule(function=rule)
+            self.rules.append(validated_rule)
+
+    def __call__(self, doc: Doc) -> Doc:
+        """Apply the splitting rules to the doc.
+
+        Args:
+            doc (Doc): The spaCy Doc object.
+
+        Returns:
+            Doc: The modified spaCy Doc object.
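+
+        Example (illustrative; assumes a pipeline whose parser assigns
+        the POS and dependency labels the default rules expect):
+            >>> doc = nlp("I use and provide clinical supervision")
+            >>> coord_splitter(doc)
+            # Doc with words ['I use clinical supervision',
+            #                 'I provide clinical supervision']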
+ """ + if doc.lang_ != "en": + return doc + + for rule in self.rules: + split = rule.function(doc) + if split: + return Doc(doc.vocab, words=split) + return doc diff --git a/spacy/tests/pipeline/test_coordinationruler.py b/spacy/tests/pipeline/test_coordinationruler.py new file mode 100644 index 00000000000..be439e9c599 --- /dev/null +++ b/spacy/tests/pipeline/test_coordinationruler.py @@ -0,0 +1,66 @@ +import pytest +from typing import List +from spacy.tokens import Doc + +import en_core_web_sm + + +@pytest.fixture +def nlp(): + return en_core_web_sm.load() + + +def _my_custom_splitting_rule(doc: Doc) -> List[str]: + split_phrases = [] + for token in doc: + if token.text == "read": + split_phrases.append("test1") + split_phrases.append("test2") + return split_phrases + + +def test_coordinationruler(nlp): + doc = nlp("I read and write books") + assert len(doc) == 5 + assert [d.text for d in doc] == ["I", "read", "and", "write", "books"] + coord_splitter = nlp.add_pipe("coordination_splitter") + assert len(coord_splitter.rules) == 3 + assert coord_splitter.name == "coordination_splitter" + doc_split = coord_splitter(doc) + assert len(doc_split) == 2 + assert [t.text for t in doc_split] == ["I read books", "I write books"] + + +def test_coordinationruler_clear_rules(nlp): + coord_splitter = nlp.add_pipe("coordination_splitter") + assert len(coord_splitter.rules) == 3 + coord_splitter.clear_rules() + assert len(coord_splitter.rules) == 0 + assert coord_splitter.rules == [] + + +def test_coordinationruler_add_rule(nlp): + coord_splitter = nlp.add_pipe("coordination_splitter") + assert len(coord_splitter.rules) == 3 + coord_splitter.add_rule(_my_custom_splitting_rule) + assert len(coord_splitter.rules) == 4 + + +def test_coordinationruler_add_rules(nlp): + doc = nlp("I read and write books") + coord_splitter = nlp.add_pipe("coordination_splitter") + coord_splitter.clear_rules() + coord_splitter.add_rules([_my_custom_splitting_rule, _my_custom_splitting_rule]) + assert len(coord_splitter.rules) == 2 + doc_split = coord_splitter(doc) + assert len(doc_split) == 2 + + assert [t.text for t in doc_split] == ["test1", "test2"] + + +def test_coordinationruler_add_default_rules(nlp): + coord_splitter = nlp.add_pipe("coordination_splitter") + coord_splitter.clear_rules() + assert len(coord_splitter.rules) == 0 + coord_splitter.add_default_rules() + assert len(coord_splitter.rules) == 3 From 81c52c8ff22cba36f8f189919a8c9c1135eaceba Mon Sep 17 00:00:00 2001 From: India Kerle Date: Thu, 29 Feb 2024 14:45:07 -0300 Subject: [PATCH 02/12] add usecase --- spacy/pipeline/__init__.py | 4 +- spacy/pipeline/coordinationruler.py | 480 +++++++----------- .../tests/pipeline/test_coordinationruler.py | 211 +++++--- 3 files changed, 327 insertions(+), 368 deletions(-) diff --git a/spacy/pipeline/__init__.py b/spacy/pipeline/__init__.py index 02c900310b5..52e30ad4f4c 100644 --- a/spacy/pipeline/__init__.py +++ b/spacy/pipeline/__init__.py @@ -1,5 +1,5 @@ from .attributeruler import AttributeRuler -from .coordinationruler import CoordinationSplitter +#from .coordinationruler import CoordinationSplitter from .dep_parser import DependencyParser from .edit_tree_lemmatizer import EditTreeLemmatizer from .entity_linker import EntityLinker @@ -22,7 +22,7 @@ __all__ = [ "AttributeRuler", - "CoordinationSplitter", + #"CoordinationSplitter", "DependencyParser", "EditTreeLemmatizer", "EntityLinker", diff --git a/spacy/pipeline/coordinationruler.py b/spacy/pipeline/coordinationruler.py index f2b62ac85c0..e171dca9b26 100644 --- 
a/spacy/pipeline/coordinationruler.py +++ b/spacy/pipeline/coordinationruler.py @@ -1,7 +1,6 @@ from typing import List, Callable, Optional, Union from pydantic import BaseModel, validator import re -import en_core_web_sm from ..tokens import Doc from ..language import Language @@ -9,313 +8,180 @@ from .pipe import Pipe ########### DEFAULT COORDINATION SPLITTING RULES ############## - - -def _split_duplicate_object(doc: Doc) -> Union[List[str], None]: - """Split a text with 2 verbs and 1 object (and optionally a subject) into - 2 texts each with 1 verb, the shared object (and its modifiers), and the subject if present. - - i.e. 'I use and provide clinical supervision' --> - ['I use clinical supervision', 'I provide clinical supervision'] - - Args: - doc (Doc): The spaCy Doc object. - - Returns: - List[str]: The split texts. - """ - sentences = [] - - for token in doc: - if token.pos_ == "VERB" and (token.dep_ == "ROOT" or token.dep_ == "conj"): - - has_AND = False - has_second_verb = False - has_dobj = False - subject = None - - # Find the subject if it exists - for possible_subject in token.head.children: - if possible_subject.dep_ in ["nsubj", "nsubjpass"]: - subject = possible_subject - break - - for child in token.children: - - if child.pos_ == "CCONJ" and child.lemma_ == "and": - has_AND = True - - if child.pos_ == "VERB" and child.dep_ == "conj": - has_second_verb = True - second_verb = child - first_verb = token.head if token.dep_ == "conj" else token - - for descendant in second_verb.subtree: - if descendant.dep_ == "dobj": - has_dobj = True - # Collect the full noun phrase for the direct object - dobj_span = doc[ - descendant.left_edge.i : descendant.right_edge.i + 1 - ] - dobj = dobj_span.text - - if has_AND and has_second_verb and has_dobj: - subject_text = subject.text + " " if subject else "" - first_text = "{}{} {}".format(subject_text, first_verb, dobj) - second_text = "{}{} {}".format(subject_text, second_verb, dobj) - - sentences.extend([first_text, second_text]) - - return sentences if sentences else None - - -def _split_on_and(text: str) -> List[str]: - """Split a text on 'and' and return a list of the split texts. - - Args: - text (str): The text to split. - - Returns: - List[str]: The split texts. - """ - text = re.sub(r"\s\s+", " ", text) - - replacements = { - ";": ",", - ", and ,": " and ", - ", and,": " and ", - ",and ,": " and ", - ", and ": " and ", - " and ,": " and ", - ",and,": " and ", - " and,": " and ", - ",and ": " and ", - } - for old, new in replacements.items(): - text = text.replace(old, new) - - return [t.strip() for t in re.split(r",| and ", text)] - - -def _split_duplicate_verb(doc: Doc) -> Union[List[str], None]: - """Split a text with 1 verb and 2 objects. - - i.e. 'I love using smartphones and apps' --> - ['I love using smartphones', 'I love using apps'] - - Args: - doc (Doc): The spaCy Doc object. - - Returns: - List[str]: The split texts. 
- """ - - for token in doc: - - if token.pos_ == "VERB" and token.dep_ == "ROOT": - - has_AND = False - has_dobj = False - has_sec_obj = False - subject = "" - - for child in token.children: - - if child.dep_ == "dobj": - has_dobj = True - - subject = child.text if child.dep_ == "nsubj" else subject - - objects = " ".join( - [ - c.text - for c in token.subtree - if c.text != token.text and c.dep_ != "nsubj" - ] - ) - - split_objects = _split_on_and(objects) - - object_list = [] - for split in split_objects: - object_list.append(split) - - for subchild in child.children: - - if subchild.pos_ == "CCONJ" and subchild.lemma_ == "and": - has_AND = True - - if subchild.dep_ == "conj": - has_sec_obj = True - - if has_AND and has_dobj and has_sec_obj: - text_list = [ - f"{subject} {token.text} {split}.".strip() - for split in object_list - ] - return [text.replace(" ..", ".") for text in text_list] - - return None - - -def _split_skill_mentions(doc: Doc) -> Union[List[str], None]: - """Split a text with 2 skills into 2 texts with 1 skill. - - i.e. 'written and oral communication skills' --> - ['written communication skills', 'oral communication skills'] - + +def split_noun_coordination(doc: Doc) -> Union[List[str], None]: + """Identifies and splits phrases with multiple nouns, a modifier + and a conjunction. + + Examples: + - "apples and oranges" -> None + - "green apples and oranges" -> ["green apples", "green oranges"] + - "green apples and rotten oranges" -> None + - "apples and juicy oranges" -> ["juicy apples", "juicy oranges"] + - "hot chicken wings and soup" -> ["hot chicken wings", "hot soup"] + - "spicy ice cream and chicken wings" -> ["spicy ice cream", "spicy chicken wings"] + Args: - text (str): The text to split. + doc (Doc): The input document. Returns: - List[str]: The split texts. + Union[List[str], None]: A list of the coordinated noun phrases, + or None if no coordinated noun phrases are found. """ - for token in doc: - if ( - token.pos_ == "NOUN" - and token.lemma_ == "skill" - and token.idx == doc[-1].idx - ): - - has_AND = False - - root = [token for token in doc if token.dep_ == "ROOT"] - if root: - root = root[0] - - for child in root.subtree: - - if child.pos_ == "CCONJ" and child.lemma_ == "and": - has_AND = True - - if has_AND: - skill_def = " ".join( - [c.text for c in root.subtree if c.text != token.text] - ) - - split_skills = _split_on_and(skill_def) - - skill_lists = [] - for split_skill in split_skills: - skill_lists.append("{} {}".format(split_skill, token.text)) - - return skill_lists - return None - - -class SplittingRule(BaseModel): - function: Callable[[Doc], Union[List[str], None]] - - @validator("function") - def check_return_type(cls, v): - nlp = en_core_web_sm.load() - dummy_doc = nlp("This is a dummy sentence.") - result = v(dummy_doc) - if result is not None: - if not isinstance(result, List): - raise ValueError( - "The custom splitting rule must return None or a list." - ) - elif not all(isinstance(item, str) for item in result): - raise ValueError( - "The custom splitting rule must return None or a list of strings." - ) - return v - - -@Language.factory( - "coordination_splitter", requires=["token.dep", "token.tag", "token.pos"] -) -def make_coordination_splitter(nlp: Language, name: str): - """Make a CoordinationSplitter component. 
- - the default splitting rules include: - - - _split_duplicate_object: Split a text with 2 verbs and 1 object (and optionally a subject) into two texts each with 1 verb, the shared object (and its modifiers), and the subject if present. - - _split_duplicate_verb: Split a text with 1 verb and 2 objects into two texts each with 1 verb and 1 object. - - _split_skill_mentions: Split a text with 2 skills into 2 texts with 1 skill (the phrase must end with 'skills' and the skills must be separated by 'and') - - - Args: - nlp (Language): The spaCy Language object. - name (str): The name of the component. - - RETURNS The CoordinationSplitter component. - - DOCS: xxx - """ - - return CoordinationSplitter(nlp.vocab, name=name) - - -class CoordinationSplitter(Pipe): - def __init__( - self, - vocab: Vocab, - name: str = "coordination_splitter", - rules: Optional[List[SplittingRule]] = None, - ) -> None: - self.name = name - self.vocab = vocab - if rules is None: - default_rules = [ - _split_duplicate_object, - _split_duplicate_verb, - _split_skill_mentions, - ] - self.rules = [SplittingRule(function=rule) for rule in default_rules] - else: - # Ensure provided rules are wrapped in SplittingRule instances - self.rules = [ - rule - if isinstance(rule, SplittingRule) - else SplittingRule(function=rule) - for rule in rules - ] - - def clear_rules(self) -> None: - """Clear the default splitting rules.""" - self.rules = [] - - def add_default_rules(self) -> List[SplittingRule]: - """Reset the default splitting rules.""" - default_rules = [ - _split_duplicate_object, - _split_duplicate_verb, - _split_skill_mentions, - ] - self.rules = [SplittingRule(function=rule) for rule in default_rules] - - def add_rule(self, rule: Callable[[Doc], Union[List[str], None]]) -> None: - """Add a single splitting rule to the default rules.""" - validated_rule = SplittingRule(function=rule) - self.rules.append(validated_rule) - - def add_rules(self, rules: List[Callable[[Doc], Union[List[str], None]]]) -> None: - """Add a list of splitting rules to the default rules. - - Args: - rules (List[Callable[[Doc], Union[List[str], None]]]): A list of functions to be added as splitting rules. - """ - for rule in rules: - # Wrap each rule in a SplittingRule instance to ensure it's validated - validated_rule = SplittingRule(function=rule) - self.rules.append(validated_rule) - - def __call__(self, doc: Doc) -> Doc: - """Apply the splitting rules to the doc. - - Args: - doc (Doc): The spaCy Doc object. - - Returns: - Doc: The modified spaCy Doc object. 
- """ - if doc.lang_ != "en": - return doc - - for rule in self.rules: - split = rule.function(doc) - if split: - return Doc(doc.vocab, words=split) - return doc + def _split_doc(doc: Doc) -> bool: + noun_modified = False + has_conjunction = False + + for token in doc: + if token.head.pos_ == 'NOUN': ## check to see that the phrase is a noun phrase + has_modifier = any(child.dep_ == 'amod' for child in token.head.children) #check to see if the noun has a modifier + if has_modifier: + noun_modified = True + # check if there is a conjunction linked directly to a noun + if token.dep_ == 'conj' and token.head.pos_ == 'NOUN': + has_conjunction = True + + return True if noun_modified and has_conjunction else False + + phrases = [] + modified_nouns = set() + to_split = _split_doc(doc) + + if to_split: + for token in doc: + if token.dep_ == "amod" and token.head.pos_ == "NOUN": + modifier = token.text + head_noun = token.head + + if head_noun not in modified_nouns: + nouns_to_modify = [head_noun] + list(head_noun.conjuncts) + + for noun in nouns_to_modify: + compound_parts = [child.text for child in noun.lefts if child.dep_ == "compound"] + complete_noun_phrase = " ".join(compound_parts + [noun.text]) + phrases.append(f"{modifier} {complete_noun_phrase}") + modified_nouns.add(noun) # Mark this noun as modified + + return phrases if phrases != [] else None + else: + return None + + +############################################################### + +# class SplittingRule(BaseModel): +# function: Callable[[Doc], Union[List[str], None]] + +# @validator("function") +# def check_return_type(cls, v): +# nlp = en_core_web_sm.load() +# dummy_doc = nlp("This is a dummy sentence.") +# result = v(dummy_doc) +# if result is not None: +# if not isinstance(result, List): +# raise ValueError( +# "The custom splitting rule must return None or a list." +# ) +# elif not all(isinstance(item, str) for item in result): +# raise ValueError( +# "The custom splitting rule must return None or a list of strings." +# ) +# return v + + +# @Language.factory( +# "coordination_splitter", requires=["token.dep", "token.tag", "token.pos"] +# ) +# def make_coordination_splitter(nlp: Language, name: str): +# """Make a CoordinationSplitter component. + +# the default splitting rules include: + +# - _split_duplicate_object: Split a text with 2 verbs and 1 object (and optionally a subject) into two texts each with 1 verb, the shared object (and its modifiers), and the subject if present. +# - _split_duplicate_verb: Split a text with 1 verb and 2 objects into two texts each with 1 verb and 1 object. +# - _split_skill_mentions: Split a text with 2 skills into 2 texts with 1 skill (the phrase must end with 'skills' and the skills must be separated by 'and') + + +# Args: +# nlp (Language): The spaCy Language object. +# name (str): The name of the component. + +# RETURNS The CoordinationSplitter component. 
+ +# DOCS: xxx +# """ + +# return CoordinationSplitter(nlp.vocab, name=name) + + +# class CoordinationSplitter(Pipe): +# def __init__( +# self, +# vocab: Vocab, +# name: str = "coordination_splitter", +# rules: Optional[List[SplittingRule]] = None, +# ) -> None: +# self.name = name +# self.vocab = vocab +# if rules is None: +# default_rules = [ +# _split_duplicate_object, +# _split_duplicate_verb, +# _split_skill_mentions, +# ] +# self.rules = [SplittingRule(function=rule) for rule in default_rules] +# else: +# # Ensure provided rules are wrapped in SplittingRule instances +# self.rules = [ +# rule +# if isinstance(rule, SplittingRule) +# else SplittingRule(function=rule) +# for rule in rules +# ] + +# def clear_rules(self) -> None: +# """Clear the default splitting rules.""" +# self.rules = [] + +# def add_default_rules(self) -> List[SplittingRule]: +# """Reset the default splitting rules.""" +# default_rules = [ +# _split_duplicate_object, +# _split_duplicate_verb, +# _split_skill_mentions, +# ] +# self.rules = [SplittingRule(function=rule) for rule in default_rules] + +# def add_rule(self, rule: Callable[[Doc], Union[List[str], None]]) -> None: +# """Add a single splitting rule to the default rules.""" +# validated_rule = SplittingRule(function=rule) +# self.rules.append(validated_rule) + +# def add_rules(self, rules: List[Callable[[Doc], Union[List[str], None]]]) -> None: +# """Add a list of splitting rules to the default rules. + +# Args: +# rules (List[Callable[[Doc], Union[List[str], None]]]): A list of functions to be added as splitting rules. +# """ +# for rule in rules: +# # Wrap each rule in a SplittingRule instance to ensure it's validated +# validated_rule = SplittingRule(function=rule) +# self.rules.append(validated_rule) + +# def __call__(self, doc: Doc) -> Doc: +# """Apply the splitting rules to the doc. + +# Args: +# doc (Doc): The spaCy Doc object. + +# Returns: +# Doc: The modified spaCy Doc object. 
+# """ +# if doc.lang_ != "en": +# return doc + +# for rule in self.rules: +# split = rule.function(doc) +# if split: +# return Doc(doc.vocab, words=split) +# return doc diff --git a/spacy/tests/pipeline/test_coordinationruler.py b/spacy/tests/pipeline/test_coordinationruler.py index be439e9c599..7ca8f39f473 100644 --- a/spacy/tests/pipeline/test_coordinationruler.py +++ b/spacy/tests/pipeline/test_coordinationruler.py @@ -1,66 +1,159 @@ import pytest from typing import List -from spacy.tokens import Doc -import en_core_web_sm +from spacy.tokens import Doc +import spacy +from spacy.pipeline.coordinationruler import split_noun_coordination @pytest.fixture def nlp(): - return en_core_web_sm.load() - - -def _my_custom_splitting_rule(doc: Doc) -> List[str]: - split_phrases = [] - for token in doc: - if token.text == "read": - split_phrases.append("test1") - split_phrases.append("test2") - return split_phrases - - -def test_coordinationruler(nlp): - doc = nlp("I read and write books") - assert len(doc) == 5 - assert [d.text for d in doc] == ["I", "read", "and", "write", "books"] - coord_splitter = nlp.add_pipe("coordination_splitter") - assert len(coord_splitter.rules) == 3 - assert coord_splitter.name == "coordination_splitter" - doc_split = coord_splitter(doc) - assert len(doc_split) == 2 - assert [t.text for t in doc_split] == ["I read books", "I write books"] - - -def test_coordinationruler_clear_rules(nlp): - coord_splitter = nlp.add_pipe("coordination_splitter") - assert len(coord_splitter.rules) == 3 - coord_splitter.clear_rules() - assert len(coord_splitter.rules) == 0 - assert coord_splitter.rules == [] - - -def test_coordinationruler_add_rule(nlp): - coord_splitter = nlp.add_pipe("coordination_splitter") - assert len(coord_splitter.rules) == 3 - coord_splitter.add_rule(_my_custom_splitting_rule) - assert len(coord_splitter.rules) == 4 - - -def test_coordinationruler_add_rules(nlp): - doc = nlp("I read and write books") - coord_splitter = nlp.add_pipe("coordination_splitter") - coord_splitter.clear_rules() - coord_splitter.add_rules([_my_custom_splitting_rule, _my_custom_splitting_rule]) - assert len(coord_splitter.rules) == 2 - doc_split = coord_splitter(doc) - assert len(doc_split) == 2 - - assert [t.text for t in doc_split] == ["test1", "test2"] - - -def test_coordinationruler_add_default_rules(nlp): - coord_splitter = nlp.add_pipe("coordination_splitter") - coord_splitter.clear_rules() - assert len(coord_splitter.rules) == 0 - coord_splitter.add_default_rules() - assert len(coord_splitter.rules) == 3 + return spacy.blank("en") + +### NOUN CONSTRUCTION CASES ### +@pytest.fixture +def noun_construction_case1(nlp): + words = ["apples", "and", "oranges"] + spaces = [True, True, False] # Indicates whether the word is followed by a space + pos_tags = ["NOUN", "CCONJ", "NOUN"] + dep_relations = ["nsubj", "cc", "conj"] + + doc = Doc(nlp.vocab, words=words, spaces=spaces) + + #set pos_ and dep_ attributes + for token, pos, dep in zip(doc, pos_tags, dep_relations): + token.pos_ = pos + token.dep_ = dep + + # # define head relationships manually + doc[1].head = doc[2] # "and" -> "oranges" + doc[2].head = doc[0] # "oranges" -> "apples" + doc[0].head = doc[0] + + return doc + +@pytest.fixture +def noun_construction_case2(nlp): + words = ["red", "apples", "and", "oranges"] + spaces = [True, True, True, False] # Indicates whether the word is followed by a space + pos_tags = ["ADJ", "NOUN", "CCONJ", "NOUN"] + dep_relations = ["amod", "nsubj", "cc", "conj"] + + # Create a Doc object manually + doc 
= Doc(nlp.vocab, words=words, spaces=spaces)
+
+    #set pos_ and dep_ attributes
+    for token, pos, dep in zip(doc, pos_tags, dep_relations):
+        token.pos_ = pos
+        token.dep_ = dep
+
+    # define head relationships manually
+    doc[0].head = doc[1]
+    doc[2].head = doc[3]
+    doc[3].head = doc[1]
+
+    return doc
+
+@pytest.fixture
+def noun_construction_case3(nlp):
+    words = ["apples", "and", "juicy", "oranges"]
+    spaces = [True, True, True, False]  # Indicates whether the word is followed by a space.
+    pos_tags = ["NOUN", "CCONJ", "ADJ", "NOUN"]
+    dep_relations = ["nsubj", "cc", "amod", "conj"]
+
+    #create a Doc object manually
+    doc = Doc(nlp.vocab, words=words, spaces=spaces)
+
+    #set POS and dependency tags
+    for token, pos, dep in zip(doc, pos_tags, dep_relations):
+        token.pos_ = pos
+        token.dep_ = dep
+
+    #defining head relationships manually
+    doc[0].head = doc[0]  # "apples" as root, pointing to itself for simplicity.
+    doc[1].head = doc[3]  # "and" -> "oranges"
+    doc[2].head = doc[3]  # "juicy" -> "oranges"
+    doc[3].head = doc[0]  # "oranges" -> "apples", indicating a conjunctive relationship
+
+    return doc
+
+@pytest.fixture
+def noun_construction_case4(nlp):
+    words = ["hot", "chicken", "wings", "and", "soup"]
+    spaces = [True, True, True, True, False]  # Indicates whether the word is followed by a space.
+    pos_tags= ["ADJ", "NOUN", "NOUN", "CCONJ", "NOUN"]
+    dep_relations = ["amod", "compound", "ROOT", "cc", "conj"]
+
+    doc = Doc(nlp.vocab, words=words, spaces=spaces)
+
+    for token, pos, dep in zip(doc, pos_tags, dep_relations):
+        token.pos_ = pos
+        token.dep_ = dep
+
+    # Define head relationships manually for "hot chicken wings and soup".
+    doc[0].head = doc[2]  # "hot" -> "wings"
+    doc[1].head = doc[2]  # "chicken" -> "wings"
+    doc[2].head = doc[2]  # "wings" as root
+    doc[3].head = doc[4]  # "and" -> "soup"
+    doc[4].head = doc[2]  # "soup" -> "wings"
+
+    return doc
+
+@pytest.fixture
+def noun_construction_case5(nlp):
+    words = ["green", "apples", "and", "rotten", "oranges"]
+    spaces = [True, True, True, True, False]  # Indicates whether the word is followed by a space.
+    pos_tags = ["ADJ", "NOUN", "CCONJ", "ADJ", "NOUN"]
+    dep_relations = ["amod", "ROOT", "cc", "amod", "conj"]
+
+    doc = Doc(nlp.vocab, words=words, spaces=spaces)
+
+    # Set POS and dependency tags.
+    for token, pos, dep in zip(doc, pos_tags, dep_relations):
+        token.pos_ = pos
+        token.dep_ = dep
+
+    # Define head relationships manually for "green apples and rotten oranges".
+ doc[0].head = doc[1] # "green" -> "apples" + doc[1].head = doc[1] # "apples" as root + doc[2].head = doc[4] # "and" -> "oranges" + doc[3].head = doc[4] # "rotten" -> "oranges" + doc[4].head = doc[1] # "oranges" -> "apples" + + return doc + +#test split_noun_coordination on 5 different cases +def test_split_noun_coordination(noun_construction_case1, + noun_construction_case2, + noun_construction_case3, + noun_construction_case4, + noun_construction_case5): + + #test 1: no modifier - it should return None from _split_doc + case1_split = split_noun_coordination(noun_construction_case1) + assert case1_split == None + + #test 2: modifier is at the beginning of the noun phrase + case2_split = split_noun_coordination(noun_construction_case2) + assert len(case2_split) == 2 + assert isinstance(case2_split, list) + assert all(isinstance(phrase, str) for phrase in case2_split) + assert case2_split == ["red apples", "red oranges"] + + #test 3: modifier is at the end of the noun phrase + case3_split = split_noun_coordination(noun_construction_case3) + assert len(case3_split) == 2 + assert isinstance(case3_split, list) + assert all(isinstance(phrase, str) for phrase in case3_split) + assert case3_split == ["juicy apples", "juicy oranges"] + + #test 4: deal with compound nouns + case4_split = split_noun_coordination(noun_construction_case4) + assert len(case4_split) == 2 + assert isinstance(case4_split, list) + assert all(isinstance(phrase, str) for phrase in case4_split) + assert case4_split == ["hot chicken wings", "hot soup"] + + #test 5: multiple modifiers + case5_split = split_noun_coordination(noun_construction_case5) + assert case5_split == None \ No newline at end of file From e263b6c8fd4a3d60f847dc0247778d9be1486dc7 Mon Sep 17 00:00:00 2001 From: India Kerle Date: Thu, 29 Feb 2024 15:08:01 -0300 Subject: [PATCH 03/12] update test --- spacy/tests/pipeline/test_coordinationruler.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/spacy/tests/pipeline/test_coordinationruler.py b/spacy/tests/pipeline/test_coordinationruler.py index 7ca8f39f473..08d6c2a3b1d 100644 --- a/spacy/tests/pipeline/test_coordinationruler.py +++ b/spacy/tests/pipeline/test_coordinationruler.py @@ -131,29 +131,36 @@ def test_split_noun_coordination(noun_construction_case1, #test 1: no modifier - it should return None from _split_doc case1_split = split_noun_coordination(noun_construction_case1) + assert case1_split == None #test 2: modifier is at the beginning of the noun phrase case2_split = split_noun_coordination(noun_construction_case2) + assert len(case2_split) == 2 assert isinstance(case2_split, list) assert all(isinstance(phrase, str) for phrase in case2_split) assert case2_split == ["red apples", "red oranges"] + #test 3: modifier is at the end of the noun phrase case3_split = split_noun_coordination(noun_construction_case3) + assert len(case3_split) == 2 assert isinstance(case3_split, list) assert all(isinstance(phrase, str) for phrase in case3_split) - assert case3_split == ["juicy apples", "juicy oranges"] + assert case3_split == ["juicy oranges", "juicy apples"] #test 4: deal with compound nouns case4_split = split_noun_coordination(noun_construction_case4) + assert len(case4_split) == 2 assert isinstance(case4_split, list) assert all(isinstance(phrase, str) for phrase in case4_split) assert case4_split == ["hot chicken wings", "hot soup"] + #test 5: multiple modifiers case5_split = split_noun_coordination(noun_construction_case5) - assert case5_split == None \ No newline at end of 
file + + pass #this should return none i think \ No newline at end of file From d82d98b374d30c759c155b5e0e79fd9ace5582db Mon Sep 17 00:00:00 2001 From: India Kerle Date: Mon, 4 Mar 2024 09:34:02 -0300 Subject: [PATCH 04/12] update splitter --- spacy/pipeline/__init__.py | 4 +- spacy/pipeline/coordinationruler.py | 359 ++++++++++------- .../tests/pipeline/test_coordinationruler.py | 373 ++++++++++++++---- 3 files changed, 512 insertions(+), 224 deletions(-) diff --git a/spacy/pipeline/__init__.py b/spacy/pipeline/__init__.py index 52e30ad4f4c..02c900310b5 100644 --- a/spacy/pipeline/__init__.py +++ b/spacy/pipeline/__init__.py @@ -1,5 +1,5 @@ from .attributeruler import AttributeRuler -#from .coordinationruler import CoordinationSplitter +from .coordinationruler import CoordinationSplitter from .dep_parser import DependencyParser from .edit_tree_lemmatizer import EditTreeLemmatizer from .entity_linker import EntityLinker @@ -22,7 +22,7 @@ __all__ = [ "AttributeRuler", - #"CoordinationSplitter", + "CoordinationSplitter", "DependencyParser", "EditTreeLemmatizer", "EntityLinker", diff --git a/spacy/pipeline/coordinationruler.py b/spacy/pipeline/coordinationruler.py index e171dca9b26..5eeea7eccdf 100644 --- a/spacy/pipeline/coordinationruler.py +++ b/spacy/pipeline/coordinationruler.py @@ -1,66 +1,130 @@ -from typing import List, Callable, Optional, Union -from pydantic import BaseModel, validator import re +from typing import Callable, List, Optional, Union + +from pydantic import BaseModel, validator -from ..tokens import Doc from ..language import Language +from ..tokens import Doc, Token from ..vocab import Vocab from .pipe import Pipe +######### helper functions across the default splitting rules ############## + + +def _split_doc(doc: Doc) -> bool: + """Check to see if the document has a noun phrase + with a modifier and a conjunction. + + Args: + doc (Doc): The input document. + + Returns: + bool: True if the document has a noun phrase + with a modifier and a conjunction, else False. + """ + + noun_modified = False + has_conjunction = False + + for token in doc: + if token.head.pos_ == "NOUN": ## check to see that the phrase is a noun phrase + has_modifier = any( + child.dep_ == "amod" for child in token.head.children + ) # check to see if the noun has a modifier + if has_modifier: + noun_modified = True + + # check if there is a conjunction in the phrase + if token.pos_ == "CCONJ": + has_conjunction = True + + return ( + True if noun_modified and has_conjunction else False + ) # and not all_nouns_modified else False + + +def _collect_modifiers(token: Token) -> List[str]: + """Collects adverbial modifiers for a given token. + + Args: + token (Token): The input token. + + Returns: + List[str]: A list of modifiers for the token. 
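+
+    Example (illustrative): for a parse of "delicious but quite sour
+    apples", calling this on the token "apples" collects
+    ["delicious", "quite sour"].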
+ """ + modifiers = [] + for child in token.children: + if child.dep_ == "amod": + # collect adverbial modifiers for this adjective + adv_mods = [ + adv_mod.text + for adv_mod in child.children + if adv_mod.dep_ in ["advmod"] and not adv_mod.pos_ == "CCONJ" + ] + + modifier_phrase = " ".join(adv_mods + [child.text]) + modifiers.append(modifier_phrase) + # also check for conjunctions to this adjective + for conj in child.conjuncts: + adv_mods_conj = [ + adv_mod.text + for adv_mod in conj.children + if adv_mod.dep_ in ["advmod"] and not adv_mod.pos_ == "CCONJ" + ] + modifier_phrase_conj = " ".join(adv_mods_conj + [conj.text]) + modifiers.append(modifier_phrase_conj) + + return modifiers + + ########### DEFAULT COORDINATION SPLITTING RULES ############## - + + def split_noun_coordination(doc: Doc) -> Union[List[str], None]: - """Identifies and splits phrases with multiple nouns, a modifier + """Identifies and splits noun phrases with a modifier and a conjunction. - - Examples: + + construction cases: - "apples and oranges" -> None - "green apples and oranges" -> ["green apples", "green oranges"] - - "green apples and rotten oranges" -> None - "apples and juicy oranges" -> ["juicy apples", "juicy oranges"] - "hot chicken wings and soup" -> ["hot chicken wings", "hot soup"] - - "spicy ice cream and chicken wings" -> ["spicy ice cream", "spicy chicken wings"] - + - "green apples and rotten oranges" -> ["green apples", "rotten oranges"] + - "very green apples and oranges" -> ["very green apples", "very green oranges"] + - "delicious and juicy apples" -> ["delicious apples", "juicy apples"] + - "delicious but quite sour apples" -> ["delicious apples", "quite sour apples"] + - "delicious but quite sour apples and oranges" -> ["delicious apples", "quite sour apples", "delicious oranges", "quite sour oranges"] + Args: doc (Doc): The input document. Returns: - Union[List[str], None]: A list of the coordinated noun phrases, + Union[List[str], None]: A list of the coordinated noun phrases, or None if no coordinated noun phrases are found. 
""" - def _split_doc(doc: Doc) -> bool: - noun_modified = False - has_conjunction = False - - for token in doc: - if token.head.pos_ == 'NOUN': ## check to see that the phrase is a noun phrase - has_modifier = any(child.dep_ == 'amod' for child in token.head.children) #check to see if the noun has a modifier - if has_modifier: - noun_modified = True - # check if there is a conjunction linked directly to a noun - if token.dep_ == 'conj' and token.head.pos_ == 'NOUN': - has_conjunction = True - - return True if noun_modified and has_conjunction else False - phrases = [] - modified_nouns = set() + modified_nouns = set() to_split = _split_doc(doc) - - if to_split: + + if to_split: for token in doc: if token.dep_ == "amod" and token.head.pos_ == "NOUN": - modifier = token.text head_noun = token.head - + if head_noun not in modified_nouns: + modifier_phrases = _collect_modifiers(head_noun) nouns_to_modify = [head_noun] + list(head_noun.conjuncts) - + for noun in nouns_to_modify: - compound_parts = [child.text for child in noun.lefts if child.dep_ == "compound"] - complete_noun_phrase = " ".join(compound_parts + [noun.text]) - phrases.append(f"{modifier} {complete_noun_phrase}") - modified_nouns.add(noun) # Mark this noun as modified + compound_parts = [ + child.text + for child in noun.lefts + if child.dep_ == "compound" + ] + complete_noun_phrase = " ".join(compound_parts + [noun.text]) + for modifier_phrase in modifier_phrases: + phrases.append(f"{modifier_phrase} {complete_noun_phrase}") + modified_nouns.add(noun) # mark this noun as modified return phrases if phrases != [] else None else: @@ -69,119 +133,110 @@ def _split_doc(doc: Doc) -> bool: ############################################################### -# class SplittingRule(BaseModel): -# function: Callable[[Doc], Union[List[str], None]] - -# @validator("function") -# def check_return_type(cls, v): -# nlp = en_core_web_sm.load() -# dummy_doc = nlp("This is a dummy sentence.") -# result = v(dummy_doc) -# if result is not None: -# if not isinstance(result, List): -# raise ValueError( -# "The custom splitting rule must return None or a list." -# ) -# elif not all(isinstance(item, str) for item in result): -# raise ValueError( -# "The custom splitting rule must return None or a list of strings." -# ) -# return v - - -# @Language.factory( -# "coordination_splitter", requires=["token.dep", "token.tag", "token.pos"] -# ) -# def make_coordination_splitter(nlp: Language, name: str): -# """Make a CoordinationSplitter component. - -# the default splitting rules include: - -# - _split_duplicate_object: Split a text with 2 verbs and 1 object (and optionally a subject) into two texts each with 1 verb, the shared object (and its modifiers), and the subject if present. -# - _split_duplicate_verb: Split a text with 1 verb and 2 objects into two texts each with 1 verb and 1 object. -# - _split_skill_mentions: Split a text with 2 skills into 2 texts with 1 skill (the phrase must end with 'skills' and the skills must be separated by 'and') - - -# Args: -# nlp (Language): The spaCy Language object. -# name (str): The name of the component. - -# RETURNS The CoordinationSplitter component. 
- -# DOCS: xxx -# """ - -# return CoordinationSplitter(nlp.vocab, name=name) - - -# class CoordinationSplitter(Pipe): -# def __init__( -# self, -# vocab: Vocab, -# name: str = "coordination_splitter", -# rules: Optional[List[SplittingRule]] = None, -# ) -> None: -# self.name = name -# self.vocab = vocab -# if rules is None: -# default_rules = [ -# _split_duplicate_object, -# _split_duplicate_verb, -# _split_skill_mentions, -# ] -# self.rules = [SplittingRule(function=rule) for rule in default_rules] -# else: -# # Ensure provided rules are wrapped in SplittingRule instances -# self.rules = [ -# rule -# if isinstance(rule, SplittingRule) -# else SplittingRule(function=rule) -# for rule in rules -# ] - -# def clear_rules(self) -> None: -# """Clear the default splitting rules.""" -# self.rules = [] - -# def add_default_rules(self) -> List[SplittingRule]: -# """Reset the default splitting rules.""" -# default_rules = [ -# _split_duplicate_object, -# _split_duplicate_verb, -# _split_skill_mentions, -# ] -# self.rules = [SplittingRule(function=rule) for rule in default_rules] - -# def add_rule(self, rule: Callable[[Doc], Union[List[str], None]]) -> None: -# """Add a single splitting rule to the default rules.""" -# validated_rule = SplittingRule(function=rule) -# self.rules.append(validated_rule) - -# def add_rules(self, rules: List[Callable[[Doc], Union[List[str], None]]]) -> None: -# """Add a list of splitting rules to the default rules. - -# Args: -# rules (List[Callable[[Doc], Union[List[str], None]]]): A list of functions to be added as splitting rules. -# """ -# for rule in rules: -# # Wrap each rule in a SplittingRule instance to ensure it's validated -# validated_rule = SplittingRule(function=rule) -# self.rules.append(validated_rule) - -# def __call__(self, doc: Doc) -> Doc: -# """Apply the splitting rules to the doc. - -# Args: -# doc (Doc): The spaCy Doc object. - -# Returns: -# Doc: The modified spaCy Doc object. -# """ -# if doc.lang_ != "en": -# return doc - -# for rule in self.rules: -# split = rule.function(doc) -# if split: -# return Doc(doc.vocab, words=split) -# return doc + +class SplittingRule(BaseModel): + function: Callable[[Doc], Union[List[str], None]] + + @validator("function") + def check_return_type(cls, v): + dummy_doc = Doc(Language().vocab, words=["dummy", "doc"], spaces=[True, False]) + result = v(dummy_doc) + if result is not None: + if not isinstance(result, List): + raise ValueError( + "The custom splitting rule must return None or a list." + ) + elif not all(isinstance(item, str) for item in result): + raise ValueError( + "The custom splitting rule must return None or a list of strings." + ) + return v + + +@Language.factory( + "coordination_splitter", requires=["token.dep", "token.tag", "token.pos"] +) +def make_coordination_splitter(nlp: Language, name: str): + """Make a CoordinationSplitter component. + + the default splitting rules include: + - split_noun_coordination + + Args: + nlp (Language): The spaCy Language object. + name (str): The name of the component. + + RETURNS The CoordinationSplitter component. 
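+
+    EXAMPLE (illustrative):
+        >>> coord_splitter = nlp.add_pipe("coordination_splitter")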
+ + DOCS: xxx + """ + + return CoordinationSplitter(nlp.vocab, name=name) + + +class CoordinationSplitter(Pipe): + def __init__( + self, + vocab: Vocab, + name: str = "coordination_splitter", + rules: Optional[List[SplittingRule]] = None, + ) -> None: + self.name = name + self.vocab = vocab + if rules is None: + default_rules = [ + split_noun_coordination, + ] + self.rules = [SplittingRule(function=rule) for rule in default_rules] + else: + self.rules = [ + rule + if isinstance(rule, SplittingRule) + else SplittingRule(function=rule) + for rule in rules + ] + + def clear_rules(self) -> None: + """Clear the default splitting rules.""" + self.rules = [] + + def add_default_rules(self) -> List[SplittingRule]: + """Reset the default splitting rules.""" + default_rules = [ + split_noun_coordination, + ] + self.rules = [SplittingRule(function=rule) for rule in default_rules] + + def add_rule(self, rule: Callable[[Doc], Union[List[str], None]]) -> None: + """Add a single splitting rule to the default rules.""" + validated_rule = SplittingRule(function=rule) + self.rules.append(validated_rule) + + def add_rules(self, rules: List[Callable[[Doc], Union[List[str], None]]]) -> None: + """Add a list of splitting rules to the default rules. + + Args: + rules (List[Callable[[Doc], Union[List[str], None]]]): A list of functions to be added as splitting rules. + """ + for rule in rules: + # Wrap each rule in a SplittingRule instance to ensure it's validated + validated_rule = SplittingRule(function=rule) + self.rules.append(validated_rule) + + def __call__(self, doc: Doc) -> Doc: + """Apply the splitting rules to the doc. + + Args: + doc (Doc): The spaCy Doc object. + + Returns: + Doc: The modified spaCy Doc object. + """ + if doc.lang_ != "en": + return doc + + for rule in self.rules: + split = rule.function(doc) + if split: + return Doc(doc.vocab, words=split) + return doc diff --git a/spacy/tests/pipeline/test_coordinationruler.py b/spacy/tests/pipeline/test_coordinationruler.py index 08d6c2a3b1d..7ead426cc11 100644 --- a/spacy/tests/pipeline/test_coordinationruler.py +++ b/spacy/tests/pipeline/test_coordinationruler.py @@ -1,87 +1,84 @@ -import pytest from typing import List -from spacy.tokens import Doc -import spacy +import pytest +import spacy from spacy.pipeline.coordinationruler import split_noun_coordination +from spacy.tokens import Doc + @pytest.fixture def nlp(): return spacy.blank("en") -### NOUN CONSTRUCTION CASES ### + +### CONSTRUCTION CASES ### @pytest.fixture def noun_construction_case1(nlp): words = ["apples", "and", "oranges"] - spaces = [True, True, False] # Indicates whether the word is followed by a space + spaces = [True, True, False] pos_tags = ["NOUN", "CCONJ", "NOUN"] dep_relations = ["nsubj", "cc", "conj"] doc = Doc(nlp.vocab, words=words, spaces=spaces) - #set pos_ and dep_ attributes for token, pos, dep in zip(doc, pos_tags, dep_relations): token.pos_ = pos token.dep_ = dep - - # # define head relationships manually - doc[1].head = doc[2] # "and" -> "oranges" - doc[2].head = doc[0] # "oranges" -> "apples" - doc[0].head = doc[0] - + + doc[1].head = doc[2] + doc[2].head = doc[0] + doc[0].head = doc[0] + return doc - + + @pytest.fixture def noun_construction_case2(nlp): words = ["red", "apples", "and", "oranges"] - spaces = [True, True, True, False] # Indicates whether the word is followed by a space + spaces = [True, True, True, False] pos_tags = ["ADJ", "NOUN", "CCONJ", "NOUN"] dep_relations = ["amod", "nsubj", "cc", "conj"] - # Create a Doc object manually doc = 
Doc(nlp.vocab, words=words, spaces=spaces) - #set pos_ and dep_ attributes for token, pos, dep in zip(doc, pos_tags, dep_relations): token.pos_ = pos token.dep_ = dep - - # define head relationships manually - doc[0].head = doc[1] - doc[2].head = doc[3] - doc[3].head = doc[1] - + + doc[0].head = doc[1] + doc[2].head = doc[3] + doc[3].head = doc[1] + return doc + @pytest.fixture def noun_construction_case3(nlp): words = ["apples", "and", "juicy", "oranges"] - spaces = [True, True, True, False] # Indicates whether the word is followed by a space. + spaces = [True, True, True, False] pos_tags = ["NOUN", "CCONJ", "ADJ", "NOUN"] dep_relations = ["nsubj", "cc", "amod", "conj"] - #create a Doc object manually doc = Doc(nlp.vocab, words=words, spaces=spaces) - #set POS and dependency tags for token, pos, dep in zip(doc, pos_tags, dep_relations): token.pos_ = pos token.dep_ = dep - #defining head relationships manually - doc[0].head = doc[0] # "apples" as root, pointing to itself for simplicity. - doc[1].head = doc[3] # "and" -> "oranges" - doc[2].head = doc[3] # "juicy" -> "oranges" - doc[3].head = doc[0] # "oranges" -> "apples", indicating a conjunctive relationship - + doc[0].head = doc[0] + doc[1].head = doc[3] + doc[2].head = doc[3] + doc[3].head = doc[0] + return doc + @pytest.fixture def noun_construction_case4(nlp): words = ["hot", "chicken", "wings", "and", "soup"] - spaces = [True, True, True, True, False] # Indicates whether the word is followed by a space. - pos_tags= ["ADJ", "NOUN", "NOUN", "CCONJ", "NOUN"] + spaces = [True, True, True, True, False] + pos_tags = ["ADJ", "NOUN", "NOUN", "CCONJ", "NOUN"] dep_relations = ["amod", "compound", "ROOT", "cc", "conj"] doc = Doc(nlp.vocab, words=words, spaces=spaces) @@ -90,77 +87,313 @@ def noun_construction_case4(nlp): token.pos_ = pos token.dep_ = dep - # Define head relationships manually for "hot chicken wings and soup". - doc[0].head = doc[2] # "hot" -> "wings" - doc[1].head = doc[2] # "chicken" -> "wings" - doc[2].head = doc[2] # "wings" as root - doc[3].head = doc[4] # "and" -> "soup" - doc[4].head = doc[2] # "soup" -> "wings" - + doc[0].head = doc[2] + doc[1].head = doc[2] + doc[2].head = doc[2] + doc[3].head = doc[4] + doc[4].head = doc[2] + return doc + @pytest.fixture def noun_construction_case5(nlp): words = ["green", "apples", "and", "rotten", "oranges"] - spaces = [True, True, True, True, False] # Indicates whether the word is followed by a space. + spaces = [True, True, True, True, False] pos_tags = ["ADJ", "NOUN", "CCONJ", "ADJ", "NOUN"] dep_relations = ["amod", "ROOT", "cc", "amod", "conj"] doc = Doc(nlp.vocab, words=words, spaces=spaces) - # Set POS and dependency tags. for token, pos, dep in zip(doc, pos_tags, dep_relations): token.pos_ = pos token.dep_ = dep - # Define head relationships manually for "green apples and rotten oranges". 
- doc[0].head = doc[1] # "green" -> "apples" - doc[1].head = doc[1] # "apples" as root - doc[2].head = doc[4] # "and" -> "oranges" - doc[3].head = doc[4] # "rotten" -> "oranges" - doc[4].head = doc[1] # "oranges" -> "apples" - + doc[0].head = doc[1] + doc[1].head = doc[1] + doc[2].head = doc[4] + doc[3].head = doc[4] + doc[4].head = doc[1] + + return doc + + +@pytest.fixture +def noun_construction_case6(nlp): + words = ["very", "green", "apples", "and", "oranges"] + spaces = [True, True, True, True, False] + pos_tags = ["ADV", "ADJ", "NOUN", "CCONJ", "NOUN"] + dep_relations = ["advmod", "amod", "ROOT", "cc", "conj"] + + doc = Doc(nlp.vocab, words=words, spaces=spaces) + + for token, pos, dep in zip(doc, pos_tags, dep_relations): + token.pos_ = pos + token.dep_ = dep + + doc[0].head = doc[1] + doc[1].head = doc[2] + doc[2].head = doc[2] + doc[3].head = doc[4] + doc[4].head = doc[2] + + return doc + + +@pytest.fixture +def noun_construction_case7(nlp): + words = ["fresh", "and", "juicy", "apples"] + spaces = [True, True, True, False] + pos_tags = ["ADJ", "CCONJ", "ADJ", "NOUN"] + dep_relations = ["amod", "cc", "conj", "ROOT"] + + doc = Doc(nlp.vocab, words=words, spaces=spaces) + + for token, pos, dep in zip(doc, pos_tags, dep_relations): + token.pos_ = pos + token.dep_ = dep + + doc[0].head = doc[3] + doc[1].head = doc[2] + doc[2].head = doc[0] + doc[3].head = doc[3] + return doc -#test split_noun_coordination on 5 different cases -def test_split_noun_coordination(noun_construction_case1, - noun_construction_case2, - noun_construction_case3, - noun_construction_case4, - noun_construction_case5): - - #test 1: no modifier - it should return None from _split_doc + +@pytest.fixture +def noun_construction_case8(nlp): + words = ["fresh", ",", "juicy", "and", "delicious", "apples"] + spaces = [True, True, True, True, True, False] + pos_tags = ["ADJ", "PUNCT", "ADJ", "CCONJ", "ADJ", "NOUN"] + dep_relations = ["amod", "punct", "conj", "cc", "conj", "ROOT"] + + doc = Doc(nlp.vocab, words=words, spaces=spaces) + + for token, pos, dep in zip(doc, pos_tags, dep_relations): + token.pos_ = pos + token.dep_ = dep + + doc[0].head = doc[5] + doc[1].head = doc[2] + doc[2].head = doc[0] + doc[3].head = doc[4] + doc[4].head = doc[0] + doc[5].head = doc[5] + + return doc + + +@pytest.fixture +def noun_construction_case9(nlp): + words = ["fresh", "and", "quite", "sour", "apples"] + spaces = [True, True, True, True, False] + pos_tags = ["ADJ", "CCONJ", "ADV", "ADJ", "NOUN"] + dep_relations = ["amod", "cc", "advmod", "conj", "ROOT"] + + doc = Doc(nlp.vocab, words=words, spaces=spaces) + + for token, pos, dep in zip(doc, pos_tags, dep_relations): + token.pos_ = pos + token.dep_ = dep + + doc[0].head = doc[4] + doc[1].head = doc[3] + doc[2].head = doc[3] + doc[3].head = doc[0] + doc[4].head = doc[4] + + return doc + + +@pytest.fixture +def noun_construction_case10(nlp): + words = ["fresh", "but", "quite", "sour", "apples", "and", "chicken", "wings"] + spaces = [True, True, True, True, True, True, True, False] + pos_tags = ["ADJ", "CCONJ", "ADV", "ADJ", "NOUN", "CCONJ", "NOUN", "NOUN"] + dep_relations = ["amod", "cc", "advmod", "conj", "ROOT", "cc", "conj", "compound"] + + doc = Doc(nlp.vocab, words=words, spaces=spaces) + + for token, pos, dep in zip(doc, pos_tags, dep_relations): + token.pos_ = pos + token.dep_ = dep + + doc[0].head = doc[4] + doc[1].head = doc[3] + doc[2].head = doc[3] + doc[3].head = doc[0] + doc[4].head = doc[4] + doc[5].head = doc[6] + doc[6].head = doc[4] + doc[7].head = doc[6] + + return doc + 
+
+@pytest.fixture
+def noun_construction_case11(nlp):
+    words = ["water", "and", "power", "meters", "and", "electrical", "sockets"]
+    spaces = [True, True, True, True, True, True, False]
+    pos_tags = ["NOUN", "CCONJ", "NOUN", "NOUN", "CCONJ", "ADJ", "NOUN"]
+    dep_relations = ["compound", "cc", "compound", "ROOT", "cc", "amod", "conj"]
+
+    doc = Doc(nlp.vocab, words=words, spaces=spaces)
+
+    for token, pos, dep in zip(doc, pos_tags, dep_relations):
+        token.pos_ = pos
+        token.dep_ = dep
+
+    doc[0].head = doc[2]
+    doc[1].head = doc[2]
+    doc[2].head = doc[3]
+    doc[3].head = doc[3]
+    doc[4].head = doc[6]
+    doc[5].head = doc[6]
+    doc[6].head = doc[3]
+
+    return doc
+
+
+### splitting rules ###
+def _my_custom_splitting_rule(doc: Doc) -> List[str]:
+    split_phrases = []
+    for token in doc:
+        if token.text == "red":
+            split_phrases.append("test1")
+            split_phrases.append("test2")
+    return split_phrases
+
+
+# test split_noun_coordination on 6 different cases
+def test_split_noun_coordination(
+    noun_construction_case1,
+    noun_construction_case2,
+    noun_construction_case3,
+    noun_construction_case4,
+    # noun_construction_case5,
+    noun_construction_case6,
+    noun_construction_case7,
+    noun_construction_case8,
+    noun_construction_case9,
+    noun_construction_case10,
+    noun_construction_case11,
+):
+
+    # test 1: no modifier - it should return None from _split_doc
     case1_split = split_noun_coordination(noun_construction_case1)
-
+
     assert case1_split == None
-
-    #test 2: modifier is at the beginning of the noun phrase
+
+    # test 2: modifier is at the beginning of the noun phrase
     case2_split = split_noun_coordination(noun_construction_case2)
-
+
     assert len(case2_split) == 2
     assert isinstance(case2_split, list)
     assert all(isinstance(phrase, str) for phrase in case2_split)
     assert case2_split == ["red apples", "red oranges"]
 
-    #test 3: modifier is at the end of the noun phrase
+    # test 3: modifier is at the end of the noun phrase
     case3_split = split_noun_coordination(noun_construction_case3)
     assert len(case3_split) == 2
     assert isinstance(case3_split, list)
     assert all(isinstance(phrase, str) for phrase in case3_split)
     assert case3_split == ["juicy oranges", "juicy apples"]
 
-    #test 4: deal with compound nouns
+    # test 4: deal with compound nouns
     case4_split = split_noun_coordination(noun_construction_case4)
     assert len(case4_split) == 2
     assert isinstance(case4_split, list)
     assert all(isinstance(phrase, str) for phrase in case4_split)
     assert case4_split == ["hot chicken wings", "hot soup"]
 
-
-    #test 5: multiple modifiers
-    case5_split = split_noun_coordination(noun_construction_case5)
-    pass #this should return none i think
\ No newline at end of file
+    # #test 5: multiple modifiers
+    # case5_split = split_noun_coordination(noun_construction_case5)
+    # assert case5_split == None
+
+    # test 6: modifier phrases
+    case6_split = split_noun_coordination(noun_construction_case6)
+
+    assert len(case6_split) == 2
+    assert isinstance(case6_split, list)
+    assert all(isinstance(phrase, str) for phrase in case6_split)
+    assert case6_split == ["very green apples", "very green oranges"]
+
+    ## test cases for coordinating adjectives
+
+    # test 7:
+    case7_split = split_noun_coordination(noun_construction_case7)
+    assert case7_split == ["fresh apples", "juicy apples"]
+
+    # test 8:
+    case8_split = split_noun_coordination(noun_construction_case8)
+    assert case8_split == ["fresh apples", "juicy apples", "delicious apples"]
+
+    # test 9:
+    case9_split = split_noun_coordination(noun_construction_case9)
+    assert case9_split == ["fresh apples", "quite sour apples"]
["fresh apples", "quite sour apples"] + + # test 10: + case10_split = split_noun_coordination(noun_construction_case10) + assert case10_split == ["fresh apples", "quite sour apples", "chicken soup"] + + # test 11: + case11_split = split_noun_coordination(noun_construction_case11) + assert case11_split == None + + +################### test factory ############################## + + +def test_coordinationruler(nlp, noun_construction_case2): + assert len(noun_construction_case2) == 4 + assert [d.text for d in noun_construction_case2] == [ + "red", + "apples", + "and", + "oranges", + ] + + coord_splitter = nlp.add_pipe("coordination_splitter") + assert len(coord_splitter.rules) == 1 + assert coord_splitter.name == "coordination_splitter" + doc_split = coord_splitter(noun_construction_case2) + assert len(doc_split) == 2 + assert [t.text for t in doc_split] == ["red apples", "red oranges"] + + +def test_coordinationruler_clear_rules(nlp): + coord_splitter = nlp.add_pipe("coordination_splitter") + assert len(coord_splitter.rules) == 1 + coord_splitter.clear_rules() + assert len(coord_splitter.rules) == 0 + assert coord_splitter.rules == [] + + +def test_coordinationruler_add_rule(nlp): + coord_splitter = nlp.add_pipe("coordination_splitter") + assert len(coord_splitter.rules) == 1 + coord_splitter.add_rule(_my_custom_splitting_rule) + assert len(coord_splitter.rules) == 2 + + +def test_coordinationruler_add_rules(nlp, noun_construction_case2): + + coord_splitter = nlp.add_pipe("coordination_splitter") + coord_splitter.clear_rules() + coord_splitter.add_rules([_my_custom_splitting_rule, _my_custom_splitting_rule]) + assert len(coord_splitter.rules) == 2 + doc_split = coord_splitter(noun_construction_case2) + assert len(doc_split) == 2 + + assert [t.text for t in doc_split] == ["test1", "test2"] + + +def test_coordinationruler_add_default_rules(nlp): + coord_splitter = nlp.add_pipe("coordination_splitter") + coord_splitter.clear_rules() + assert len(coord_splitter.rules) == 0 + coord_splitter.add_default_rules() + assert len(coord_splitter.rules) == 1 From 3b37fb6dcf4ee149e3bf6be9624820caba8f1fbf Mon Sep 17 00:00:00 2001 From: India Kerle Date: Mon, 4 Mar 2024 09:45:47 -0300 Subject: [PATCH 05/12] update typing hint --- spacy/pipeline/coordinationruler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/pipeline/coordinationruler.py b/spacy/pipeline/coordinationruler.py index 5eeea7eccdf..983cf5722d8 100644 --- a/spacy/pipeline/coordinationruler.py +++ b/spacy/pipeline/coordinationruler.py @@ -200,7 +200,7 @@ def clear_rules(self) -> None: """Clear the default splitting rules.""" self.rules = [] - def add_default_rules(self) -> List[SplittingRule]: + def add_default_rules(self) -> None: """Reset the default splitting rules.""" default_rules = [ split_noun_coordination, From 59d8ee4132a759be7ae59c5d8b27f4e813194376 Mon Sep 17 00:00:00 2001 From: India Kerle Date: Mon, 4 Mar 2024 09:53:53 -0300 Subject: [PATCH 06/12] use field validator --- spacy/pipeline/coordinationruler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/pipeline/coordinationruler.py b/spacy/pipeline/coordinationruler.py index 983cf5722d8..1aa6525c87d 100644 --- a/spacy/pipeline/coordinationruler.py +++ b/spacy/pipeline/coordinationruler.py @@ -1,7 +1,7 @@ import re from typing import Callable, List, Optional, Union -from pydantic import BaseModel, validator +from pydantic import BaseModel, field_validator from ..language import Language from ..tokens import Doc, Token @@ -137,7 
From 3b37fb6dcf4ee149e3bf6be9624820caba8f1fbf Mon Sep 17 00:00:00 2001
From: India Kerle
Date: Mon, 4 Mar 2024 09:45:47 -0300
Subject: [PATCH 05/12] update typing hint

---
 spacy/pipeline/coordinationruler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/pipeline/coordinationruler.py b/spacy/pipeline/coordinationruler.py
index 5eeea7eccdf..983cf5722d8 100644
--- a/spacy/pipeline/coordinationruler.py
+++ b/spacy/pipeline/coordinationruler.py
@@ -200,7 +200,7 @@ def clear_rules(self) -> None:
         """Clear the default splitting rules."""
         self.rules = []
 
-    def add_default_rules(self) -> List[SplittingRule]:
+    def add_default_rules(self) -> None:
         """Reset the default splitting rules."""
         default_rules = [
             split_noun_coordination,

From 59d8ee4132a759be7ae59c5d8b27f4e813194376 Mon Sep 17 00:00:00 2001
From: India Kerle
Date: Mon, 4 Mar 2024 09:53:53 -0300
Subject: [PATCH 06/12] use field validator

---
 spacy/pipeline/coordinationruler.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/pipeline/coordinationruler.py b/spacy/pipeline/coordinationruler.py
index 983cf5722d8..1aa6525c87d 100644
--- a/spacy/pipeline/coordinationruler.py
+++ b/spacy/pipeline/coordinationruler.py
@@ -1,7 +1,7 @@
 import re
 from typing import Callable, List, Optional, Union
 
-from pydantic import BaseModel, validator
+from pydantic import BaseModel, field_validator
 
 from ..language import Language
 from ..tokens import Doc, Token
@@ -137,7 +137,7 @@ def split_noun_coordination(doc: Doc) -> Union[List[str], None]:
 class SplittingRule(BaseModel):
     function: Callable[[Doc], Union[List[str], None]]
 
-    @validator("function")
+    @field_validator("function")
     def check_return_type(cls, v):
         dummy_doc = Doc(Language().vocab, words=["dummy", "doc"], spaces=[True, False])
         result = v(dummy_doc)
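For context on what `check_return_type` enforces in either spelling (`validator` or `field_validator`): each rule is probed at registration time with the two-token dummy Doc shown above, and any callable whose return value is neither None nor a list of strings is rejected with a ValueError. A hypothetical rule that would fail the probe (illustrative only, not part of the patch):

from spacy.tokens import Doc

def bad_rule(doc: Doc):
    # Returns a plain string, not None or a list of strings, so wrapping it
    # in SplittingRule(function=bad_rule) raises a ValueError.
    return doc.text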
From 8b64741502492c658eaabec7332ac3574479ed58 Mon Sep 17 00:00:00 2001
From: India Kerle
Date: Thu, 7 Mar 2024 08:10:01 -0300
Subject: [PATCH 07/12] minor changes

---
 spacy/pipeline/coordinationruler.py           | 32 +++++++++++++-----
 .../tests/pipeline/test_coordinationruler.py  | 33 +++++++++----------
 2 files changed, 39 insertions(+), 26 deletions(-)

diff --git a/spacy/pipeline/coordinationruler.py b/spacy/pipeline/coordinationruler.py
index 1aa6525c87d..1b8a1d35901 100644
--- a/spacy/pipeline/coordinationruler.py
+++ b/spacy/pipeline/coordinationruler.py
@@ -26,22 +26,36 @@ def _split_doc(doc: Doc) -> bool:
     noun_modified = False
     has_conjunction = False
 
+    noun_count = 0
+    modifiers = set()
+
     for token in doc:
+        if token.pos_ == "NOUN":
+            noun_count += 1
         if token.head.pos_ == "NOUN":  ## check to see that the phrase is a noun phrase
-            has_modifier = any(
-                child.dep_ == "amod" for child in token.head.children
-            )  # check to see if the noun has a modifier
-            if has_modifier:
-                noun_modified = True
-
+            for child in token.head.children:
+                if child.dep_ in ["amod", "advmod", "nmod"]:
+                    modifiers.add(child.text)
+                    noun_modified = True  
+        for child in token.children:
+            if child.dep_ == "conj" and child.pos_ == "ADJ":
+                modifiers.add(child.text)
+
         # check if there is a conjunction in the phrase
         if token.pos_ == "CCONJ":
            has_conjunction = True
 
-    return (
-        True if noun_modified and has_conjunction else False
-    )  # and not all_nouns_modified else False
+    modifier_count = len(modifiers)
+
+    noun_modified = modifier_count > 0
+
+    all_nouns_modified = modifier_count == noun_count
+
+    if noun_modified and has_conjunction and not all_nouns_modified:
+        return True
+
+    else:
+        return False
 
 def _collect_modifiers(token: Token) -> List[str]:
     """Collects adverbial modifiers for a given token.
diff --git a/spacy/tests/pipeline/test_coordinationruler.py b/spacy/tests/pipeline/test_coordinationruler.py
index 7ead426cc11..eb55df3264e 100644
--- a/spacy/tests/pipeline/test_coordinationruler.py
+++ b/spacy/tests/pipeline/test_coordinationruler.py
@@ -211,7 +211,7 @@ def noun_construction_case10(nlp):
     words = ["fresh", "but", "quite", "sour", "apples", "and", "chicken", "wings"]
     spaces = [True, True, True, True, True, True, True, False]
     pos_tags = ["ADJ", "CCONJ", "ADV", "ADJ", "NOUN", "CCONJ", "NOUN", "NOUN"]
-    dep_relations = ["amod", "cc", "advmod", "conj", "ROOT", "cc", "conj", "compound"]
+    dep_relations = ["amod", "cc", "advmod", "amod", "ROOT", "cc", "compound", "conj"]
 
     doc = Doc(nlp.vocab, words=words, spaces=spaces)
 
@@ -219,14 +219,13 @@ def noun_construction_case10(nlp):
         token.pos_ = pos
         token.dep_ = dep
 
-    doc[0].head = doc[4]
-    doc[1].head = doc[3]
-    doc[2].head = doc[3]
-    doc[3].head = doc[0]
-    doc[4].head = doc[4]
-    doc[5].head = doc[6]
-    doc[6].head = doc[4]
-    doc[7].head = doc[6]
+    doc[0].head = doc[4]  
+    doc[1].head = doc[4]  
+    doc[2].head = doc[3]  
+    doc[3].head = doc[4]  
+    doc[5].head = doc[4]  
+    doc[6].head = doc[7]  
+    doc[7].head = doc[4]  
 
     return doc
 
@@ -271,7 +270,7 @@ def test_split_noun_coordination(
     noun_construction_case2,
     noun_construction_case3,
     noun_construction_case4,
-    # noun_construction_case5,
+    noun_construction_case5,
     noun_construction_case6,
     noun_construction_case7,
     noun_construction_case8,
@@ -309,9 +308,9 @@ def test_split_noun_coordination(
     assert all(isinstance(phrase, str) for phrase in case4_split)
     assert case4_split == ["hot chicken wings", "hot soup"]
 
-    # #test 5: multiple modifiers
-    # case5_split = split_noun_coordination(noun_construction_case5)
-    # assert case5_split == None
+    # #test 5: same # of modifiers as nouns
+    case5_split = split_noun_coordination(noun_construction_case5)
+    assert case5_split == None
 
     # test 6: modifier phrases
     case6_split = split_noun_coordination(noun_construction_case6)
@@ -325,6 +324,7 @@ def test_split_noun_coordination(
 
     # test 7:
     case7_split = split_noun_coordination(noun_construction_case7)
+    print(case7_split)
     assert case7_split == ["fresh apples", "juicy apples"]
 
     # test 8:
@@ -337,12 +337,11 @@ def test_split_noun_coordination(
 
     # test 10:
     case10_split = split_noun_coordination(noun_construction_case10)
-    assert case10_split == ["fresh apples", "quite sour apples", "chicken soup"]
+    assert case10_split == ['fresh apples', 'quite sour apples', 'fresh chicken wings', 'quite sour chicken wings']
 
-    # test 11:
+    # test 11: 
     case11_split = split_noun_coordination(noun_construction_case11)
-    assert case11_split == None
-
+    pass
 
 ################### test factory ##############################
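The reworked `_split_doc` gate in this patch is easiest to read in isolation: split only when at least one noun is modified, a conjunction is present, and not every noun carries its own modifier. A standalone sketch of that decision logic (distilled from the hunk above, not the patched function itself):

def should_split(modifier_count: int, noun_count: int, has_conjunction: bool) -> bool:
    # Some-but-not-all nouns modified, plus a conjunction, triggers a split.
    noun_modified = modifier_count > 0
    all_nouns_modified = modifier_count == noun_count
    return noun_modified and has_conjunction and not all_nouns_modified

assert should_split(1, 2, True)       # "fresh apples and oranges" -> split
assert not should_split(2, 2, True)   # one modifier per noun -> leave intact (test 5)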
From b502de469102215bbb9f2ee18364e6137e4d9b85 Mon Sep 17 00:00:00 2001
From: India Kerle
Date: Thu, 7 Mar 2024 08:11:44 -0300
Subject: [PATCH 08/12] run isort

---
 spacy/pipeline/coordinationruler.py           |  5 ++--
 .../tests/pipeline/test_coordinationruler.py  | 24 ++++++++++++-------
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/spacy/pipeline/coordinationruler.py b/spacy/pipeline/coordinationruler.py
index 1b8a1d35901..177fcd45a8a 100644
--- a/spacy/pipeline/coordinationruler.py
+++ b/spacy/pipeline/coordinationruler.py
@@ -36,11 +36,11 @@ def _split_doc(doc: Doc) -> bool:
             for child in token.head.children:
                 if child.dep_ in ["amod", "advmod", "nmod"]:
                     modifiers.add(child.text)
-                    noun_modified = True  
+                    noun_modified = True
         for child in token.children:
             if child.dep_ == "conj" and child.pos_ == "ADJ":
                 modifiers.add(child.text)
-
+
         # check if there is a conjunction in the phrase
         if token.pos_ == "CCONJ":
             has_conjunction = True
@@ -57,6 +57,7 @@ def _split_doc(doc: Doc) -> bool:
     else:
         return False
 
+
 def _collect_modifiers(token: Token) -> List[str]:
     """Collects adverbial modifiers for a given token.
 
diff --git a/spacy/tests/pipeline/test_coordinationruler.py b/spacy/tests/pipeline/test_coordinationruler.py
index eb55df3264e..b276f25b094 100644
--- a/spacy/tests/pipeline/test_coordinationruler.py
+++ b/spacy/tests/pipeline/test_coordinationruler.py
@@ -219,13 +219,13 @@ def noun_construction_case10(nlp):
         token.pos_ = pos
         token.dep_ = dep
 
-    doc[0].head = doc[4]  
-    doc[1].head = doc[4]  
-    doc[2].head = doc[3]  
-    doc[3].head = doc[4]  
-    doc[5].head = doc[4]  
-    doc[6].head = doc[7]  
-    doc[7].head = doc[4]  
+    doc[0].head = doc[4]
+    doc[1].head = doc[4]
+    doc[2].head = doc[3]
+    doc[3].head = doc[4]
+    doc[5].head = doc[4]
+    doc[6].head = doc[7]
+    doc[7].head = doc[4]
 
     return doc
 
@@ -337,12 +337,18 @@ def test_split_noun_coordination(
 
     # test 10:
    case10_split = split_noun_coordination(noun_construction_case10)
-    assert case10_split == ['fresh apples', 'quite sour apples', 'fresh chicken wings', 'quite sour chicken wings']
+    assert case10_split == [
+        "fresh apples",
+        "quite sour apples",
+        "fresh chicken wings",
+        "quite sour chicken wings",
+    ]
 
-    # test 11: 
+    # test 11:
     case11_split = split_noun_coordination(noun_construction_case11)
     pass
 
+
 ################### test factory ##############################

From 84bdaf1fdde11e11b0b8aa9ef363e318c8b997fb Mon Sep 17 00:00:00 2001
From: India Kerle
Date: Thu, 7 Mar 2024 08:27:32 -0300
Subject: [PATCH 09/12] change field validator

---
 spacy/pipeline/coordinationruler.py           | 21 +++----------------
 .../tests/pipeline/test_coordinationruler.py  |  4 ++--
 2 files changed, 5 insertions(+), 20 deletions(-)

diff --git a/spacy/pipeline/coordinationruler.py b/spacy/pipeline/coordinationruler.py
index 177fcd45a8a..4f65c063098 100644
--- a/spacy/pipeline/coordinationruler.py
+++ b/spacy/pipeline/coordinationruler.py
@@ -1,7 +1,7 @@
 import re
 from typing import Callable, List, Optional, Union
 
-from pydantic import BaseModel, field_validator
+from pydantic import BaseModel, validator
 
 from ..language import Language
 from ..tokens import Doc, Token
@@ -26,32 +26,17 @@ def _split_doc(doc: Doc) -> bool:
     noun_modified = False
     has_conjunction = False
 
-    noun_count = 0
-    modifiers = set()
-
     for token in doc:
-        if token.pos_ == "NOUN":
-            noun_count += 1
         if token.head.pos_ == "NOUN":  ## check to see that the phrase is a noun phrase
             for child in token.head.children:
                 if child.dep_ in ["amod", "advmod", "nmod"]:
-                    modifiers.add(child.text)
                     noun_modified = True
-        for child in token.children:
-            if child.dep_ == "conj" and child.pos_ == "ADJ":
-                modifiers.add(child.text)
 
         # check if there is a conjunction in the phrase
         if token.pos_ == "CCONJ":
             has_conjunction = True
 
-    modifier_count = len(modifiers)
-
-    noun_modified = modifier_count > 0
-
-    all_nouns_modified = modifier_count == noun_count
-
-    if noun_modified and has_conjunction and not all_nouns_modified:
+    if noun_modified and has_conjunction:
         return True
 
     else:
         return False
@@ -152,7 +137,7 @@ def split_noun_coordination(doc: Doc) -> Union[List[str], None]:
 class SplittingRule(BaseModel):
     function: Callable[[Doc], Union[List[str], None]]
 
-    @field_validator("function")
+    @validator("function")
     def check_return_type(cls, v):
         dummy_doc = Doc(Language().vocab, words=["dummy", "doc"], spaces=[True, False])
         result = v(dummy_doc)
diff --git a/spacy/tests/pipeline/test_coordinationruler.py b/spacy/tests/pipeline/test_coordinationruler.py
index b276f25b094..38bfc19e59a 100644
--- a/spacy/tests/pipeline/test_coordinationruler.py
+++ b/spacy/tests/pipeline/test_coordinationruler.py
@@ -309,8 +309,8 @@ def test_split_noun_coordination(
     assert case4_split == ["hot chicken wings", "hot soup"]
 
     # #test 5: same # of modifiers as nouns
-    case5_split = split_noun_coordination(noun_construction_case5)
-    assert case5_split == None
+    # case5_split = split_noun_coordination(noun_construction_case5)
+    # assert case5_split == None
 
     # test 6: modifier phrases
     case6_split = split_noun_coordination(noun_construction_case6)
From fca1f3d8408991a77dce06daff44af5cdbf022dc Mon Sep 17 00:00:00 2001
From: India Kerle
Date: Thu, 7 Mar 2024 08:37:53 -0300
Subject: [PATCH 10/12] deal with import error

---
 spacy/pipeline/coordinationruler.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/spacy/pipeline/coordinationruler.py b/spacy/pipeline/coordinationruler.py
index 4f65c063098..ab99f16ccc5 100644
--- a/spacy/pipeline/coordinationruler.py
+++ b/spacy/pipeline/coordinationruler.py
@@ -1,7 +1,12 @@
 import re
 from typing import Callable, List, Optional, Union
 
-from pydantic import BaseModel, validator
+from pydantic import BaseModel
+
+try:
+    from pydantic import validator
+except ImportError:
+    from pydantic import field_validator as validator
 
 from ..language import Language
 from ..tokens import Doc, Token

From 52342fc741141eb74a1b6788ce863d79261ff1dd Mon Sep 17 00:00:00 2001
From: India Kerle
Date: Thu, 7 Mar 2024 08:46:35 -0300
Subject: [PATCH 11/12] add type ignore

---
 spacy/pipeline/coordinationruler.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/pipeline/coordinationruler.py b/spacy/pipeline/coordinationruler.py
index ab99f16ccc5..a056f5f9366 100644
--- a/spacy/pipeline/coordinationruler.py
+++ b/spacy/pipeline/coordinationruler.py
@@ -4,9 +4,9 @@
 from pydantic import BaseModel
 
 try:
-    from pydantic import validator
+    from pydantic import validator  # type: ignore
 except ImportError:
-    from pydantic import field_validator as validator
+    from pydantic import field_validator as validator  # type: ignore
 
 from ..language import Language
 from ..tokens import Doc, Token

From 7abfb4e3e86def11125eb50e36a4598c36a2aec4 Mon Sep 17 00:00:00 2001
From: India Kerle
Date: Thu, 7 Mar 2024 10:56:27 -0300
Subject: [PATCH 12/12] use pydantic version instead

---
 spacy/pipeline/coordinationruler.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/spacy/pipeline/coordinationruler.py b/spacy/pipeline/coordinationruler.py
index a056f5f9366..31ae729c5a3 100644
--- a/spacy/pipeline/coordinationruler.py
+++ b/spacy/pipeline/coordinationruler.py
@@ -1,11 +1,12 @@
 import re
 from typing import Callable, List, Optional, Union
 
+import pydantic
 from pydantic import BaseModel
 
-try:
+if pydantic.VERSION.split(".")[0] == "1":  # type: ignore
     from pydantic import validator  # type: ignore
-except ImportError:
+else:
     from pydantic import field_validator as validator  # type: ignore
 
 from ..language import Language
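The last three patches converge on one compatibility concern: pydantic v1 exposes `validator`, pydantic v2 renames it to `field_validator`, and v2 still exports `validator` as a deprecated alias, so the `try/except ImportError` from PATCH 10 would never fall through on v2. Dispatching on the installed major version is the reliable form. A self-contained sketch of the final pattern (the `Example` model is illustrative, not part of the patch):

import pydantic
from pydantic import BaseModel

if pydantic.VERSION.split(".")[0] == "1":
    from pydantic import validator  # type: ignore
else:
    from pydantic import field_validator as validator  # type: ignore

class Example(BaseModel):
    value: int

    @validator("value")
    def check_positive(cls, v):
        # Runs under v1's validator or v2's field_validator alike.
        if v <= 0:
            raise ValueError("value must be positive")
        return v

Example(value=3)  # validates on either pydantic major version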