Skip to content

Commit

Permalink
Merge branch 'master' into docs/memory-management
Browse files Browse the repository at this point in the history
  • Loading branch information
ines authored Oct 23, 2024
2 parents c3a28e6 + 15fbf5e commit c0a6696
Show file tree
Hide file tree
Showing 12 changed files with 82 additions and 30 deletions.
20 changes: 5 additions & 15 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ on:
- "*.md"
- "*.mdx"
- "website/**"
- ".github/workflows/**"
pull_request:
types: [opened, synchronize, reopened, edited]
paths-ignore:
Expand All @@ -32,7 +31,7 @@ jobs:
- name: Configure Python version
uses: actions/setup-python@v4
with:
python-version: "3.7"
python-version: "3.10"

- name: black
run: |
Expand All @@ -59,18 +58,7 @@ jobs:
fail-fast: true
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python_version: ["3.12"]
include:
- os: windows-latest
python_version: "3.7"
- os: macos-latest
python_version: "3.8"
- os: ubuntu-latest
python_version: "3.9"
- os: windows-latest
python_version: "3.10"
- os: macos-latest
python_version: "3.11"
python_version: ["3.9", "3.11", "3.12"]

runs-on: ${{ matrix.os }}

Expand Down Expand Up @@ -159,7 +147,9 @@ jobs:
- name: "Test assemble CLI"
run: |
python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
python -m spacy assemble ner_source_sm.cfg output_dir
env:
PYTHONWARNINGS: "error,ignore::DeprecationWarning"
if: matrix.python_version == '3.9'

- name: "Test assemble CLI vectors warning"
Expand Down
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ so that more people can benefit from it.

When opening an issue, use a **descriptive title** and include your
**environment** (operating system, Python version, spaCy version). Our
[issue template](https://github.com/explosion/spaCy/issues/new) helps you
[issue templates](https://github.com/explosion/spaCy/issues/new/choose) help you
remember the most important details to include. If you've discovered a bug, you
can also submit a [regression test](#fixing-bugs) straight away. When you're
opening an issue to report the bug, simply refer to your pull request in the
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ spacy-legacy>=3.0.11,<3.1.0
spacy-loggers>=1.0.0,<2.0.0
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
thinc>=8.2.2,<8.3.0
thinc>=8.3.0,<8.4.0
ml_datasets>=0.2.0,<0.3.0
murmurhash>=0.28.0,<1.1.0
wasabi>=0.9.1,<1.2.0
Expand Down
6 changes: 2 additions & 4 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ classifiers =
Operating System :: Microsoft :: Windows
Programming Language :: Cython
Programming Language :: Python :: 3
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11
Expand All @@ -31,7 +29,7 @@ project_urls =
[options]
zip_safe = false
include_package_data = true
python_requires = >=3.7
python_requires = >=3.9
# NOTE: This section is superseded by pyproject.toml and will be removed in
# spaCy v4
setup_requires =
Expand Down Expand Up @@ -116,7 +114,7 @@ cuda12x =
cuda-autodetect =
cupy-wheel>=11.0.0,<13.0.0
apple =
thinc-apple-ops>=0.1.0.dev0,<1.0.0
thinc-apple-ops>=1.0.0,<2.0.0
# Language tokenizers with external dependencies
ja =
sudachipy>=0.5.2,!=0.6.1
Expand Down
2 changes: 1 addition & 1 deletion spacy/about.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# fmt: off
__title__ = "spacy"
__version__ = "3.8.0"
__version__ = "3.8.2"
__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
4 changes: 2 additions & 2 deletions spacy/lang/hr/lemma_lookup_license.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
The list of Croatian lemmas was extracted from the reldi-tagger repository (https://github.com/clarinsi/reldi-tagger).
Reldi-tagger is licesned under the Apache 2.0 licence.
Reldi-tagger is licensed under the Apache 2.0 licence.

@InProceedings{ljubesic16-new,
author = {Nikola Ljubešić and Filip Klubička and Željko Agić and Ivo-Pavao Jazbec},
Expand All @@ -12,4 +12,4 @@ Reldi-tagger is licesned under the Apache 2.0 licence.
publisher = {European Language Resources Association (ELRA)},
address = {Paris, France},
isbn = {978-2-9517408-9-1}
}
}
14 changes: 10 additions & 4 deletions spacy/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from copy import deepcopy
from dataclasses import dataclass
from itertools import chain, cycle
import numpy
from pathlib import Path
from timeit import default_timer as timer
from typing import (
Expand All @@ -31,6 +30,7 @@
overload,
)

import numpy
import srsly
from cymem.cymem import Pool
from thinc.api import Config, CupyOps, Optimizer, get_current_ops
Expand Down Expand Up @@ -2143,7 +2143,9 @@ def to_disk(
serializers["tokenizer"] = lambda p: self.tokenizer.to_disk( # type: ignore[union-attr]
p, exclude=["vocab"]
)
serializers["meta.json"] = lambda p: srsly.write_json(p, _replace_numpy_floats(self.meta))
serializers["meta.json"] = lambda p: srsly.write_json(
p, _replace_numpy_floats(self.meta)
)
serializers["config.cfg"] = lambda p: self.config.to_disk(p)
for name, proc in self._components:
if name in exclude:
Expand Down Expand Up @@ -2257,7 +2259,9 @@ def to_bytes(self, *, exclude: Iterable[str] = SimpleFrozenList()) -> bytes:
serializers: Dict[str, Callable[[], bytes]] = {}
serializers["vocab"] = lambda: self.vocab.to_bytes(exclude=exclude)
serializers["tokenizer"] = lambda: self.tokenizer.to_bytes(exclude=["vocab"]) # type: ignore[union-attr]
serializers["meta.json"] = lambda: srsly.json_dumps(_replace_numpy_floats(self.meta))
serializers["meta.json"] = lambda: srsly.json_dumps(
_replace_numpy_floats(self.meta)
)
serializers["config.cfg"] = lambda: self.config.to_bytes()
for name, proc in self._components:
if name in exclude:
Expand Down Expand Up @@ -2309,7 +2313,9 @@ def deserialize_meta(b):


def _replace_numpy_floats(meta_dict: dict) -> dict:
return convert_recursive(lambda v: isinstance(v, numpy.floating), lambda v: float(v), dict(meta_dict))
return convert_recursive(
lambda v: isinstance(v, numpy.floating), lambda v: float(v), dict(meta_dict)
)


@dataclass
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion website/docs/api/large-language-models.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -1597,7 +1597,7 @@ The name of the model to be used has to be passed in via the `name` attribute.
| Argument | Description |
| -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `name` | The name of a mdodel supported by LangChain for this API. ~~str~~ |
| `name` | The name of a model supported by LangChain for this API. ~~str~~ |
| `config` | Configuration passed on to the LangChain model. Defaults to `{}`. ~~Dict[Any, Any]~~ |
| `query` | Function that executes the prompts. If `None`, defaults to `spacy.CallLangChain.v1`. ~~Optional[Callable[["langchain.llms.BaseLLM", Iterable[Any]], Iterable[Any]]]~~ |
Expand Down
2 changes: 1 addition & 1 deletion website/docs/usage/rule-based-matching.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -720,7 +720,7 @@ matches = matcher(doc)
# Serve visualization of sentences containing match with displaCy
# set manual=True to make displaCy render straight from a dictionary
# (if you're not running the code within a Jupyer environment, you can
# (if you're not running the code within a Jupyter environment, you can
# use displacy.serve instead)
displacy.render(matched_sents, style="ent", manual=True)
```
Expand Down
55 changes: 55 additions & 0 deletions website/meta/universe.json
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,47 @@
"ancient Greek"
]
},
{
"id": "solipcysme",
"title": "solipCysme",
"slogan": "spaCy pipeline for french fictions and first person point of view texts.",
"description": "__solipCysme__ is a pipeline for french language, designed for the analysis of fictions and first person point of view texts, with a focus on personal pronouns.",
"github": "thjbdvlt/solipCysme",
"code_example": [
"pip install https://huggingface.co/thjbdvlt/fr_solipcysme/resolve/main/fr_solipcysme-any-py3-none-any.whl",
"",
"import spacy",
"",
"nlp = spacy.load('fr_solipcysme')",
"for i in nlp(",
"'la MACHINE à (b)rouiller le temps s'est peut-être déraillée..?'",
"):",
" print(",
" i, ",
" i.norm_, ",
" i.pos_, ",
" i.morph, ",
" i.lemma_, ",
" i.dep_, ",
" i._.tokentype,",
" i._.vv_pos,",
" i._.vv_morph",
" )"
],
"code_language": "python",
"author": "thjbdvlt",
"author_links": {
"github": "thjbdvlt"
},
"category": [
"pipeline",
"research",
"models"
],
"tags": [
"french"
]
},
{
"id": "spacy-cleaner",
"title": "spacy-cleaner",
Expand Down Expand Up @@ -2587,6 +2628,20 @@
"courses"
]
},
{
"type": "education",
"id": "spacy-quickstart",
"title": "spaCy Quickstart",
"slogan": "Learn spaCy basics quickly by visualizing various Doc objects",
"description": "In this course, I use the itables Python library inside a Jupyter notebook so that you can visualize the different spaCy document objects. This will provide a solid foundation for people who wish to learn the spaCy NLP library.",
"url": "https://learnspacy.com/courses/spacy-quickstart/",
"image": "https://learnspacy.com/wp-content/uploads/2024/09/custom_search_builder_spacy-2048x1202.png",
"thumb": "https://learnspacy.com/wp-content/uploads/2024/09/learnspacy_logo.png",
"author": "Aravind Mohanoor",
"category": [
"courses"
]
},
{
"type": "education",
"id": "video-spacys-ner-model",
Expand Down
3 changes: 3 additions & 0 deletions website/src/styles/landing.module.sass
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@
margin-bottom: 0
height: 100%

a, a:hover
color: inherit

.banner-content-small
display: block
margin-bottom: 0 !important
Expand Down

0 comments on commit c0a6696

Please sign in to comment.