Skip to content

Commit

Permalink
Merge branch 'release/0.10.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
emfomy committed Nov 3, 2020
2 parents 18fc3a8 + 23bc865 commit 56165ae
Show file tree
Hide file tree
Showing 15 changed files with 145 additions and 66 deletions.
16 changes: 10 additions & 6 deletions DEVELOP.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
# Release TODO
- change version number
- make clean
- make lint
- make doc
- make tox
- make tox-report
- make upload

- >> make clean
- >> make lint
- >> make doc
- >> make tox
- >> make tox-report
- merge to master branch
- >> make clean
- >> make upload
# Requirements
Make sure test/requirements.txt matches setup.py.
21 changes: 14 additions & 7 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,9 @@ Requirements

* `Python <https://www.python.org>`__ 3.6+
* `TreeLib <https://treelib.readthedocs.io>`__ 1.5+
* `CkipTagger <https://pypi.org/project/ckiptagger>`__ 0.1.1+ [Optional, Recommended]
* `CkipClassic <https://ckip-classic.readthedocs.io>`__ 1.0+ [Optional]
* `TensorFlow / TensorFlow-GPU <https://www.tensorflow.org/>`__ 1.13.1+, <2 [Required by CkipTagger]
* `CkipTagger <https://pypi.org/project/ckiptagger>`__ 0.2.1+ [Optional, Recommended]
* `CkipClassic <https://ckip-classic.readthedocs.io>`__ 1.0+ [Optional, Recommended]
* `TensorFlow / TensorFlow-GPU <https://www.tensorflow.org/>`__ 1.13.1+ [Required by CkipTagger]

Driver Requirements
^^^^^^^^^^^^^^^^^^^
Expand All @@ -105,7 +105,7 @@ Driver Built-in CkipTagger CkipClassic
Sentence Segmentation ✔
Word Segmentation† ✔ ✔
Part-of-Speech Tagging† ✔ ✔
Constituency Parsing
Constituency Parsing ✔
Named-Entity Recognition ✔
Coreference Resolution‡ ✔ ✔ ✔
================================ ======== ========== ===========
Expand All @@ -118,10 +118,17 @@ Installation via Pip

- No backend (not recommended): ``pip install ckipnlp``.
- With CkipTagger backend (recommended): ``pip install ckipnlp[tagger]`` or ``pip install ckipnlp[tagger-gpu]``.
- With CkipClassic backend: Please refer https://ckip-classic.readthedocs.io/en/latest/main/readme.html#installation for CkipClassic installation guide.
- With CkipClassic Parser Client backend (recommended): ``pip install ckipnlp[classic]``.
- With CkipClassic offline backend: Please refer to https://ckip-classic.readthedocs.io/en/latest/main/readme.html#installation for the CkipClassic installation guide.

Usage
-----
.. attention::
To use the CkipClassic Parser Client backend, please:

#. Register an account at http://parser.iis.sinica.edu.tw/v1/reg.exe
#. Set the environment variables ``$CKIPPARSER_USERNAME`` and ``$CKIPPARSER_PASSWORD`` to the username and password.

Detail
------

See https://ckipnlp.readthedocs.io/ for API details.

Expand Down
2 changes: 1 addition & 1 deletion ckipnlp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
__copyright__ = '2018-2020 CKIP Lab'

__title__ = 'CKIPNLP'
__version__ = '0.9.1'
__version__ = '0.10.0'
__description__ = 'CKIP CoreNLP'
__license__ = 'CC BY-NC-SA 4.0'

Expand Down
1 change: 1 addition & 0 deletions ckipnlp/driver/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from .classic import (
CkipClassicWordSegmenter,
CkipClassicConParser,
CkipClassicConParserClient,
)

from .ss import (
Expand Down
105 changes: 79 additions & 26 deletions ckipnlp/driver/classic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
__copyright__ = '2018-2020 CKIP Lab'
__license__ = 'CC BY-NC-SA 4.0'

from abc import (
abstractmethod as _abstractmethod,
)

from itertools import (
chain as _chain,
)
Expand Down Expand Up @@ -130,39 +134,19 @@ def _call(self, *, _wspos):

################################################################################################################################

class CkipClassicConParser(_BaseDriver):
"""The CKIP constituency parsing driver with CkipClassic backend.
Arguments
---------
lazy : bool
Lazy initialize the driver.
.. method:: __call__(*, ws, pos)
Apply constituency parsing.
class _CkipClassicConParser(_BaseDriver):

Parameters
- **ws** (:class:`~ckipnlp.container.text.TextParagraph`) — The word-segmented sentences.
- **pos** (:class:`~ckipnlp.container.text.TextParagraph`) — The part-of-speech sentences.
Returns
**conparse** (:class:`~ckipnlp.container.parse.ParseSentence`) — The constituency-parsing sentences.
"""

driver_type = 'con_parser'
driver_family = 'classic'
driver_inputs = ('ws', 'pos',)

_count = 0
@_abstractmethod
def driver_family(self): # pylint: disable=missing-docstring
return NotImplemented

@_abstractmethod
def _init(self):
self.__class__._count += 1 # pylint: disable=protected-access
if self.__class__._count > 1: # pylint: disable=protected-access
raise RuntimeError(f'Never instance more than one {self.__class__.__name__}!')

import ckip_classic.parser
self._core = ckip_classic.parser.CkipParser(do_ws=False)
return NotImplemented

def _call(self, *, ws, pos):
assert isinstance(ws, _SegParagraph)
Expand Down Expand Up @@ -217,3 +201,72 @@ def _half2full(text):
@staticmethod
def _normalize(text):
return text.split('] ', 2)[-1].rstrip('#')

################################################################################################################################

class CkipClassicConParser(_CkipClassicConParser):
    """The CKIP constituency parsing driver with CkipClassic backend.

    At most one instance of this class may be initialized; initializing a
    second one raises :class:`RuntimeError`.

    Arguments
    ---------
        lazy : bool
            Lazy initialize the driver.

    .. method:: __call__(*, ws, pos)

        Apply constituency parsing.

        Parameters
            - **ws** (:class:`~ckipnlp.container.text.TextParagraph`) — The word-segmented sentences.
            - **pos** (:class:`~ckipnlp.container.text.TextParagraph`) — The part-of-speech sentences.

        Returns
            **conparse** (:class:`~ckipnlp.container.parse.ParseSentence`) — The constituency-parsing sentences.
    """

    # NOTE(review): the shared ``_call`` asserts that ``ws`` is a ``_SegParagraph``,
    # while the docstring above references ``TextParagraph`` — confirm which
    # container type should be documented.

    driver_family = 'classic'

    # Number of successfully initialized instances of this class.
    _count = 0

    def _init(self):
        """Load the CkipClassic parser backend, enforcing the single-instance rule.

        Raises
        ------
        RuntimeError
            If an instance of this class has already been initialized.
        """
        cls = self.__class__

        # Check before touching the counter so a rejected attempt does not inflate it.
        if cls._count >= 1:  # pylint: disable=protected-access
            raise RuntimeError(f'Never instance more than one {cls.__name__}!')

        import ckip_classic.parser
        self._core = ckip_classic.parser.CkipParser(do_ws=False)

        # Record the instance only after the backend was created successfully;
        # otherwise a failed import/constructor would make every retry raise the
        # misleading "more than one" error instead of the real failure.
        cls._count += 1  # pylint: disable=protected-access

class CkipClassicConParserClient(_CkipClassicConParser):
    """Constituency parsing driver that uses the CkipClassic parser client as its backend.

    Arguments
    ---------
        lazy : bool
            Lazy initialize the driver.

    Notes
    -----
        An account is required: register at http://parser.iis.sinica.edu.tw/v1/reg.exe
        and export the username and password through the environment variables
        ``$CKIPPARSER_USERNAME`` and ``$CKIPPARSER_PASSWORD``.

    .. method:: __call__(*, ws, pos)

        Run constituency parsing on the given sentences.

        Parameters
            - **ws** (:class:`~ckipnlp.container.text.TextParagraph`) — The word-segmented sentences.
            - **pos** (:class:`~ckipnlp.container.text.TextParagraph`) — The part-of-speech sentences.

        Returns
            **conparse** (:class:`~ckipnlp.container.parse.ParseSentence`) — The constituency-parsing sentences.
    """

    driver_family = 'classic-client'

    def _init(self):
        # Imported here rather than at module level, so ckip_classic is only
        # needed once this driver is actually initialized.
        from ckip_classic.client import CkipParserClient
        self._core = CkipParserClient()
2 changes: 1 addition & 1 deletion ckipnlp/pipeline/kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def __init__(self, *,
sentence_segmenter='default',
word_segmenter='tagger',
pos_tagger='tagger',
con_parser='classic',
con_parser='classic-client',
ner_chunker='tagger',
lazy=True,
opts={},
Expand Down
1 change: 1 addition & 0 deletions docs/main/_defn.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
.. |CkipClassicWordSegmenter| replace:: :class:`~ckipnlp.driver.classic.CkipClassicWordSegmenter`
.. |CkipClassicConParser| replace:: :class:`~ckipnlp.driver.classic.CkipClassicConParser`
.. |CkipClassicConParserClient| replace:: :class:`~ckipnlp.driver.classic.CkipClassicConParserClient`

.. |CkipTaggerWordSegmenter| replace:: :class:`~ckipnlp.driver.tagger.CkipTaggerWordSegmenter`
.. |CkipTaggerPosTagger| replace:: :class:`~ckipnlp.driver.tagger.CkipTaggerPosTagger`
Expand Down
13 changes: 7 additions & 6 deletions docs/main/usage/driver.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,16 @@ Drivers

Here is the list of the drivers:

================================ ================================ ================================ ================================
Driver Type \\ Family ``'default'`` ``'tagger'`` ``'classic'``
================================ ================================ ================================ ================================
================================ ================================ ================================ ================================ ================================
Driver Type \\ Family ``'default'`` ``'tagger'`` ``'classic'`` ``'classic-client'``
================================ ================================ ================================ ================================ ================================
Sentence Segmenter |CkipSentenceSegmenter|
Word Segmenter |CkipTaggerWordSegmenter| |CkipClassicWordSegmenter|†
Pos Tagger |CkipTaggerPosTagger| |CkipClassicWordSegmenter|†
Ner Chunker |CkipTaggerNerChunker|
Constituency Parser |CkipClassicConParser|
Constituency Parser |CkipClassicConParser| |CkipClassicConParserClient|‡
Coref Chunker |CkipCorefChunker|
================================ ================================ ================================ ================================
================================ ================================ ================================ ================================ ================================

† Not compatible with |CkipCorefPipeline|.
- † Not compatible with |CkipCorefPipeline|.
- ‡ Please register an account at http://parser.iis.sinica.edu.tw/v1/reg.exe and set the environment variables ``$CKIPPARSER_USERNAME`` and ``$CKIPPARSER_PASSWORD``.
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ def main():
'treelib>=1.5.5',
],
extras_require={
# 'classic': ['ckip-classic>=1.0'],
'tagger': ['ckiptagger[tf]>=0.1.1'],
'tagger-gpu': ['ckiptagger[tfgpu]>=0.1.1'],
'classic': ['ckip-classic>=1.1.2'],
'tagger': ['ckiptagger[tf]>=0.2.1'],
'tagger-gpu': ['ckiptagger[tfgpu]>=0.2.1'],
},
data_files=[],
)
Expand Down
2 changes: 2 additions & 0 deletions test/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
CKIPPARSER_USERNAME=_tester
CKIPPARSER_PASSWORD=tester
2 changes: 2 additions & 0 deletions test/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
CKIPPARSER_USERNAME=
CKIPPARSER_PASSWORD=
2 changes: 1 addition & 1 deletion test/script/pipeline/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
conparse = [
[
[ 'S(Head:Nab:中文字|particle:Td:耶)', ',', ],
[ '%(particle:I:啊|manner:Dh:哈|manner:D:哈哈)', '。', ],
[ '%(particle:interjection(Head:I:啊)|time:Dh:哈|time:D:哈哈)', '。', ],
],
[
[ None, '「', ],
Expand Down
12 changes: 11 additions & 1 deletion test/script/pipeline/run_classic_con_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,14 @@ def test_classic_con_parser():
obj = CkipPipeline(con_parser='classic')
doc = CkipDocument(ws=SegParagraph.from_list(ws), pos=SegParagraph.from_list(pos))
obj.get_conparse(doc)
assert doc.conparse.to_list() == conparse
assert doc.conparse.to_list() == [
[
[ 'S(Head:Nab:中文字|particle:Td:耶)', ',', ],
[ '%(particle:I:啊|manner:Dh:哈|manner:D:哈哈)', '。', ],
],
[
[ None, '「', ],
[ 'VP(Head:VH11:完蛋|particle:Ta:了)', '!」', ],
[ 'S(agent:NP(apposition:Nba:畢卡索|Head:Nhaa:他)|Head:VE2:想)', '', ],
],
]
8 changes: 8 additions & 0 deletions test/script/pipeline/run_kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@ def test_sentence_segmenter():

################################################################################################################################

def test_classic_con_parser_client():
    """Check that the 'classic-client' constituency parser reproduces the expected ``conparse`` fixture."""
    document = CkipDocument(
        ws=SegParagraph.from_list(ws),
        pos=SegParagraph.from_list(pos),
    )
    pipeline = CkipPipeline(con_parser='classic-client')
    pipeline.get_conparse(document)
    assert document.conparse.to_list() == conparse

################################################################################################################################

def test_tagger_word_segmenter():
obj = CkipPipeline(word_segmenter='tagger')
doc = CkipDocument(text=TextParagraph.from_list(text))
Expand Down
18 changes: 4 additions & 14 deletions test/tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -43,25 +43,15 @@ commands =
pytest {toxinidir}/script/container/ {env:NO_COV:--cov=ckipnlp.container} {posargs}

[testenv:py36-pipeline]
ignore_errors = true

deps =
{[testenv]deps}
ckip-classic
ckiptagger[tf]

commands_pre =
- pip install ckip-classic \
--install-option='--ws' \
--install-option='--ws-lib-dir=/share/opt/ckipws/lib' \
--install-option='--ws-share-dir=/share/opt/ckipws/share' \
--install-option='--parser' \
--install-option='--parser-lib-dir=/share/opt/ckipparser/lib' \
--install-option='--parser-share-dir=/share/opt/ckipparser/share' \
--quiet
python-dotenv[cli]

commands =
pytest {toxinidir}/script/pipeline/run_kernel.py {env:NO_COV:--cov=ckipnlp.pipeline --cov=ckipnlp.driver} {posargs}
pytest {toxinidir}/script/pipeline/run_coref.py {env:NO_COV:--cov=ckipnlp.pipeline --cov=ckipnlp.driver} {posargs}
dotenv -f {toxinidir}/.env run pytest {toxinidir}/script/pipeline/run_kernel.py {env:NO_COV:--cov=ckipnlp.pipeline --cov=ckipnlp.driver} {posargs}
dotenv -f {toxinidir}/.env run pytest {toxinidir}/script/pipeline/run_coref.py {env:NO_COV:--cov=ckipnlp.pipeline --cov=ckipnlp.driver} {posargs}

[testenv:py36-classic]
ignore_errors = true
Expand Down

0 comments on commit 56165ae

Please sign in to comment.