diff --git a/setup.py b/setup.py
index 01cda61aba..a67b1d75ee 100644
--- a/setup.py
+++ b/setup.py
@@ -16,6 +16,7 @@
 import shutil
 import sys
 from collections import OrderedDict
+from pathlib import Path
 
 from setuptools import Extension, find_packages, setup, distutils
 from setuptools.command.build_ext import build_ext
@@ -83,6 +84,30 @@ def make_cpp_ext(use_cython=False):
     )
 
 
+def get_long_description():
+    """Return the text of README.md, adapted for display on PyPI.
+
+    The README is looked up next to this setup.py, so the result does not
+    depend on the current working directory of the build.
+
+    Returns
+    -------
+    str
+        The README contents with each occurrence of ``docs/src/readme_images/``
+        replaced by the corresponding raw.githubusercontent.com URL.
+
+    """
+    readme_path = Path(__file__).resolve().parent / "README.md"
+    long_description = readme_path.read_text(encoding="utf-8")
+    #
+    # We update the image paths in the README to a GitHub URL so that they render correctly on PyPI.
+    # https://stackoverflow.com/questions/41983209/how-do-i-add-images-to-a-pypi-readme-that-works-on-github
+    #
+    docs_path = "docs/src/readme_images/"
+    raw_github_url = "https://raw.githubusercontent.com/piskvorky/gensim/master/" + docs_path
+    return long_description.replace(docs_path, raw_github_url)
+
+
 #
 # We use use_cython=False here for two reasons:
 #
@@ -158,112 +183,6 @@ def run(self):
     cmdclass.update(vars(wheelhouse_uploader.cmd))
 
 
-LONG_DESCRIPTION = u"""
-==============================================
-gensim -- Topic Modelling in Python
-==============================================
-
-|GA|_
-|Wheel|_
-
-.. |GA| image:: https://github.com/RaRe-Technologies/gensim/actions/workflows/tests.yml/badge.svg?branch=develop
-.. |Wheel| image:: https://img.shields.io/pypi/wheel/gensim.svg
-
-.. _GA: https://github.com/RaRe-Technologies/gensim/actions
-.. _Downloads: https://pypi.org/project/gensim/
-.. _License: https://radimrehurek.com/gensim/intro.html#licensing
-.. _Wheel: https://pypi.org/project/gensim/
-
-Gensim is a Python library for *topic modelling*, *document indexing* and *similarity retrieval* with large corpora.
-Target audience is the *natural language processing* (NLP) and *information retrieval* (IR) community.
-
-Features
----------
-
-* All algorithms are **memory-independent** w.r.t. the corpus size (can process input larger than RAM, streamed, out-of-core)
-* **Intuitive interfaces**
-
-  * easy to plug in your own input corpus/datastream (simple streaming API)
-  * easy to extend with other Vector Space algorithms (simple transformation API)
-
-* Efficient multicore implementations of popular algorithms, such as online **Latent Semantic Analysis (LSA/LSI/SVD)**,
-  **Latent Dirichlet Allocation (LDA)**, **Random Projections (RP)**, **Hierarchical Dirichlet Process (HDP)** or **word2vec deep learning**.
-* **Distributed computing**: can run *Latent Semantic Analysis* and *Latent Dirichlet Allocation* on a cluster of computers.
-* Extensive `documentation and Jupyter Notebook tutorials `_.
-
-
-If this feature list left you scratching your head, you can first read more about the `Vector
-Space Model `_ and `unsupervised
-document analysis `_ on Wikipedia.
-
-Installation
-------------
-
-This software depends on `NumPy and Scipy `_, two Python packages for scientific computing.
-You must have them installed prior to installing `gensim`.
-
-It is also recommended you install a fast BLAS library before installing NumPy. This is optional, but using an optimized BLAS such as MKL, `ATLAS `_ or `OpenBLAS `_ is known to improve performance by as much as an order of magnitude. On OSX, NumPy picks up its vecLib BLAS automatically, so you don't need to do anything special.
-
-Install the latest version of gensim::
-
-    pip install --upgrade gensim
-
-Or, if you have instead downloaded and unzipped the `source tar.gz `_ package::
-
-    python setup.py install
-
-
-For alternative modes of installation, see the `documentation `_.
-
-Gensim is being `continuously tested `_ under all `supported Python versions `_.
-Support for Python 2.7 was dropped in gensim 4.0.0 – install gensim 3.8.3 if you must use Python 2.7.
-
-
-How come gensim is so fast and memory efficient? Isn't it pure Python, and isn't Python slow and greedy?
---------------------------------------------------------------------------------------------------------
-
-Many scientific algorithms can be expressed in terms of large matrix operations (see the BLAS note above). Gensim taps into these low-level BLAS libraries, by means of its dependency on NumPy. So while gensim-the-top-level-code is pure Python, it actually executes highly optimized Fortran/C under the hood, including multithreading (if your BLAS is so configured).
-
-Memory-wise, gensim makes heavy use of Python's built-in generators and iterators for streamed data processing. Memory efficiency was one of gensim's `design goals `_, and is a central feature of gensim, rather than something bolted on as an afterthought.
-
-Documentation
--------------
-* `QuickStart`_
-* `Tutorials`_
-* `Tutorial Videos`_
-* `Official Documentation and Walkthrough`_
-
-Citing gensim
--------------
-
-When `citing gensim in academic papers and theses `_, please use this BibTeX entry::
-
-  @inproceedings{rehurek_lrec,
-        title = {{Software Framework for Topic Modelling with Large Corpora}},
-        author = {Radim {\\v R}eh{\\r u}{\\v r}ek and Petr Sojka},
-        booktitle = {{Proceedings of the LREC 2010 Workshop on New
-             Challenges for NLP Frameworks}},
-        pages = {45--50},
-        year = 2010,
-        month = May,
-        day = 22,
-        publisher = {ELRA},
-        address = {Valletta, Malta},
-        language={English}
-  }
-
-----------------
-
-Gensim is open source software released under the `GNU LGPLv2.1 license `_.
-Copyright (c) 2009-now Radim Rehurek
-
-.. _Official Documentation and Walkthrough: https://radimrehurek.com/gensim/
-.. _Tutorials: https://github.com/RaRe-Technologies/gensim/blob/develop/tutorials.md#tutorials
-.. _Tutorial Videos: https://github.com/RaRe-Technologies/gensim/blob/develop/tutorials.md#videos
-.. _QuickStart: https://radimrehurek.com/gensim/gensim_numfocus/auto_examples/core/run_core_concepts.html
-
-"""
-
 distributed_env = ['Pyro4 >= 4.27']
 
 visdom_req = ['visdom >= 0.1.8, != 0.1.8.7']
@@ -342,8 +261,8 @@ def run(self):
     name='gensim',
     version='4.3.3',
     description='Python framework for fast Vector Space Modelling',
-    long_description=LONG_DESCRIPTION,
-
+    long_description=get_long_description(),
+    long_description_content_type='text/markdown',
     ext_modules=ext_modules,
     cmdclass=cmdclass,
     packages=find_packages(),