Commit 6770bca: fix tests maybe

kain88-de committed Oct 30, 2018
1 parent 688ca22
Showing 5 changed files with 40 additions and 36 deletions.
9 changes: 6 additions & 3 deletions conftest.py
@@ -9,6 +9,7 @@
 # Released under the GNU Public Licence, v2 or any higher version
 
 from dask import distributed
+import dask
 import pytest
 
 
@@ -24,9 +25,11 @@ def client(tmpdir_factory, request):
     lc.close()
 
 
-@pytest.fixture(scope='session', params=('distributed', 'multiprocessing'))
+@pytest.fixture(scope='session', params=('distributed', 'multiprocessing', 'single-threaded'))
 def scheduler(request, client):
     if request.param == 'distributed':
-        return client
+        arg = client
     else:
-        return request.param
+        arg = request.param
+    with dask.config.set(scheduler=arg):
+        yield
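
The reworked fixture leans on dask's context-local configuration: anything computed inside the `with dask.config.set(scheduler=arg)` block picks that scheduler up automatically, which is why the fixture now yields instead of returning a value. A minimal sketch of the mechanism outside pytest (the dask.bag computation is illustrative, not part of this commit):

    import dask
    import dask.bag as db

    numbers = db.from_sequence(range(8), npartitions=4)

    with dask.config.set(scheduler='single-threaded'):
        # compute() gets no scheduler argument; dask reads it from the
        # configuration installed by the surrounding context manager.
        print(numbers.map(lambda x: x * x).sum().compute())
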
43 changes: 30 additions & 13 deletions pmda/leaflet.py
@@ -231,7 +231,6 @@ def run(self,
             start=None,
             stop=None,
             step=None,
-            scheduler=None,
             n_jobs=-1,
             cutoff=15.0):
         """Perform the calculation
@@ -244,35 +243,53 @@
             stop frame of analysis
         step : int, optional
             number of frames to skip between each analysed frame
-        scheduler : dask scheduler, optional
-            Use dask scheduler, defaults to multiprocessing. This can be used
-            to spread work to a distributed scheduler
         n_jobs : int, optional
             number of tasks to start, if `-1` use number of logical cpu cores.
             This argument will be ignored when the distributed scheduler is
             used
         """
-        if scheduler is None:
+        # are we using a distributed scheduler or should we use multiprocessing?
+        scheduler = dask.config.get('scheduler', None)
+        if scheduler is None and client is None:
             scheduler = 'multiprocessing'
+        elif scheduler is None:
+            # maybe we can grab a global worker
+            try:
+                from dask import distributed
+                scheduler = distributed.worker.get_client()
+            except ValueError:
+                pass
+            except ImportError:
+                pass
 
         if n_jobs == -1:
             n_jobs = cpu_count()
 
+        # we could not find a global scheduler to use and we ask for a single
+        # job. Therefore we run this on the single threaded scheduler for
+        # debugging.
+        if scheduler is None and n_jobs == 1:
+            scheduler = 'single-threaded'
+
         if n_blocks is None:
             if scheduler == 'multiprocessing':
-                n_jobs = cpu_count()
                 n_blocks = n_jobs
             elif isinstance(scheduler, distributed.Client):
-                n_jobs = len(scheduler.ncores())
+                n_blocks = len(scheduler.ncores())
             else:
-                raise ValueError(
-                    "Couldn't guess ideal number of jobs from scheduler."
-                    "Please provide `n_jobs` in call to method.")
-
-        with timeit() as b_universe:
-            universe = mda.Universe(self._top, self._traj)
+                n_blocks = 1
+                warnings.warn(
+                    "Couldn't guess ideal number of blocks from scheduler. "
+                    "Set n_blocks=1. Please provide `n_blocks` in call to method.")
+
+        scheduler_kwargs = {'scheduler': scheduler}
+        if scheduler == 'multiprocessing':
+            scheduler_kwargs['num_workers'] = n_jobs
+
+        with timeit() as b_universe:
+            universe = mda.Universe(self._top, self._traj)
 
         start, stop, step = self._trajectory.check_slice_indices(
             start, stop, step)
         with timeit() as total:
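
With this change `run()` resolves the scheduler in a fixed order: an explicitly configured dask scheduler wins, then a globally registered distributed client, then the single-threaded scheduler as a debugging fallback when only one job is requested, and multiprocessing otherwise. A standalone sketch of that order (`detect_scheduler` is an illustrative helper, not part of pmda):

    import dask

    def detect_scheduler(n_jobs=1):
        # 1. an explicitly configured scheduler wins
        scheduler = dask.config.get('scheduler', None)
        if scheduler is None:
            # 2. otherwise try to grab a globally registered client
            try:
                from dask import distributed
                scheduler = distributed.worker.get_client()
            except (ValueError, ImportError):
                pass
        if scheduler is None and n_jobs == 1:
            # 3. nothing found and a single job: single-threaded, for debugging
            scheduler = 'single-threaded'
        # 4. otherwise fall back to multiprocessing
        return scheduler if scheduler is not None else 'multiprocessing'
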
3 changes: 0 additions & 3 deletions pmda/parallel.py
@@ -278,9 +278,6 @@ def run(self,
             stop frame of analysis
         step : int, optional
             number of frames to skip between each analysed frame
-        scheduler : dask scheduler, optional
-            Use dask scheduler, defaults to multiprocessing. This can be used
-            to spread work to a distributed scheduler
         n_jobs : int, optional
             number of jobs to start, if `-1` use number of logical cpu cores.
             This argument will be ignored when the distributed scheduler is
6 changes: 3 additions & 3 deletions pmda/test/test_custom.py
@@ -27,13 +27,13 @@ def test_AnalysisFromFunction(scheduler):
     u = mda.Universe(PSF, DCD)
     step = 2
     ana1 = custom.AnalysisFromFunction(custom_function, u, u.atoms).run(
-        step=step, scheduler=scheduler
+        step=step
     )
     ana2 = custom.AnalysisFromFunction(custom_function, u, u.atoms).run(
-        step=step, scheduler=scheduler
+        step=step
     )
     ana3 = custom.AnalysisFromFunction(custom_function, u, u.atoms).run(
-        step=step, scheduler=scheduler
+        step=step
     )
 
     results = []
15 changes: 1 addition & 14 deletions pmda/test/test_parallel.py
@@ -60,11 +60,6 @@ def analysis():
     return ana
 
 
-def test_wrong_scheduler(analysis):
-    with pytest.raises(ValueError):
-        analysis.run(scheduler=2)
-
-
 @pytest.mark.parametrize('n_jobs', (1, 2))
 def test_all_frames(analysis, n_jobs):
     analysis.run(n_jobs=n_jobs)
@@ -91,16 +86,8 @@ def test_no_frames(analysis, n_jobs):
     assert analysis.timing.universe == 0
 
 
-@pytest.fixture(scope='session', params=('distributed', 'multiprocessing'))
-def scheduler(request, client):
-    if request.param == 'distributed':
-        return client
-    else:
-        return request.param
-
-
 def test_scheduler(analysis, scheduler):
-    analysis.run(scheduler=scheduler)
+    analysis.run()
 
 
 def test_nframes_less_nblocks_warning(analysis):
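
For callers the net effect is that the scheduler is no longer a `run()` keyword; it is selected through dask's configuration or a global distributed client, which is what the shared `scheduler` fixture from conftest.py now provides to these tests. A hedged usage sketch reusing the `AnalysisFromFunction` API from the diff above (the radius-of-gyration function is illustrative and assumes the wrapped function receives an AtomGroup):

    import dask
    import MDAnalysis as mda
    from MDAnalysisTests.datafiles import PSF, DCD
    from pmda import custom

    def radius_of_gyration(atomgroup):
        return atomgroup.radius_of_gyration()

    u = mda.Universe(PSF, DCD)
    ana = custom.AnalysisFromFunction(radius_of_gyration, u, u.atoms)

    with dask.config.set(scheduler='multiprocessing'):
        ana.run(n_jobs=2)  # scheduler comes from the config, not a keyword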
