Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adjust GDAL_MAX_DATASET_POOL_SIZE if system file limit is low #425

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/dolphin/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ def run(
in both ascending and descending tracks imply uplift).

"""
# Ensure we won't run into problems with large GDAL reads from deep VRTs.
utils.check_open_file_limit()

Path(output_dir).mkdir(exist_ok=True, parents=True)

condition_func = argmax_index if condition == CallFunc.MAX else argmin_index
Expand Down
36 changes: 32 additions & 4 deletions src/dolphin/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import datetime
import logging
import math
import os
import resource
import sys
import warnings
Expand Down Expand Up @@ -129,8 +130,6 @@ def full_suffix(filename: Filename):

def disable_gpu():
"""Disable GPU usage."""
import os

import jax

os.environ["CUDA_VISIBLE_DEVICES"] = ""
Expand Down Expand Up @@ -414,8 +413,6 @@ def set_num_threads(num_threads: int):

Uses https://github.com/joblib/threadpoolctl for numpy.
"""
import os

import numba
from threadpoolctl import ThreadpoolController

Expand Down Expand Up @@ -464,6 +461,37 @@ def get_cpu_period():
return cpu_count()


def check_open_file_limit(file_limit_threshold: int = 512) -> int:
    """Adjust GDAL_MAX_DATASET_POOL_SIZE based on the system's open file limit.

    Checks the current soft file descriptor limit using the `resource` module.
    If the limit is below `file_limit_threshold`, shrinks the environment
    variable `GDAL_MAX_DATASET_POOL_SIZE` so GDAL keeps fewer datasets open
    at once (important for deep VRT stacks, where each underlying file
    consumes a descriptor).

    Parameters
    ----------
    file_limit_threshold : int, optional
        The minimum file descriptor limit below which GDAL_MAX_DATASET_POOL_SIZE
        should be shrunk.
        Default is 512.

    Returns
    -------
    int
        The current soft file descriptor limit, as stated in
        `resource.RLIMIT_NOFILE`.

    """
    soft_limit, _ = resource.getrlimit(resource.RLIMIT_NOFILE)

    if soft_limit < file_limit_threshold:
        # Fix: report both the actual limit and the threshold (the original
        # message printed the limit where the threshold belonged, producing
        # the self-contradictory "limit is below {soft_limit}").
        logger.info(
            "File descriptor limit (%d) is below %d. "
            "Shrinking GDAL_MAX_DATASET_POOL_SIZE environment variable.",
            soft_limit,
            file_limit_threshold,
        )
        # Fix: derive the pool size from the *actual* soft limit. Using the
        # threshold (512 // 4 = 128) could exceed a very low system limit —
        # and even exceed GDAL's default pool size of 100, raising it instead
        # of shrinking it. Keep at least 1 so GDAL can still cache a dataset.
        os.environ["GDAL_MAX_DATASET_POOL_SIZE"] = str(max(soft_limit // 4, 1))
    return soft_limit


def flatten(list_of_lists: Iterable[Iterable[Any]]) -> chain[Any]:
    """Collapse one nesting level, chaining the inner iterables in order."""
    flattened = chain.from_iterable(list_of_lists)
    return flattened
Expand Down
5 changes: 4 additions & 1 deletion src/dolphin/workflows/displacement.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,14 @@ def run(
"""
if cfg.log_file is None:
cfg.log_file = cfg.work_directory / "dolphin.log"

# Set the logging level for all `dolphin.` modules
for logger_name in ["dolphin", "spurt"]:
setup_logging(logger_name=logger_name, debug=debug, filename=cfg.log_file)
# TODO: need to pass the cfg filename for the logger

logger.debug(cfg.model_dump())
# Ensure we won't run into problems with large GDAL reads from VRTs.
utils.check_open_file_limit()

if not cfg.worker_settings.gpu_enabled:
utils.disable_gpu()
Expand Down