diff --git a/.github/workflows/meson_ci.yml b/.github/workflows/meson_ci.yml index b4035ff9..491af39e 100644 --- a/.github/workflows/meson_ci.yml +++ b/.github/workflows/meson_ci.yml @@ -61,12 +61,12 @@ jobs: if: ${{ matrix.arch == 'amd64' }} run: | apt-get update - DEBIAN_FRONTEND=noninteractive apt-get install g++ meson m4 -y + DEBIAN_FRONTEND=noninteractive apt-get install g++ meson m4 libcap-dev -y - name: Getting depends (i386) if: ${{ matrix.arch == 'i386' }} run: | apt-get update - DEBIAN_FRONTEND=noninteractive apt-get install meson m4:i386 g++:i386 -y + DEBIAN_FRONTEND=noninteractive apt-get install meson m4:i386 g++:i386 libcap-dev:i386 -y - name: Setup run: meson setup -Dunit-tests=true -Digr-tests=true dirbuild - name: Build @@ -130,7 +130,7 @@ jobs: - name: Getting depends run: | apk update - apk add meson g++ m4 + apk add meson g++ m4 libcap-dev - name: Setup run: meson setup -Dunit-tests=true -Digr-tests=true dirbuild - name: Build diff --git a/.github/workflows/regular_ci.yml b/.github/workflows/regular_ci.yml index 059efab8..5bd216cc 100644 --- a/.github/workflows/regular_ci.yml +++ b/.github/workflows/regular_ci.yml @@ -52,12 +52,12 @@ jobs: if: ${{ matrix.arch == 'amd64' }} run: | apt-get update - DEBIAN_FRONTEND=noninteractive apt-get install g++ make m4 file -y + DEBIAN_FRONTEND=noninteractive apt-get install g++ make m4 libcap-dev file -y - name: Getting depends (i386) if: ${{ matrix.arch == 'i386' }} run: | apt-get update - DEBIAN_FRONTEND=noninteractive apt-get install gcc:i386 make:i386 m4:i386 g++:i386 file -y + DEBIAN_FRONTEND=noninteractive apt-get install gcc:i386 make:i386 m4:i386 g++:i386 libcap-dev:i386 file -y - name: Print g++ architecture run: g++ -dumpmachine - name: Build @@ -123,7 +123,7 @@ jobs: - name: Getting depends run: | apk update - apk add make g++ m4 file + apk add make g++ m4 file libcap-dev - name: Print g++ architecture run: g++ -dumpmachine - name: Build diff --git a/build/Makefile b/build/Makefile index 
a8b51a78..d44f7a5f 100644 --- a/build/Makefile +++ b/build/Makefile @@ -15,6 +15,9 @@ includes/mconfig.h: ../mconfig tools/mconfig-gen.cc version.conf DEFAULT_START_TIMEOUT=$(DEFAULT_START_TIMEOUT) \ DEFAULT_STOP_TIMEOUT=$(DEFAULT_STOP_TIMEOUT) \ $(if $(SUPPORT_CGROUPS),SUPPORT_CGROUPS=$(SUPPORT_CGROUPS),) \ + $(if $(SUPPORT_CAPABILITIES),SUPPORT_CAPABILITIES=$(SUPPORT_CAPABILITIES),) \ + $(if $(SUPPORT_IOPRIO),SUPPORT_IOPRIO=$(SUPPORT_IOPRIO),) \ + $(if $(SUPPORT_OOM_ADJ),SUPPORT_OOM_ADJ=$(SUPPORT_OOM_ADJ),) \ $(if $(USE_UTMPX),USE_UTMPX=$(USE_UTMPX),) \ $(if $(USE_INITGROUPS),USE_INITGROUPS=$(USE_INITGROUPS),) > includes/mconfig.h diff --git a/build/mconfig.mesontemplate b/build/mconfig.mesontemplate index a7a6d1fb..268363ab 100644 --- a/build/mconfig.mesontemplate +++ b/build/mconfig.mesontemplate @@ -8,6 +8,9 @@ #mesondefine USE_UTMPX #mesondefine USE_INITGROUPS #mesondefine SUPPORT_CGROUPS +#mesondefine SUPPORT_CAPABILITIES +#mesondefine SUPPORT_IOPRIO +#mesondefine SUPPORT_OOM_ADJ #mesondefine DEFAULT_AUTO_RESTART #mesondefine DEFAULT_START_TIMEOUT #mesondefine DEFAULT_STOP_TIMEOUT diff --git a/build/tools/mconfig-gen.cc b/build/tools/mconfig-gen.cc index d20ea117..5397f418 100644 --- a/build/tools/mconfig-gen.cc +++ b/build/tools/mconfig-gen.cc @@ -77,6 +77,15 @@ int main(int argc, char **argv) if (vars.find("SUPPORT_CGROUPS") != vars.end()) { cout << "#define SUPPORT_CGROUPS " << vars["SUPPORT_CGROUPS"] << "\n"; } + if (vars.find("SUPPORT_CAPABILITIES") != vars.end()) { + cout << "#define SUPPORT_CAPABILITIES " << vars["SUPPORT_CAPABILITIES"] << "\n"; + } + if (vars.find("SUPPORT_IOPRIO") != vars.end()) { + cout << "#define SUPPORT_IOPRIO " << vars["SUPPORT_IOPRIO"] << "\n"; + } + if (vars.find("SUPPORT_OOM_ADJ") != vars.end()) { + cout << "#define SUPPORT_OOM_ADJ " << vars["SUPPORT_OOM_ADJ"] << "\n"; + } if (vars.find("DEFAULT_AUTO_RESTART") != vars.end()) { cout << "#define DEFAULT_AUTO_RESTART " << vars["DEFAULT_AUTO_RESTART"] << "\n"; } diff --git 
a/configs/mconfig.Linux b/configs/mconfig.Linux index 7d5d3315..637553b9 100644 --- a/configs/mconfig.Linux +++ b/configs/mconfig.Linux @@ -38,6 +38,9 @@ TEST_LDFLAGS=$(TEST_LDFLAGS_BASE) $(TEST_CXXFLAGS) # Features. SUPPORT_CGROUPS=1 +SUPPORT_CAPABILITIES=1 +SUPPORT_IOPRIO=1 +SUPPORT_OOM_ADJ=1 # Service defaults. diff --git a/configs/mconfig.Linux.sh b/configs/mconfig.Linux.sh index f4cc2ec8..538246bd 100644 --- a/configs/mconfig.Linux.sh +++ b/configs/mconfig.Linux.sh @@ -111,6 +111,9 @@ FEATURE_SETTINGS=$( echo "# Feature settings" echo "" echo "SUPPORT_CGROUPS=1" + echo "SUPPORT_CAPABILITIES=1" + echo "SUPPORT_IOPRIO=1" + echo "SUPPORT_OOM_ADJ=1" ) SERVICE_DEFAULTS=$( diff --git a/configure b/configure index 365c32a8..2da0031d 100755 --- a/configure +++ b/configure @@ -208,6 +208,9 @@ for var in PREFIX \ SHUTDOWN_PREFIX \ BUILD_SHUTDOWN \ SUPPORT_CGROUPS \ + SUPPORT_CAPABILITIES \ + SUPPORT_IOPRIO \ + SUPPORT_OOM_ADJ \ USE_UTMPX \ USE_INITGROUPS \ SYSCONTROLSOCKET \ @@ -239,6 +242,12 @@ for arg in "$@"; do --disable-shutdown|--enable-shutdown=no) BUILD_SHUTDOWN=no ;; --enable-cgroups|--enable-cgroups=yes) SUPPORT_CGROUPS=1 ;; --disable-cgroups|--enable-cgroups=no) SUPPORT_CGROUPS=0 ;; + --enable-capabilities|--enable-capabilities=yes) SUPPORT_CAPABILITIES=1 ;; + --disable-capabilities|--enable-capabilities=no) SUPPORT_CAPABILITIES=0 ;; + --enable-ioprio|--enable-ioprio=yes) SUPPORT_IOPRIO=1 ;; + --disable-ioprio|--enable-ioprio=no) SUPPORT_IOPRIO=0 ;; + --enable-oom-adj|--enable-oom-adj=yes) SUPPORT_OOM_ADJ=1 ;; + --disable-oom-adj|--enable-oom-adj=no) SUPPORT_OOM_ADJ=0 ;; --enable-utmpx|--enable-utmpx=yes) USE_UTMPX=1 ;; --disable-utmpx|--enable-utmpx=no) USE_UTMPX=0 ;; --enable-initgroups|--enable-initgroups=yes) USE_INITGROUPS=1 ;; @@ -278,10 +287,16 @@ done if [ "$PLATFORM" = "Linux" ]; then : "${BUILD_SHUTDOWN:="yes"}" : "${SUPPORT_CGROUPS:="1"}" + : "${SUPPORT_CAPABILITIES:="1"}" + : "${SUPPORT_IOPRIO:="1"}" + : "${SUPPORT_OOM_ADJ:="1"}" : 
"${SYSCONTROLSOCKET:="/run/dinitctl"}" else : "${BUILD_SHUTDOWN:="no"}" : "${SUPPORT_CGROUPS:="0"}" + : "${SUPPORT_CAPABILITIES:="0"}" + : "${SUPPORT_IOPRIO:="0"}" + : "${SUPPORT_OOM_ADJ:="0"}" : "${SYSCONTROLSOCKET:="/var/run/dinitctl"}" fi @@ -467,6 +482,9 @@ STRIPOPTS=$STRIPOPTS # Feature settings SUPPORT_CGROUPS=$SUPPORT_CGROUPS USE_INITGROUPS=$USE_INITGROUPS +SUPPORT_CAPABILITIES=$SUPPORT_CAPABILITIES +SUPPORT_IOPRIO=$SUPPORT_IOPRIO +SUPPORT_OOM_ADJ=$SUPPORT_OOM_ADJ # Optional settings SHUTDOWN_PREFIX=${SHUTDOWN_PREFIX:-} diff --git a/doc/manpages/dinit-service.5.m4 b/doc/manpages/dinit-service.5.m4 index b520e0b6..0ecde3f0 100644 --- a/doc/manpages/dinit-service.5.m4 +++ b/doc/manpages/dinit-service.5.m4 @@ -541,6 +541,12 @@ See the \fBRESOURCE LIMITS\fR section. Note that some operating systems (notably, OpenBSD) do not support this limit; the setting will be ignored on such systems. .TP +\fBnice\fR = \fInice-value\fR +Specifies the CPU priority of the process. +When the given value is out of range for the operating system, it will be clamped to +supported range, but no error will be issued. +On Linux, this also sets the autogroup priority, assuming procfs is mounted. +.TP \fBrun\-in\-cgroup\fR = \fIcgroup-path\fR Run the service process(es) in the specified cgroup (see \fBcgroups\fR(7)). The cgroup is specified as a path; if it has a leading slash, the remainder of the path is @@ -557,6 +563,46 @@ The named cgroup must already exist prior to the service starting; it will not b \fBdinit\fR. .IP This setting is only available if \fBdinit\fR was built with cgroups support. +.TP +\fBcapabilities\fR = \fIiab\fR +.TQ +\fBcapabilities\fR += \fIiab-addendum\fR +Run the service process(es) with capabilities specified by \fIiab\fR (see \fBcapabilities\fR(7)). +The syntax follows the regular capabilities IAB format, with comma-separated capabilities. 
+The append form of this setting will add to the previous IAB string, automatically adding +a comma to the previous string, so you do not need to add it manually. +.IP +This setting is only available if \fBdinit\fR was built with capabilities support. +.TP +\fBsecure\-bits\fR = \fIsecbits\fR +.TQ +\fBsecure\-bits\fR += \fIsecbits-addendum\fR +This is a companion option to \fBcapabilities\fR, specifying the secure bits for the +process. +The value is a space-separated list of keywords. The allowed keywords are \fIkeep-caps\fR, +\fIno-setuid-fixup\fR, \fInoroot\fR, and variants of the three with the \fI-locked\fR +suffix. +The append form can be used to add more secure bits, with everything being ORed together +at the end and used as an integer. +.IP +This setting is only available if \fBdinit\fR was built with capabilities support. +.TP +\fBioprio\fR = \fIioprio-value\fR +Specifies the I/O priority class and value for the process. +The permitted values are \fInone\fR, \fIidle\fR, \fIrealtime:PRIO\fR, and +\fIbest-effort:PRIO\fR, where \fIPRIO\fR is an integer value no less than 0 +and no more than 7. +.IP +This setting is only available if \fBdinit\fR was built with ioprio support. +.TP +\fBoom\-score\-adj\fR = \fIadj-value\fR +Specifies the OOM killer score adjustment for the service. +The value is an integer no less than \-1000 and no more than 1000. +.IP +This setting is only available if \fBdinit\fR was built with OOM score adjustment support. +.IP +This setting requires the proc filesystem to be mounted, and will result in a +service startup failure if that is not the case. .\" .SS OPTIONS .\" @@ -685,6 +731,13 @@ is suggested, i.e. every other service should either be a (possibly transitive) dependent of the service with this option set. .IP This option can be used for scripted and internal services only. +.TP +\fBno\-new\-privs\fR +Normally, child processes can gain privileges that their parent did not have, such +as setuid or setgid and file capabilities.
This option can be specified to prevent +the service from gaining such privileges. +.IP +This setting is only available if \fBdinit\fR was built with capabilities support. .\" .SS RESOURCE LIMITS .\" diff --git a/meson.build b/meson.build index 96e1a0f9..21e7c58e 100644 --- a/meson.build +++ b/meson.build @@ -31,6 +31,9 @@ igr_tests = get_option('igr-tests') fuzzer = get_option('fuzzer') man_pages = get_option('man-pages') support_cgroups = get_option('support-cgroups') +support_capabilities = get_option('support-capabilities') +support_ioprio = get_option('support-ioprio') +support_oom_adj = get_option('support-oom-adj') use_utmpx = get_option('use-utmpx') use_initgroups = get_option('use-initgroups') default_auto_restart = get_option('default-auto-restart') @@ -56,6 +59,9 @@ if platform == 'freebsd' and compiler.has_link_argument('-lrt') add_project_link_arguments('-lrt', language : 'cpp') endif +## Dependencies +libcap_dep = dependency('libcap', required: support_capabilities) + ## Prepare mconfig.h mconfig_data.set_quoted('DINIT_VERSION', version) mconfig_data.set_quoted('SYSCONTROLSOCKET', dinit_control_socket_path) @@ -65,9 +71,10 @@ mconfig_data.set('DEFAULT_AUTO_RESTART', default_auto_restart) mconfig_data.set('DEFAULT_START_TIMEOUT', default_start_timeout) mconfig_data.set('DEFAULT_STOP_TIMEOUT', default_stop_timeout) mconfig_data.set10('USE_INITGROUPS', use_initgroups) -if support_cgroups.auto() and platform == 'linux' or support_cgroups.enabled() - mconfig_data.set('SUPPORT_CGROUPS', '1') -endif +mconfig_data.set10('SUPPORT_CGROUPS', support_cgroups.auto() and platform == 'linux' or support_cgroups.enabled()) +mconfig_data.set10('SUPPORT_CAPABILITIES', libcap_dep.found() and not support_capabilities.disabled()) +mconfig_data.set10('SUPPORT_IOPRIO', support_ioprio.auto() and platform == 'linux' or support_ioprio.enabled()) +mconfig_data.set10('SUPPORT_OOM_ADJ', support_oom_adj.auto() and platform == 'linux' or support_oom_adj.enabled()) if 
use_utmpx.enabled() or (use_utmpx.auto() and compiler.has_header_symbol('utmpx.h', '_PATH_UTMPX') and compiler.has_header_symbol('utmpx.h', '_PATH_WTMPX')) mconfig_data.set('USE_UTMPX', '1') diff --git a/meson_options.txt b/meson_options.txt index 617669ec..af92b7eb 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -83,7 +83,25 @@ option( 'support-cgroups', type : 'feature', value : 'auto', - description : 'Enable Cgroups supprot.' + description : 'Enable Cgroups support.' +) +option( + 'support-capabilities', + type : 'feature', + value : 'auto', + description : 'Enable capabilities support.' +) +option( + 'support-ioprio', + type : 'feature', + value : 'auto', + description : 'Enable ioprio support.' +) +option( + 'support-oom-adj', + type : 'feature', + value : 'auto', + description : 'Enable OOM score adjustment support.' ) option( 'build-shutdown', diff --git a/src/Makefile b/src/Makefile index 9600d47f..d3ce665e 100644 --- a/src/Makefile +++ b/src/Makefile @@ -9,6 +9,10 @@ ifeq ($(BUILD_SHUTDOWN),yes) SHUTDOWN=$(SHUTDOWN_PREFIX)shutdown endif +ifeq ($(SUPPORT_CAPABILITIES),1) + ALL_LDFLAGS+=-lcap +endif + dinit_objects = dinit.o load-service.o service.o proc-service.o baseproc-service.o control.o dinit-log.o \ dinit-main.o run-child-proc.o options-processing.o dinit-env.o settings.o diff --git a/src/baseproc-service.cc b/src/baseproc-service.cc index 9347dc87..4bb3c684 100644 --- a/src/baseproc-service.cc +++ b/src/baseproc-service.cc @@ -257,9 +257,23 @@ bool base_process_service::start_ps_process(const std::vector &cmd run_params.env_file = env_file.c_str(); run_params.output_fd = log_output_fd; run_params.input_fd = input_fd; + run_params.nice_is_set = nice_is_set; + run_params.nice = nice; #if SUPPORT_CGROUPS run_params.run_in_cgroup = run_in_cgroup.c_str(); #endif + #if SUPPORT_CAPABILITIES + run_params.cap_iab = cap_iab.get(); + run_params.secbits = secbits; + run_params.no_new_privs = onstart_flags.no_new_privs; + #endif + #if SUPPORT_IOPRIO + 
run_params.ioprio = ioprio; + #endif + #if SUPPORT_OOM_ADJ + run_params.oom_adj_is_set = oom_adj_is_set; + run_params.oom_adj = oom_adj; + #endif run_child_proc(run_params); } else { diff --git a/src/includes/load-service.h b/src/includes/load-service.h index d8b7ceb1..fcaa5892 100644 --- a/src/includes/load-service.h +++ b/src/includes/load-service.h @@ -25,6 +25,10 @@ #include #include +#if SUPPORT_CAPABILITIES +#include +#endif + struct service_flags_t { // on-start flags: @@ -42,14 +46,51 @@ struct service_flags_t bool signal_process_only : 1; // signal the session process, not the whole group bool always_chain : 1; // always start chain-to service on exit bool kill_all_on_stop : 1; // kill all other processes before stopping this service + bool no_new_privs : 1; // set PR_SET_NO_NEW_PRIVS service_flags_t() noexcept : rw_ready(false), log_ready(false), runs_on_console(false), starts_on_console(false), shares_console(false), unmask_intr(false), pass_cs_fd(false), start_interruptible(false), skippable(false), signal_process_only(false), - always_chain(false), kill_all_on_stop(false) + always_chain(false), kill_all_on_stop(false), no_new_privs(false) + { + } +}; + +#if SUPPORT_CAPABILITIES +struct secure_bits_t +{ + bool keep_caps : 1; + bool keep_caps_locked : 1; + bool no_setuid_fixup : 1; + bool no_setuid_fixup_locked : 1; + bool noroot : 1; + bool noroot_locked : 1; + + secure_bits_t() noexcept : keep_caps(false), keep_caps_locked(false), + no_setuid_fixup(false), no_setuid_fixup_locked(false), + noroot(false), noroot_locked(false) { } + + void clear() noexcept { + keep_caps = keep_caps_locked = false; + no_setuid_fixup = no_setuid_fixup_locked = false; + noroot = noroot_locked = false; + } + + unsigned int get() const noexcept { + unsigned int r = 0; + // as referenced in uapi + if (noroot) r |= 1 << 0; + if (noroot_locked) r |= 1 << 1; + if (no_setuid_fixup) r |= 1 << 2; + if (no_setuid_fixup_locked) r |= 1 << 3; + if (keep_caps) r |= 1 << 4; + if 
(keep_caps_locked) r |= 1 << 5; + return r; + } }; +#endif // Resource limits for a particular service & particular resource struct service_rlimits @@ -225,12 +266,22 @@ enum class setting_id_t { LOGFILE_GID, LOG_TYPE, LOG_BUFFER_SIZE, CONSUMER_OF, RESTART, SMOOTH_RECOVERY, OPTIONS, LOAD_OPTIONS, TERM_SIGNAL, TERMSIGNAL /* deprecated */, RESTART_LIMIT_INTERVAL, RESTART_DELAY, RESTART_LIMIT_COUNT, STOP_TIMEOUT, START_TIMEOUT, RUN_AS, CHAIN_TO, READY_NOTIFICATION, - INITTAB_ID, INITTAB_LINE, + INITTAB_ID, INITTAB_LINE, NICE, // Prefixed with SETTING_ to avoid name collision with system macros: SETTING_RLIMIT_NOFILE, SETTING_RLIMIT_CORE, SETTING_RLIMIT_DATA, SETTING_RLIMIT_ADDRSPACE, // Possibly unsupported depending on platform/build options: #if SUPPORT_CGROUPS - RUN_IN_CGROUP + RUN_IN_CGROUP, +#endif +#if SUPPORT_CAPABILITIES + CAPABILITIES, + SECURE_BITS, +#endif +#if SUPPORT_IOPRIO + IOPRIO, +#endif +#if SUPPORT_OOM_ADJ + OOM_SCORE_ADJ, #endif }; @@ -445,7 +496,8 @@ inline string read_config_name(string_iterator & i, string_iterator end, bool en // part will be added as [start,end). May be null. inline void read_setting_value(std::string &setting_val, setting_op_t operation, file_pos_ref input_pos, string_iterator &i, string_iterator end, - std::list> *part_positions = nullptr) + std::list> *part_positions = nullptr, + char delimiter = ' ') { using std::locale; using std::isspace; @@ -455,10 +507,11 @@ inline void read_setting_value(std::string &setting_val, setting_op_t operation, i = skipwsln(i, end, line_num); if (operation == setting_op_t::PLUSASSIGN) { - // Ensure whitespace at end of current value. This is really only for debugging niceness - // since the offsets (part_positions) are what really define the seperated components. + // Ensure that values are correctly delimited. This is usually only for debugging + // niceness as for commands where this is mostly used the offsets actually delimit + // the components, but e.g. 
for capabilities (comma-separated) it matters more. if (!setting_val.empty()) { - setting_val += ' '; + setting_val += delimiter; } } else { @@ -542,9 +595,9 @@ inline void read_setting_value(std::string &setting_val, setting_op_t operation, // See read_setting_value(std::string &, ...) inline void read_setting_value(ha_string &setting_val, setting_op_t operation, file_pos_ref input_pos, string_iterator &i, string_iterator end, - std::list> *part_positions = nullptr) { + std::list> *part_positions = nullptr, char delimiter = ' ') { std::string sval = std::string(setting_val.c_str(), setting_val.length()); - read_setting_value(sval, operation, input_pos, i, end, part_positions); + read_setting_value(sval, operation, input_pos, i, end, part_positions, delimiter); setting_val = sval; } @@ -793,6 +846,30 @@ inline unsigned long long parse_unum_param(file_pos_ref input_pos, const std::st } } +// Parse a signed numeric parameter value; throws service_description_exc if not a number within [min,max] +inline long long parse_snum_param(file_pos_ref input_pos, const std::string &param, + const std::string &service_name, long long min = std::numeric_limits<long long>::min(), + long long max = std::numeric_limits<long long>::max()) +{ + const char * num_err_msg = "specified value contains invalid numeric characters or is outside " + "allowed range."; + + std::size_t ind = 0; + try { + long long v = std::stoll(param, &ind, 0); + if (v < min || v > max || ind != param.length()) { + throw service_description_exc(service_name, num_err_msg, input_pos); + } + return v; + } + catch (std::out_of_range &exc) { + throw service_description_exc(service_name, num_err_msg, input_pos); + } + catch (std::invalid_argument &exc) { + throw service_description_exc(service_name, num_err_msg, input_pos); + } +} + // In a vector, find or create rlimits for a particular resource type.
inline service_rlimits &find_rlimits(std::vector &all_rlimits, int resource_id) { @@ -1326,6 +1403,9 @@ class service_settings_wrapper gid_t run_as_uid_gid = -1; // primary group of "run as" uid if known gid_t run_as_gid = -1; + bool nice_is_set = false; + int nice; + string chain_to_name; string consumer_of_name; @@ -1333,6 +1413,20 @@ class service_settings_wrapper string run_in_cgroup; #endif + #if SUPPORT_CAPABILITIES + string capabilities; + secure_bits_t secbits; + #endif + + #if SUPPORT_IOPRIO + int ioprio = -1; + #endif + + #if SUPPORT_OOM_ADJ + bool oom_adj_is_set = false; + short oom_adj = 0; + #endif + #if USE_UTMPX char inittab_id[sizeof(utmpx().ut_id)] = {0}; char inittab_line[sizeof(utmpx().ut_line)] = {0}; @@ -1379,6 +1473,14 @@ class service_settings_wrapper report_lint("'run-in-cgroup' specified, but ignored for the specified (or default) service type."); } #endif + #if SUPPORT_CAPABILITIES + if (!capabilities.empty()) { + report_lint("'capabilities' specified, but ignored for the specified (or default) service type."); + } + if (secbits.get()) { + report_lint("'secure-bits' specified, but ignored for the specified (or default) service type."); + } + #endif if (run_as_uid != (uid_t)-1) { report_lint("'run-as' specified, but ignored for the specified (or default) service type."); } @@ -1399,9 +1501,27 @@ class service_settings_wrapper if (onstart_flags.skippable) { report_lint("option 'skippable' was specified, but ignored for the specified (or default) service type."); } + #if SUPPORT_CAPABILITIES + if (onstart_flags.no_new_privs) { + report_lint("option 'no_new_privs' was specified, but ignored for the specified (or default) service type."); + } + #endif if (log_type != log_type_id::NONE) { report_lint("option 'log_type' was specified, but ignored for the specified (or default) service type."); } + if (nice_is_set) { + report_lint("option 'nice' was specified, but ignored for the specified (or default) service type."); + } + #if SUPPORT_IOPRIO + 
if (ioprio >= 0) { + report_lint("option 'ioprio' was specified, but ignored for the specified (or default) service type."); + } + #endif + #if SUPPORT_OOM_ADJ + if (oom_adj_is_set) { + report_lint("option 'oom-score-adj' was specified, but ignored for the specified (or default) service type."); + } + #endif } if (do_report_lint) { @@ -1544,6 +1664,89 @@ void process_service_line(settings_wrapper &settings, const char *name, const ch settings.run_in_cgroup = read_setting_value(input_pos, i, end, nullptr); break; #endif + #if SUPPORT_CAPABILITIES + case setting_id_t::CAPABILITIES: + read_setting_value(settings.capabilities, setting_op, input_pos, i, end, nullptr, ','); + break; + case setting_id_t::SECURE_BITS: + { + std::list> indices; + string onstart_cmds = read_setting_value(input_pos, i, end, &indices); + // plain assignment will clear, while append will add more + if (setting_op != setting_op_t::PLUSASSIGN) { + settings.secbits.clear(); + } + for (auto indexpair : indices) { + string secbit_txt = onstart_cmds.substr(indexpair.first, + indexpair.second - indexpair.first); + if (secbit_txt == "keep-caps") { + settings.secbits.keep_caps = true; + } + else if (secbit_txt == "keep-caps-locked") { + settings.secbits.keep_caps_locked = true; + } + else if (secbit_txt == "no-setuid-fixup") { + settings.secbits.no_setuid_fixup = true; + } + else if (secbit_txt == "no-setuid-fixup-locked") { + settings.secbits.no_setuid_fixup_locked = true; + } + else if (secbit_txt == "noroot") { + settings.secbits.noroot = true; + } + else if (secbit_txt == "noroot-locked") { + settings.secbits.noroot_locked = true; + } + else { + throw service_description_exc(name, "unknown secure bit: " + secbit_txt, + "secure-bits", input_pos); + } + } + break; + } + #endif + case setting_id_t::NICE: + { + string nice_str = read_setting_value(input_pos, i, end); + settings.nice_is_set = true; + settings.nice = (int)parse_snum_param(input_pos, nice_str, name, + std::numeric_limits::min() / 2, 
std::numeric_limits::max() / 2); + break; + } + #if SUPPORT_IOPRIO + case setting_id_t::IOPRIO: + { + string ioprio_str = read_setting_value(input_pos, i, end); + if (ioprio_str == "none") { + settings.ioprio = 0; + } + else if (starts_with(ioprio_str, "realtime:")) { + auto nval = parse_unum_param(input_pos, ioprio_str.substr(9 /* len 'realtime:' */), name, 7); + settings.ioprio = (1 << 13) | nval; + } + else if (starts_with(ioprio_str, "best-effort:")) { + auto nval = parse_unum_param(input_pos, ioprio_str.substr(12 /* len 'best-effort:' */), name, 7); + settings.ioprio = (2 << 13) | nval; + } + else if (ioprio_str == "idle") { + settings.ioprio = 3 << 13; + } + else { + throw service_description_exc(name, "invalid value for ioprio: " + ioprio_str, + "ioprio", input_pos); + } + break; + } + #endif + #if SUPPORT_OOM_ADJ + case setting_id_t::OOM_SCORE_ADJ: + { + string oom_adj_str = read_setting_value(input_pos, i, end); + settings.oom_adj_is_set = true; + settings.oom_adj = (int)parse_snum_param(input_pos, oom_adj_str, name, -1000, 1000); + break; + } + #endif case setting_id_t::SOCKET_LISTEN: settings.socket_path = read_setting_value(input_pos, i, end, nullptr); break; @@ -1797,6 +2000,11 @@ void process_service_line(settings_wrapper &settings, const char *name, const ch else if (option_txt == "kill-all-on-stop") { settings.onstart_flags.kill_all_on_stop = true; } +#if SUPPORT_CAPABILITIES + else if (option_txt == "no-new-privs") { + settings.onstart_flags.no_new_privs = true; + } +#endif else { throw service_description_exc(name, "unknown option: " + option_txt, "options", input_pos); diff --git a/src/includes/proc-service.h b/src/includes/proc-service.h index 3f3dc176..e7c4e1e7 100644 --- a/src/includes/proc-service.h +++ b/src/includes/proc-service.h @@ -5,6 +5,10 @@ #include #include +#if SUPPORT_CAPABILITIES +#include +#endif + #include #include #include @@ -14,6 +18,43 @@ class process_service; +#if SUPPORT_CAPABILITIES +// A thin wrapper around the structure
to avoid having to worry about freeing +// it in various places (move semantics and destructor will take care of it) +struct cap_iab_wrapper { + cap_iab_wrapper() {} + cap_iab_wrapper(std::string const &str) noexcept { + if (str.empty()) return; + // this may end up being nullptr + // throwing from constructors is bad, so always check .get() afterwards + iab = cap_iab_from_text(str.c_str()); + } + + cap_iab_wrapper(cap_iab_wrapper const &) = delete; + cap_iab_wrapper(cap_iab_wrapper &&v) noexcept: iab(v.iab) { + v.iab = nullptr; + } + + cap_iab_wrapper &operator=(cap_iab_wrapper const &) = delete; + cap_iab_wrapper &operator=(cap_iab_wrapper &&v) noexcept { + // swap rather than overwrite: the IAB previously held here is freed by v's destructor (no leak, self-move safe) + std::swap(iab, v.iab); + return *this; + } + + ~cap_iab_wrapper() noexcept { + if (iab) cap_free(iab); + } + + cap_iab_t get() const noexcept { + return iab; + } + +private: + cap_iab_t iab = nullptr; +}; +#endif + // Given a string and a list of pairs of (start,end) indices for each argument in that string, // store a null terminator for the argument. Return a `char *` vector containing the beginning // of each argument and a trailing nullptr.
(The returned array is invalidated if the string is later @@ -31,9 +72,23 @@ struct run_proc_params #if SUPPORT_CGROUPS const char *run_in_cgroup = nullptr; // cgroup path #endif + #if SUPPORT_CAPABILITIES + cap_iab_t cap_iab = nullptr; // capabilities IAB to apply (nullptr if none) + unsigned int secbits = 0; + bool no_new_privs = false; + #endif bool on_console; // whether to run on console bool in_foreground; // if on console: whether to run in foreground bool unmask_sigint = false; // if in foreground: whether to unmask SIGINT + bool nice_is_set = false; + int nice = 0; // the process nice value + #if SUPPORT_IOPRIO + int ioprio = -1; // scheduling class and priority for the process + #endif + #if SUPPORT_OOM_ADJ + bool oom_adj_is_set = false; + short oom_adj = 0; // oom score adjustment value + #endif int wpipefd; // pipe to which error status will be sent (if error occurs) int csfd; // control socket fd (or -1); may be moved int socket_fd; // pre-opened socket fd (or -1); may be moved @@ -186,8 +241,26 @@ class base_process_service : public service_record unsigned log_buf_size = 0; // log buffer current size std::vector> log_buffer; + bool nice_is_set = false; + int nice = 0; // only meaningful when nice_is_set; initialised because it is copied unconditionally + +#if SUPPORT_IOPRIO + int ioprio = -1; +#endif + +#if SUPPORT_OOM_ADJ + bool oom_adj_is_set = false; + short oom_adj = 0; +#endif + std::vector rlimits; // resource limits +#if SUPPORT_CAPABILITIES + cap_iab_wrapper cap_iab; + unsigned int secbits = 0; + bool no_new_privs = false; +#endif + #if SUPPORT_CGROUPS string run_in_cgroup; #endif @@ -459,6 +532,35 @@ class base_process_service : public service_record } #endif + #if SUPPORT_CAPABILITIES + void set_cap(cap_iab_wrapper &&iab, unsigned int sbits) noexcept + { + cap_iab = std::move(iab); + secbits = sbits; + } + #endif + + void set_nice(int nice_v) noexcept + { + nice_is_set = true; + nice = nice_v; + } + + #if SUPPORT_IOPRIO + void set_ioprio(int ioprio_v) noexcept + { + ioprio = ioprio_v; + } + #endif + + #if SUPPORT_OOM_ADJ + void set_oom_adj(short oom_adj_v) noexcept + { + oom_adj_is_set
= true; + oom_adj = oom_adj_v; + } + #endif + void set_rlimits(std::vector &&rlimits_p) { rlimits = std::move(rlimits_p); diff --git a/src/includes/service-constants.h b/src/includes/service-constants.h index 2787c15e..d7ac8c7d 100644 --- a/src/includes/service-constants.h +++ b/src/includes/service-constants.h @@ -71,9 +71,10 @@ enum class exec_stage { ARRANGE_FDS, READ_ENV_FILE, SET_NOTIFYFD_VAR, SETUP_ACTIVATION_SOCKET, SETUP_CONTROL_SOCKET, CHDIR, SETUP_STDINOUTERR, ENTER_CGROUP, SET_RLIMITS, SET_UIDGID, OPEN_LOGFILE, // this is used instead of SETUP_STDINOUTERR if output is to logfile + SET_CAPS, SET_PRIO, /* values for future expansion: */ - SPARE2, SPARE3, SPARE4, SPARE5, SPARE6, SPARE7, SPARE8, + SPARE4, SPARE5, SPARE6, SPARE7, SPARE8, /* must be last: */ DO_EXEC }; diff --git a/src/load-service.cc b/src/load-service.cc index 42429319..731d3f31 100644 --- a/src/load-service.cc +++ b/src/load-service.cc @@ -708,6 +708,14 @@ service_record * dirload_service_set::load_reload_service(const char *fullname, // - this will be done later) } + // We may have capabilities, process them now + #if SUPPORT_CAPABILITIES + cap_iab_wrapper cap_iab(settings.capabilities); + if (!settings.capabilities.empty() && !cap_iab.get()) { + throw service_load_exc(name, "the 'capabilities' string has an invalid format"); + } + #endif + if (service_type == service_type_t::PROCESS) { do_env_subst("command", settings.command, settings.command_offsets, srv_envmap, argval); do_env_subst("stop-command", settings.stop_command, settings.stop_command_offsets, srv_envmap, argval); @@ -733,6 +741,16 @@ service_record * dirload_service_set::load_reload_service(const char *fullname, #if SUPPORT_CGROUPS rvalps->set_cgroup(std::move(settings.run_in_cgroup)); #endif + #if SUPPORT_CAPABILITIES + rvalps->set_cap(std::move(cap_iab), settings.secbits.get()); + #endif + if (settings.nice_is_set) rvalps->set_nice(settings.nice); + #if SUPPORT_IOPRIO + rvalps->set_ioprio(settings.ioprio); + #endif + #if 
SUPPORT_OOM_ADJ + if (settings.oom_adj_is_set) rvalps->set_oom_adj(settings.oom_adj); + #endif rvalps->set_rlimits(std::move(settings.rlimits)); rvalps->set_restart_interval(settings.restart_interval, settings.max_restarts); rvalps->set_restart_delay(settings.restart_delay); @@ -776,6 +794,16 @@ service_record * dirload_service_set::load_reload_service(const char *fullname, #if SUPPORT_CGROUPS rvalps->set_cgroup(std::move(settings.run_in_cgroup)); #endif + #if SUPPORT_CAPABILITIES + rvalps->set_cap(std::move(cap_iab), settings.secbits.get()); + #endif + if (settings.nice_is_set) rvalps->set_nice(settings.nice); + #if SUPPORT_IOPRIO + rvalps->set_ioprio(settings.ioprio); + #endif + #if SUPPORT_OOM_ADJ + if (settings.oom_adj_is_set) rvalps->set_oom_adj(settings.oom_adj); + #endif rvalps->set_rlimits(std::move(settings.rlimits)); rvalps->set_pid_file(std::move(settings.pid_file)); rvalps->set_restart_interval(settings.restart_interval, settings.max_restarts); @@ -815,6 +843,16 @@ service_record * dirload_service_set::load_reload_service(const char *fullname, #if SUPPORT_CGROUPS rvalps->set_cgroup(std::move(settings.run_in_cgroup)); #endif + #if SUPPORT_CAPABILITIES + rvalps->set_cap(std::move(cap_iab), settings.secbits.get()); + #endif + if (settings.nice_is_set) rvalps->set_nice(settings.nice); + #if SUPPORT_IOPRIO + rvalps->set_ioprio(settings.ioprio); + #endif + #if SUPPORT_OOM_ADJ + if (settings.oom_adj_is_set) rvalps->set_oom_adj(settings.oom_adj); + #endif rvalps->set_rlimits(std::move(settings.rlimits)); rvalps->set_stop_timeout(settings.stop_timeout); rvalps->set_start_timeout(settings.start_timeout); diff --git a/src/meson.build b/src/meson.build index 75b3aced..bb1f976b 100644 --- a/src/meson.build +++ b/src/meson.build @@ -27,7 +27,8 @@ shutdown_built = false misc_args = { 'include_directories': default_incdir, 'install': true, - 'install_dir': sbindir + 'install_dir': sbindir, + 'dependencies': [libcap_dep] } ## src/'s defines for igr-tests/ diff --git 
a/src/proc-service.cc b/src/proc-service.cc index 96e4c06e..a0bb69b0 100644 --- a/src/proc-service.cc +++ b/src/proc-service.cc @@ -932,9 +932,23 @@ bool process_service::start_stop_process(const std::vector &cmd) n run_params.force_notify_fd = -1; run_params.notify_var = nullptr; run_params.env_file = env_file.c_str(); + run_params.nice_is_set = nice_is_set; + run_params.nice = nice; #if SUPPORT_CGROUPS run_params.run_in_cgroup = run_in_cgroup.c_str(); #endif + #if SUPPORT_CAPABILITIES + run_params.cap_iab = cap_iab.get(); + run_params.secbits = secbits; + run_params.no_new_privs = onstart_flags.no_new_privs; + #endif + #if SUPPORT_IOPRIO + run_params.ioprio = ioprio; + #endif + #if SUPPORT_OOM_ADJ + run_params.oom_adj_is_set = oom_adj_is_set; + run_params.oom_adj = oom_adj; + #endif run_child_proc(run_params); } else { diff --git a/src/run-child-proc.cc b/src/run-child-proc.cc index be443624..e97be6e0 100644 --- a/src/run-child-proc.cc +++ b/src/run-child-proc.cc @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -15,6 +16,11 @@ #include "proc-service.h" #include "mconfig.h" +#if SUPPORT_CAPABILITIES +#include +#include +#endif + #ifdef SUPPORT_CGROUPS extern std::string cgroups_path; extern bool have_cgroups_path; @@ -60,6 +66,8 @@ void base_process_service::run_child_proc(run_proc_params params) noexcept const char *working_dir = params.working_dir; const char *logfile = params.logfile; bool on_console = params.on_console; + bool nice_is_set = params.nice_is_set; + int nice = params.nice; int wpipefd = params.wpipefd; int csfd = params.csfd; int notify_fd = params.notify_fd; @@ -69,6 +77,18 @@ void base_process_service::run_child_proc(run_proc_params params) noexcept gid_t gid = params.gid; const std::vector &rlimits = params.rlimits; int output_fd = params.output_fd; + #if SUPPORT_CAPABILITIES + cap_iab_t cap_iab = params.cap_iab; + unsigned int secbits = params.secbits; + bool no_new_privs = params.no_new_privs; + #endif + #if 
SUPPORT_IOPRIO + int ioprio = params.ioprio; + #endif + #if SUPPORT_OOM_ADJ + bool oom_adj_is_set = params.oom_adj_is_set; + int oom_adj = params.oom_adj; + #endif // If the console already has a session leader, presumably it is us. On the other hand // if it has no session leader, and we don't create one, then control inputs such as @@ -291,6 +311,56 @@ void base_process_service::run_child_proc(run_proc_params params) noexcept if (setrlimit(limit.resource_id, &setlimits) != 0) goto failure_out; } + // nice + if (nice_is_set) { + err.stage = exec_stage::SET_PRIO; + #ifdef __linux__ + // clamp the values to known range so the autogroup hack below works + if (nice > 19) nice = 19; + if (nice < -20) nice = -20; + #endif + if (setpriority(PRIO_PROCESS, getpid(), nice) != 0) goto failure_out; + #ifdef __linux__ + // we usually create a new session leader; that makes nice not very + // useful as the Linux kernel will autogroup processes by session id + // except when disabled - so also work around this where enabled + // the r+ is used in order to avoid creating it where already disabled + errno = 0; + FILE *ag = std::fopen("/proc/self/autogroup", "r+"); + if (ag) { + std::fprintf(ag, "%d\n", nice); + std::fclose(ag); + } + else if (errno != ENOENT) goto failure_out; + #endif + } + + #if SUPPORT_IOPRIO + // ioprio + if (ioprio >= 0) { + err.stage = exec_stage::SET_PRIO; + if (syscall(__NR_ioprio_set, 1, (int)getpid(), ioprio) != 0) goto failure_out; + } + #endif + + #if SUPPORT_OOM_ADJ + // oom score adjustment + if (oom_adj_is_set) { + err.stage = exec_stage::SET_PRIO; + errno = 0; + int fd = open("/proc/self/oom_score_adj", O_WRONLY); + if (fd < 0) goto failure_out; + // +4: round up, minus sign, newline, nul terminator + char val_str[std::numeric_limits::digits10 + 4]; + int num_chars = snprintf(val_str, sizeof(val_str), "%hd\n", oom_adj); + if (write(fd, val_str, num_chars) < 0) { + close(fd); + goto failure_out; + } + close(fd); + } + #endif + #if SUPPORT_CGROUPS if 
(params.run_in_cgroup != nullptr && *params.run_in_cgroup != 0) { err.stage = exec_stage::ENTER_CGROUP; @@ -377,9 +447,28 @@ void base_process_service::run_child_proc(run_proc_params params) noexcept if (setregid(gid, gid) != 0) goto failure_out; } #endif +#if SUPPORT_CAPABILITIES + if (cap_setuid(uid) != 0) goto failure_out; +#else if (setreuid(uid, uid) != 0) goto failure_out; +#endif } +#if SUPPORT_CAPABILITIES + if (cap_iab) { + err.stage = exec_stage::SET_CAPS; + if (cap_iab_set_proc(cap_iab) != 0) goto failure_out; + } + if (secbits) { + err.stage = exec_stage::SET_CAPS; + if (cap_set_secbits(secbits) < 0) goto failure_out; + } + if (no_new_privs) { + err.stage = exec_stage::SET_CAPS; + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) goto failure_out; + } +#endif + // Restore signal mask. If running on the console, we'll keep various control signals that can // be invoked from the terminal masked, with the exception of SIGHUP and possibly SIGINT. { diff --git a/src/settings.cc b/src/settings.cc index f789bcf7..8b62a0b9 100644 --- a/src/settings.cc +++ b/src/settings.cc @@ -57,10 +57,25 @@ setting_details all_settings[] = { {"rlimit-data", setting_id_t::SETTING_RLIMIT_DATA, false, true, false}, {"rlimit-addrspace", setting_id_t::SETTING_RLIMIT_ADDRSPACE, false, true, false}, + {"nice", setting_id_t::NICE, false, true, false}, + #if SUPPORT_CGROUPS {"run-in-cgroup", setting_id_t::RUN_IN_CGROUP, false, true, false}, #endif +#if SUPPORT_CAPABILITIES + {"capabilities", setting_id_t::CAPABILITIES, false, true, true}, + {"secure-bits", setting_id_t::SECURE_BITS, false, true, true}, +#endif + +#if SUPPORT_IOPRIO + {"ioprio", setting_id_t::IOPRIO, false, true, false}, +#endif + +#if SUPPORT_OOM_ADJ + {"oom-score-adj", setting_id_t::OOM_SCORE_ADJ, false, true, false}, +#endif + {nullptr, setting_id_t::LAST, false, false, false} }; diff --git a/src/tests/Makefile b/src/tests/Makefile index 3acfc6ed..04fa9234 100644 --- a/src/tests/Makefile +++ b/src/tests/Makefile @@
-3,6 +3,10 @@ ALL_TEST_CXXFLAGS=$(CPPFLAGS) $(TEST_CXXFLAGS) $(TEST_CXXFLAGS_EXTRA) ALL_TEST_LDFLAGS=$(TEST_LDFLAGS) $(TEST_LDFLAGS_EXTRA) +ifeq ($(SUPPORT_CAPABILITIES),1) + ALL_TEST_LDFLAGS+=-lcap +endif + objects = tests.o test-dinit.o proctests.o loadtests.o envtests.o test-run-child-proc.o test-bpsys.o parent_objs = service.o proc-service.o dinit-log.o load-service.o baseproc-service.o dinit-env.o control.o settings.o diff --git a/src/tests/cptests/Makefile b/src/tests/cptests/Makefile index 220614f0..8d498af2 100644 --- a/src/tests/cptests/Makefile +++ b/src/tests/cptests/Makefile @@ -3,6 +3,10 @@ ALL_TEST_CXXFLAGS=$(CPPFLAGS) $(TEST_CXXFLAGS) $(TEST_CXXFLAGS_EXTRA) ALL_TEST_LDFLAGS=$(TEST_LDFLAGS) $(TEST_LDFLAGS_EXTRA) +ifeq ($(SUPPORT_CAPABILITIES),1) + ALL_TEST_LDFLAGS+=-lcap +endif + objects = cptests.o parent_test_objects = ../test-bpsys.o ../test-dinit.o ../test-run-child-proc.o parent_objs = control.o dinit-log.o service.o load-service.o proc-service.o baseproc-service.o dinit-env.o settings.o diff --git a/src/tests/cptests/meson.build b/src/tests/cptests/meson.build index a8a85ec2..808a50a0 100644 --- a/src/tests/cptests/meson.build +++ b/src/tests/cptests/meson.build @@ -33,7 +33,8 @@ if unit_tests 'cptests', 'cptests.cc', sources, - include_directories: incdir + include_directories: incdir, + dependencies: [libcap_dep] ) test('cptests', cptests_exec, suite: 'unit_tests') endif diff --git a/src/tests/meson.build b/src/tests/meson.build index bba890b9..527b3089 100644 --- a/src/tests/meson.build +++ b/src/tests/meson.build @@ -30,28 +30,32 @@ tests_exec = executable( 'tests.cc', for_tests_general_sources, for_tests_dinit_sources, - include_directories: for_tests_incdir + include_directories: for_tests_incdir, + dependencies: [libcap_dep] ) proctests_exec = executable( 'proctests', 'proctests.cc', for_tests_general_sources, for_tests_dinit_sources, - include_directories: for_tests_incdir + include_directories: for_tests_incdir, + dependencies: 
[libcap_dep] ) loadtests_exec = executable( 'loadtests', 'loadtests.cc', for_tests_general_sources, for_tests_dinit_sources, - include_directories: for_tests_incdir + include_directories: for_tests_incdir, + dependencies: [libcap_dep] ) envtests_exec = executable( 'envtests', 'envtests.cc', for_tests_general_sources, for_tests_dinit_sources, - include_directories: for_tests_incdir + include_directories: for_tests_incdir, + dependencies: [libcap_dep] ) test('tests', tests_exec, suite: 'unit_tests') test('proctests', proctests_exec, suite: 'unit_tests')