Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

validator: fix potential infinite loop #5813

Merged
merged 1 commit into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions sched/script_validator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,11 @@ void validate_handler_usage() {
);
}

// see validate_util2.h for return values
// run script to check a result
// if script exits with VAL_RESULT_TRANSIENT_ERROR, return that;
// the WU will be validated again after a delay.
//
// any other nonzero return means the result is not valid
//
int init_result(RESULT& result, void*&) {
if (init_script.empty()) {
Expand Down Expand Up @@ -144,10 +148,13 @@ int init_result(RESULT& result, void*&) {
int s = WEXITSTATUS(retval);
if (!s) return 0;
if (s == VAL_RESULT_TRANSIENT_ERROR) {
log_messages.printf(MSG_NORMAL,
"init script return transient error"
);
return VAL_RESULT_TRANSIENT_ERROR;
}
log_messages.printf(MSG_CRITICAL,
"init script %s failed: %d\n", cmd, s
log_messages.printf(MSG_NORMAL,
"init script %s returned: %d\n", cmd, s
);
return -1;
}
Expand Down
17 changes: 10 additions & 7 deletions sched/validate_util2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ using std::vector;
// (i.e. there was a broken NFS mount).
// Should call this again after a while.
//
// always return zero
//
int check_set(
vector<RESULT>& results, WORKUNIT& wu,
DB_ID_TYPE& canonicalid, double&, bool& retry
Expand All @@ -74,6 +76,9 @@ int check_set(
had_error.resize(n);

// Initialize results
// For each one we potentially allocate data,
// so always exit via goto cleanup:
// to free this mem

for (i=0; i<n; i++) {
data[i] = NULL;
Expand Down Expand Up @@ -134,29 +139,29 @@ int check_set(

if (good_results < wu.min_quorum) goto cleanup;

// Compare results
// for each result, count how many it matches (including itself)
// If this is at least min_valid, it's the canonical result

for (i=0; i<n; i++) {
if (had_error[i]) continue;
vector<bool> matches;
matches.resize(n);
neq = 0;
for (j=0; j!=n; j++) {
for (j=0; j<n; j++) {
if (had_error[j]) continue;
bool match = false;
if (i == j) {
++neq;
matches[j] = true;
continue;
}
bool match = false;
retval = compare_results(
results[i], data[i], results[j], data[j], match
);
switch (retval) {
case ERR_OPENDIR:
case VAL_RESULT_TRANSIENT_ERROR:
retry = true;
retval = 0;
goto cleanup;
case 0:
if (match) {
Expand All @@ -169,7 +174,6 @@ int check_set(
"check_set(): compare_results([RESULT#%lu %s], [RESULT#%lu %s]) failed\n",
results[i].id, results[i].name, results[j].id, results[j].name
);
goto cleanup;
}
}
if (neq >= min_valid) {
Expand All @@ -185,12 +189,11 @@ int check_set(
}
}

retval = 0;
cleanup:
for (i=0; i<n; i++) {
cleanup_result(results[i], data[i]);
}
return retval;
return 0;
}

// a straggler instance has arrived after the WU is already validated.
Expand Down
Loading