More efficient method for check_parallel_jacobian #1395

Open
wants to merge 37 commits into main
Changes from 28 commits
Commits (37)
62a6346
implement new method
dallan-keylogic Apr 10, 2024
6a46229
run black
dallan-keylogic Apr 10, 2024
5e4da39
Nonstrict zip
dallan-keylogic Apr 11, 2024
8fbb339
only relative criterion, filter out zero norm vectors
dallan-keylogic Apr 11, 2024
da664b0
Correct preallocation
dallan-keylogic Apr 11, 2024
9b8a25d
run Black
dallan-keylogic Apr 11, 2024
7fe9c5e
efficiency
dallan-keylogic Apr 11, 2024
ab30049
excessive optimization
dallan-keylogic Apr 11, 2024
8c9959c
add optional jacobian and nlp arguments plus fix test
dallan-keylogic Apr 11, 2024
5021f29
Merge branch 'main' into parallel_constraints
dallan-keylogic Apr 11, 2024
e4ce707
run black
dallan-keylogic Apr 11, 2024
8438d09
fix failing test and add additional comments
dallan-keylogic Apr 12, 2024
e048a22
Get rid of old method, try to make code more readable
dallan-keylogic Apr 19, 2024
6d53c8d
Merge branch 'main' into parallel_constraints
dallan-keylogic Apr 19, 2024
12109f9
Merge branch 'main' into parallel_constraints
dallan-keylogic May 3, 2024
e175405
merge
dallan-keylogic May 3, 2024
8cedc01
failures
dallan-keylogic May 3, 2024
a8da700
Merge branch 'main' into parallel_constraints
dallan-keylogic Jun 14, 2024
345a813
Begun changes to address issues
dallan-keylogic Jun 15, 2024
330953f
finish cleanup
dallan-keylogic Jun 17, 2024
5939198
Merge branch 'main' into parallel_constraints
dallan-keylogic Jun 17, 2024
e30c6d5
commit black
dallan-keylogic Jun 17, 2024
50903cc
pylint
dallan-keylogic Jun 17, 2024
a54f189
Begun dealing with negative phase flows
dallan-keylogic Jun 18, 2024
9bd6ce8
run Black
dallan-keylogic Jun 18, 2024
0cd2d2d
Reversion
dallan-keylogic Jun 18, 2024
42e00a2
after me, the deluge
dallan-keylogic Jun 18, 2024
940d253
reversion
dallan-keylogic Jun 18, 2024
c922565
andrew's changes
dallan-keylogic Jun 18, 2024
fb1bb91
readd attribution
dallan-keylogic Jun 20, 2024
e226647
merge
dallan-keylogic Jun 20, 2024
1aed06f
merge in complementarity, leave memorials to its sins
dallan-keylogic Jun 20, 2024
02db37e
tighten tolerance
dallan-keylogic Jun 20, 2024
631e7c5
more feedback from Robby
dallan-keylogic Jun 28, 2024
bf5f26d
Merge branch 'main' into parallel_constraints
dallan-keylogic Jun 28, 2024
ffd0071
Merge branch 'main' into parallel_constraints
Robbybp Oct 25, 2024
10250a5
add scaled option to some methods
Robbybp Oct 25, 2024
24 changes: 24 additions & 0 deletions idaes/core/base/control_volume1d.py
@@ -2554,3 +2554,27 @@ def calculate_scaling_factors(self):
                    iscale.get_scaling_factor(self.element_accumulation[t, x, e]),
                    overwrite=False,
                )

        # Collocation (Lagrange-Legendre) support
        if hasattr(self, "_flow_terms_length_domain_cont_eq"):
            for (t, x, p, j), c in self._flow_terms_length_domain_cont_eq.items():
                sf = iscale.get_scaling_factor(self._flow_terms[t, x, p, j])
                iscale.constraint_scaling_transform(c, sf, overwrite=False)

        if hasattr(self, "elemental_flow_term_length_domain_cont_eq"):
            for (t, x, e), c in self.elemental_flow_term_length_domain_cont_eq.items():
                sf = iscale.get_scaling_factor(self.elemental_flow_term[t, x, e])
                iscale.constraint_scaling_transform(c, sf, overwrite=False)

        if hasattr(self, "pressure_length_domain_cont_eq"):
            for (t, x), c in self.pressure_length_domain_cont_eq.items():
                sf_P = iscale.get_scaling_factor(
                    self.properties[t, x].pressure, default=1, warning=True
                )
                iscale.constraint_scaling_transform(c, sf_P, overwrite=False)

        if hasattr(self, "_enthalpy_flow_length_domain_cont_eq"):
            for (t, x, p), c in self._enthalpy_flow_length_domain_cont_eq.items():
                # No need for default because it should already have a scaling factor
                sf = iscale.get_scaling_factor(self._enthalpy_flow[t, x, p])
                iscale.constraint_scaling_transform(c, sf, overwrite=False)
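
For readers unfamiliar with the IDAES scaling utilities used in the block above, here is a minimal, editor-added sketch of the pattern each new block follows: look up the scaling factor attached to a variable and apply it to a related constraint. The toy model and factor values are invented for illustration; only the three iscale calls come from the diff, and the sketch assumes a working IDAES installation.

import pyomo.environ as pyo
import idaes.core.util.scaling as iscale

# Illustrative toy model, not part of the PR
m = pyo.ConcreteModel()
m.x = pyo.Var(initialize=1e5)
m.c = pyo.Constraint(expr=m.x == 2e5)

iscale.set_scaling_factor(m.x, 1e-5)             # attach a scaling factor to the variable
sf = iscale.get_scaling_factor(m.x, default=1)   # retrieve it (falls back to 1 if unset)
iscale.constraint_scaling_transform(m.c, sf, overwrite=False)  # rescale the constraint in place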
107 changes: 59 additions & 48 deletions idaes/core/util/model_diagnostics.py
@@ -28,7 +28,7 @@
import numpy as np
from scipy.linalg import svd
from scipy.sparse.linalg import svds, norm
from scipy.sparse import issparse, find
from scipy.sparse import issparse, find, triu, diags

from pyomo.environ import (
    Binary,
@@ -3712,8 +3712,9 @@ def ipopt_solve_halt_on_error(model, options=None):

def check_parallel_jacobian(
    model,
    tolerance: float = 1e-4,
    tolerance: float = 1e-8,
    direction: str = "row",
    zero_norm_tolerance: float = 1e-8,
    jac=None,
    nlp=None,
):
@@ -3746,8 +3747,6 @@

        list of 2-tuples containing parallel Pyomo components

    """
    # Thanks to Robby Parker for the sparse matrix implementation and
    # significant performance improvements

Member: This should not be deleted, but added to.

    if direction not in ["row", "column"]:
        raise ValueError(

@@ -3762,52 +3761,64 @@

    # they correspond to.
    if direction == "row":
        components = nlp.get_pyomo_constraints()
        csrjac = jac.tocsr()
        # Make everything a column vector (CSC) for consistency
        vectors = [csrjac[i, :].transpose().tocsc() for i in range(len(components))]
        mat = jac.tocsr()

    elif direction == "column":
        components = nlp.get_pyomo_variables()
        cscjac = jac.tocsc()
        vectors = [cscjac[:, i] for i in range(len(components))]

    # List to store pairs of parallel components
    parallel = []

    vectors_by_nz = {}
    for vecidx, vec in enumerate(vectors):
        maxval = max(np.abs(vec.data))
        # Construct tuple of sorted col/row indices that participate
        # in this vector (with non-negligible coefficient).
        nz = tuple(
            sorted(
                idx
                for idx, val in zip(vec.indices, vec.data)
                if abs(val) > tolerance and abs(val) / maxval > tolerance
            )
        )
        if nz in vectors_by_nz:
            # Store the index as well so we know what component this
            # corresponds to.
            vectors_by_nz[nz].append((vec, vecidx))
        else:
            vectors_by_nz[nz] = [(vec, vecidx)]

    for vecs in vectors_by_nz.values():
        for idx, (u, uidx) in enumerate(vecs):
            # idx is the "local index", uidx is the "global index"
            # Frobenius norm of the matrix is 2-norm of this column vector
            unorm = norm(u, ord="fro")
            for v, vidx in vecs[idx + 1 :]:
                vnorm = norm(v, ord="fro")

                # Explicitly multiply a row vector * column vector
                prod = u.transpose().dot(v)
                absprod = abs(prod[0, 0])
                diff = abs(absprod - unorm * vnorm)
                if diff <= tolerance or diff <= tolerance * max(unorm, vnorm):
                    parallel.append((uidx, vidx))

    parallel = [(components[uidx], components[vidx]) for uidx, vidx in parallel]
        mat = jac.transpose().tocsr()

    # Take the product of all rows/columns with all rows/columns by taking the
    # outer product of the matrix with itself
    outer = mat @ mat.transpose()
Member: The previous implementation was designed to avoid taking every possible dot product by first sorting vectors by their nonzero structure. I would have expected this to be slow due to unnecessary dot products, but in some testing, it doesn't seem slower.

Contributor Author: This is sparse matrix multiplication carried out by SciPy. The key here is that (1) sorting through which products to take should be carried out in the sparse matrix multiplication routine, not manually, and (2) the looping required to do this sorting and multiplication is done in compiled C++ code, not Python code, and so should be much faster.

Member: The SciPy matrix product takes every possible combination of nonzero dot products, while the previous implementation takes only dot products between vectors with the same sparsity structure. That is fewer dot products, although, in practice, probably not by a wide margin.

Contributor Author: I'm afraid I don't entirely follow. SciPy's matrix multiply, as near as I can tell, does the same thing. It implements the SMMP algorithm, which runs in two steps: the first determines the nonzero structure of the result, and the second actually computes the values. That's basically what you're doing, except that (1) it isn't filtering out entries as being "too small to contribute" and (2) it's doing it in C++ instead of Python, which is much faster.

Member: For example, if two rows are (1, 2, 3) and (1, 2, 0), they cannot possibly be parallel. The previous implementation will not compute this dot product, while the new implementation will.

Contributor Author: So you're ruling out rows being parallel based on one having an element at the third index (3) and the other having a zero there? That can fail if the first row were (1, 2, 3e-8) instead of (1, 2, 3): (1, 2, 3e-8) is still effectively parallel to (1, 2, 0) even though they differ structurally.

Member: We do this after filtering out small entries.
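
To make the exchange above concrete, here is a small, editor-added sketch (not part of the diff) of the outer-product approach being discussed: row-normalize a sparse matrix, multiply it by its own transpose, and read near-parallel row pairs off the entries whose magnitude is close to 1. The toy matrix and tolerance are invented for illustration, and the strict upper triangle is taken directly with triu(k=1) rather than by subtracting the diagonal as the PR does.

import numpy as np
from scipy.sparse import csr_matrix, diags, find, triu

# Toy Jacobian: rows 0 and 2 are (nearly) parallel
J = csr_matrix(np.array([
    [1.0, 2.0, 0.0],
    [0.0, 1.0, 1.0],
    [2.0, 4.0, 1e-10],
]))

outer = J @ J.transpose()          # all pairwise dot products between rows
norms = np.sqrt(outer.diagonal())  # diagonal entries are ||row||**2
scaling = diags(1 / norms)         # assumes no zero-norm rows in this toy example
cosines = scaling @ outer @ scaling

# Keep only the strict upper triangle to drop self-pairs and duplicates,
# then flag pairs whose |cos(angle)| is within tolerance of 1
rows, cols, vals = find(triu(cosines, k=1))
tol = 1e-8
pairs = [(int(r), int(c)) for r, c, v in zip(rows, cols, vals) if 1 - abs(v) < tol]
print(pairs)  # [(0, 2)]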


    # Along the diagonal of the outer product you get the dot product of the row
    # with itself, which is equal to the norm squared.
    norms = np.sqrt(outer.diagonal())

    # Want to ignore indices with zero norm. By zeroing out the corresponding
    # entries of the scaling matrix, we set the corresponding rows and columns
    # to zero, which will then be ignored.

    zero_norm_indices = np.nonzero(np.abs(norms) <= zero_norm_tolerance)
    inv_norms = 1 / norms
    inv_norms[zero_norm_indices] = 0

    # Divide each row and each column by the vector norm. This leaves
    # the entries as dot(u, v) / (norm(u) * norm(v)). The exception is
    # entries with "zero norm", whose corresponding rows and columns are
    # set to zero.
    scaling = diags(inv_norms)
    outer = scaling * outer * scaling

    # Get rid of duplicate values by only taking upper triangular part of
    # resulting matrix
    upper_tri = triu(outer)

    # Set diagonal elements to zero
    # Subtracting diags(upper_tri.diagonal()) is a more reliable
    # method of getting the entries to exactly zero than subtracting
    # an identity matrix, where one can encounter values of 1e-16
    upper_tri = upper_tri - diags(upper_tri.diagonal())

    # Get the nonzero entries of upper_tri in three arrays,
    # corresponding to row indices, column indices, and values
    rows, columns, values = find(upper_tri)

    # We have that dot(u, v) == norm(u) * norm(v) * cos(theta), in which
    # theta is the angle between u and v. If theta is approximately
    # 0 or pi, then sqrt(2 * (1 - abs(dot(u, v)) / (norm(u) * norm(v)))) is
    # approximately the number of radians separating u and v from 0 or pi.
    # A tolerance of 1e-8 therefore corresponds to a tolerance of
    # sqrt(2) * 1e-4 radians.

    # The expression (1 - abs(values) < tolerance) returns a Boolean array
    # indicating, entry by entry, whether the condition is fulfilled.
    # We then find the indices where it holds using np.nonzero.
    parallel_1D = np.nonzero(1 - abs(values) < tolerance)[0]
Member: What is the motivation for checking 1 - abs(u.dot(v)) / (unorm * vnorm) instead of unorm*vnorm - abs(u.dot(v))? The previous implementation, as well as Gurobi's implementation (https://github.com/Gurobi/gurobi-modelanalyzer/blob/a8a34f70e09f3e439b1b7f308b68010a2dfac5b3/src/gurobi_modelanalyzer/results_analyzer.py#L1241), uses the latter quantity, which is fewer operations (less round-off error). Division in particular seems risky if the denominator is small, and with the default zero_norm_tolerance here, it can approach 1e-16.

Contributor Author (dallan-keylogic, Jun 28, 2024): Both the previous implementation and the Gurobi implementation checked against a semi-normalized epsilon:

                #
                # Normalize the tolerance in the test; want the same
                # result for vectors u and v and 1000u and 1000v
                #
                if abs(abs(dotprod) - norm1 * norm2) > partol * norm1:
                    continue

Note that this formulation doesn't scale the tolerance with norm2 for some reason. Also, possibly for reasons of efficiency, they use 1-norms instead of 2-norms. That's a false economy, though, because it's no longer guaranteed that abs(dotprod) <= norm1 * norm2 (the Cauchy-Schwarz inequality). The equation we're using comes from the formula for the angle between two vectors, u.dot(v) = unorm*vnorm*cos(theta), along with Taylor expansions around theta=0 and theta=pi.

We could implement the check as unorm*vnorm - abs(u.dot(v)) < tolerance*unorm*vnorm, but that has the disadvantage of being harder to vectorize. Floating-point division, even division by a number on the order of 1e-16, is well defined and can be computed accurately. From Wikipedia: "There are no cancellation or absorption problems with multiplication or division, though small errors may accumulate as operations are performed in succession." It's addition and subtraction that can cause catastrophic cancellation.

Member: I would advocate for unorm*vnorm - abs(u.dot(v)) < tolerance*max(1, unorm*vnorm), where all norms are 2-norms. Point taken on division, but the above is still fewer operations than the current implementation.

Contributor Author: What's the intention with the max there? Even relatively orthogonal vectors will be considered "parallel" if the rows have small norm: [1e-6, 0] and [1e-6, 1e-6] are considered parallel by that metric.

I don't think avoiding vectorized operations is going to be a major improvement in performance, especially since we'll be adding two multiplications (unorm*vnorm and tol*unorm*vnorm) for each pair for which u.dot(v) is nonzero.
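
As an editor-added aside, the Taylor-expansion argument above can be checked numerically: near theta = 0, cos(theta) ≈ 1 - theta**2/2, so 1 - |cos(theta)| ≈ theta**2/2, and a threshold of 1e-8 on that quantity admits angles up to sqrt(2e-8) ≈ 1.4e-4 radians from 0 or pi, which is the sqrt(2) * 1e-4 figure quoted in the code comment. The snippet below is illustrative only and not part of the PR.

import numpy as np

theta = 1e-4                    # a small angle between two vectors, in radians
lhs = 1 - abs(np.cos(theta))    # the quantity the new code compares against `tolerance`
print(lhs)                      # ~5.0e-09, i.e. theta**2 / 2
print(np.sqrt(2 * lhs))         # ~1.0e-04, recovering theta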


    parallel = [
        (components[rows[idx]], components[columns[idx]]) for idx in parallel_1D
    ]

    return parallel
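
Finally, a short editor-added usage sketch of the function whose diff is shown above. The toy model is invented for illustration; it assumes the check_parallel_jacobian signature shown in this diff and a working PyNumero/ASL installation (a dummy objective is added because building the NLP requires one).

import pyomo.environ as pyo
from idaes.core.util.model_diagnostics import check_parallel_jacobian

m = pyo.ConcreteModel()
m.x = pyo.Var([1, 2], initialize=1.0)
m.c1 = pyo.Constraint(expr=m.x[1] + 2 * m.x[2] == 1)
m.c2 = pyo.Constraint(expr=2 * m.x[1] + 4 * m.x[2] == 3)  # parallel to c1
m.c3 = pyo.Constraint(expr=m.x[1] - m.x[2] == 0)
m.obj = pyo.Objective(expr=0)  # PyNumero needs an active objective to build the NLP

for con1, con2 in check_parallel_jacobian(m, tolerance=1e-8, direction="row"):
    print(con1.name, "is nearly parallel to", con2.name)  # expect: c1, c2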


20 changes: 7 additions & 13 deletions idaes/core/util/tests/test_model_diagnostics.py
@@ -990,18 +990,12 @@ def test_display_near_parallel_variables(self):

        stream = StringIO()
        dt.display_near_parallel_variables(stream)

        expected = """====================================================================================
The following pairs of variables are nearly parallel:

v1, v2
v1, v4
v2, v4

====================================================================================
"""

        assert stream.getvalue() == expected
        expected_pairs = ["v1, v2", "v1, v4", "v2, v4"]
        assert (
            "The following pairs of variables are nearly parallel:" in stream.getvalue()
        )
        for pair in expected_pairs:
            assert pair in stream.getvalue()

    @pytest.mark.component
    def test_collect_structural_warnings_base_case(self, model):

@@ -1469,7 +1463,7 @@ def test_report_numerical_issues_jacobian(self):
WARNING: 1 Constraint with large residuals (>1.0E-05)
WARNING: 2 Variables with extreme Jacobian values (<1.0E-08 or >1.0E+08)
WARNING: 1 Constraint with extreme Jacobian values (<1.0E-08 or >1.0E+08)
WARNING: 3 pairs of variables are parallel (to tolerance 1.0E-08)
WARNING: 1 pair of variables are parallel (to tolerance 1.0E-08)

------------------------------------------------------------------------------------
4 Cautions