Draft
Commits (37)
cdd7d52
Work on multistart implement 4/23 morning
sscini Apr 23, 2025
eca0ba8
Finished first draft of pseudocode for multistart
sscini Apr 23, 2025
2160aec
Fixed logical errors in pseudocode
sscini Apr 23, 2025
266beea
Started implementing review comments 4/30
sscini Apr 30, 2025
b877ada
Merge branch 'Pyomo:main' into multistart-in-parmest
sscini Apr 30, 2025
9f1ffe5
Work on edits, 5/1/25
sscini May 1, 2025
43f1ab3
Merge branch 'Pyomo:main' into multistart-in-parmest
sscini May 1, 2025
ea067c8
Made edits, still debugging
sscini May 2, 2025
3b839ef
Addressed some comments in code. Still working through example to debug
sscini May 14, 2025
3a7aa1d
Merge branch 'Pyomo:main' into multistart-in-parmest
sscini May 14, 2025
c688f2d
Merge branch 'Pyomo:main' into multistart-in-parmest
sscini May 21, 2025
50c36bc
Got dataframe formatted, still working on executing Q_opt
sscini May 21, 2025
8e5f078
Merge branch 'Pyomo:main' into multistart-in-parmest
sscini Jun 2, 2025
f4c7018
Working code, adding features 6/2/25
sscini Jun 2, 2025
4444e6d
Merge branch 'Pyomo:main' into multistart-in-parmest
sscini Jun 3, 2025
e788000
Added questions for next round of reviews
sscini Jun 3, 2025
4429caf
Merge branch 'multistart-in-parmest' of https://github.com/sscini/pyo…
sscini Jun 3, 2025
f071718
Removed diagnostic tables to simplify output
sscini Jun 3, 2025
9b1545d
Work from Wednesday of Sprint week
sscini Jun 4, 2025
80079cb
Create Simple_Multimodal_Multistart.ipynb
sscini Jun 4, 2025
1695519
New features Thursday morning
sscini Jun 5, 2025
0634014
First successful running multistart feature, before Alex recommended …
sscini Jun 5, 2025
a959346
Merge branch 'Pyomo:main' into multistart-in-parmest
sscini Jun 5, 2025
04a9096
Merge branch 'Pyomo:main' into multistart-in-parmest
sscini Jun 23, 2025
06e0a72
Ran black, removed temp example
sscini Jun 24, 2025
6b3ee40
Added utility to update model using suffix values
sscini Jun 27, 2025
5cadfac
Work on Friday 6/27 applying PR comments
sscini Jun 27, 2025
922fd57
Merge branch 'Pyomo:main' into multistart-in-parmest
sscini Jun 30, 2025
1be2d9e
Addressed some reviewer comments and ran black.
sscini Jun 30, 2025
56800f5
Merge branch 'Pyomo:main' into multistart-in-parmest
sscini Jul 1, 2025
05381c5
Updated argument for theta_est_multistart
sscini Jul 6, 2025
5b4f9c1
Merge branch 'Pyomo:main' into multistart-in-parmest
sscini Jul 7, 2025
07ae1e8
Addressed majority of review comments. State before 7/8 dev meeting
sscini Jul 8, 2025
33d838f
Fixing conflict
sscini Jul 8, 2025
65a9cff
Merge branch 'main' into multistart-in-parmest
sscini Jul 15, 2025
90093df
Merge branch 'Pyomo:main' into multistart-in-parmest
sscini Jul 17, 2025
e7b2df1
Added in TODO items based on Dan morning meeting
sscini Jul 17, 2025
262 changes: 262 additions & 0 deletions pyomo/contrib/parmest/parmest.py
@@ -235,6 +235,9 @@ def SSE(model):
return expr


'''Adding pseudocode for draft implementation of the estimator class,
incorporating multistart.
'''
class Estimator(object):
"""
Parameter estimation class
@@ -275,6 +278,11 @@ def __init__(
solver_options=None,
):

'''The first theta is provided by the user in the initialization of
the Estimator class through the unknown parameter variables. Additional
starting values need to be generated using the sampling method provided
by the user.
'''

# check that we have a (non-empty) list of experiments
assert isinstance(experiment_list, list)
self.exp_list = experiment_list
@@ -447,6 +455,130 @@ def TotalCost_rule(model):
parmest_model = utils.convert_params_to_vars(model, theta_names, fix_vars=False)

return parmest_model

# New private method, _generate_initial_theta:
# This method generates the initial theta values for multistart
# optimization. It samples starting points between the variable bounds
# (or takes user-provided values) and returns a DataFrame with one row
# of theta values per restart, plus placeholder columns for the solve results.
def _generate_initial_theta(self, parmest_model, seed=None, n_restarts=None, multistart_sampling_method=None, user_provided=None):
if n_restarts == 1:
Contributor
I like just sending a warning, and not returning. For example, n_restarts might be 1 by default. You should check if n_restarts is an int as well. Then, if n_restarts is 1, you should send a warning that the tool is intended for this number to be greater than one and solve as normal.

Contributor Author
Alex had initially said just return message, but I will ask him again.
# If only one restart, notify the user that multistart is not needed
return print("No multistart optimization needed. Please use normal theta_est()")
Contributor
You should raise a warning/log something here instead of using a print statement. That way you can use a debugger to control whether the message is displayed.
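A minimal sketch of the logging-based handling described above (assuming a module-level logger such as logging.getLogger(__name__); the helper name and message wording are illustrative):

import logging

logger = logging.getLogger(__name__)

def _check_n_restarts(n_restarts):
    # Illustrative helper: validate the restart count before sampling
    if not isinstance(n_restarts, int):
        raise TypeError("n_restarts must be an integer.")
    if n_restarts == 1:
        # A logged warning (rather than print) lets callers control verbosity
        logger.warning(
            "n_restarts is 1; multistart is intended for n_restarts > 1. "
            "Proceeding with a single solve, equivalent to theta_est()."
        )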


# Get the theta names and initial theta values
theta_names = self._return_theta_names()
initial_theta = [parmest_model.find_component(name)() for name in theta_names]

# Get the lower and upper bounds for the theta values
lower_bound = np.array([parmest_model.find_component(name).lb for name in theta_names])
upper_bound = np.array([parmest_model.find_component(name).ub for name in theta_names])
# Check that both lower and upper bounds are defined for every theta
if any(bound is None for bound in lower_bound) or any(bound is None for bound in upper_bound):
raise ValueError(
Member
You probably already know this, but you will need to check all the errors are raised when expected.

"The lower and upper bounds for the theta values must be defined."
)

# Check the length of theta_names and initial_theta, and make sure bounds are defined
if len(theta_names) != len(initial_theta):
raise ValueError(
"The length of theta_names and initial_theta must be the same."
)

if multistart_sampling_method == "random":
np.random.seed(seed)
# Generate random theta values
theta_vals_multistart = np.random.uniform(lower_bound, upper_bound, size=len(theta_names))

# Generate theta values using Latin hypercube sampling or Sobol sampling

elif multistart_sampling_method == "latin_hypercube":
# Generate theta values using Latin hypercube sampling
sampler = scipy.stats.qmc.LatinHypercube(d=len(theta_names), seed=seed)
samples = sampler.random(n=n_restarts)
theta_vals_multistart = np.array([lower_bound + (upper_bound - lower_bound) * theta for theta in samples])


elif multistart_sampling_method == "sobol":
sampler = scipy.stats.qmc.Sobol(d=len(theta_names), seed=seed)
# Generate theta values using Sobol sampling
# The first value of the Sobol sequence is 0, so we skip it
samples = sampler.random(n=n_restarts+1)[1:]
theta_vals_multistart = np.array([lower_bound + (upper_bound - lower_bound) * theta for theta in samples])

elif multistart_sampling_method == "user_provided":
Contributor
I think maybe user_provided does not imply values. For instance, what if a user wants to provide their own random sample generator?

Probably should use user_provided_values to be more explicit.

Also, should have some comments at the beginning of this describing what the method does, just like the other options.

# Add user provided dataframe option
if user_provided is not None:

if isinstance(user_provided, np.ndarray):
# Check if the user provided numpy array has the same number of rows as the number of restarts
Contributor
Make sure comments are not too long. Break up over multiple lines like you did above...

Contributor
Running black may help?

if user_provided.shape[0] != n_restarts:
raise ValueError(
"The user provided numpy array must have the same number of rows as the number of restarts."
)
# Check if the user provided numpy array has the same number of columns as the number of theta names
if user_provided.shape[1] != len(theta_names):
raise ValueError(
"The user provided numpy array must have the same number of columns as the number of theta names."
)
# A plain numpy array carries no column names, so the theta ordering is
# assumed to match theta_names; only the shape is validated above.
# If all checks pass, use the user provided numpy array
theta_vals_multistart = user_provided
elif isinstance(user_provided, pd.DataFrame):
# Check if the user provided dataframe has the same number of rows as the number of restarts
if user_provided.shape[0] != n_restarts:
raise ValueError(
"The user provided dataframe must have the same number of rows as the number of restarts."
)
# Check if the user provided dataframe has the same number of columns as the number of theta names
if user_provided.shape[1] != len(theta_names):
raise ValueError(
"The user provided dataframe must have the same number of columns as the number of theta names."
)
# Check if the user provided dataframe has the same theta names as the model;
# if not, raise an error
if not all(theta in theta_names for theta in user_provided.columns):
Contributor
This should be uncommented...

raise ValueError(
"The user provided dataframe must have the same theta names as the model."
)
# If all checks pass, use the user provided dataframe, with columns
# ordered to match theta_names
theta_vals_multistart = user_provided[theta_names].values
else:
raise ValueError(
"The user must provide a numpy array or pandas dataframe from a previous attempt to use the 'user_provided' method."
Contributor
Same thing with all these long output messages, make sure they don't exceed one line (break them up).

)

else:
raise ValueError(
Contributor
This would probably be more consistent with other code (and other suggestions) if the options were using an Enum object. You can check the DoE code, or Shammah's PR. It just makes it so that the strings are attached to an object instead (safer).

Contributor Author
Added Enum class above, working to implement here. Making note to talk to Shammah about this.

"Invalid sampling method. Choose 'random', 'latin_hypercube', 'sobol' or 'user_provided'."
)
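A possible shape for the Enum the reviewer suggests (class and member names here are assumptions, not the final API), shown as a self-contained sketch:

from enum import Enum

class MultistartSamplingMethod(Enum):
    # Candidate sampling strategies for generating starting points
    RANDOM = "random"
    LATIN_HYPERCUBE = "latin_hypercube"
    SOBOL = "sobol"
    USER_PROVIDED = "user_provided"

# Strings from the public API map onto the Enum by value, so an invalid
# option fails loudly in one place
method = MultistartSamplingMethod("latin_hypercube")
assert method is MultistartSamplingMethod.LATIN_HYPERCUBE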

# Make an output dataframe with the theta names and their corresponding values for each restart,
# and nan for the output info values
df_multistart = pd.DataFrame(
theta_vals_multistart, columns=theta_names
)


# Overwrite the first row with the user's initial theta values so the
# provided starting point is always included among the restarts
df_multistart.iloc[0, :] = initial_theta


# Add the output info values to the dataframe, starting values as nan
for i in range(len(theta_names)):
df_multistart[f'converged_{theta_names[i]}'] = np.nan
df_multistart["initial objective"] = np.nan
df_multistart["final objective"] = np.nan
df_multistart["solver termination"] = np.nan
df_multistart["solve_time"] = np.nan

return df_multistart
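Following up on the reviewer's note above about confirming that errors are raised when expected, a pytest-style sketch of one such test (make_estimator_without_bounds is a hypothetical helper that builds an Estimator whose theta variables have no bounds):

import pytest

def test_generate_initial_theta_requires_bounds():
    # Hypothetical setup: an Estimator and labeled model with unbounded thetas
    est, model = make_estimator_without_bounds()
    with pytest.raises(ValueError):
        est._generate_initial_theta(
            model, seed=0, n_restarts=5, multistart_sampling_method="random"
        )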

def _instance_creation_callback(self, experiment_number=None, cb_data=None):
model = self._create_parmest_model(experiment_number)
@@ -921,6 +1053,136 @@ def theta_est(
cov_n=cov_n,
)

def theta_est_multistart(
self,
n_restarts=20,
buffer=10,
multistart_sampling_method="random",
user_provided=None,
seed=None,
save_results=False,
theta_vals=None,
solver="ef_ipopt",
file_name="multistart_results.csv",
return_values=[],
):
"""
Parameter estimation using multistart optimization

Parameters
----------
n_restarts: int, optional
Number of restarts for multistart. Default is 20.
buffer: int, optional
Number of iterations between intermediate saves of the results when
save_results is True. Default is 10.
multistart_sampling_method: string, optional
Method used to sample theta values. Options are "random",
"latin_hypercube", "sobol", or "user_provided". Default is "random".
user_provided: numpy array or pandas DataFrame, optional
Starting theta values used when multistart_sampling_method is
"user_provided"; one row per restart, one column per theta.
seed: int, optional
Random seed used by the sampling method.
save_results: bool, optional
If True, write intermediate and final results to file_name. Default is False.
solver: string, optional
Currently only "ef_ipopt" is supported. Default is "ef_ipopt".
file_name: string, optional
Name of the CSV file used when save_results is True.
Default is "multistart_results.csv".
return_values: list, optional
List of Variable names, used to return values from the model for data reconciliation


Returns
-------
results_df: pd.DataFrame
One row per restart with the sampled theta values, converged theta values,
initial and final objective values, solver termination condition, and solve time
best_theta: pd.Series
Estimated values for theta from the restart with the best objective value
best_objectiveval: float
The best objective function value found across all restarts

"""

# check if we are using deprecated parmest
if self.pest_deprecated is not None:
return print(
"Multistart is not supported in the deprecated parmest interface"
)

assert isinstance(n_restarts, int)
Member
Also check that this is > 1.

Member
Please look at other Pyomo code for examples of throwing exceptions.

Contributor
Agree with @adowling2 here, you need to throw an exception so you can test the exception is caught.
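A sketch of the explicit validation the reviewers ask for in place of the bare asserts (exception types and message text are illustrative, not Pyomo conventions):

if not isinstance(n_restarts, int):
    raise TypeError("n_restarts must be an integer.")
if n_restarts < 2:
    raise ValueError(
        "theta_est_multistart requires n_restarts > 1; "
        "use theta_est() for a single solve."
    )
if not isinstance(multistart_sampling_method, str):
    raise TypeError("multistart_sampling_method must be a string.")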

assert isinstance(multistart_sampling_method, str)
assert isinstance(solver, str)
assert isinstance(return_values, list)

if n_restarts > 1 and multistart_sampling_method is not None:

# Find the initialized values of theta from the labeled parmest model
# and the theta names from the estimator object
parmest_model = self._create_parmest_model(experiment_number=0)
theta_names = self._return_theta_names()
initial_theta = [parmest_model.find_component(name)() for name in theta_names]

# Generate theta values using the sampling method
results_df = self._generate_initial_theta(parmest_model, seed=seed, n_restarts=n_restarts,
multistart_sampling_method=multistart_sampling_method, user_provided=user_provided)
results_df = pd.DataFrame(results_df)
# Extract theta_vals from the dataframe
theta_vals = results_df.iloc[:, :len(theta_names)]
converged_theta_vals = np.zeros((n_restarts, len(theta_names)))

# Track the best solution found across restarts
best_objectiveval = np.inf
best_theta = None

for i in range(n_restarts):
# for number of restarts, call the self._Q_opt method
# with the theta values generated using the _generalize_initial_theta method

# set the theta values in the model
theta_vals_current = theta_vals.iloc[i, :]


# Call the _Q_opt method with the generated theta values
objectiveval, converged_theta, variable_values = self._Q_opt(
ThetaVals=theta_vals_current,
solver=solver,
return_values=return_values,
)

# Check if the solver terminated successfully
if variable_values.solver.termination_condition != pyo.TerminationCondition.optimal:
# If not, record the termination condition and mark the objective
# values for this restart as NaN
solver_termination = variable_values.solver.termination_condition
solve_time = variable_values.solver.time
init_objectiveval = np.nan
final_objectiveval = np.nan
thetavals = np.nan
Contributor
This is never used?


else:

# If the solver terminated successfully, set the objective value
converged_theta_vals[i, :] = converged_theta.values()
init_objectiveval = objectiveval
final_objectiveval = variable_values.solver.objective()
solver_termination = variable_values.solver.termination_condition
solve_time = variable_values.solver.time

# Check if the objective value is better than the best objective value
if final_objectiveval < best_objectiveval:
best_objectiveval = final_objectiveval
best_theta = converged_theta

# Store the results in a list or DataFrame
# depending on the number of restarts
results_df.iloc[i, len(theta_names):len(theta_names) + len(theta_names)] = converged_theta_vals[i, :]
results_df.iloc[i, -4] = init_objectiveval
results_df.iloc[i, -3] = final_objectiveval
results_df.iloc[i, -2] = solver_termination
results_df.iloc[i, -1] = solve_time

# Periodically overwrite the results file with the rows completed so far,
# if save_results is True
if save_results and (i + 1) % buffer == 0:
results_df.iloc[: i + 1].to_csv(file_name, mode='w', header=True, index=False)
print(f"Intermediate results saved after {i + 1} iterations.")

# Final save after all iterations (overwrite with the complete table)
if save_results:
results_df.to_csv(file_name, mode='w', header=True, index=False)
print("Final results saved.")

return results_df, best_theta, best_objectiveval
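For context, a hedged usage sketch of the new method; it assumes an existing list of parmest Experiment objects (exp_list), the SSE objective defined earlier in this module, an available ipopt installation, and Estimator constructor arguments that follow the existing parmest examples:

import pyomo.contrib.parmest.parmest as parmest

# exp_list is a list of parmest Experiment objects built elsewhere
pest = parmest.Estimator(exp_list, obj_function=SSE)

results_df, best_theta, best_obj = pest.theta_est_multistart(
    n_restarts=20,
    multistart_sampling_method="latin_hypercube",
    seed=42,
    save_results=True,
    file_name="multistart_results.csv",
)
print(results_df.head())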



def theta_est_bootstrap(
self,
bootstrap_samples,