// NOTE(review): damaged source extraction — the original file's line numbers are
// fused into the code text (e.g. "23template<") and many interior lines (the
// function signature, closing braces, `else` keywords) are missing. The
// fragments below appear to be the body of
// GradientDescentInstantiationSearcher<FunctionType, ConstantType>::doStep(...),
// which performs one gradient-descent update for `steppingParameter` and, per
// the final visible line, returns the absolute change of that parameter's
// position. Do not edit the logic here without recovering the complete file.
23template<
typename FunctionType>
25template<
typename FunctionType>
28template<
typename FunctionType,
typename ConstantType>
// Bring the configured precision and the parameter's current position into
// ConstantType for the arithmetic below.
32 const ConstantType precisionAsConstant =
37 ConstantType
const oldPosAsConstant = utility::convertNumber<ConstantType>(position[steppingParameter]);
39 ConstantType projectedGradient;
// --- Gradient projection --------------------------------------------------
// Several alternative strategies keep the parameter inside (0, 1); the code
// that selects between them sits on lines missing from this extraction.
// Strategy A: zero the gradient if a plain step would leave
// [precision, 1 - precision], otherwise pass it through unchanged.
42 ConstantType newPlainPosition = oldPosAsConstant + precisionAsConstant * gradient.at(steppingParameter);
43 if (newPlainPosition < utility::zero<ConstantType>() + precisionAsConstant || newPlainPosition > utility::one<ConstantType>() - precisionAsConstant) {
44 projectedGradient = 0;
46 projectedGradient = gradient.at(steppingParameter);
// Strategy B: chain rule through a logistic reparameterization
// (d/dx sigmoid(x) = e^x / (e^x + 1)^2).
// NOTE(review): `oldPos` is declared on a missing line — presumably the
// un-converted position value; confirm against the full source.
50 const double expX = std::exp(utility::convertNumber<double>(oldPos));
51 projectedGradient = gradient.at(steppingParameter) * utility::convertNumber<ConstantType>(expX / std::pow(expX + 1, 2));
// Strategy C: hard push-back — use a large constant gradient (+/-1000) when the
// position touches a boundary, the raw gradient otherwise.
53 if (oldPosAsConstant < precisionAsConstant) {
54 projectedGradient = 1000;
55 }
else if (oldPosAsConstant > utility::one<ConstantType>() - precisionAsConstant) {
56 projectedGradient = -1000;
58 projectedGradient = gradient.at(steppingParameter);
// Strategy D: logarithmic barrier — add a term that grows as the position
// approaches either boundary; outside the feasible band, step straight away
// from the violated boundary with magnitude 1/logarithmicBarrierTerm.
67 if (oldPosAsConstant >= precisionAsConstant && oldPosAsConstant <= utility::one<ConstantType>() - precisionAsConstant) {
69 if (oldPosAsConstant * 2 < utility::one<ConstantType>()) {
70 projectedGradient = gradient.at(steppingParameter) + logarithmicBarrierTerm / (oldPosAsConstant - precisionAsConstant);
73 gradient.at(steppingParameter) - logarithmicBarrierTerm / (utility::one<ConstantType>() - precisionAsConstant - oldPosAsConstant);
76 if (oldPosAsConstant < precisionAsConstant) {
77 projectedGradient = utility::one<ConstantType>() / logarithmicBarrierTerm;
78 }
else if (oldPosAsConstant > utility::one<ConstantType>() - precisionAsConstant) {
79 projectedGradient = -utility::one<ConstantType>() / logarithmicBarrierTerm;
83 projectedGradient = gradient.at(steppingParameter);
// --- Step computation, dispatched on the boost::variant gradientDescentType --
// Adam (Kingma & Ba): exponential moving averages of the gradient and the
// squared gradient, bias-corrected by (1 - decay^(t+1)); the step is
// lr * mHat / (sqrt(vHat) + eps), with precisionAsConstant playing eps' role.
88 if (Adam* adam = boost::get<Adam>(&gradientDescentType)) {
91 adam->decayingStepAverage[steppingParameter] =
92 adam->averageDecay * adam->decayingStepAverage[steppingParameter] + (utility::one<ConstantType>() - adam->averageDecay) * projectedGradient;
93 adam->decayingStepAverageSquared[steppingParameter] = adam->squaredAverageDecay * adam->decayingStepAverageSquared[steppingParameter] +
94 (utility::one<ConstantType>() - adam->squaredAverageDecay) *
utility::pow(projectedGradient, 2);
96 const ConstantType correctedGradient =
97 adam->decayingStepAverage[steppingParameter] / (utility::one<ConstantType>() -
utility::pow(adam->averageDecay, stepNum + 1));
98 const ConstantType correctedSquaredGradient =
99 adam->decayingStepAverageSquared[steppingParameter] / (utility::one<ConstantType>() -
utility::pow(adam->squaredAverageDecay, stepNum + 1));
101 const ConstantType toSqrt = correctedSquaredGradient;
102 ConstantType sqrtResult = constantTypeSqrt(toSqrt);
104 step = (adam->learningRate / (sqrtResult + precisionAsConstant)) * correctedGradient;
105 }
// RAdam (Liu et al.): rectified Adam. Approximates the length of the simple
// moving average (SMA); when it exceeds 4, applies the variance-rectification
// factor and an adaptive learning rate, otherwise falls back to the
// un-adapted bias-corrected moving average.
else if (RAdam* radam = boost::get<RAdam>(&gradientDescentType)) {
110 const ConstantType maxLengthApproxSMA = 2 / (utility::one<ConstantType>() - radam->squaredAverageDecay) - utility::one<ConstantType>();
113 radam->decayingStepAverageSquared[steppingParameter] = radam->squaredAverageDecay * radam->decayingStepAverageSquared[steppingParameter] +
114 (utility::one<ConstantType>() - radam->squaredAverageDecay) *
utility::pow(projectedGradient, 2);
116 radam->decayingStepAverage[steppingParameter] =
117 radam->averageDecay * radam->decayingStepAverage[steppingParameter] + (utility::one<ConstantType>() - radam->averageDecay) * projectedGradient;
119 const ConstantType biasCorrectedMovingAverage =
120 radam->decayingStepAverage[steppingParameter] / (utility::one<ConstantType>() -
utility::pow(radam->averageDecay, stepNum + 1));
121 const ConstantType squaredAverageDecayPow =
utility::pow(radam->squaredAverageDecay, stepNum + 1);
123 const ConstantType lengthApproxSMA =
125 ((2 * (utility::convertNumber<ConstantType>(stepNum) + utility::one<ConstantType>()) * squaredAverageDecayPow) / (1 - squaredAverageDecayPow));
127 if (lengthApproxSMA > 4) {
129 const ConstantType adaptiveLearningRate =
130 constantTypeSqrt((utility::one<ConstantType>() - squaredAverageDecayPow) / radam->decayingStepAverageSquared[steppingParameter]);
132 const ConstantType varianceRectification =
133 constantTypeSqrt(((lengthApproxSMA - 4) / (maxLengthApproxSMA - 4)) * ((lengthApproxSMA - 2) / (maxLengthApproxSMA - 2)) *
134 ((maxLengthApproxSMA) / (lengthApproxSMA)));
136 step = radam->learningRate * varianceRectification * biasCorrectedMovingAverage * adaptiveLearningRate;
139 step = radam->learningRate * biasCorrectedMovingAverage;
141 }
// RMSProp: divide the learning rate by the root of a decaying mean of squared
// gradients; precisionAsConstant is added under the root to avoid div-by-zero.
else if (RmsProp* rmsProp = boost::get<RmsProp>(&gradientDescentType)) {
142 rmsProp->rootMeanSquare[steppingParameter] = rmsProp->averageDecay * rmsProp->rootMeanSquare[steppingParameter] +
143 (utility::one<ConstantType>() - rmsProp->averageDecay) * projectedGradient * projectedGradient;
145 const ConstantType toSqrt = rmsProp->rootMeanSquare[steppingParameter] + precisionAsConstant;
146 ConstantType sqrtResult = constantTypeSqrt(toSqrt);
148 step = (rmsProp->learningRate / sqrtResult) * projectedGradient;
149 }
// Plain: either sign-only steps (+/-lr by gradient sign) or lr * gradient; the
// condition choosing between the two variants is on a missing line.
else if (Plain* plain = boost::get<Plain>(&gradientDescentType)) {
151 if (projectedGradient < utility::zero<ConstantType>()) {
152 step = -plain->learningRate;
153 }
else if (projectedGradient > utility::zero<ConstantType>()) {
154 step = plain->learningRate;
156 step = utility::zero<ConstantType>();
159 step = plain->learningRate * projectedGradient;
161 }
// Momentum: same base step as Plain, plus momentumTerm * previous step; the
// new step is remembered for the next iteration.
else if (Momentum* momentum = boost::get<Momentum>(&gradientDescentType)) {
163 if (projectedGradient < utility::zero<ConstantType>()) {
164 step = -momentum->learningRate;
165 }
else if (projectedGradient > utility::zero<ConstantType>()) {
166 step = momentum->learningRate;
168 step = utility::zero<ConstantType>();
171 step = momentum->learningRate * projectedGradient;
173 step += momentum->momentumTerm * momentum->pastStep.at(steppingParameter);
174 momentum->pastStep[steppingParameter] = step;
175 }
// Nesterov: identical bookkeeping to Momentum here; the look-ahead evaluation
// that distinguishes Nesterov happens in stochasticGradientDescent.
else if (Nesterov* nesterov = boost::get<Nesterov>(&gradientDescentType)) {
177 if (projectedGradient < utility::zero<ConstantType>()) {
178 step = -nesterov->learningRate;
179 }
else if (projectedGradient > utility::zero<ConstantType>()) {
180 step = nesterov->learningRate;
182 step = utility::zero<ConstantType>();
185 step = nesterov->learningRate * projectedGradient;
187 step += nesterov->momentumTerm * nesterov->pastStep.at(steppingParameter);
188 nesterov->pastStep[steppingParameter] = step;
// Apply the step (newPos is computed on a missing line), clamp the position
// into [precision, upperBound], and return the absolute position change.
195 position[steppingParameter] = newPos;
198 position[steppingParameter] =
utility::max(precision, position[steppingParameter]);
200 position[steppingParameter] =
utility::min(upperBound, position[steppingParameter]);
202 return utility::abs<ConstantType>(oldPosAsConstant - utility::convertNumber<ConstantType>(position[steppingParameter]));
// NOTE(review): damaged extraction — original line numbers are fused into the
// text and interior lines (case labels, closing braces, else branches) are
// missing. Visible contract: run mini-batch stochastic gradient descent on
// `position` (modified in place) until the value change stays below
// terminationEpsilon for a full pass over the parameters; returns the model
// value at the initial state for the final position.
205template<
typename FunctionType,
typename ConstantType>
206ConstantType GradientDescentInstantiationSearcher<FunctionType, ConstantType>::stochasticGradientDescent(
207 std::map<VariableType<FunctionType>, CoefficientType<FunctionType>>& position) {
// Index of the model's initial state, used to read values out of check results.
208 uint_fast64_t initialStateModel = model.getStates(
"init").getNextSetIndex(0);
// Initialize currentValue to the worst possible value for the bound's
// direction (the case labels of this switch are on missing lines).
210 ConstantType currentValue;
211 switch (this->synthesisTask->getBound().comparisonType) {
214 currentValue = -utility::infinity<ConstantType>();
218 currentValue = utility::infinity<ConstantType>();
223 uint64_t tinyChangeIterations = 0;
// Last computed derivative per parameter; consumed by doStep.
225 std::map<VariableType<FunctionType>, ConstantType> deltaVector;
// Fixed enumeration of the parameters so mini-batches can slice it by index.
227 std::vector<VariableType<FunctionType>> parameterEnumeration;
228 for (
auto parameter : this->parameters) {
229 parameterEnumeration.push_back(parameter);
// Stopwatch used below to emit a progress message roughly every 15 seconds.
232 utility::Stopwatch printUpdateStopwatch;
233 printUpdateStopwatch.start();
// Main descent loop; exits only via conditions on (partly missing) lines.
237 uint_fast64_t parameterNum = 0;
238 for (uint_fast64_t stepNum = 0;
true; ++stepNum) {
239 if (printUpdateStopwatch.getTimeInSeconds() >= 15) {
240 printUpdateStopwatch.restart();
// Build the current mini-batch: the next miniBatchSize parameters.
244 std::vector<VariableType<FunctionType>> miniBatch;
245 for (uint_fast64_t i = parameterNum;
i < std::min((uint_fast64_t)parameterEnumeration.size(), parameterNum + miniBatchSize);
i++) {
246 miniBatch.push_back(parameterEnumeration[i]);
249 ConstantType oldValue = currentValue;
250 CoefficientType<FunctionType>
const precision = storm::utility::convertNumber<CoefficientType<FunctionType>>(
// Nesterov look-ahead: evaluate at position + momentum * pastStep, clamped
// into [precision, 1 - precision]. For other optimizers this stays a copy
// of `position`.
254 std::map<VariableType<FunctionType>, CoefficientType<FunctionType>> nesterovPredictedPosition(position);
255 if (Nesterov* nesterov = boost::get<Nesterov>(&gradientDescentType)) {
256 CoefficientType<FunctionType>
const upperBound = (utility::one<CoefficientType<FunctionType>>() - precision);
257 for (
auto const& parameter : miniBatch) {
258 ConstantType
const addedTerm = nesterov->momentumTerm * nesterov->pastStep[parameter];
259 nesterovPredictedPosition[parameter] += storm::utility::convertNumber<CoefficientType<FunctionType>>(addedTerm);
260 nesterovPredictedPosition[parameter] =
utility::max(precision, nesterovPredictedPosition[parameter]);
261 nesterovPredictedPosition[parameter] =
utility::min(upperBound, nesterovPredictedPosition[parameter]);
// Sigmoid transform 1 / (1 + e^-x) of each coordinate — presumably only under
// the sigmoid constraint method; the guarding condition is on missing lines.
266 for (
auto const& parameter : parameters) {
267 nesterovPredictedPosition[parameter] =
268 utility::one<CoefficientType<FunctionType>>() /
270 utility::convertNumber<CoefficientType<FunctionType>>(std::exp(-utility::convertNumber<double>(nesterovPredictedPosition[parameter]))));
// A position is "stochastic" (a valid probability instantiation) iff every
// coordinate lies within [precision, 1 - precision].
284 bool stochasticPosition =
true;
285 for (
auto const& parameter : parameters) {
286 if (nesterovPredictedPosition[parameter] < 0 + precision || nesterovPredictedPosition[parameter] > 1 - precision) {
287 stochasticPosition =
false;
292 bool computeValue =
true;
294 if (!stochasticPosition) {
295 computeValue =
false;
// Instantiate and model-check at the (possibly look-ahead) position.
300 std::unique_ptr<storm::modelchecker::CheckResult> intermediateResult = instantiationModelChecker->check(env, nesterovPredictedPosition);
301 std::vector<ConstantType> valueVector = intermediateResult->asExplicitQuantitativeCheckResult<ConstantType>().getValueVector();
// For Nesterov, the reported value must come from the actual position, not
// the look-ahead one, so a second model-check is performed at `position`
// (sigmoid-transformed by the same presumed-conditional loop as above).
302 if (Nesterov* nesterov = boost::get<Nesterov>(&gradientDescentType)) {
303 std::map<VariableType<FunctionType>, CoefficientType<FunctionType>> modelCheckPosition(position);
305 for (
auto const& parameter : parameters) {
306 modelCheckPosition[parameter] =
307 utility::one<CoefficientType<FunctionType>>() /
309 utility::convertNumber<CoefficientType<FunctionType>>(std::exp(-utility::convertNumber<double>(modelCheckPosition[parameter]))));
312 std::unique_ptr<storm::modelchecker::CheckResult> terminationResult = instantiationModelChecker->check(env, modelCheckPosition);
313 std::vector<ConstantType> terminationValueVector = terminationResult->asExplicitQuantitativeCheckResult<ConstantType>().getValueVector();
314 currentValue = terminationValueVector[initialStateModel];
316 currentValue = valueVector[initialStateModel];
// Early exit when the bound is already satisfied at a valid position
// (the return statement itself is on a missing line).
319 if (synthesisTask->getBound().isSatisfied(currentValue) && stochasticPosition) {
// Compute the derivative w.r.t. each mini-batch parameter at the evaluated
// position and store it for doStep.
323 for (
auto const& parameter : miniBatch) {
324 auto checkResult = derivativeEvaluationHelper->check(env, nesterovPredictedPosition, parameter, valueVector);
325 ConstantType delta = checkResult->getValueVector()[derivativeEvaluationHelper->getInitialState()];
330 deltaVector[parameter] = delta;
// If the value was not computed, fall back to +/- infinity depending on the
// optimization direction (selection lines missing).
335 currentValue = utility::infinity<ConstantType>();
337 currentValue = -utility::infinity<ConstantType>();
// Record this evaluation in the visualization walk (presumably only when
// run recording is enabled; the guard is on a missing line).
343 VisualizationPoint point;
344 point.position = nesterovPredictedPosition;
345 point.value = currentValue;
346 walk.push_back(point);
// Apply one descent step per mini-batch parameter.
353 for (
auto const& parameter : miniBatch) {
354 doStep(parameter, position, deltaVector, stepNum);
// Termination bookkeeping: count consecutive near-zero value changes; once a
// full pass over all parameters changed almost nothing, the loop ends
// (the break/return is on a missing line). Otherwise reset the counter.
357 if (storm::utility::abs<ConstantType>(oldValue - currentValue) < terminationEpsilon) {
358 tinyChangeIterations += miniBatch.size();
359 if (tinyChangeIterations > parameterEnumeration.size()) {
363 tinyChangeIterations = 0;
// Advance to the next mini-batch, wrapping around (wrap code missing).
367 parameterNum = parameterNum + miniBatchSize;
368 if (parameterNum >= parameterEnumeration.size()) {
// Reached when the loop is aborted externally (e.g. a termination signal).
373 STORM_LOG_WARN(
"Aborting Gradient Descent, returning non-optimal value.");
// NOTE(review): damaged extraction — the function-name line is missing (only
// the return type is visible); per the doxygen residue at the end of this
// chunk, this is gradientDescent(): repeatedly run stochasticGradientDescent
// from an initial guess and then from random restart points, tracking the best
// instantiation/value pair, which it returns.
380template<
typename FunctionType,
typename ConstantType>
381std::pair<std::map<VariableType<FunctionType>, CoefficientType<FunctionType>>, ConstantType>
// Preconditions: setup() must have been called and the task must carry a bound.
383 STORM_LOG_ASSERT(this->synthesisTask,
"Call setup before calling gradientDescent");
// Clear all per-optimizer state (moment estimates, past steps) from prior runs.
385 resetDynamicValues();
387 STORM_LOG_ASSERT(this->synthesisTask->isBoundSet(),
"Task does not involve a bound.");
// Initialize bestValue to the worst value for the bound's direction
// (case labels of this switch are on missing lines).
390 ConstantType bestValue;
391 switch (this->synthesisTask->getBound().comparisonType) {
394 bestValue = -utility::infinity<ConstantType>();
398 bestValue = utility::infinity<ConstantType>();
// RNG for random restart points in (0, 1).
402 std::random_device device;
403 std::default_random_engine engine(device());
404 std::uniform_real_distribution<> dist(0, 1);
405 bool initialGuess =
true;
// Outer restart loop (loop header on missing lines): first try p = 0.5 for
// every parameter (or the user-supplied start point), afterwards random points.
410 STORM_PRINT_AND_LOG(
"Trying initial guess (p->0.5 for every parameter p or set start point)\n");
413 for (
auto const& param : this->parameters) {
415 logarithmicBarrierTerm = utility::convertNumber<ConstantType>(0.1);
417 point[param] = (*startPoint)[param];
// 0.5 + 1e-6: slightly off-center — presumably to break symmetry; confirm
// against the full source.
419 point[param] = utility::convertNumber<CoefficientType<FunctionType>>(0.5 + 1e-6);
// Fragment of a loop/if condition: keep going while the barrier term is
// still above the 1e-5 threshold (rest of the condition is missing).
422 logarithmicBarrierTerm > utility::convertNumber<ConstantType>(0.00001)) {
425 logarithmicBarrierTerm = utility::convertNumber<ConstantType>(0.1);
426 point[param] = utility::convertNumber<CoefficientType<FunctionType>>(dist(engine));
429 initialGuess =
false;
// Time and run one descent from the chosen start point.
433 stochasticWatch.start();
435 ConstantType prob = stochasticGradientDescent(point);
436 stochasticWatch.stop();
// Keep the point iff it improves on the best value in the bound's direction.
438 bool isFoundPointBetter =
false;
439 switch (this->synthesisTask->getBound().comparisonType) {
442 isFoundPointBetter = prob > bestValue;
446 isFoundPointBetter = prob < bestValue;
449 if (isFoundPointBetter) {
450 bestInstantiation = point;
// If the bound is satisfied: shrink the logarithmic barrier term by 10x and
// continue refining; otherwise report the best value found so far.
454 if (synthesisTask->getBound().isSatisfied(bestValue)) {
461 logarithmicBarrierTerm = logarithmicBarrierTerm / 10;
462 STORM_PRINT_AND_LOG(
"Smaller term\n" << bestValue <<
"\n" << logarithmicBarrierTerm <<
"\n");
465 STORM_PRINT_AND_LOG(
"Sorry, couldn't satisfy the bound (yet). Best found value so far: " << bestValue <<
"\n");
// Map the internal (sigmoid-space) instantiation back to probability space
// via 1 / (1 + e^-x) — presumably only under the sigmoid constraint method;
// the guard is on missing lines.
472 for (
auto const& parameter : parameters) {
473 bestInstantiation[parameter] =
474 utility::one<CoefficientType<FunctionType>>() /
476 utility::convertNumber<CoefficientType<FunctionType>>(std::exp(-utility::convertNumber<double>(bestInstantiation[parameter]))));
480 return std::make_pair(bestInstantiation, bestValue);
// NOTE(review): damaged extraction — the function-name line is missing. The
// body dispatches on the boost::variant gradientDescentType and zeroes every
// optimizer's accumulated per-parameter state, so this is presumably
// resetDynamicValues(), called before each fresh descent; confirm against the
// full source.
483template<
typename FunctionType,
typename ConstantType>
// Adam: clear first- and second-moment estimates.
485 if (Adam* adam = boost::get<Adam>(&gradientDescentType)) {
486 for (
auto const& parameter : this->parameters) {
487 adam->decayingStepAverage[parameter] = utility::zero<ConstantType>();
488 adam->decayingStepAverageSquared[parameter] = utility::zero<ConstantType>();
490 }
// RAdam: same two moment estimates as Adam.
else if (RAdam* radam = boost::get<RAdam>(&gradientDescentType)) {
491 for (
auto const& parameter : this->parameters) {
492 radam->decayingStepAverage[parameter] = utility::zero<ConstantType>();
493 radam->decayingStepAverageSquared[parameter] = utility::zero<ConstantType>();
495 }
// RMSProp: clear the decaying mean of squared gradients.
else if (RmsProp* rmsProp = boost::get<RmsProp>(&gradientDescentType)) {
496 for (
auto const& parameter : this->parameters) {
497 rmsProp->rootMeanSquare[parameter] = utility::zero<ConstantType>();
499 }
// Momentum / Nesterov: clear the remembered previous step.
else if (Momentum* momentum = boost::get<Momentum>(&gradientDescentType)) {
500 for (
auto const& parameter : this->parameters) {
501 momentum->pastStep[parameter] = utility::zero<ConstantType>();
503 }
else if (Nesterov* nesterov = boost::get<Nesterov>(&gradientDescentType)) {
504 for (
auto const& parameter : this->parameters) {
505 nesterov->pastStep[parameter] = utility::zero<ConstantType>();
// NOTE(review): damaged extraction — the function-name line is missing; per
// the doxygen residue trailing this chunk, this is presumably printRunAsJson():
// print the recorded visualization walk (positions and values) via STORM_PRINT.
// Several lines, including the opening/closing of the printed structure, are
// missing from this extraction.
510template<
typename FunctionType,
typename ConstantType>
// Iterate over every recorded point of the walk.
513 for (
auto s = walk.begin(); s != walk.end(); ++s) {
515 auto point = s->position;
// Print each parameter's value as ":<value>," (the key printed before the
// ':' is on a missing line).
516 for (
auto iter = point.begin(); iter != point.end(); ++iter) {
518 STORM_PRINT(
":" << utility::convertNumber<double>(iter->second) <<
",");
// Separator logic between points (body on missing lines).
521 if (std::next(s) != walk.end()) {
// Finally print the value of the last walk entry.
527 STORM_PRINT(storm::utility::convertNumber<double>(walk.at(walk.size() - 1).value) <<
"\n");
530template<
typename FunctionType,
typename ConstantType>
531std::vector<typename GradientDescentInstantiationSearcher<FunctionType, ConstantType>::VisualizationPoint>
void printRunAsJson()
Print the previously done run as JSON.
std::vector< VisualizationPoint > getVisualizationWalk()
Get the visualization walk that is recorded if recordRun is set to true in the constructor (false by default).
std::pair< std::map< typename utility::parametric::VariableType< FunctionType >::type, typename utility::parametric::CoefficientType< FunctionType >::type >, ConstantType > gradientDescent()
Perform Gradient Descent.
#define STORM_LOG_WARN(message)
#define STORM_LOG_ERROR(message)
#define STORM_LOG_ASSERT(cond, message)
#define STORM_PRINT_AND_LOG(message)
#define STORM_PRINT(message)
Define the macros that print information and optionally also log it.
typename utility::parametric::VariableType< FunctionType >::type VariableType
typename utility::parametric::CoefficientType< FunctionType >::type CoefficientType
SettingsType const & getModule()
Get module.
bool isTerminate()
Check whether the program should terminate (due to some abort signal).
ValueType max(ValueType const &first, ValueType const &second)
ValueType min(ValueType const &first, ValueType const &second)
ValueType pow(ValueType const &value, int_fast64_t exponent)