namespace modelchecker {
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::Result(ValueType lower, ValueType upper)
    : lowerBound(lower), upperBound(upper) {
    // Intentionally left empty.
}
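// Returns the width of the current result interval. If 'relative' is set, the difference is
// normalized by the upper bound, yielding a relative precision measure.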
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::ValueType
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::diff(bool relative) const {
    ValueType diff = upperBound - lowerBound;
    if (diff < storm::utility::zero<ValueType>()) {
        STORM_LOG_WARN("Upper bound '" << upperBound << "' is smaller than lower bound '" << lowerBound << "': Difference is " << diff << ".");
        diff = storm::utility::zero<ValueType>();
    }
    if (relative && !storm::utility::isZero(upperBound)) {
        diff /= upperBound;
    }
    return diff;
}
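// The following update helpers only overwrite a bound if the new value is strictly tighter and
// report whether an improvement took place, so that callers can log improved results.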
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::updateLowerBound(ValueType const& value) {
    if (value > lowerBound) {
        lowerBound = value;
        return true;
    }
    return false;
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::updateUpperBound(ValueType const& value) {
    if (value < upperBound) {
        upperBound = value;
        return true;
    }
    return false;
}
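// Statistics are reset at the beginning of each check() call; all flags start out pessimistic
// (nothing detected, nothing aborted) and are only set once the corresponding event occurs.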
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Statistics::Statistics()
    : beliefMdpDetectedToBeFinite(false),
      refinementFixpointDetected(false),
      overApproximationBuildAborted(false),
      underApproximationBuildAborted(false),
      aborted(false) {
    // Intentionally left empty.
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::BeliefExplorationPomdpModelChecker(std::shared_ptr<PomdpModelType> pomdp,
                                                                                                                       Options options)
    : options(options),
      inputPomdp(pomdp),
      beliefTypeCC(storm::utility::convertNumber<BeliefValueType>(this->options.numericPrecision), false),
      valueTypeCC(this->options.numericPrecision, false) {
    STORM_LOG_ASSERT(inputPomdp, "The given POMDP is not initialized.");
    STORM_LOG_ERROR_COND(inputPomdp->isCanonic(), "Input POMDP is not known to be canonic. This might lead to unexpected verification results.");
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::precomputeValueBounds(storm::logic::Formula const& formula,
                                                                                                               storm::Environment const& preProcEnv) {
    auto preProcessingMC = storm::pomdp::modelchecker::PreprocessingPomdpValueBoundsModelChecker<ValueType>(pomdp());

    // Compute some initial bounds on the values for each state of the POMDP.
    auto initialPomdpValueBounds = preProcessingMC.getValueBounds(preProcEnv, formula);
    pomdpValueBounds.trivialPomdpValueBounds = initialPomdpValueBounds;

    // If we clip and compute rewards, compute the values necessary for the correction terms.
    if (options.useClipping && formula.isRewardOperatorFormula()) {
        pomdpValueBounds.extremePomdpValueBound = preProcessingMC.getExtremeValueBound(preProcEnv, formula);
    }
}
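// The following check() overloads forward to the main variant further below; whenever no dedicated
// environment is given, a default-constructed one is used for model checking and/or preprocessing.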
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
    storm::logic::Formula const& formula, std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
    storm::Environment env;
    return check(env, formula, env, additionalUnderApproximationBounds);
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
    storm::Environment const& env, storm::logic::Formula const& formula,
    std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
    return check(env, formula, env, additionalUnderApproximationBounds);
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
    storm::logic::Formula const& formula, storm::Environment const& preProcEnv,
    std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
    storm::Environment env;
    return check(env, formula, preProcEnv, additionalUnderApproximationBounds);
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
    storm::Environment const& env, storm::logic::Formula const& formula, storm::Environment const& preProcEnv,
    std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
    STORM_LOG_ASSERT(options.unfold || options.discretize || options.interactiveUnfolding,
                     "Invoked belief exploration but no task (unfold or discretize) given.");
    // Potentially reset the preprocessed model from a previous call.
    preprocessedPomdp.reset();

    // Reset all collected statistics.
    statistics = Statistics();
    statistics.totalTime.start();
    // Extract the relevant information from the formula.
    auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp(), formula);

    precomputeValueBounds(formula, preProcEnv);
    if (!additionalUnderApproximationBounds.empty()) {
        pomdpValueBounds.fmSchedulerValueList = additionalUnderApproximationBounds;
    }
    uint64_t initialPomdpState = pomdp().getInitialStates().getNextSetIndex(0);
    Result result(pomdpValueBounds.trivialPomdpValueBounds.getHighestLowerBound(initialPomdpState),
                  pomdpValueBounds.trivialPomdpValueBounds.getSmallestUpperBound(initialPomdpState));

    std::optional<std::string> rewardModelName;
    std::set<uint32_t> targetObservations;
    if (formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula()) {
        if (formulaInfo.getTargetStates().observationClosed) {
            targetObservations = formulaInfo.getTargetStates().observations;
        } else {
            // Make the set of target states observation-closed by transforming the POMDP.
            storm::transformer::MakeStateSetObservationClosed<ValueType> obsCloser(inputPomdp);
            std::tie(preprocessedPomdp, targetObservations) = obsCloser.transform(formulaInfo.getTargetStates().states);
        }
        if (formulaInfo.isNonNestedReachabilityProbability()) {
            if (!formulaInfo.getSinkStates().empty()) {
                // Make the sink states absorbing.
                storm::storage::sparse::ModelComponents<ValueType> components;
                auto matrix = pomdp().getTransitionMatrix();
                matrix.makeRowGroupsAbsorbing(formulaInfo.getSinkStates().states);
                components.transitionMatrix = std::move(matrix);
                components.stateLabeling = pomdp().getStateLabeling();
                components.observabilityClasses = pomdp().getObservations();
                if (pomdp().hasChoiceLabeling()) {
                    components.choiceLabeling = pomdp().getChoiceLabeling();
                }
                if (pomdp().hasObservationValuations()) {
                    components.observationValuations = pomdp().getObservationValuations();
                }
                preprocessedPomdp = std::make_shared<storm::models::sparse::Pomdp<ValueType>>(std::move(components), true);
                auto reachableFromSinkStates = storm::utility::graph::getReachableStates(
                    pomdp().getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states,
                    ~formulaInfo.getSinkStates().states);
                reachableFromSinkStates &= ~formulaInfo.getSinkStates().states;
                STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException,
                                "There are sink states that can reach non-sink states. This is currently not supported.");
            }
        } else {
            // Expected reward formula.
            rewardModelName = formulaInfo.getRewardModelName();
        }
    } else {
        STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'.");
    }
    if (storm::pomdp::detectFiniteBeliefMdp(pomdp(), formulaInfo.getTargetStates().states)) {
        statistics.beliefMdpDetectedToBeFinite = true;
    }
    if (options.interactiveUnfolding) {
        unfoldInteractively(env, targetObservations, formulaInfo.minimize(), rewardModelName, pomdpValueBounds, result);
    } else {
        refineReachability(env, targetObservations, formulaInfo.minimize(), rewardModelName, pomdpValueBounds, result);
    }
    // "Clear" bounds that were not actually computed, so the output does not suggest spurious precision.
    if ((formulaInfo.minimize() && !options.discretize) || (formulaInfo.maximize() && !options.unfold)) {
        result.lowerBound = -storm::utility::infinity<ValueType>();
    }
    if ((formulaInfo.maximize() && !options.discretize) || (formulaInfo.minimize() && !options.unfold)) {
        result.upperBound = storm::utility::infinity<ValueType>();
    }
    if (storm::utility::resources::isTerminate()) {
        statistics.aborted = true;
    }
    statistics.totalTime.stop();
    return result;
}
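// Prints the collected statistics in a human-readable format. Values gathered while an exploration
// was aborted are prefixed with ">=" to mark them as lower estimates of the true count.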
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::printStatisticsToStream(std::ostream& stream) const {
    stream << "##### POMDP Approximation Statistics ######\n";
    stream << "# Input model: \n";
    pomdp().printModelInformationToStream(stream);
    stream << "# Max. Number of states with same observation: " << pomdp().getMaxNrStatesWithSameObservation() << '\n';
    if (statistics.beliefMdpDetectedToBeFinite) {
        stream << "# Pre-computations detected that the belief MDP is finite.\n";
    }
    if (statistics.aborted) {
        stream << "# Computation aborted early\n";
    }
    stream << "# Total check time: " << statistics.totalTime << '\n';

    // Refinement information:
    if (statistics.refinementSteps) {
        stream << "# Number of refinement steps: " << statistics.refinementSteps.value() << '\n';
    }
    if (statistics.refinementFixpointDetected) {
        stream << "# Detected a refinement fixpoint.\n";
    }

    // Over-approximation information:
    if (statistics.overApproximationStates) {
        stream << "# Number of states in the ";
        if (options.refine) {
            stream << "final ";
        }
        stream << "grid MDP for the over-approximation: ";
        if (statistics.overApproximationBuildAborted) {
            stream << ">=";
        }
        stream << statistics.overApproximationStates.value() << '\n';
        stream << "# Maximal resolution for over-approximation: " << statistics.overApproximationMaxResolution.value() << '\n';
        stream << "# Time spent for building the over-approx grid MDP(s): " << statistics.overApproximationBuildTime << '\n';
        stream << "# Time spent for checking the over-approx grid MDP(s): " << statistics.overApproximationCheckTime << '\n';
    }

    // Under-approximation information:
    if (statistics.underApproximationStates) {
        stream << "# Number of states in the ";
        if (options.refine) {
            stream << "final ";
        }
        stream << "belief MDP for the under-approximation: ";
        if (statistics.underApproximationBuildAborted) {
            stream << ">=";
        }
        stream << statistics.underApproximationStates.value() << '\n';
        if (statistics.nrClippingAttempts) {
            stream << "# Clipping attempts (clipped states) for the under-approximation: ";
            if (statistics.underApproximationBuildAborted) {
                stream << ">=";
            }
            stream << statistics.nrClippingAttempts.value() << " (" << statistics.nrClippedStates.value() << ")\n";
            stream << "# Total clipping preprocessing time: " << statistics.clippingPreTime << "\n";
            stream << "# Total clipping time: " << statistics.clipWatch << "\n";
        } else if (statistics.nrTruncatedStates) {
            stream << "# Truncated states for the under-approximation: ";
            if (statistics.underApproximationBuildAborted) {
                stream << ">=";
            }
            stream << statistics.nrTruncatedStates.value() << "\n";
        }
        if (statistics.underApproximationStateLimit) {
            stream << "# Exploration state limit for under-approximation: " << statistics.underApproximationStateLimit.value() << '\n';
        }
        stream << "# Time spent for building the under-approx belief MDP(s): " << statistics.underApproximationBuildTime << '\n';
        stream << "# Time spent for checking the under-approx belief MDP(s): " << statistics.underApproximationCheckTime << '\n';
    }
    stream << "##########################################\n";
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
PomdpModelType const& BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::pomdp() const {
    if (preprocessedPomdp) {
        return *preprocessedPomdp;
    } else {
        return *inputPomdp;
    }
}
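// Main refinement loop: builds an over-approximation (belief discretization on a grid) and/or an
// under-approximation (partial belief unfolding with cut-offs), checks both, and then repeatedly
// tightens the heuristic parameters until the result interval is precise enough, a step limit is
// hit, or a fixpoint is detected.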
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::refineReachability(
    storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, std::optional<std::string> rewardModelName,
    storm::pomdp::modelchecker::POMDPValueBounds<ValueType> const& valueBounds, Result& result) {
    statistics.refinementSteps = 0;
    auto trivialPOMDPBounds = valueBounds.trivialPomdpValueBounds;

    // Set up exploration data for the over-approximation.
    std::vector<BeliefValueType> observationResolutionVector;
    std::shared_ptr<BeliefManagerType> overApproxBeliefManager;
    std::shared_ptr<ExplorerType> overApproximation;
    HeuristicParameters overApproxHeuristicPar{};
    if (options.discretize) {  // Setup and build the first over-approximation.
        observationResolutionVector =
            std::vector<BeliefValueType>(pomdp().getNrObservations(), storm::utility::convertNumber<BeliefValueType>(options.resolutionInit));
        overApproxBeliefManager = std::make_shared<BeliefManagerType>(
            pomdp(), storm::utility::convertNumber<BeliefValueType>(options.numericPrecision),
            options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
        if (rewardModelName) {
            overApproxBeliefManager->setRewardModel(rewardModelName);
        }
        overApproximation = std::make_shared<ExplorerType>(overApproxBeliefManager, trivialPOMDPBounds, options.explorationHeuristic);
        overApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
        overApproxHeuristicPar.observationThreshold = options.obsThresholdInit;
        overApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit == 0 ? std::numeric_limits<uint64_t>::max() : options.sizeThresholdInit;
        overApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;

        buildOverApproximation(env, targetObservations, min, rewardModelName.has_value(), false, overApproxHeuristicPar, observationResolutionVector,
                               overApproxBeliefManager, overApproximation);
        if (!overApproximation->hasComputedValues() || storm::utility::resources::isTerminate()) {
            return;
        }
        ValueType const& newValue = overApproximation->getComputedValueAtInitialState();
        bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue);
        if (betterBound) {
            STORM_LOG_INFO("Initial over-approx result obtained after " << statistics.totalTime << ". Value is '" << newValue << "'.\n");
        }
    }
    // Set up exploration data for the under-approximation.
    std::shared_ptr<BeliefManagerType> underApproxBeliefManager;
    std::shared_ptr<ExplorerType> underApproximation;
    HeuristicParameters underApproxHeuristicPar{};
    if (options.unfold) {  // Setup and build the first under-approximation.
        underApproxBeliefManager = std::make_shared<BeliefManagerType>(
            pomdp(), storm::utility::convertNumber<BeliefValueType>(options.numericPrecision),
            options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
        if (rewardModelName) {
            underApproxBeliefManager->setRewardModel(rewardModelName);
        }
        underApproximation = std::make_shared<ExplorerType>(underApproxBeliefManager, trivialPOMDPBounds, options.explorationHeuristic);
        underApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
        underApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
        underApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit;
        if (underApproxHeuristicPar.sizeThreshold == 0) {
            if (!options.refine && options.explorationTimeLimit != 0) {
                underApproxHeuristicPar.sizeThreshold = std::numeric_limits<uint64_t>::max();
            } else {
                underApproxHeuristicPar.sizeThreshold = pomdp().getNumberOfStates() * pomdp().getMaxNrStatesWithSameObservation();
                STORM_LOG_INFO("Heuristically selected an under-approximation MDP size threshold of " << underApproxHeuristicPar.sizeThreshold << ".\n");
            }
        }
        if (options.useClipping && rewardModelName.has_value()) {
            underApproximation->setExtremeValueBound(valueBounds.extremePomdpValueBound);
        }
        buildUnderApproximation(env, targetObservations, min, rewardModelName.has_value(), false, underApproxHeuristicPar, underApproxBeliefManager,
                                underApproximation, false);
        if (!underApproximation->hasComputedValues() || storm::utility::resources::isTerminate()) {
            return;
        }
        ValueType const& newValue = underApproximation->getComputedValueAtInitialState();
        bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue);
        if (betterBound) {
            STORM_LOG_INFO("Initial under-approx result obtained after " << statistics.totalTime << ". Value is '" << newValue << "'.\n");
        }
    }

    // Do some output.
    STORM_LOG_INFO("Completed (initial) computation. Current checktime is " << statistics.totalTime << ".");
    bool computingLowerBound = false;
    bool computingUpperBound = false;
    if (options.discretize) {
        STORM_LOG_INFO("\tOver-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << ".");
        (min ? computingLowerBound : computingUpperBound) = true;
    }
    if (options.unfold) {
        STORM_LOG_INFO("\tUnder-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << ".");
        (min ? computingUpperBound : computingLowerBound) = true;
    }
    if (computingLowerBound && computingUpperBound) {
        STORM_LOG_INFO("\tObtained result is [" << result.lowerBound << ", " << result.upperBound << "].");
    } else if (computingLowerBound) {
        STORM_LOG_INFO("\tObtained result is >=" << result.lowerBound << ".");
    } else if (computingUpperBound) {
        STORM_LOG_INFO("\tObtained result is <=" << result.upperBound << ".");
    }
    // Start refinement.
    if (options.refine) {
        STORM_LOG_WARN_COND(options.refineStepLimit != 0 || !storm::utility::isZero(options.refinePrecision),
                            "No termination criterion for refinement given. Consider to specify a steplimit, a non-zero precisionlimit, or a timeout.");
        STORM_LOG_WARN_COND(storm::utility::isZero(options.refinePrecision) || (options.unfold && options.discretize),
                            "Refinement goal precision is given, but only one bound is going to be refined.");
        while ((options.refineStepLimit == 0 || statistics.refinementSteps.value() < options.refineStepLimit) && result.diff() > options.refinePrecision) {
            bool overApproxFixPoint = true;
            bool underApproxFixPoint = true;
            if (options.discretize) {
                // Refine the over-approximation.
                if (min) {
                    overApproximation->takeCurrentValuesAsLowerBounds();
                } else {
                    overApproximation->takeCurrentValuesAsUpperBounds();
                }
                overApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor;
                overApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber<uint64_t, ValueType>(
                    storm::utility::convertNumber<ValueType, uint64_t>(overApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor);
                overApproxHeuristicPar.observationThreshold +=
                    options.obsThresholdIncrementFactor * (storm::utility::one<ValueType>() - overApproxHeuristicPar.observationThreshold);
                overApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor;
                overApproxFixPoint = buildOverApproximation(env, targetObservations, min, rewardModelName.has_value(), true, overApproxHeuristicPar,
                                                            observationResolutionVector, overApproxBeliefManager, overApproximation);
                if (overApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) {
                    ValueType const& newValue = overApproximation->getComputedValueAtInitialState();
                    bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue);
                    if (betterBound) {
                        STORM_LOG_INFO("Over-approx result for refinement improved after " << statistics.totalTime << " in refinement step #"
                                                                                           << (statistics.refinementSteps.value() + 1) << ". New value is '"
                                                                                           << newValue << "'.");
                    }
                } else {
                    break;
                }
            }

            if (options.unfold && result.diff() > options.refinePrecision) {
                // Refine the under-approximation.
                underApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor;
                underApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber<uint64_t, ValueType>(
                    storm::utility::convertNumber<ValueType, uint64_t>(underApproximation->getExploredMdp()->getNumberOfStates()) *
                    options.sizeThresholdFactor);
                underApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor;
                underApproxFixPoint = buildUnderApproximation(env, targetObservations, min, rewardModelName.has_value(), true, underApproxHeuristicPar,
                                                              underApproxBeliefManager, underApproximation, true);
                if (underApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) {
                    ValueType const& newValue = underApproximation->getComputedValueAtInitialState();
                    bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue);
                    if (betterBound) {
                        STORM_LOG_INFO("Under-approx result for refinement improved after " << statistics.totalTime << " in refinement step #"
                                                                                            << (statistics.refinementSteps.value() + 1) << ". New value is '"
                                                                                            << newValue << "'.");
                    }
                } else {
                    break;
                }
            }

            if (storm::utility::resources::isTerminate()) {
                break;
            }

            ++statistics.refinementSteps.value();
            // Don't flood the log: only report the first 1000 iterations in detail.
            if (statistics.refinementSteps.value() <= 1000) {
                STORM_LOG_INFO("Completed iteration #" << statistics.refinementSteps.value() << ". Current checktime is " << statistics.totalTime << ".");
                computingLowerBound = false;
                computingUpperBound = false;
                if (options.discretize) {
                    STORM_LOG_INFO("\tOver-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << ".");
                    (min ? computingLowerBound : computingUpperBound) = true;
                }
                if (options.unfold) {
                    STORM_LOG_INFO("\tUnder-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << ".");
                    (min ? computingUpperBound : computingLowerBound) = true;
                }
                if (computingLowerBound && computingUpperBound) {
                    STORM_LOG_INFO("\tCurrent result is [" << result.lowerBound << ", " << result.upperBound << "].");
                } else if (computingLowerBound) {
                    STORM_LOG_INFO("\tCurrent result is >=" << result.lowerBound << ".");
                } else if (computingUpperBound) {
                    STORM_LOG_INFO("\tCurrent result is <=" << result.upperBound << ".");
                }
                STORM_LOG_WARN_COND(statistics.refinementSteps.value() < 1000, "Refinement requires more than 1000 iterations.");
            }
            if (overApproxFixPoint && underApproxFixPoint) {
                STORM_LOG_INFO("Refinement fixpoint reached after " << statistics.refinementSteps.value() << " iterations.\n");
                statistics.refinementFixpointDetected = true;
                break;
            }
        }
    }
    // Print some final output.
    if (options.discretize && overApproximation->hasComputedValues()) {
        auto printOverInfo = [&overApproximation]() {
            std::stringstream str;
            str << "Explored and checked over-approximation MDP:\n";
            overApproximation->getExploredMdp()->printModelInformationToStream(str);
            return str.str();
        };
        STORM_LOG_INFO(printOverInfo());
    }
    if (options.unfold && underApproximation->hasComputedValues()) {
        auto printUnderInfo = [&underApproximation]() {
            std::stringstream str;
            str << "Explored and checked under-approximation MDP:\n";
            underApproximation->getExploredMdp()->printModelInformationToStream(str);
            return str.str();
        };
        STORM_LOG_INFO(printUnderInfo());
        std::shared_ptr<storm::models::sparse::Model<ValueType>> scheduledModel = underApproximation->getExploredMdp();
        if (!options.useStateEliminationCutoff) {
            auto newLabeling = scheduledModel->getStateLabeling();
            auto nrPreprocessingScheds = min ? underApproximation->getNrSchedulersForUpperBounds() : underApproximation->getNrSchedulersForLowerBounds();
            for (uint64_t i = 0; i < nrPreprocessingScheds; ++i) {
                newLabeling.addLabel("sched_" + std::to_string(i));
            }
            newLabeling.addLabel("cutoff");
            newLabeling.addLabel("clipping");
            newLabeling.addLabel("finite_mem");

            auto transMatrix = scheduledModel->getTransitionMatrix();
            for (uint64_t i = 0; i < scheduledModel->getNumberOfStates(); ++i) {
                if (newLabeling.getStateHasLabel("truncated", i)) {
                    uint64_t localChosenActionIndex = underApproximation->getSchedulerForExploredMdp()->getChoice(i).getDeterministicChoice();
                    auto rowIndex = scheduledModel->getTransitionMatrix().getRowGroupIndices()[i];
                    if (scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).size() > 0) {
                        auto label = *(scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).begin());
                        if (label.rfind("clip", 0) == 0) {
                            // Redirect the clipped state to the grid belief it was clipped to.
                            newLabeling.addLabelToState("clipping", i);
                            auto chosenRow = transMatrix.getRow(i, 0);
                            auto candidateIndex = (chosenRow.end() - 1)->getColumn();
                            transMatrix.makeRowDirac(transMatrix.getRowGroupIndices()[i], candidateIndex);
                        } else if (label.rfind("mem_node", 0) == 0) {
                            if (!newLabeling.containsLabel("finite_mem_" + label.substr(9, 1))) {
                                newLabeling.addLabel("finite_mem_" + label.substr(9, 1));
                            }
                            newLabeling.addLabelToState("finite_mem_" + label.substr(9, 1), i);
                            newLabeling.addLabelToState("cutoff", i);
                        } else {
                            newLabeling.addLabelToState(label, i);
                            newLabeling.addLabelToState("cutoff", i);
                        }
                    }
                }
            }
            newLabeling.removeLabel("truncated");

            transMatrix.dropZeroEntries();
            storm::storage::sparse::ModelComponents<ValueType> modelComponents(transMatrix, newLabeling);
            if (scheduledModel->hasChoiceLabeling()) {
                modelComponents.choiceLabeling = scheduledModel->getChoiceLabeling();
            }
            storm::models::sparse::Mdp<ValueType> newMDP(modelComponents);
            auto inducedMC = newMDP.applyScheduler(*(underApproximation->getSchedulerForExploredMdp()), true);
            scheduledModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType>>(inducedMC);
        } else {
            auto inducedMC = underApproximation->getExploredMdp()->applyScheduler(*(underApproximation->getSchedulerForExploredMdp()), true);
            scheduledModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType>>(inducedMC);
        }
        result.schedulerAsMarkovChain = scheduledModel;
        if (min) {
            result.cutoffSchedulers = underApproximation->getUpperValueBoundSchedulers();
        } else {
            result.cutoffSchedulers = underApproximation->getLowerValueBoundSchedulers();
        }
    }
}
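// Interactive variant of the unfolding: exploration can be paused, resumed, and terminated from
// outside via setUnfoldingControl(). Intermediate results are stored in interactiveResult and can
// be queried while exploration continues.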
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::unfoldInteractively(
    storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, std::optional<std::string> rewardModelName,
    storm::pomdp::modelchecker::POMDPValueBounds<ValueType> const& valueBounds, Result& result) {
    statistics.refinementSteps = 0;
    interactiveResult = result;
    unfoldingStatus = Status::Uninitialized;
    unfoldingControl = UnfoldingControl::Run;
    auto trivialPOMDPBounds = valueBounds.trivialPomdpValueBounds;

    // Set up exploration data.
    std::shared_ptr<BeliefManagerType> underApproxBeliefManager;
    HeuristicParameters underApproxHeuristicPar{};
    bool firstIteration = true;
    // Set up the belief manager.
    underApproxBeliefManager = std::make_shared<BeliefManagerType>(
        pomdp(), storm::utility::convertNumber<BeliefValueType>(options.numericPrecision),
        options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
    if (rewardModelName) {
        underApproxBeliefManager->setRewardModel(rewardModelName);
    }

    // Set up the under-approximation explorer. The size threshold is (practically) unbounded, since
    // the exploration depth is controlled interactively rather than by a heuristic limit.
    interactiveUnderApproximationExplorer = std::make_shared<ExplorerType>(underApproxBeliefManager, trivialPOMDPBounds, options.explorationHeuristic);
    underApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
    underApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
    underApproxHeuristicPar.sizeThreshold = std::numeric_limits<uint64_t>::max() - 1;

    if (options.useClipping && rewardModelName.has_value()) {
        interactiveUnderApproximationExplorer->setExtremeValueBound(valueBounds.extremePomdpValueBound);
    }
    if (!valueBounds.fmSchedulerValueList.empty()) {
        interactiveUnderApproximationExplorer->setFMSchedValueList(valueBounds.fmSchedulerValueList);
    }

    // Keep exploring until the unfolding is terminated from outside.
    while (!(unfoldingControl == UnfoldingControl::Terminate)) {
        bool underApproxFixPoint = true;
        bool hasTruncatedStates = false;
        if (unfoldingStatus != Status::Converged) {
            // Continue the exploration.
            underApproxFixPoint = buildUnderApproximation(env, targetObservations, min, rewardModelName.has_value(), false, underApproxHeuristicPar,
                                                          underApproxBeliefManager, interactiveUnderApproximationExplorer, firstIteration);
            if (interactiveUnderApproximationExplorer->hasComputedValues() && !storm::utility::resources::isTerminate()) {
                ValueType const& newValue = interactiveUnderApproximationExplorer->getComputedValueAtInitialState();
                bool betterBound = min ? interactiveResult.updateUpperBound(newValue) : interactiveResult.updateLowerBound(newValue);
                if (betterBound) {
                    STORM_LOG_INFO("Under-approximation result improved after " << statistics.totalTime << " in step #"
                                                                                << (statistics.refinementSteps.value() + 1) << ". New value is '" << newValue
                                                                                << "'.");
                }
                std::shared_ptr<storm::models::sparse::Model<ValueType>> scheduledModel = interactiveUnderApproximationExplorer->getExploredMdp();
                if (!options.useStateEliminationCutoff) {
                    auto newLabeling = scheduledModel->getStateLabeling();
                    auto nrPreprocessingScheds = min ? interactiveUnderApproximationExplorer->getNrSchedulersForUpperBounds()
                                                     : interactiveUnderApproximationExplorer->getNrSchedulersForLowerBounds();
                    for (uint64_t i = 0; i < nrPreprocessingScheds; ++i) {
                        newLabeling.addLabel("sched_" + std::to_string(i));
                    }
                    newLabeling.addLabel("cutoff");
                    newLabeling.addLabel("clipping");
                    newLabeling.addLabel("finite_mem");

                    auto transMatrix = scheduledModel->getTransitionMatrix();
                    for (uint64_t i = 0; i < scheduledModel->getNumberOfStates(); ++i) {
                        if (newLabeling.getStateHasLabel("truncated", i)) {
                            hasTruncatedStates = true;
                            uint64_t localChosenActionIndex =
                                interactiveUnderApproximationExplorer->getSchedulerForExploredMdp()->getChoice(i).getDeterministicChoice();
                            auto rowIndex = scheduledModel->getTransitionMatrix().getRowGroupIndices()[i];
                            if (scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).size() > 0) {
                                auto label = *(scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).begin());
                                if (label.rfind("clip", 0) == 0) {
                                    // Redirect the clipped state to the grid belief it was clipped to.
                                    newLabeling.addLabelToState("clipping", i);
                                    auto chosenRow = transMatrix.getRow(i, 0);
                                    auto candidateIndex = (chosenRow.end() - 1)->getColumn();
                                    transMatrix.makeRowDirac(transMatrix.getRowGroupIndices()[i], candidateIndex);
                                } else if (label.rfind("mem_node", 0) == 0) {
                                    if (!newLabeling.containsLabel("finite_mem_" + label.substr(9, 1))) {
                                        newLabeling.addLabel("finite_mem_" + label.substr(9, 1));
                                    }
                                    newLabeling.addLabelToState("finite_mem_" + label.substr(9, 1), i);
                                    newLabeling.addLabelToState("cutoff", i);
                                } else {
                                    newLabeling.addLabelToState(label, i);
                                    newLabeling.addLabelToState("cutoff", i);
                                }
                            }
                        }
                    }
                    newLabeling.removeLabel("truncated");

                    transMatrix.dropZeroEntries();
                    storm::storage::sparse::ModelComponents<ValueType> modelComponents(transMatrix, newLabeling);
                    if (scheduledModel->hasChoiceLabeling()) {
                        modelComponents.choiceLabeling = scheduledModel->getChoiceLabeling();
                    }
                    storm::models::sparse::Mdp<ValueType> newMDP(modelComponents);
                    auto inducedMC = newMDP.applyScheduler(*(interactiveUnderApproximationExplorer->getSchedulerForExploredMdp()), true);
                    scheduledModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType>>(inducedMC);
                }
                interactiveResult.schedulerAsMarkovChain = scheduledModel;
                if (min) {
                    interactiveResult.cutoffSchedulers = interactiveUnderApproximationExplorer->getUpperValueBoundSchedulers();
                } else {
                    interactiveResult.cutoffSchedulers = interactiveUnderApproximationExplorer->getLowerValueBoundSchedulers();
                }
                if (firstIteration) {
                    firstIteration = false;
                }
                unfoldingStatus = Status::ResultAvailable;
            } else {
                break;
            }

            if (storm::utility::resources::isTerminate()) {
                break;
            }

            ++statistics.refinementSteps.value();
            // Don't flood the log: only report the first 1000 iterations in detail.
            if (statistics.refinementSteps.value() <= 1000) {
                STORM_LOG_INFO("Completed iteration #" << statistics.refinementSteps.value() << ". Current checktime is " << statistics.totalTime << ".");
                bool computingLowerBound = false;
                bool computingUpperBound = false;
                if (options.unfold) {
                    STORM_LOG_INFO("\tUnder-approx MDP has size " << interactiveUnderApproximationExplorer->getExploredMdp()->getNumberOfStates() << ".");
                    (min ? computingUpperBound : computingLowerBound) = true;
                }
                if (computingLowerBound && computingUpperBound) {
                    STORM_LOG_INFO("\tCurrent result is [" << interactiveResult.lowerBound << ", " << interactiveResult.upperBound << "].");
                } else if (computingLowerBound) {
                    STORM_LOG_INFO("\tCurrent result is >=" << interactiveResult.lowerBound << ".");
                } else if (computingUpperBound) {
                    STORM_LOG_INFO("\tCurrent result is <=" << interactiveResult.upperBound << ".");
                }
            }
        }
        if (underApproxFixPoint) {
            STORM_LOG_INFO("Fixpoint reached after " << statistics.refinementSteps.value() << " iterations.\n");
            statistics.refinementFixpointDetected = true;
            unfoldingStatus = Status::Converged;
            unfoldingControl = UnfoldingControl::Pause;
        }
        if (!hasTruncatedStates) {
            STORM_LOG_INFO("No states have been truncated, so continued iteration does not yield new results.\n");
            unfoldingStatus = Status::Converged;
            unfoldingControl = UnfoldingControl::Pause;
        }
        // Busy-wait while the unfolding is paused, until it is resumed or terminated from outside.
        while (unfoldingControl == UnfoldingControl::Pause && !storm::utility::resources::isTerminate()) {
            // Intentionally empty.
        }
    }
    unfoldingStatus = Status::Terminated;
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::unfoldInteractively(
    std::set<uint32_t> const& targetObservations, bool min, std::optional<std::string> rewardModelName,
    storm::pomdp::modelchecker::POMDPValueBounds<ValueType> const& valueBounds, Result& result) {
    storm::Environment env;
    unfoldInteractively(env, targetObservations, min, rewardModelName, valueBounds, result);
}
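// Accessors for the interactive unfolding: external drivers poll the current (intermediate) result
// and a status code to synchronize with the exploration.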
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getInteractiveResult() {
    return interactiveResult;
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
int64_t BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getStatus() {
    if (unfoldingStatus == Status::Uninitialized)
        return 0;
    if (unfoldingStatus == Status::Exploring)
        return 1;
    if (unfoldingStatus == Status::ModelExplorationFinished)
        return 2;
    if (unfoldingStatus == Status::ResultAvailable)
        return 3;
    if (unfoldingStatus == Status::Terminated)
        return 4;
    return -1;
}
template<typename ValueType>
ValueType getGap(ValueType const& l, ValueType const& u) {
    STORM_LOG_ASSERT(l >= storm::utility::zero<ValueType>() && u >= storm::utility::zero<ValueType>(),
                     "Gap computation currently does not handle negative values.");
    if (storm::utility::isInfinity(u)) {
        if (storm::utility::isInfinity(l)) {
            return storm::utility::zero<ValueType>();
        }
        return u;
    }
    if (storm::utility::isZero(u)) {
        STORM_LOG_ASSERT(storm::utility::isZero(l), "Upper bound is zero but lower bound is " << l << ".");
        return storm::utility::zero<ValueType>();
    }
    // Return the relative gap, i.e. the difference between the bounds scaled by their mean.
    return storm::utility::abs<ValueType>(u - l) * storm::utility::convertNumber<ValueType, uint64_t>(2) / (l + u);
}
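// Example: for bounds l = 0.4 and u = 0.6 the relative gap is |0.6 - 0.4| * 2 / (0.4 + 0.6) = 0.4,
// i.e. the interval width measured relative to the mean of the two bounds.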
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::buildOverApproximation(
    storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, bool refine,
    HeuristicParameters const& heuristicParameters, std::vector<BeliefValueType>& observationResolutionVector,
    std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
    // Detect whether the refinement reached a fixpoint.
    bool fixPoint = true;

    statistics.overApproximationBuildTime.start();
    storm::storage::BitVector refinedObservations;
    if (!refine) {
        // We build the model from scratch, so initialize the explorer.
        if (computeRewards) {
            overApproximation->startNewExploration(storm::utility::zero<ValueType>());
        } else {
            overApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
        }
    } else {
        // We refine the existing model: given the current best results, compute the choices that are
        // optimal and the states that are reachable under an optimal scheduler.
        overApproximation->computeOptimalChoicesAndReachableMdpStates(heuristicParameters.optimalChoiceValueEpsilon, true);
        // Rate all observations to find those whose resolution should be increased.
        auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector);
        auto numericPrecision = storm::utility::convertNumber<BeliefValueType>(options.numericPrecision);
        if (std::any_of(obsRatings.begin(), obsRatings.end(),
                        [&numericPrecision](BeliefValueType const& value) { return value + numericPrecision < storm::utility::one<BeliefValueType>(); })) {
            STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because there are still observations to refine.");
            fixPoint = false;
        }
        refinedObservations = storm::utility::vector::filter<BeliefValueType>(obsRatings, [&heuristicParameters](BeliefValueType const& r) {
            return r <= storm::utility::convertNumber<BeliefValueType>(heuristicParameters.observationThreshold);
        });
        for (auto const obs : refinedObservations) {
            // Increase the resolution; use storm::RationalNumber to detect overflows.
            storm::RationalNumber newObsResolutionAsRational = storm::utility::convertNumber<storm::RationalNumber>(observationResolutionVector[obs]) *
                                                               storm::utility::convertNumber<storm::RationalNumber>(options.resolutionFactor);
            if (!storm::NumberTraits<BeliefValueType>::IsExact &&
                newObsResolutionAsRational > storm::utility::convertNumber<storm::RationalNumber>(std::numeric_limits<double>::max())) {
                observationResolutionVector[obs] = storm::utility::convertNumber<BeliefValueType>(std::numeric_limits<double>::max());
            } else {
                observationResolutionVector[obs] = storm::utility::convertNumber<BeliefValueType>(newObsResolutionAsRational);
            }
        }
        overApproximation->restartExploration();
    }
    statistics.overApproximationMaxResolution = storm::utility::ceil(*std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()));
    // Start exploring.
    storm::utility::Stopwatch explorationTime;
    if (options.explorationTimeLimit != 0) {
        explorationTime.start();
    }
    bool timeLimitExceeded = false;
    std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations;  // Declared here to avoid reallocations.
    uint64_t numRewiredOrExploredStates = 0;
    while (overApproximation->hasUnexploredState()) {
        if (!timeLimitExceeded && options.explorationTimeLimit != 0 &&
            static_cast<uint64_t>(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit) {
            STORM_LOG_INFO("Exploration time limit exceeded.");
            timeLimitExceeded = true;
            STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because the exploration time limit is exceeded.");
            fixPoint = false;
        }

        uint64_t currId = overApproximation->exploreNextState();
        bool hasOldBehavior = refine && overApproximation->currentStateHasOldBehavior();
        if (!hasOldBehavior) {
            STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because a new state is explored.");
            fixPoint = false;  // Exploring a new state is not a fixpoint.
        }
        uint32_t currObservation = beliefManager->getBeliefObservation(currId);
        if (targetObservations.count(currObservation) != 0) {
            overApproximation->setCurrentStateIsTarget();
            overApproximation->addSelfloopTransition();
        } else {
            // We need to decide how to treat this state (and each individual enabled action). There are the following cases:
            // 1. The state has no old behavior and is to be explored fully.
            // 2. The state has no old behavior and is to be truncated.
            // 3. The state has old behavior and was truncated in the last iteration; we potentially have to re-explore it.
            // 4. The state has old behavior and was not truncated in the last iteration; we might have to re-wire certain choices.
            bool exploreAllActions = false;
            bool truncateAllActions = false;
            bool restoreAllActions = false;
            bool checkRewireForAllActions = false;
            // Get the relative gap between the current bounds at this state.
            ValueType gap = getGap(overApproximation->getLowerValueBoundAtCurrentState(), overApproximation->getUpperValueBoundAtCurrentState());
            if (!hasOldBehavior) {
                // Case 1 or 2: a new state.
                if (!timeLimitExceeded && gap >= heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
                    exploreAllActions = true;  // Case 1.
                } else {
                    truncateAllActions = true;  // Case 2.
                    overApproximation->setCurrentStateIsTruncated();
                }
            } else if (overApproximation->getCurrentStateWasTruncated()) {
                // Case 3: the state was truncated before.
                if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold &&
                    numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
                    exploreAllActions = true;
                    STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because a previously truncated state is now explored.");
                    fixPoint = false;
                } else {
                    truncateAllActions = true;
                    overApproximation->setCurrentStateIsTruncated();
                    if (fixPoint && overApproximation->currentStateIsOptimalSchedulerReachable() && !storm::utility::isZero(gap)) {
                        STORM_LOG_INFO_COND(!fixPoint,
                                            "Not reaching a refinement fixpoint because we truncate a state with non-zero gap "
                                                << gap << " that is reachable via an optimal sched.");
                        fixPoint = false;
                    }
                }
            } else {
                // Case 4: decide per action whether a re-wiring check is needed.
                if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold &&
                    numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
                    checkRewireForAllActions = true;
                } else {
                    restoreAllActions = true;
                    // Ensure that we do not skip the rewiring checks.
                    checkRewireForAllActions = true;
                }
            }
            bool expandedAtLeastOneAction = false;
            for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
                bool expandCurrentAction = exploreAllActions || truncateAllActions;
                if (checkRewireForAllActions) {
                    // Check whether this action needs to be re-wired.
                    assert(!expandCurrentAction);
                    assert(overApproximation->currentStateHasOldBehavior());
                    if (overApproximation->getCurrentStateActionExplorationWasDelayed(action) ||
                        overApproximation->currentStateHasSuccessorObservationInObservationSet(action, refinedObservations)) {
                        if (!restoreAllActions && overApproximation->actionAtCurrentStateWasOptimal(action)) {
                            // Do the rewiring now.
                            expandCurrentAction = true;
                            STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because we rewire a state.");
                            fixPoint = false;
                        } else {
                            // Delay the rewiring.
                            overApproximation->setCurrentChoiceIsDelayed(action);
                            if (fixPoint) {
                                if (!overApproximation->getCurrentStateActionExplorationWasDelayed(action) ||
                                    (overApproximation->currentStateIsOptimalSchedulerReachable() &&
                                     overApproximation->actionAtCurrentStateWasOptimal(action) && !storm::utility::isZero(gap))) {
                                    STORM_LOG_INFO_COND(!fixPoint,
                                                        "Not reaching a refinement fixpoint because we delay a rewiring of a state with non-zero gap "
                                                            << gap << " that is reachable via an optimal scheduler.");
                                    fixPoint = false;
                                }
                            }
                        }
                    }
                }
                if (expandCurrentAction) {
                    expandedAtLeastOneAction = true;
                    if (!truncateAllActions) {
                        // Expand the action: add all triangulated successor grid points.
                        auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
                        for (auto const& successor : successorGridPoints) {
                            overApproximation->addTransitionToBelief(action, successor.first, successor.second, false);
                        }
                        if (computeRewards) {
                            overApproximation->computeRewardAtCurrentState(action);
                        }
                    } else {
                        // Truncate the action: route the remaining probability mass to extra states using bound values.
                        auto truncationProbability = storm::utility::zero<ValueType>();
                        auto truncationValueBound = storm::utility::zero<ValueType>();
                        auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
                        for (auto const& successor : successorGridPoints) {
                            bool added = overApproximation->addTransitionToBelief(action, successor.first, successor.second, true);
                            if (!added) {
                                // The successor is not yet part of the explored model; account for it in the truncation correction.
                                truncationProbability += successor.second;
                                truncationValueBound += successor.second * (min ? overApproximation->computeLowerValueBoundAtBelief(successor.first)
                                                                                : overApproximation->computeUpperValueBoundAtBelief(successor.first));
                            }
                        }
                        if (computeRewards) {
                            // The truncationValueBound bounds the expected reward from this point on.
                            overApproximation->addTransitionsToExtraStates(action, truncationProbability);
                            overApproximation->computeRewardAtCurrentState(action, truncationValueBound);
                        } else {
                            // The truncationValueBound bounds the reachability probability from this point on.
                            overApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
                        }
                    }
                } else {
                    // Restore the old behavior of this action.
                    overApproximation->restoreOldBehaviorAtCurrentState(action);
                }
            }
            if (expandedAtLeastOneAction) {
                ++numRewiredOrExploredStates;
            }
            // Copy choice labels from the POMDP, if present.
            for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
                if (pomdp().hasChoiceLabeling()) {
                    auto rowIndex = pomdp().getTransitionMatrix().getRowGroupIndices()[beliefManager->getRepresentativeState(currId)];
                    if (pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).size() > 0) {
                        overApproximation->addChoiceLabelToCurrentState(action, *(pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).begin()));
                    }
                }
            }
        }
        if (storm::utility::resources::isTerminate()) {
            break;
        }
    }

    if (storm::utility::resources::isTerminate()) {
        // Exploration was aborted; don't overwrite statistics of a previous, successful computation.
        if (!statistics.overApproximationStates) {
            statistics.overApproximationBuildAborted = true;
            statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates();
        }
        statistics.overApproximationBuildTime.stop();
        return false;
    }

    overApproximation->finishExploration();
    statistics.overApproximationBuildTime.stop();

    statistics.overApproximationCheckTime.start();
    overApproximation->computeValuesOfExploredMdp(env, min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
    statistics.overApproximationCheckTime.stop();

    // Update the statistics.
    statistics.overApproximationStates = overApproximation->getExploredMdp()->getNumberOfStates();
    return fixPoint;
}
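// Builds (or continues building) the under-approximation belief MDP. States beyond the size
// threshold are truncated: their value is bounded via cut-off schedulers or, if enabled, belief
// clipping. Returns true if the exploration reached a fixpoint.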
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::buildUnderApproximation(
    storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, bool refine,
    HeuristicParameters const& heuristicParameters, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation,
    bool firstIteration) {
    statistics.underApproximationBuildTime.start();

    unfoldingStatus = Status::Exploring;
    if (options.useClipping) {
        STORM_LOG_INFO("Use belief clipping with grid beliefs.\n");
        statistics.nrClippingAttempts = 0;
        statistics.nrClippedStates = 0;
    }

    uint64_t nrCutoffStrategies = min ? underApproximation->getNrSchedulersForUpperBounds() : underApproximation->getNrSchedulersForLowerBounds();

    bool fixPoint = true;
    if (heuristicParameters.sizeThreshold != std::numeric_limits<uint64_t>::max()) {
        statistics.underApproximationStateLimit = heuristicParameters.sizeThreshold;
    }
    if (options.interactiveUnfolding && !firstIteration) {
        // Continue the interactive exploration from the stored state.
        underApproximation->restoreExplorationState();
    } else if (computeRewards) {
        // Build from scratch for expected rewards.
        underApproximation->startNewExploration(storm::utility::zero<ValueType>(), storm::utility::infinity<ValueType>());
    } else {
        // Build from scratch for reachability probabilities.
        underApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
    }
    if (refine) {
        // When refining, re-explore the existing model.
        underApproximation->restartExploration();
    }

    // Start exploring.
    storm::utility::Stopwatch printUpdateStopwatch;
    printUpdateStopwatch.start();
    storm::utility::Stopwatch explorationTime;
    if (options.explorationTimeLimit != 0) {
        explorationTime.start();
    }
    bool timeLimitExceeded = false;
    bool stateStored = false;
    while (underApproximation->hasUnexploredState()) {
        if (!timeLimitExceeded && options.explorationTimeLimit != 0 &&
            static_cast<uint64_t>(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit) {
            STORM_LOG_INFO("Exploration time limit exceeded.");
            timeLimitExceeded = true;
        }
        // Periodically report the exploration progress.
        if (printUpdateStopwatch.getTimeInSeconds() >= 60) {
            printUpdateStopwatch.restart();
            STORM_LOG_INFO("### " << underApproximation->getCurrentNumberOfMdpStates() << " beliefs in under-approximation MDP" << " ##### "
                                  << underApproximation->getUnexploredStates().size() << " beliefs queued\n");
            if (underApproximation->getCurrentNumberOfMdpStates() > heuristicParameters.sizeThreshold && options.useClipping) {
                STORM_LOG_INFO("##### Clipping Attempts: " << statistics.nrClippingAttempts.value() << " ##### "
                                                           << "Clipped States: " << statistics.nrClippedStates.value() << "\n");
            }
        }
        if (unfoldingControl == UnfoldingControl::Pause && !stateStored) {
            underApproximation->storeExplorationState();
            stateStored = true;
        }
        uint64_t currId = underApproximation->exploreNextState();
        uint32_t currObservation = beliefManager->getBeliefObservation(currId);
        uint64_t addedActions = 0;
        bool stateAlreadyExplored = refine && underApproximation->currentStateHasOldBehavior() && !underApproximation->getCurrentStateWasTruncated();
        if (!stateAlreadyExplored || timeLimitExceeded) {
            fixPoint = false;
        }
        if (targetObservations.count(beliefManager->getBeliefObservation(currId)) != 0) {
            underApproximation->setCurrentStateIsTarget();
            underApproximation->addSelfloopTransition();
            underApproximation->addChoiceLabelToCurrentState(0, "loop");
        } else {
            bool stopExploration = false;
            bool clipBelief = false;
            if (timeLimitExceeded || (options.interactiveUnfolding && unfoldingControl != UnfoldingControl::Run)) {
                clipBelief = options.useClipping;
                stopExploration = !underApproximation->isMarkedAsGridBelief(currId);
            } else if (!stateAlreadyExplored) {
                // Decide whether to explore this state now.
                ValueType gap = getGap(underApproximation->getLowerValueBoundAtCurrentState(), underApproximation->getUpperValueBoundAtCurrentState());
                if ((gap < heuristicParameters.gapThreshold) || (gap == 0 && options.cutZeroGap)) {
                    stopExploration = true;
                } else if (underApproximation->getCurrentNumberOfMdpStates() >= heuristicParameters.sizeThreshold) {
                    clipBelief = options.useClipping;
                    stopExploration = !underApproximation->isMarkedAsGridBelief(currId);
                }
            }

            if (clipBelief && !underApproximation->isMarkedAsGridBelief(currId)) {
                // Clip the belief to the grid, either explicitly or successor-wise.
                if (!options.useStateEliminationCutoff) {
                    bool successfulClip = clipToGridExplicitly(currId, computeRewards, beliefManager, underApproximation, 0);
                    // Re-check: the current belief might have been detected to be a grid belief.
                    stopExploration = !underApproximation->isMarkedAsGridBelief(currId);
                    if (successfulClip) {
                        addedActions += 1;
                    }
                } else {
                    clipToGrid(currId, computeRewards, min, beliefManager, underApproximation);
                    addedActions += beliefManager->getBeliefNumberOfChoices(currId);
                }
            }  // End of the clipping procedure.

            if (stopExploration) {
                underApproximation->setCurrentStateIsTruncated();
            }
            if (options.useStateEliminationCutoff || !stopExploration) {
                // Add the transitions to the successors (or, for truncated states, to the extra states).
                uint64_t numActions = beliefManager->getBeliefNumberOfChoices(currId);
                if (underApproximation->needsActionAdjustment(numActions)) {
                    underApproximation->adjustActions(numActions);
                }
                for (uint64_t action = 0; action < numActions; ++action) {
                    // Copy the choice label from the POMDP, if present.
                    if (pomdp().hasChoiceLabeling()) {
                        auto rowIndex = pomdp().getTransitionMatrix().getRowGroupIndices()[beliefManager->getRepresentativeState(currId)];
                        if (pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).size() > 0) {
                            underApproximation->addChoiceLabelToCurrentState(addedActions + action,
                                                                             *(pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).begin()));
                        }
                    }
                    if (stateAlreadyExplored) {
                        underApproximation->restoreOldBehaviorAtCurrentState(action);
                    } else {
                        auto truncationProbability = storm::utility::zero<ValueType>();
                        auto truncationValueBound = storm::utility::zero<ValueType>();
                        auto successors = beliefManager->expand(currId, action);
                        for (auto const& successor : successors) {
                            bool added = underApproximation->addTransitionToBelief(addedActions + action, successor.first, successor.second, stopExploration);
                            if (!added) {
                                STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
                                // The successor is not part of the explored model; account for it in the truncation correction.
                                truncationProbability += successor.second;
                                // Compute a bound on the value at the truncated successor.
                                truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first)
                                                                                : underApproximation->computeLowerValueBoundAtBelief(successor.first));
                            }
                        }
                        if (stopExploration) {
                            if (computeRewards) {
                                if (truncationValueBound == storm::utility::infinity<ValueType>()) {
                                    underApproximation->addTransitionsToExtraStates(addedActions + action, storm::utility::zero<ValueType>(),
                                                                                    truncationProbability);
                                } else {
                                    underApproximation->addTransitionsToExtraStates(addedActions + action, truncationProbability);
                                }
                            } else {
                                underApproximation->addTransitionsToExtraStates(addedActions + action, truncationValueBound,
                                                                                truncationProbability - truncationValueBound);
                            }
                        }
                        if (computeRewards) {
                            // The truncationValueBound is added on top of the reward of this choice.
                            if (truncationValueBound != storm::utility::infinity<ValueType>()) {
                                if (!clipBelief) {
                                    underApproximation->computeRewardAtCurrentState(action, truncationValueBound);
                                } else {
                                    underApproximation->addRewardToCurrentState(addedActions + action,
                                                                                beliefManager->getBeliefActionReward(currId, action) + truncationValueBound);
                                }
                            }
                        }
                    }
                }
            } else {
                // Truncated state: add one choice per cut-off scheduler that jumps to the extra
                // states with the value guaranteed by that scheduler.
                for (uint64_t i = 0; i < nrCutoffStrategies && !options.skipHeuristicSchedulers; ++i) {
                    auto cutOffValue = min ? underApproximation->computeUpperValueBoundForScheduler(currId, i)
                                           : underApproximation->computeLowerValueBoundForScheduler(currId, i);
                    if (computeRewards) {
                        if (cutOffValue != storm::utility::infinity<ValueType>()) {
                            underApproximation->addTransitionsToExtraStates(addedActions, storm::utility::one<ValueType>());
                            underApproximation->addRewardToCurrentState(addedActions, cutOffValue);
                        } else {
                            underApproximation->addTransitionsToExtraStates(addedActions, storm::utility::zero<ValueType>(), storm::utility::one<ValueType>());
                        }
                    } else {
                        underApproximation->addTransitionsToExtraStates(addedActions, cutOffValue, storm::utility::one<ValueType>() - cutOffValue);
                    }
                    if (pomdp().hasChoiceLabeling()) {
                        underApproximation->addChoiceLabelToCurrentState(addedActions, "sched_" + std::to_string(i));
                    }
                    ++addedActions;
                }
                if (underApproximation->hasFMSchedulerValues()) {
                    uint64_t transitionNr = 0;
                    for (uint64_t i = 0; i < underApproximation->getNrOfMemoryNodesForObservation(currObservation); ++i) {
                        auto resPair = underApproximation->computeFMSchedulerValueForMemoryNode(currId, i);
                        ValueType cutOffValue;
                        if (resPair.first) {
                            cutOffValue = resPair.second;
                        } else {
                            STORM_LOG_DEBUG("Skipped cut-off of belief with ID " << currId << " with finite memory scheduler in memory node " << i
                                                                                 << ". Missing values.");
                            continue;
                        }
                        if (computeRewards) {
                            if (cutOffValue != storm::utility::infinity<ValueType>()) {
                                underApproximation->addTransitionsToExtraStates(addedActions + transitionNr, storm::utility::one<ValueType>());
                                underApproximation->addRewardToCurrentState(addedActions + transitionNr, cutOffValue);
                            } else {
                                underApproximation->addTransitionsToExtraStates(addedActions + transitionNr, storm::utility::zero<ValueType>(),
                                                                                storm::utility::one<ValueType>());
                            }
                        } else {
                            underApproximation->addTransitionsToExtraStates(addedActions + transitionNr, cutOffValue,
                                                                            storm::utility::one<ValueType>() - cutOffValue);
                        }
                        if (pomdp().hasChoiceLabeling()) {
                            underApproximation->addChoiceLabelToCurrentState(addedActions + transitionNr, "mem_node_" + std::to_string(i));
                        }
                        ++transitionNr;
                    }
                }
            }
        }
        if (storm::utility::resources::isTerminate()) {
            break;
        }
    }
    if (storm::utility::resources::isTerminate()) {
        // Exploration was aborted; don't overwrite statistics of a previous, successful computation.
        if (!statistics.underApproximationStates) {
            statistics.underApproximationBuildAborted = true;
            statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates();
        }
        statistics.underApproximationBuildTime.stop();
        return false;
    }

    underApproximation->finishExploration();
    statistics.underApproximationBuildTime.stop();
    printUpdateStopwatch.stop();
    STORM_LOG_INFO("Finished exploring under-approximation MDP.\nStart analysis...\n");
    unfoldingStatus = Status::ModelExplorationFinished;
    statistics.underApproximationCheckTime.start();
    underApproximation->computeValuesOfExploredMdp(env, min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
    statistics.underApproximationCheckTime.stop();
    if (underApproximation->getExploredMdp()->getStateLabeling().getStates("truncated").getNumberOfSetBits() > 0) {
        statistics.nrTruncatedStates = underApproximation->getExploredMdp()->getStateLabeling().getStates("truncated").getNumberOfSetBits();
    }
    // Update the statistics.
    statistics.underApproximationStates = underApproximation->getExploredMdp()->getNumberOfStates();
    return fixPoint;
}
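// Clips every successor belief of the given state to the belief grid (state-elimination variant):
// each successor is replaced by a nearby grid belief plus a correction whose probability mass
// (delta) is routed to the extra states, with a reward correction where needed.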
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::clipToGrid(uint64_t clippingStateId, bool computeRewards, bool min,
                                                                                                    std::shared_ptr<BeliefManagerType>& beliefManager,
                                                                                                    std::shared_ptr<ExplorerType>& beliefExplorer) {
    // Add the transitions to the successor beliefs, clipping each successor to the grid if possible.
    for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(clippingStateId); action < numActions; ++action) {
        auto rewardBound = utility::zero<BeliefValueType>();
        auto successors = beliefManager->expand(clippingStateId, action);
        auto absDelta = utility::zero<BeliefValueType>();
        for (auto const& successor : successors) {
            // Add the transition directly if the successor is already known to the explorer.
            bool added = beliefExplorer->addTransitionToBelief(action, successor.first, successor.second, true);
            if (!added) {
                // The successor is new; try to clip it to the grid.
                statistics.nrClippingAttempts = statistics.nrClippingAttempts.value() + 1;
                auto clipping = beliefManager->clipBeliefToGrid(
                    successor.first, options.clippingGridRes, computeRewards ? beliefExplorer->getStateExtremeBoundIsInfinite() : storm::storage::BitVector());
                if (clipping.isClippable) {
                    // The belief is clippable; add the transition to the clipped belief with reduced probability.
                    statistics.nrClippedStates = statistics.nrClippedStates.value() + 1;
                    BeliefValueType transitionProb =
                        (utility::one<BeliefValueType>() - clipping.delta) * utility::convertNumber<BeliefValueType>(successor.second);
                    beliefExplorer->addTransitionToBelief(action, clipping.targetBelief, utility::convertNumber<BeliefMDPType>(transitionProb), false);
                    // Collect the probability mass lost by clipping.
                    absDelta += clipping.delta * utility::convertNumber<BeliefValueType>(successor.second);
                    if (computeRewards) {
                        // Collect the reward correction for the clipped probability mass.
                        auto localRew = utility::zero<BeliefValueType>();
                        for (auto const& deltaValue : clipping.deltaValues) {
                            localRew += deltaValue.second *
                                        utility::convertNumber<BeliefValueType>(beliefExplorer->getExtremeValueBoundAtPOMDPState(deltaValue.first));
                        }
                        if (localRew == utility::infinity<BeliefValueType>()) {
                            STORM_LOG_WARN("Infinite reward correction value computed for clipping!");
                        }
                        rewardBound += localRew * utility::convertNumber<BeliefValueType>(successor.second);
                    }
                } else if (clipping.onGrid) {
                    // The belief is not clippable, but already on the grid; add the transition directly.
                    beliefExplorer->addTransitionToBelief(action, successor.first, successor.second, false);
                } else {
                    // Otherwise, route the entire probability mass of this successor to the extra states.
                    absDelta += utility::convertNumber<BeliefValueType>(successor.second);
                    rewardBound += utility::convertNumber<BeliefValueType>(successor.second) *
                                   utility::convertNumber<BeliefValueType>(min ? beliefExplorer->computeUpperValueBoundAtBelief(successor.first)
                                                                               : beliefExplorer->computeLowerValueBoundAtBelief(successor.first));
                }
            }
        }
        // Add the collected clipping transition to the extra states, if necessary.
        if (absDelta != utility::zero<BeliefValueType>()) {
            if (computeRewards) {
                if (rewardBound == utility::infinity<BeliefValueType>()) {
                    // The reward correction is infinite; route the mass to the bottom state instead.
                    beliefExplorer->addTransitionsToExtraStates(action, utility::zero<BeliefMDPType>(), utility::convertNumber<BeliefMDPType>(absDelta));
                } else {
                    beliefExplorer->addTransitionsToExtraStates(action, utility::convertNumber<BeliefMDPType>(absDelta));
                    BeliefValueType totalRewardVal = rewardBound / absDelta;
                    beliefExplorer->addClippingRewardToCurrentState(action, utility::convertNumber<BeliefMDPType>(totalRewardVal));
                }
            } else {
                beliefExplorer->addTransitionsToExtraStates(action, utility::zero<BeliefMDPType>(), utility::convertNumber<BeliefMDPType>(absDelta));
            }
        }
        if (computeRewards) {
            beliefExplorer->computeRewardAtCurrentState(action);
        }
    }
}
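// Explicit variant of the clipping: clips the current belief itself (rather than its successors)
// to the grid by adding a dedicated "clip" action. Returns true if a clipping action was added.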
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::clipToGridExplicitly(uint64_t clippingStateId, bool computeRewards,
                                                                                                              std::shared_ptr<BeliefManagerType>& beliefManager,
                                                                                                              std::shared_ptr<ExplorerType>& beliefExplorer,
                                                                                                              uint64_t localActionIndex) {
    statistics.nrClippingAttempts = statistics.nrClippingAttempts.value() + 1;
    auto clipping = beliefManager->clipBeliefToGrid(clippingStateId, options.clippingGridRes,
                                                    computeRewards ? beliefExplorer->getStateExtremeBoundIsInfinite() : storm::storage::BitVector());
    if (clipping.isClippable) {
        // The belief is clippable; add the transition to the clipped belief with reduced probability.
        statistics.nrClippedStates = statistics.nrClippedStates.value() + 1;
        // Adjust the transition probability by the clipping value delta.
        BeliefValueType transitionProb = (utility::one<BeliefValueType>() - clipping.delta);
        beliefExplorer->addTransitionToBelief(localActionIndex, clipping.targetBelief, utility::convertNumber<BeliefMDPType>(transitionProb), false);
        beliefExplorer->markAsGridBelief(clipping.targetBelief);
        if (computeRewards) {
            // Compute the reward correction for the clipped probability mass.
            auto reward = utility::zero<BeliefValueType>();
            for (auto const& deltaValue : clipping.deltaValues) {
                reward += deltaValue.second * utility::convertNumber<BeliefValueType>(beliefExplorer->getExtremeValueBoundAtPOMDPState(deltaValue.first));
            }
            if (reward == utility::infinity<BeliefValueType>()) {
                STORM_LOG_WARN("Infinite reward correction value computed for clipping!");
                // The reward correction is infinite; route the clipped mass to the bottom state instead.
                beliefExplorer->addTransitionsToExtraStates(localActionIndex, utility::zero<BeliefMDPType>(),
                                                            utility::convertNumber<BeliefMDPType>(clipping.delta));
            } else {
                beliefExplorer->addTransitionsToExtraStates(localActionIndex, utility::convertNumber<BeliefMDPType>(clipping.delta));
                BeliefValueType totalRewardVal = reward / clipping.delta;
                beliefExplorer->addClippingRewardToCurrentState(localActionIndex, utility::convertNumber<BeliefMDPType>(totalRewardVal));
            }
        } else {
            beliefExplorer->addTransitionsToExtraStates(localActionIndex, utility::zero<BeliefMDPType>(),
                                                        utility::convertNumber<BeliefMDPType>(clipping.delta));
        }
        beliefExplorer->addChoiceLabelToCurrentState(localActionIndex, "clip");
        return true;
    }
    if (clipping.onGrid) {
        // The belief is not clippable, but already on the grid; mark it accordingly.
        beliefExplorer->markAsGridBelief(clippingStateId);
    }
    return false;
}
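// External control of the interactive unfolding: drivers call the following methods to pause,
// resume, or terminate the exploration and to query its progress.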
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::setUnfoldingControl(
    storm::pomdp::modelchecker::BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::UnfoldingControl newUnfoldingControl) {
    unfoldingControl = newUnfoldingControl;
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::pauseUnfolding() {
    setUnfoldingControl(UnfoldingControl::Pause);
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::continueUnfolding() {
    setUnfoldingControl(UnfoldingControl::Run);
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::terminateUnfolding() {
    setUnfoldingControl(UnfoldingControl::Terminate);
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::isResultReady() {
    return unfoldingStatus == Status::ResultAvailable || unfoldingStatus == Status::Converged;
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::hasConverged() {
    return unfoldingStatus == Status::Converged;
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::isExploring() {
    return unfoldingStatus == Status::Exploring;
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
std::shared_ptr<storm::builder::BeliefMdpExplorer<PomdpModelType, BeliefValueType>>
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getInteractiveBeliefExplorer() {
    return interactiveUnderApproximationExplorer;
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::setFMSchedValueList(
    std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> valueList) {
    interactiveUnderApproximationExplorer->setFMSchedValueList(valueList);
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
BeliefValueType BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::rateObservation(
    typename ExplorerType::SuccessorObservationInformation const& info, BeliefValueType const& observationResolution, BeliefValueType const& maxResolution) {
    auto n = storm::utility::convertNumber<BeliefValueType, uint64_t>(info.support.size());
    auto one = storm::utility::one<BeliefValueType>();
    if (storm::utility::isOne(n)) {
        // A Dirac belief is always approximated precisely, so it gets the best possible rating.
        return one;
    } else {
        // Create the rating for this observation at this choice from the given info.
        auto obsChoiceRating = storm::utility::convertNumber<BeliefValueType, ValueType>(info.maxProbabilityToSuccessorWithObs / info.observationProbability);
        // At this point, obsChoiceRating is the largest triangulation weight, which ranges from 1/n to 1.
        // Normalize the rating to [0, 1], where 0 is the worst rating.
        obsChoiceRating = (obsChoiceRating * n - one) / (n - one);
        // Scale the rating with the resolution, so that observations with a low resolution are rated lower
        // (and thus refined earlier).
        obsChoiceRating *= observationResolution / maxResolution;
        return obsChoiceRating;
    }
}
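// Rates every observation by the worst (minimal) rating over all optimal choices that can reach
// it; observations with a low rating are the candidates for a resolution increase in the next
// over-approximation refinement step.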
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
std::vector<BeliefValueType> BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getObservationRatings(
    std::shared_ptr<ExplorerType> const& overApproximation, std::vector<BeliefValueType> const& observationResolutionVector) {
    uint64_t numMdpStates = overApproximation->getExploredMdp()->getNumberOfStates();
    auto const& choiceIndices = overApproximation->getExploredMdp()->getNondeterministicChoiceIndices();
    BeliefValueType maxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());

    std::vector<BeliefValueType> resultingRatings(pomdp().getNrObservations(), storm::utility::one<BeliefValueType>());

    std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations;  // Declared here to avoid reallocations.
    for (uint64_t mdpState = 0; mdpState < numMdpStates; ++mdpState) {
        // Only consider states that are reachable under an optimal scheduler.
        if (overApproximation->stateIsOptimalSchedulerReachable(mdpState)) {
            for (uint64_t mdpChoice = choiceIndices[mdpState]; mdpChoice < choiceIndices[mdpState + 1]; ++mdpChoice) {
                // Only consider optimal actions.
                if (overApproximation->actionIsOptimal(mdpChoice)) {
                    // Find out the rating of this choice.
                    gatheredSuccessorObservations.clear();
                    overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
                    for (auto const& obsInfo : gatheredSuccessorObservations) {
                        auto const& obs = obsInfo.first;
                        BeliefValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution);
                        // The rating of an observation is the minimum over all choice-based ratings.
                        resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating);
                    }
                }
            }
        }
    }
    return resultingRatings;
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename PomdpModelType::ValueType BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getGap(
    typename PomdpModelType::ValueType const& l, typename PomdpModelType::ValueType const& u) {
    STORM_LOG_ASSERT(l >= storm::utility::zero<typename PomdpModelType::ValueType>() && u >= storm::utility::zero<typename PomdpModelType::ValueType>(),
                     "Gap computation currently does not handle negative values.");
    if (storm::utility::isInfinity(u)) {
        if (storm::utility::isInfinity(l)) {
            // Both bounds are infinite; the gap is defined to be zero in this case.
            return storm::utility::zero<typename PomdpModelType::ValueType>();
        }
        return u;
    }
    if (storm::utility::isZero(u)) {
        // The lower bound is non-negative, so both bounds (and hence the gap) are zero.
        return u;
    }
    // Return the gap relative to the mean of the two bounds.
    return storm::utility::abs<typename PomdpModelType::ValueType>(u - l) * storm::utility::convertNumber<typename PomdpModelType::ValueType, uint64_t>(2) /
           (l + u);
}
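
// Worked example (added for illustration, not in the original sources): for
// l = 0.4 and u = 0.6 the result is |0.6 - 0.4| * 2 / (0.4 + 0.6) = 0.4, i.e.
// the gap is measured relative to the average of the two bounds, so the same
// absolute difference counts for more when the bounds themselves are small.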
template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<double>>;
template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<double>, storm::RationalNumber>;
template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<storm::RationalNumber>, double>;
template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<storm::RationalNumber>>;
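
// Usage sketch (added for illustration, not part of the original sources): a
// minimal end-to-end invocation of the checker, assuming a parsed, canonic POMDP
// and formula. The spelled-out option type BeliefExplorationPomdpModelCheckerOptions
// is assumed to be the type behind the Options alias used above; the constructor
// flags select discretization and/or unfolding.
//
//   std::shared_ptr<storm::models::sparse::Pomdp<double>> pomdp = ...;  // assumed to exist
//   storm::pomdp::modelchecker::BeliefExplorationPomdpModelCheckerOptions<double> options(/*discretize=*/true, /*unfold=*/true);
//   storm::pomdp::modelchecker::BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<double>> checker(pomdp, options);
//   storm::Environment env;
//   auto result = checker.check(env, *formula);
//   std::cout << "Result in [" << result.lowerBound << ", " << result.upperBound << "]\n";
//   checker.printStatisticsToStream(std::cout);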