namespace modelchecker {

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::Result(ValueType lower, ValueType upper)
    : lowerBound(lower), upperBound(upper) {
    // Intentionally left empty.
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::ValueType
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::diff(bool relative) const {
    ValueType diff = upperBound - lowerBound;
    if (diff < storm::utility::zero<ValueType>()) {
        STORM_LOG_WARN("Upper bound '" << upperBound << "' is smaller than lower bound '" << lowerBound << "': Difference is " << diff << ".");
        diff = storm::utility::zero<ValueType>();
    }
    if (relative && !storm::utility::isZero(upperBound)) {
        diff /= upperBound;
    }
    return diff;
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::updateLowerBound(ValueType const& value) {
    if (value > lowerBound) {
        lowerBound = value;
        return true;
    }
    return false;
}

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::updateUpperBound(ValueType const& value) {
    if (value < upperBound) {
        upperBound = value;
        return true;
    }
    return false;
}
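// Illustrative sketch (not part of the original source): the Result bookkeeping above only
// tightens a bound if the new value actually improves it, e.g.:
//
//   Result result(0.0, 1.0);              // [lowerBound, upperBound] = [0.0, 1.0]
//   result.updateLowerBound(0.4);         // returns true,  result is [0.4, 1.0]
//   result.updateLowerBound(0.3);         // returns false, bound unchanged
//   result.updateUpperBound(0.6);         // returns true,  result is [0.4, 0.6]
//   ValueType remaining = result.diff();  // 0.2; clamped to 0 if the bounds ever cross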
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Statistics::Statistics()
    : beliefMdpDetectedToBeFinite(false),
      refinementFixpointDetected(false),
      overApproximationBuildAborted(false),
      underApproximationBuildAborted(false),
      aborted(false) {
    // Intentionally left empty.
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::BeliefExplorationPomdpModelChecker(std::shared_ptr<PomdpModelType> pomdp,
                                                                                                                       Options options)
    : options(options), inputPomdp(pomdp) {
    STORM_LOG_ASSERT(inputPomdp, "The given POMDP is not initialized.");
    STORM_LOG_ERROR_COND(inputPomdp->isCanonic(), "Input POMDP is not known to be canonic. This might lead to unexpected verification results.");
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::precomputeValueBounds(storm::logic::Formula const& formula,
                                                                                                               storm::Environment const& preProcEnv) {
    // Compute some initial bounds on the values for each state of the POMDP.
    // (The declaration of preProcessingMC is elided in the extracted listing; storm's
    // PreprocessingPomdpValueBoundsModelChecker is assumed here.)
    auto preProcessingMC = storm::pomdp::modelchecker::PreprocessingPomdpValueBoundsModelChecker<ValueType>(pomdp());
    auto initialPomdpValueBounds = preProcessingMC.getValueBounds(preProcEnv, formula);
    pomdpValueBounds.trivialPomdpValueBounds = initialPomdpValueBounds;

    // If clipping is used for rewards, also compute the extreme value bound needed for the correction terms.
    if (options.useClipping && formula.isRewardOperatorFormula()) {
        pomdpValueBounds.extremePomdpValueBound = preProcessingMC.getExtremeValueBound(preProcEnv, formula);
    }
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
    storm::Environment const& env, storm::logic::Formula const& formula,
    std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
    return check(env, formula, env, additionalUnderApproximationBounds);
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
    storm::logic::Formula const& formula, std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
    storm::Environment env;
    return check(env, formula, env, additionalUnderApproximationBounds);
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
    storm::logic::Formula const& formula, storm::Environment const& preProcEnv,
    std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
    storm::Environment env;
    return check(env, formula, preProcEnv, additionalUnderApproximationBounds);
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
    storm::Environment const& env, storm::logic::Formula const& formula, storm::Environment const& preProcEnv,
    std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
    STORM_LOG_ASSERT(options.unfold || options.discretize || options.interactiveUnfolding,
                     "Invoked belief exploration but no task (unfold or discretize) given.");
    // Potentially reset the preprocessed model from a previous call.
    preprocessedPomdp.reset();

    // Reset all collected statistics.
    statistics = Statistics();
    statistics.totalTime.start();
    // Extract the relevant information from the formula.
    auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp(), formula);

    precomputeValueBounds(formula, preProcEnv);
    if (!additionalUnderApproximationBounds.empty()) {
        pomdpValueBounds.fmSchedulerValueList = additionalUnderApproximationBounds;
    }
    uint64_t initialPomdpState = pomdp().getInitialStates().getNextSetIndex(0);
    Result result(pomdpValueBounds.trivialPomdpValueBounds.getHighestLowerBound(initialPomdpState),
                  pomdpValueBounds.trivialPomdpValueBounds.getSmallestUpperBound(initialPomdpState));

    std::optional<std::string> rewardModelName;
    std::set<uint32_t> targetObservations;
    if (formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula()) {
        if (formulaInfo.getTargetStates().observationClosed) {
            targetObservations = formulaInfo.getTargetStates().observations;
        } else {
            // The target states are not closed under observations. Make them so.
            // (The declaration of obsCloser is elided in the extracted listing; storm's
            // MakeStateSetObservationClosed transformer is assumed here.)
            storm::transformer::MakeStateSetObservationClosed<ValueType> obsCloser(inputPomdp);
            std::tie(preprocessedPomdp, targetObservations) = obsCloser.transform(formulaInfo.getTargetStates().states);
        }
        if (formulaInfo.isNonNestedReachabilityProbability()) {
            if (!formulaInfo.getSinkStates().empty()) {
                // Make the sink states absorbing.
                storm::storage::sparse::ModelComponents<ValueType> components;
                auto matrix = pomdp().getTransitionMatrix();
                matrix.makeRowGroupsAbsorbing(formulaInfo.getSinkStates().states);
                components.transitionMatrix = std::move(matrix);
                components.stateLabeling = pomdp().getStateLabeling();
                components.observabilityClasses = pomdp().getObservations();
                if (pomdp().hasChoiceLabeling()) {
                    components.choiceLabeling = pomdp().getChoiceLabeling();
                }
                if (pomdp().hasObservationValuations()) {
                    components.observationValuations = pomdp().getObservationValuations();
                }
                preprocessedPomdp = std::make_shared<storm::models::sparse::Pomdp<ValueType>>(std::move(components), true);
                auto reachableFromSinkStates = storm::utility::graph::getReachableStates(
                    pomdp().getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states);
                reachableFromSinkStates &= ~formulaInfo.getSinkStates().states;
                STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException,
                                "There are sink states that can reach non-sink states. This is currently not supported");
            }
        } else {
            // Expected reward formula.
            rewardModelName = formulaInfo.getRewardModelName();
        }
    } else {
        STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'.");
    }
    // (The exact finiteness check is elided in the extracted listing; a pre-computation detecting
    // a finite belief MDP, such as storm::pomdp::detectFiniteBeliefMdp, is assumed here.)
    if (storm::pomdp::detectFiniteBeliefMdp(pomdp(), formulaInfo.getTargetStates().states)) {
        statistics.beliefMdpDetectedToBeFinite = true;
    }
    if (options.interactiveUnfolding) {
        unfoldInteractively(env, targetObservations, formulaInfo.minimize(), rewardModelName, pomdpValueBounds, result);
    } else {
        refineReachability(env, targetObservations, formulaInfo.minimize(), rewardModelName, pomdpValueBounds, result);
    }
    // A bound is only meaningful if the corresponding approximation was computed at all.
    if ((formulaInfo.minimize() && !options.discretize) || (formulaInfo.maximize() && !options.unfold)) {
        result.lowerBound = -storm::utility::infinity<ValueType>();
    }
    if ((formulaInfo.maximize() && !options.discretize) || (formulaInfo.minimize() && !options.unfold)) {
        result.upperBound = storm::utility::infinity<ValueType>();
    }
    if (storm::utility::resources::isTerminate()) {
        statistics.aborted = true;
    }
    statistics.totalTime.stop();
    return result;
}
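// Usage sketch (illustrative, not part of the original source; the model construction is elided
// and the property string is hypothetical):
//
//   std::shared_ptr<storm::models::sparse::Pomdp<double>> pomdp = ...;  // a canonic POMDP
//   BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<double>>::Options options;
//   options.unfold = true;      // under-approximation via belief-MDP unfolding
//   options.discretize = true;  // over-approximation via grid discretization
//   BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<double>> checker(pomdp, options);
//   auto result = checker.check(*formula, {});  // formula: e.g. Pmax=? [F "goal"]
//   // result.lowerBound and result.upperBound now enclose the optimal value.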
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::printStatisticsToStream(std::ostream& stream) const {
    stream << "##### POMDP Approximation Statistics ######\n";
    stream << "# Input model: \n";
    pomdp().printModelInformationToStream(stream);
    stream << "# Max. Number of states with same observation: " << pomdp().getMaxNrStatesWithSameObservation() << '\n';
    if (statistics.beliefMdpDetectedToBeFinite) {
        stream << "# Pre-computations detected that the belief MDP is finite.\n";
    }
    if (statistics.aborted) {
        stream << "# Computation aborted early\n";
    }

    stream << "# Total check time: " << statistics.totalTime << '\n';

    if (statistics.refinementSteps) {
        stream << "# Number of refinement steps: " << statistics.refinementSteps.value() << '\n';
    }
    if (statistics.refinementFixpointDetected) {
        stream << "# Detected a refinement fixpoint.\n";
    }

    if (statistics.overApproximationStates) {
        stream << "# Number of states in the ";
        if (options.refine) {
            stream << "final ";
        }
        stream << "grid MDP for the over-approximation: ";
        if (statistics.overApproximationBuildAborted) {
            stream << ">=";
        }
        stream << statistics.overApproximationStates.value() << '\n';
        stream << "# Maximal resolution for over-approximation: " << statistics.overApproximationMaxResolution.value() << '\n';
        stream << "# Time spent for building the over-approx grid MDP(s): " << statistics.overApproximationBuildTime << '\n';
        stream << "# Time spent for checking the over-approx grid MDP(s): " << statistics.overApproximationCheckTime << '\n';
    }

    if (statistics.underApproximationStates) {
        stream << "# Number of states in the ";
        if (options.refine) {
            stream << "final ";
        }
        stream << "belief MDP for the under-approximation: ";
        if (statistics.underApproximationBuildAborted) {
            stream << ">=";
        }
        stream << statistics.underApproximationStates.value() << '\n';
        if (statistics.nrClippingAttempts) {
            stream << "# Clipping attempts (clipped states) for the under-approximation: ";
            if (statistics.underApproximationBuildAborted) {
                stream << ">=";
            }
            stream << statistics.nrClippingAttempts.value() << " (" << statistics.nrClippedStates.value() << ")\n";
            stream << "# Total clipping preprocessing time: " << statistics.clippingPreTime << "\n";
            stream << "# Total clipping time: " << statistics.clipWatch << "\n";
        } else if (statistics.nrTruncatedStates) {
            stream << "# Truncated states for the under-approximation: ";
            if (statistics.underApproximationBuildAborted) {
                stream << ">=";
            }
            stream << statistics.nrTruncatedStates.value() << "\n";
        }
        if (statistics.underApproximationStateLimit) {
            stream << "# Exploration state limit for under-approximation: " << statistics.underApproximationStateLimit.value() << '\n';
        }
        stream << "# Time spent for building the under-approx grid MDP(s): " << statistics.underApproximationBuildTime << '\n';
        stream << "# Time spent for checking the under-approx grid MDP(s): " << statistics.underApproximationCheckTime << '\n';
    }

    stream << "##########################################\n";
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
PomdpModelType const& BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::pomdp() const {
    if (preprocessedPomdp) {
        return *preprocessedPomdp;
    }
    return *inputPomdp;
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::refineReachability(
    storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, std::optional<std::string> rewardModelName,
    storm::pomdp::modelchecker::POMDPValueBounds<ValueType> const& valueBounds, Result& result) {
    statistics.refinementSteps = 0;
    auto trivialPOMDPBounds = valueBounds.trivialPomdpValueBounds;
    // Set up the exploration data.
    std::vector<BeliefValueType> observationResolutionVector;
    std::shared_ptr<BeliefManagerType> overApproxBeliefManager;
    std::shared_ptr<ExplorerType> overApproximation;
    HeuristicParameters overApproxHeuristicPar{};
    if (options.discretize) {  // Set up and build the first over-approximation.
        observationResolutionVector =
            std::vector<BeliefValueType>(pomdp().getNrObservations(), storm::utility::convertNumber<BeliefValueType>(options.resolutionInit));
        overApproxBeliefManager = std::make_shared<BeliefManagerType>(
            pomdp(), storm::utility::convertNumber<BeliefValueType>(options.numericPrecision),
            options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
        if (rewardModelName) {
            overApproxBeliefManager->setRewardModel(rewardModelName);
        }
        overApproximation = std::make_shared<ExplorerType>(overApproxBeliefManager, trivialPOMDPBounds, options.explorationHeuristic);
        overApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
        overApproxHeuristicPar.observationThreshold = options.obsThresholdInit;
        overApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit == 0 ? std::numeric_limits<uint64_t>::max() : options.sizeThresholdInit;
        overApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;

        buildOverApproximation(env, targetObservations, min, rewardModelName.has_value(), false, overApproxHeuristicPar, observationResolutionVector,
                               overApproxBeliefManager, overApproximation);
        if (!overApproximation->hasComputedValues() || storm::utility::resources::isTerminate()) {
            return;
        }
        ValueType const& newValue = overApproximation->getComputedValueAtInitialState();
        bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue);
        if (betterBound) {
            STORM_LOG_INFO("Initial Over-approx result obtained after " << statistics.totalTime << ". Value is '" << newValue << "'.\n");
        }
    }
    std::shared_ptr<BeliefManagerType> underApproxBeliefManager;
    std::shared_ptr<ExplorerType> underApproximation;
    HeuristicParameters underApproxHeuristicPar{};
    if (options.unfold) {  // Set up and build the first under-approximation.
        underApproxBeliefManager = std::make_shared<BeliefManagerType>(
            pomdp(), storm::utility::convertNumber<BeliefValueType>(options.numericPrecision),
            options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
        if (rewardModelName) {
            underApproxBeliefManager->setRewardModel(rewardModelName);
        }
        underApproximation = std::make_shared<ExplorerType>(underApproxBeliefManager, trivialPOMDPBounds, options.explorationHeuristic);
        underApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
        underApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
        underApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit;
        if (underApproxHeuristicPar.sizeThreshold == 0) {
            if (!options.refine && options.explorationTimeLimit != 0) {
                underApproxHeuristicPar.sizeThreshold = std::numeric_limits<uint64_t>::max();
            } else {
                underApproxHeuristicPar.sizeThreshold = pomdp().getNumberOfStates() * pomdp().getMaxNrStatesWithSameObservation();
                STORM_LOG_INFO("Heuristically selected an under-approximation MDP size threshold of " << underApproxHeuristicPar.sizeThreshold << ".\n");
            }
        }
        if (options.useClipping && rewardModelName.has_value()) {
            // (The body of this branch is elided in the extracted listing; passing the extreme
            // value bound to the explorer for the clipping correction terms is assumed here.)
            underApproximation->setExtremeValueBound(valueBounds.extremePomdpValueBound);
        }
        buildUnderApproximation(env, targetObservations, min, rewardModelName.has_value(), false, underApproxHeuristicPar, underApproxBeliefManager,
                                underApproximation, false);
        if (!underApproximation->hasComputedValues() || storm::utility::resources::isTerminate()) {
            return;
        }
        ValueType const& newValue = underApproximation->getComputedValueAtInitialState();
        bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue);
        if (betterBound) {
            STORM_LOG_INFO("Initial Under-approx result obtained after " << statistics.totalTime << ". Value is '" << newValue << "'.\n");
        }
    }
    // Print some output.
    STORM_LOG_INFO("Completed (initial) computation. Current checktime is " << statistics.totalTime << ".");
    bool computingLowerBound = false;
    bool computingUpperBound = false;
    if (options.discretize) {
        STORM_LOG_INFO("\tOver-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << ".");
        (min ? computingLowerBound : computingUpperBound) = true;
    }
    if (options.unfold) {
        STORM_LOG_INFO("\tUnder-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << ".");
        (min ? computingUpperBound : computingLowerBound) = true;
    }
    if (computingLowerBound && computingUpperBound) {
        STORM_LOG_INFO("\tObtained result is [" << result.lowerBound << ", " << result.upperBound << "].");
    } else if (computingLowerBound) {
        STORM_LOG_INFO("\tObtained result is ≥" << result.lowerBound << ".");
    } else if (computingUpperBound) {
        STORM_LOG_INFO("\tObtained result is ≤" << result.upperBound << ".");
    }
    // Start the refinement.
    if (options.refine) {
        STORM_LOG_WARN_COND(options.refineStepLimit != 0 || !storm::utility::isZero(options.refinePrecision) || options.explorationTimeLimit != 0,
                            "No termination criterion for refinement given. Consider to specify a steplimit, a non-zero precisionlimit, or a timeout");
        STORM_LOG_WARN_COND(storm::utility::isZero(options.refinePrecision) || (options.unfold && options.discretize),
                            "Refinement goal precision is given, but only one bound is going to be refined.");
        while ((options.refineStepLimit == 0 || statistics.refinementSteps.value() < options.refineStepLimit) && result.diff() > options.refinePrecision) {
            bool overApproxFixPoint = true;
            bool underApproxFixPoint = true;
            if (options.discretize) {
                // Refine the over-approximation.
                if (min) {
                    overApproximation->takeCurrentValuesAsLowerBounds();
                } else {
                    overApproximation->takeCurrentValuesAsUpperBounds();
                }
                overApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor;
                overApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber<uint64_t, ValueType>(
                    storm::utility::convertNumber<ValueType, uint64_t>(overApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor);
                overApproxHeuristicPar.observationThreshold +=
                    options.obsThresholdIncrementFactor * (storm::utility::one<ValueType>() - overApproxHeuristicPar.observationThreshold);
                overApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor;
                overApproxFixPoint = buildOverApproximation(env, targetObservations, min, rewardModelName.has_value(), true, overApproxHeuristicPar,
                                                            observationResolutionVector, overApproxBeliefManager, overApproximation);
                if (overApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) {
                    ValueType const& newValue = overApproximation->getComputedValueAtInitialState();
                    bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue);
                    if (betterBound) {
                        STORM_LOG_INFO("Over-approx result for refinement improved after " << statistics.totalTime << " in refinement step #"
                                                                                           << (statistics.refinementSteps.value() + 1) << ". New value is '"
                                                                                           << newValue << "'.");
                    }
                } else {
                    break;
                }
            }

            if (options.unfold && result.diff() > options.refinePrecision) {
                // Refine the under-approximation.
                underApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor;
                underApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber<uint64_t, ValueType>(
                    storm::utility::convertNumber<ValueType, uint64_t>(underApproximation->getExploredMdp()->getNumberOfStates()) *
                    options.sizeThresholdFactor);
                underApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor;
                underApproxFixPoint = buildUnderApproximation(env, targetObservations, min, rewardModelName.has_value(), true, underApproxHeuristicPar,
                                                              underApproxBeliefManager, underApproximation, true);
                if (underApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) {
                    ValueType const& newValue = underApproximation->getComputedValueAtInitialState();
                    bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue);
                    if (betterBound) {
                        STORM_LOG_INFO("Under-approx result for refinement improved after " << statistics.totalTime << " in refinement step #"
                                                                                            << (statistics.refinementSteps.value() + 1) << ". New value is '"
                                                                                            << newValue << "'.");
                    }
                } else {
                    break;
                }
            }

            if (storm::utility::resources::isTerminate()) {
                break;
            }

            ++statistics.refinementSteps.value();
            // Don't make too many outputs (to avoid logfile clutter).
            if (statistics.refinementSteps.value() <= 1000) {
                STORM_LOG_INFO("Completed iteration #" << statistics.refinementSteps.value() << ". Current checktime is " << statistics.totalTime << ".");
                computingLowerBound = false;
                computingUpperBound = false;
                if (options.discretize) {
                    STORM_LOG_INFO("\tOver-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << ".");
                    (min ? computingLowerBound : computingUpperBound) = true;
                }
                if (options.unfold) {
                    STORM_LOG_INFO("\tUnder-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << ".");
                    (min ? computingUpperBound : computingLowerBound) = true;
                }
                if (computingLowerBound && computingUpperBound) {
                    STORM_LOG_INFO("\tCurrent result is [" << result.lowerBound << ", " << result.upperBound << "].");
                } else if (computingLowerBound) {
                    STORM_LOG_INFO("\tCurrent result is ≥" << result.lowerBound << ".");
                } else if (computingUpperBound) {
                    STORM_LOG_INFO("\tCurrent result is ≤" << result.upperBound << ".");
                }
                STORM_LOG_WARN_COND(statistics.refinementSteps.value() < 1000, "Refinement requires more than 1000 iterations.");
            }
            if (overApproxFixPoint && underApproxFixPoint) {
                STORM_LOG_INFO("Refinement fixpoint reached after " << statistics.refinementSteps.value() << " iterations.\n");
                statistics.refinementFixpointDetected = true;
                break;
            }
        }
    }
    // Print model information of the final over- / under-approximation MDP.
    if (options.discretize && overApproximation->hasComputedValues()) {
        auto printOverInfo = [&overApproximation]() {
            std::stringstream str;
            str << "Explored and checked Over-Approximation MDP:\n";
            overApproximation->getExploredMdp()->printModelInformationToStream(str);
            return str.str();
        };
        STORM_LOG_INFO(printOverInfo());
    }
    if (options.unfold && underApproximation->hasComputedValues()) {
        auto printUnderInfo = [&underApproximation]() {
            std::stringstream str;
            str << "Explored and checked Under-Approximation MDP:\n";
            underApproximation->getExploredMdp()->printModelInformationToStream(str);
            return str.str();
        };
        STORM_LOG_INFO(printUnderInfo());
        std::shared_ptr<storm::models::sparse::Model<ValueType>> scheduledModel = underApproximation->getExploredMdp();
        if (!options.useStateEliminationCutoff) {
            storm::models::sparse::StateLabeling newLabeling(scheduledModel->getStateLabeling());
            auto nrPreprocessingScheds = min ? underApproximation->getNrSchedulersForUpperBounds() : underApproximation->getNrSchedulersForLowerBounds();
            for (uint64_t i = 0; i < nrPreprocessingScheds; ++i) {
                newLabeling.addLabel("sched_" + std::to_string(i));
            }
            newLabeling.addLabel("cutoff");
            newLabeling.addLabel("clipping");
            newLabeling.addLabel("finite_mem");

            auto transMatrix = scheduledModel->getTransitionMatrix();
            for (uint64_t i = 0; i < scheduledModel->getNumberOfStates(); ++i) {
                if (newLabeling.getStateHasLabel("truncated", i)) {
                    uint64_t localChosenActionIndex = underApproximation->getSchedulerForExploredMdp()->getChoice(i).getDeterministicChoice();
                    auto rowIndex = scheduledModel->getTransitionMatrix().getRowGroupIndices()[i];
                    if (scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).size() > 0) {
                        auto label = *(scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).begin());
                        if (label.rfind("clip", 0) == 0) {
                            newLabeling.addLabelToState("clipping", i);
                            auto chosenRow = transMatrix.getRow(i, 0);
                            auto candidateIndex = (chosenRow.end() - 1)->getColumn();
                            transMatrix.makeRowDirac(transMatrix.getRowGroupIndices()[i], candidateIndex);
                        } else if (label.rfind("mem_node", 0) == 0) {
                            if (!newLabeling.containsLabel("finite_mem_" + label.substr(9, 1))) {
                                newLabeling.addLabel("finite_mem_" + label.substr(9, 1));
                            }
                            newLabeling.addLabelToState("finite_mem_" + label.substr(9, 1), i);
                            newLabeling.addLabelToState("cutoff", i);
                        } else {
                            newLabeling.addLabelToState(label, i);
                            newLabeling.addLabelToState("cutoff", i);
                        }
                    }
                }
            }
            newLabeling.removeLabel("truncated");

            transMatrix.dropZeroEntries();
            storm::storage::sparse::ModelComponents<ValueType> modelComponents(transMatrix, newLabeling);
            if (scheduledModel->hasChoiceLabeling()) {
                modelComponents.choiceLabeling = scheduledModel->getChoiceLabeling();
            }
            storm::models::sparse::Mdp<ValueType> newMDP(modelComponents);
            auto inducedMC = newMDP.applyScheduler(*(underApproximation->getSchedulerForExploredMdp()), true);
            scheduledModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType>>(inducedMC);
        } else {
            auto inducedMC = underApproximation->getExploredMdp()->applyScheduler(*(underApproximation->getSchedulerForExploredMdp()), true);
            scheduledModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType>>(inducedMC);
        }
        result.schedulerAsMarkovChain = scheduledModel;
        if (min) {
            result.cutoffSchedulers = underApproximation->getUpperValueBoundSchedulers();
        } else {
            result.cutoffSchedulers = underApproximation->getLowerValueBoundSchedulers();
        }
    }
}
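// Illustrative configuration sketch (not part of the original source): the refinement loop above
// runs until result.diff() drops to options.refinePrecision or options.refineStepLimit many steps
// were taken, so a caller enabling options.refine should set at least one termination criterion:
//
//   options.refine = true;
//   options.refineStepLimit = 20;    // stop after at most 20 refinement steps, and/or
//   options.refinePrecision = 1e-4;  // stop once the remaining gap is small enough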
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::unfoldInteractively(
    storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, std::optional<std::string> rewardModelName,
    storm::pomdp::modelchecker::POMDPValueBounds<ValueType> const& valueBounds, Result& result) {
    statistics.refinementSteps = 0;
    interactiveResult = result;
    unfoldingStatus = Status::Uninitialized;
    unfoldingControl = UnfoldingControl::Run;
    auto trivialPOMDPBounds = valueBounds.trivialPomdpValueBounds;
    // Set up the exploration data.
    std::shared_ptr<BeliefManagerType> underApproxBeliefManager;
    HeuristicParameters underApproxHeuristicPar{};
    bool firstIteration = true;
    // Set up the belief manager.
    underApproxBeliefManager = std::make_shared<BeliefManagerType>(
        pomdp(), storm::utility::convertNumber<BeliefValueType>(options.numericPrecision),
        options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
    if (rewardModelName) {
        underApproxBeliefManager->setRewardModel(rewardModelName);
    }

    // Set up the explorer.
    interactiveUnderApproximationExplorer = std::make_shared<ExplorerType>(underApproxBeliefManager, trivialPOMDPBounds, options.explorationHeuristic);
    underApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
    underApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
    underApproxHeuristicPar.sizeThreshold = std::numeric_limits<uint64_t>::max() - 1;  // Size is not a limiting factor for interactive unfolding

    if (options.useClipping && rewardModelName.has_value()) {
        // (The body of this branch is elided in the extracted listing; passing the extreme value
        // bound to the explorer for the clipping correction terms is assumed here.)
        interactiveUnderApproximationExplorer->setExtremeValueBound(valueBounds.extremePomdpValueBound);
    }
    // Keep unfolding until a terminate command is issued.
    while (!(unfoldingControl == UnfoldingControl::Terminate)) {
        bool underApproxFixPoint = true;
        bool hasTruncatedStates = false;
        if (unfoldingStatus != Status::Converged) {
            // Continue the exploration.
            underApproxFixPoint = buildUnderApproximation(env, targetObservations, min, rewardModelName.has_value(), false, underApproxHeuristicPar,
                                                          underApproxBeliefManager, interactiveUnderApproximationExplorer, firstIteration);
            if (interactiveUnderApproximationExplorer->hasComputedValues() && !storm::utility::resources::isTerminate()) {
                ValueType const& newValue = interactiveUnderApproximationExplorer->getComputedValueAtInitialState();
                bool betterBound = min ? interactiveResult.updateUpperBound(newValue) : interactiveResult.updateLowerBound(newValue);
                if (betterBound) {
                    STORM_LOG_INFO("Under-approximation result improved after " << statistics.totalTime << " in step #"
                                                                                << (statistics.refinementSteps.value() + 1) << ". New value is '" << newValue
                                                                                << "'.");
                }
                std::shared_ptr<storm::models::sparse::Model<ValueType>> scheduledModel = interactiveUnderApproximationExplorer->getExploredMdp();
                if (!options.useStateEliminationCutoff) {
                    storm::models::sparse::StateLabeling newLabeling(scheduledModel->getStateLabeling());
                    auto nrPreprocessingScheds = min ? interactiveUnderApproximationExplorer->getNrSchedulersForUpperBounds()
                                                     : interactiveUnderApproximationExplorer->getNrSchedulersForLowerBounds();
                    for (uint64_t i = 0; i < nrPreprocessingScheds; ++i) {
                        newLabeling.addLabel("sched_" + std::to_string(i));
                    }
                    newLabeling.addLabel("cutoff");
                    newLabeling.addLabel("clipping");
                    newLabeling.addLabel("finite_mem");

                    auto transMatrix = scheduledModel->getTransitionMatrix();
                    for (uint64_t i = 0; i < scheduledModel->getNumberOfStates(); ++i) {
                        if (newLabeling.getStateHasLabel("truncated", i)) {
                            hasTruncatedStates = true;
                            uint64_t localChosenActionIndex =
                                interactiveUnderApproximationExplorer->getSchedulerForExploredMdp()->getChoice(i).getDeterministicChoice();
                            auto rowIndex = scheduledModel->getTransitionMatrix().getRowGroupIndices()[i];
                            if (scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).size() > 0) {
                                auto label = *(scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).begin());
                                if (label.rfind("clip", 0) == 0) {
                                    newLabeling.addLabelToState("clipping", i);
                                    auto chosenRow = transMatrix.getRow(i, 0);
                                    auto candidateIndex = (chosenRow.end() - 1)->getColumn();
                                    transMatrix.makeRowDirac(transMatrix.getRowGroupIndices()[i], candidateIndex);
                                } else if (label.rfind("mem_node", 0) == 0) {
                                    if (!newLabeling.containsLabel("finite_mem_" + label.substr(9, 1))) {
                                        newLabeling.addLabel("finite_mem_" + label.substr(9, 1));
                                    }
                                    newLabeling.addLabelToState("finite_mem_" + label.substr(9, 1), i);
                                    newLabeling.addLabelToState("cutoff", i);
                                } else {
                                    newLabeling.addLabelToState(label, i);
                                    newLabeling.addLabelToState("cutoff", i);
                                }
                            }
                        }
                    }
                    newLabeling.removeLabel("truncated");

                    transMatrix.dropZeroEntries();
                    storm::storage::sparse::ModelComponents<ValueType> modelComponents(transMatrix, newLabeling);
                    if (scheduledModel->hasChoiceLabeling()) {
                        modelComponents.choiceLabeling = scheduledModel->getChoiceLabeling();
                    }
                    storm::models::sparse::Mdp<ValueType> newMDP(modelComponents);
                    auto inducedMC = newMDP.applyScheduler(*(interactiveUnderApproximationExplorer->getSchedulerForExploredMdp()), true);
                    scheduledModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType>>(inducedMC);
                }
                interactiveResult.schedulerAsMarkovChain = scheduledModel;
                if (min) {
                    interactiveResult.cutoffSchedulers = interactiveUnderApproximationExplorer->getUpperValueBoundSchedulers();
                } else {
                    interactiveResult.cutoffSchedulers = interactiveUnderApproximationExplorer->getLowerValueBoundSchedulers();
                }
                if (firstIteration) {
                    firstIteration = false;
                }
                unfoldingStatus = Status::ResultAvailable;
            } else {
                break;
            }

            if (storm::utility::resources::isTerminate()) {
                break;
            }

            ++statistics.refinementSteps.value();
            // Don't make too many outputs (to avoid logfile clutter).
            if (statistics.refinementSteps.value() <= 1000) {
                STORM_LOG_INFO("Completed iteration #" << statistics.refinementSteps.value() << ". Current checktime is " << statistics.totalTime << ".");
                bool computingLowerBound = false;
                bool computingUpperBound = false;
                if (options.unfold) {
                    STORM_LOG_INFO("\tUnder-approx MDP has size " << interactiveUnderApproximationExplorer->getExploredMdp()->getNumberOfStates() << ".");
                    (min ? computingUpperBound : computingLowerBound) = true;
                }
                if (computingLowerBound && computingUpperBound) {
                    STORM_LOG_INFO("\tCurrent result is [" << interactiveResult.lowerBound << ", " << interactiveResult.upperBound << "].");
                } else if (computingLowerBound) {
                    STORM_LOG_INFO("\tCurrent result is ≥" << interactiveResult.lowerBound << ".");
                } else if (computingUpperBound) {
                    STORM_LOG_INFO("\tCurrent result is ≤" << interactiveResult.upperBound << ".");
                }
            }
        }
        if (underApproxFixPoint) {
            STORM_LOG_INFO("Fixpoint reached after " << statistics.refinementSteps.value() << " iterations.\n");
            statistics.refinementFixpointDetected = true;
            unfoldingStatus = Status::Converged;
            unfoldingControl = UnfoldingControl::Pause;
        }
        if (!hasTruncatedStates) {
            STORM_LOG_INFO("No states have been truncated, so continued iteration does not yield new results.\n");
            unfoldingStatus = Status::Converged;
            unfoldingControl = UnfoldingControl::Pause;
        }
        // Wait for the next control command while paused.
        while (unfoldingControl == UnfoldingControl::Pause && !storm::utility::resources::isTerminate()) {
            // Busy waiting for a command from the controlling process.
        }
    }
}
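// Illustrative sketch (not part of the original source; the local variable names are
// hypothetical): unfoldInteractively blocks until a terminate command arrives, so a controller
// typically runs it on a worker thread and steers it via the control methods defined further below:
//
//   std::thread worker([&] { checker.unfoldInteractively(env, targetObservations, min, rewardModelName, valueBounds, result); });
//   while (!checker.isResultReady()) { /* wait for a first result */ }
//   checker.pauseUnfolding();                            // exploration halts at a safe point
//   auto intermediate = checker.getInteractiveResult();  // inspect the current bounds
//   checker.continueUnfolding();                         // resume, or ...
//   checker.terminateUnfolding();                        // ... stop the loop for good
//   worker.join();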
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::unfoldInteractively(
    std::set<uint32_t> const& targetObservations, bool min, std::optional<std::string> rewardModelName,
    storm::pomdp::modelchecker::POMDPValueBounds<ValueType> const& valueBounds, Result& result) {
    storm::Environment env;
    unfoldInteractively(env, targetObservations, min, rewardModelName, valueBounds, result);
}

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getInteractiveResult() {
    return interactiveResult;
}

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
int64_t BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getStatus() {
    // Map the internal status to an integer code. (The concrete return values are elided in the
    // extracted listing; the enum order 0..4 with -1 as fallback is assumed here.)
    if (unfoldingStatus == Status::Uninitialized)
        return 0;
    if (unfoldingStatus == Status::Exploring)
        return 1;
    if (unfoldingStatus == Status::ModelExplorationFinished)
        return 2;
    if (unfoldingStatus == Status::ResultAvailable)
        return 3;
    if (unfoldingStatus == Status::Terminated)
        return 4;
    return -1;
}
template<typename ValueType>
ValueType getGap(ValueType const& l, ValueType const& u) {
    STORM_LOG_ASSERT(l >= storm::utility::zero<ValueType>() && u >= storm::utility::zero<ValueType>(),
                     "Gap computation currently does not handle negative values.");
    if (storm::utility::isInfinity(u)) {
        if (storm::utility::isInfinity(l)) {
            return storm::utility::zero<ValueType>();
        }
        return u;
    }
    if (storm::utility::isZero(u)) {
        STORM_LOG_ASSERT(storm::utility::isZero(l), "Upper bound is zero, but lower bound is " << l << ".");
        return storm::utility::zero<ValueType>();
    }
    // Return the relative gap.
    return storm::utility::abs<ValueType>(u - l) * storm::utility::convertNumber<ValueType, uint64_t>(2) / (l + u);
}
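// Worked example (illustrative): for l = 0.4 and u = 0.6 the relative gap is
// |0.6 - 0.4| * 2 / (0.4 + 0.6) = 0.4, i.e. the interval width measured relative to the
// midpoint (l + u) / 2. Equal bounds yield gap 0; l = 0 with u > 0 yields the maximal gap 2.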
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::buildOverApproximation(
    storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, bool refine,
    HeuristicParameters const& heuristicParameters, std::vector<BeliefValueType>& observationResolutionVector,
    std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
    // Detect whether the refinement reaches a fixpoint.
    bool fixPoint = true;

    statistics.overApproximationBuildTime.start();
    storm::storage::BitVector refinedObservations;
    if (!refine) {
        // If we build the model from scratch, we first have to set up the explorer.
        if (computeRewards) {
            overApproximation->startNewExploration(storm::utility::zero<ValueType>());
        } else {
            overApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
        }
    } else {
        // If we refine the existing over-approximation, the heuristic also needs to know which
        // states are reachable under an optimal policy.
        overApproximation->computeOptimalChoicesAndReachableMdpStates(heuristicParameters.optimalChoiceValueEpsilon, true);
        // Find out which observation resolutions need refinement.
        auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector);
        // If there is a rating below 1, we have not reached a fixpoint, yet.
        auto numericPrecision = storm::utility::convertNumber<BeliefValueType>(options.numericPrecision);
        if (std::any_of(obsRatings.begin(), obsRatings.end(),
                        [&numericPrecision](BeliefValueType const& value) { return value + numericPrecision < storm::utility::one<BeliefValueType>(); })) {
            STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because there are still observations to refine.");
            fixPoint = false;
        }
        refinedObservations = storm::utility::vector::filter<BeliefValueType>(obsRatings, [&heuristicParameters](BeliefValueType const& r) {
            return r <= storm::utility::convertNumber<BeliefValueType>(heuristicParameters.observationThreshold);
        });
        for (auto const obs : refinedObservations) {
            // Increase the resolution; use exact rationals to detect overflows.
            storm::RationalNumber newObsResolutionAsRational = storm::utility::convertNumber<storm::RationalNumber>(observationResolutionVector[obs]) *
                                                               storm::utility::convertNumber<storm::RationalNumber>(options.resolutionFactor);
            if (newObsResolutionAsRational > storm::utility::convertNumber<storm::RationalNumber>(std::numeric_limits<double>::max())) {
                observationResolutionVector[obs] = storm::utility::convertNumber<BeliefValueType>(std::numeric_limits<double>::max());
            } else {
                observationResolutionVector[obs] = storm::utility::convertNumber<BeliefValueType>(newObsResolutionAsRational);
            }
        }
        overApproximation->restartExploration();
    }
    statistics.overApproximationMaxResolution = storm::utility::ceil(*std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()));

    // Start the exploration.
    storm::utility::Stopwatch explorationTime;
    if (options.explorationTimeLimit != 0) {
        explorationTime.start();
    }
    bool timeLimitExceeded = false;
    std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations;  // Declared here to avoid reallocations
    uint64_t numRewiredOrExploredStates = 0;
    while (overApproximation->hasUnexploredState()) {
        if (!timeLimitExceeded && options.explorationTimeLimit != 0 &&
            static_cast<uint64_t>(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit) {
            STORM_LOG_INFO("Exploration time limit exceeded.");
            timeLimitExceeded = true;
            STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because the exploration time limit is exceeded.");
            fixPoint = false;
        }

        uint64_t currId = overApproximation->exploreNextState();
        bool hasOldBehavior = refine && overApproximation->currentStateHasOldBehavior();
        if (!hasOldBehavior) {
            STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because a new state is explored");
            fixPoint = false;  // Exploring a new state!
        }
        uint32_t currObservation = beliefManager->getBeliefObservation(currId);
        if (targetObservations.count(currObservation) != 0) {
            overApproximation->setCurrentStateIsTarget();
            overApproximation->addSelfloopTransition();
        } else {
            // Decide how to treat this state (and each individual enabled action): explore it
            // fully, truncate it, restore its old behavior, or rewire it.
            bool exploreAllActions = false;
            bool truncateAllActions = false;
            bool restoreAllActions = false;
            bool checkRewireForAllActions = false;
            // Get the relative gap between the value bounds at this state.
            ValueType gap = getGap(overApproximation->getLowerValueBoundAtCurrentState(), overApproximation->getUpperValueBoundAtCurrentState());
            if (!hasOldBehavior) {
                // A new state: decide whether to explore or truncate it.
                if (!timeLimitExceeded && gap >= heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
                    exploreAllActions = true;
                } else {
                    truncateAllActions = true;
                    overApproximation->setCurrentStateIsTruncated();
                }
            } else if (overApproximation->getCurrentStateWasTruncated()) {
                // A previously truncated state: decide whether to explore it now.
                if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold &&
                    numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
                    exploreAllActions = true;
                    STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because a previously truncated state is now explored.");
                    fixPoint = false;
                } else {
                    truncateAllActions = true;
                    overApproximation->setCurrentStateIsTruncated();
                    if (fixPoint && overApproximation->currentStateIsOptimalSchedulerReachable() && !storm::utility::isZero(gap)) {
                        STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because we truncate a state with non-zero gap "
                                                           << gap << " that is reachable via an optimal sched.");
                        fixPoint = false;
                    }
                }
            } else {
                // A previously explored state: check whether its actions should be rewired.
                if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold &&
                    numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
                    checkRewireForAllActions = true;
                } else {
                    restoreAllActions = true;
                    // We still need to check for each action whether rewiring would be necessary.
                    checkRewireForAllActions = true;
                }
            }
            bool expandedAtLeastOneAction = false;
            for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
                bool expandCurrentAction = exploreAllActions || truncateAllActions;
                if (checkRewireForAllActions) {
                    assert(refine);
                    // Check whether this action needs to be rewired.
                    assert(!expandCurrentAction);
                    assert(overApproximation->currentStateHasOldBehavior());
                    if (overApproximation->getCurrentStateActionExplorationWasDelayed(action) ||
                        overApproximation->currentStateHasSuccessorObservationInObservationSet(action, refinedObservations)) {
                        // Rewiring is only required if the action was optimal.
                        if (!restoreAllActions && overApproximation->actionAtCurrentStateWasOptimal(action)) {
                            // Rewire.
                            expandCurrentAction = true;
                            STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because we rewire a state.");
                            fixPoint = false;
                        } else {
                            // Delay the rewiring.
                            overApproximation->setCurrentChoiceIsDelayed(action);
                            if (fixPoint && (!overApproximation->getCurrentStateActionExplorationWasDelayed(action) ||
                                             (overApproximation->currentStateIsOptimalSchedulerReachable() && !storm::utility::isZero(gap)))) {
                                STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because we delay a rewiring of a state with non-zero gap "
                                                                   << gap << " that is reachable via an optimal scheduler.");
                                fixPoint = false;
                            }
                        }
                    }
                }
                if (expandCurrentAction) {
                    expandedAtLeastOneAction = true;
                    if (!truncateAllActions) {
                        // Fully expand this action.
                        auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
                        for (auto const& successor : successorGridPoints) {
                            overApproximation->addTransitionToBelief(action, successor.first, successor.second, false);
                        }
                        if (computeRewards) {
                            overApproximation->computeRewardAtCurrentState(action);
                        }
                    } else {
                        // Truncate this action.
                        auto truncationProbability = storm::utility::zero<ValueType>();
                        auto truncationValueBound = storm::utility::zero<ValueType>();
                        auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
                        for (auto const& successor : successorGridPoints) {
                            bool added = overApproximation->addTransitionToBelief(action, successor.first, successor.second, true);
                            if (!added) {
                                // The successor is not explored; obtain a bound on the truncated value.
                                truncationProbability += successor.second;
                                truncationValueBound += successor.second * (min ? overApproximation->computeLowerValueBoundAtBelief(successor.first)
                                                                                : overApproximation->computeUpperValueBoundAtBelief(successor.first));
                            }
                        }
                        if (computeRewards) {
                            // The truncationValueBound is added on top of the reward at this state.
                            overApproximation->addTransitionsToExtraStates(action, truncationProbability);
                            overApproximation->computeRewardAtCurrentState(action, truncationValueBound);
                        } else {
                            overApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
                        }
                    }
                } else {
                    // Restore the old behavior of this action.
                    overApproximation->restoreOldBehaviorAtCurrentState(action);
                }
            }
            if (expandedAtLeastOneAction) {
                ++numRewiredOrExploredStates;
            }
            // Copy the choice labels of the representative POMDP state.
            for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
                if (pomdp().hasChoiceLabeling()) {
                    auto rowIndex = pomdp().getTransitionMatrix().getRowGroupIndices()[beliefManager->getRepresentativeState(currId)];
                    if (pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).size() > 0) {
                        overApproximation->addChoiceLabelToCurrentState(action, *(pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).begin()));
                    }
                }
            }
        }
        if (storm::utility::resources::isTerminate()) {
            break;
        }
    }

    if (storm::utility::resources::isTerminate()) {
        // The exploration was aborted; count the states explored so far.
        if (!statistics.overApproximationStates) {
            statistics.overApproximationBuildAborted = true;
            statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates();
        }
        statistics.overApproximationBuildTime.stop();
        return false;
    }

    overApproximation->finishExploration();
    statistics.overApproximationBuildTime.stop();

    statistics.overApproximationCheckTime.start();
    overApproximation->computeValuesOfExploredMdp(env, min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
    statistics.overApproximationCheckTime.stop();

    // Don't overwrite statistics of a previous, successful computation.
    if (!storm::utility::resources::isTerminate() || !statistics.overApproximationStates) {
        statistics.overApproximationStates = overApproximation->getExploredMdp()->getNumberOfStates();
    }
    return fixPoint;
}
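// Illustrative numeric sketch (not part of the original source; the concrete option values are
// hypothetical): with options.resolutionInit = 2 and options.resolutionFactor = 4, an observation
// whose rating stays below the observation threshold is refined to resolutions 2, 8, 32, ... .
// The multiplication is performed with exact rationals and, as coded above, saturates at the
// largest representable double instead of overflowing.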
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::buildUnderApproximation(
    storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, bool refine,
    HeuristicParameters const& heuristicParameters, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation,
    bool firstIteration) {
    statistics.underApproximationBuildTime.start();

    unfoldingStatus = Status::Exploring;
    if (options.useClipping) {
        STORM_LOG_INFO("Use Belief Clipping with grid beliefs \n");
        statistics.nrClippingAttempts = 0;
        statistics.nrClippedStates = 0;
    }

    uint64_t nrCutoffStrategies = min ? underApproximation->getNrSchedulersForUpperBounds() : underApproximation->getNrSchedulersForLowerBounds();

    bool fixPoint = true;
    if (heuristicParameters.sizeThreshold != std::numeric_limits<uint64_t>::max()) {
        statistics.underApproximationStateLimit = heuristicParameters.sizeThreshold;
    }
    if (!refine) {
        if (options.interactiveUnfolding && !firstIteration) {
            // Continue a previously stored exploration.
            underApproximation->restoreExplorationState();
        } else if (computeRewards) {
            // Build a new under-approximation; use the sink state for infinite cut-off/clipping values.
            underApproximation->startNewExploration(storm::utility::zero<ValueType>(), storm::utility::infinity<ValueType>());
        } else {
            underApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
        }
    } else {
        // Restart the building process.
        underApproximation->restartExploration();
    }

    // Expand the beliefs.
    storm::utility::Stopwatch explorationTime;
    storm::utility::Stopwatch printUpdateStopwatch;
    printUpdateStopwatch.start();
    if (options.explorationTimeLimit != 0) {
        explorationTime.start();
    }
    bool timeLimitExceeded = false;
    bool stateStored = false;
    while (underApproximation->hasUnexploredState()) {
        if (!timeLimitExceeded && options.explorationTimeLimit != 0 &&
            static_cast<uint64_t>(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit) {
            STORM_LOG_INFO("Exploration time limit exceeded.");
            timeLimitExceeded = true;
        }
        if (printUpdateStopwatch.getTimeInSeconds() >= 60) {
            printUpdateStopwatch.restart();
            STORM_LOG_INFO("### " << underApproximation->getCurrentNumberOfMdpStates() << " beliefs in underapproximation MDP"
                                  << " ##### " << underApproximation->getUnexploredStates().size() << " beliefs queued\n");
            if (underApproximation->getCurrentNumberOfMdpStates() > heuristicParameters.sizeThreshold && options.useClipping) {
                STORM_LOG_INFO("##### Clipping Attempts: " << statistics.nrClippingAttempts.value() << " ##### "
                                                           << "Clipped States: " << statistics.nrClippedStates.value() << "\n");
            }
        }
        if (unfoldingControl == UnfoldingControl::Pause && !stateStored) {
            underApproximation->storeExplorationState();
            stateStored = true;
        }
        uint64_t currId = underApproximation->exploreNextState();
        uint32_t currObservation = beliefManager->getBeliefObservation(currId);
        uint64_t addedActions = 0;
        bool stateAlreadyExplored = refine && underApproximation->currentStateHasOldBehavior() && !underApproximation->getCurrentStateWasTruncated();
        if (!stateAlreadyExplored || timeLimitExceeded) {
            fixPoint = false;
        }
        if (targetObservations.count(beliefManager->getBeliefObservation(currId)) != 0) {
            underApproximation->setCurrentStateIsTarget();
            underApproximation->addSelfloopTransition();
            underApproximation->addChoiceLabelToCurrentState(0, "loop");
        } else {
            bool stopExploration = false;
            bool clipBelief = false;
            if (timeLimitExceeded || (options.interactiveUnfolding && unfoldingControl != UnfoldingControl::Run)) {
                clipBelief = options.useClipping;
                stopExploration = !underApproximation->isMarkedAsGridBelief(currId);
            } else if (!stateAlreadyExplored) {
                // Check whether we want to explore this state now.
                ValueType gap = getGap(underApproximation->getLowerValueBoundAtCurrentState(), underApproximation->getUpperValueBoundAtCurrentState());
                if ((gap < heuristicParameters.gapThreshold) || (gap == 0 && options.cutZeroGap)) {
                    stopExploration = true;
                } else if (underApproximation->getCurrentNumberOfMdpStates() >= heuristicParameters.sizeThreshold) {
                    clipBelief = options.useClipping;
                    stopExploration = !underApproximation->isMarkedAsGridBelief(currId);
                }
            }

            if (clipBelief && !underApproximation->isMarkedAsGridBelief(currId)) {
                // Use a belief grid as clipping candidates.
                if (!options.useStateEliminationCutoff) {
                    bool successfulClip = clipToGridExplicitly(currId, computeRewards, beliefManager, underApproximation, 0);
                    // Re-evaluate, as the current belief might have been detected to be a grid belief.
                    stopExploration = !underApproximation->isMarkedAsGridBelief(currId);
                    if (successfulClip) {
                        addedActions += 1;
                    }
                } else {
                    clipToGrid(currId, computeRewards, min, beliefManager, underApproximation);
                    addedActions += beliefManager->getBeliefNumberOfChoices(currId);
                }
            }  // end of the clipping procedure

            if (stopExploration) {
                underApproximation->setCurrentStateIsTruncated();
            }
            if (options.useStateEliminationCutoff || !stopExploration) {
                // Add successor transitions, or cut-off transitions when exploration is stopped.
                uint64_t numActions = beliefManager->getBeliefNumberOfChoices(currId);
                if (underApproximation->needsActionAdjustment(numActions)) {
                    underApproximation->adjustActions(numActions);
                }
                for (uint64_t action = 0; action < numActions; ++action) {
                    // Copy the choice labels of the representative POMDP state.
                    if (pomdp().hasChoiceLabeling()) {
                        auto rowIndex = pomdp().getTransitionMatrix().getRowGroupIndices()[beliefManager->getRepresentativeState(currId)];
                        if (pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).size() > 0) {
                            underApproximation->addChoiceLabelToCurrentState(addedActions + action,
                                                                             *(pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).begin()));
                        }
                    }
                    if (stateAlreadyExplored) {
                        // Always restore the old behavior if it is available.
                        underApproximation->restoreOldBehaviorAtCurrentState(action);
                    } else {
                        auto truncationProbability = storm::utility::zero<ValueType>();
                        auto truncationValueBound = storm::utility::zero<ValueType>();
                        auto successors = beliefManager->expand(currId, action);
                        for (auto const& successor : successors) {
                            bool added = underApproximation->addTransitionToBelief(addedActions + action, successor.first, successor.second, stopExploration);
                            if (!added) {
                                STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
                                // The successor is not explored; obtain a bound on the truncated value.
                                truncationProbability += successor.second;
                                truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first)
                                                                                : underApproximation->computeLowerValueBoundAtBelief(successor.first));
                            }
                        }
                        if (stopExploration) {
                            if (computeRewards) {
                                if (truncationValueBound == storm::utility::infinity<ValueType>()) {
                                    underApproximation->addTransitionsToExtraStates(addedActions + action, storm::utility::zero<ValueType>(),
                                                                                    truncationProbability);
                                } else {
                                    underApproximation->addTransitionsToExtraStates(addedActions + action, truncationProbability);
                                }
                            } else {
                                underApproximation->addTransitionsToExtraStates(addedActions + action, truncationValueBound,
                                                                                truncationProbability - truncationValueBound);
                            }
                        }
                        if (computeRewards) {
                            // The truncationValueBound is added on top of the reward at this state.
                            if (truncationValueBound != storm::utility::infinity<ValueType>()) {
                                if (!clipBelief) {
                                    underApproximation->computeRewardAtCurrentState(action, truncationValueBound);
                                } else {
                                    underApproximation->addRewardToCurrentState(addedActions + action,
                                                                                beliefManager->getBeliefActionReward(currId, action) + truncationValueBound);
                                }
                            }
                        }
                    }
                }
            } else {
                // Cut off the exploration using cut-off values obtained from the pre-processing schedulers.
                for (uint64_t i = 0; i < nrCutoffStrategies && !options.skipHeuristicSchedulers; ++i) {
                    auto cutOffValue = min ? underApproximation->computeUpperValueBoundForScheduler(currId, i)
                                           : underApproximation->computeLowerValueBoundForScheduler(currId, i);
                    if (computeRewards) {
                        if (cutOffValue != storm::utility::infinity<ValueType>()) {
                            underApproximation->addTransitionsToExtraStates(addedActions, storm::utility::one<ValueType>());
                            underApproximation->addRewardToCurrentState(addedActions, cutOffValue);
                        } else {
                            underApproximation->addTransitionsToExtraStates(addedActions, storm::utility::zero<ValueType>(), storm::utility::one<ValueType>());
                        }
                    } else {
                        underApproximation->addTransitionsToExtraStates(addedActions, cutOffValue, storm::utility::one<ValueType>() - cutOffValue);
                    }
                    if (pomdp().hasChoiceLabeling()) {
                        underApproximation->addChoiceLabelToCurrentState(addedActions, "sched_" + std::to_string(i));
                    }
                    ++addedActions;
                }
                if (underApproximation->hasFMSchedulerValues()) {
                    uint64_t transitionNr = 0;
                    for (uint64_t i = 0; i < underApproximation->getNrOfMemoryNodesForObservation(currObservation); ++i) {
                        auto resPair = underApproximation->computeFMSchedulerValueForMemoryNode(currId, i);
                        ValueType cutOffValue;
                        if (resPair.first) {
                            cutOffValue = resPair.second;
                        } else {
                            STORM_LOG_DEBUG("Skipped cut-off of belief with ID " << currId << " with finite memory scheduler in memory node " << i
                                                                                 << ". Missing values.");
                            continue;
                        }
                        if (computeRewards) {
                            if (cutOffValue != storm::utility::infinity<ValueType>()) {
                                underApproximation->addTransitionsToExtraStates(addedActions + transitionNr, storm::utility::one<ValueType>());
                                underApproximation->addRewardToCurrentState(addedActions + transitionNr, cutOffValue);
                            } else {
                                underApproximation->addTransitionsToExtraStates(addedActions + transitionNr, storm::utility::zero<ValueType>(),
                                                                                storm::utility::one<ValueType>());
                            }
                        } else {
                            underApproximation->addTransitionsToExtraStates(addedActions + transitionNr, cutOffValue,
                                                                            storm::utility::one<ValueType>() - cutOffValue);
                        }
                        if (pomdp().hasChoiceLabeling()) {
                            underApproximation->addChoiceLabelToCurrentState(addedActions + transitionNr, "mem_node_" + std::to_string(i));
                        }
                        ++transitionNr;
                    }
                }
            }
        }
        if (storm::utility::resources::isTerminate()) {
            break;
        }
    }

    if (storm::utility::resources::isTerminate()) {
        // The exploration was aborted; count the states explored so far.
        if (!statistics.underApproximationStates) {
            statistics.underApproximationBuildAborted = true;
            statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates();
        }
        statistics.underApproximationBuildTime.stop();
        return false;
    }

    underApproximation->finishExploration();
    statistics.underApproximationBuildTime.stop();
    printUpdateStopwatch.stop();
    STORM_LOG_INFO("Finished exploring under-approximation MDP.\nStart analysis...\n");
    unfoldingStatus = Status::ModelExplorationFinished;
    statistics.underApproximationCheckTime.start();
    underApproximation->computeValuesOfExploredMdp(env, min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
    statistics.underApproximationCheckTime.stop();
    if (underApproximation->getExploredMdp()->getStateLabeling().getStates("truncated").getNumberOfSetBits() > 0) {
        statistics.nrTruncatedStates = underApproximation->getExploredMdp()->getStateLabeling().getStates("truncated").getNumberOfSetBits();
    }
    // Don't overwrite statistics of a previous, successful computation.
    if (!storm::utility::resources::isTerminate() || !statistics.underApproximationStates) {
        statistics.underApproximationStates = underApproximation->getExploredMdp()->getNumberOfStates();
    }
    return fixPoint;
}
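// Illustrative sketch (not part of the original source): for probabilities, a cut-off with value v
// at a frontier belief is encoded via the two extra states as "reach the target with probability v":
//
//   underApproximation->addTransitionsToExtraStates(action, v, one - v);  // (target, sink)
//
// whereas for rewards the full probability mass goes to the extra target state and v is charged as
// a one-off reward via addRewardToCurrentState, matching the two branches above.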
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::clipToGrid(uint64_t clippingStateId, bool computeRewards, bool min,
                                                                                                    std::shared_ptr<BeliefManagerType>& beliefManager,
                                                                                                    std::shared_ptr<ExplorerType>& beliefExplorer) {
    // For each action, clip the successors to the grid and add the corresponding correction terms.
    for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(clippingStateId); action < numActions; ++action) {
        auto rewardBound = utility::zero<BeliefValueType>();
        auto successors = beliefManager->expand(clippingStateId, action);
        auto absDelta = utility::zero<BeliefValueType>();
        for (auto const& successor : successors) {
            // Add the transition directly if the successor is already explored.
            bool added = beliefExplorer->addTransitionToBelief(action, successor.first, successor.second, true);
            if (!added) {
                // The successor is not in the explored space. Clip it to the grid.
                statistics.nrClippingAttempts = statistics.nrClippingAttempts.value() + 1;
                auto clipping = beliefManager->clipBeliefToGrid(
                    successor.first, options.clippingGridRes, computeRewards ? beliefExplorer->getStateExtremeBoundIsInfinite() : storm::storage::BitVector());
                if (clipping.isClippable) {
                    // The belief is not on the grid and a candidate with finite reward exists.
                    statistics.nrClippedStates = statistics.nrClippedStates.value() + 1;
                    // Adjust the transition probability by the clipping value.
                    BeliefValueType transitionProb =
                        (utility::one<BeliefValueType>() - clipping.delta) * utility::convertNumber<BeliefValueType>(successor.second);
                    beliefExplorer->addTransitionToBelief(action, clipping.targetBelief, utility::convertNumber<BeliefMDPType>(transitionProb), false);
                    // Accumulate the clipping correction terms.
                    absDelta += clipping.delta * utility::convertNumber<BeliefValueType>(successor.second);
                    if (computeRewards) {
                        // Compute the reward correction term.
                        auto localRew = utility::zero<BeliefValueType>();
                        for (auto const& deltaValue : clipping.deltaValues) {
                            localRew += deltaValue.second *
                                        utility::convertNumber<BeliefValueType>((beliefExplorer->getExtremeValueBoundAtPOMDPState(deltaValue.first)));
                        }
                        if (localRew == utility::infinity<BeliefValueType>()) {
                            STORM_LOG_WARN("Infinite reward correction value computed. Error in over-approximation of expected rewards?");
                        }
                        rewardBound += localRew * utility::convertNumber<BeliefValueType>(successor.second);
                    }
                } else if (clipping.onGrid) {
                    // The belief is not clippable, but already on the grid; it may need to be explored, too.
                    beliefExplorer->addTransitionToBelief(action, successor.first, successor.second, false);
                } else {
                    // The belief is neither clippable nor on the grid; treat it as if it was cut off.
                    absDelta += utility::convertNumber<BeliefValueType>(successor.second);
                    rewardBound += utility::convertNumber<BeliefValueType>(successor.second) *
                                   utility::convertNumber<BeliefValueType>(min ? beliefExplorer->computeUpperValueBoundAtBelief(successor.first)
                                                                               : beliefExplorer->computeLowerValueBoundAtBelief(successor.first));
                }
            }
        }
        // Redirect the accumulated correction mass to the extra states.
        if (absDelta != utility::zero<BeliefValueType>()) {
            if (computeRewards) {
                if (rewardBound == utility::infinity<BeliefValueType>()) {
                    // The reward correction is infinite; redirect the mass to the sink state.
                    beliefExplorer->addTransitionsToExtraStates(action, utility::zero<BeliefMDPType>(), utility::convertNumber<BeliefMDPType>(absDelta));
                } else {
                    // Otherwise, add the transition and the normalized reward correction.
                    beliefExplorer->addTransitionsToExtraStates(action, utility::convertNumber<BeliefMDPType>(absDelta));
                    BeliefValueType totalRewardVal = rewardBound / absDelta;
                    beliefExplorer->addClippingRewardToCurrentState(action, utility::convertNumber<BeliefMDPType>(totalRewardVal));
                }
            } else {
                beliefExplorer->addTransitionsToExtraStates(action, utility::zero<BeliefMDPType>(), utility::convertNumber<BeliefMDPType>(absDelta));
            }
        }
        if (computeRewards) {
            beliefExplorer->computeRewardAtCurrentState(action);
        }
    }
}
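// Worked example (illustrative numbers, not part of the original source): suppose a successor with
// transition probability p = 0.5 is clipped with clipping value delta = 0.2. The transition to the
// grid candidate then keeps probability (1 - 0.2) * 0.5 = 0.4, while absDelta accumulates
// 0.2 * 0.5 = 0.1 of redirected mass. That mass goes to the extra states, and for rewards the
// accumulated correction rewardBound is normalized to rewardBound / absDelta, so the expected
// correction per unit of redirected probability stays exact.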
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::clipToGridExplicitly(uint64_t clippingStateId, bool computeRewards,
                                                                                                              std::shared_ptr<BeliefManagerType>& beliefManager,
                                                                                                              std::shared_ptr<ExplorerType>& beliefExplorer,
                                                                                                              uint64_t localActionIndex) {
    statistics.nrClippingAttempts = statistics.nrClippingAttempts.value() + 1;
    auto clipping = beliefManager->clipBeliefToGrid(clippingStateId, options.clippingGridRes,
                                                    computeRewards ? beliefExplorer->getStateExtremeBoundIsInfinite() : storm::storage::BitVector());
    if (clipping.isClippable) {
        // The belief is not on the grid and a candidate with finite reward exists.
        statistics.nrClippedStates = statistics.nrClippedStates.value() + 1;
        // Adjust the transition probability by the clipping value.
        BeliefValueType transitionProb = (utility::one<BeliefValueType>() - clipping.delta);
        beliefExplorer->addTransitionToBelief(localActionIndex, clipping.targetBelief, utility::convertNumber<BeliefMDPType>(transitionProb), false);
        beliefExplorer->markAsGridBelief(clipping.targetBelief);
        if (computeRewards) {
            // Compute the reward correction term.
            auto reward = utility::zero<BeliefValueType>();
            for (auto const& deltaValue : clipping.deltaValues) {
                reward += deltaValue.second * utility::convertNumber<BeliefValueType>((beliefExplorer->getExtremeValueBoundAtPOMDPState(deltaValue.first)));
            }
            if (reward == utility::infinity<BeliefValueType>()) {
                STORM_LOG_WARN("Infinite reward correction value computed. Error in over-approximation of expected rewards?");
                // The reward correction is infinite; redirect the mass to the sink state.
                beliefExplorer->addTransitionsToExtraStates(localActionIndex, utility::zero<BeliefMDPType>(),
                                                            utility::convertNumber<BeliefMDPType>(clipping.delta));
            } else {
                // Otherwise, add the transition to the target state and the corresponding reward correction.
                beliefExplorer->addTransitionsToExtraStates(localActionIndex, utility::convertNumber<BeliefMDPType>(clipping.delta));
                BeliefValueType totalRewardVal = reward / clipping.delta;
                beliefExplorer->addClippingRewardToCurrentState(localActionIndex, utility::convertNumber<BeliefMDPType>(totalRewardVal));
            }
        } else {
            beliefExplorer->addTransitionsToExtraStates(localActionIndex, utility::zero<BeliefMDPType>(),
                                                        utility::convertNumber<BeliefMDPType>(clipping.delta));
        }
        beliefExplorer->addChoiceLabelToCurrentState(localActionIndex, "clip");
        return true;
    }
    if (clipping.onGrid) {
        // The belief is not clippable, but already on the grid; mark it accordingly.
        beliefExplorer->markAsGridBelief(clippingStateId);
    }
    return false;
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::setUnfoldingControl(
    storm::pomdp::modelchecker::BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::UnfoldingControl newUnfoldingControl) {
    unfoldingControl = newUnfoldingControl;
}

// Control and query methods for the interactive unfolding. (Some enclosing function names are
// elided in the extracted listing and are reconstructed here from their bodies and the
// accompanying documentation.)
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::pauseUnfolding() {
    setUnfoldingControl(UnfoldingControl::Pause);
}

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::continueUnfolding() {
    setUnfoldingControl(UnfoldingControl::Run);
}

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::terminateUnfolding() {
    setUnfoldingControl(UnfoldingControl::Terminate);
}

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::isResultReady() {
    return unfoldingStatus == Status::ResultAvailable || unfoldingStatus == Status::Converged;
}

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::hasConverged() {
    return unfoldingStatus == Status::Converged;
}

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::isExploring() {
    return unfoldingStatus == Status::Exploring;
}

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
std::shared_ptr<storm::builder::BeliefMdpExplorer<PomdpModelType, BeliefValueType>>
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getInteractiveBeliefExplorer() {
    return interactiveUnderApproximationExplorer;
}

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::setFMSchedValueList(
    std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> valueList) {
    interactiveUnderApproximationExplorer->setFMSchedValueList(valueList);
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
BeliefValueType BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::rateObservation(
    typename ExplorerType::SuccessorObservationInformation const& info, BeliefValueType const& observationResolution, BeliefValueType const& maxResolution) {
    auto n = storm::utility::convertNumber<BeliefValueType, uint64_t>(info.support.size());
    auto one = storm::utility::one<BeliefValueType>();
    if (storm::utility::isOne(n)) {
        // A Dirac belief is approximated precisely, so return the best possible rating.
        return one;
    }
    // Create the rating for this observation at this choice from the given info.
    auto obsChoiceRating = storm::utility::convertNumber<BeliefValueType, ValueType>(info.maxProbabilityToSuccessorWithObs / info.observationProbability);
    // At this point, obsChoiceRating is the largest triangulation weight, which ranges from 1/n to 1.
    // Normalize it to [0, 1], where 0 means the belief lies in the middle of the triangulating
    // simplex (a "bad" approximation) and 1 means it is approximated precisely.
    obsChoiceRating = (obsChoiceRating * n - one) / (n - one);
    // Scale the rating with the resolutions, so that low resolutions get a lower rating (and are
    // thus more likely to be refined).
    obsChoiceRating *= observationResolution / maxResolution;
    return obsChoiceRating;
}
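// Worked example (illustrative numbers, not part of the original source): for a successor with
// support size n = 3 whose largest triangulation weight is 0.5, the raw rating is
// (0.5 * 3 - 1) / (3 - 1) = 0.25. With an observation resolution of 4 and a maximal resolution
// of 8, the final rating is 0.25 * 4 / 8 = 0.125; since low ratings are refined first, this
// observation is a comparatively good refinement candidate.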
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
std::vector<BeliefValueType> BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getObservationRatings(
    std::shared_ptr<ExplorerType> const& overApproximation, std::vector<BeliefValueType> const& observationResolutionVector) {
    uint64_t numMdpStates = overApproximation->getExploredMdp()->getNumberOfStates();
    auto const& choiceIndices = overApproximation->getExploredMdp()->getNondeterministicChoiceIndices();
    BeliefValueType maxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
    std::vector<BeliefValueType> resultingRatings(pomdp().getNrObservations(), storm::utility::one<BeliefValueType>());
    std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations;  // Declared here to avoid repeated allocations
    for (uint64_t mdpState = 0; mdpState < numMdpStates; ++mdpState) {
        // Only consider states and choices that are reachable under an optimal scheduler.
        if (overApproximation->stateIsOptimalSchedulerReachable(mdpState)) {
            for (uint64_t mdpChoice = choiceIndices[mdpState]; mdpChoice < choiceIndices[mdpState + 1]; ++mdpChoice) {
                if (overApproximation->actionIsOptimal(mdpChoice)) {
                    // Rate all successor observations of this choice.
                    gatheredSuccessorObservations.clear();
                    overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
                    for (auto const& obsInfo : gatheredSuccessorObservations) {
                        auto const& obs = obsInfo.first;
                        BeliefValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution);
                        // The rating of an observation is the minimum over all choice-based ratings.
                        resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating);
                    }
                }
            }
        }
    }
    return resultingRatings;
}
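// The resulting ratings lie in [0, 1] per observation: observations rated close to 0 are
// approximated coarsely along optimal choices and are thus natural candidates for a
// resolution increase in the next refinement step.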
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename PomdpModelType::ValueType BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getGap(
    typename PomdpModelType::ValueType const& l, typename PomdpModelType::ValueType const& u) {
    STORM_LOG_ASSERT(l >= storm::utility::zero<typename PomdpModelType::ValueType>() && u >= storm::utility::zero<typename PomdpModelType::ValueType>(),
                     "Gap computation currently does not handle negative values.");
    if (storm::utility::isInfinity(u)) {
        // If both bounds are infinite, they coincide and there is no gap; otherwise the gap is infinite.
        if (storm::utility::isInfinity(l)) {
            return storm::utility::zero<typename PomdpModelType::ValueType>();
        }
        return u;
    }
    if (storm::utility::isZero(u)) {
        // With 0 <= l <= u = 0, both bounds coincide at zero (this also keeps the denominator below nonzero).
        return storm::utility::zero<typename PomdpModelType::ValueType>();
    }
    // Return the relative gap 2 * |u - l| / (u + l).
    return storm::utility::abs<typename PomdpModelType::ValueType>(u - l) * storm::utility::convertNumber<typename PomdpModelType::ValueType, uint64_t>(2) /
           (u + l);
}
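// Worked example: for l = 0.3 and u = 0.5, the relative gap is
// 2 * |0.5 - 0.3| / (0.5 + 0.3) = 0.4 / 0.8 = 0.5. Normalizing by the midpoint (u + l) / 2
// makes the gap comparable across properties of different magnitude.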
// Explicit instantiations: the first template argument fixes the POMDP's value type, the
// second (optional) argument the type used to represent beliefs, so that, e.g., beliefs over
// a double-valued POMDP can be tracked with exact rational arithmetic.
template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<double>>;
template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<double>, storm::RationalNumber>;
template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<storm::RationalNumber>, double>;
template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<storm::RationalNumber>>;

}  // namespace modelchecker
}  // namespace pomdp
}  // namespace storm