namespace modelchecker {
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::Result(ValueType lower, ValueType upper)
    : lowerBound(lower), upperBound(upper) {
    // Intentionally left empty.
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::ValueType
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::diff(bool relative) const {
    ValueType diff = upperBound - lowerBound;
    if (diff < storm::utility::zero<ValueType>()) {
        STORM_LOG_WARN("Upper bound '" << upperBound << "' is smaller than lower bound '" << lowerBound << "': Difference is " << diff << ".");
        diff = storm::utility::zero<ValueType>();
    }
    if (relative && !storm::utility::isZero(upperBound)) {
        diff /= upperBound;
    }
    return diff;
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::updateLowerBound(ValueType const& value) {
    if (value > lowerBound) {
        lowerBound = value;
        return true;
    }
    return false;
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::updateUpperBound(ValueType const& value) {
    if (value < upperBound) {
        upperBound = value;
        return true;
    }
    return false;
}
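
// Result keeps a sound interval [lowerBound, upperBound] on the POMDP value. The two update
// methods only ever tighten the interval and report whether they did, which is how the refinement
// loop below detects improved bounds. A minimal usage sketch (illustrative values only):
//
//     Result r(0.2, 0.9);       // start from trivial bounds
//     r.updateLowerBound(0.4);  // returns true; the interval is now [0.4, 0.9]
//     r.updateLowerBound(0.3);  // returns false; 0.3 does not improve the lower bound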
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Statistics::Statistics()
    : beliefMdpDetectedToBeFinite(false),
      refinementFixpointDetected(false),
      overApproximationBuildAborted(false),
      underApproximationBuildAborted(false),
      aborted(false) {
    // Intentionally left empty.
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::BeliefExplorationPomdpModelChecker(std::shared_ptr<PomdpModelType> pomdp,
                                                                                                                       Options options)
    : options(options), inputPomdp(pomdp) {
    STORM_LOG_ASSERT(inputPomdp, "The given POMDP is not initialized.");
    STORM_LOG_ERROR_COND(inputPomdp->isCanonic(), "Input Pomdp is not known to be canonic. This might lead to unexpected verification results.");
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::precomputeValueBounds(storm::logic::Formula const& formula,
                                                                                                               storm::Environment const& preProcEnv) {
    // Compute the initial bounds with the pre-processing model checker.
    storm::pomdp::modelchecker::PreprocessingPomdpValueBoundsModelChecker<ValueType> preProcessingMC(pomdp());
    auto initialPomdpValueBounds = preProcessingMC.getValueBounds(preProcEnv, formula);
    pomdpValueBounds.trivialPomdpValueBounds = initialPomdpValueBounds;
    // The extreme value bound is only needed when clipping is used for rewards.
    if (/* ... */) {
        pomdpValueBounds.extremePomdpValueBound = preProcessingMC.getExtremeValueBound(preProcEnv, formula);
    }
}
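
// The trivial value bounds computed here (per-state lower/upper bounds from the pre-processing
// model checker) seed the Result interval in check() and the belief explorers below; the extreme
// value bound is only consumed by the clipping machinery when rewards are computed.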
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
    storm::logic::Formula const& formula, std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
    storm::Environment env;
    return check(env, formula, env, additionalUnderApproximationBounds);
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
    storm::Environment const& env, storm::logic::Formula const& formula,
    std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
    return check(env, formula, env, additionalUnderApproximationBounds);
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
    storm::logic::Formula const& formula, storm::Environment const& preProcEnv,
    std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
    storm::Environment env;
    return check(env, formula, preProcEnv, additionalUnderApproximationBounds);
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
    storm::Environment const& env, storm::logic::Formula const& formula, storm::Environment const& preProcEnv,
    std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
    STORM_LOG_ASSERT(options.unfold || options.discretize || options.interactiveUnfolding,
                     "Invoked belief exploration but no task (unfold or discretize) given.");
    // Reset data from potential previous runs.
    preprocessedPomdp.reset();

    statistics = Statistics();
    statistics.totalTime.start();

    // Compute initial value bounds for each state of the POMDP.
    precomputeValueBounds(formula, preProcEnv);
    if (!additionalUnderApproximationBounds.empty()) {
        pomdpValueBounds.fmSchedulerValueList = additionalUnderApproximationBounds;
    }
    uint64_t initialPomdpState = pomdp().getInitialStates().getNextSetIndex(0);
    Result result(pomdpValueBounds.trivialPomdpValueBounds.getHighestLowerBound(initialPomdpState),
                  pomdpValueBounds.trivialPomdpValueBounds.getSmallestUpperBound(initialPomdpState));
    // Extract the relevant information from the formula.
    auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp(), formula);
    std::optional<std::string> rewardModelName;
    std::set<uint32_t> targetObservations;
    if (formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula()) {
        if (formulaInfo.getTargetStates().observationClosed) {
            targetObservations = formulaInfo.getTargetStates().observations;
        } else {
            // Make the set of target states observation-closed.
            storm::transformer::MakeStateSetObservationClosed<ValueType> obsCloser(inputPomdp);
            std::tie(preprocessedPomdp, targetObservations) = obsCloser.transform(formulaInfo.getTargetStates().states);
        }
        if (formulaInfo.isNonNestedReachabilityProbability()) {
            if (!formulaInfo.getSinkStates().empty()) {
                // Make the sink states absorbing, keeping the remaining model components unchanged.
                storm::storage::sparse::ModelComponents<ValueType> components;
                auto matrix = pomdp().getTransitionMatrix();
                matrix.makeRowGroupsAbsorbing(formulaInfo.getSinkStates().states);
                components.transitionMatrix = std::move(matrix);
                // ... (copy state labeling, observations, and reward models)
                if (pomdp().hasChoiceLabeling()) {
                    components.choiceLabeling = pomdp().getChoiceLabeling();
                }
                if (pomdp().hasObservationValuations()) {
                    components.observationValuations = pomdp().getObservationValuations();
                }
                preprocessedPomdp = std::make_shared<storm::models::sparse::Pomdp<ValueType>>(std::move(components), true);
                // No sink state may reach a non-sink state.
                auto reachableFromSinkStates = storm::utility::graph::getReachableStates(
                    pomdp().getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states);
                reachableFromSinkStates &= ~formulaInfo.getSinkStates().states;
                STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException,
                                "There are sink states that can reach non-sink states. This is currently not supported.");
            }
        } else {
            // An expected reward formula: store the reward model name.
            rewardModelName = formulaInfo.getRewardModelName();
        }
    } else {
        STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'.");
    }
    // Detect whether the belief MDP is finite.
    if (/* ... */) {
        statistics.beliefMdpDetectedToBeFinite = true;
    }
    if (options.interactiveUnfolding) {
        unfoldInteractively(env, targetObservations, formulaInfo.minimize(), rewardModelName, pomdpValueBounds, result);
    } else {
        refineReachability(env, targetObservations, formulaInfo.minimize(), rewardModelName, pomdpValueBounds, result);
    }
    // Fill in any bounds that were not computed.
    if ((formulaInfo.minimize() && !options.discretize) || (formulaInfo.maximize() && !options.unfold)) {
        result.lowerBound = -storm::utility::infinity<ValueType>();
    }
    if ((formulaInfo.maximize() && !options.discretize) || (formulaInfo.minimize() && !options.unfold)) {
        result.upperBound = storm::utility::infinity<ValueType>();
    }
    if (storm::utility::resources::isTerminate()) {
        statistics.aborted = true;
    }
    statistics.totalTime.stop();
    return result;
}
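
// A typical invocation of this checker, as a sketch: `pomdp` (a canonic
// std::shared_ptr<storm::models::sparse::Pomdp<double>>) and `formula` are assumed to be set up
// by the caller; the option names follow the Options fields used above.
//
//     BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<double>>::Options opts;
//     opts.unfold = true;      // under-approximation via belief MDP unfolding
//     opts.discretize = true;  // over-approximation via grid discretization
//     BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<double>> checker(pomdp, opts);
//     auto result = checker.check(formula);
//     // The POMDP value at the initial state lies in [result.lowerBound, result.upperBound].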
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::printStatisticsToStream(std::ostream& stream) const {
    stream << "##### POMDP Approximation Statistics ######\n";
    stream << "# Input model: \n";
    pomdp().printModelInformationToStream(stream);
    stream << "# Max. Number of states with same observation: " << pomdp().getMaxNrStatesWithSameObservation() << '\n';
    if (statistics.beliefMdpDetectedToBeFinite) {
        stream << "# Pre-computations detected that the belief MDP is finite.\n";
    }
    if (statistics.aborted) {
        stream << "# Computation aborted early\n";
    }
    stream << "# Total check time: " << statistics.totalTime << '\n';
    if (statistics.refinementSteps) {
        stream << "# Number of refinement steps: " << statistics.refinementSteps.value() << '\n';
    }
    if (statistics.refinementFixpointDetected) {
        stream << "# Detected a refinement fixpoint.\n";
    }
    if (statistics.overApproximationStates) {
        stream << "# Number of states in the ";
        if (options.refine) {
            stream << "final ";
        }
        stream << "grid MDP for the over-approximation: ";
        if (statistics.overApproximationBuildAborted) {
            stream << ">=";
        }
        stream << statistics.overApproximationStates.value() << '\n';
        stream << "# Maximal resolution for over-approximation: " << statistics.overApproximationMaxResolution.value() << '\n';
        stream << "# Time spent for building the over-approx grid MDP(s): " << statistics.overApproximationBuildTime << '\n';
        stream << "# Time spent for checking the over-approx grid MDP(s): " << statistics.overApproximationCheckTime << '\n';
    }
    if (statistics.underApproximationStates) {
        stream << "# Number of states in the ";
        if (options.refine) {
            stream << "final ";
        }
        stream << "belief MDP for the under-approximation: ";
        if (statistics.underApproximationBuildAborted) {
            stream << ">=";
        }
        stream << statistics.underApproximationStates.value() << '\n';
        if (statistics.nrClippingAttempts) {
            stream << "# Clipping attempts (clipped states) for the under-approximation: ";
            if (statistics.underApproximationBuildAborted) {
                stream << ">=";
            }
            stream << statistics.nrClippingAttempts.value() << " (" << statistics.nrClippedStates.value() << ")\n";
            stream << "# Total clipping preprocessing time: " << statistics.clippingPreTime << "\n";
            stream << "# Total clipping time: " << statistics.clipWatch << "\n";
        } else if (statistics.nrTruncatedStates) {
            stream << "# Truncated states for the under-approximation: ";
            if (statistics.underApproximationBuildAborted) {
                stream << ">=";
            }
            stream << statistics.nrTruncatedStates.value() << "\n";
        }
        if (statistics.underApproximationStateLimit) {
            stream << "# Exploration state limit for under-approximation: " << statistics.underApproximationStateLimit.value() << '\n';
        }
        stream << "# Time spent for building the under-approx grid MDP(s): " << statistics.underApproximationBuildTime << '\n';
        stream << "# Time spent for checking the under-approx grid MDP(s): " << statistics.underApproximationCheckTime << '\n';
    }
    stream << "##########################################\n";
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
PomdpModelType const& BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::pomdp() const {
    if (preprocessedPomdp) {
        return *preprocessedPomdp;
    }
    return *inputPomdp;
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::refineReachability(
    storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, std::optional<std::string> rewardModelName,
    storm::pomdp::modelchecker::POMDPValueBounds<ValueType> const& valueBounds, Result& result) {
    statistics.refinementSteps = 0;
    auto trivialPOMDPBounds = valueBounds.trivialPomdpValueBounds;
    // Set up exploration data for the over-approximation.
    std::vector<BeliefValueType> observationResolutionVector;
    std::shared_ptr<BeliefManagerType> overApproxBeliefManager;
    std::shared_ptr<ExplorerType> overApproximation;
    HeuristicParameters overApproxHeuristicPar{};
    if (options.discretize) {
        observationResolutionVector =
            std::vector<BeliefValueType>(pomdp().getNrObservations(), storm::utility::convertNumber<BeliefValueType>(options.resolutionInit));
        overApproxBeliefManager = std::make_shared<BeliefManagerType>(
            pomdp(), storm::utility::convertNumber<BeliefValueType>(options.numericPrecision),
            options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
        if (rewardModelName) {
            overApproxBeliefManager->setRewardModel(rewardModelName);
        }
        overApproximation = std::make_shared<ExplorerType>(overApproxBeliefManager, trivialPOMDPBounds, options.explorationHeuristic);
        overApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
        overApproxHeuristicPar.observationThreshold = options.obsThresholdInit;
        overApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit == 0 ? std::numeric_limits<uint64_t>::max() : options.sizeThresholdInit;
        overApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;

        buildOverApproximation(env, targetObservations, min, rewardModelName.has_value(), false, overApproxHeuristicPar, observationResolutionVector,
                               overApproxBeliefManager, overApproximation);
        if (!overApproximation->hasComputedValues() || storm::utility::resources::isTerminate()) {
            return;
        }
        ValueType const& newValue = overApproximation->getComputedValueAtInitialState();
        bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue);
        if (betterBound) {
            STORM_LOG_INFO("Initial Over-approx result obtained after " << statistics.totalTime << ". Value is '" << newValue << "'.\n");
        }
    }
    // Set up exploration data for the under-approximation.
    std::shared_ptr<BeliefManagerType> underApproxBeliefManager;
    std::shared_ptr<ExplorerType> underApproximation;
    HeuristicParameters underApproxHeuristicPar{};
    if (options.unfold) {
        underApproxBeliefManager = std::make_shared<BeliefManagerType>(
            pomdp(), storm::utility::convertNumber<BeliefValueType>(options.numericPrecision),
            options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
        if (rewardModelName) {
            underApproxBeliefManager->setRewardModel(rewardModelName);
        }
        underApproximation = std::make_shared<ExplorerType>(underApproxBeliefManager, trivialPOMDPBounds, options.explorationHeuristic);
        underApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
        underApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
        underApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit;
        if (underApproxHeuristicPar.sizeThreshold == 0) {
            if (!options.refine && options.explorationTimeLimit != 0) {
                // Explore as long as the time limit permits.
                underApproxHeuristicPar.sizeThreshold = std::numeric_limits<uint64_t>::max();
            } else {
                // Select a size threshold heuristically.
                underApproxHeuristicPar.sizeThreshold = pomdp().getNumberOfStates() * pomdp().getMaxNrStatesWithSameObservation();
                STORM_LOG_INFO("Heuristically selected an under-approximation MDP size threshold of " << underApproxHeuristicPar.sizeThreshold << ".\n");
            }
        }
        if (options.useClipping && rewardModelName.has_value()) {
            underApproximation->setExtremeValueBound(valueBounds.extremePomdpValueBound);
        }
        // ...
        buildUnderApproximation(env, targetObservations, min, rewardModelName.has_value(), false, underApproxHeuristicPar, underApproxBeliefManager,
                                underApproximation, false);
        if (!underApproximation->hasComputedValues() || storm::utility::resources::isTerminate()) {
            return;
        }
        ValueType const& newValue = underApproximation->getComputedValueAtInitialState();
        bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue);
        if (betterBound) {
            STORM_LOG_INFO("Initial Under-approx result obtained after " << statistics.totalTime << ". Value is '" << newValue << "'.\n");
        }
    }
    // Do some output.
    STORM_LOG_INFO("Completed (initial) computation. Current checktime is " << statistics.totalTime << ".");
    bool computingLowerBound = false;
    bool computingUpperBound = false;
    if (options.discretize) {
        STORM_LOG_INFO("\tOver-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << ".");
        (min ? computingLowerBound : computingUpperBound) = true;
    }
    if (options.unfold) {
        STORM_LOG_INFO("\tUnder-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << ".");
        (min ? computingUpperBound : computingLowerBound) = true;
    }
    if (computingLowerBound && computingUpperBound) {
        STORM_LOG_INFO("\tObtained result is [" << result.lowerBound << ", " << result.upperBound << "].");
    } else if (computingLowerBound) {
        STORM_LOG_INFO("\tObtained result is ≥" << result.lowerBound << ".");
    } else if (computingUpperBound) {
        STORM_LOG_INFO("\tObtained result is ≤" << result.upperBound << ".");
    }
    if (options.refine) {
        STORM_LOG_WARN_COND(/* ... */,
                            "No termination criterion for refinement given. Consider specifying a step limit, a non-zero precision limit, or a timeout.");
        STORM_LOG_WARN_COND(/* ... */, "Refinement goal precision is given, but only one bound is going to be refined.");
        while ((options.refineStepLimit == 0 || statistics.refinementSteps.value() < options.refineStepLimit) && result.diff() > options.refinePrecision) {
            bool overApproxFixPoint = true;
            bool underApproxFixPoint = true;
            if (options.discretize) {
                // Refine the over-approximation.
                if (min) {
                    overApproximation->takeCurrentValuesAsLowerBounds();
                } else {
                    overApproximation->takeCurrentValuesAsUpperBounds();
                }
                overApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor;
                overApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber<uint64_t, ValueType>(
                    storm::utility::convertNumber<ValueType, uint64_t>(overApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor);
                overApproxHeuristicPar.observationThreshold +=
                    options.obsThresholdIncrementFactor * (storm::utility::one<ValueType>() - overApproxHeuristicPar.observationThreshold);
                overApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor;
                overApproxFixPoint = buildOverApproximation(env, targetObservations, min, rewardModelName.has_value(), true, overApproxHeuristicPar,
                                                            observationResolutionVector, overApproxBeliefManager, overApproximation);
                if (overApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) {
                    ValueType const& newValue = overApproximation->getComputedValueAtInitialState();
                    bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue);
                    if (betterBound) {
                        STORM_LOG_INFO("Over-approx result for refinement improved after " << statistics.totalTime << " in refinement step #"
                                                                                           << (statistics.refinementSteps.value() + 1) << ". New value is '"
                                                                                           << newValue << "'.");
                    }
                } else {
                    break;
                }
            }

            if (options.unfold && result.diff() > options.refinePrecision) {
                // Refine the under-approximation.
                underApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor;
                underApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber<uint64_t, ValueType>(
                    storm::utility::convertNumber<ValueType, uint64_t>(underApproximation->getExploredMdp()->getNumberOfStates()) *
                    options.sizeThresholdFactor);
                underApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor;
                underApproxFixPoint = buildUnderApproximation(env, targetObservations, min, rewardModelName.has_value(), true, underApproxHeuristicPar,
                                                              underApproxBeliefManager, underApproximation, true);
                if (underApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) {
                    ValueType const& newValue = underApproximation->getComputedValueAtInitialState();
                    bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue);
                    if (betterBound) {
                        STORM_LOG_INFO("Under-approx result for refinement improved after " << statistics.totalTime << " in refinement step #"
                                                                                            << (statistics.refinementSteps.value() + 1) << ". New value is '"
                                                                                            << newValue << "'.");
                    }
                } else {
                    break;
                }
            }

            ++statistics.refinementSteps.value();
            // Don't make too many outputs (to avoid logfile clutter).
            if (statistics.refinementSteps.value() <= 1000) {
                STORM_LOG_INFO("Completed iteration #" << statistics.refinementSteps.value() << ". Current checktime is " << statistics.totalTime << ".");
                computingLowerBound = false;
                computingUpperBound = false;
                if (options.discretize) {
                    STORM_LOG_INFO("\tOver-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << ".");
                    (min ? computingLowerBound : computingUpperBound) = true;
                }
                if (options.unfold) {
                    STORM_LOG_INFO("\tUnder-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << ".");
                    (min ? computingUpperBound : computingLowerBound) = true;
                }
                if (computingLowerBound && computingUpperBound) {
                    STORM_LOG_INFO("\tCurrent result is [" << result.lowerBound << ", " << result.upperBound << "].");
                } else if (computingLowerBound) {
                    STORM_LOG_INFO("\tCurrent result is ≥" << result.lowerBound << ".");
                } else if (computingUpperBound) {
                    STORM_LOG_INFO("\tCurrent result is ≤" << result.upperBound << ".");
                }
            }
            STORM_LOG_WARN_COND(statistics.refinementSteps.value() < 1000, "Refinement requires more than 1000 iterations.");

            if (overApproxFixPoint && underApproxFixPoint) {
                STORM_LOG_INFO("Refinement fixpoint reached after " << statistics.refinementSteps.value() << " iterations.\n");
                statistics.refinementFixpointDetected = true;
                break;
            }
        }
    }
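    // The loop above alternates between tightening the over-approximation (finer grid via
    // increased observation resolutions) and enlarging the under-approximation (bigger explored
    // belief MDP) until result.diff() falls below options.refinePrecision, the step limit is hit,
    // or both builders report a fixpoint.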
    // Print some output.
    if (options.discretize && overApproximation->hasComputedValues()) {
        auto printOverInfo = [&overApproximation]() {
            std::stringstream str;
            str << "Explored and checked Over-Approximation MDP:\n";
            overApproximation->getExploredMdp()->printModelInformationToStream(str);
            return str.str();
        };
        STORM_LOG_INFO(printOverInfo());
    }
    if (options.unfold && underApproximation->hasComputedValues()) {
        auto printUnderInfo = [&underApproximation]() {
            std::stringstream str;
            str << "Explored and checked Under-Approximation MDP:\n";
            underApproximation->getExploredMdp()->printModelInformationToStream(str);
            return str.str();
        };
        STORM_LOG_INFO(printUnderInfo());
        // Extract the scheduler from the under-approximation.
        std::shared_ptr<storm::models::sparse::Model<ValueType>> scheduledModel = underApproximation->getExploredMdp();
        if (!options.useStateEliminationCutoff) {
            storm::models::sparse::StateLabeling newLabeling(scheduledModel->getStateLabeling());
            auto nrPreprocessingScheds = min ? underApproximation->getNrSchedulersForUpperBounds() : underApproximation->getNrSchedulersForLowerBounds();
            for (uint64_t i = 0; i < nrPreprocessingScheds; ++i) {
                newLabeling.addLabel("sched_" + std::to_string(i));
            }
            newLabeling.addLabel("cutoff");
            newLabeling.addLabel("clipping");
            newLabeling.addLabel("finite_mem");

            auto transMatrix = scheduledModel->getTransitionMatrix();
            for (uint64_t i = 0; i < scheduledModel->getNumberOfStates(); ++i) {
                if (newLabeling.getStateHasLabel("truncated", i)) {
                    uint64_t localChosenActionIndex = underApproximation->getSchedulerForExploredMdp()->getChoice(i).getDeterministicChoice();
                    auto rowIndex = scheduledModel->getTransitionMatrix().getRowGroupIndices()[i];
                    if (scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).size() > 0) {
                        auto label = *(scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).begin());
                        if (label.rfind("clip", 0) == 0) {
                            newLabeling.addLabelToState("clipping", i);
                            auto chosenRow = transMatrix.getRow(i, 0);
                            auto candidateIndex = (chosenRow.end() - 1)->getColumn();
                            transMatrix.makeRowDirac(transMatrix.getRowGroupIndices()[i], candidateIndex);
                        } else if (label.rfind("mem_node", 0) == 0) {
                            newLabeling.addLabelToState("finite_mem", i);
                            newLabeling.addLabelToState("cutoff", i);
                        } else {
                            newLabeling.addLabelToState(label, i);
                            newLabeling.addLabelToState("cutoff", i);
                        }
                    }
                }
            }
            newLabeling.removeLabel("truncated");

            transMatrix.dropZeroEntries();
            storm::storage::sparse::ModelComponents<ValueType> modelComponents(transMatrix, newLabeling);
            if (scheduledModel->hasChoiceLabeling()) {
                modelComponents.choiceLabeling = scheduledModel->getChoiceLabeling();
            }
            storm::models::sparse::Mdp<ValueType> newMDP(modelComponents);
            auto inducedMC = newMDP.applyScheduler(*(underApproximation->getSchedulerForExploredMdp()), true);
            scheduledModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType>>(inducedMC);
        } else {
            auto inducedMC = underApproximation->getExploredMdp()->applyScheduler(*(underApproximation->getSchedulerForExploredMdp()), true);
            scheduledModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType>>(inducedMC);
        }
        result.schedulerAsMarkovChain = scheduledModel;
        if (min) {
            result.cutoffSchedulers = underApproximation->getUpperValueBoundSchedulers();
        } else {
            result.cutoffSchedulers = underApproximation->getLowerValueBoundSchedulers();
        }
    }
}
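
// For the under-approximation, the induced Markov chain of the computed scheduler is exposed via
// result.schedulerAsMarkovChain, and the schedulers used at cut-off states via
// result.cutoffSchedulers (upper-bound schedulers when minimizing, lower-bound ones when maximizing).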
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::unfoldInteractively(
    storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, std::optional<std::string> rewardModelName,
    storm::pomdp::modelchecker::POMDPValueBounds<ValueType> const& valueBounds, Result& result) {
    statistics.refinementSteps = 0;
    interactiveResult = result;
    unfoldingStatus = Status::Uninitialized;
    unfoldingControl = UnfoldingControl::Run;
    // Set up exploration data.
    auto trivialPOMDPBounds = valueBounds.trivialPomdpValueBounds;
    std::shared_ptr<BeliefManagerType> underApproxBeliefManager;
    HeuristicParameters underApproxHeuristicPar{};
    bool firstIteration = true;
    // ...
    underApproxBeliefManager = std::make_shared<BeliefManagerType>(
        pomdp(), storm::utility::convertNumber<BeliefValueType>(options.numericPrecision),
        options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
    if (rewardModelName) {
        underApproxBeliefManager->setRewardModel(rewardModelName);
    }
    // Set up the belief explorer used throughout the interactive unfolding.
    interactiveUnderApproximationExplorer = std::make_shared<ExplorerType>(underApproxBeliefManager, trivialPOMDPBounds, options.explorationHeuristic);
    underApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
    underApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
    underApproxHeuristicPar.sizeThreshold = std::numeric_limits<uint64_t>::max() - 1;

    if (options.useClipping && rewardModelName.has_value()) {
        interactiveUnderApproximationExplorer->setExtremeValueBound(valueBounds.extremePomdpValueBound);
    }
    // ...
    // Keep unfolding until the user terminates the process.
    while (!(unfoldingControl == UnfoldingControl::Terminate) /* ... */) {
        bool underApproxFixPoint = true;
        bool hasTruncatedStates = false;
        if (unfoldingStatus != Status::Converged) {
            // Continue the exploration.
            underApproxFixPoint = buildUnderApproximation(env, targetObservations, min, rewardModelName.has_value(), false, underApproxHeuristicPar,
                                                          underApproxBeliefManager, interactiveUnderApproximationExplorer, firstIteration);
            if (interactiveUnderApproximationExplorer->hasComputedValues() && !storm::utility::resources::isTerminate()) {
                ValueType const& newValue = interactiveUnderApproximationExplorer->getComputedValueAtInitialState();
                bool betterBound = min ? interactiveResult.updateUpperBound(newValue) : interactiveResult.updateLowerBound(newValue);
                if (betterBound) {
                    STORM_LOG_INFO("Under-approximation result improved after " << statistics.totalTime << " in step #"
                                                                                << (statistics.refinementSteps.value() + 1) << ". New value is '" << newValue
                                                                                << "'.");
                }
                std::shared_ptr<storm::models::sparse::Model<ValueType>> scheduledModel = interactiveUnderApproximationExplorer->getExploredMdp();
                if (!options.useStateEliminationCutoff) {
                    storm::models::sparse::StateLabeling newLabeling(scheduledModel->getStateLabeling());
                    auto nrPreprocessingScheds = min ? interactiveUnderApproximationExplorer->getNrSchedulersForUpperBounds()
                                                     : interactiveUnderApproximationExplorer->getNrSchedulersForLowerBounds();
                    for (uint64_t i = 0; i < nrPreprocessingScheds; ++i) {
                        newLabeling.addLabel("sched_" + std::to_string(i));
                    }
                    // ...
                    auto transMatrix = scheduledModel->getTransitionMatrix();
                    for (uint64_t i = 0; i < scheduledModel->getNumberOfStates(); ++i) {
                        if (newLabeling.getStateHasLabel("truncated", i)) {
                            hasTruncatedStates = true;
                            uint64_t localChosenActionIndex =
                                interactiveUnderApproximationExplorer->getSchedulerForExploredMdp()->getChoice(i).getDeterministicChoice();
                            auto rowIndex = scheduledModel->getTransitionMatrix().getRowGroupIndices()[i];
                            if (scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).size() > 0) {
                                auto label = *(scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).begin());
                                if (label.rfind("clip", 0) == 0) {
                                    auto chosenRow = transMatrix.getRow(i, 0);
                                    auto candidateIndex = (chosenRow.end() - 1)->getColumn();
                                    transMatrix.makeRowDirac(transMatrix.getRowGroupIndices()[i], candidateIndex);
                                } else if (label.rfind("mem_node", 0) == 0) {
                                    // ...
                                }
                            }
                        }
                    }
                    // ...
                    transMatrix.dropZeroEntries();
                    storm::storage::sparse::ModelComponents<ValueType> modelComponents(transMatrix, newLabeling);
                    if (scheduledModel->hasChoiceLabeling()) {
                        modelComponents.choiceLabeling = scheduledModel->getChoiceLabeling();
                    }
                    storm::models::sparse::Mdp<ValueType> newMDP(modelComponents);
                    auto inducedMC = newMDP.applyScheduler(*(interactiveUnderApproximationExplorer->getSchedulerForExploredMdp()), true);
                    scheduledModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType>>(inducedMC);
                }
                interactiveResult.schedulerAsMarkovChain = scheduledModel;
                if (min) {
                    interactiveResult.cutoffSchedulers = interactiveUnderApproximationExplorer->getUpperValueBoundSchedulers();
                } else {
                    interactiveResult.cutoffSchedulers = interactiveUnderApproximationExplorer->getLowerValueBoundSchedulers();
                }
                if (firstIteration) {
                    firstIteration = false;
                }
                unfoldingStatus = Status::ResultAvailable;
            } else {
                break;
            }
            // ...
            ++statistics.refinementSteps.value();
            // Don't make too many outputs (to avoid logfile clutter).
            if (statistics.refinementSteps.value() <= 1000) {
                STORM_LOG_INFO("Completed iteration #" << statistics.refinementSteps.value() << ". Current checktime is " << statistics.totalTime << ".");
                bool computingLowerBound = false;
                bool computingUpperBound = false;
                if (options.unfold) {
                    STORM_LOG_INFO("\tUnder-approx MDP has size " << interactiveUnderApproximationExplorer->getExploredMdp()->getNumberOfStates() << ".");
                    (min ? computingUpperBound : computingLowerBound) = true;
                }
                if (computingLowerBound && computingUpperBound) {
                    STORM_LOG_INFO("\tCurrent result is [" << interactiveResult.lowerBound << ", " << interactiveResult.upperBound << "].");
                } else if (computingLowerBound) {
                    STORM_LOG_INFO("\tCurrent result is ≥" << interactiveResult.lowerBound << ".");
                } else if (computingUpperBound) {
                    STORM_LOG_INFO("\tCurrent result is ≤" << interactiveResult.upperBound << ".");
                }
            }
        }
        if (underApproxFixPoint) {
            STORM_LOG_INFO("Fixpoint reached after " << statistics.refinementSteps.value() << " iterations.\n");
            statistics.refinementFixpointDetected = true;
            unfoldingStatus = Status::Converged;
            unfoldingControl = UnfoldingControl::Pause;
        }
        if (!hasTruncatedStates) {
            STORM_LOG_INFO("No states have been truncated, so continued iteration does not yield new results.\n");
            unfoldingStatus = Status::Converged;
            unfoldingControl = UnfoldingControl::Pause;
        }
        // Wait as long as the unfolding is paused.
        while (unfoldingControl == UnfoldingControl::Pause /* ... */) {
            // ...
        }
    }
}
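
// The interactive unfolding is driven by two small state machines: UnfoldingControl (Run, Pause,
// Terminate) is set from the outside via setUnfoldingControl() and its convenience wrappers below,
// while Status (Uninitialized, Exploring, ModelExplorationFinished, ResultAvailable, Converged,
// Terminated) is what getStatus() reports back to the caller between iterations.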
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::unfoldInteractively(
    std::set<uint32_t> const& targetObservations, bool min, std::optional<std::string> rewardModelName,
    storm::pomdp::modelchecker::POMDPValueBounds<ValueType> const& valueBounds, Result& result) {
    storm::Environment env;
    unfoldInteractively(env, targetObservations, min, rewardModelName, valueBounds, result);
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getInteractiveResult() {
    return interactiveResult;
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
int64_t BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getStatus() {
    if (unfoldingStatus == Status::Uninitialized)
        return 0;
    if (unfoldingStatus == Status::Exploring)
        return 1;
    if (unfoldingStatus == Status::ModelExplorationFinished)
        return 2;
    if (unfoldingStatus == Status::ResultAvailable)
        return 3;
    if (unfoldingStatus == Status::Terminated)
        return 4;
    return -1;
}
template<typename ValueType>
ValueType getGap(ValueType const& l, ValueType const& u) {
    STORM_LOG_ASSERT(l >= storm::utility::zero<ValueType>() && u >= storm::utility::zero<ValueType>(),
                     "Gap computation currently does not handle negative values.");
    if (/* ... */) {
        return storm::utility::zero<ValueType>();
    }
    // ...
    return storm::utility::abs<ValueType>(u - l) * storm::utility::convertNumber<ValueType, uint64_t>(2) / (l + u);
}
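
// The returned quantity is the relative gap 2|u - l| / (l + u). For example, l = 0.4 and u = 0.6
// give 2 * 0.2 / 1.0 = 0.4, whereas the same absolute difference at l = 9.9 and u = 10.1 gives
// only 0.4 / 20 = 0.02; exploration effort is thus focused where the bounds are loose relative to
// their magnitude.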
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::buildOverApproximation(
    storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, bool refine,
    HeuristicParameters const& heuristicParameters, std::vector<BeliefValueType>& observationResolutionVector,
    std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
    // Detect whether the refinement reached a fixpoint.
    bool fixPoint = true;

    statistics.overApproximationBuildTime.start();
    storm::storage::BitVector refinedObservations;
    if (!refine) {
        // Build the model from scratch.
        if (computeRewards) {
            overApproximation->startNewExploration(storm::utility::zero<ValueType>());
        } else {
            overApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
        }
    } else {
        // Find out which observations are to be refined.
        overApproximation->computeOptimalChoicesAndReachableMdpStates(heuristicParameters.optimalChoiceValueEpsilon, true);
        // Create a rating of the observations, where a low rating means high refinement priority.
        auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector);
        // Potentially reach a fixpoint if no observation needs refinement.
        auto numericPrecision = storm::utility::convertNumber<BeliefValueType>(options.numericPrecision);
        if (std::any_of(obsRatings.begin(), obsRatings.end(),
                        [&numericPrecision](BeliefValueType const& value) { return value + numericPrecision < storm::utility::one<BeliefValueType>(); })) {
            STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because there are still observations to refine.");
            fixPoint = false;
        }
        refinedObservations = storm::utility::vector::filter<BeliefValueType>(obsRatings, [&heuristicParameters](BeliefValueType const& r) {
            return r <= storm::utility::convertNumber<BeliefValueType>(heuristicParameters.observationThreshold);
        });
        for (auto const obs : refinedObservations) {
            // Increase the resolution of the observation, avoiding an overflow.
            storm::RationalNumber newObsResolutionAsRational = storm::utility::convertNumber<storm::RationalNumber>(observationResolutionVector[obs]) *
                                                               storm::utility::convertNumber<storm::RationalNumber>(options.resolutionFactor);
            if (/* ... */
                newObsResolutionAsRational > storm::utility::convertNumber<storm::RationalNumber>(std::numeric_limits<double>::max())) {
                observationResolutionVector[obs] = storm::utility::convertNumber<BeliefValueType>(std::numeric_limits<double>::max());
            } else {
                observationResolutionVector[obs] = storm::utility::convertNumber<BeliefValueType>(newObsResolutionAsRational);
            }
        }
        overApproximation->restartExploration();
    }
    statistics.overApproximationMaxResolution = storm::utility::ceil(*std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()));
    // Start exploration.
    storm::utility::Stopwatch explorationTime;
    if (options.explorationTimeLimit != 0) {
        explorationTime.start();
    }
    bool timeLimitExceeded = false;
    std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations;  // Declare here to avoid reallocations.
    uint64_t numRewiredOrExploredStates = 0;
    while (overApproximation->hasUnexploredState()) {
        if (!timeLimitExceeded && options.explorationTimeLimit != 0 &&
            static_cast<uint64_t>(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit) {
            STORM_LOG_INFO("Exploration time limit exceeded.");
            timeLimitExceeded = true;
            STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because the exploration time limit is exceeded.");
            fixPoint = false;
        }

        uint64_t currId = overApproximation->exploreNextState();
        bool hasOldBehavior = refine && overApproximation->currentStateHasOldBehavior();
        if (!hasOldBehavior) {
            STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because a new state is explored");
            fixPoint = false;
        }
        uint32_t currObservation = beliefManager->getBeliefObservation(currId);
        if (targetObservations.count(currObservation) != 0) {
            overApproximation->setCurrentStateIsTarget();
            overApproximation->addSelfloopTransition();
        } else {
            // Decide how to treat the actions of this state.
            bool exploreAllActions = false;
            bool truncateAllActions = false;
            bool restoreAllActions = false;
            bool checkRewireForAllActions = false;
            // Get the relative gap between the bounds at the current state.
            ValueType gap = getGap(overApproximation->getLowerValueBoundAtCurrentState(), overApproximation->getUpperValueBoundAtCurrentState());
            if (!hasOldBehavior) {
                // A new state is explored: either expand it or truncate it.
                if (!timeLimitExceeded && gap >= heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
                    exploreAllActions = true;
                } else {
                    truncateAllActions = true;
                    overApproximation->setCurrentStateIsTruncated();
                }
            } else if (overApproximation->getCurrentStateWasTruncated()) {
                // A previously truncated state is revisited.
                if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold &&
                    numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
                    exploreAllActions = true;
                    STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because a previously truncated state is now explored.");
                    fixPoint = false;
                } else {
                    truncateAllActions = true;
                    overApproximation->setCurrentStateIsTruncated();
                    if (fixPoint) {
                        // ...
                        STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because we truncate a state with non-zero gap "
                                                           << gap << " that is reachable via an optimal sched.");
                        fixPoint = false;
                    }
                }
            } else {
                // The state was fully explored before: check whether its actions need to be rewired.
                if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold &&
                    numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
                    checkRewireForAllActions = true;
                } else {
                    restoreAllActions = true;
                    // Still check for rewiring at delayed actions.
                    checkRewireForAllActions = true;
                }
            }
            bool expandedAtLeastOneAction = false;
            for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
                bool expandCurrentAction = exploreAllActions || truncateAllActions;
                if (checkRewireForAllActions) {
                    // Check whether this action needs to be rewired.
                    assert(!expandCurrentAction);
                    assert(overApproximation->currentStateHasOldBehavior());
                    if (overApproximation->getCurrentStateActionExplorationWasDelayed(action) ||
                        overApproximation->currentStateHasSuccessorObservationInObservationSet(action, refinedObservations)) {
                        // Rewiring is required.
                        if (!restoreAllActions && overApproximation->actionAtCurrentStateWasOptimal(action)) {
                            // Rewire the action now.
                            expandCurrentAction = true;
                            STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because we rewire a state.");
                            fixPoint = false;
                        } else {
                            // Delay the rewiring.
                            overApproximation->setCurrentChoiceIsDelayed(action);
                            if (fixPoint) {
                                if (!overApproximation->getCurrentStateActionExplorationWasDelayed(action) ||
                                    (overApproximation->currentStateIsOptimalSchedulerReachable() && /* ... */)) {
                                    STORM_LOG_INFO_COND(!fixPoint,
                                                        "Not reaching a refinement fixpoint because we delay a rewiring of a state with non-zero gap "
                                                            << gap << " that is reachable via an optimal scheduler.");
                                    fixPoint = false;
                                }
                            }
                        }
                    }
                }
                if (expandCurrentAction) {
                    expandedAtLeastOneAction = true;
                    if (!truncateAllActions) {
                        // Expand the action: add transitions to the successor beliefs.
                        auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
                        for (auto const& successor : successorGridPoints) {
                            overApproximation->addTransitionToBelief(action, successor.first, successor.second, false);
                        }
                        if (computeRewards) {
                            overApproximation->computeRewardAtCurrentState(action);
                        }
                    } else {
                        // Truncate the action: send the probability mass of off-grid successors to the extra states.
                        auto truncationProbability = storm::utility::zero<ValueType>();
                        auto truncationValueBound = storm::utility::zero<ValueType>();
                        auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
                        for (auto const& successor : successorGridPoints) {
                            bool added = overApproximation->addTransitionToBelief(action, successor.first, successor.second, true);
                            if (!added) {
                                truncationProbability += successor.second;
                                truncationValueBound += successor.second * (min ? overApproximation->computeLowerValueBoundAtBelief(successor.first)
                                                                                : overApproximation->computeUpperValueBoundAtBelief(successor.first));
                            }
                        }
                        if (computeRewards) {
                            // The truncationValueBound is a reward bound in this case.
                            overApproximation->addTransitionsToExtraStates(action, truncationProbability);
                            overApproximation->computeRewardAtCurrentState(action, truncationValueBound);
                        } else {
                            overApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
                        }
                    }
                } else {
                    // Restore the old behavior of this action.
                    overApproximation->restoreOldBehaviorAtCurrentState(action);
                }
            }
            if (expandedAtLeastOneAction) {
                ++numRewiredOrExploredStates;
            }
            // Pass the choice labels of the POMDP along.
            for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
                if (pomdp().hasChoiceLabeling()) {
                    auto rowIndex = pomdp().getTransitionMatrix().getRowGroupIndices()[beliefManager->getRepresentativeState(currId)];
                    if (pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).size() > 0) {
                        overApproximation->addChoiceLabelToCurrentState(action, *(pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).begin()));
                    }
                }
            }
        }
    }
    if (storm::utility::resources::isTerminate()) {
        // The exploration was aborted: record the current number of states.
        if (!statistics.overApproximationStates) {
            statistics.overApproximationBuildAborted = true;
            statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates();
        }
        statistics.overApproximationBuildTime.stop();
        return false;
    }

    overApproximation->finishExploration();
    statistics.overApproximationBuildTime.stop();

    statistics.overApproximationCheckTime.start();
    // ... (check the explored over-approximation MDP)
    statistics.overApproximationCheckTime.stop();
    // ...
    statistics.overApproximationStates = overApproximation->getExploredMdp()->getNumberOfStates();
    return fixPoint;
}
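
// buildOverApproximation returns true iff the discretization reached a fixpoint: no observation
// resolution was refined, no new state was explored, and no truncation, rewiring, or delayed
// rewiring could still affect the bound. refineReachability combines this flag with the
// under-approximation's flag to detect that further refinement cannot improve the result.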
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::buildUnderApproximation(
    storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, bool refine,
    HeuristicParameters const& heuristicParameters, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation,
    bool firstIteration) {
    statistics.underApproximationBuildTime.start();

    unfoldingStatus = Status::Exploring;
    if (options.useClipping) {
        // Initialize the clipping statistics.
        statistics.nrClippingAttempts = 0;
        statistics.nrClippedStates = 0;
    }

    uint64_t nrCutoffStrategies = min ? underApproximation->getNrSchedulersForUpperBounds() : underApproximation->getNrSchedulersForLowerBounds();

    bool fixPoint = true;
    if (heuristicParameters.sizeThreshold != std::numeric_limits<uint64_t>::max()) {
        statistics.underApproximationStateLimit = heuristicParameters.sizeThreshold;
    }
    if (options.interactiveUnfolding && !firstIteration) {
        // Continue the exploration that was paused in an earlier iteration.
        underApproximation->restoreExplorationState();
    } else if (computeRewards) {
        // Build a new belief MDP, using an extra state to cut off infinite values.
        underApproximation->startNewExploration(storm::utility::zero<ValueType>(), storm::utility::infinity<ValueType>());
    } else {
        underApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
    }
    if (/* ... */) {
        // Restart the exploration of the previously built belief MDP.
        underApproximation->restartExploration();
    }

    // Expand the beliefs.
    storm::utility::Stopwatch printUpdateStopwatch;
    printUpdateStopwatch.start();
    storm::utility::Stopwatch explorationTime;
    if (options.explorationTimeLimit != 0) {
        explorationTime.start();
    }
    bool timeLimitExceeded = false;
    bool stateStored = false;
    while (underApproximation->hasUnexploredState()) {
        if (!timeLimitExceeded && options.explorationTimeLimit != 0 &&
            static_cast<uint64_t>(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit) {
            STORM_LOG_INFO("Exploration time limit exceeded.");
            timeLimitExceeded = true;
        }
        if (/* ... */) {
            printUpdateStopwatch.restart();
            STORM_LOG_INFO("### " << underApproximation->getCurrentNumberOfMdpStates() << " beliefs in underapproximation MDP"
                                  << " ##### " << underApproximation->getUnexploredStates().size() << " beliefs queued\n");
            if (underApproximation->getCurrentNumberOfMdpStates() > heuristicParameters.sizeThreshold && options.useClipping) {
                STORM_LOG_INFO("##### Clipping Attempts: " << statistics.nrClippingAttempts.value() << " ##### "
                                                           << "Clipped States: " << statistics.nrClippedStates.value() << "\n");
            }
        }
        if (unfoldingControl == UnfoldingControl::Pause && !stateStored) {
            underApproximation->storeExplorationState();
            stateStored = true;
        }
        uint64_t currId = underApproximation->exploreNextState();
        uint32_t currObservation = beliefManager->getBeliefObservation(currId);
        uint64_t addedActions = 0;
        bool stateAlreadyExplored = refine && underApproximation->currentStateHasOldBehavior() && !underApproximation->getCurrentStateWasTruncated();
        if (!stateAlreadyExplored || timeLimitExceeded) {
            fixPoint = false;
        }
        if (targetObservations.count(beliefManager->getBeliefObservation(currId)) != 0) {
            underApproximation->setCurrentStateIsTarget();
            underApproximation->addSelfloopTransition();
            underApproximation->addChoiceLabelToCurrentState(0, "loop");
        } else {
            bool stopExploration = false;
            bool clipBelief = false;
            if (timeLimitExceeded || (options.interactiveUnfolding && unfoldingControl != UnfoldingControl::Run)) {
                clipBelief = options.useClipping;
                stopExploration = !underApproximation->isMarkedAsGridBelief(currId);
            } else if (!stateAlreadyExplored) {
                // Check whether we want to explore this state at all.
                ValueType gap = getGap(underApproximation->getLowerValueBoundAtCurrentState(), underApproximation->getUpperValueBoundAtCurrentState());
                if ((gap < heuristicParameters.gapThreshold) || (gap == 0 && options.cutZeroGap)) {
                    stopExploration = true;
                } else if (underApproximation->getCurrentNumberOfMdpStates() >= heuristicParameters.sizeThreshold) {
                    clipBelief = options.useClipping;
                    stopExploration = !underApproximation->isMarkedAsGridBelief(currId);
                }
            }

            if (clipBelief && !underApproximation->isMarkedAsGridBelief(currId)) {
                // Clip the current belief onto the grid.
                if (!options.useStateEliminationCutoff) {
                    bool successfulClip = clipToGridExplicitly(currId, computeRewards, beliefManager, underApproximation, 0);
                    // Re-evaluate: the current belief may have been detected to be a grid belief.
                    stopExploration = !underApproximation->isMarkedAsGridBelief(currId);
                    if (successfulClip) {
                        addedActions += 1;
                    }
                } else {
                    clipToGrid(currId, computeRewards, min, beliefManager, underApproximation);
                    addedActions += beliefManager->getBeliefNumberOfChoices(currId);
                }
            }
            if (stopExploration) {
                underApproximation->setCurrentStateIsTruncated();
            }
            if (options.useStateEliminationCutoff || !stopExploration) {
                // Add successor transitions, or cut-off transitions when exploration is stopped.
                uint64_t numActions = beliefManager->getBeliefNumberOfChoices(currId);
                if (underApproximation->needsActionAdjustment(numActions)) {
                    underApproximation->adjustActions(numActions);
                }
                for (uint64_t action = 0; action < numActions; ++action) {
                    // Forward the choice labels of the POMDP.
                    if (pomdp().hasChoiceLabeling()) {
                        auto rowIndex = pomdp().getTransitionMatrix().getRowGroupIndices()[beliefManager->getRepresentativeState(currId)];
                        if (pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).size() > 0) {
                            underApproximation->addChoiceLabelToCurrentState(addedActions + action,
                                                                             *(pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).begin()));
                        }
                    }
                    if (stateAlreadyExplored) {
                        underApproximation->restoreOldBehaviorAtCurrentState(action);
                    } else {
                        auto truncationProbability = storm::utility::zero<ValueType>();
                        auto truncationValueBound = storm::utility::zero<ValueType>();
                        auto successors = beliefManager->expand(currId, action);
                        for (auto const& successor : successors) {
                            bool added = underApproximation->addTransitionToBelief(addedActions + action, successor.first, successor.second, stopExploration);
                            if (!added) {
                                STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
                                // Accumulate a value bound for the truncated successors.
                                truncationProbability += successor.second;
                                truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first)
                                                                                : underApproximation->computeLowerValueBoundAtBelief(successor.first));
                            }
                        }
                        if (stopExploration) {
                            if (computeRewards) {
                                if (truncationValueBound == storm::utility::infinity<ValueType>()) {
                                    // Send the truncated probability mass to the extra state that collects infinite rewards.
                                    underApproximation->addTransitionsToExtraStates(addedActions + action, storm::utility::zero<ValueType>(),
                                                                                    truncationProbability);
                                } else {
                                    underApproximation->addTransitionsToExtraStates(addedActions + action, truncationProbability);
                                }
                            } else {
                                underApproximation->addTransitionsToExtraStates(addedActions + action, truncationValueBound,
                                                                                truncationProbability - truncationValueBound);
                            }
                        }
                        if (computeRewards) {
                            // The truncationValueBound is a reward bound in this case.
                            if (truncationValueBound != storm::utility::infinity<ValueType>()) {
                                underApproximation->computeRewardAtCurrentState(action, truncationValueBound);
                            } else {
                                underApproximation->addRewardToCurrentState(addedActions + action,
                                                                            beliefManager->getBeliefActionReward(currId, action) + truncationValueBound);
                            }
                        }
                    }
                }
            } else {
                // Exploration is stopped here: add cut-off transitions based on the known scheduler values.
                for (uint64_t i = 0; i < nrCutoffStrategies && !options.skipHeuristicSchedulers; ++i) {
                    auto cutOffValue = min ? underApproximation->computeUpperValueBoundForScheduler(currId, i)
                                           : underApproximation->computeLowerValueBoundForScheduler(currId, i);
                    if (computeRewards) {
                        if (cutOffValue != storm::utility::infinity<ValueType>()) {
                            underApproximation->addTransitionsToExtraStates(addedActions, storm::utility::one<ValueType>());
                            underApproximation->addRewardToCurrentState(addedActions, cutOffValue);
                        } else {
                            underApproximation->addTransitionsToExtraStates(addedActions, storm::utility::zero<ValueType>(), storm::utility::one<ValueType>());
                        }
                    } else {
                        underApproximation->addTransitionsToExtraStates(addedActions, cutOffValue, storm::utility::one<ValueType>() - cutOffValue);
                    }
                    if (pomdp().hasChoiceLabeling()) {
                        underApproximation->addChoiceLabelToCurrentState(addedActions, "sched_" + std::to_string(i));
                    }
                    ++addedActions;
                }
                if (underApproximation->hasFMSchedulerValues()) {
                    uint64_t transitionNr = 0;
                    for (uint64_t i = 0; i < underApproximation->getNrOfMemoryNodesForObservation(currObservation); ++i) {
                        auto resPair = underApproximation->computeFMSchedulerValueForMemoryNode(currId, i);
                        ValueType cutOffValue = storm::utility::zero<ValueType>();
                        if (resPair.first) {
                            cutOffValue = resPair.second;
                        } else {
                            STORM_LOG_DEBUG("Skipped cut-off of belief with ID " << currId << " with finite memory scheduler in memory node " << i
                                                                                 << ". Missing values.");
                            continue;
                        }
                        if (computeRewards) {
                            if (cutOffValue != storm::utility::infinity<ValueType>()) {
                                underApproximation->addTransitionsToExtraStates(addedActions + transitionNr, storm::utility::one<ValueType>());
                                underApproximation->addRewardToCurrentState(addedActions + transitionNr, cutOffValue);
                            } else {
                                underApproximation->addTransitionsToExtraStates(addedActions + transitionNr, storm::utility::zero<ValueType>(),
                                                                                storm::utility::one<ValueType>());
                            }
                        } else {
                            underApproximation->addTransitionsToExtraStates(addedActions + transitionNr, cutOffValue,
                                                                            storm::utility::one<ValueType>() - cutOffValue);
                        }
                        if (pomdp().hasChoiceLabeling()) {
                            underApproximation->addChoiceLabelToCurrentState(addedActions + transitionNr, "mem_node_" + std::to_string(i));
                        }
                        ++transitionNr;
                    }
                }
                // ...
            }
        }
    }
    if (storm::utility::resources::isTerminate()) {
        // The exploration was aborted: record the current number of states.
        if (!statistics.underApproximationStates) {
            statistics.underApproximationBuildAborted = true;
            statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates();
        }
        statistics.underApproximationBuildTime.stop();
        return false;
    }

    underApproximation->finishExploration();
    statistics.underApproximationBuildTime.stop();
    printUpdateStopwatch.stop();
    STORM_LOG_INFO("Finished exploring under-approximation MDP.\nStart analysis...\n");
    unfoldingStatus = Status::ModelExplorationFinished;
    statistics.underApproximationCheckTime.start();
    // ... (check the explored under-approximation MDP)
    statistics.underApproximationCheckTime.stop();
    if (underApproximation->getExploredMdp()->getStateLabeling().getStates("truncated").getNumberOfSetBits() > 0) {
        statistics.nrTruncatedStates = underApproximation->getExploredMdp()->getStateLabeling().getStates("truncated").getNumberOfSetBits();
    }
    // ...
    statistics.underApproximationStates = underApproximation->getExploredMdp()->getNumberOfStates();
    return fixPoint;
}
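
// buildUnderApproximation yields a sound under-approximation (an upper bound when minimizing)
// because every state at which exploration stops is either truncated, with its remaining value
// covered by cut-off values of known schedulers (labeled "sched_<i>" or "mem_node_<i>"), or
// clipped onto a grid belief; the returned flag again signals whether a fixpoint was reached.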
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::clipToGrid(uint64_t clippingStateId, bool computeRewards, bool min,
                                                                                                    std::shared_ptr<BeliefManagerType>& beliefManager,
                                                                                                    std::shared_ptr<ExplorerType>& beliefExplorer) {
    // Iterate over all actions and determine the successor beliefs.
    for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(clippingStateId); action < numActions; ++action) {
        auto rewardBound = utility::zero<BeliefValueType>();
        auto successors = beliefManager->expand(clippingStateId, action);
        auto absDelta = utility::zero<BeliefValueType>();
        for (auto const& successor : successors) {
            // Add the transition directly if the successor is already on the grid.
            bool added = beliefExplorer->addTransitionToBelief(action, successor.first, successor.second, true);
            if (!added) {
                // The successor is not on the grid: try to clip it.
                statistics.nrClippingAttempts = statistics.nrClippingAttempts.value() + 1;
                auto clipping = beliefManager->clipBeliefToGrid(
                    successor.first, options.clippingGridRes, computeRewards ? beliefExplorer->getStateExtremeBoundIsInfinite() : storm::storage::BitVector());
                if (clipping.isClippable) {
                    // The belief is not on the grid and there is a candidate with a valid clipping value.
                    statistics.nrClippedStates = statistics.nrClippedStates.value() + 1;
                    // Adjust the transition probability by the clipping value.
                    BeliefValueType transitionProb =
                        (utility::one<BeliefValueType>() - clipping.delta) * utility::convertNumber<BeliefValueType>(successor.second);
                    beliefExplorer->addTransitionToBelief(action, clipping.targetBelief, utility::convertNumber<BeliefMDPType>(transitionProb), false);
                    // Accumulate the probability mass that is clipped away.
                    absDelta += clipping.delta * utility::convertNumber<BeliefValueType>(successor.second);
                    if (computeRewards) {
                        // Collect a reward bound for the clipped-away probability mass.
                        auto localRew = utility::zero<BeliefValueType>();
                        for (auto const& deltaValue : clipping.deltaValues) {
                            localRew += deltaValue.second *
                                        utility::convertNumber<BeliefValueType>((beliefExplorer->getExtremeValueBoundAtPOMDPState(deltaValue.first)));
                        }
                        if (localRew == utility::infinity<BeliefValueType>()) {
                            // ...
                        }
                        rewardBound += localRew * utility::convertNumber<BeliefValueType>(successor.second);
                    }
                } else if (clipping.onGrid) {
                    // The belief is not clippable, but actually on the grid.
                    beliefExplorer->addTransitionToBelief(action, successor.first, successor.second, false);
                } else {
                    // The belief is neither clippable nor on the grid: redirect its mass to the extra states.
                    absDelta += utility::convertNumber<BeliefValueType>(successor.second);
                    rewardBound += utility::convertNumber<BeliefValueType>(successor.second) *
                                   utility::convertNumber<BeliefValueType>(min ? beliefExplorer->computeUpperValueBoundAtBelief(successor.first)
                                                                               : beliefExplorer->computeLowerValueBoundAtBelief(successor.first));
                }
            }
        }
        // Add the transitions to the extra states.
        if (absDelta != utility::zero<BeliefValueType>()) {
            if (computeRewards) {
                if (rewardBound == utility::infinity<BeliefValueType>()) {
                    // Send the clipped mass to the extra state that collects infinite rewards.
                    beliefExplorer->addTransitionsToExtraStates(action, utility::zero<BeliefMDPType>(), utility::convertNumber<BeliefMDPType>(absDelta));
                } else {
                    beliefExplorer->addTransitionsToExtraStates(action, utility::convertNumber<BeliefMDPType>(absDelta));
                    BeliefValueType totalRewardVal = rewardBound / absDelta;
                    beliefExplorer->addClippingRewardToCurrentState(action, utility::convertNumber<BeliefMDPType>(totalRewardVal));
                }
            } else {
                beliefExplorer->addTransitionsToExtraStates(action, utility::zero<BeliefMDPType>(), utility::convertNumber<BeliefMDPType>(absDelta));
            }
        }
        if (computeRewards) {
            beliefExplorer->computeRewardAtCurrentState(action);
        }
    }
}
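
// Clipping in a nutshell: a successor belief b that is not a grid point is split into a grid
// belief b' with weight (1 - delta) and a remainder of weight delta that is redirected to the
// extra states, where delta is computed by clipBeliefToGrid. With successor probability 0.5 and
// delta = 0.1, for instance, the explorer receives a transition of 0.45 to b' while 0.05 is
// accumulated in absDelta (together with a matching reward bound when rewards are computed).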
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::clipToGridExplicitly(uint64_t clippingStateId, bool computeRewards,
                                                                                                              std::shared_ptr<BeliefManagerType>& beliefManager,
                                                                                                              std::shared_ptr<ExplorerType>& beliefExplorer,
                                                                                                              uint64_t localActionIndex) {
    statistics.nrClippingAttempts = statistics.nrClippingAttempts.value() + 1;
    auto clipping = beliefManager->clipBeliefToGrid(clippingStateId, options.clippingGridRes,
                                                    computeRewards ? beliefExplorer->getStateExtremeBoundIsInfinite() : storm::storage::BitVector());
    if (clipping.isClippable) {
        // The belief is not on the grid and there is a candidate with a valid clipping value.
        statistics.nrClippedStates = statistics.nrClippedStates.value() + 1;
        // Adjust the transition probability by the clipping value.
        BeliefValueType transitionProb = (utility::one<BeliefValueType>() - clipping.delta);
        beliefExplorer->addTransitionToBelief(localActionIndex, clipping.targetBelief, utility::convertNumber<BeliefMDPType>(transitionProb), false);
        beliefExplorer->markAsGridBelief(clipping.targetBelief);
        if (computeRewards) {
            // Collect a reward bound for the clipped-away probability mass.
            auto reward = utility::zero<BeliefValueType>();
            for (auto const& deltaValue : clipping.deltaValues) {
                reward += deltaValue.second * utility::convertNumber<BeliefValueType>((beliefExplorer->getExtremeValueBoundAtPOMDPState(deltaValue.first)));
            }
            if (reward == utility::infinity<BeliefValueType>()) {
                // Send the clipped mass to the extra state that collects infinite rewards.
                beliefExplorer->addTransitionsToExtraStates(localActionIndex, utility::zero<BeliefMDPType>(),
                                                            utility::convertNumber<BeliefMDPType>(clipping.delta));
            } else {
                beliefExplorer->addTransitionsToExtraStates(localActionIndex, utility::convertNumber<BeliefMDPType>(clipping.delta));
                BeliefValueType totalRewardVal = reward / clipping.delta;
                beliefExplorer->addClippingRewardToCurrentState(localActionIndex, utility::convertNumber<BeliefMDPType>(totalRewardVal));
            }
        } else {
            beliefExplorer->addTransitionsToExtraStates(localActionIndex, utility::zero<BeliefMDPType>(),
                                                        utility::convertNumber<BeliefMDPType>(clipping.delta));
        }
        beliefExplorer->addChoiceLabelToCurrentState(localActionIndex, "clip");
        return true;
    }
    if (clipping.onGrid) {
        // The belief is already on the grid.
        beliefExplorer->markAsGridBelief(clippingStateId);
    }
    return false;
}
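
// Unlike clipToGrid, which processes every successor of every action, this explicit variant clips
// the current belief itself once (at the given localActionIndex) and marks the resulting target
// belief as a grid belief, so that subsequent exploration can continue from grid beliefs only.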
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::setUnfoldingControl(
    storm::pomdp::modelchecker::BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::UnfoldingControl newUnfoldingControl) {
    unfoldingControl = newUnfoldingControl;
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::pauseUnfolding() {
    setUnfoldingControl(UnfoldingControl::Pause);
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::continueUnfolding() {
    setUnfoldingControl(UnfoldingControl::Run);
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::terminateUnfolding() {
    setUnfoldingControl(UnfoldingControl::Terminate);
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::isResultReady() {
    return unfoldingStatus == Status::ResultAvailable || unfoldingStatus == Status::Converged;
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::hasConverged() {
    return unfoldingStatus == Status::Converged;
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::isExploring() {
    return unfoldingStatus == Status::Exploring;
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
std::shared_ptr<storm::builder::BeliefMdpExplorer<PomdpModelType, BeliefValueType>>
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getInteractiveBeliefExplorer() {
    return interactiveUnderApproximationExplorer;
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::setFMSchedValueList(
    std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> valueList) {
    interactiveUnderApproximationExplorer->setFMSchedValueList(valueList);
}
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
BeliefValueType BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::rateObservation(
    typename ExplorerType::SuccessorObservationInformation const& info, BeliefValueType const& observationResolution, BeliefValueType const& maxResolution) {
    auto n = storm::utility::convertNumber<BeliefValueType, uint64_t>(info.support.size());
    auto one = storm::utility::one<BeliefValueType>();
    if (storm::utility::isOne(n)) {
        // A belief with a single state in its support cannot be resolved more precisely.
        return one;
    }
    // Create the rating for this observation at this choice from the given info.
    auto obsChoiceRating = storm::utility::convertNumber<BeliefValueType, ValueType>(info.maxProbabilityToSuccessorWithObs / info.observationProbability);
    // At this point, obsChoiceRating is the largest triangulation weight, which lies in [1/n, 1].
    // Normalize the rating to [0, 1].
    obsChoiceRating = (obsChoiceRating * n - one) / (n - one);
    // Scale the rating by the relative resolution, so coarsely resolved observations rate lower.
    obsChoiceRating *= observationResolution / maxResolution;
    return obsChoiceRating;
}
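
// Rating sketch: with n the support size of the successor belief, the largest triangulation
// weight w = maxProbabilityToSuccessorWithObs / observationProbability lies in [1/n, 1]. The
// affine map (w * n - 1) / (n - 1) sends it to [0, 1], where 0 means the mass is spread as evenly
// as possible over the support (refinement most useful) and 1 means the successor already
// coincides with a grid point. The factor observationResolution / maxResolution additionally
// lowers the rating of coarsely resolved observations so that they are refined first.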
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
std::vector<BeliefValueType> BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getObservationRatings(
    std::shared_ptr<ExplorerType> const& overApproximation, std::vector<BeliefValueType> const& observationResolutionVector) {
    uint64_t numMdpStates = overApproximation->getExploredMdp()->getNumberOfStates();
    auto const& choiceIndices = overApproximation->getExploredMdp()->getNondeterministicChoiceIndices();
    BeliefValueType maxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
    std::vector<BeliefValueType> resultingRatings(pomdp().getNrObservations(), storm::utility::one<BeliefValueType>());
    std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations;
    for (uint64_t mdpState = 0; mdpState < numMdpStates; ++mdpState) {
        // Only consider states that are reachable under an optimal scheduler.
        if (overApproximation->stateIsOptimalSchedulerReachable(mdpState)) {
            for (uint64_t mdpChoice = choiceIndices[mdpState]; mdpChoice < choiceIndices[mdpState + 1]; ++mdpChoice) {
                // Only consider choices that are optimal in the explored over-approximation.
                if (overApproximation->actionIsOptimal(mdpChoice)) {
                    gatheredSuccessorObservations.clear();
                    overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
                    for (auto const& obsInfo : gatheredSuccessorObservations) {
                        auto const& obs = obsInfo.first;
                        BeliefValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution);
                        // The rating of an observation is the minimum over all choice-based ratings.
                        resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating);
                    }
                }
            }
        }
    }
    return resultingRatings;
}
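
// Illustration (not part of the original source): a sketch of how the ratings could be
// used during refinement, e.g. doubling the resolution of every observation whose rating
// falls below a threshold. The threshold (0.5) and the doubling factor are assumptions;
// overApproximation and observationResolutionVector are assumed to be in scope, as in
// the refinement loop.
//
//     auto ratings = getObservationRatings(overApproximation, observationResolutionVector);
//     for (uint32_t obs = 0; obs < ratings.size(); ++obs) {
//         if (ratings[obs] < storm::utility::convertNumber<BeliefValueType>(0.5)) {
//             observationResolutionVector[obs] *= storm::utility::convertNumber<BeliefValueType, uint64_t>(2);
//         }
//     }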
template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename PomdpModelType::ValueType BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getGap(
    typename PomdpModelType::ValueType const& l, typename PomdpModelType::ValueType const& u) {
    STORM_LOG_ASSERT(l >= storm::utility::zero<typename PomdpModelType::ValueType>() && u >= storm::utility::zero<typename PomdpModelType::ValueType>(),
                     "Gap computation currently does not handle negative values.");
    if (storm::utility::isInfinity(u)) {
        if (storm::utility::isInfinity(l)) {
            return storm::utility::zero<typename PomdpModelType::ValueType>();
        } else {
            return u;
        }
    } else if (storm::utility::isZero(u)) {
        STORM_LOG_ASSERT(storm::utility::isZero(l), "Upper bound is zero, but lower bound is " << l << ".");
        return u;
    } else {
        STORM_LOG_ASSERT(!storm::utility::isInfinity(l), "Lower bound is infinity, but upper bound is " << u << ".");
        // Return the gap relative to the mean of the two bounds.
        return storm::utility::abs<typename PomdpModelType::ValueType>(u - l) * storm::utility::convertNumber<typename PomdpModelType::ValueType, uint64_t>(2) /
               (l + u);
    }
}
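
// Worked example (illustration, not part of the original source): for bounds l = 0.4 and
// u = 0.6, the relative gap is |0.6 - 0.4| * 2 / (0.4 + 0.6) = 0.4, i.e. the width of the
// bound interval measured relative to the midpoint (l + u) / 2 = 0.5.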
template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<double>>;
template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<double>, storm::RationalNumber>;
template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<storm::RationalNumber>, double>;
template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<storm::RationalNumber>>;

}  // namespace modelchecker
}  // namespace pomdp
}  // namespace storm
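
// Illustration (not part of the original source): the explicit instantiations above allow
// mixing value types, e.g. exact rational POMDP values with double-precision beliefs. The
// construction below is a sketch; 'pomdp' is assumed to be a
// std::shared_ptr<storm::models::sparse::Pomdp<storm::RationalNumber>> built elsewhere.
//
//     storm::pomdp::modelchecker::BeliefExplorationPomdpModelChecker<
//         storm::models::sparse::Pomdp<storm::RationalNumber>, double> checker(pomdp);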