Storm
A Modern Probabilistic Model Checker
BeliefExplorationPomdpModelChecker.cpp
1#include "storm-pomdp/modelchecker/BeliefExplorationPomdpModelChecker.h"
2
3#include <tuple>
4
8
12
16
20#include "storm/utility/graph.h"
22
23namespace storm {
24namespace pomdp {
25namespace modelchecker {
26
27/* Struct Functions */
28
29template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
30BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::Result(ValueType lower, ValueType upper)
31    : lowerBound(lower), upperBound(upper) {
32 // Intentionally left empty
33}
34
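// Result::diff computes the gap between the two bounds: (upperBound - lowerBound), divided by upperBound
// if 'relative' is set and the upper bound is non-zero. A slightly negative difference caused by
// numerical imprecision is clamped to zero.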
35template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
36typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::ValueType
37BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::diff(bool relative) const {
38    ValueType diff = upperBound - lowerBound;
39 if (diff < storm::utility::zero<ValueType>()) {
40        STORM_LOG_WARN_COND(diff >= -storm::utility::convertNumber<ValueType>(1e-6),
41 "Upper bound '" << upperBound << "' is smaller than lower bound '" << lowerBound << "': Difference is " << diff << ".");
42 diff = storm::utility::zero<ValueType>();
43 }
44 if (relative && !storm::utility::isZero(upperBound)) {
45 diff /= upperBound;
46 }
47 return diff;
48}
49
50template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
51bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::updateLowerBound(ValueType const& value) {
52    if (value > lowerBound) {
53 lowerBound = value;
54 return true;
55 }
56 return false;
57}
58
59template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
60bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::updateUpperBound(ValueType const& value) {
61    if (value < upperBound) {
62 upperBound = value;
63 return true;
64 }
65 return false;
66}
67
68template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
69BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Statistics::Statistics()
70    : beliefMdpDetectedToBeFinite(false),
71 refinementFixpointDetected(false),
72 overApproximationBuildAborted(false),
73 underApproximationBuildAborted(false),
74 aborted(false) {
75 // intentionally left empty;
76}
77
78/* Constructor */
79
80template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
81BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::BeliefExplorationPomdpModelChecker(std::shared_ptr<PomdpModelType> pomdp,
82                                                                                                                       Options options)
83 : options(options), inputPomdp(pomdp) {
84 STORM_LOG_ASSERT(inputPomdp, "The given POMDP is not initialized.");
85 STORM_LOG_ERROR_COND(inputPomdp->isCanonic(), "Input Pomdp is not known to be canonic. This might lead to unexpected verification results.");
86
87 beliefTypeCC = storm::utility::ConstantsComparator<BeliefValueType>(storm::utility::convertNumber<BeliefValueType>(this->options.numericPrecision), false);
88 valueTypeCC = storm::utility::ConstantsComparator<ValueType>(this->options.numericPrecision, false);
89}
90
91/* Public Functions */
92
93template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
94void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::precomputeValueBounds(storm::logic::Formula const& formula,
95                                                                                                               storm::Environment const& preProcEnv) {
96 auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp(), formula);
97
98 // Compute some initial bounds on the values for each state of the pomdp
99 // We work with the Belief MDP value type, so if the POMDP is exact, but the belief MDP is not, we need to convert
100 auto preProcessingMC = PreprocessingPomdpValueBoundsModelChecker<ValueType>(pomdp());
101 auto initialPomdpValueBounds = preProcessingMC.getValueBounds(preProcEnv, formula);
102 pomdpValueBounds.trivialPomdpValueBounds = initialPomdpValueBounds;
103
104 // If we clip and compute rewards, compute the values necessary for the correction terms
105 if (options.useClipping && formula.isRewardOperatorFormula()) {
106 pomdpValueBounds.extremePomdpValueBound = preProcessingMC.getExtremeValueBound(preProcEnv, formula);
107 }
108}
109
110template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
111typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
112BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
113    storm::Environment const& env, storm::logic::Formula const& formula,
114 std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
115 return check(env, formula, env, additionalUnderApproximationBounds);
116}
117
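// The remaining check(...) overloads construct a default storm::Environment for the missing argument(s)
// and forward to the most general overload further below.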
118template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
119typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
120BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
121    storm::logic::Formula const& formula, std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
122    storm::Environment env;
123    return check(env, formula, env, additionalUnderApproximationBounds);
124}
125
126template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
127typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
128BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
129    storm::logic::Formula const& formula, storm::Environment const& preProcEnv,
130    std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
131    storm::Environment env;
132    return check(env, formula, preProcEnv, additionalUnderApproximationBounds);
133}
134
135template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
136typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
137BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
138    storm::Environment const& env, storm::logic::Formula const& formula, storm::Environment const& preProcEnv,
139 std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
140 STORM_LOG_ASSERT(options.unfold || options.discretize || options.interactiveUnfolding,
141 "Invoked belief exploration but no task (unfold or discretize) given.");
142 // Potentially reset preprocessed model from previous call
143 preprocessedPomdp.reset();
144
145 // Reset all collected statistics
146 statistics = Statistics();
147 statistics.totalTime.start();
148 // Extract the relevant information from the formula
149 auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp(), formula);
150
151 precomputeValueBounds(formula, preProcEnv);
152 if (!additionalUnderApproximationBounds.empty()) {
153 pomdpValueBounds.fmSchedulerValueList = additionalUnderApproximationBounds;
154 }
155 uint64_t initialPomdpState = pomdp().getInitialStates().getNextSetIndex(0);
156 Result result(pomdpValueBounds.trivialPomdpValueBounds.getHighestLowerBound(initialPomdpState),
157 pomdpValueBounds.trivialPomdpValueBounds.getSmallestUpperBound(initialPomdpState));
158 STORM_LOG_INFO("Initial value bounds are [" << result.lowerBound << ", " << result.upperBound << "]");
159
160 std::optional<std::string> rewardModelName;
161 std::set<uint32_t> targetObservations;
162 if (formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula()) {
163 if (formulaInfo.getTargetStates().observationClosed) {
164 targetObservations = formulaInfo.getTargetStates().observations;
165 } else {
166            storm::transformer::MakeStateSetObservationClosed<ValueType> obsCloser(inputPomdp);
167            std::tie(preprocessedPomdp, targetObservations) = obsCloser.transform(formulaInfo.getTargetStates().states);
168 }
169 if (formulaInfo.isNonNestedReachabilityProbability()) {
170 if (!formulaInfo.getSinkStates().empty()) {
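                // Build a copy of the POMDP in which the given sink states are made absorbing, so that the
                // reachability probability is computed on the modified model. Sink states must not be able
                // to reach non-sink states; this is checked below.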
171                storm::storage::sparse::ModelComponents<ValueType> components;
172                components.stateLabeling = pomdp().getStateLabeling();
173 components.rewardModels = pomdp().getRewardModels();
174 auto matrix = pomdp().getTransitionMatrix();
175 matrix.makeRowGroupsAbsorbing(formulaInfo.getSinkStates().states);
176 components.transitionMatrix = matrix;
177 components.observabilityClasses = pomdp().getObservations();
178 if (pomdp().hasChoiceLabeling()) {
179 components.choiceLabeling = pomdp().getChoiceLabeling();
180 }
181 if (pomdp().hasObservationValuations()) {
182 components.observationValuations = pomdp().getObservationValuations();
183 }
184 preprocessedPomdp = std::make_shared<storm::models::sparse::Pomdp<ValueType>>(std::move(components), true);
185 auto reachableFromSinkStates = storm::utility::graph::getReachableStates(
186 pomdp().getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states);
187 reachableFromSinkStates &= ~formulaInfo.getSinkStates().states;
188 STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException,
189 "There are sink states that can reach non-sink states. This is currently not supported");
190 }
191 } else {
192 // Expected reward formula!
193 rewardModelName = formulaInfo.getRewardModelName();
194 }
195 } else {
196 STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'.");
197 }
198 if (storm::pomdp::detectFiniteBeliefMdp(pomdp(), formulaInfo.getTargetStates().states)) {
199 STORM_LOG_INFO("Detected that the belief MDP is finite.");
200 statistics.beliefMdpDetectedToBeFinite = true;
201 }
202 if (options.interactiveUnfolding) {
203 unfoldInteractively(env, targetObservations, formulaInfo.minimize(), rewardModelName, pomdpValueBounds, result);
204 } else {
205 refineReachability(env, targetObservations, formulaInfo.minimize(), rewardModelName, pomdpValueBounds, result);
206 }
207 // "clear" results in case they were actually not requested (this will make the output a bit more clear)
208 if ((formulaInfo.minimize() && !options.discretize) || (formulaInfo.maximize() && !options.unfold)) {
209 result.lowerBound = -storm::utility::infinity<ValueType>();
210 }
211 if ((formulaInfo.maximize() && !options.discretize) || (formulaInfo.minimize() && !options.unfold)) {
212 result.upperBound = storm::utility::infinity<ValueType>();
213 }
214
215    if (storm::utility::resources::isTerminate()) {
216        statistics.aborted = true;
217 }
218 statistics.totalTime.stop();
219 return result;
220}
221
222template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
223void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::printStatisticsToStream(std::ostream& stream) const {
224    stream << "##### POMDP Approximation Statistics ######\n";
225 stream << "# Input model: \n";
226 pomdp().printModelInformationToStream(stream);
227 stream << "# Max. Number of states with same observation: " << pomdp().getMaxNrStatesWithSameObservation() << '\n';
228 if (statistics.beliefMdpDetectedToBeFinite) {
229 stream << "# Pre-computations detected that the belief MDP is finite.\n";
230 }
231 if (statistics.aborted) {
232 stream << "# Computation aborted early\n";
233 }
234
235 stream << "# Total check time: " << statistics.totalTime << '\n';
236 // Refinement information:
237 if (statistics.refinementSteps) {
238 stream << "# Number of refinement steps: " << statistics.refinementSteps.value() << '\n';
239 }
240 if (statistics.refinementFixpointDetected) {
241 stream << "# Detected a refinement fixpoint.\n";
242 }
243
244 // The overapproximation MDP:
245 if (statistics.overApproximationStates) {
246 stream << "# Number of states in the ";
247 if (options.refine) {
248 stream << "final ";
249 }
250 stream << "grid MDP for the over-approximation: ";
251 if (statistics.overApproximationBuildAborted) {
252 stream << ">=";
253 }
254 stream << statistics.overApproximationStates.value() << '\n';
255 stream << "# Maximal resolution for over-approximation: " << statistics.overApproximationMaxResolution.value() << '\n';
256 stream << "# Time spend for building the over-approx grid MDP(s): " << statistics.overApproximationBuildTime << '\n';
257 stream << "# Time spend for checking the over-approx grid MDP(s): " << statistics.overApproximationCheckTime << '\n';
258 }
259
260 // The underapproximation MDP:
261 if (statistics.underApproximationStates) {
262 stream << "# Number of states in the ";
263 if (options.refine) {
264 stream << "final ";
265 }
266 stream << "belief MDP for the under-approximation: ";
267 if (statistics.underApproximationBuildAborted) {
268 stream << ">=";
269 }
270 stream << statistics.underApproximationStates.value() << '\n';
271 if (statistics.nrClippingAttempts) {
272 stream << "# Clipping attempts (clipped states) for the under-approximation: ";
273 if (statistics.underApproximationBuildAborted) {
274 stream << ">=";
275 }
276 stream << statistics.nrClippingAttempts.value() << " (" << statistics.nrClippedStates.value() << ")\n";
277 stream << "# Total clipping preprocessing time: " << statistics.clippingPreTime << "\n";
278 stream << "# Total clipping time: " << statistics.clipWatch << "\n";
279 } else if (statistics.nrTruncatedStates) {
280 stream << "# Truncated states for the under-approximation: ";
281 if (statistics.underApproximationBuildAborted) {
282 stream << ">=";
283 }
284 stream << statistics.nrTruncatedStates.value() << "\n";
285 }
286 if (statistics.underApproximationStateLimit) {
287 stream << "# Exploration state limit for under-approximation: " << statistics.underApproximationStateLimit.value() << '\n';
288 }
289 stream << "# Time spend for building the under-approx grid MDP(s): " << statistics.underApproximationBuildTime << '\n';
290 stream << "# Time spend for checking the under-approx grid MDP(s): " << statistics.underApproximationCheckTime << '\n';
291 }
292
293 stream << "##########################################\n";
294}
295
296/* Private Functions */
297
298template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
299PomdpModelType const& BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::pomdp() const {
300    if (preprocessedPomdp) {
301 return *preprocessedPomdp;
302 } else {
303 return *inputPomdp;
304 }
305}
306
307template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
308void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::refineReachability(
309 storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, std::optional<std::string> rewardModelName,
310 storm::pomdp::modelchecker::POMDPValueBounds<ValueType> const& valueBounds, Result& result) {
311 statistics.refinementSteps = 0;
312 auto trivialPOMDPBounds = valueBounds.trivialPomdpValueBounds;
313 // Set up exploration data
314 std::vector<BeliefValueType> observationResolutionVector;
315 std::shared_ptr<BeliefManagerType> overApproxBeliefManager;
316 std::shared_ptr<ExplorerType> overApproximation;
317 HeuristicParameters overApproxHeuristicPar{};
318 if (options.discretize) { // Setup and build first OverApproximation
319 observationResolutionVector =
320 std::vector<BeliefValueType>(pomdp().getNrObservations(), storm::utility::convertNumber<BeliefValueType>(options.resolutionInit));
321 overApproxBeliefManager = std::make_shared<BeliefManagerType>(
322 pomdp(), storm::utility::convertNumber<BeliefValueType>(options.numericPrecision),
323 options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
324 if (rewardModelName) {
325 overApproxBeliefManager->setRewardModel(rewardModelName);
326 }
327 overApproximation = std::make_shared<ExplorerType>(overApproxBeliefManager, trivialPOMDPBounds, storm::builder::ExplorationHeuristic::BreadthFirst);
328 overApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
329 overApproxHeuristicPar.observationThreshold = options.obsThresholdInit;
330 overApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit == 0 ? std::numeric_limits<uint64_t>::max() : options.sizeThresholdInit;
331 overApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
332
333 buildOverApproximation(env, targetObservations, min, rewardModelName.has_value(), false, overApproxHeuristicPar, observationResolutionVector,
334 overApproxBeliefManager, overApproximation);
335 if (!overApproximation->hasComputedValues() || storm::utility::resources::isTerminate()) {
336 return;
337 }
338 ValueType const& newValue = overApproximation->getComputedValueAtInitialState();
339 bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue);
340 if (betterBound) {
341 STORM_LOG_INFO("Initial Over-approx result obtained after " << statistics.totalTime << ". Value is '" << newValue << "'.\n");
342 }
343 }
344
345 std::shared_ptr<BeliefManagerType> underApproxBeliefManager;
346 std::shared_ptr<ExplorerType> underApproximation;
347 HeuristicParameters underApproxHeuristicPar{};
348 if (options.unfold) { // Setup and build first UnderApproximation
349 underApproxBeliefManager = std::make_shared<BeliefManagerType>(
350 pomdp(), storm::utility::convertNumber<BeliefValueType>(options.numericPrecision),
351 options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
352 if (rewardModelName) {
353 underApproxBeliefManager->setRewardModel(rewardModelName);
354 }
355 underApproximation = std::make_shared<ExplorerType>(underApproxBeliefManager, trivialPOMDPBounds, options.explorationHeuristic);
356 underApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
357 underApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
358 underApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit;
359 if (underApproxHeuristicPar.sizeThreshold == 0) {
360 if (!options.refine && options.explorationTimeLimit != 0) {
361 underApproxHeuristicPar.sizeThreshold = std::numeric_limits<uint64_t>::max();
362 } else {
363 underApproxHeuristicPar.sizeThreshold = pomdp().getNumberOfStates() * pomdp().getMaxNrStatesWithSameObservation();
364 STORM_LOG_INFO("Heuristically selected an under-approximation MDP size threshold of " << underApproxHeuristicPar.sizeThreshold << ".\n");
365 }
366 }
367
368 if (options.useClipping && rewardModelName.has_value()) {
369 underApproximation->setExtremeValueBound(valueBounds.extremePomdpValueBound);
370 }
371 if (!valueBounds.fmSchedulerValueList.empty()) {
372 underApproximation->setFMSchedValueList(valueBounds.fmSchedulerValueList);
373 }
374 buildUnderApproximation(env, targetObservations, min, rewardModelName.has_value(), false, underApproxHeuristicPar, underApproxBeliefManager,
375 underApproximation, false);
376 if (!underApproximation->hasComputedValues() || storm::utility::resources::isTerminate()) {
377 return;
378 }
379 ValueType const& newValue = underApproximation->getComputedValueAtInitialState();
380 bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue);
381 if (betterBound) {
382 STORM_LOG_INFO("Initial Under-approx result obtained after " << statistics.totalTime << ". Value is '" << newValue << "'.\n");
383 }
384 }
385
386 // Do some output
387 STORM_LOG_INFO("Completed (initial) computation. Current checktime is " << statistics.totalTime << ".");
388 bool computingLowerBound = false;
389 bool computingUpperBound = false;
390 if (options.discretize) {
391 STORM_LOG_INFO("\tOver-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << ".");
392 (min ? computingLowerBound : computingUpperBound) = true;
393 }
394 if (options.unfold) {
395 STORM_LOG_INFO("\tUnder-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << ".");
396 (min ? computingUpperBound : computingLowerBound) = true;
397 }
398 if (computingLowerBound && computingUpperBound) {
399 STORM_LOG_INFO("\tObtained result is [" << result.lowerBound << ", " << result.upperBound << "].");
400 } else if (computingLowerBound) {
401 STORM_LOG_INFO("\tObtained result is ≥" << result.lowerBound << ".");
402 } else if (computingUpperBound) {
403 STORM_LOG_INFO("\tObtained result is ≤" << result.upperBound << ".");
404 }
405
406 // Start refinement
407 if (options.refine) {
408 STORM_LOG_WARN_COND(options.refineStepLimit != 0 || !storm::utility::isZero(options.refinePrecision),
409 "No termination criterion for refinement given. Consider to specify a steplimit, a non-zero precisionlimit, or a timeout");
410 STORM_LOG_WARN_COND(storm::utility::isZero(options.refinePrecision) || (options.unfold && options.discretize),
411 "Refinement goal precision is given, but only one bound is going to be refined.");
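        // Alternate between refining the over-approximation (discretize) and the under-approximation (unfold)
        // until the step limit is reached, the gap between the bounds drops below the refinement precision,
        // or both approximations report a fixpoint.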
412 while ((options.refineStepLimit == 0 || statistics.refinementSteps.value() < options.refineStepLimit) && result.diff() > options.refinePrecision) {
413 bool overApproxFixPoint = true;
414 bool underApproxFixPoint = true;
415 if (options.discretize) {
416 // Refine over-approximation
417 if (min) {
418 overApproximation->takeCurrentValuesAsLowerBounds();
419 } else {
420 overApproximation->takeCurrentValuesAsUpperBounds();
421 }
422 overApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor;
423 overApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber<uint64_t, ValueType>(
424 storm::utility::convertNumber<ValueType, uint64_t>(overApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor);
425 overApproxHeuristicPar.observationThreshold +=
426 options.obsThresholdIncrementFactor * (storm::utility::one<ValueType>() - overApproxHeuristicPar.observationThreshold);
427 overApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor;
428 overApproxFixPoint = buildOverApproximation(env, targetObservations, min, rewardModelName.has_value(), true, overApproxHeuristicPar,
429 observationResolutionVector, overApproxBeliefManager, overApproximation);
430 if (overApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) {
431 ValueType const& newValue = overApproximation->getComputedValueAtInitialState();
432 bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue);
433 if (betterBound) {
434 STORM_LOG_INFO("Over-approx result for refinement improved after " << statistics.totalTime << " in refinement step #"
435 << (statistics.refinementSteps.value() + 1) << ". New value is '"
436 << newValue << "'.");
437 }
438 } else {
439 break;
440 }
441 }
442
443 if (options.unfold && result.diff() > options.refinePrecision) {
444 // Refine under-approximation
445 underApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor;
446 underApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber<uint64_t, ValueType>(
447 storm::utility::convertNumber<ValueType, uint64_t>(underApproximation->getExploredMdp()->getNumberOfStates()) *
448 options.sizeThresholdFactor);
449 underApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor;
450 underApproxFixPoint = buildUnderApproximation(env, targetObservations, min, rewardModelName.has_value(), true, underApproxHeuristicPar,
451 underApproxBeliefManager, underApproximation, true);
452 if (underApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) {
453 ValueType const& newValue = underApproximation->getComputedValueAtInitialState();
454 bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue);
455 if (betterBound) {
456 STORM_LOG_INFO("Under-approx result for refinement improved after " << statistics.totalTime << " in refinement step #"
457 << (statistics.refinementSteps.value() + 1) << ". New value is '"
458 << newValue << "'.");
459 }
460 } else {
461 break;
462 }
463 }
464
465            if (storm::utility::resources::isTerminate()) {
466                break;
467 } else {
468 ++statistics.refinementSteps.value();
469 // Don't make too many outputs (to avoid logfile clutter)
470 if (statistics.refinementSteps.value() <= 1000) {
471 STORM_LOG_INFO("Completed iteration #" << statistics.refinementSteps.value() << ". Current checktime is " << statistics.totalTime << ".");
472 computingLowerBound = false;
473 computingUpperBound = false;
474 if (options.discretize) {
475 STORM_LOG_INFO("\tOver-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << ".");
476 (min ? computingLowerBound : computingUpperBound) = true;
477 }
478 if (options.unfold) {
479 STORM_LOG_INFO("\tUnder-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << ".");
480 (min ? computingUpperBound : computingLowerBound) = true;
481 }
482 if (computingLowerBound && computingUpperBound) {
483 STORM_LOG_INFO("\tCurrent result is [" << result.lowerBound << ", " << result.upperBound << "].");
484 } else if (computingLowerBound) {
485 STORM_LOG_INFO("\tCurrent result is ≥" << result.lowerBound << ".");
486 } else if (computingUpperBound) {
487 STORM_LOG_INFO("\tCurrent result is ≤" << result.upperBound << ".");
488 }
489 STORM_LOG_WARN_COND(statistics.refinementSteps.value() < 1000, "Refinement requires more than 1000 iterations.");
490 }
491 }
492 if (overApproxFixPoint && underApproxFixPoint) {
493 STORM_LOG_INFO("Refinement fixpoint reached after " << statistics.refinementSteps.value() << " iterations.\n");
494 statistics.refinementFixpointDetected = true;
495 break;
496 }
497 }
498 }
499 // Print model information of final over- / under-approximation MDP
500 if (options.discretize && overApproximation->hasComputedValues()) {
501 auto printOverInfo = [&overApproximation]() {
502 std::stringstream str;
503 str << "Explored and checked Over-Approximation MDP:\n";
504 overApproximation->getExploredMdp()->printModelInformationToStream(str);
505 return str.str();
506 };
507 STORM_LOG_INFO(printOverInfo());
508 }
509 if (options.unfold && underApproximation->hasComputedValues()) {
510 auto printUnderInfo = [&underApproximation]() {
511 std::stringstream str;
512 str << "Explored and checked Under-Approximation MDP:\n";
513 underApproximation->getExploredMdp()->printModelInformationToStream(str);
514 return str.str();
515 };
516 STORM_LOG_INFO(printUnderInfo());
517 std::shared_ptr<storm::models::sparse::Model<ValueType>> scheduledModel = underApproximation->getExploredMdp();
518 if (!options.useStateEliminationCutoff) {
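            // Post-process the under-approximation for scheduler extraction: states labelled "truncated" are
            // re-labelled as "cutoff", "clipping" or "finite_mem_*" according to the choice label selected by
            // the scheduler (clipped rows are made Dirac on the last successor of the chosen row), and the
            // scheduler is then applied to obtain the induced Markov chain.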
519 storm::models::sparse::StateLabeling newLabeling(scheduledModel->getStateLabeling());
520 auto nrPreprocessingScheds = min ? underApproximation->getNrSchedulersForUpperBounds() : underApproximation->getNrSchedulersForLowerBounds();
521 for (uint64_t i = 0; i < nrPreprocessingScheds; ++i) {
522 newLabeling.addLabel("sched_" + std::to_string(i));
523 }
524 newLabeling.addLabel("cutoff");
525 newLabeling.addLabel("clipping");
526 newLabeling.addLabel("finite_mem");
527
528 auto transMatrix = scheduledModel->getTransitionMatrix();
529 for (uint64_t i = 0; i < scheduledModel->getNumberOfStates(); ++i) {
530 if (newLabeling.getStateHasLabel("truncated", i)) {
531 uint64_t localChosenActionIndex = underApproximation->getSchedulerForExploredMdp()->getChoice(i).getDeterministicChoice();
532 auto rowIndex = scheduledModel->getTransitionMatrix().getRowGroupIndices()[i];
533 if (scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).size() > 0) {
534 auto label = *(scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).begin());
535 if (label.rfind("clip", 0) == 0) {
536 newLabeling.addLabelToState("clipping", i);
537 auto chosenRow = transMatrix.getRow(i, 0);
538 auto candidateIndex = (chosenRow.end() - 1)->getColumn();
539 transMatrix.makeRowDirac(transMatrix.getRowGroupIndices()[i], candidateIndex);
540 } else if (label.rfind("mem_node", 0) == 0) {
541 if (!newLabeling.containsLabel("finite_mem_" + label.substr(9, 1))) {
542 newLabeling.addLabel("finite_mem_" + label.substr(9, 1));
543 }
544 newLabeling.addLabelToState("finite_mem_" + label.substr(9, 1), i);
545 newLabeling.addLabelToState("cutoff", i);
546 } else {
547 newLabeling.addLabelToState(label, i);
548 newLabeling.addLabelToState("cutoff", i);
549 }
550 }
551 }
552 }
553 newLabeling.removeLabel("truncated");
554
555 transMatrix.dropZeroEntries();
556 storm::storage::sparse::ModelComponents<ValueType> modelComponents(transMatrix, newLabeling);
557 if (scheduledModel->hasChoiceLabeling()) {
558 modelComponents.choiceLabeling = scheduledModel->getChoiceLabeling();
559 }
560 storm::models::sparse::Mdp<ValueType> newMDP(modelComponents);
561 auto inducedMC = newMDP.applyScheduler(*(underApproximation->getSchedulerForExploredMdp()), true);
562 scheduledModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType>>(inducedMC);
563 } else {
564 auto inducedMC = underApproximation->getExploredMdp()->applyScheduler(*(underApproximation->getSchedulerForExploredMdp()), true);
565 scheduledModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType>>(inducedMC);
566 }
567 result.schedulerAsMarkovChain = scheduledModel;
568 if (min) {
569 result.cutoffSchedulers = underApproximation->getUpperValueBoundSchedulers();
570 } else {
571 result.cutoffSchedulers = underApproximation->getLowerValueBoundSchedulers();
572 }
573 }
574}
575
576template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
577void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::unfoldInteractively(
578    storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, std::optional<std::string> rewardModelName,
579    storm::pomdp::modelchecker::POMDPValueBounds<ValueType> const& valueBounds, Result& result) {
580    statistics.refinementSteps = 0;
581 interactiveResult = result;
582 unfoldingStatus = Status::Uninitialized;
583 unfoldingControl = UnfoldingControl::Run;
584 auto trivialPOMDPBounds = valueBounds.trivialPomdpValueBounds;
585 // Set up exploration data
586 std::shared_ptr<BeliefManagerType> underApproxBeliefManager;
587 HeuristicParameters underApproxHeuristicPar{};
588 bool firstIteration = true;
589 // Set up belief manager
590 underApproxBeliefManager = std::make_shared<BeliefManagerType>(
591 pomdp(), storm::utility::convertNumber<BeliefValueType>(options.numericPrecision),
592 options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
593 if (rewardModelName) {
594 underApproxBeliefManager->setRewardModel(rewardModelName);
595 }
596
597 // set up belief MDP explorer
598 interactiveUnderApproximationExplorer = std::make_shared<ExplorerType>(underApproxBeliefManager, trivialPOMDPBounds, options.explorationHeuristic);
599 underApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
600 underApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
601 underApproxHeuristicPar.sizeThreshold = std::numeric_limits<uint64_t>::max() - 1; // we don't set a size threshold
602
603 if (options.useClipping && rewardModelName.has_value()) {
604 interactiveUnderApproximationExplorer->setExtremeValueBound(valueBounds.extremePomdpValueBound);
605 }
606
607 if (!valueBounds.fmSchedulerValueList.empty()) {
608 interactiveUnderApproximationExplorer->setFMSchedValueList(valueBounds.fmSchedulerValueList);
609 }
610
611 // Start iteration
612 while (!(unfoldingControl ==
613             UnfoldingControl::Terminate)) {  // 'Terminate' assumed as the control value that ends the interactive loop
614        bool underApproxFixPoint = true;
615 bool hasTruncatedStates = false;
616 if (unfoldingStatus != Status::Converged) {
617 // Continue unfolding underapproximation
618 underApproxFixPoint = buildUnderApproximation(env, targetObservations, min, rewardModelName.has_value(), false, underApproxHeuristicPar,
619 underApproxBeliefManager, interactiveUnderApproximationExplorer, firstIteration);
620 if (interactiveUnderApproximationExplorer->hasComputedValues() && !storm::utility::resources::isTerminate()) {
621 ValueType const& newValue = interactiveUnderApproximationExplorer->getComputedValueAtInitialState();
622 bool betterBound = min ? interactiveResult.updateUpperBound(newValue) : interactiveResult.updateLowerBound(newValue);
623 if (betterBound) {
624 STORM_LOG_INFO("Under-approximation result improved after " << statistics.totalTime << " in step #"
625 << (statistics.refinementSteps.value() + 1) << ". New value is '" << newValue
626 << "'.");
627 }
628 std::shared_ptr<storm::models::sparse::Model<ValueType>> scheduledModel = interactiveUnderApproximationExplorer->getExploredMdp();
629 if (!options.useStateEliminationCutoff) {
630 storm::models::sparse::StateLabeling newLabeling(scheduledModel->getStateLabeling());
631 auto nrPreprocessingScheds = min ? interactiveUnderApproximationExplorer->getNrSchedulersForUpperBounds()
632 : interactiveUnderApproximationExplorer->getNrSchedulersForLowerBounds();
633 for (uint64_t i = 0; i < nrPreprocessingScheds; ++i) {
634 newLabeling.addLabel("sched_" + std::to_string(i));
635 }
636 newLabeling.addLabel("cutoff");
637 newLabeling.addLabel("clipping");
638 newLabeling.addLabel("finite_mem");
639
640 auto transMatrix = scheduledModel->getTransitionMatrix();
641 for (uint64_t i = 0; i < scheduledModel->getNumberOfStates(); ++i) {
642 if (newLabeling.getStateHasLabel("truncated", i)) {
643 hasTruncatedStates = true;
644 uint64_t localChosenActionIndex =
645 interactiveUnderApproximationExplorer->getSchedulerForExploredMdp()->getChoice(i).getDeterministicChoice();
646 auto rowIndex = scheduledModel->getTransitionMatrix().getRowGroupIndices()[i];
647 if (scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).size() > 0) {
648 auto label = *(scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).begin());
649 if (label.rfind("clip", 0) == 0) {
650 newLabeling.addLabelToState("clipping", i);
651 auto chosenRow = transMatrix.getRow(i, 0);
652 auto candidateIndex = (chosenRow.end() - 1)->getColumn();
653 transMatrix.makeRowDirac(transMatrix.getRowGroupIndices()[i], candidateIndex);
654 } else if (label.rfind("mem_node", 0) == 0) {
655 if (!newLabeling.containsLabel("finite_mem_" + label.substr(9, 1))) {
656 newLabeling.addLabel("finite_mem_" + label.substr(9, 1));
657 }
658 newLabeling.addLabelToState("finite_mem_" + label.substr(9, 1), i);
659 newLabeling.addLabelToState("cutoff", i);
660 } else {
661 newLabeling.addLabelToState(label, i);
662 newLabeling.addLabelToState("cutoff", i);
663 }
664 }
665 }
666 }
667 newLabeling.removeLabel("truncated");
668
669 transMatrix.dropZeroEntries();
670 storm::storage::sparse::ModelComponents<ValueType> modelComponents(transMatrix, newLabeling);
671 if (scheduledModel->hasChoiceLabeling()) {
672 modelComponents.choiceLabeling = scheduledModel->getChoiceLabeling();
673 }
674 storm::models::sparse::Mdp<ValueType> newMDP(modelComponents);
675 auto inducedMC = newMDP.applyScheduler(*(interactiveUnderApproximationExplorer->getSchedulerForExploredMdp()), true);
676 scheduledModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType>>(inducedMC);
677 }
678 interactiveResult.schedulerAsMarkovChain = scheduledModel;
679 if (min) {
680 interactiveResult.cutoffSchedulers = interactiveUnderApproximationExplorer->getUpperValueBoundSchedulers();
681 } else {
682 interactiveResult.cutoffSchedulers = interactiveUnderApproximationExplorer->getLowerValueBoundSchedulers();
683 }
684 if (firstIteration) {
685 firstIteration = false;
686 }
687 unfoldingStatus = Status::ResultAvailable;
688 } else {
689 break;
690 }
691
692            if (storm::utility::resources::isTerminate()) {
693                break;
694 } else {
695 ++statistics.refinementSteps.value();
696 // Don't make too many outputs (to avoid logfile clutter)
697 if (statistics.refinementSteps.value() <= 1000) {
698 STORM_LOG_INFO("Completed iteration #" << statistics.refinementSteps.value() << ". Current checktime is " << statistics.totalTime << ".");
699 bool computingLowerBound = false;
700 bool computingUpperBound = false;
701 if (options.unfold) {
702 STORM_LOG_INFO("\tUnder-approx MDP has size " << interactiveUnderApproximationExplorer->getExploredMdp()->getNumberOfStates() << ".");
703 (min ? computingUpperBound : computingLowerBound) = true;
704 }
705 if (computingLowerBound && computingUpperBound) {
706 STORM_LOG_INFO("\tCurrent result is [" << interactiveResult.lowerBound << ", " << interactiveResult.upperBound << "].");
707 } else if (computingLowerBound) {
708 STORM_LOG_INFO("\tCurrent result is ≥" << interactiveResult.lowerBound << ".");
709 } else if (computingUpperBound) {
710 STORM_LOG_INFO("\tCurrent result is ≤" << interactiveResult.upperBound << ".");
711 }
712 }
713 }
714 if (underApproxFixPoint) {
715 STORM_LOG_INFO("Fixpoint reached after " << statistics.refinementSteps.value() << " iterations.\n");
716 statistics.refinementFixpointDetected = true;
717 unfoldingStatus = Status::Converged;
718 unfoldingControl = UnfoldingControl::Pause;
719 }
720 if (!hasTruncatedStates) {
721 STORM_LOG_INFO("No states have been truncated, so continued iteration does not yield new results.\n");
722 unfoldingStatus = Status::Converged;
723 unfoldingControl = UnfoldingControl::Pause;
724 }
725 }
726 // While we tell the procedure to be paused, idle
727 while (unfoldingControl ==
728               UnfoldingControl::Pause && !storm::utility::resources::isTerminate()) {  // idle while paused (condition partly reconstructed)
729        }
730    }
731 STORM_LOG_INFO("\tInteractive Unfolding terminated.\n");
732}
733
734template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
735void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::unfoldInteractively(
736    std::set<uint32_t> const& targetObservations, bool min, std::optional<std::string> rewardModelName,
737    storm::pomdp::modelchecker::POMDPValueBounds<ValueType> const& valueBounds, Result& result) {
738    storm::Environment env;
739    unfoldInteractively(env, targetObservations, min, rewardModelName, valueBounds, result);
740}
741
742template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
747
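// The following getter encodes the current unfolding status as a plain integer:
// 0 = Uninitialized, 1 = Exploring, 2 = ModelExplorationFinished, 3 = ResultAvailable, 4 = Terminated, -1 otherwise.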
748template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
749int64_t BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getStatus() {  // name and return type assumed; the original line was elided in this listing
750    if (unfoldingStatus == Status::Uninitialized)
751 return 0;
752 if (unfoldingStatus == Status::Exploring)
753 return 1;
754 if (unfoldingStatus == Status::ModelExplorationFinished)
755 return 2;
756 if (unfoldingStatus == Status::ResultAvailable)
757 return 3;
758 if (unfoldingStatus == Status::Terminated)
759 return 4;
760
761 return -1;
762}
763
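// getGap returns the relative gap between a lower bound l and an upper bound u,
// i.e. 2 * |u - l| / (l + u), with special treatment of infinite and zero bounds.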
764template<typename ValueType>
765ValueType getGap(ValueType const& l, ValueType const& u) {
766 STORM_LOG_ASSERT(l >= storm::utility::zero<ValueType>() && u >= storm::utility::zero<ValueType>(),
767 "Gap computation currently does not handle negative values.");
768    if (storm::utility::isInfinity(u)) {
769        if (storm::utility::isInfinity(l)) {
770            return storm::utility::zero<ValueType>();
771 } else {
772 return u;
773 }
774 } else if (storm::utility::isZero(u)) {
775 STORM_LOG_ASSERT(storm::utility::isZero(l), "Upper bound is zero but lower bound is " << l << ".");
776 return u;
777 } else {
778 STORM_LOG_ASSERT(!storm::utility::isInfinity(l), "Lower bound is infinity, but upper bound is " << u << ".");
779 // get the relative gap
780 return storm::utility::abs<ValueType>(u - l) * storm::utility::convertNumber<ValueType, uint64_t>(2) / (l + u);
781 }
782}
783
784template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
785bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::buildOverApproximation(
786 storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, bool refine,
787 HeuristicParameters const& heuristicParameters, std::vector<BeliefValueType>& observationResolutionVector,
788 std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
789 // Detect whether the refinement reached a fixpoint.
790 bool fixPoint = true;
791
792 statistics.overApproximationBuildTime.start();
793 storm::storage::BitVector refinedObservations;
794 if (!refine) {
795 // If we build the model from scratch, we first have to set up the explorer for the overApproximation.
796 if (computeRewards) {
797 overApproximation->startNewExploration(storm::utility::zero<ValueType>());
798 } else {
799 overApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
800 }
801 } else {
802 // If we refine the existing overApproximation, our heuristic also wants to know which states are reachable under an optimal policy
803 overApproximation->computeOptimalChoicesAndReachableMdpStates(heuristicParameters.optimalChoiceValueEpsilon, true);
804 // We also need to find out which observation resolutions needs refinement.
805 // current maximal resolution (needed for refinement heuristic)
806 auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector);
807 // If there is a score < 1, we have not reached a fixpoint, yet
808 auto numericPrecision = storm::utility::convertNumber<BeliefValueType>(options.numericPrecision);
809 if (std::any_of(obsRatings.begin(), obsRatings.end(),
810 [&numericPrecision](BeliefValueType const& value) { return value + numericPrecision < storm::utility::one<BeliefValueType>(); })) {
811 STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because there are still observations to refine.");
812 fixPoint = false;
813 }
814 refinedObservations = storm::utility::vector::filter<BeliefValueType>(obsRatings, [&heuristicParameters](BeliefValueType const& r) {
815 return r <= storm::utility::convertNumber<BeliefValueType>(heuristicParameters.observationThreshold);
816 });
817 STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations.");
818 for (auto const obs : refinedObservations) {
819 // Increment the resolution at the refined observations.
820 // Use storm's rational number to detect overflows properly.
821 storm::RationalNumber newObsResolutionAsRational = storm::utility::convertNumber<storm::RationalNumber>(observationResolutionVector[obs]) *
822 storm::utility::convertNumber<storm::RationalNumber>(options.resolutionFactor);
823 static_assert(storm::NumberTraits<BeliefValueType>::IsExact || std::is_same<BeliefValueType, double>::value, "Unhandled belief value type");
824                if (!storm::NumberTraits<BeliefValueType>::IsExact &&
825                    newObsResolutionAsRational > storm::utility::convertNumber<storm::RationalNumber>(std::numeric_limits<double>::max())) {
826 observationResolutionVector[obs] = storm::utility::convertNumber<BeliefValueType>(std::numeric_limits<double>::max());
827 } else {
828 observationResolutionVector[obs] = storm::utility::convertNumber<BeliefValueType>(newObsResolutionAsRational);
829 }
830 }
831 overApproximation->restartExploration();
832 }
833 statistics.overApproximationMaxResolution = storm::utility::ceil(*std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()));
834
835 // Start exploration
836 storm::utility::Stopwatch explorationTime;
837 if (options.explorationTimeLimit != 0) {
838 explorationTime.start();
839 }
840 bool timeLimitExceeded = false;
841 std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations; // Declare here to avoid reallocations
842 uint64_t numRewiredOrExploredStates = 0;
843 while (overApproximation->hasUnexploredState()) {
844 if (!timeLimitExceeded && options.explorationTimeLimit != 0 &&
845 static_cast<uint64_t>(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit) {
846 STORM_LOG_INFO("Exploration time limit exceeded.");
847 timeLimitExceeded = true;
848 STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because the exploration time limit is exceeded.");
849 fixPoint = false;
850 }
851
852 uint64_t currId = overApproximation->exploreNextState();
853 bool hasOldBehavior = refine && overApproximation->currentStateHasOldBehavior();
854 if (!hasOldBehavior) {
855 STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because a new state is explored");
856 fixPoint = false; // Exploring a new state!
857 }
858 uint32_t currObservation = beliefManager->getBeliefObservation(currId);
859 if (targetObservations.count(currObservation) != 0) {
860 overApproximation->setCurrentStateIsTarget();
861 overApproximation->addSelfloopTransition();
862 } else {
863 // We need to decide how to treat this state (and each individual enabled action). There are the following cases:
864 // 1 The state has no old behavior and
865 // 1.1 we explore all actions or
866 // 1.2 we truncate all actions
867 // 2 The state has old behavior and was truncated in the last iteration and
868 // 2.1 we explore all actions or
869 // 2.2 we truncate all actions (essentially restoring old behavior, but we do the truncation step again to benefit from updated bounds)
870 // 3 The state has old behavior and was not truncated in the last iteration and the current action
871 // 3.1 should be rewired or
872 // 3.2 should get the old behavior but either
873 // 3.2.1 none of the successor observation has been refined since the last rewiring or exploration of this action
874 // 3.2.2 rewiring is only delayed as it could still have an effect in a later refinement step
875
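            // The flags below encode these cases: exploreAllActions covers 1.1 and 2.1, truncateAllActions
            // covers 1.2 and 2.2, checkRewireForAllActions marks case 3, and restoreAllActions forces case 3.2.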
876 // Find out in which case we are
877 bool exploreAllActions = false;
878 bool truncateAllActions = false;
879 bool restoreAllActions = false;
880 bool checkRewireForAllActions = false;
881 // Get the relative gap
882 ValueType gap = getGap(overApproximation->getLowerValueBoundAtCurrentState(), overApproximation->getUpperValueBoundAtCurrentState());
883 if (!hasOldBehavior) {
884 // Case 1
885 // If we explore this state and if it has no old behavior, it is clear that an "old" optimal scheduler can be extended to a scheduler that
886 // reaches this state
887 if (!timeLimitExceeded && gap >= heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
888 exploreAllActions = true; // Case 1.1
889 } else {
890 truncateAllActions = true; // Case 1.2
891 overApproximation->setCurrentStateIsTruncated();
892 }
893 } else if (overApproximation->getCurrentStateWasTruncated()) {
894 // Case 2
895 if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold &&
896 numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
897 exploreAllActions = true; // Case 2.1
898 STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because a previously truncated state is now explored.");
899 fixPoint = false;
900 } else {
901 truncateAllActions = true; // Case 2.2
902 overApproximation->setCurrentStateIsTruncated();
903 if (fixPoint) {
904 // Properly check whether this can still be a fixpoint
905 if (overApproximation->currentStateIsOptimalSchedulerReachable() && !storm::utility::isZero(gap)) {
906 STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because we truncate a state with non-zero gap "
907 << gap << " that is reachable via an optimal sched.");
908 fixPoint = false;
909 }
910 // else {}
911 // In this case we truncated a state that is not reachable under optimal schedulers.
912 // If no other state is explored (i.e. fixPoint remains true), these states should still not be reachable in subsequent iterations
913 }
914 }
915 } else {
916 // Case 3
917 // The decision for rewiring also depends on the corresponding action, but we have some criteria that lead to case 3.2 (independent of the
918 // action)
919 if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold &&
920 numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
921 checkRewireForAllActions = true; // Case 3.1 or Case 3.2
922 } else {
923 restoreAllActions = true; // Definitely Case 3.2
924 // We still need to check for each action whether rewiring makes sense later
925 checkRewireForAllActions = true;
926 }
927 }
928 bool expandedAtLeastOneAction = false;
929 for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
930 bool expandCurrentAction = exploreAllActions || truncateAllActions;
931 if (checkRewireForAllActions) {
932 assert(refine);
933 // In this case, we still need to check whether this action needs to be expanded
934 assert(!expandCurrentAction);
935 // Check the action dependent conditions for rewiring
936 // First, check whether this action has been rewired since the last refinement of one of the successor observations (i.e. whether rewiring
937 // would actually change the successor states)
938 assert(overApproximation->currentStateHasOldBehavior());
939 if (overApproximation->getCurrentStateActionExplorationWasDelayed(action) ||
940 overApproximation->currentStateHasSuccessorObservationInObservationSet(action, refinedObservations)) {
941 // Then, check whether the other criteria for rewiring are satisfied
942 if (!restoreAllActions && overApproximation->actionAtCurrentStateWasOptimal(action)) {
943 // Do the rewiring now! (Case 3.1)
944 expandCurrentAction = true;
945 STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because we rewire a state.");
946 fixPoint = false;
947 } else {
948 // Delay the rewiring (Case 3.2.2)
949 overApproximation->setCurrentChoiceIsDelayed(action);
950 if (fixPoint) {
951 // Check whether this delay means that a fixpoint has not been reached
952 if (!overApproximation->getCurrentStateActionExplorationWasDelayed(action) ||
953 (overApproximation->currentStateIsOptimalSchedulerReachable() &&
954 overApproximation->actionAtCurrentStateWasOptimal(action) && !storm::utility::isZero(gap))) {
955 STORM_LOG_INFO_COND(!fixPoint,
956 "Not reaching a refinement fixpoint because we delay a rewiring of a state with non-zero gap "
957 << gap << " that is reachable via an optimal scheduler.");
958 fixPoint = false;
959 }
960 }
961 }
962 } // else { Case 3.2.1 }
963 }
964
965 if (expandCurrentAction) {
966 expandedAtLeastOneAction = true;
967 if (!truncateAllActions) {
968 // Cases 1.1, 2.1, or 3.1
969 auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
970 for (auto const& successor : successorGridPoints) {
971 overApproximation->addTransitionToBelief(action, successor.first, successor.second, false);
972 }
973 if (computeRewards) {
974 overApproximation->computeRewardAtCurrentState(action);
975 }
976 } else {
977 // Cases 1.2 or 2.2
978 auto truncationProbability = storm::utility::zero<ValueType>();
979 auto truncationValueBound = storm::utility::zero<ValueType>();
980 auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
981 for (auto const& successor : successorGridPoints) {
982 bool added = overApproximation->addTransitionToBelief(action, successor.first, successor.second, true);
983 if (!added) {
984 // We did not explore this successor state. Get a bound on the "missing" value
985 truncationProbability += successor.second;
986 truncationValueBound += successor.second * (min ? overApproximation->computeLowerValueBoundAtBelief(successor.first)
987 : overApproximation->computeUpperValueBoundAtBelief(successor.first));
988 }
989 }
990 if (computeRewards) {
991 // The truncationValueBound will be added on top of the reward introduced by the current belief state.
992 overApproximation->addTransitionsToExtraStates(action, truncationProbability);
993 overApproximation->computeRewardAtCurrentState(action, truncationValueBound);
994 } else {
995 overApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
996 }
997 }
998 } else {
999 // Case 3.2
1000 overApproximation->restoreOldBehaviorAtCurrentState(action);
1001 }
1002 }
1003 if (expandedAtLeastOneAction) {
1004 ++numRewiredOrExploredStates;
1005 }
1006 }
1007
1008 for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
1009 if (pomdp().hasChoiceLabeling()) {
1010 auto rowIndex = pomdp().getTransitionMatrix().getRowGroupIndices()[beliefManager->getRepresentativeState(currId)];
1011 if (pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).size() > 0) {
1012 overApproximation->addChoiceLabelToCurrentState(action, *(pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).begin()));
1013 }
1014 }
1015 }
1016
1017        if (storm::utility::resources::isTerminate()) {
1018            break;
1019 }
1020 }
1021
1022    if (storm::utility::resources::isTerminate()) {
1023        // don't overwrite statistics of a previous, successful computation
1024 if (!statistics.overApproximationStates) {
1025 statistics.overApproximationBuildAborted = true;
1026 statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates();
1027 }
1028 statistics.overApproximationBuildTime.stop();
1029 return false;
1030 }
1031
1032 overApproximation->finishExploration();
1033 statistics.overApproximationBuildTime.stop();
1034
1035 statistics.overApproximationCheckTime.start();
1036 overApproximation->computeValuesOfExploredMdp(env, min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
1037 statistics.overApproximationCheckTime.stop();
1038
1039 // don't overwrite statistics of a previous, successful computation
1040 if (!storm::utility::resources::isTerminate() || !statistics.overApproximationStates) {
1041 statistics.overApproximationStates = overApproximation->getExploredMdp()->getNumberOfStates();
1042 }
1043 return fixPoint;
1044}
1045
1046template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1047bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::buildUnderApproximation(
1048 storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, bool refine,
1049 HeuristicParameters const& heuristicParameters, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation,
1050 bool firstIteration) {
1051 statistics.underApproximationBuildTime.start();
1052
1053 unfoldingStatus = Status::Exploring;
1054 if (options.useClipping) {
1055 STORM_LOG_INFO("Use Belief Clipping with grid beliefs \n");
1056 statistics.nrClippingAttempts = 0;
1057 statistics.nrClippedStates = 0;
1058 }
1059
1060 uint64_t nrCutoffStrategies = min ? underApproximation->getNrSchedulersForUpperBounds() : underApproximation->getNrSchedulersForLowerBounds();
1061
1062 bool fixPoint = true;
1063 if (heuristicParameters.sizeThreshold != std::numeric_limits<uint64_t>::max()) {
1064 statistics.underApproximationStateLimit = heuristicParameters.sizeThreshold;
1065 }
1066 if (!refine) {
1067 if (options.interactiveUnfolding && !firstIteration) {
1068 underApproximation->restoreExplorationState();
1069 } else if (computeRewards) { // Build a new under approximation
1070 // We use the sink state for infinite cut-off/clipping values
1071 underApproximation->startNewExploration(storm::utility::zero<ValueType>(), storm::utility::infinity<ValueType>());
1072 } else {
1073 underApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
1074 }
1075 } else {
1076 // Restart the building process
1077 underApproximation->restartExploration();
1078 }
1079
1080 // Expand the beliefs
1081 storm::utility::Stopwatch explorationTime;
1082 storm::utility::Stopwatch printUpdateStopwatch;
1083 printUpdateStopwatch.start();
1084 if (options.explorationTimeLimit != 0) {
1085 explorationTime.start();
1086 }
1087 bool timeLimitExceeded = false;
1088 bool stateStored = false;
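    // Main exploration loop: expand unexplored beliefs one by one. Once the time limit or the size threshold
    // is exceeded (or the gap at a belief is small enough), exploration at that belief is stopped and the
    // belief is either clipped to a grid belief or truncated with cut-off values.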
1089 while (underApproximation->hasUnexploredState()) {
1090 if (!timeLimitExceeded && options.explorationTimeLimit != 0 &&
1091 static_cast<uint64_t>(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit) {
1092 STORM_LOG_INFO("Exploration time limit exceeded.");
1093 timeLimitExceeded = true;
1094 }
1095 if (printUpdateStopwatch.getTimeInSeconds() >= 60) {
1096 printUpdateStopwatch.restart();
1097 STORM_LOG_INFO("### " << underApproximation->getCurrentNumberOfMdpStates() << " beliefs in underapproximation MDP" << " ##### "
1098 << underApproximation->getUnexploredStates().size() << " beliefs queued\n");
1099 if (underApproximation->getCurrentNumberOfMdpStates() > heuristicParameters.sizeThreshold && options.useClipping) {
1100 STORM_LOG_INFO("##### Clipping Attempts: " << statistics.nrClippingAttempts.value() << " ##### "
1101 << "Clipped States: " << statistics.nrClippedStates.value() << "\n");
1102 }
1103 }
1104 if (unfoldingControl == UnfoldingControl::Pause && !stateStored) {
1105 underApproximation->storeExplorationState();
1106 stateStored = true;
1107 }
1108 uint64_t currId = underApproximation->exploreNextState();
1109 uint32_t currObservation = beliefManager->getBeliefObservation(currId);
1110 uint64_t addedActions = 0;
1111 bool stateAlreadyExplored = refine && underApproximation->currentStateHasOldBehavior() && !underApproximation->getCurrentStateWasTruncated();
1112 if (!stateAlreadyExplored || timeLimitExceeded) {
1113 fixPoint = false;
1114 }
1115 if (targetObservations.count(beliefManager->getBeliefObservation(currId)) != 0) {
1116 underApproximation->setCurrentStateIsTarget();
1117 underApproximation->addSelfloopTransition();
1118 underApproximation->addChoiceLabelToCurrentState(0, "loop");
1119 } else {
1120 bool stopExploration = false;
1121 bool clipBelief = false;
1122 if (timeLimitExceeded || (options.interactiveUnfolding && unfoldingControl != UnfoldingControl::Run)) {
1123 clipBelief = options.useClipping;
1124 stopExploration = !underApproximation->isMarkedAsGridBelief(currId);
1125 } else if (!stateAlreadyExplored) {
1126 // Check whether we want to explore the state now!
1127 ValueType gap = getGap(underApproximation->getLowerValueBoundAtCurrentState(), underApproximation->getUpperValueBoundAtCurrentState());
1128 if ((gap < heuristicParameters.gapThreshold) || (gap == 0 && options.cutZeroGap)) {
1129 stopExploration = true;
1130 } else if (underApproximation->getCurrentNumberOfMdpStates() >=
1131 heuristicParameters.sizeThreshold /*&& !statistics.beliefMdpDetectedToBeFinite*/) {
1132 clipBelief = options.useClipping;
1133 stopExploration = !underApproximation->isMarkedAsGridBelief(currId);
1134 }
1135 }
1136
1137 if (clipBelief && !underApproximation->isMarkedAsGridBelief(currId)) {
1138 // Use a belief grid as clipping candidates
1139 if (!options.useStateEliminationCutoff) {
1140 bool successfulClip = clipToGridExplicitly(currId, computeRewards, beliefManager, underApproximation, 0);
1141 // Set again as the current belief might have been detected to be a grid belief
1142 stopExploration = !underApproximation->isMarkedAsGridBelief(currId);
1143 if (successfulClip) {
1144 addedActions += 1;
1145 }
1146 } else {
1147 clipToGrid(currId, computeRewards, min, beliefManager, underApproximation);
1148 addedActions += beliefManager->getBeliefNumberOfChoices(currId);
1149 }
1150 } // end Clipping Procedure
1151
1152 if (stopExploration) {
1153 underApproximation->setCurrentStateIsTruncated();
1154 }
1155 if (options.useStateEliminationCutoff || !stopExploration) {
1156 // Add successor transitions or cut-off transitions when exploration is stopped
1157 uint64_t numActions = beliefManager->getBeliefNumberOfChoices(currId);
1158 if (underApproximation->needsActionAdjustment(numActions)) {
1159 underApproximation->adjustActions(numActions);
1160 }
1161 for (uint64_t action = 0; action < numActions; ++action) {
1162 // Always restore old behavior if available
1163 if (pomdp().hasChoiceLabeling()) {
1164 auto rowIndex = pomdp().getTransitionMatrix().getRowGroupIndices()[beliefManager->getRepresentativeState(currId)];
1165 if (pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).size() > 0) {
1166 underApproximation->addChoiceLabelToCurrentState(addedActions + action,
1167 *(pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).begin()));
1168 }
1169 }
1170 if (stateAlreadyExplored) {
1171 underApproximation->restoreOldBehaviorAtCurrentState(action);
1172 } else {
1173 auto truncationProbability = storm::utility::zero<ValueType>();
1174 auto truncationValueBound = storm::utility::zero<ValueType>();
1175 auto successors = beliefManager->expand(currId, action);
1176 for (auto const& successor : successors) {
1177 bool added = underApproximation->addTransitionToBelief(addedActions + action, successor.first, successor.second, stopExploration);
1178 if (!added) {
1179 STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
1180 // We did not explore this successor state. Get a bound on the "missing" value
1181 truncationProbability += successor.second;
1182 // Some care has to be taken here: Essentially, we are triangulating a value for the under-approximation out of
1183 // other under-approximation values. In general, this would no longer yield a sound under-approximation, because the
1184 // individual values might be achieved by different schedulers. In our case, however, soundness is preserved since
1185 // the under-approximation values are based on a single memoryless scheduler.
1186 truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first)
1187 : underApproximation->computeLowerValueBoundAtBelief(successor.first));
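// e.g., a cut-off successor reached with probability 0.3 whose value bound is 0.5 adds 0.3 to truncationProbability
// and 0.3 * 0.5 = 0.15 to truncationValueBound.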
1188 }
1189 }
1190 if (stopExploration) {
1191 if (computeRewards) {
1192 if (truncationValueBound == storm::utility::infinity<ValueType>()) {
1193 underApproximation->addTransitionsToExtraStates(addedActions + action, storm::utility::zero<ValueType>(),
1194 truncationProbability);
1195 } else {
1196 underApproximation->addTransitionsToExtraStates(addedActions + action, truncationProbability);
1197 }
1198 } else {
1199 underApproximation->addTransitionsToExtraStates(addedActions + action, truncationValueBound,
1200 truncationProbability - truncationValueBound);
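// (truncationValueBound of the cut-off mass goes to the extra goal state, the rest to the extra bottom state, so the
// unexplored successors are replaced by the probability bound collected above.)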
1201 }
1202 }
1203 if (computeRewards) {
1204 // The truncationValueBound will be added on top of the reward introduced by the current belief state.
1205 if (truncationValueBound != storm::utility::infinity<ValueType>()) {
1206 if (!clipBelief) {
1207 underApproximation->computeRewardAtCurrentState(action, truncationValueBound);
1208 } else {
1209 underApproximation->addRewardToCurrentState(addedActions + action,
1210 beliefManager->getBeliefActionReward(currId, action) + truncationValueBound);
1211 }
1212 }
1213 }
1214 }
1215 }
1216 } else {
1217 for (uint64_t i = 0; i < nrCutoffStrategies && !options.skipHeuristicSchedulers; ++i) {
1218 auto cutOffValue = min ? underApproximation->computeUpperValueBoundForScheduler(currId, i)
1219 : underApproximation->computeLowerValueBoundForScheduler(currId, i);
1220 if (computeRewards) {
1221 if (cutOffValue != storm::utility::infinity<ValueType>()) {
1222 underApproximation->addTransitionsToExtraStates(addedActions, storm::utility::one<ValueType>());
1223 underApproximation->addRewardToCurrentState(addedActions, cutOffValue);
1224 } else {
1225 underApproximation->addTransitionsToExtraStates(addedActions, storm::utility::zero<ValueType>(), storm::utility::one<ValueType>());
1226 }
1227 } else {
1228 underApproximation->addTransitionsToExtraStates(addedActions, cutOffValue, storm::utility::one<ValueType>() - cutOffValue);
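// i.e., the extra goal state is reached with probability cutOffValue and the extra bottom state with the remaining
// probability, so this cut-off choice reproduces the scheduler's value bound at the current belief.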
1229 }
1230 if (pomdp().hasChoiceLabeling()) {
1231 underApproximation->addChoiceLabelToCurrentState(addedActions, "sched_" + std::to_string(i));
1232 }
1233 addedActions++;
1234 }
1235 if (underApproximation->hasFMSchedulerValues()) {
1236 uint64_t transitionNr = 0;
1237 for (uint64_t i = 0; i < underApproximation->getNrOfMemoryNodesForObservation(currObservation); ++i) {
1238 auto resPair = underApproximation->computeFMSchedulerValueForMemoryNode(currId, i);
1239 ValueType cutOffValue;
1240 if (resPair.first) {
1241 cutOffValue = resPair.second;
1242 } else {
1243 STORM_LOG_DEBUG("Skipped cut-off of belief with ID " << currId << " with finite memory scheduler in memory node " << i
1244 << ". Missing values.");
1245 continue;
1246 }
1247 if (computeRewards) {
1248 if (cutOffValue != storm::utility::infinity<ValueType>()) {
1249 underApproximation->addTransitionsToExtraStates(addedActions + transitionNr, storm::utility::one<ValueType>());
1250 underApproximation->addRewardToCurrentState(addedActions + transitionNr, cutOffValue);
1251 } else {
1252 underApproximation->addTransitionsToExtraStates(addedActions + transitionNr, storm::utility::zero<ValueType>(),
1253 storm::utility::one<ValueType>());
1254 }
1255 } else {
1256 underApproximation->addTransitionsToExtraStates(addedActions + transitionNr, cutOffValue,
1257 storm::utility::one<ValueType>() - cutOffValue);
1258 }
1259 if (pomdp().hasChoiceLabeling()) {
1260 underApproximation->addChoiceLabelToCurrentState(addedActions + transitionNr, "mem_node_" + std::to_string(i));
1261 }
1262 ++transitionNr;
1263 }
1264 }
1265 }
1266 }
1267 if (storm::utility::resources::isTerminate()) {
1268 break;
1269 }
1270 }
1271
1272 if (storm::utility::resources::isTerminate()) {
1273 // don't overwrite statistics of a previous, successful computation
1274 if (!statistics.underApproximationStates) {
1275 statistics.underApproximationBuildAborted = true;
1276 statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates();
1277 }
1278 statistics.underApproximationBuildTime.stop();
1279 return false;
1280 }
1281
1282 underApproximation->finishExploration();
1283 statistics.underApproximationBuildTime.stop();
1284 printUpdateStopwatch.stop();
1285 STORM_LOG_INFO("Finished exploring under-approximation MDP.\nStart analysis...\n");
1286 unfoldingStatus = Status::ModelExplorationFinished;
1287 statistics.underApproximationCheckTime.start();
1288 underApproximation->computeValuesOfExploredMdp(env, min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
1289 statistics.underApproximationCheckTime.stop();
1290 if (underApproximation->getExploredMdp()->getStateLabeling().getStates("truncated").getNumberOfSetBits() > 0) {
1291 statistics.nrTruncatedStates = underApproximation->getExploredMdp()->getStateLabeling().getStates("truncated").getNumberOfSetBits();
1292 }
1293 // don't overwrite statistics of a previous, successful computation
1294 if (!storm::utility::resources::isTerminate() || !statistics.underApproximationStates) {
1295 statistics.underApproximationStates = underApproximation->getExploredMdp()->getNumberOfStates();
1296 }
1297 return fixPoint;
1298}
1299
1300template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1301void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::clipToGrid(uint64_t clippingStateId, bool computeRewards, bool min,
1302 std::shared_ptr<BeliefManagerType>& beliefManager,
1303 std::shared_ptr<ExplorerType>& beliefExplorer) {
1304 // Add all transitions to states which are already in the MDP, clip all others to a grid
1305 // To make the resulting MDP smaller, we eliminate intermediate successor states when clipping is applied
1306 for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(clippingStateId); action < numActions; ++action) {
1307 auto rewardBound = utility::zero<BeliefValueType>();
1308 auto successors = beliefManager->expand(clippingStateId, action);
1309 auto absDelta = utility::zero<BeliefValueType>();
1310 for (auto const& successor : successors) {
1311 // Add transition if successor is in explored space.
1312 // We can directly add the transitions, as there is at most one successor for each observation.
1313 // Therefore, no belief can be clipped to an already added successor.
1314 bool added = beliefExplorer->addTransitionToBelief(action, successor.first, successor.second, true);
1315 if (!added) {
1316 // The successor is not in the explored space. Clip it
1317 statistics.nrClippingAttempts = statistics.nrClippingAttempts.value() + 1;
1318 auto clipping = beliefManager->clipBeliefToGrid(
1319 successor.first, options.clippingGridRes, computeRewards ? beliefExplorer->getStateExtremeBoundIsInfinite() : storm::storage::BitVector());
1320 if (clipping.isClippable) {
1321 // The belief is not on the grid and there is a candidate with finite reward
1322 statistics.nrClippedStates = statistics.nrClippedStates.value() + 1;
1323 // Transition probability to candidate is (probability to successor) * (clipping transition probability)
1324 BeliefValueType transitionProb =
1325 (utility::one<BeliefValueType>() - clipping.delta) * utility::convertNumber<BeliefValueType>(successor.second);
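// e.g., a clipping delta of 0.2 and a successor probability of 0.5 give a transition probability of
// (1 - 0.2) * 0.5 = 0.4 to the grid candidate.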
1326 beliefExplorer->addTransitionToBelief(action, clipping.targetBelief, utility::convertNumber<BeliefMDPType>(transitionProb), false);
1327 // Collect weighted clipping values
1328 absDelta += clipping.delta * utility::convertNumber<BeliefValueType>(successor.second);
1329 if (computeRewards) {
1330 // collect cumulative reward bounds
1331 auto localRew = utility::zero<BeliefValueType>();
1332 for (auto const& deltaValue : clipping.deltaValues) {
1333 localRew += deltaValue.second *
1334 utility::convertNumber<BeliefValueType>((beliefExplorer->getExtremeValueBoundAtPOMDPState(deltaValue.first)));
1335 }
1336 if (localRew == utility::infinity<BeliefValueType>()) {
1337 STORM_LOG_WARN("Infinite reward in clipping!");
1338 }
1339 rewardBound += localRew * utility::convertNumber<BeliefValueType>(successor.second);
1340 }
1341 } else if (clipping.onGrid) {
1342 // If the belief is not clippable, but on the grid, it may need to be explored, too
1343 beliefExplorer->addTransitionToBelief(action, successor.first, successor.second, false);
1344 } else {
1345 // Otherwise, the reward for all candidates is infinite, clipping does not make sense. Cut it off instead
1346 absDelta += utility::convertNumber<BeliefValueType>(successor.second);
1347 rewardBound += utility::convertNumber<BeliefValueType>(successor.second) *
1348 utility::convertNumber<BeliefValueType>(min ? beliefExplorer->computeUpperValueBoundAtBelief(successor.first)
1349 : beliefExplorer->computeLowerValueBoundAtBelief(successor.first));
1350 }
1351 }
1352 }
1353 // Add the collected clipping transition if necessary
1354 if (absDelta != utility::zero<BeliefValueType>()) {
1355 if (computeRewards) {
1356 if (rewardBound == utility::infinity<BeliefValueType>()) {
1357 // If the reward is infinite, add a transition to the sink state to collect infinite reward
1358 beliefExplorer->addTransitionsToExtraStates(action, utility::zero<BeliefMDPType>(), utility::convertNumber<BeliefMDPType>(absDelta));
1359 } else {
1360 beliefExplorer->addTransitionsToExtraStates(action, utility::convertNumber<BeliefMDPType>(absDelta));
1361 BeliefValueType totalRewardVal = rewardBound / absDelta;
1362 beliefExplorer->addClippingRewardToCurrentState(action, utility::convertNumber<BeliefMDPType>(totalRewardVal));
1363 }
1364 } else {
1365 beliefExplorer->addTransitionsToExtraStates(action, utility::zero<BeliefMDPType>(), utility::convertNumber<BeliefMDPType>(absDelta));
1366 }
1367 }
1368 if (computeRewards) {
1369 beliefExplorer->computeRewardAtCurrentState(action);
1370 }
1371 }
1372}
1373
1374template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1375bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::clipToGridExplicitly(uint64_t clippingStateId, bool computeRewards,
1376 std::shared_ptr<BeliefManagerType>& beliefManager,
1377 std::shared_ptr<ExplorerType>& beliefExplorer,
1378 uint64_t localActionIndex) {
1379 statistics.nrClippingAttempts = statistics.nrClippingAttempts.value() + 1;
1380 auto clipping = beliefManager->clipBeliefToGrid(clippingStateId, options.clippingGridRes,
1381 computeRewards ? beliefExplorer->getStateExtremeBoundIsInfinite() : storm::storage::BitVector());
1382 if (clipping.isClippable) {
1383 // The belief is not on the grid and there is a candidate with finite reward
1384 statistics.nrClippedStates = statistics.nrClippedStates.value() + 1;
1385 // Transition probability to candidate is clipping value
1386 BeliefValueType transitionProb = (utility::one<BeliefValueType>() - clipping.delta);
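// e.g., a clipping delta of 0.25 gives probability 0.75 to the grid candidate; the remaining 0.25 is routed to the
// extra states below.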
1387 beliefExplorer->addTransitionToBelief(localActionIndex, clipping.targetBelief, utility::convertNumber<BeliefMDPType>(transitionProb), false);
1388 beliefExplorer->markAsGridBelief(clipping.targetBelief);
1389 if (computeRewards) {
1390 // collect cumulative reward bounds
1391 auto reward = utility::zero<BeliefValueType>();
1392 for (auto const& deltaValue : clipping.deltaValues) {
1393 reward += deltaValue.second * utility::convertNumber<BeliefValueType>((beliefExplorer->getExtremeValueBoundAtPOMDPState(deltaValue.first)));
1394 }
1395 if (reward == utility::infinity<BeliefValueType>()) {
1396 STORM_LOG_WARN("Infinite reward in clipping!");
1397 // If the reward is infinite, add a transition to the sink state to collect infinite reward in our semantics
1398 beliefExplorer->addTransitionsToExtraStates(localActionIndex, utility::zero<BeliefMDPType>(),
1399 utility::convertNumber<BeliefMDPType>(clipping.delta));
1400 } else {
1401 beliefExplorer->addTransitionsToExtraStates(localActionIndex, utility::convertNumber<BeliefMDPType>(clipping.delta));
1402 BeliefValueType totalRewardVal = reward / clipping.delta;
1403 beliefExplorer->addClippingRewardToCurrentState(localActionIndex, utility::convertNumber<BeliefMDPType>(totalRewardVal));
1404 }
1405 } else {
1406 beliefExplorer->addTransitionsToExtraStates(localActionIndex, utility::zero<BeliefMDPType>(),
1407 utility::convertNumber<BeliefMDPType>(clipping.delta));
1408 }
1409 beliefExplorer->addChoiceLabelToCurrentState(localActionIndex, "clip");
1410 return true;
1411 } else {
1412 if (clipping.onGrid) {
1413 // If the belief is not clippable, but on the grid, it may need to be explored, too
1414 beliefExplorer->markAsGridBelief(clippingStateId);
1415 }
1416 }
1417 return false;
1418}
1419
1420template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1421void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::setUnfoldingControl(
1422 storm::pomdp::modelchecker::BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::UnfoldingControl newUnfoldingControl) {
1423 unfoldingControl = newUnfoldingControl;
1424}
1425
1426template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1427void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::pauseUnfolding() {
1428 STORM_LOG_TRACE("PAUSE COMMAND ISSUED");
1429 setUnfoldingControl(UnfoldingControl::Pause);
1430}
1431
1432template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1433void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::continueUnfolding() {
1434 STORM_LOG_TRACE("CONTINUATION COMMAND ISSUED");
1435 setUnfoldingControl(UnfoldingControl::Run);
1436}
1437
1438template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1439void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::terminateUnfolding() {
1440 STORM_LOG_TRACE("TERMINATION COMMAND ISSUED");
1441 setUnfoldingControl(UnfoldingControl::Terminate);
1442}
1443
1444template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1445bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::isResultReady() {
1446 return unfoldingStatus == Status::ResultAvailable || unfoldingStatus == Status::Converged;
1447}
1448
1449template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1453
1454template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1458
1459template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1460std::shared_ptr<storm::builder::BeliefMdpExplorer<PomdpModelType, BeliefValueType>>
1461BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getInteractiveBeliefExplorer() {
1462 return interactiveUnderApproximationExplorer;
1463}
1464
1465template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1467 std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> valueList) {
1468 interactiveUnderApproximationExplorer->setFMSchedValueList(valueList);
1469}
1470
1471template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1473 typename ExplorerType::SuccessorObservationInformation const& info, BeliefValueType const& observationResolution, BeliefValueType const& maxResolution) {
1474 auto n = storm::utility::convertNumber<BeliefValueType, uint64_t>(info.support.size());
1475 auto one = storm::utility::one<BeliefValueType>();
1476 if (storm::utility::isOne(n)) {
1477 // If the belief is Dirac, it has to be approximated precisely.
1478 // In this case, we return the best possible rating
1479 return one;
1480 } else {
1481 // Create the rating for this observation at this choice from the given info
1482 auto obsChoiceRating = storm::utility::convertNumber<BeliefValueType, ValueType>(info.maxProbabilityToSuccessorWithObs / info.observationProbability);
1483 // At this point, obsRating is the largest triangulation weight (which ranges from 1/n to 1)
1484 // Normalize the rating so that it ranges from 0 to 1, where
1485 // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely
1486 // approximated.
1487 obsChoiceRating = (obsChoiceRating * n - one) / (n - one);
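// e.g., a largest triangulation weight of 0.5 over a support of n = 4 states normalizes to (0.5 * 4 - 1) / (4 - 1) = 1/3.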
1488 // Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined)
1489 obsChoiceRating *= observationResolution / maxResolution;
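// e.g., an observation currently triangulated at resolution 12 while the maximum resolution is 24 has its rating halved,
// making it a more likely refinement candidate.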
1490 return obsChoiceRating;
1491 }
1492}
1493
1494template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1495std::vector<BeliefValueType> BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getObservationRatings(
1496 std::shared_ptr<ExplorerType> const& overApproximation, std::vector<BeliefValueType> const& observationResolutionVector) {
1497 uint64_t numMdpStates = overApproximation->getExploredMdp()->getNumberOfStates();
1498 auto const& choiceIndices = overApproximation->getExploredMdp()->getNondeterministicChoiceIndices();
1499 BeliefValueType maxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
1500
1501 std::vector<BeliefValueType> resultingRatings(pomdp().getNrObservations(), storm::utility::one<BeliefValueType>());
1502
1503 std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations; // Declare here to avoid reallocations
1504 for (uint64_t mdpState = 0; mdpState < numMdpStates; ++mdpState) {
1505 // Check whether this state is reached under an optimal scheduler.
1506 // The heuristic assumes that the remaining states are not relevant for the observation score.
1507 if (overApproximation->stateIsOptimalSchedulerReachable(mdpState)) {
1508 for (uint64_t mdpChoice = choiceIndices[mdpState]; mdpChoice < choiceIndices[mdpState + 1]; ++mdpChoice) {
1509 // Similarly, only optimal actions are relevant
1510 if (overApproximation->actionIsOptimal(mdpChoice)) {
1511 // score the observations for this choice
1512 gatheredSuccessorObservations.clear();
1513 overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
1514 for (auto const& obsInfo : gatheredSuccessorObservations) {
1515 auto const& obs = obsInfo.first;
1516 BeliefValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution);
1517
1518 // The rating of the observation will be the minimum over all choice-based observation ratings
1519 resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating);
1520 }
1521 }
1522 }
1523 }
1524 }
1525 return resultingRatings;
1526}
1527
1528template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1529typename PomdpModelType::ValueType BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getGap(
1530 typename PomdpModelType::ValueType const& l, typename PomdpModelType::ValueType const& u) {
1531 STORM_LOG_ASSERT(l >= storm::utility::zero<typename PomdpModelType::ValueType>() && u >= storm::utility::zero<typename PomdpModelType::ValueType>(),
1532 "Gap computation currently does not handle negative values.");
1533 if (storm::utility::isInfinity(u)) {
1534 if (storm::utility::isInfinity(l)) {
1535 return storm::utility::zero<typename PomdpModelType::ValueType>();
1536 } else {
1537 return u;
1538 }
1539 } else if (storm::utility::isZero(u)) {
1540 STORM_LOG_ASSERT(storm::utility::isZero(l), "Upper bound is zero but lower bound is " << l << ".");
1541 return u;
1542 } else {
1543 STORM_LOG_ASSERT(!storm::utility::isInfinity(l), "Lower bound is infinity, but upper bound is " << u << ".");
1544 // get the relative gap
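// e.g., l = 0.4 and u = 0.6 give |0.6 - 0.4| * 2 / (0.4 + 0.6) = 0.4.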
1545 return storm::utility::abs<typename PomdpModelType::ValueType>(u - l) * storm::utility::convertNumber<typename PomdpModelType::ValueType, uint64_t>(2) /
1546 (l + u);
1547 }
1548}
1549
1550/* Template Instantiations */
1551
1552template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<double>>;
1553
1554template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<double>, storm::RationalNumber>;
1555
1556template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<storm::RationalNumber>, double>;
1557
1558template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<storm::RationalNumber>>;
1559
1560} // namespace modelchecker
1561} // namespace pomdp
1562} // namespace storm
virtual bool isRewardOperatorFormula() const
Definition Formula.cpp:176
void addLabel(std::string const &label)
Adds a new label to the labelings.
bool containsLabel(std::string const &label) const
Checks whether a label is registered within this labeling.
void removeLabel(std::string const &label)
Removes a label from the labelings.
This class represents a (discrete-time) Markov decision process.
Definition Mdp.h:14
std::shared_ptr< storm::models::sparse::Model< ValueType, RewardModelType > > applyScheduler(storm::storage::Scheduler< ValueType > const &scheduler, bool dropUnreachableStates=true, bool preserveModelType=false) const
Applies the given scheduler to this model.
This class manages the labeling of the state space with a number of (atomic) labels.
bool getStateHasLabel(std::string const &label, storm::storage::sparse::state_type state) const
Checks whether a given state is labeled with the given label.
void addLabelToState(std::string const &label, storm::storage::sparse::state_type state)
Adds a label to a given state.
Model checker for checking reachability queries on POMDPs using approximations based on exploration o...
BeliefExplorationPomdpModelChecker(std::shared_ptr< PomdpModelType > pomdp, Options options=Options())
Constructor.
int64_t getStatus()
Get the current status of the interactive unfolding.
bool hasConverged()
Indicates whether the interactive unfolding has converged, i.e.
bool isExploring()
Indicates whether the interactive unfolding is currently in the process of exploring the belief MDP.
std::shared_ptr< ExplorerType > getInteractiveBeliefExplorer()
Get a pointer to the belief explorer used in the interactive unfolding.
void setFMSchedValueList(std::vector< std::vector< std::unordered_map< uint64_t, ValueType > > > valueList)
void unfoldInteractively(storm::Environment const &env, std::set< uint32_t > const &targetObservations, bool min, std::optional< std::string > rewardModelName, storm::pomdp::modelchecker::POMDPValueBounds< ValueType > const &valueBounds, Result &result)
Allows generating an under-approximation using a controllable unfolding.
bool isResultReady()
Indicates whether there is a result after an interactive unfolding was paused.
Result check(storm::Environment const &env, storm::logic::Formula const &formula, storm::Environment const &preProcEnv, std::vector< std::vector< std::unordered_map< uint64_t, ValueType > > > const &additionalUnderApproximationBounds=std::vector< std::vector< std::unordered_map< uint64_t, ValueType > > >())
Performs model checking of the given POMDP with regards to a formula using the previously specified o...
Result getInteractiveResult()
Get the latest saved result obtained by the interactive unfolding.
void printStatisticsToStream(std::ostream &stream) const
Prints statistics of the process to a given output stream.
void precomputeValueBounds(const logic::Formula &formula, storm::Environment const &preProcEnv)
Uses model checking on the underlying MDP to generate values used for cut-offs and for clipping compe...
void continueUnfolding()
Continues a previously paused interactive unfolding.
A bit vector that is internally represented as a vector of 64-bit values.
Definition BitVector.h:18
size_t size() const
Retrieves the number of bits this bit vector can store.
uint_fast64_t getNumberOfSetBits() const
Returns the number of bits that are set to true in this bit vector.
std::pair< std::shared_ptr< storm::models::sparse::Pomdp< ValueType > >, std::set< uint32_t > > transform(storm::storage::BitVector const &stateSet) const
Ensures that the given set of states is observation closed, potentially adding new observation(s). A ...
A class that provides convenience operations to display run times.
Definition Stopwatch.h:14
void start()
Start stopwatch (again) and start measuring time.
Definition Stopwatch.cpp:48
void restart()
Reset the stopwatch and immediately start it.
Definition Stopwatch.cpp:59
SecondType getTimeInSeconds() const
Gets the measured time in seconds.
Definition Stopwatch.cpp:13
void stop()
Stop stopwatch and add measured time to total time.
Definition Stopwatch.cpp:42
#define STORM_LOG_INFO(message)
Definition logging.h:29
#define STORM_LOG_WARN(message)
Definition logging.h:30
#define STORM_LOG_DEBUG(message)
Definition logging.h:23
#define STORM_LOG_TRACE(message)
Definition logging.h:17
#define STORM_LOG_ASSERT(cond, message)
Definition macros.h:11
#define STORM_LOG_WARN_COND(cond, message)
Definition macros.h:38
#define STORM_LOG_ERROR_COND(cond, message)
Definition macros.h:52
#define STORM_LOG_THROW(cond, exception, message)
Definition macros.h:30
#define STORM_LOG_INFO_COND(cond, message)
Definition macros.h:45
SFTBDDChecker::ValueType ValueType
FormulaInformation getFormulaInformation(PomdpType const &pomdp, storm::logic::ProbabilityOperatorFormula const &formula)
ValueType getGap(ValueType const &l, ValueType const &u)
bool detectFiniteBeliefMdp(storm::models::sparse::Pomdp< ValueType > const &pomdp, std::optional< storm::storage::BitVector > const &targetStates)
This method tries to detect that the belief MDP is finite.
storm::storage::BitVector getReachableStates(storm::storage::SparseMatrix< T > const &transitionMatrix, storm::storage::BitVector const &initialStates, storm::storage::BitVector const &constraintStates, storm::storage::BitVector const &targetStates, bool useStepBound, uint_fast64_t maximalSteps, boost::optional< storm::storage::BitVector > const &choiceFilter)
Performs a forward depth-first search through the underlying graph structure to identify the states t...
Definition graph.cpp:48
storage::BitVector BitVector
bool isTerminate()
Check whether the program should terminate (due to some abort signal).
bool isOne(ValueType const &a)
Definition constants.cpp:36
ValueType min(ValueType const &first, ValueType const &second)
bool isZero(ValueType const &a)
Definition constants.cpp:41
ValueType ceil(ValueType const &number)
bool isInfinity(ValueType const &a)
Definition constants.cpp:71
LabParser.cpp.
Definition cli.cpp:18
solver::OptimizationDirection OptimizationDirection
Structure for storing values on the POMDP used for cut-offs and clipping.
std::vector< std::vector< std::unordered_map< uint64_t, ValueType > > > fmSchedulerValueList
storm::pomdp::storage::ExtremePOMDPValueBound< ValueType > extremePomdpValueBound
storm::pomdp::storage::PreprocessingPomdpValueBounds< ValueType > trivialPomdpValueBounds
std::unordered_map< std::string, RewardModelType > rewardModels
storm::storage::SparseMatrix< ValueType > transitionMatrix
std::optional< storm::storage::sparse::StateValuations > observationValuations
std::optional< storm::models::sparse::ChoiceLabeling > choiceLabeling
storm::models::sparse::StateLabeling stateLabeling
std::optional< std::vector< uint32_t > > observabilityClasses