Storm 1.11.1.1
A Modern Probabilistic Model Checker
BeliefExplorationPomdpModelChecker.cpp
2
3#include <tuple>
4
8
12
16
20#include "storm/utility/graph.h"
22
23namespace storm {
24namespace pomdp {
25namespace modelchecker {
26
27/* Struct Functions */
28
29template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
30BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::Result(ValueType lower, ValueType upper)
 31 : lowerBound(lower), upperBound(upper) {
32 // Intentionally left empty
33}
34
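// Returns the difference between the upper and the lower bound (clamped to be non-negative);
// if 'relative' is set, the difference is divided by the upper bound.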
35template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
36typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::ValueType
37BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::diff(bool relative) const {
 38 ValueType diff = upperBound - lowerBound;
 39 if (diff < storm::utility::zero<ValueType>()) {
 40 STORM_LOG_WARN_COND(diff >= storm::utility::convertNumber<ValueType>(-1e-6),
 41 "Upper bound '" << upperBound << "' is smaller than lower bound '" << lowerBound << "': Difference is " << diff << ".");
42 diff = storm::utility::zero<ValueType>();
43 }
44 if (relative && !storm::utility::isZero(upperBound)) {
45 diff /= upperBound;
46 }
47 return diff;
48}
49
50template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
51bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::updateLowerBound(ValueType const& value) {
 52 if (value > lowerBound) {
53 lowerBound = value;
54 return true;
55 }
56 return false;
57}
58
59template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
60bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::updateUpperBound(ValueType const& value) {
 61 if (value < upperBound) {
62 upperBound = value;
63 return true;
64 }
65 return false;
66}
67
68template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
69BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Statistics::Statistics()
 70 : beliefMdpDetectedToBeFinite(false),
71 refinementFixpointDetected(false),
72 overApproximationBuildAborted(false),
73 underApproximationBuildAborted(false),
74 aborted(false) {
75 // intentionally left empty;
76}
77
78template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
79BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::BeliefExplorationPomdpModelChecker(std::shared_ptr<PomdpModelType> pomdp,
 80 Options options)
81 : options(options),
82 inputPomdp(pomdp),
83 beliefTypeCC(storm::utility::convertNumber<BeliefValueType>(this->options.numericPrecision), false),
84 valueTypeCC(this->options.numericPrecision, false) {
85 STORM_LOG_ASSERT(inputPomdp, "The given POMDP is not initialized.");
86 STORM_LOG_ERROR_COND(inputPomdp->isCanonic(), "Input Pomdp is not known to be canonic. This might lead to unexpected verification results.");
87}
88
89/* Public Functions */
90
91template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
92void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::precomputeValueBounds(storm::logic::Formula const& formula,
 93 storm::Environment const& preProcEnv) {
94 auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp(), formula);
95
96 // Compute some initial bounds on the values for each state of the pomdp
97 // We work with the Belief MDP value type, so if the POMDP is exact, but the belief MDP is not, we need to convert
98 auto preProcessingMC = PreprocessingPomdpValueBoundsModelChecker<ValueType>(pomdp());
99 auto initialPomdpValueBounds = preProcessingMC.getValueBounds(preProcEnv, formula);
100 pomdpValueBounds.trivialPomdpValueBounds = initialPomdpValueBounds;
101
102 // If we clip and compute rewards, compute the values necessary for the correction terms
103 if (options.useClipping && formula.isRewardOperatorFormula()) {
104 pomdpValueBounds.extremePomdpValueBound = preProcessingMC.getExtremeValueBound(preProcEnv, formula);
105 }
106}
107
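// The following check() overloads merely default the model-checking and/or preprocessing environment
// and forward to the main check() implementation further below.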
108template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
109typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
110BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
 111 storm::Environment const& env, storm::logic::Formula const& formula,
112 std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
113 return check(env, formula, env, additionalUnderApproximationBounds);
114}
115
116template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
117typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
118BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
 119 storm::logic::Formula const& formula, std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
 120 storm::Environment env;
 121 return check(env, formula, env, additionalUnderApproximationBounds);
122}
123
124template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
125typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
126BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
 127 storm::logic::Formula const& formula, storm::Environment const& preProcEnv,
 128 std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
 129 storm::Environment env;
 130 return check(env, formula, preProcEnv, additionalUnderApproximationBounds);
131}
132
133template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
134typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
135BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
 136 storm::Environment const& env, storm::logic::Formula const& formula, storm::Environment const& preProcEnv,
137 std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
138 STORM_LOG_ASSERT(options.unfold || options.discretize || options.interactiveUnfolding,
139 "Invoked belief exploration but no task (unfold or discretize) given.");
140 // Potentially reset preprocessed model from previous call
141 preprocessedPomdp.reset();
142
143 // Reset all collected statistics
144 statistics = Statistics();
145 statistics.totalTime.start();
146 // Extract the relevant information from the formula
147 auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp(), formula);
148
149 precomputeValueBounds(formula, preProcEnv);
150 if (!additionalUnderApproximationBounds.empty()) {
151 pomdpValueBounds.fmSchedulerValueList = additionalUnderApproximationBounds;
152 }
153 uint64_t initialPomdpState = pomdp().getInitialStates().getNextSetIndex(0);
154 Result result(pomdpValueBounds.trivialPomdpValueBounds.getHighestLowerBound(initialPomdpState),
155 pomdpValueBounds.trivialPomdpValueBounds.getSmallestUpperBound(initialPomdpState));
156 STORM_LOG_INFO("Initial value bounds are [" << result.lowerBound << ", " << result.upperBound << "]");
157
158 std::optional<std::string> rewardModelName;
159 std::set<uint32_t> targetObservations;
160 if (formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula()) {
161 if (formulaInfo.getTargetStates().observationClosed) {
162 targetObservations = formulaInfo.getTargetStates().observations;
163 } else {
 164 storm::transformer::MakeStateSetObservationClosed<ValueType> obsCloser(pomdp());
 165 std::tie(preprocessedPomdp, targetObservations) = obsCloser.transform(formulaInfo.getTargetStates().states);
166 }
167 if (formulaInfo.isNonNestedReachabilityProbability()) {
168 if (!formulaInfo.getSinkStates().empty()) {
 169 storm::storage::sparse::ModelComponents<ValueType> components;
 170 components.stateLabeling = pomdp().getStateLabeling();
171 components.rewardModels = pomdp().getRewardModels();
172 auto matrix = pomdp().getTransitionMatrix();
173 matrix.makeRowGroupsAbsorbing(formulaInfo.getSinkStates().states);
174 components.transitionMatrix = matrix;
175 components.observabilityClasses = pomdp().getObservations();
176 if (pomdp().hasChoiceLabeling()) {
177 components.choiceLabeling = pomdp().getChoiceLabeling();
178 }
179 if (pomdp().hasObservationValuations()) {
180 components.observationValuations = pomdp().getObservationValuations();
181 }
182 preprocessedPomdp = std::make_shared<storm::models::sparse::Pomdp<ValueType>>(std::move(components), true);
183 auto reachableFromSinkStates = storm::utility::graph::getReachableStates(
184 pomdp().getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states);
185 reachableFromSinkStates &= ~formulaInfo.getSinkStates().states;
186 STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException,
187 "There are sink states that can reach non-sink states. This is currently not supported");
188 }
189 } else {
190 // Expected reward formula!
191 rewardModelName = formulaInfo.getRewardModelName();
192 }
193 } else {
194 STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'.");
195 }
196 if (storm::pomdp::detectFiniteBeliefMdp(pomdp(), formulaInfo.getTargetStates().states)) {
197 STORM_LOG_INFO("Detected that the belief MDP is finite.");
198 statistics.beliefMdpDetectedToBeFinite = true;
199 }
200 if (options.interactiveUnfolding) {
201 unfoldInteractively(env, targetObservations, formulaInfo.minimize(), rewardModelName, pomdpValueBounds, result);
202 } else {
203 refineReachability(env, targetObservations, formulaInfo.minimize(), rewardModelName, pomdpValueBounds, result);
204 }
205 // "clear" results in case they were actually not requested (this will make the output a bit more clear)
206 if ((formulaInfo.minimize() && !options.discretize) || (formulaInfo.maximize() && !options.unfold)) {
207 result.lowerBound = -storm::utility::infinity<ValueType>();
208 }
209 if ((formulaInfo.maximize() && !options.discretize) || (formulaInfo.minimize() && !options.unfold)) {
210 result.upperBound = storm::utility::infinity<ValueType>();
211 }
212
 213 if (storm::utility::resources::isTerminate()) {
 214 statistics.aborted = true;
215 }
216 statistics.totalTime.stop();
217 return result;
218}
219
220template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
221void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::printStatisticsToStream(std::ostream& stream) const {
 222 stream << "##### POMDP Approximation Statistics ######\n";
223 stream << "# Input model: \n";
224 pomdp().printModelInformationToStream(stream);
225 stream << "# Max. Number of states with same observation: " << pomdp().getMaxNrStatesWithSameObservation() << '\n';
226 if (statistics.beliefMdpDetectedToBeFinite) {
227 stream << "# Pre-computations detected that the belief MDP is finite.\n";
228 }
229 if (statistics.aborted) {
230 stream << "# Computation aborted early\n";
231 }
232
233 stream << "# Total check time: " << statistics.totalTime << '\n';
234 // Refinement information:
235 if (statistics.refinementSteps) {
236 stream << "# Number of refinement steps: " << statistics.refinementSteps.value() << '\n';
237 }
238 if (statistics.refinementFixpointDetected) {
239 stream << "# Detected a refinement fixpoint.\n";
240 }
241
242 // The overapproximation MDP:
243 if (statistics.overApproximationStates) {
244 stream << "# Number of states in the ";
245 if (options.refine) {
246 stream << "final ";
247 }
248 stream << "grid MDP for the over-approximation: ";
249 if (statistics.overApproximationBuildAborted) {
250 stream << ">=";
251 }
252 stream << statistics.overApproximationStates.value() << '\n';
253 stream << "# Maximal resolution for over-approximation: " << statistics.overApproximationMaxResolution.value() << '\n';
254 stream << "# Time spend for building the over-approx grid MDP(s): " << statistics.overApproximationBuildTime << '\n';
255 stream << "# Time spend for checking the over-approx grid MDP(s): " << statistics.overApproximationCheckTime << '\n';
256 }
257
258 // The underapproximation MDP:
259 if (statistics.underApproximationStates) {
260 stream << "# Number of states in the ";
261 if (options.refine) {
262 stream << "final ";
263 }
264 stream << "belief MDP for the under-approximation: ";
265 if (statistics.underApproximationBuildAborted) {
266 stream << ">=";
267 }
268 stream << statistics.underApproximationStates.value() << '\n';
269 if (statistics.nrClippingAttempts) {
270 stream << "# Clipping attempts (clipped states) for the under-approximation: ";
271 if (statistics.underApproximationBuildAborted) {
272 stream << ">=";
273 }
274 stream << statistics.nrClippingAttempts.value() << " (" << statistics.nrClippedStates.value() << ")\n";
275 stream << "# Total clipping preprocessing time: " << statistics.clippingPreTime << "\n";
276 stream << "# Total clipping time: " << statistics.clipWatch << "\n";
277 } else if (statistics.nrTruncatedStates) {
278 stream << "# Truncated states for the under-approximation: ";
279 if (statistics.underApproximationBuildAborted) {
280 stream << ">=";
281 }
282 stream << statistics.nrTruncatedStates.value() << "\n";
283 }
284 if (statistics.underApproximationStateLimit) {
285 stream << "# Exploration state limit for under-approximation: " << statistics.underApproximationStateLimit.value() << '\n';
286 }
287 stream << "# Time spend for building the under-approx grid MDP(s): " << statistics.underApproximationBuildTime << '\n';
288 stream << "# Time spend for checking the under-approx grid MDP(s): " << statistics.underApproximationCheckTime << '\n';
289 }
290
291 stream << "##########################################\n";
292}
293
294/* Private Functions */
295
296template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
297PomdpModelType const& BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::pomdp() const {
 298 if (preprocessedPomdp) {
299 return *preprocessedPomdp;
300 } else {
301 return *inputPomdp;
302 }
303}
304
305template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
306void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::refineReachability(
307 storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, std::optional<std::string> rewardModelName,
308 storm::pomdp::modelchecker::POMDPValueBounds<ValueType> const& valueBounds, Result& result) {
309 statistics.refinementSteps = 0;
310 auto trivialPOMDPBounds = valueBounds.trivialPomdpValueBounds;
311 // Set up exploration data
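// The over-approximation (options.discretize) is obtained by discretizing beliefs on a grid,
// the under-approximation (options.unfold) by unfolding the belief MDP up to a size threshold.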
312 std::vector<BeliefValueType> observationResolutionVector;
313 std::shared_ptr<BeliefManagerType> overApproxBeliefManager;
314 std::shared_ptr<ExplorerType> overApproximation;
315 HeuristicParameters overApproxHeuristicPar{};
316 if (options.discretize) { // Setup and build first OverApproximation
317 observationResolutionVector =
318 std::vector<BeliefValueType>(pomdp().getNrObservations(), storm::utility::convertNumber<BeliefValueType>(options.resolutionInit));
319 overApproxBeliefManager = std::make_shared<BeliefManagerType>(
320 pomdp(), storm::utility::convertNumber<BeliefValueType>(options.numericPrecision),
321 options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
322 if (rewardModelName) {
323 overApproxBeliefManager->setRewardModel(rewardModelName);
324 }
325 overApproximation = std::make_shared<ExplorerType>(overApproxBeliefManager, trivialPOMDPBounds, storm::builder::ExplorationHeuristic::BreadthFirst);
326 overApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
327 overApproxHeuristicPar.observationThreshold = options.obsThresholdInit;
328 overApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit == 0 ? std::numeric_limits<uint64_t>::max() : options.sizeThresholdInit;
329 overApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
330
331 buildOverApproximation(env, targetObservations, min, rewardModelName.has_value(), false, overApproxHeuristicPar, observationResolutionVector,
332 overApproxBeliefManager, overApproximation);
333 if (!overApproximation->hasComputedValues() || storm::utility::resources::isTerminate()) {
334 return;
335 }
336 ValueType const& newValue = overApproximation->getComputedValueAtInitialState();
337 bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue);
338 if (betterBound) {
339 STORM_LOG_INFO("Initial Over-approx result obtained after " << statistics.totalTime << ". Value is '" << newValue << "'.\n");
340 }
341 }
342
343 std::shared_ptr<BeliefManagerType> underApproxBeliefManager;
344 std::shared_ptr<ExplorerType> underApproximation;
345 HeuristicParameters underApproxHeuristicPar{};
346 if (options.unfold) { // Setup and build first UnderApproximation
347 underApproxBeliefManager = std::make_shared<BeliefManagerType>(
348 pomdp(), storm::utility::convertNumber<BeliefValueType>(options.numericPrecision),
349 options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
350 if (rewardModelName) {
351 underApproxBeliefManager->setRewardModel(rewardModelName);
352 }
353 underApproximation = std::make_shared<ExplorerType>(underApproxBeliefManager, trivialPOMDPBounds, options.explorationHeuristic);
354 underApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
355 underApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
356 underApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit;
357 if (underApproxHeuristicPar.sizeThreshold == 0) {
358 if (!options.refine && options.explorationTimeLimit != 0) {
359 underApproxHeuristicPar.sizeThreshold = std::numeric_limits<uint64_t>::max();
360 } else {
361 underApproxHeuristicPar.sizeThreshold = pomdp().getNumberOfStates() * pomdp().getMaxNrStatesWithSameObservation();
362 STORM_LOG_INFO("Heuristically selected an under-approximation MDP size threshold of " << underApproxHeuristicPar.sizeThreshold << ".\n");
363 }
364 }
365
366 if (options.useClipping && rewardModelName.has_value()) {
367 underApproximation->setExtremeValueBound(valueBounds.extremePomdpValueBound);
368 }
369 if (!valueBounds.fmSchedulerValueList.empty()) {
370 underApproximation->setFMSchedValueList(valueBounds.fmSchedulerValueList);
371 }
372 buildUnderApproximation(env, targetObservations, min, rewardModelName.has_value(), false, underApproxHeuristicPar, underApproxBeliefManager,
373 underApproximation, false);
374 if (!underApproximation->hasComputedValues() || storm::utility::resources::isTerminate()) {
375 return;
376 }
377 ValueType const& newValue = underApproximation->getComputedValueAtInitialState();
378 bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue);
379 if (betterBound) {
380 STORM_LOG_INFO("Initial Under-approx result obtained after " << statistics.totalTime << ". Value is '" << newValue << "'.\n");
381 }
382 }
383
384 // Do some output
385 STORM_LOG_INFO("Completed (initial) computation. Current checktime is " << statistics.totalTime << ".");
386 bool computingLowerBound = false;
387 bool computingUpperBound = false;
388 if (options.discretize) {
389 STORM_LOG_INFO("\tOver-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << ".");
390 (min ? computingLowerBound : computingUpperBound) = true;
391 }
392 if (options.unfold) {
393 STORM_LOG_INFO("\tUnder-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << ".");
394 (min ? computingUpperBound : computingLowerBound) = true;
395 }
396 if (computingLowerBound && computingUpperBound) {
397 STORM_LOG_INFO("\tObtained result is [" << result.lowerBound << ", " << result.upperBound << "].");
398 } else if (computingLowerBound) {
399 STORM_LOG_INFO("\tObtained result is ≥" << result.lowerBound << ".");
400 } else if (computingUpperBound) {
401 STORM_LOG_INFO("\tObtained result is ≤" << result.upperBound << ".");
402 }
403
404 // Start refinement
405 if (options.refine) {
406 STORM_LOG_WARN_COND(options.refineStepLimit != 0 || !storm::utility::isZero(options.refinePrecision),
407 "No termination criterion for refinement given. Consider to specify a steplimit, a non-zero precisionlimit, or a timeout");
408 STORM_LOG_WARN_COND(storm::utility::isZero(options.refinePrecision) || (options.unfold && options.discretize),
409 "Refinement goal precision is given, but only one bound is going to be refined.");
410 while ((options.refineStepLimit == 0 || statistics.refinementSteps.value() < options.refineStepLimit) && result.diff() > options.refinePrecision) {
411 bool overApproxFixPoint = true;
412 bool underApproxFixPoint = true;
413 if (options.discretize) {
414 // Refine over-approximation
415 if (min) {
416 overApproximation->takeCurrentValuesAsLowerBounds();
417 } else {
418 overApproximation->takeCurrentValuesAsUpperBounds();
419 }
420 overApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor;
421 overApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber<uint64_t, ValueType>(
422 storm::utility::convertNumber<ValueType, uint64_t>(overApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor);
423 overApproxHeuristicPar.observationThreshold +=
424 options.obsThresholdIncrementFactor * (storm::utility::one<ValueType>() - overApproxHeuristicPar.observationThreshold);
425 overApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor;
426 overApproxFixPoint = buildOverApproximation(env, targetObservations, min, rewardModelName.has_value(), true, overApproxHeuristicPar,
427 observationResolutionVector, overApproxBeliefManager, overApproximation);
428 if (overApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) {
429 ValueType const& newValue = overApproximation->getComputedValueAtInitialState();
430 bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue);
431 if (betterBound) {
432 STORM_LOG_INFO("Over-approx result for refinement improved after " << statistics.totalTime << " in refinement step #"
433 << (statistics.refinementSteps.value() + 1) << ". New value is '"
434 << newValue << "'.");
435 }
436 } else {
437 break;
438 }
439 }
440
441 if (options.unfold && result.diff() > options.refinePrecision) {
442 // Refine under-approximation
443 underApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor;
444 underApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber<uint64_t, ValueType>(
445 storm::utility::convertNumber<ValueType, uint64_t>(underApproximation->getExploredMdp()->getNumberOfStates()) *
446 options.sizeThresholdFactor);
447 underApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor;
448 underApproxFixPoint = buildUnderApproximation(env, targetObservations, min, rewardModelName.has_value(), true, underApproxHeuristicPar,
449 underApproxBeliefManager, underApproximation, true);
450 if (underApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) {
451 ValueType const& newValue = underApproximation->getComputedValueAtInitialState();
452 bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue);
453 if (betterBound) {
454 STORM_LOG_INFO("Under-approx result for refinement improved after " << statistics.totalTime << " in refinement step #"
455 << (statistics.refinementSteps.value() + 1) << ". New value is '"
456 << newValue << "'.");
457 }
458 } else {
459 break;
460 }
461 }
462
 463 if (storm::utility::resources::isTerminate()) {
 464 break;
465 } else {
466 ++statistics.refinementSteps.value();
467 // Don't make too many outputs (to avoid logfile clutter)
468 if (statistics.refinementSteps.value() <= 1000) {
469 STORM_LOG_INFO("Completed iteration #" << statistics.refinementSteps.value() << ". Current checktime is " << statistics.totalTime << ".");
470 computingLowerBound = false;
471 computingUpperBound = false;
472 if (options.discretize) {
473 STORM_LOG_INFO("\tOver-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << ".");
474 (min ? computingLowerBound : computingUpperBound) = true;
475 }
476 if (options.unfold) {
477 STORM_LOG_INFO("\tUnder-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << ".");
478 (min ? computingUpperBound : computingLowerBound) = true;
479 }
480 if (computingLowerBound && computingUpperBound) {
481 STORM_LOG_INFO("\tCurrent result is [" << result.lowerBound << ", " << result.upperBound << "].");
482 } else if (computingLowerBound) {
483 STORM_LOG_INFO("\tCurrent result is ≥" << result.lowerBound << ".");
484 } else if (computingUpperBound) {
485 STORM_LOG_INFO("\tCurrent result is ≤" << result.upperBound << ".");
486 }
487 STORM_LOG_WARN_COND(statistics.refinementSteps.value() < 1000, "Refinement requires more than 1000 iterations.");
488 }
489 }
490 if (overApproxFixPoint && underApproxFixPoint) {
491 STORM_LOG_INFO("Refinement fixpoint reached after " << statistics.refinementSteps.value() << " iterations.\n");
492 statistics.refinementFixpointDetected = true;
493 break;
494 }
495 }
496 }
497 // Print model information of final over- / under-approximation MDP
498 if (options.discretize && overApproximation->hasComputedValues()) {
499 auto printOverInfo = [&overApproximation]() {
500 std::stringstream str;
501 str << "Explored and checked Over-Approximation MDP:\n";
502 overApproximation->getExploredMdp()->printModelInformationToStream(str);
503 return str.str();
504 };
505 STORM_LOG_INFO(printOverInfo());
506 }
507 if (options.unfold && underApproximation->hasComputedValues()) {
508 auto printUnderInfo = [&underApproximation]() {
509 std::stringstream str;
510 str << "Explored and checked Under-Approximation MDP:\n";
511 underApproximation->getExploredMdp()->printModelInformationToStream(str);
512 return str.str();
513 };
514 STORM_LOG_INFO(printUnderInfo());
515 std::shared_ptr<storm::models::sparse::Model<ValueType>> scheduledModel = underApproximation->getExploredMdp();
516 if (!options.useStateEliminationCutoff) {
517 storm::models::sparse::StateLabeling newLabeling(scheduledModel->getStateLabeling());
518 auto nrPreprocessingScheds = min ? underApproximation->getNrSchedulersForUpperBounds() : underApproximation->getNrSchedulersForLowerBounds();
519 for (uint64_t i = 0; i < nrPreprocessingScheds; ++i) {
520 newLabeling.addLabel("sched_" + std::to_string(i));
521 }
522 newLabeling.addLabel("cutoff");
523 newLabeling.addLabel("clipping");
524 newLabeling.addLabel("finite_mem");
525
526 auto transMatrix = scheduledModel->getTransitionMatrix();
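// Re-label the states of the scheduler-induced model: each truncated state is marked as "cutoff",
// "clipping", or "finite_mem_*", depending on the choice label selected by the scheduler there.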
527 for (uint64_t i = 0; i < scheduledModel->getNumberOfStates(); ++i) {
528 if (newLabeling.getStateHasLabel("truncated", i)) {
529 uint64_t localChosenActionIndex = underApproximation->getSchedulerForExploredMdp()->getChoice(i).getDeterministicChoice();
530 auto rowIndex = scheduledModel->getTransitionMatrix().getRowGroupIndices()[i];
531 if (scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).size() > 0) {
532 auto label = *(scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).begin());
533 if (label.rfind("clip", 0) == 0) {
534 newLabeling.addLabelToState("clipping", i);
535 auto chosenRow = transMatrix.getRow(i, 0);
536 auto candidateIndex = (chosenRow.end() - 1)->getColumn();
537 transMatrix.makeRowDirac(transMatrix.getRowGroupIndices()[i], candidateIndex);
538 } else if (label.rfind("mem_node", 0) == 0) {
539 if (!newLabeling.containsLabel("finite_mem_" + label.substr(9, 1))) {
540 newLabeling.addLabel("finite_mem_" + label.substr(9, 1));
541 }
542 newLabeling.addLabelToState("finite_mem_" + label.substr(9, 1), i);
543 newLabeling.addLabelToState("cutoff", i);
544 } else {
545 newLabeling.addLabelToState(label, i);
546 newLabeling.addLabelToState("cutoff", i);
547 }
548 }
549 }
550 }
551 newLabeling.removeLabel("truncated");
552
553 transMatrix.dropZeroEntries();
554 storm::storage::sparse::ModelComponents<ValueType> modelComponents(transMatrix, newLabeling);
555 if (scheduledModel->hasChoiceLabeling()) {
556 modelComponents.choiceLabeling = scheduledModel->getChoiceLabeling();
557 }
558 storm::models::sparse::Mdp<ValueType> newMDP(modelComponents);
559 auto inducedMC = newMDP.applyScheduler(*(underApproximation->getSchedulerForExploredMdp()), true);
560 scheduledModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType>>(inducedMC);
561 } else {
562 auto inducedMC = underApproximation->getExploredMdp()->applyScheduler(*(underApproximation->getSchedulerForExploredMdp()), true);
563 scheduledModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType>>(inducedMC);
564 }
565 result.schedulerAsMarkovChain = scheduledModel;
566 if (min) {
567 result.cutoffSchedulers = underApproximation->getUpperValueBoundSchedulers();
568 } else {
569 result.cutoffSchedulers = underApproximation->getLowerValueBoundSchedulers();
570 }
571 }
572}
573
574template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
575void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::unfoldInteractively(
 576 storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, std::optional<std::string> rewardModelName,
 577 storm::pomdp::modelchecker::POMDPValueBounds<ValueType> const& valueBounds, Result& result) {
 578 statistics.refinementSteps = 0;
579 interactiveResult = result;
580 unfoldingStatus = Status::Uninitialized;
581 unfoldingControl = UnfoldingControl::Run;
582 auto trivialPOMDPBounds = valueBounds.trivialPomdpValueBounds;
583 // Set up exploration data
584 std::shared_ptr<BeliefManagerType> underApproxBeliefManager;
585 HeuristicParameters underApproxHeuristicPar{};
586 bool firstIteration = true;
587 // Set up belief manager
588 underApproxBeliefManager = std::make_shared<BeliefManagerType>(
589 pomdp(), storm::utility::convertNumber<BeliefValueType>(options.numericPrecision),
590 options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
591 if (rewardModelName) {
592 underApproxBeliefManager->setRewardModel(rewardModelName);
593 }
594
595 // set up belief MDP explorer
596 interactiveUnderApproximationExplorer = std::make_shared<ExplorerType>(underApproxBeliefManager, trivialPOMDPBounds, options.explorationHeuristic);
597 underApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
598 underApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
599 underApproxHeuristicPar.sizeThreshold = std::numeric_limits<uint64_t>::max() - 1; // we don't set a size threshold
600
601 if (options.useClipping && rewardModelName.has_value()) {
602 interactiveUnderApproximationExplorer->setExtremeValueBound(valueBounds.extremePomdpValueBound);
603 }
604
605 if (!valueBounds.fmSchedulerValueList.empty()) {
606 interactiveUnderApproximationExplorer->setFMSchedValueList(valueBounds.fmSchedulerValueList);
607 }
608
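// Interactive unfolding: the under-approximation is extended round by round until the controlling
// side requests termination; a result becomes available after each completed round, and the
// exploration idles while the unfolding is set to pause.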
609 // Start iteration
 610 while (!(unfoldingControl ==
 611 storm::pomdp::modelchecker::BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::UnfoldingControl::Terminate)) {
612 bool underApproxFixPoint = true;
613 bool hasTruncatedStates = false;
614 if (unfoldingStatus != Status::Converged) {
615 // Continue unfolding underapproximation
616 underApproxFixPoint = buildUnderApproximation(env, targetObservations, min, rewardModelName.has_value(), false, underApproxHeuristicPar,
617 underApproxBeliefManager, interactiveUnderApproximationExplorer, firstIteration);
618 if (interactiveUnderApproximationExplorer->hasComputedValues() && !storm::utility::resources::isTerminate()) {
619 ValueType const& newValue = interactiveUnderApproximationExplorer->getComputedValueAtInitialState();
620 bool betterBound = min ? interactiveResult.updateUpperBound(newValue) : interactiveResult.updateLowerBound(newValue);
621 if (betterBound) {
622 STORM_LOG_INFO("Under-approximation result improved after " << statistics.totalTime << " in step #"
623 << (statistics.refinementSteps.value() + 1) << ". New value is '" << newValue
624 << "'.");
625 }
626 std::shared_ptr<storm::models::sparse::Model<ValueType>> scheduledModel = interactiveUnderApproximationExplorer->getExploredMdp();
627 if (!options.useStateEliminationCutoff) {
628 storm::models::sparse::StateLabeling newLabeling(scheduledModel->getStateLabeling());
629 auto nrPreprocessingScheds = min ? interactiveUnderApproximationExplorer->getNrSchedulersForUpperBounds()
630 : interactiveUnderApproximationExplorer->getNrSchedulersForLowerBounds();
631 for (uint64_t i = 0; i < nrPreprocessingScheds; ++i) {
632 newLabeling.addLabel("sched_" + std::to_string(i));
633 }
634 newLabeling.addLabel("cutoff");
635 newLabeling.addLabel("clipping");
636 newLabeling.addLabel("finite_mem");
637
638 auto transMatrix = scheduledModel->getTransitionMatrix();
639 for (uint64_t i = 0; i < scheduledModel->getNumberOfStates(); ++i) {
640 if (newLabeling.getStateHasLabel("truncated", i)) {
641 hasTruncatedStates = true;
642 uint64_t localChosenActionIndex =
643 interactiveUnderApproximationExplorer->getSchedulerForExploredMdp()->getChoice(i).getDeterministicChoice();
644 auto rowIndex = scheduledModel->getTransitionMatrix().getRowGroupIndices()[i];
645 if (scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).size() > 0) {
646 auto label = *(scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).begin());
647 if (label.rfind("clip", 0) == 0) {
648 newLabeling.addLabelToState("clipping", i);
649 auto chosenRow = transMatrix.getRow(i, 0);
650 auto candidateIndex = (chosenRow.end() - 1)->getColumn();
651 transMatrix.makeRowDirac(transMatrix.getRowGroupIndices()[i], candidateIndex);
652 } else if (label.rfind("mem_node", 0) == 0) {
653 if (!newLabeling.containsLabel("finite_mem_" + label.substr(9, 1))) {
654 newLabeling.addLabel("finite_mem_" + label.substr(9, 1));
655 }
656 newLabeling.addLabelToState("finite_mem_" + label.substr(9, 1), i);
657 newLabeling.addLabelToState("cutoff", i);
658 } else {
659 newLabeling.addLabelToState(label, i);
660 newLabeling.addLabelToState("cutoff", i);
661 }
662 }
663 }
664 }
665 newLabeling.removeLabel("truncated");
666
667 transMatrix.dropZeroEntries();
668 storm::storage::sparse::ModelComponents<ValueType> modelComponents(transMatrix, newLabeling);
669 if (scheduledModel->hasChoiceLabeling()) {
670 modelComponents.choiceLabeling = scheduledModel->getChoiceLabeling();
671 }
672 storm::models::sparse::Mdp<ValueType> newMDP(modelComponents);
673 auto inducedMC = newMDP.applyScheduler(*(interactiveUnderApproximationExplorer->getSchedulerForExploredMdp()), true);
674 scheduledModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType>>(inducedMC);
675 }
676 interactiveResult.schedulerAsMarkovChain = scheduledModel;
677 if (min) {
678 interactiveResult.cutoffSchedulers = interactiveUnderApproximationExplorer->getUpperValueBoundSchedulers();
679 } else {
680 interactiveResult.cutoffSchedulers = interactiveUnderApproximationExplorer->getLowerValueBoundSchedulers();
681 }
682 if (firstIteration) {
683 firstIteration = false;
684 }
685 unfoldingStatus = Status::ResultAvailable;
686 } else {
687 break;
688 }
689
 690 if (storm::utility::resources::isTerminate()) {
 691 break;
692 } else {
693 ++statistics.refinementSteps.value();
694 // Don't make too many outputs (to avoid logfile clutter)
695 if (statistics.refinementSteps.value() <= 1000) {
696 STORM_LOG_INFO("Completed iteration #" << statistics.refinementSteps.value() << ". Current checktime is " << statistics.totalTime << ".");
697 bool computingLowerBound = false;
698 bool computingUpperBound = false;
699 if (options.unfold) {
700 STORM_LOG_INFO("\tUnder-approx MDP has size " << interactiveUnderApproximationExplorer->getExploredMdp()->getNumberOfStates() << ".");
701 (min ? computingUpperBound : computingLowerBound) = true;
702 }
703 if (computingLowerBound && computingUpperBound) {
704 STORM_LOG_INFO("\tCurrent result is [" << interactiveResult.lowerBound << ", " << interactiveResult.upperBound << "].");
705 } else if (computingLowerBound) {
706 STORM_LOG_INFO("\tCurrent result is ≥" << interactiveResult.lowerBound << ".");
707 } else if (computingUpperBound) {
708 STORM_LOG_INFO("\tCurrent result is ≤" << interactiveResult.upperBound << ".");
709 }
710 }
711 }
712 if (underApproxFixPoint) {
713 STORM_LOG_INFO("Fixpoint reached after " << statistics.refinementSteps.value() << " iterations.\n");
714 statistics.refinementFixpointDetected = true;
715 unfoldingStatus = Status::Converged;
716 unfoldingControl = UnfoldingControl::Pause;
717 }
718 if (!hasTruncatedStates) {
719 STORM_LOG_INFO("No states have been truncated, so continued iteration does not yield new results.\n");
720 unfoldingStatus = Status::Converged;
721 unfoldingControl = UnfoldingControl::Pause;
722 }
723 }
724 // While we tell the procedure to be paused, idle
 725 while (unfoldingControl ==
 726 storm::pomdp::modelchecker::BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::UnfoldingControl::Pause &&
 727 !storm::utility::resources::isTerminate()) {
728 // Intentionally left empty
729 }
730 }
731 STORM_LOG_INFO("\tInteractive Unfolding terminated.\n");
732}
733
734template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
735void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::unfoldInteractively(
 736 std::set<uint32_t> const& targetObservations, bool min, std::optional<std::string> rewardModelName,
 737 storm::pomdp::modelchecker::POMDPValueBounds<ValueType> const& valueBounds, Result& result) {
 738 storm::Environment env;
 739 unfoldInteractively(env, targetObservations, min, rewardModelName, valueBounds, result);
740}
741
742template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
747
748template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
749int64_t BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getStatus() {
 750 if (unfoldingStatus == Status::Uninitialized)
751 return 0;
752 if (unfoldingStatus == Status::Exploring)
753 return 1;
754 if (unfoldingStatus == Status::ModelExplorationFinished)
755 return 2;
756 if (unfoldingStatus == Status::ResultAvailable)
757 return 3;
758 if (unfoldingStatus == Status::Terminated)
759 return 4;
760
761 return -1;
762}
763
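// Computes the (relative) gap between lower bound l and upper bound u, i.e. 2*|u-l| / (l+u),
// treating infinite and zero upper bounds as special cases.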
764template<typename ValueType>
765ValueType getGap(ValueType const& l, ValueType const& u) {
766 STORM_LOG_ASSERT(l >= storm::utility::zero<ValueType>() && u >= storm::utility::zero<ValueType>(),
767 "Gap computation currently does not handle negative values.");
770 return storm::utility::zero<ValueType>();
771 } else {
772 return u;
773 }
774 } else if (storm::utility::isZero(u)) {
775 STORM_LOG_ASSERT(storm::utility::isZero(l), "Upper bound is zero but lower bound is " << l << ".");
776 return u;
777 } else {
778 STORM_LOG_ASSERT(!storm::utility::isInfinity(l), "Lower bound is infinity, but upper bound is " << u << ".");
779 // get the relative gap
780 return storm::utility::abs<ValueType>(u - l) * storm::utility::convertNumber<ValueType, uint64_t>(2) / (l + u);
781 }
782}
783
784template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
785bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::buildOverApproximation(
786 storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, bool refine,
787 HeuristicParameters const& heuristicParameters, std::vector<BeliefValueType>& observationResolutionVector,
788 std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
789 // Detect whether the refinement reached a fixpoint.
790 bool fixPoint = true;
791
792 statistics.overApproximationBuildTime.start();
793 storm::storage::BitVector refinedObservations;
794 if (!refine) {
795 // If we build the model from scratch, we first have to set up the explorer for the overApproximation.
796 if (computeRewards) {
797 overApproximation->startNewExploration(storm::utility::zero<ValueType>());
798 } else {
799 overApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
800 }
801 } else {
802 // If we refine the existing overApproximation, our heuristic also wants to know which states are reachable under an optimal policy
803 overApproximation->computeOptimalChoicesAndReachableMdpStates(heuristicParameters.optimalChoiceValueEpsilon, true);
804 // We also need to find out which observation resolutions needs refinement.
805 // current maximal resolution (needed for refinement heuristic)
806 auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector);
807 // If there is a score < 1, we have not reached a fixpoint, yet
808 auto numericPrecision = storm::utility::convertNumber<BeliefValueType>(options.numericPrecision);
809 if (std::any_of(obsRatings.begin(), obsRatings.end(),
810 [&numericPrecision](BeliefValueType const& value) { return value + numericPrecision < storm::utility::one<BeliefValueType>(); })) {
811 STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because there are still observations to refine.");
812 fixPoint = false;
813 }
814 refinedObservations = storm::utility::vector::filter<BeliefValueType>(obsRatings, [&heuristicParameters](BeliefValueType const& r) {
815 return r <= storm::utility::convertNumber<BeliefValueType>(heuristicParameters.observationThreshold);
816 });
817 STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations.");
818 for (auto const obs : refinedObservations) {
819 // Increment the resolution at the refined observations.
820 // Use storm's rational number to detect overflows properly.
821 storm::RationalNumber newObsResolutionAsRational = storm::utility::convertNumber<storm::RationalNumber>(observationResolutionVector[obs]) *
822 storm::utility::convertNumber<storm::RationalNumber>(options.resolutionFactor);
823 static_assert(storm::NumberTraits<BeliefValueType>::IsExact || std::is_same<BeliefValueType, double>::value, "Unhandled belief value type");
 824 if (!storm::NumberTraits<BeliefValueType>::IsExact &&
 825 newObsResolutionAsRational > storm::utility::convertNumber<storm::RationalNumber>(std::numeric_limits<double>::max())) {
826 observationResolutionVector[obs] = storm::utility::convertNumber<BeliefValueType>(std::numeric_limits<double>::max());
827 } else {
828 observationResolutionVector[obs] = storm::utility::convertNumber<BeliefValueType>(newObsResolutionAsRational);
829 }
830 }
831 overApproximation->restartExploration();
832 }
833 statistics.overApproximationMaxResolution = storm::utility::ceil(*std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()));
834
835 // Start exploration
836 storm::utility::Stopwatch explorationTime;
837 if (options.explorationTimeLimit != 0) {
838 explorationTime.start();
839 }
840 bool timeLimitExceeded = false;
841 std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations; // Declare here to avoid reallocations
842 uint64_t numRewiredOrExploredStates = 0;
843 while (overApproximation->hasUnexploredState()) {
844 if (!timeLimitExceeded && options.explorationTimeLimit != 0 &&
845 static_cast<uint64_t>(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit) {
846 STORM_LOG_INFO("Exploration time limit exceeded.");
847 timeLimitExceeded = true;
848 STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because the exploration time limit is exceeded.");
849 fixPoint = false;
850 }
851
852 uint64_t currId = overApproximation->exploreNextState();
853 bool hasOldBehavior = refine && overApproximation->currentStateHasOldBehavior();
854 if (!hasOldBehavior) {
855 STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because a new state is explored");
856 fixPoint = false; // Exploring a new state!
857 }
858 uint32_t currObservation = beliefManager->getBeliefObservation(currId);
859 if (targetObservations.count(currObservation) != 0) {
860 overApproximation->setCurrentStateIsTarget();
861 overApproximation->addSelfloopTransition();
862 } else {
863 // We need to decide how to treat this state (and each individual enabled action). There are the following cases:
864 // 1 The state has no old behavior and
865 // 1.1 we explore all actions or
866 // 1.2 we truncate all actions
867 // 2 The state has old behavior and was truncated in the last iteration and
868 // 2.1 we explore all actions or
869 // 2.2 we truncate all actions (essentially restoring old behavior, but we do the truncation step again to benefit from updated bounds)
870 // 3 The state has old behavior and was not truncated in the last iteration and the current action
871 // 3.1 should be rewired or
872 // 3.2 should get the old behavior but either
873 // 3.2.1 none of the successor observation has been refined since the last rewiring or exploration of this action
874 // 3.2.2 rewiring is only delayed as it could still have an effect in a later refinement step
875
876 // Find out in which case we are
877 bool exploreAllActions = false;
878 bool truncateAllActions = false;
879 bool restoreAllActions = false;
880 bool checkRewireForAllActions = false;
881 // Get the relative gap
882 ValueType gap = getGap(overApproximation->getLowerValueBoundAtCurrentState(), overApproximation->getUpperValueBoundAtCurrentState());
883 if (!hasOldBehavior) {
884 // Case 1
885 // If we explore this state and if it has no old behavior, it is clear that an "old" optimal scheduler can be extended to a scheduler that
886 // reaches this state
887 if (!timeLimitExceeded && gap >= heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
888 exploreAllActions = true; // Case 1.1
889 } else {
890 truncateAllActions = true; // Case 1.2
891 overApproximation->setCurrentStateIsTruncated();
892 }
893 } else if (overApproximation->getCurrentStateWasTruncated()) {
894 // Case 2
895 if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold &&
896 numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
897 exploreAllActions = true; // Case 2.1
898 STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because a previously truncated state is now explored.");
899 fixPoint = false;
900 } else {
901 truncateAllActions = true; // Case 2.2
902 overApproximation->setCurrentStateIsTruncated();
903 if (fixPoint) {
904 // Properly check whether this can still be a fixpoint
905 if (overApproximation->currentStateIsOptimalSchedulerReachable() && !storm::utility::isZero(gap)) {
906 STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because we truncate a state with non-zero gap "
907 << gap << " that is reachable via an optimal sched.");
908 fixPoint = false;
909 }
910 // else {}
911 // In this case we truncated a state that is not reachable under optimal schedulers.
912 // If no other state is explored (i.e. fixPoint remains true), these states should still not be reachable in subsequent iterations
913 }
914 }
915 } else {
916 // Case 3
917 // The decision for rewiring also depends on the corresponding action, but we have some criteria that lead to case 3.2 (independent of the
918 // action)
919 if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold &&
920 numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
921 checkRewireForAllActions = true; // Case 3.1 or Case 3.2
922 } else {
923 restoreAllActions = true; // Definitely Case 3.2
924 // We still need to check for each action whether rewiring makes sense later
925 checkRewireForAllActions = true;
926 }
927 }
928 bool expandedAtLeastOneAction = false;
929 for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
930 bool expandCurrentAction = exploreAllActions || truncateAllActions;
931 if (checkRewireForAllActions) {
932 assert(refine);
933 // In this case, we still need to check whether this action needs to be expanded
934 assert(!expandCurrentAction);
935 // Check the action dependent conditions for rewiring
936 // First, check whether this action has been rewired since the last refinement of one of the successor observations (i.e. whether rewiring
937 // would actually change the successor states)
938 assert(overApproximation->currentStateHasOldBehavior());
939 if (overApproximation->getCurrentStateActionExplorationWasDelayed(action) ||
940 overApproximation->currentStateHasSuccessorObservationInObservationSet(action, refinedObservations)) {
941 // Then, check whether the other criteria for rewiring are satisfied
942 if (!restoreAllActions && overApproximation->actionAtCurrentStateWasOptimal(action)) {
943 // Do the rewiring now! (Case 3.1)
944 expandCurrentAction = true;
945 STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because we rewire a state.");
946 fixPoint = false;
947 } else {
948 // Delay the rewiring (Case 3.2.2)
949 overApproximation->setCurrentChoiceIsDelayed(action);
950 if (fixPoint) {
951 // Check whether this delay means that a fixpoint has not been reached
952 if (!overApproximation->getCurrentStateActionExplorationWasDelayed(action) ||
953 (overApproximation->currentStateIsOptimalSchedulerReachable() &&
954 overApproximation->actionAtCurrentStateWasOptimal(action) && !storm::utility::isZero(gap))) {
955 STORM_LOG_INFO_COND(!fixPoint,
956 "Not reaching a refinement fixpoint because we delay a rewiring of a state with non-zero gap "
957 << gap << " that is reachable via an optimal scheduler.");
958 fixPoint = false;
959 }
960 }
961 }
962 } // else { Case 3.2.1 }
963 }
964
965 if (expandCurrentAction) {
966 expandedAtLeastOneAction = true;
967 if (!truncateAllActions) {
968 // Cases 1.1, 2.1, or 3.1
969 auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
970 for (auto const& successor : successorGridPoints) {
971 overApproximation->addTransitionToBelief(action, successor.first, successor.second, false);
972 }
973 if (computeRewards) {
974 overApproximation->computeRewardAtCurrentState(action);
975 }
976 } else {
977 // Cases 1.2 or 2.2
978 auto truncationProbability = storm::utility::zero<ValueType>();
979 auto truncationValueBound = storm::utility::zero<ValueType>();
980 auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
981 for (auto const& successor : successorGridPoints) {
982 bool added = overApproximation->addTransitionToBelief(action, successor.first, successor.second, true);
983 if (!added) {
984 // We did not explore this successor state. Get a bound on the "missing" value
985 truncationProbability += successor.second;
986 truncationValueBound += successor.second * (min ? overApproximation->computeLowerValueBoundAtBelief(successor.first)
987 : overApproximation->computeUpperValueBoundAtBelief(successor.first));
988 }
989 }
990 if (computeRewards) {
991 // The truncationValueBound will be added on top of the reward introduced by the current belief state.
992 overApproximation->addTransitionsToExtraStates(action, truncationProbability);
993 overApproximation->computeRewardAtCurrentState(action, truncationValueBound);
994 } else {
995 overApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
996 }
997 }
998 } else {
999 // Case 3.2
1000 overApproximation->restoreOldBehaviorAtCurrentState(action);
1001 }
1002 }
1003 if (expandedAtLeastOneAction) {
1004 ++numRewiredOrExploredStates;
1005 }
1006 }
1007
1008 for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
1009 if (pomdp().hasChoiceLabeling()) {
1010 auto rowIndex = pomdp().getTransitionMatrix().getRowGroupIndices()[beliefManager->getRepresentativeState(currId)];
1011 if (pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).size() > 0) {
1012 overApproximation->addChoiceLabelToCurrentState(action, *(pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).begin()));
1013 }
1014 }
1015 }
1016
 1017 if (storm::utility::resources::isTerminate()) {
 1018 break;
1019 }
1020 }
1021
 1022 if (storm::utility::resources::isTerminate()) {
 1023 // don't overwrite statistics of a previous, successful computation
1024 if (!statistics.overApproximationStates) {
1025 statistics.overApproximationBuildAborted = true;
1026 statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates();
1027 }
1028 statistics.overApproximationBuildTime.stop();
1029 return false;
1030 }
1031
1032 overApproximation->finishExploration();
1033 statistics.overApproximationBuildTime.stop();
1034
1035 statistics.overApproximationCheckTime.start();
1036 overApproximation->computeValuesOfExploredMdp(env, min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
1037 statistics.overApproximationCheckTime.stop();
1038
1039 // don't overwrite statistics of a previous, successful computation
1040 if (!storm::utility::resources::isTerminate() || !statistics.overApproximationStates) {
1041 statistics.overApproximationStates = overApproximation->getExploredMdp()->getNumberOfStates();
1042 }
1043 return fixPoint;
1044}
1045
1046template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1047bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::buildUnderApproximation(
1048 storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, bool refine,
1049 HeuristicParameters const& heuristicParameters, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation,
1050 bool firstIteration) {
1051 statistics.underApproximationBuildTime.start();
1052
1053 unfoldingStatus = Status::Exploring;
1054 if (options.useClipping) {
1055 STORM_LOG_INFO("Use Belief Clipping with grid beliefs \n");
1056 statistics.nrClippingAttempts = 0;
1057 statistics.nrClippedStates = 0;
1058 }
1059
1060 uint64_t nrCutoffStrategies = min ? underApproximation->getNrSchedulersForUpperBounds() : underApproximation->getNrSchedulersForLowerBounds();
1061
1062 bool fixPoint = true;
1063 if (heuristicParameters.sizeThreshold != std::numeric_limits<uint64_t>::max()) {
1064 statistics.underApproximationStateLimit = heuristicParameters.sizeThreshold;
1065 }
1066 if (!refine) {
1067 if (options.interactiveUnfolding && !firstIteration) {
1068 underApproximation->restoreExplorationState();
1069 } else if (computeRewards) { // Build a new under approximation
1070 // We use the sink state for infinite cut-off/clipping values
1071 underApproximation->startNewExploration(storm::utility::zero<ValueType>(), storm::utility::infinity<ValueType>());
1072 } else {
1073 underApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
1074 }
1075 } else {
1076 // Restart the building process
1077 underApproximation->restartExploration();
1078 }
1079
1080 // Expand the beliefs
1081 storm::utility::Stopwatch explorationTime;
1082 storm::utility::Stopwatch printUpdateStopwatch;
1083 printUpdateStopwatch.start();
1084 if (options.explorationTimeLimit != 0) {
1085 explorationTime.start();
1086 }
1087 bool timeLimitExceeded = false;
1088 bool stateStored = false;
1089 while (underApproximation->hasUnexploredState()) {
1090 if (!timeLimitExceeded && options.explorationTimeLimit != 0 &&
1091 static_cast<uint64_t>(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit) {
1092 STORM_LOG_INFO("Exploration time limit exceeded.");
1093 timeLimitExceeded = true;
1094 }
1095 if (printUpdateStopwatch.getTimeInSeconds() >= 60) {
1096 printUpdateStopwatch.restart();
1097 STORM_LOG_INFO("### " << underApproximation->getCurrentNumberOfMdpStates() << " beliefs in underapproximation MDP" << " ##### "
1098 << underApproximation->getUnexploredStates().size() << " beliefs queued\n");
1099 if (underApproximation->getCurrentNumberOfMdpStates() > heuristicParameters.sizeThreshold && options.useClipping) {
1100 STORM_LOG_INFO("##### Clipping Attempts: " << statistics.nrClippingAttempts.value() << " ##### "
1101 << "Clipped States: " << statistics.nrClippedStates.value() << "\n");
1102 }
1103 }
1104 if (unfoldingControl == UnfoldingControl::Pause && !stateStored) {
1105 underApproximation->storeExplorationState();
1106 stateStored = true;
1107 }
1108 uint64_t currId = underApproximation->exploreNextState();
1109 uint32_t currObservation = beliefManager->getBeliefObservation(currId);
1110 uint64_t addedActions = 0;
1111 bool stateAlreadyExplored = refine && underApproximation->currentStateHasOldBehavior() && !underApproximation->getCurrentStateWasTruncated();
1112 if (!stateAlreadyExplored || timeLimitExceeded) {
1113 fixPoint = false;
1114 }
1115 if (targetObservations.count(beliefManager->getBeliefObservation(currId)) != 0) {
1116 underApproximation->setCurrentStateIsTarget();
1117 underApproximation->addSelfloopTransition();
1118 underApproximation->addChoiceLabelToCurrentState(0, "loop");
1119 } else {
1120 bool stopExploration = false;
1121 bool clipBelief = false;
1122 if (timeLimitExceeded || (options.interactiveUnfolding && unfoldingControl != UnfoldingControl::Run)) {
1123 clipBelief = options.useClipping;
1124 stopExploration = !underApproximation->isMarkedAsGridBelief(currId);
1125 } else if (!stateAlreadyExplored) {
1126 // Check whether we want to explore the state now!
1127 ValueType gap = getGap(underApproximation->getLowerValueBoundAtCurrentState(), underApproximation->getUpperValueBoundAtCurrentState());
1128 if ((gap < heuristicParameters.gapThreshold) || (gap == 0 && options.cutZeroGap)) {
1129 stopExploration = true;
1130 } else if (underApproximation->getCurrentNumberOfMdpStates() >=
1131 heuristicParameters.sizeThreshold /*&& !statistics.beliefMdpDetectedToBeFinite*/) {
1132 clipBelief = options.useClipping;
1133 stopExploration = !underApproximation->isMarkedAsGridBelief(currId);
1134 }
1135 }
1136
1137 if (clipBelief && !underApproximation->isMarkedAsGridBelief(currId)) {
1138 // Use a belief grid as clipping candidates
1139 if (!options.useStateEliminationCutoff) {
1140 bool successfulClip = clipToGridExplicitly(currId, computeRewards, beliefManager, underApproximation, 0);
1141 // Set again as the current belief might have been detected to be a grid belief
1142 stopExploration = !underApproximation->isMarkedAsGridBelief(currId);
1143 if (successfulClip) {
1144 addedActions += 1;
1145 }
1146 } else {
1147 clipToGrid(currId, computeRewards, min, beliefManager, underApproximation);
1148 addedActions += beliefManager->getBeliefNumberOfChoices(currId);
1149 }
1150 } // end Clipping Procedure
1151
1152 if (stopExploration) {
1153 underApproximation->setCurrentStateIsTruncated();
1154 }
1155 if (options.useStateEliminationCutoff || !stopExploration) {
1156 // Add successor transitions or cut-off transitions when exploration is stopped
1157 uint64_t numActions = beliefManager->getBeliefNumberOfChoices(currId);
1158 if (underApproximation->needsActionAdjustment(numActions)) {
1159 underApproximation->adjustActions(numActions);
1160 }
1161 for (uint64_t action = 0; action < numActions; ++action) {
1162 // Always restore old behavior if available
1163 if (pomdp().hasChoiceLabeling()) {
1164 auto rowIndex = pomdp().getTransitionMatrix().getRowGroupIndices()[beliefManager->getRepresentativeState(currId)];
1165 if (pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).size() > 0) {
1166 underApproximation->addChoiceLabelToCurrentState(addedActions + action,
1167 *(pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).begin()));
1168 }
1169 }
1170 if (stateAlreadyExplored) {
1171 underApproximation->restoreOldBehaviorAtCurrentState(action);
1172 } else {
1173 auto truncationProbability = storm::utility::zero<ValueType>();
1174 auto truncationValueBound = storm::utility::zero<ValueType>();
1175 auto successors = beliefManager->expand(currId, action);
1176 for (auto const& successor : successors) {
1177 bool added = underApproximation->addTransitionToBelief(addedActions + action, successor.first, successor.second, stopExploration);
1178 if (!added) {
1179 STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
1180 // We did not explore this successor state. Get a bound on the "missing" value
1181 truncationProbability += successor.second;
1182 // Some care has to be taken here: Essentially, we are triangulating a value for the under-approximation out of
1183 // other under-approximation values. In general, this does not yield a sound under-approximation anymore, as the
1184 // values might be achieved by different schedulers. However, in our case the result remains sound because the
1185 // under-approximation values are based on a single memoryless scheduler.
1186 truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first)
1187 : underApproximation->computeLowerValueBoundAtBelief(successor.first));
1188 }
1189 }
1190 if (stopExploration) {
1191 if (computeRewards) {
1192 if (truncationValueBound == storm::utility::infinity<ValueType>()) {
1193 underApproximation->addTransitionsToExtraStates(addedActions + action, storm::utility::zero<ValueType>(),
1194 truncationProbability);
1195 } else {
1196 underApproximation->addTransitionsToExtraStates(addedActions + action, truncationProbability);
1197 }
1198 } else {
1199 underApproximation->addTransitionsToExtraStates(addedActions + action, truncationValueBound,
1200 truncationProbability - truncationValueBound);
1201 }
1202 }
1203 if (computeRewards) {
1204 // The truncationValueBound will be added on top of the reward introduced by the current belief state.
1205 if (truncationValueBound != storm::utility::infinity<ValueType>()) {
1206 if (!clipBelief) {
1207 underApproximation->computeRewardAtCurrentState(action, truncationValueBound);
1208 } else {
1209 underApproximation->addRewardToCurrentState(addedActions + action,
1210 beliefManager->getBeliefActionReward(currId, action) + truncationValueBound);
1211 }
1212 }
1213 }
1214 }
1215 }
1216 } else {
1217 for (uint64_t i = 0; i < nrCutoffStrategies && !options.skipHeuristicSchedulers; ++i) {
1218 auto cutOffValue = min ? underApproximation->computeUpperValueBoundForScheduler(currId, i)
1219 : underApproximation->computeLowerValueBoundForScheduler(currId, i);
1220 if (computeRewards) {
1221 if (cutOffValue != storm::utility::infinity<ValueType>()) {
1222 underApproximation->addTransitionsToExtraStates(addedActions, storm::utility::one<ValueType>());
1223 underApproximation->addRewardToCurrentState(addedActions, cutOffValue);
1224 } else {
1225 underApproximation->addTransitionsToExtraStates(addedActions, storm::utility::zero<ValueType>(), storm::utility::one<ValueType>());
1226 }
1227 } else {
1228 underApproximation->addTransitionsToExtraStates(addedActions, cutOffValue, storm::utility::one<ValueType>() - cutOffValue);
1229 }
1230 if (pomdp().hasChoiceLabeling()) {
1231 underApproximation->addChoiceLabelToCurrentState(addedActions, "sched_" + std::to_string(i));
1232 }
1233 addedActions++;
1234 }
1235 if (underApproximation->hasFMSchedulerValues()) {
1236 uint64_t transitionNr = 0;
1237 for (uint64_t i = 0; i < underApproximation->getNrOfMemoryNodesForObservation(currObservation); ++i) {
1238 auto resPair = underApproximation->computeFMSchedulerValueForMemoryNode(currId, i);
1239 ValueType cutOffValue;
1240 if (resPair.first) {
1241 cutOffValue = resPair.second;
1242 } else {
1243 STORM_LOG_DEBUG("Skipped cut-off of belief with ID " << currId << " with finite memory scheduler in memory node " << i
1244 << ". Missing values.");
1245 continue;
1246 }
1247 if (computeRewards) {
1248 if (cutOffValue != storm::utility::infinity<ValueType>()) {
1249 underApproximation->addTransitionsToExtraStates(addedActions + transitionNr, storm::utility::one<ValueType>());
1250 underApproximation->addRewardToCurrentState(addedActions + transitionNr, cutOffValue);
1251 } else {
1252 underApproximation->addTransitionsToExtraStates(addedActions + transitionNr, storm::utility::zero<ValueType>(),
1253 storm::utility::one<ValueType>());
1254 }
1255 } else {
1256 underApproximation->addTransitionsToExtraStates(addedActions + transitionNr, cutOffValue,
1257 storm::utility::one<ValueType>() - cutOffValue);
1258 }
1259 if (pomdp().hasChoiceLabeling()) {
1260 underApproximation->addChoiceLabelToCurrentState(addedActions + transitionNr, "mem_node_" + std::to_string(i));
1261 }
1262 ++transitionNr;
1263 }
1264 }
1265 }
1266 }
1267 if (storm::utility::resources::isTerminate()) {
1268 break;
1269 }
1270 }
1271
1272 if (storm::utility::resources::isTerminate()) {
1273 // don't overwrite statistics of a previous, successful computation
1274 if (!statistics.underApproximationStates) {
1275 statistics.underApproximationBuildAborted = true;
1276 statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates();
1277 }
1278 statistics.underApproximationBuildTime.stop();
1279 return false;
1280 }
1281
1282 underApproximation->finishExploration();
1283 statistics.underApproximationBuildTime.stop();
1284 printUpdateStopwatch.stop();
1285 STORM_LOG_INFO("Finished exploring under-approximation MDP.\nStart analysis...\n");
1286 unfoldingStatus = Status::ModelExplorationFinished;
1287 statistics.underApproximationCheckTime.start();
1288 underApproximation->computeValuesOfExploredMdp(env, min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
1289 statistics.underApproximationCheckTime.stop();
1290 if (underApproximation->getExploredMdp()->getStateLabeling().getStates("truncated").getNumberOfSetBits() > 0) {
1291 statistics.nrTruncatedStates = underApproximation->getExploredMdp()->getStateLabeling().getStates("truncated").getNumberOfSetBits();
1292 }
1293 // don't overwrite statistics of a previous, successful computation
1294 if (!storm::utility::resources::isTerminate() || !statistics.underApproximationStates) {
1295 statistics.underApproximationStates = underApproximation->getExploredMdp()->getNumberOfStates();
1296 }
1297 return fixPoint;
1298}
1299
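A minimal, self-contained sketch of the cut-off arithmetic used above for probability objectives (all names and numbers are illustrative and not part of this file): the probability mass of unexplored successors is accumulated together with a weighted value bound, and the cut-off choice then leads to the goal-like extra state with the bounded value and to the sink state with the remaining mass.

#include <iostream>
#include <utility>
#include <vector>

int main() {
    // (probability of the unexplored successor, value bound at that successor belief;
    // the upper bound is used when minimizing, the lower bound when maximizing)
    std::vector<std::pair<double, double>> unexplored = {{0.3, 0.5}, {0.1, 0.8}};
    double truncationProbability = 0.0;
    double truncationValueBound = 0.0;
    for (auto const& [prob, bound] : unexplored) {
        truncationProbability += prob;         // total cut-off mass
        truncationValueBound += prob * bound;  // bound on the "missing" value
    }
    // The cut-off choice reaches the goal extra state with probability truncationValueBound
    // and the sink extra state with the remaining mass.
    std::cout << "to goal: " << truncationValueBound << ", to sink: " << (truncationProbability - truncationValueBound) << "\n";
    return 0;
}
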
1300template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1301void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::clipToGrid(uint64_t clippingStateId, bool computeRewards, bool min,
1302 std::shared_ptr<BeliefManagerType>& beliefManager,
1303 std::shared_ptr<ExplorerType>& beliefExplorer) {
1304 // Add all transitions to states which are already in the MDP, clip all others to a grid
1305 // To make the resulting MDP smaller, we eliminate intermediate successor states when clipping is applied
1306 for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(clippingStateId); action < numActions; ++action) {
1307 auto rewardBound = utility::zero<BeliefValueType>();
1308 auto successors = beliefManager->expand(clippingStateId, action);
1309 auto absDelta = utility::zero<BeliefValueType>();
1310 for (auto const& successor : successors) {
1311 // Add transition if successor is in explored space.
1312 // We can directly add the transitions as there is at most one successor for each observation
1313 // Therefore no belief can be clipped to an already added successor
1314 bool added = beliefExplorer->addTransitionToBelief(action, successor.first, successor.second, true);
1315 if (!added) {
1316 // The successor is not in the explored space. Clip it
1317 statistics.nrClippingAttempts = statistics.nrClippingAttempts.value() + 1;
1318 auto clipping = beliefManager->clipBeliefToGrid(
1319 successor.first, options.clippingGridRes, computeRewards ? beliefExplorer->getStateExtremeBoundIsInfinite() : storm::storage::BitVector());
1320 if (clipping.isClippable) {
1321 // The belief is not on the grid and there is a candidate with finite reward
1322 statistics.nrClippedStates = statistics.nrClippedStates.value() + 1;
1323 // Transition probability to candidate is (probability to successor) * (clipping transition probability)
1324 BeliefValueType transitionProb =
1325 (utility::one<BeliefValueType>() - clipping.delta) * utility::convertNumber<BeliefValueType>(successor.second);
1326 beliefExplorer->addTransitionToBelief(action, clipping.targetBelief, utility::convertNumber<BeliefMDPType>(transitionProb), false);
1327 // Collect weighted clipping values
1328 absDelta += clipping.delta * utility::convertNumber<BeliefValueType>(successor.second);
1329 if (computeRewards) {
1330 // collect cumulative reward bounds
1331 auto localRew = utility::zero<BeliefValueType>();
1332 for (auto const& deltaValue : clipping.deltaValues) {
1333 localRew += deltaValue.second *
1334 utility::convertNumber<BeliefValueType>((beliefExplorer->getExtremeValueBoundAtPOMDPState(deltaValue.first)));
1335 }
1336 if (localRew == utility::infinity<BeliefValueType>()) {
1337 STORM_LOG_WARN("Infinite reward in clipping!");
1338 }
1339 rewardBound += localRew * utility::convertNumber<BeliefValueType>(successor.second);
1340 }
1341 } else if (clipping.onGrid) {
1342 // If the belief is not clippable, but on the grid, it may need to be explored, too
1343 beliefExplorer->addTransitionToBelief(action, successor.first, successor.second, false);
1344 } else {
1345 // Otherwise, the reward for all candidates is infinite, so clipping does not make sense. Cut the belief off instead.
1346 absDelta += utility::convertNumber<BeliefValueType>(successor.second);
1347 rewardBound += utility::convertNumber<BeliefValueType>(successor.second) *
1348 utility::convertNumber<BeliefValueType>(min ? beliefExplorer->computeUpperValueBoundAtBelief(successor.first)
1349 : beliefExplorer->computeLowerValueBoundAtBelief(successor.first));
1350 }
1351 }
1352 }
1353 // Add the collected clipping transition if necessary
1354 if (absDelta != utility::zero<BeliefValueType>()) {
1355 if (computeRewards) {
1356 if (rewardBound == utility::infinity<BeliefValueType>()) {
1357 // If the reward is infinite, add a transition to the sink state to collect infinite reward
1358 beliefExplorer->addTransitionsToExtraStates(action, utility::zero<BeliefMDPType>(), utility::convertNumber<BeliefMDPType>(absDelta));
1359 } else {
1360 beliefExplorer->addTransitionsToExtraStates(action, utility::convertNumber<BeliefMDPType>(absDelta));
1361 BeliefValueType totalRewardVal = rewardBound / absDelta;
1362 beliefExplorer->addClippingRewardToCurrentState(action, utility::convertNumber<BeliefMDPType>(totalRewardVal));
1363 }
1364 } else {
1365 beliefExplorer->addTransitionsToExtraStates(action, utility::zero<BeliefMDPType>(), utility::convertNumber<BeliefMDPType>(absDelta));
1366 }
1367 }
1368 if (computeRewards) {
1369 beliefExplorer->computeRewardAtCurrentState(action);
1370 }
1371 }
1372}
1373
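A small, hypothetical numeric sketch of the clipping bookkeeping in clipToGrid above (values are made up): the mass that reaches an off-grid successor is split between the grid candidate and the clipped remainder, and for reward objectives the accumulated reward bound is normalized by the clipped mass before it is attached to the extra-state transition.

#include <iostream>

int main() {
    double successorProb = 0.4;  // probability of reaching the off-grid successor
    double delta = 0.25;         // clipping value returned for that successor
    double candidateProb = (1.0 - delta) * successorProb;  // mass routed to the grid candidate
    double clippedMass = delta * successorProb;            // contribution to absDelta
    double rewardBound = 0.06;   // weighted reward correction collected for the clipped mass
    std::cout << "to candidate: " << candidateProb << ", clipped: " << clippedMass
              << ", reward on clipping transition: " << (rewardBound / clippedMass) << "\n";
    return 0;
}
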
1374template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1375bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::clipToGridExplicitly(uint64_t clippingStateId, bool computeRewards,
1376 std::shared_ptr<BeliefManagerType>& beliefManager,
1377 std::shared_ptr<ExplorerType>& beliefExplorer,
1378 uint64_t localActionIndex) {
1379 statistics.nrClippingAttempts = statistics.nrClippingAttempts.value() + 1;
1380 auto clipping = beliefManager->clipBeliefToGrid(clippingStateId, options.clippingGridRes,
1381 computeRewards ? beliefExplorer->getStateExtremeBoundIsInfinite() : storm::storage::BitVector());
1382 if (clipping.isClippable) {
1383 // The belief is not on the grid and there is a candidate with finite reward
1384 statistics.nrClippedStates = statistics.nrClippedStates.value() + 1;
1385 // Transition probability to candidate is clipping value
1386 BeliefValueType transitionProb = (utility::one<BeliefValueType>() - clipping.delta);
1387 beliefExplorer->addTransitionToBelief(localActionIndex, clipping.targetBelief, utility::convertNumber<BeliefMDPType>(transitionProb), false);
1388 beliefExplorer->markAsGridBelief(clipping.targetBelief);
1389 if (computeRewards) {
1390 // collect cumulative reward bounds
1391 auto reward = utility::zero<BeliefValueType>();
1392 for (auto const& deltaValue : clipping.deltaValues) {
1393 reward += deltaValue.second * utility::convertNumber<BeliefValueType>((beliefExplorer->getExtremeValueBoundAtPOMDPState(deltaValue.first)));
1394 }
1395 if (reward == utility::infinity<BeliefValueType>()) {
1396 STORM_LOG_WARN("Infinite reward in clipping!");
1397 // If the reward is infinite, add a transition to the sink state to collect infinite reward in our semantics
1398 beliefExplorer->addTransitionsToExtraStates(localActionIndex, utility::zero<BeliefMDPType>(),
1399 utility::convertNumber<BeliefMDPType>(clipping.delta));
1400 } else {
1401 beliefExplorer->addTransitionsToExtraStates(localActionIndex, utility::convertNumber<BeliefMDPType>(clipping.delta));
1402 BeliefValueType totalRewardVal = reward / clipping.delta;
1403 beliefExplorer->addClippingRewardToCurrentState(localActionIndex, utility::convertNumber<BeliefMDPType>(totalRewardVal));
1404 }
1405 } else {
1406 beliefExplorer->addTransitionsToExtraStates(localActionIndex, utility::zero<BeliefMDPType>(),
1407 utility::convertNumber<BeliefMDPType>(clipping.delta));
1408 }
1409 beliefExplorer->addChoiceLabelToCurrentState(localActionIndex, "clip");
1410 return true;
1411 } else {
1412 if (clipping.onGrid) {
1413 // If the belief is not clippable, but on the grid, it may need to be explored, too
1414 beliefExplorer->markAsGridBelief(clippingStateId);
1415 }
1416 }
1417 return false;
1418}
1419
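In the explicit variant above, the current belief itself is clipped by adding a single extra "clip" action instead of eliminating intermediate successors. A hypothetical numeric sketch of the resulting transition and reward correction (values are made up, not taken from this file):

#include <iostream>

int main() {
    double delta = 0.2;                   // clipping value (mass moved off the belief)
    double weightedExtremeBounds = 0.05;  // sum of delta values weighted by extreme value bounds
    double transitionToCandidate = 1.0 - delta;             // probability of the "clip" transition
    double clippingReward = weightedExtremeBounds / delta;  // reward attached to the residual mass
    std::cout << transitionToCandidate << " to grid candidate, reward " << clippingReward
              << " on the transition to the extra state\n";
    return 0;
}
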
1420template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1421void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::setUnfoldingControl(
1422 storm::pomdp::modelchecker::BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::UnfoldingControl newUnfoldingControl) {
1423 unfoldingControl = newUnfoldingControl;
1424}
1425
1426template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1427void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::pauseUnfolding() {
1428 STORM_LOG_TRACE("PAUSE COMMAND ISSUED");
1429 setUnfoldingControl(UnfoldingControl::Pause);
1430}
1431
1432template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1433void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::continueUnfolding() {
1434 STORM_LOG_TRACE("CONTINUATION COMMAND ISSUED");
1435 setUnfoldingControl(UnfoldingControl::Run);
1436}
1437
1438template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1439void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::terminateUnfolding() {
1440 STORM_LOG_TRACE("TERMINATION COMMAND ISSUED");
1441 setUnfoldingControl(UnfoldingControl::Terminate);
1442}
1443
1444template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1445bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::isResultReady() {
1446 return unfoldingStatus == Status::ResultAvailable || unfoldingStatus == Status::Converged;
1447}
1448
1449template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1453
1454template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1458
1459template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1460std::shared_ptr<storm::builder::BeliefMdpExplorer<PomdpModelType, BeliefValueType>>
1461BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getInteractiveBeliefExplorer() {
1462 return interactiveUnderApproximationExplorer;
1463}
1464
1465template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1466void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::setFMSchedValueList(
1467 std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> valueList) {
1468 interactiveUnderApproximationExplorer->setFMSchedValueList(valueList);
1469}
1470
1471template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1472BeliefValueType BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::rateObservation(
1473 typename ExplorerType::SuccessorObservationInformation const& info, BeliefValueType const& observationResolution, BeliefValueType const& maxResolution) {
1474 auto n = storm::utility::convertNumber<BeliefValueType, uint64_t>(info.support.size());
1475 auto one = storm::utility::one<BeliefValueType>();
1476 if (storm::utility::isOne(n)) {
1477 // If the belief is Dirac, it has to be approximated precisely.
1478 // In this case, we return the best possible rating
1479 return one;
1480 } else {
1481 // Create the rating for this observation at this choice from the given info
1482 auto obsChoiceRating = storm::utility::convertNumber<BeliefValueType, ValueType>(info.maxProbabilityToSuccessorWithObs / info.observationProbability);
1483 // At this point, obsChoiceRating is the largest triangulation weight (which ranges from 1/n to 1).
1484 // Normalize the rating so that it ranges from 0 to 1, where
1485 // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely
1486 // approximated.
1487 obsChoiceRating = (obsChoiceRating * n - one) / (n - one);
1488 // Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined)
1489 obsChoiceRating *= observationResolution / maxResolution;
1490 return obsChoiceRating;
1491 }
1492}
1493
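A hypothetical worked example of the rating computed by rateObservation above (numbers are made up): the largest triangulation weight is rescaled from the interval [1/n, 1] to [0, 1] and then damped by the ratio of the observation's resolution to the maximal resolution, so coarsely resolved observations receive lower ratings and are refined first.

#include <iostream>

int main() {
    double n = 4.0;          // size of the belief support
    double maxWeight = 0.7;  // largest triangulation weight (ranges from 1/n to 1)
    double rating = (maxWeight * n - 1.0) / (n - 1.0);  // normalized to [0, 1], here 0.6
    double observationResolution = 6.0, maxResolution = 12.0;
    rating *= observationResolution / maxResolution;    // penalize coarse resolutions
    std::cout << "observation rating: " << rating << "\n";  // 0.6 * 0.5 = 0.3
    return 0;
}
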
1494template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1495std::vector<BeliefValueType> BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getObservationRatings(
1496 std::shared_ptr<ExplorerType> const& overApproximation, std::vector<BeliefValueType> const& observationResolutionVector) {
1497 uint64_t numMdpStates = overApproximation->getExploredMdp()->getNumberOfStates();
1498 auto const& choiceIndices = overApproximation->getExploredMdp()->getNondeterministicChoiceIndices();
1499 BeliefValueType maxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
1500
1501 std::vector<BeliefValueType> resultingRatings(pomdp().getNrObservations(), storm::utility::one<BeliefValueType>());
1502
1503 std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations; // Declare here to avoid reallocations
1504 for (uint64_t mdpState = 0; mdpState < numMdpStates; ++mdpState) {
1505 // Check whether this state is reached under an optimal scheduler.
1506 // The heuristic assumes that the remaining states are not relevant for the observation score.
1507 if (overApproximation->stateIsOptimalSchedulerReachable(mdpState)) {
1508 for (uint64_t mdpChoice = choiceIndices[mdpState]; mdpChoice < choiceIndices[mdpState + 1]; ++mdpChoice) {
1509 // Similarly, only optimal actions are relevant
1510 if (overApproximation->actionIsOptimal(mdpChoice)) {
1511 // score the observations for this choice
1512 gatheredSuccessorObservations.clear();
1513 overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
1514 for (auto const& obsInfo : gatheredSuccessorObservations) {
1515 auto const& obs = obsInfo.first;
1516 BeliefValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution);
1517
1518 // The rating of the observation will be the minimum over all choice-based observation ratings
1519 resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating);
1520 }
1521 }
1522 }
1523 }
1524 }
1525 return resultingRatings;
1526}
1527
1528template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1529typename PomdpModelType::ValueType BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getGap(
1530 typename PomdpModelType::ValueType const& l, typename PomdpModelType::ValueType const& u) {
1531 STORM_LOG_ASSERT(l >= storm::utility::zero<typename PomdpModelType::ValueType>() && u >= storm::utility::zero<typename PomdpModelType::ValueType>(),
1532 "Gap computation currently does not handle negative values.");
1535 return storm::utility::zero<typename PomdpModelType::ValueType>();
1536 } else {
1537 return u;
1538 }
1539 } else if (storm::utility::isZero(u)) {
1540 STORM_LOG_ASSERT(storm::utility::isZero(l), "Upper bound is zero but lower bound is " << l << ".");
1541 return u;
1542 } else {
1543 STORM_LOG_ASSERT(!storm::utility::isInfinity(l), "Lower bound is infinity, but upper bound is " << u << ".");
1544 // get the relative gap
1545 return storm::utility::abs<typename PomdpModelType::ValueType>(u - l) * storm::utility::convertNumber<typename PomdpModelType::ValueType, uint64_t>(2) /
1546 (l + u);
1547 }
1548}
1549
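A hypothetical worked example for the relative gap returned by getGap above when both bounds are finite, non-negative, and not both zero (numbers are made up):

#include <cmath>
#include <iostream>

int main() {
    double l = 0.4, u = 0.6;                       // lower and upper bound
    double gap = std::abs(u - l) * 2.0 / (l + u);  // relative gap
    std::cout << "relative gap: " << gap << "\n";  // 0.2 * 2 / 1.0 = 0.4
    return 0;
}
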
1550/* Template Instantiations */
1551
1552template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<double>>;
1553
1554template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<double>, storm::RationalNumber>;
1555
1556template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<storm::RationalNumber>, double>;
1557
1558template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<storm::RationalNumber>>;
1559
1560} // namespace modelchecker
1561} // namespace pomdp
1562} // namespace storm
virtual bool isRewardOperatorFormula() const
Definition Formula.cpp:184
void addLabel(std::string const &label)
Adds a new label to the labelings.
bool containsLabel(std::string const &label) const
Checks whether a label is registered within this labeling.
void removeLabel(std::string const &label)
Removes a label from the labelings.
This class represents a (discrete-time) Markov decision process.
Definition Mdp.h:13
std::shared_ptr< storm::models::sparse::Model< ValueType, RewardModelType > > applyScheduler(storm::storage::Scheduler< ValueType > const &scheduler, bool dropUnreachableStates=true, bool preserveModelType=false) const
Applies the given scheduler to this model.
This class manages the labeling of the state space with a number of (atomic) labels.
bool getStateHasLabel(std::string const &label, storm::storage::sparse::state_type state) const
Checks whether a given state is labeled with the given label.
void addLabelToState(std::string const &label, storm::storage::sparse::state_type state)
Adds a label to a given state.
Model checker for checking reachability queries on POMDPs using approximations based on exploration o...
BeliefExplorationPomdpModelChecker(std::shared_ptr< PomdpModelType > pomdp, Options options=Options())
Constructor.
int64_t getStatus()
Get the current status of the interactive unfolding.
bool hasConverged()
Indicates whether the interactive unfolding has converged, i.e.
bool isExploring()
Indicates whether the interactive unfolding is currently in the process of exploring the belief MDP.
std::shared_ptr< ExplorerType > getInteractiveBeliefExplorer()
Get a pointer to the belief explorer used in the interactive unfolding.
void setFMSchedValueList(std::vector< std::vector< std::unordered_map< uint64_t, ValueType > > > valueList)
void unfoldInteractively(storm::Environment const &env, std::set< uint32_t > const &targetObservations, bool min, std::optional< std::string > rewardModelName, storm::pomdp::modelchecker::POMDPValueBounds< ValueType > const &valueBounds, Result &result)
Allows to generate an under-approximation using a controllable unfolding.
bool isResultReady()
Indicates whether there is a result after an interactive unfolding was paused.
Result check(storm::Environment const &env, storm::logic::Formula const &formula, storm::Environment const &preProcEnv, std::vector< std::vector< std::unordered_map< uint64_t, ValueType > > > const &additionalUnderApproximationBounds=std::vector< std::vector< std::unordered_map< uint64_t, ValueType > > >())
Performs model checking of the given POMDP with regards to a formula using the previously specified o...
Result getInteractiveResult()
Get the latest saved result obtained by the interactive unfolding.
void printStatisticsToStream(std::ostream &stream) const
Prints statistics of the process to a given output stream.
void precomputeValueBounds(const logic::Formula &formula, storm::Environment const &preProcEnv)
Uses model checking on the underlying MDP to generate values used for cut-offs and for clipping compe...
void continueUnfolding()
Continues a previously paused interactive unfolding.
A bit vector that is internally represented as a vector of 64-bit values.
Definition BitVector.h:16
uint64_t getNumberOfSetBits() const
Returns the number of bits that are set to true in this bit vector.
size_t size() const
Retrieves the number of bits this bit vector can store.
std::pair< std::shared_ptr< storm::models::sparse::Pomdp< ValueType > >, std::set< uint32_t > > transform(storm::storage::BitVector const &stateSet) const
Ensures that the given set of states is observation closed, potentially adding new observation(s). A ...
A class that provides convenience operations to display run times.
Definition Stopwatch.h:14
void start()
Start stopwatch (again) and start measuring time.
Definition Stopwatch.cpp:48
void restart()
Reset the stopwatch and immediately start it.
Definition Stopwatch.cpp:59
SecondType getTimeInSeconds() const
Gets the measured time in seconds.
Definition Stopwatch.cpp:13
void stop()
Stop stopwatch and add measured time to total time.
Definition Stopwatch.cpp:42
#define STORM_LOG_INFO(message)
Definition logging.h:24
#define STORM_LOG_WARN(message)
Definition logging.h:25
#define STORM_LOG_DEBUG(message)
Definition logging.h:18
#define STORM_LOG_TRACE(message)
Definition logging.h:12
#define STORM_LOG_ASSERT(cond, message)
Definition macros.h:11
#define STORM_LOG_WARN_COND(cond, message)
Definition macros.h:38
#define STORM_LOG_ERROR_COND(cond, message)
Definition macros.h:52
#define STORM_LOG_THROW(cond, exception, message)
Definition macros.h:30
#define STORM_LOG_INFO_COND(cond, message)
Definition macros.h:45
SFTBDDChecker::ValueType ValueType
FormulaInformation getFormulaInformation(PomdpType const &pomdp, storm::logic::ProbabilityOperatorFormula const &formula)
ValueType getGap(ValueType const &l, ValueType const &u)
bool detectFiniteBeliefMdp(storm::models::sparse::Pomdp< ValueType > const &pomdp, std::optional< storm::storage::BitVector > const &targetStates)
This method tries to detect that the beliefmdp is finite.
storm::storage::BitVector getReachableStates(storm::storage::SparseMatrix< T > const &transitionMatrix, storm::storage::BitVector const &initialStates, storm::storage::BitVector const &constraintStates, storm::storage::BitVector const &targetStates, bool useStepBound, uint_fast64_t maximalSteps, boost::optional< storm::storage::BitVector > const &choiceFilter)
Performs a forward depth-first search through the underlying graph structure to identify the states t...
Definition graph.cpp:47
storage::BitVector BitVector
bool isTerminate()
Check whether the program should terminate (due to some abort signal).
bool isOne(ValueType const &a)
Definition constants.cpp:33
ValueType min(ValueType const &first, ValueType const &second)
bool isZero(ValueType const &a)
Definition constants.cpp:38
ValueType ceil(ValueType const &number)
bool isInfinity(ValueType const &a)
solver::OptimizationDirection OptimizationDirection
Structure for storing values on the POMDP used for cut-offs and clipping.
std::vector< std::vector< std::unordered_map< uint64_t, ValueType > > > fmSchedulerValueList
storm::pomdp::storage::ExtremePOMDPValueBound< ValueType > extremePomdpValueBound
storm::pomdp::storage::PreprocessingPomdpValueBounds< ValueType > trivialPomdpValueBounds
std::unordered_map< std::string, RewardModelType > rewardModels
storm::storage::SparseMatrix< ValueType > transitionMatrix
std::optional< storm::storage::sparse::StateValuations > observationValuations
std::optional< storm::models::sparse::ChoiceLabeling > choiceLabeling
storm::models::sparse::StateLabeling stateLabeling
std::optional< std::vector< uint32_t > > observabilityClasses