Storm - A Modern Probabilistic Model Checker
BeliefExplorationPomdpModelChecker.cpp
#include "storm-pomdp/modelchecker/BeliefExplorationPomdpModelChecker.h"

#include <tuple>

#include "storm/utility/graph.h"

namespace storm {
namespace pomdp {
namespace modelchecker {

/* Struct Functions */

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::Result(ValueType lower, ValueType upper)
    : lowerBound(lower), upperBound(upper) {
    // Intentionally left empty
}

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::ValueType
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::diff(bool relative) const {
    ValueType diff = upperBound - lowerBound;
    if (diff < storm::utility::zero<ValueType>()) {
        STORM_LOG_WARN_COND(diff >= storm::utility::convertNumber<ValueType>(-1e-6),
                            "Upper bound '" << upperBound << "' is smaller than lower bound '" << lowerBound << "': Difference is " << diff << ".");
        diff = storm::utility::zero<ValueType>();
    }
    if (relative && !storm::utility::isZero(upperBound)) {
        diff /= upperBound;
    }
    return diff;
}
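
// Example: with lowerBound = 0.4 and upperBound = 0.5, diff(false) returns the absolute
// gap 0.1, while diff(true) divides by the upper bound and returns 0.1 / 0.5 = 0.2.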

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::updateLowerBound(ValueType const& value) {
    if (value > lowerBound) {
        lowerBound = value;
        return true;
    }
    return false;
}

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result::updateUpperBound(ValueType const& value) {
    if (value < upperBound) {
        upperBound = value;
        return true;
    }
    return false;
}
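
// The two update functions above only ever tighten the interval: a lower bound is replaced
// only by a larger value, an upper bound only by a smaller one. The boolean result tells the
// caller whether the supplied value actually improved the bound.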

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Statistics::Statistics()
    : beliefMdpDetectedToBeFinite(false),
      refinementFixpointDetected(false),
      overApproximationBuildAborted(false),
      underApproximationBuildAborted(false),
      aborted(false) {
    // Intentionally left empty
}

/* Constructor */

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::BeliefExplorationPomdpModelChecker(std::shared_ptr<PomdpModelType> pomdp,
                                                                                                                       Options options)
    : options(options), inputPomdp(pomdp) {
    STORM_LOG_ASSERT(inputPomdp, "The given POMDP is not initialized.");
    STORM_LOG_ERROR_COND(inputPomdp->isCanonic(), "Input POMDP is not known to be canonic. This might lead to unexpected verification results.");

    beliefTypeCC = storm::utility::ConstantsComparator<BeliefValueType>(storm::utility::convertNumber<BeliefValueType>(this->options.numericPrecision), false);
    valueTypeCC = storm::utility::ConstantsComparator<ValueType>(this->options.numericPrecision, false);
}

/* Public Functions */

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::precomputeValueBounds(storm::logic::Formula const& formula,
                                                                                                               storm::Environment const& preProcEnv) {
    auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp(), formula);

    // Compute some initial bounds on the values for each state of the POMDP.
    // We work with the belief MDP value type, so if the POMDP is exact but the belief MDP is not, we need to convert.
    auto preProcessingMC = PreprocessingPomdpValueBoundsModelChecker<ValueType>(pomdp());
    auto initialPomdpValueBounds = preProcessingMC.getValueBounds(preProcEnv, formula);
    pomdpValueBounds.trivialPomdpValueBounds = initialPomdpValueBounds;

    // If we clip and compute rewards, compute the values necessary for the correction terms.
    if (options.useClipping && formula.isRewardOperatorFormula()) {
        pomdpValueBounds.extremePomdpValueBound = preProcessingMC.getExtremeValueBound(preProcEnv, formula);
    }
}
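
// Background note: for reward objectives with clipping enabled, cutting a belief off
// introduces a correction term, and bounding that term requires the extreme value bound
// computed above via getExtremeValueBound (it is consumed later in buildUnderApproximation).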

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
    storm::Environment const& env, storm::logic::Formula const& formula,
    std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
    return check(env, formula, env, additionalUnderApproximationBounds);
}

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
    storm::logic::Formula const& formula, std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
    storm::Environment env;
    return check(env, formula, env, additionalUnderApproximationBounds);
}

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
    storm::logic::Formula const& formula, storm::Environment const& preProcEnv,
    std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
    storm::Environment env;
    return check(env, formula, preProcEnv, additionalUnderApproximationBounds);
}
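
// The three overloads above are thin wrappers: each defaults the missing arguments (a fresh
// storm::Environment for model checking and/or preprocessing) and forwards to the
// four-argument check(...) defined next, which performs the actual computation.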

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::Result
BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::check(
    storm::Environment const& env, storm::logic::Formula const& formula, storm::Environment const& preProcEnv,
    std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> const& additionalUnderApproximationBounds) {
    STORM_LOG_ASSERT(options.unfold || options.discretize || options.interactiveUnfolding,
                     "Invoked belief exploration but no task (unfold or discretize) given.");
    // Potentially reset the preprocessed model from a previous call
    preprocessedPomdp.reset();

    // Reset all collected statistics
    statistics = Statistics();
    statistics.totalTime.start();
    // Extract the relevant information from the formula
    auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp(), formula);

    precomputeValueBounds(formula, preProcEnv);
    if (!additionalUnderApproximationBounds.empty()) {
        pomdpValueBounds.fmSchedulerValueList = additionalUnderApproximationBounds;
    }
    uint64_t initialPomdpState = pomdp().getInitialStates().getNextSetIndex(0);
    Result result(pomdpValueBounds.trivialPomdpValueBounds.getHighestLowerBound(initialPomdpState),
                  pomdpValueBounds.trivialPomdpValueBounds.getSmallestUpperBound(initialPomdpState));
    STORM_LOG_INFO("Initial value bounds are [" << result.lowerBound << ", " << result.upperBound << "]");

    std::optional<std::string> rewardModelName;
    std::set<uint32_t> targetObservations;
    if (formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula()) {
        if (formulaInfo.getTargetStates().observationClosed) {
            targetObservations = formulaInfo.getTargetStates().observations;
        } else {
            storm::transformer::MakeStateSetObservationClosed<ValueType> obsCloser(inputPomdp);
            std::tie(preprocessedPomdp, targetObservations) = obsCloser.transform(formulaInfo.getTargetStates().states);
        }
        if (formulaInfo.isNonNestedReachabilityProbability()) {
            if (!formulaInfo.getSinkStates().empty()) {
                storm::storage::sparse::ModelComponents<ValueType> components;
                components.stateLabeling = pomdp().getStateLabeling();
                components.rewardModels = pomdp().getRewardModels();
                auto matrix = pomdp().getTransitionMatrix();
                matrix.makeRowGroupsAbsorbing(formulaInfo.getSinkStates().states);
                components.transitionMatrix = matrix;
                components.observabilityClasses = pomdp().getObservations();
                if (pomdp().hasChoiceLabeling()) {
                    components.choiceLabeling = pomdp().getChoiceLabeling();
                }
                if (pomdp().hasObservationValuations()) {
                    components.observationValuations = pomdp().getObservationValuations();
                }
                preprocessedPomdp = std::make_shared<storm::models::sparse::Pomdp<ValueType>>(std::move(components), true);
                auto reachableFromSinkStates = storm::utility::graph::getReachableStates(
                    pomdp().getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states);
                reachableFromSinkStates &= ~formulaInfo.getSinkStates().states;
                STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException,
                                "There are sink states that can reach non-sink states. This is currently not supported.");
            }
        } else {
            // Expected reward formula!
            rewardModelName = formulaInfo.getRewardModelName();
        }
    } else {
        STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'.");
    }
    if (storm::pomdp::detectFiniteBeliefMdp(pomdp(), formulaInfo.getTargetStates().states)) {
        STORM_LOG_INFO("Detected that the belief MDP is finite.");
        statistics.beliefMdpDetectedToBeFinite = true;
    }
    if (options.interactiveUnfolding) {
        unfoldInteractively(env, targetObservations, formulaInfo.minimize(), rewardModelName, pomdpValueBounds, result);
    } else {
        refineReachability(env, targetObservations, formulaInfo.minimize(), rewardModelName, pomdpValueBounds, result);
    }
    // "Clear" results that were not actually requested (this makes the output clearer)
    if ((formulaInfo.minimize() && !options.discretize) || (formulaInfo.maximize() && !options.unfold)) {
        result.lowerBound = -storm::utility::infinity<ValueType>();
    }
    if ((formulaInfo.maximize() && !options.discretize) || (formulaInfo.minimize() && !options.unfold)) {
        result.upperBound = storm::utility::infinity<ValueType>();
    }

    if (storm::utility::resources::isTerminate()) {
        statistics.aborted = true;
    }
    statistics.totalTime.stop();
    return result;
}
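
// Usage sketch (hypothetical caller, not part of this file; assumes a POMDP in 'pomdp',
// a parsed formula in 'formula', and suitable Options in 'options'):
//
//   storm::Environment env;
//   storm::pomdp::modelchecker::BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<double>> checker(pomdp, options);
//   auto result = checker.check(env, *formula);
//   std::cout << "Value is in [" << result.lowerBound << ", " << result.upperBound << "]\n";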

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::printStatisticsToStream(std::ostream& stream) const {
    stream << "##### POMDP Approximation Statistics ######\n";
    stream << "# Input model: \n";
    pomdp().printModelInformationToStream(stream);
    stream << "# Max. number of states with same observation: " << pomdp().getMaxNrStatesWithSameObservation() << '\n';
    if (statistics.beliefMdpDetectedToBeFinite) {
        stream << "# Pre-computations detected that the belief MDP is finite.\n";
    }
    if (statistics.aborted) {
        stream << "# Computation aborted early\n";
    }

    stream << "# Total check time: " << statistics.totalTime << '\n';
    // Refinement information:
    if (statistics.refinementSteps) {
        stream << "# Number of refinement steps: " << statistics.refinementSteps.value() << '\n';
    }
    if (statistics.refinementFixpointDetected) {
        stream << "# Detected a refinement fixpoint.\n";
    }

    // The over-approximation MDP:
    if (statistics.overApproximationStates) {
        stream << "# Number of states in the ";
        if (options.refine) {
            stream << "final ";
        }
        stream << "grid MDP for the over-approximation: ";
        if (statistics.overApproximationBuildAborted) {
            stream << ">=";
        }
        stream << statistics.overApproximationStates.value() << '\n';
        stream << "# Maximal resolution for over-approximation: " << statistics.overApproximationMaxResolution.value() << '\n';
        stream << "# Time spent for building the over-approx grid MDP(s): " << statistics.overApproximationBuildTime << '\n';
        stream << "# Time spent for checking the over-approx grid MDP(s): " << statistics.overApproximationCheckTime << '\n';
    }

    // The under-approximation MDP:
    if (statistics.underApproximationStates) {
        stream << "# Number of states in the ";
        if (options.refine) {
            stream << "final ";
        }
        stream << "belief MDP for the under-approximation: ";
        if (statistics.underApproximationBuildAborted) {
            stream << ">=";
        }
        stream << statistics.underApproximationStates.value() << '\n';
        if (statistics.nrClippingAttempts) {
            stream << "# Clipping attempts (clipped states) for the under-approximation: ";
            if (statistics.underApproximationBuildAborted) {
                stream << ">=";
            }
            stream << statistics.nrClippingAttempts.value() << " (" << statistics.nrClippedStates.value() << ")\n";
            stream << "# Total clipping preprocessing time: " << statistics.clippingPreTime << "\n";
            stream << "# Total clipping time: " << statistics.clipWatch << "\n";
        } else if (statistics.nrTruncatedStates) {
            stream << "# Truncated states for the under-approximation: ";
            if (statistics.underApproximationBuildAborted) {
                stream << ">=";
            }
            stream << statistics.nrTruncatedStates.value() << "\n";
        }
        if (statistics.underApproximationStateLimit) {
            stream << "# Exploration state limit for under-approximation: " << statistics.underApproximationStateLimit.value() << '\n';
        }
        stream << "# Time spent for building the under-approx belief MDP(s): " << statistics.underApproximationBuildTime << '\n';
        stream << "# Time spent for checking the under-approx belief MDP(s): " << statistics.underApproximationCheckTime << '\n';
    }

    stream << "##########################################\n";
}

/* Private Functions */

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
PomdpModelType const& BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::pomdp() const {
    if (preprocessedPomdp) {
        return *preprocessedPomdp;
    } else {
        return *inputPomdp;
    }
}

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::refineReachability(
    storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, std::optional<std::string> rewardModelName,
    storm::pomdp::modelchecker::POMDPValueBounds<ValueType> const& valueBounds, Result& result) {
    statistics.refinementSteps = 0;
    auto trivialPOMDPBounds = valueBounds.trivialPomdpValueBounds;
    // Set up exploration data
    std::vector<BeliefValueType> observationResolutionVector;
    std::shared_ptr<BeliefManagerType> overApproxBeliefManager;
    std::shared_ptr<ExplorerType> overApproximation;
    HeuristicParameters overApproxHeuristicPar{};
    if (options.discretize) {  // Set up and build the first over-approximation
        observationResolutionVector =
            std::vector<BeliefValueType>(pomdp().getNrObservations(), storm::utility::convertNumber<BeliefValueType>(options.resolutionInit));
        overApproxBeliefManager = std::make_shared<BeliefManagerType>(
            pomdp(), storm::utility::convertNumber<BeliefValueType>(options.numericPrecision),
            options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
        if (rewardModelName) {
            overApproxBeliefManager->setRewardModel(rewardModelName);
        }
        overApproximation = std::make_shared<ExplorerType>(overApproxBeliefManager, trivialPOMDPBounds, storm::builder::ExplorationHeuristic::BreadthFirst);
        overApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
        overApproxHeuristicPar.observationThreshold = options.obsThresholdInit;
        overApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit == 0 ? std::numeric_limits<uint64_t>::max() : options.sizeThresholdInit;
        overApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;

        buildOverApproximation(env, targetObservations, min, rewardModelName.has_value(), false, overApproxHeuristicPar, observationResolutionVector,
                               overApproxBeliefManager, overApproximation);
        if (!overApproximation->hasComputedValues() || storm::utility::resources::isTerminate()) {
            return;
        }
        ValueType const& newValue = overApproximation->getComputedValueAtInitialState();
        bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue);
        if (betterBound) {
            STORM_LOG_INFO("Initial Over-approx result obtained after " << statistics.totalTime << ". Value is '" << newValue << "'.\n");
        }
    }

    std::shared_ptr<BeliefManagerType> underApproxBeliefManager;
    std::shared_ptr<ExplorerType> underApproximation;
    HeuristicParameters underApproxHeuristicPar{};
    if (options.unfold) {  // Set up and build the first under-approximation
        underApproxBeliefManager = std::make_shared<BeliefManagerType>(
            pomdp(), storm::utility::convertNumber<BeliefValueType>(options.numericPrecision),
            options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
        if (rewardModelName) {
            underApproxBeliefManager->setRewardModel(rewardModelName);
        }
        underApproximation = std::make_shared<ExplorerType>(underApproxBeliefManager, trivialPOMDPBounds, options.explorationHeuristic);
        underApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
        underApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
        underApproxHeuristicPar.sizeThreshold = options.sizeThresholdInit;
        if (underApproxHeuristicPar.sizeThreshold == 0) {
            if (!options.refine && options.explorationTimeLimit != 0) {
                underApproxHeuristicPar.sizeThreshold = std::numeric_limits<uint64_t>::max();
            } else {
                underApproxHeuristicPar.sizeThreshold = pomdp().getNumberOfStates() * pomdp().getMaxNrStatesWithSameObservation();
                STORM_LOG_INFO("Heuristically selected an under-approximation MDP size threshold of " << underApproxHeuristicPar.sizeThreshold << ".\n");
            }
        }

        if (options.useClipping && rewardModelName.has_value()) {
            underApproximation->setExtremeValueBound(valueBounds.extremePomdpValueBound);
        }
        if (!valueBounds.fmSchedulerValueList.empty()) {
            underApproximation->setFMSchedValueList(valueBounds.fmSchedulerValueList);
        }
        buildUnderApproximation(env, targetObservations, min, rewardModelName.has_value(), false, underApproxHeuristicPar, underApproxBeliefManager,
                                underApproximation, false);
        if (!underApproximation->hasComputedValues() || storm::utility::resources::isTerminate()) {
            return;
        }
        ValueType const& newValue = underApproximation->getComputedValueAtInitialState();
        bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue);
        if (betterBound) {
            STORM_LOG_INFO("Initial Under-approx result obtained after " << statistics.totalTime << ". Value is '" << newValue << "'.\n");
        }
    }

    // Do some output
    STORM_LOG_INFO("Completed (initial) computation. Current checktime is " << statistics.totalTime << ".");
    bool computingLowerBound = false;
    bool computingUpperBound = false;
    if (options.discretize) {
        STORM_LOG_INFO("\tOver-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << ".");
        (min ? computingLowerBound : computingUpperBound) = true;
    }
    if (options.unfold) {
        STORM_LOG_INFO("\tUnder-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << ".");
        (min ? computingUpperBound : computingLowerBound) = true;
    }
    if (computingLowerBound && computingUpperBound) {
        STORM_LOG_INFO("\tObtained result is [" << result.lowerBound << ", " << result.upperBound << "].");
    } else if (computingLowerBound) {
        STORM_LOG_INFO("\tObtained result is ≥" << result.lowerBound << ".");
    } else if (computingUpperBound) {
        STORM_LOG_INFO("\tObtained result is ≤" << result.upperBound << ".");
    }

    // Start refinement
    if (options.refine) {
        STORM_LOG_WARN_COND(options.refineStepLimit != 0 || !storm::utility::isZero(options.refinePrecision),
                            "No termination criterion for refinement given. Consider specifying a step limit, a non-zero precision limit, or a timeout.");
        STORM_LOG_WARN_COND(storm::utility::isZero(options.refinePrecision) || (options.unfold && options.discretize),
                            "Refinement goal precision is given, but only one bound is going to be refined.");
        while ((options.refineStepLimit == 0 || statistics.refinementSteps.value() < options.refineStepLimit) && result.diff() > options.refinePrecision) {
            bool overApproxFixPoint = true;
            bool underApproxFixPoint = true;
            if (options.discretize) {
                // Refine over-approximation
                if (min) {
                    overApproximation->takeCurrentValuesAsLowerBounds();
                } else {
                    overApproximation->takeCurrentValuesAsUpperBounds();
                }
                overApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor;
                overApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber<uint64_t, ValueType>(
                    storm::utility::convertNumber<ValueType, uint64_t>(overApproximation->getExploredMdp()->getNumberOfStates()) * options.sizeThresholdFactor);
                overApproxHeuristicPar.observationThreshold +=
                    options.obsThresholdIncrementFactor * (storm::utility::one<ValueType>() - overApproxHeuristicPar.observationThreshold);
                overApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor;
                overApproxFixPoint = buildOverApproximation(env, targetObservations, min, rewardModelName.has_value(), true, overApproxHeuristicPar,
                                                            observationResolutionVector, overApproxBeliefManager, overApproximation);
                if (overApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) {
                    ValueType const& newValue = overApproximation->getComputedValueAtInitialState();
                    bool betterBound = min ? result.updateLowerBound(newValue) : result.updateUpperBound(newValue);
                    if (betterBound) {
                        STORM_LOG_INFO("Over-approx result for refinement improved after " << statistics.totalTime << " in refinement step #"
                                                                                           << (statistics.refinementSteps.value() + 1) << ". New value is '"
                                                                                           << newValue << "'.");
                    }
                } else {
                    break;
                }
            }

            if (options.unfold && result.diff() > options.refinePrecision) {
                // Refine under-approximation
                underApproxHeuristicPar.gapThreshold *= options.gapThresholdFactor;
                underApproxHeuristicPar.sizeThreshold = storm::utility::convertNumber<uint64_t, ValueType>(
                    storm::utility::convertNumber<ValueType, uint64_t>(underApproximation->getExploredMdp()->getNumberOfStates()) *
                    options.sizeThresholdFactor);
                underApproxHeuristicPar.optimalChoiceValueEpsilon *= options.optimalChoiceValueThresholdFactor;
                underApproxFixPoint = buildUnderApproximation(env, targetObservations, min, rewardModelName.has_value(), true, underApproxHeuristicPar,
                                                              underApproxBeliefManager, underApproximation, true);
                if (underApproximation->hasComputedValues() && !storm::utility::resources::isTerminate()) {
                    ValueType const& newValue = underApproximation->getComputedValueAtInitialState();
                    bool betterBound = min ? result.updateUpperBound(newValue) : result.updateLowerBound(newValue);
                    if (betterBound) {
                        STORM_LOG_INFO("Under-approx result for refinement improved after " << statistics.totalTime << " in refinement step #"
                                                                                            << (statistics.refinementSteps.value() + 1) << ". New value is '"
                                                                                            << newValue << "'.");
                    }
                } else {
                    break;
                }
            }

            if (storm::utility::resources::isTerminate()) {
                break;
            } else {
                ++statistics.refinementSteps.value();
                // Don't produce too much output (to avoid logfile clutter)
                if (statistics.refinementSteps.value() <= 1000) {
                    STORM_LOG_INFO("Completed iteration #" << statistics.refinementSteps.value() << ". Current checktime is " << statistics.totalTime << ".");
                    computingLowerBound = false;
                    computingUpperBound = false;
                    if (options.discretize) {
                        STORM_LOG_INFO("\tOver-approx MDP has size " << overApproximation->getExploredMdp()->getNumberOfStates() << ".");
                        (min ? computingLowerBound : computingUpperBound) = true;
                    }
                    if (options.unfold) {
                        STORM_LOG_INFO("\tUnder-approx MDP has size " << underApproximation->getExploredMdp()->getNumberOfStates() << ".");
                        (min ? computingUpperBound : computingLowerBound) = true;
                    }
                    if (computingLowerBound && computingUpperBound) {
                        STORM_LOG_INFO("\tCurrent result is [" << result.lowerBound << ", " << result.upperBound << "].");
                    } else if (computingLowerBound) {
                        STORM_LOG_INFO("\tCurrent result is ≥" << result.lowerBound << ".");
                    } else if (computingUpperBound) {
                        STORM_LOG_INFO("\tCurrent result is ≤" << result.upperBound << ".");
                    }
                    STORM_LOG_WARN_COND(statistics.refinementSteps.value() < 1000, "Refinement requires more than 1000 iterations.");
                }
            }
            if (overApproxFixPoint && underApproxFixPoint) {
                STORM_LOG_INFO("Refinement fixpoint reached after " << statistics.refinementSteps.value() << " iterations.\n");
                statistics.refinementFixpointDetected = true;
                break;
            }
        }
    }
    // Print model information of the final over-/under-approximation MDP
    if (options.discretize && overApproximation->hasComputedValues()) {
        auto printOverInfo = [&overApproximation]() {
            std::stringstream str;
            str << "Explored and checked Over-Approximation MDP:\n";
            overApproximation->getExploredMdp()->printModelInformationToStream(str);
            return str.str();
        };
        STORM_LOG_INFO(printOverInfo());
    }
    if (options.unfold && underApproximation->hasComputedValues()) {
        auto printUnderInfo = [&underApproximation]() {
            std::stringstream str;
            str << "Explored and checked Under-Approximation MDP:\n";
            underApproximation->getExploredMdp()->printModelInformationToStream(str);
            return str.str();
        };
        STORM_LOG_INFO(printUnderInfo());
        std::shared_ptr<storm::models::sparse::Model<ValueType>> scheduledModel = underApproximation->getExploredMdp();
        if (!options.useStateEliminationCutoff) {
            storm::models::sparse::StateLabeling newLabeling(scheduledModel->getStateLabeling());
            auto nrPreprocessingScheds = min ? underApproximation->getNrSchedulersForUpperBounds() : underApproximation->getNrSchedulersForLowerBounds();
            for (uint64_t i = 0; i < nrPreprocessingScheds; ++i) {
                newLabeling.addLabel("sched_" + std::to_string(i));
            }
            newLabeling.addLabel("cutoff");
            newLabeling.addLabel("clipping");
            newLabeling.addLabel("finite_mem");

            auto transMatrix = scheduledModel->getTransitionMatrix();
            for (uint64_t i = 0; i < scheduledModel->getNumberOfStates(); ++i) {
                if (newLabeling.getStateHasLabel("truncated", i)) {
                    uint64_t localChosenActionIndex = underApproximation->getSchedulerForExploredMdp()->getChoice(i).getDeterministicChoice();
                    auto rowIndex = scheduledModel->getTransitionMatrix().getRowGroupIndices()[i];
                    if (scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).size() > 0) {
                        auto label = *(scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).begin());
                        if (label.rfind("clip", 0) == 0) {
                            newLabeling.addLabelToState("clipping", i);
                            auto chosenRow = transMatrix.getRow(i, 0);
                            auto candidateIndex = (chosenRow.end() - 1)->getColumn();
                            transMatrix.makeRowDirac(transMatrix.getRowGroupIndices()[i], candidateIndex);
                        } else if (label.rfind("mem_node", 0) == 0) {
                            newLabeling.addLabelToState("finite_mem", i);
                            newLabeling.addLabelToState("cutoff", i);
                        } else {
                            newLabeling.addLabelToState(label, i);
                            newLabeling.addLabelToState("cutoff", i);
                        }
                    }
                }
            }
            newLabeling.removeLabel("truncated");

            transMatrix.dropZeroEntries();
            storm::storage::sparse::ModelComponents<ValueType> modelComponents(transMatrix, newLabeling);
            if (scheduledModel->hasChoiceLabeling()) {
                modelComponents.choiceLabeling = scheduledModel->getChoiceLabeling();
            }
            storm::models::sparse::Mdp<ValueType> newMDP(modelComponents);
            auto inducedMC = newMDP.applyScheduler(*(underApproximation->getSchedulerForExploredMdp()), true);
            scheduledModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType>>(inducedMC);
        } else {
            auto inducedMC = underApproximation->getExploredMdp()->applyScheduler(*(underApproximation->getSchedulerForExploredMdp()), true);
            scheduledModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType>>(inducedMC);
        }
        result.schedulerAsMarkovChain = scheduledModel;
        if (min) {
            result.cutoffSchedulers = underApproximation->getUpperValueBoundSchedulers();
        } else {
            result.cutoffSchedulers = underApproximation->getLowerValueBoundSchedulers();
        }
    }
}
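
// Note the bound directions used above: for a minimizing objective, the (optimistic)
// over-approximation provides the lower bound and the unfolded under-approximation the
// upper bound; for a maximizing objective, the roles are swapped.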

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::unfoldInteractively(
    storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, std::optional<std::string> rewardModelName,
    storm::pomdp::modelchecker::POMDPValueBounds<ValueType> const& valueBounds, Result& result) {
    statistics.refinementSteps = 0;
    interactiveResult = result;
    unfoldingStatus = Status::Uninitialized;
    unfoldingControl = UnfoldingControl::Run;
    auto trivialPOMDPBounds = valueBounds.trivialPomdpValueBounds;
    // Set up exploration data
    std::shared_ptr<BeliefManagerType> underApproxBeliefManager;
    HeuristicParameters underApproxHeuristicPar{};
    bool firstIteration = true;
    // Set up the belief manager
    underApproxBeliefManager = std::make_shared<BeliefManagerType>(
        pomdp(), storm::utility::convertNumber<BeliefValueType>(options.numericPrecision),
        options.dynamicTriangulation ? BeliefManagerType::TriangulationMode::Dynamic : BeliefManagerType::TriangulationMode::Static);
    if (rewardModelName) {
        underApproxBeliefManager->setRewardModel(rewardModelName);
    }

    // Set up the belief MDP explorer
    interactiveUnderApproximationExplorer = std::make_shared<ExplorerType>(underApproxBeliefManager, trivialPOMDPBounds, options.explorationHeuristic);
    underApproxHeuristicPar.gapThreshold = options.gapThresholdInit;
    underApproxHeuristicPar.optimalChoiceValueEpsilon = options.optimalChoiceValueThresholdInit;
    underApproxHeuristicPar.sizeThreshold = std::numeric_limits<uint64_t>::max() - 1;  // we do not set a size threshold

    if (options.useClipping && rewardModelName.has_value()) {
        interactiveUnderApproximationExplorer->setExtremeValueBound(valueBounds.extremePomdpValueBound);
    }

    if (!valueBounds.fmSchedulerValueList.empty()) {
        interactiveUnderApproximationExplorer->setFMSchedValueList(valueBounds.fmSchedulerValueList);
    }

    // Start iteration
    while (!(unfoldingControl ==
             storm::pomdp::modelchecker::BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::UnfoldingControl::Terminate)) {
        bool underApproxFixPoint = true;
        bool hasTruncatedStates = false;
        if (unfoldingStatus != Status::Converged) {
            // Continue unfolding the under-approximation
            underApproxFixPoint = buildUnderApproximation(env, targetObservations, min, rewardModelName.has_value(), false, underApproxHeuristicPar,
                                                          underApproxBeliefManager, interactiveUnderApproximationExplorer, firstIteration);
            if (interactiveUnderApproximationExplorer->hasComputedValues() && !storm::utility::resources::isTerminate()) {
                ValueType const& newValue = interactiveUnderApproximationExplorer->getComputedValueAtInitialState();
                bool betterBound = min ? interactiveResult.updateUpperBound(newValue) : interactiveResult.updateLowerBound(newValue);
                if (betterBound) {
                    STORM_LOG_INFO("Under-approximation result improved after " << statistics.totalTime << " in step #"
                                                                                << (statistics.refinementSteps.value() + 1) << ". New value is '" << newValue
                                                                                << "'.");
                }
                std::shared_ptr<storm::models::sparse::Model<ValueType>> scheduledModel = interactiveUnderApproximationExplorer->getExploredMdp();
                if (!options.useStateEliminationCutoff) {
                    storm::models::sparse::StateLabeling newLabeling(scheduledModel->getStateLabeling());
                    auto nrPreprocessingScheds = min ? interactiveUnderApproximationExplorer->getNrSchedulersForUpperBounds()
                                                     : interactiveUnderApproximationExplorer->getNrSchedulersForLowerBounds();
                    for (uint64_t i = 0; i < nrPreprocessingScheds; ++i) {
                        newLabeling.addLabel("sched_" + std::to_string(i));
                    }
                    newLabeling.addLabel("cutoff");
                    newLabeling.addLabel("clipping");
                    newLabeling.addLabel("finite_mem");

                    auto transMatrix = scheduledModel->getTransitionMatrix();
                    for (uint64_t i = 0; i < scheduledModel->getNumberOfStates(); ++i) {
                        if (newLabeling.getStateHasLabel("truncated", i)) {
                            hasTruncatedStates = true;
                            uint64_t localChosenActionIndex =
                                interactiveUnderApproximationExplorer->getSchedulerForExploredMdp()->getChoice(i).getDeterministicChoice();
                            auto rowIndex = scheduledModel->getTransitionMatrix().getRowGroupIndices()[i];
                            if (scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).size() > 0) {
                                auto label = *(scheduledModel->getChoiceLabeling().getLabelsOfChoice(rowIndex + localChosenActionIndex).begin());
                                if (label.rfind("clip", 0) == 0) {
                                    newLabeling.addLabelToState("clipping", i);
                                    auto chosenRow = transMatrix.getRow(i, 0);
                                    auto candidateIndex = (chosenRow.end() - 1)->getColumn();
                                    transMatrix.makeRowDirac(transMatrix.getRowGroupIndices()[i], candidateIndex);
                                } else if (label.rfind("mem_node", 0) == 0) {
                                    newLabeling.addLabelToState("finite_mem", i);
                                    newLabeling.addLabelToState("cutoff", i);
                                } else {
                                    newLabeling.addLabelToState(label, i);
                                    newLabeling.addLabelToState("cutoff", i);
                                }
                            }
                        }
                    }
                    newLabeling.removeLabel("truncated");

                    transMatrix.dropZeroEntries();
                    storm::storage::sparse::ModelComponents<ValueType> modelComponents(transMatrix, newLabeling);
                    if (scheduledModel->hasChoiceLabeling()) {
                        modelComponents.choiceLabeling = scheduledModel->getChoiceLabeling();
                    }
                    storm::models::sparse::Mdp<ValueType> newMDP(modelComponents);
                    auto inducedMC = newMDP.applyScheduler(*(interactiveUnderApproximationExplorer->getSchedulerForExploredMdp()), true);
                    scheduledModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType>>(inducedMC);
                }
                interactiveResult.schedulerAsMarkovChain = scheduledModel;
                if (min) {
                    interactiveResult.cutoffSchedulers = interactiveUnderApproximationExplorer->getUpperValueBoundSchedulers();
                } else {
                    interactiveResult.cutoffSchedulers = interactiveUnderApproximationExplorer->getLowerValueBoundSchedulers();
                }
                if (firstIteration) {
                    firstIteration = false;
                }
                unfoldingStatus = Status::ResultAvailable;
            } else {
                break;
            }

            if (storm::utility::resources::isTerminate()) {
                break;
            } else {
                ++statistics.refinementSteps.value();
                // Don't produce too much output (to avoid logfile clutter)
                if (statistics.refinementSteps.value() <= 1000) {
                    STORM_LOG_INFO("Completed iteration #" << statistics.refinementSteps.value() << ". Current checktime is " << statistics.totalTime << ".");
                    bool computingLowerBound = false;
                    bool computingUpperBound = false;
                    if (options.unfold) {
                        STORM_LOG_INFO("\tUnder-approx MDP has size " << interactiveUnderApproximationExplorer->getExploredMdp()->getNumberOfStates() << ".");
                        (min ? computingUpperBound : computingLowerBound) = true;
                    }
                    if (computingLowerBound && computingUpperBound) {
                        STORM_LOG_INFO("\tCurrent result is [" << interactiveResult.lowerBound << ", " << interactiveResult.upperBound << "].");
                    } else if (computingLowerBound) {
                        STORM_LOG_INFO("\tCurrent result is ≥" << interactiveResult.lowerBound << ".");
                    } else if (computingUpperBound) {
                        STORM_LOG_INFO("\tCurrent result is ≤" << interactiveResult.upperBound << ".");
                    }
                }
            }
            if (underApproxFixPoint) {
                STORM_LOG_INFO("Fixpoint reached after " << statistics.refinementSteps.value() << " iterations.\n");
                statistics.refinementFixpointDetected = true;
                unfoldingStatus = Status::Converged;
                unfoldingControl = UnfoldingControl::Pause;
            }
            if (!hasTruncatedStates) {
                STORM_LOG_INFO("No states have been truncated, so continued iteration does not yield new results.\n");
                unfoldingStatus = Status::Converged;
                unfoldingControl = UnfoldingControl::Pause;
            }
        }
        // While we tell the procedure to be paused, idle
        while (unfoldingControl ==
                   storm::pomdp::modelchecker::BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::UnfoldingControl::Pause &&
               !storm::utility::resources::isTerminate()) {
            // busy-wait until the controlling thread changes unfoldingControl
        }
    }
    STORM_LOG_INFO("\tInteractive Unfolding terminated.\n");
}

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::unfoldInteractively(
    std::set<uint32_t> const& targetObservations, bool min, std::optional<std::string> rewardModelName,
    storm::pomdp::modelchecker::POMDPValueBounds<ValueType> const& valueBounds, Result& result) {
    storm::Environment env;
    unfoldInteractively(env, targetObservations, min, rewardModelName, valueBounds, result);
}

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::setUnfoldingControl(
    typename BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::UnfoldingControl newUnfoldingControl) {
    unfoldingControl = newUnfoldingControl;
}

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
int64_t BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getStatus() {
    if (unfoldingStatus == Status::Uninitialized)
        return 0;
    if (unfoldingStatus == Status::Exploring)
        return 1;
    if (unfoldingStatus == Status::ModelExplorationFinished)
        return 2;
    if (unfoldingStatus == Status::ResultAvailable)
        return 3;
    if (unfoldingStatus == Status::Terminated)
        return 4;

    return -1;
}

template<typename ValueType>
ValueType getGap(ValueType const& l, ValueType const& u) {
    STORM_LOG_ASSERT(l >= storm::utility::zero<ValueType>() && u >= storm::utility::zero<ValueType>(),
                     "Gap computation currently does not handle negative values.");
    if (storm::utility::isInfinity(u)) {
        if (storm::utility::isInfinity(l)) {
            return storm::utility::zero<ValueType>();
        } else {
            return u;
        }
    } else if (storm::utility::isZero(u)) {
        STORM_LOG_ASSERT(storm::utility::isZero(l), "Upper bound is zero but lower bound is " << l << ".");
        return u;
    } else {
        STORM_LOG_ASSERT(!storm::utility::isInfinity(l), "Lower bound is infinity, but upper bound is " << u << ".");
        // get the relative gap
        return storm::utility::abs<ValueType>(u - l) * storm::utility::convertNumber<ValueType, uint64_t>(2) / (l + u);
    }
}
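
// Example: for l = 3 and u = 5, getGap returns |5 - 3| * 2 / (3 + 5) = 0.5, i.e. the width
// of the bound interval measured relative to its midpoint (l + u) / 2.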

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::buildOverApproximation(
    storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, bool refine,
    HeuristicParameters const& heuristicParameters, std::vector<BeliefValueType>& observationResolutionVector,
    std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& overApproximation) {
    // Detect whether the refinement reached a fixpoint.
    bool fixPoint = true;

    statistics.overApproximationBuildTime.start();
    storm::storage::BitVector refinedObservations;
    if (!refine) {
        // If we build the model from scratch, we first have to set up the explorer for the over-approximation.
        if (computeRewards) {
            overApproximation->startNewExploration(storm::utility::zero<ValueType>());
        } else {
            overApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
        }
    } else {
        // If we refine the existing over-approximation, our heuristic also wants to know which states are reachable under an optimal policy
        overApproximation->computeOptimalChoicesAndReachableMdpStates(heuristicParameters.optimalChoiceValueEpsilon, true);
        // We also need to find out which observation resolutions need refinement.
        // current maximal resolution (needed for refinement heuristic)
        auto obsRatings = getObservationRatings(overApproximation, observationResolutionVector);
        // If there is a score < 1, we have not reached a fixpoint, yet
        auto numericPrecision = storm::utility::convertNumber<BeliefValueType>(options.numericPrecision);
        if (std::any_of(obsRatings.begin(), obsRatings.end(),
                        [&numericPrecision](BeliefValueType const& value) { return value + numericPrecision < storm::utility::one<BeliefValueType>(); })) {
            STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because there are still observations to refine.");
            fixPoint = false;
        }
        refinedObservations = storm::utility::vector::filter<BeliefValueType>(obsRatings, [&heuristicParameters](BeliefValueType const& r) {
            return r <= storm::utility::convertNumber<BeliefValueType>(heuristicParameters.observationThreshold);
        });
        STORM_LOG_DEBUG("Refining the resolution of " << refinedObservations.getNumberOfSetBits() << "/" << refinedObservations.size() << " observations.");
        for (auto const obs : refinedObservations) {
            // Increment the resolution at the refined observations.
            // Use Storm's rational numbers to detect overflows properly.
            storm::RationalNumber newObsResolutionAsRational = storm::utility::convertNumber<storm::RationalNumber>(observationResolutionVector[obs]) *
                                                               storm::utility::convertNumber<storm::RationalNumber>(options.resolutionFactor);
            static_assert(storm::NumberTraits<BeliefValueType>::IsExact || std::is_same<BeliefValueType, double>::value, "Unhandled belief value type");
            if (!storm::NumberTraits<BeliefValueType>::IsExact &&
                newObsResolutionAsRational > storm::utility::convertNumber<storm::RationalNumber>(std::numeric_limits<double>::max())) {
                observationResolutionVector[obs] = storm::utility::convertNumber<BeliefValueType>(std::numeric_limits<double>::max());
            } else {
                observationResolutionVector[obs] = storm::utility::convertNumber<BeliefValueType>(newObsResolutionAsRational);
            }
        }
        overApproximation->restartExploration();
    }
    statistics.overApproximationMaxResolution = storm::utility::ceil(*std::max_element(observationResolutionVector.begin(), observationResolutionVector.end()));

    // Start exploration
    storm::utility::Stopwatch explorationTime;
    if (options.explorationTimeLimit != 0) {
        explorationTime.start();
    }
    bool timeLimitExceeded = false;
    std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations;  // Declare here to avoid reallocations
    uint64_t numRewiredOrExploredStates = 0;
    while (overApproximation->hasUnexploredState()) {
        if (!timeLimitExceeded && options.explorationTimeLimit != 0 &&
            static_cast<uint64_t>(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit) {
            STORM_LOG_INFO("Exploration time limit exceeded.");
            timeLimitExceeded = true;
            STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because the exploration time limit is exceeded.");
            fixPoint = false;
        }

        uint64_t currId = overApproximation->exploreNextState();
        bool hasOldBehavior = refine && overApproximation->currentStateHasOldBehavior();
        if (!hasOldBehavior) {
            STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because a new state is explored.");
            fixPoint = false;  // Exploring a new state!
        }
        uint32_t currObservation = beliefManager->getBeliefObservation(currId);
        if (targetObservations.count(currObservation) != 0) {
            overApproximation->setCurrentStateIsTarget();
            overApproximation->addSelfloopTransition();
        } else {
            // We need to decide how to treat this state (and each individual enabled action). There are the following cases:
            // 1 The state has no old behavior and
            //   1.1 we explore all actions or
            //   1.2 we truncate all actions
            // 2 The state has old behavior and was truncated in the last iteration and
            //   2.1 we explore all actions or
            //   2.2 we truncate all actions (essentially restoring old behavior, but we do the truncation step again to benefit from updated bounds)
            // 3 The state has old behavior and was not truncated in the last iteration and the current action
            //   3.1 should be rewired or
            //   3.2 should get the old behavior but either
            //     3.2.1 none of the successor observations has been refined since the last rewiring or exploration of this action or
            //     3.2.2 rewiring is only delayed as it could still have an effect in a later refinement step

            // Find out in which case we are
            bool exploreAllActions = false;
            bool truncateAllActions = false;
            bool restoreAllActions = false;
            bool checkRewireForAllActions = false;
            // Get the relative gap
            ValueType gap = getGap(overApproximation->getLowerValueBoundAtCurrentState(), overApproximation->getUpperValueBoundAtCurrentState());
            if (!hasOldBehavior) {
                // Case 1
                // If we explore this state and it has no old behavior, it is clear that an "old" optimal scheduler can be extended to a scheduler that
                // reaches this state
                if (!timeLimitExceeded && gap >= heuristicParameters.gapThreshold && numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
                    exploreAllActions = true;  // Case 1.1
                } else {
                    truncateAllActions = true;  // Case 1.2
                    overApproximation->setCurrentStateIsTruncated();
                }
            } else if (overApproximation->getCurrentStateWasTruncated()) {
                // Case 2
                if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold &&
                    numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
                    exploreAllActions = true;  // Case 2.1
                    STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because a previously truncated state is now explored.");
                    fixPoint = false;
                } else {
                    truncateAllActions = true;  // Case 2.2
                    overApproximation->setCurrentStateIsTruncated();
                    if (fixPoint) {
                        // Properly check whether this can still be a fixpoint
                        if (overApproximation->currentStateIsOptimalSchedulerReachable() && !storm::utility::isZero(gap)) {
                            STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because we truncate a state with non-zero gap "
                                                               << gap << " that is reachable via an optimal scheduler.");
                            fixPoint = false;
                        }
                        // else {}
                        // In this case we truncated a state that is not reachable under optimal schedulers.
                        // If no other state is explored (i.e. fixPoint remains true), these states should still not be reachable in subsequent iterations
                    }
                }
            } else {
                // Case 3
                // The decision for rewiring also depends on the corresponding action, but we have some criteria that lead to case 3.2 (independent of the
                // action)
                if (!timeLimitExceeded && overApproximation->currentStateIsOptimalSchedulerReachable() && gap > heuristicParameters.gapThreshold &&
                    numRewiredOrExploredStates < heuristicParameters.sizeThreshold) {
                    checkRewireForAllActions = true;  // Case 3.1 or Case 3.2
                } else {
                    restoreAllActions = true;  // Definitely Case 3.2
                    // We still need to check for each action whether rewiring makes sense later
                    checkRewireForAllActions = true;
                }
            }
            bool expandedAtLeastOneAction = false;
            for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
                bool expandCurrentAction = exploreAllActions || truncateAllActions;
                if (checkRewireForAllActions) {
                    assert(refine);
                    // In this case, we still need to check whether this action needs to be expanded
                    assert(!expandCurrentAction);
                    // Check the action dependent conditions for rewiring
                    // First, check whether this action has been rewired since the last refinement of one of the successor observations (i.e. whether rewiring
                    // would actually change the successor states)
                    assert(overApproximation->currentStateHasOldBehavior());
                    if (overApproximation->getCurrentStateActionExplorationWasDelayed(action) ||
                        overApproximation->currentStateHasSuccessorObservationInObservationSet(action, refinedObservations)) {
                        // Then, check whether the other criteria for rewiring are satisfied
                        if (!restoreAllActions && overApproximation->actionAtCurrentStateWasOptimal(action)) {
                            // Do the rewiring now! (Case 3.1)
                            expandCurrentAction = true;
                            STORM_LOG_INFO_COND(!fixPoint, "Not reaching a refinement fixpoint because we rewire a state.");
                            fixPoint = false;
                        } else {
                            // Delay the rewiring (Case 3.2.2)
                            overApproximation->setCurrentChoiceIsDelayed(action);
                            if (fixPoint) {
                                // Check whether this delay means that a fixpoint has not been reached
                                if (!overApproximation->getCurrentStateActionExplorationWasDelayed(action) ||
                                    (overApproximation->currentStateIsOptimalSchedulerReachable() &&
                                     overApproximation->actionAtCurrentStateWasOptimal(action) && !storm::utility::isZero(gap))) {
                                    STORM_LOG_INFO_COND(!fixPoint,
                                                        "Not reaching a refinement fixpoint because we delay a rewiring of a state with non-zero gap "
                                                            << gap << " that is reachable via an optimal scheduler.");
                                    fixPoint = false;
                                }
                            }
                        }
                    }  // else { Case 3.2.1 }
                }

                if (expandCurrentAction) {
                    expandedAtLeastOneAction = true;
                    if (!truncateAllActions) {
                        // Cases 1.1, 2.1, or 3.1
                        auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
                        for (auto const& successor : successorGridPoints) {
                            overApproximation->addTransitionToBelief(action, successor.first, successor.second, false);
                        }
                        if (computeRewards) {
                            overApproximation->computeRewardAtCurrentState(action);
                        }
                    } else {
                        // Cases 1.2 or 2.2
                        auto truncationProbability = storm::utility::zero<ValueType>();
                        auto truncationValueBound = storm::utility::zero<ValueType>();
                        auto successorGridPoints = beliefManager->expandAndTriangulate(currId, action, observationResolutionVector);
                        for (auto const& successor : successorGridPoints) {
                            bool added = overApproximation->addTransitionToBelief(action, successor.first, successor.second, true);
                            if (!added) {
                                // We did not explore this successor state. Get a bound on the "missing" value
                                truncationProbability += successor.second;
                                truncationValueBound += successor.second * (min ? overApproximation->computeLowerValueBoundAtBelief(successor.first)
                                                                                : overApproximation->computeUpperValueBoundAtBelief(successor.first));
                            }
                        }
                        if (computeRewards) {
                            // The truncationValueBound will be added on top of the reward introduced by the current belief state.
                            overApproximation->addTransitionsToExtraStates(action, truncationProbability);
                            overApproximation->computeRewardAtCurrentState(action, truncationValueBound);
                        } else {
                            overApproximation->addTransitionsToExtraStates(action, truncationValueBound, truncationProbability - truncationValueBound);
                        }
                    }
                } else {
                    // Case 3.2
                    overApproximation->restoreOldBehaviorAtCurrentState(action);
                }
            }
            if (expandedAtLeastOneAction) {
                ++numRewiredOrExploredStates;
            }
        }

        for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(currId); action < numActions; ++action) {
            if (pomdp().hasChoiceLabeling()) {
                auto rowIndex = pomdp().getTransitionMatrix().getRowGroupIndices()[beliefManager->getRepresentativeState(currId)];
                if (pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).size() > 0) {
                    overApproximation->addChoiceLabelToCurrentState(action, *(pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).begin()));
                }
            }
        }

        if (storm::utility::resources::isTerminate()) {
            break;
        }
    }

    if (storm::utility::resources::isTerminate()) {
        // don't overwrite statistics of a previous, successful computation
        if (!statistics.overApproximationStates) {
            statistics.overApproximationBuildAborted = true;
            statistics.overApproximationStates = overApproximation->getCurrentNumberOfMdpStates();
        }
        statistics.overApproximationBuildTime.stop();
        return false;
    }

    overApproximation->finishExploration();
    statistics.overApproximationBuildTime.stop();

    statistics.overApproximationCheckTime.start();
    overApproximation->computeValuesOfExploredMdp(env, min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
    statistics.overApproximationCheckTime.stop();

    // don't overwrite statistics of a previous, successful computation
    if (!storm::utility::resources::isTerminate() || !statistics.overApproximationStates) {
        statistics.overApproximationStates = overApproximation->getExploredMdp()->getNumberOfStates();
    }
    return fixPoint;
}

template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::buildUnderApproximation(
    storm::Environment const& env, std::set<uint32_t> const& targetObservations, bool min, bool computeRewards, bool refine,
    HeuristicParameters const& heuristicParameters, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& underApproximation,
    bool firstIteration) {
    statistics.underApproximationBuildTime.start();

    unfoldingStatus = Status::Exploring;
    if (options.useClipping) {
        STORM_LOG_INFO("Using belief clipping with grid beliefs.\n");
        statistics.nrClippingAttempts = 0;
        statistics.nrClippedStates = 0;
    }

    uint64_t nrCutoffStrategies = min ? underApproximation->getNrSchedulersForUpperBounds() : underApproximation->getNrSchedulersForLowerBounds();

    bool fixPoint = true;
    if (heuristicParameters.sizeThreshold != std::numeric_limits<uint64_t>::max()) {
        statistics.underApproximationStateLimit = heuristicParameters.sizeThreshold;
    }
    if (!refine) {
        if (options.interactiveUnfolding && !firstIteration) {
            underApproximation->restoreExplorationState();
        } else if (computeRewards) {  // Build a new under-approximation
            // We use the sink state for infinite cut-off/clipping values
            underApproximation->startNewExploration(storm::utility::zero<ValueType>(), storm::utility::infinity<ValueType>());
        } else {
            underApproximation->startNewExploration(storm::utility::one<ValueType>(), storm::utility::zero<ValueType>());
        }
    } else {
        // Restart the building process
        underApproximation->restartExploration();
    }

    // Expand the beliefs
    storm::utility::Stopwatch explorationTime;
    storm::utility::Stopwatch printUpdateStopwatch;
    printUpdateStopwatch.start();
    if (options.explorationTimeLimit != 0) {
        explorationTime.start();
    }
    bool timeLimitExceeded = false;
    bool stateStored = false;
    while (underApproximation->hasUnexploredState()) {
        if (!timeLimitExceeded && options.explorationTimeLimit != 0 &&
            static_cast<uint64_t>(explorationTime.getTimeInSeconds()) > options.explorationTimeLimit) {
            STORM_LOG_INFO("Exploration time limit exceeded.");
            timeLimitExceeded = true;
        }
        if (printUpdateStopwatch.getTimeInSeconds() >= 60) {
            printUpdateStopwatch.restart();
            STORM_LOG_INFO("### " << underApproximation->getCurrentNumberOfMdpStates() << " beliefs in under-approximation MDP" << " ##### "
                                  << underApproximation->getUnexploredStates().size() << " beliefs queued\n");
            if (underApproximation->getCurrentNumberOfMdpStates() > heuristicParameters.sizeThreshold && options.useClipping) {
                STORM_LOG_INFO("##### Clipping attempts: " << statistics.nrClippingAttempts.value() << " ##### "
                                                           << "Clipped states: " << statistics.nrClippedStates.value() << "\n");
            }
        }
1099 if (unfoldingControl == UnfoldingControl::Pause && !stateStored) {
1100 underApproximation->storeExplorationState();
1101 stateStored = true;
1102 }
1103 uint64_t currId = underApproximation->exploreNextState();
1104 uint32_t currObservation = beliefManager->getBeliefObservation(currId);
1105 uint64_t addedActions = 0;
1106 bool stateAlreadyExplored = refine && underApproximation->currentStateHasOldBehavior() && !underApproximation->getCurrentStateWasTruncated();
1107 if (!stateAlreadyExplored || timeLimitExceeded) {
1108 fixPoint = false;
1109 }
1110 if (targetObservations.count(beliefManager->getBeliefObservation(currId)) != 0) {
1111 underApproximation->setCurrentStateIsTarget();
1112 underApproximation->addSelfloopTransition();
1113 underApproximation->addChoiceLabelToCurrentState(0, "loop");
1114 } else {
1115 bool stopExploration = false;
1116 bool clipBelief = false;
1117 if (timeLimitExceeded || (options.interactiveUnfolding && unfoldingControl != UnfoldingControl::Run)) {
1118 clipBelief = options.useClipping;
1119 stopExploration = !underApproximation->isMarkedAsGridBelief(currId);
1120 } else if (!stateAlreadyExplored) {
1121 // Check whether we want to explore the state now!
1122 ValueType gap = getGap(underApproximation->getLowerValueBoundAtCurrentState(), underApproximation->getUpperValueBoundAtCurrentState());
1123 if ((gap < heuristicParameters.gapThreshold) || (gap == 0 && options.cutZeroGap)) {
1124 stopExploration = true;
1125 } else if (underApproximation->getCurrentNumberOfMdpStates() >=
1126 heuristicParameters.sizeThreshold /*&& !statistics.beliefMdpDetectedToBeFinite*/) {
1127 clipBelief = options.useClipping;
1128 stopExploration = !underApproximation->isMarkedAsGridBelief(currId);
1129 }
1130 }
1131
1132 if (clipBelief && !underApproximation->isMarkedAsGridBelief(currId)) {
1133 // Use a belief grid as clipping candidates
1134 if (!options.useStateEliminationCutoff) {
1135 bool successfulClip = clipToGridExplicitly(currId, computeRewards, beliefManager, underApproximation, 0);
1136 // Set again as the current belief might have been detected to be a grid belief
1137 stopExploration = !underApproximation->isMarkedAsGridBelief(currId);
1138 if (successfulClip) {
1139 addedActions += 1;
1140 }
1141 } else {
1142 clipToGrid(currId, computeRewards, min, beliefManager, underApproximation);
1143 addedActions += beliefManager->getBeliefNumberOfChoices(currId);
1144 }
1145 } // end Clipping Procedure
1146
1147 if (stopExploration) {
1148 underApproximation->setCurrentStateIsTruncated();
1149 }
1150 if (options.useStateEliminationCutoff || !stopExploration) {
1151 // Add successor transitions or cut-off transitions when exploration is stopped
1152 uint64_t numActions = beliefManager->getBeliefNumberOfChoices(currId);
1153 if (underApproximation->needsActionAdjustment(numActions)) {
1154 underApproximation->adjustActions(numActions);
1155 }
1156 for (uint64_t action = 0; action < numActions; ++action) {
1157 // Always restore old behavior if available
1158 if (pomdp().hasChoiceLabeling()) {
1159 auto rowIndex = pomdp().getTransitionMatrix().getRowGroupIndices()[beliefManager->getRepresentativeState(currId)];
1160 if (pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).size() > 0) {
1161 underApproximation->addChoiceLabelToCurrentState(addedActions + action,
1162 *(pomdp().getChoiceLabeling().getLabelsOfChoice(rowIndex + action).begin()));
1163 }
1164 }
1165 if (stateAlreadyExplored) {
1166 underApproximation->restoreOldBehaviorAtCurrentState(action);
1167 } else {
1168 auto truncationProbability = storm::utility::zero<ValueType>();
1169 auto truncationValueBound = storm::utility::zero<ValueType>();
1170 auto successors = beliefManager->expand(currId, action);
1171 for (auto const& successor : successors) {
1172 bool added = underApproximation->addTransitionToBelief(addedActions + action, successor.first, successor.second, stopExploration);
1173 if (!added) {
1174 STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped.");
1175 // We did not explore this successor state. Get a bound on the "missing" value
1176 truncationProbability += successor.second;
1177 // Some care has to be taken here: essentially, we are triangulating a value for the under-approximation out of
1178 // other under-approximation values. In general, this no longer yields a sound under-approximation, as the
1179 // individual values might be achieved by different schedulers. In our case, however, soundness is preserved,
1180 // because all under-approximation values are based on a single memoryless scheduler.
1181 truncationValueBound += successor.second * (min ? underApproximation->computeUpperValueBoundAtBelief(successor.first)
1182 : underApproximation->computeLowerValueBoundAtBelief(successor.first));
1183 }
1184 }
1185 if (stopExploration) {
1186 if (computeRewards) {
1187 if (truncationValueBound == storm::utility::infinity<ValueType>()) {
1188 underApproximation->addTransitionsToExtraStates(addedActions + action, storm::utility::zero<ValueType>(),
1189 truncationProbability);
1190 } else {
1191 underApproximation->addTransitionsToExtraStates(addedActions + action, truncationProbability);
1192 }
1193 } else {
1194 underApproximation->addTransitionsToExtraStates(addedActions + action, truncationValueBound,
1195 truncationProbability - truncationValueBound);
1196 }
1197 }
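// Editor's note (worked example, illustrative): in the probability case above, if truncation leaves
// truncationProbability = 0.3 of the successor mass unexplored and the bounds contribute
// truncationValueBound = 0.12, the cut-off sends 0.12 to the goal state and 0.3 - 0.12 = 0.18 to the
// sink state, so the truncated state's value remains a valid bound.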
1198 if (computeRewards) {
1199 // The truncationValueBound will be added on top of the reward introduced by the current belief state.
1200 if (truncationValueBound != storm::utility::infinity<ValueType>()) {
1201 if (!clipBelief) {
1202 underApproximation->computeRewardAtCurrentState(action, truncationValueBound);
1203 } else {
1204 underApproximation->addRewardToCurrentState(addedActions + action,
1205 beliefManager->getBeliefActionReward(currId, action) + truncationValueBound);
1206 }
1207 }
1208 }
1209 }
1210 }
1211 } else {
1212 for (uint64_t i = 0; i < nrCutoffStrategies && !options.skipHeuristicSchedulers; ++i) {
1213 auto cutOffValue = min ? underApproximation->computeUpperValueBoundForScheduler(currId, i)
1214 : underApproximation->computeLowerValueBoundForScheduler(currId, i);
1215 if (computeRewards) {
1216 if (cutOffValue != storm::utility::infinity<ValueType>()) {
1217 underApproximation->addTransitionsToExtraStates(addedActions, storm::utility::one<ValueType>());
1218 underApproximation->addRewardToCurrentState(addedActions, cutOffValue);
1219 } else {
1220 underApproximation->addTransitionsToExtraStates(addedActions, storm::utility::zero<ValueType>(), storm::utility::one<ValueType>());
1221 }
1222 } else {
1223 underApproximation->addTransitionsToExtraStates(addedActions, cutOffValue, storm::utility::one<ValueType>() - cutOffValue);
1224 }
1225 if (pomdp().hasChoiceLabeling()) {
1226 underApproximation->addChoiceLabelToCurrentState(addedActions, "sched_" + std::to_string(i));
1227 }
1228 addedActions++;
1229 }
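// Editor's note (illustrative): each heuristic scheduler i contributes one cut-off action labeled
// "sched_i". In the probability case, a scheduler value of 0.7 becomes a transition of 0.7 to the goal
// state and 0.3 to the sink state; in the reward case it becomes a sure transition to the goal state
// whose action reward is the scheduler value (or a sure transition to the sink state if that value is
// infinite).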
1230 if (underApproximation->hasFMSchedulerValues()) {
1231 uint64_t transitionNr = 0;
1232 for (uint64_t i = 0; i < underApproximation->getNrOfMemoryNodesForObservation(currObservation); ++i) {
1233 auto resPair = underApproximation->computeFMSchedulerValueForMemoryNode(currId, i);
1234 ValueType cutOffValue;
1235 if (resPair.first) {
1236 cutOffValue = resPair.second;
1237 } else {
1238 STORM_LOG_DEBUG("Skipped cut-off of belief with ID " << currId << " with finite memory scheduler in memory node " << i
1239 << ". Missing values.");
1240 continue;
1241 }
1242 if (computeRewards) {
1243 if (cutOffValue != storm::utility::infinity<ValueType>()) {
1244 underApproximation->addTransitionsToExtraStates(addedActions + transitionNr, storm::utility::one<ValueType>());
1245 underApproximation->addRewardToCurrentState(addedActions + transitionNr, cutOffValue);
1246 } else {
1247 underApproximation->addTransitionsToExtraStates(addedActions + transitionNr, storm::utility::zero<ValueType>(),
1248 storm::utility::one<ValueType>());
1249 }
1250 } else {
1251 underApproximation->addTransitionsToExtraStates(addedActions + transitionNr, cutOffValue,
1252 storm::utility::one<ValueType>() - cutOffValue);
1253 }
1254 if (pomdp().hasChoiceLabeling()) {
1255 underApproximation->addChoiceLabelToCurrentState(addedActions + transitionNr, "mem_node_" + std::to_string(i));
1256 }
1257 ++transitionNr;
1258 }
1259 }
1260 }
1261 }
1262 if (storm::utility::resources::isTerminate()) {
1263 break;
1264 }
1265 }
1266
1267 if (storm::utility::resources::isTerminate()) {
1268 // don't overwrite statistics of a previous, successful computation
1269 if (!statistics.underApproximationStates) {
1270 statistics.underApproximationBuildAborted = true;
1271 statistics.underApproximationStates = underApproximation->getCurrentNumberOfMdpStates();
1272 }
1273 statistics.underApproximationBuildTime.stop();
1274 return false;
1275 }
1276
1277 underApproximation->finishExploration();
1278 statistics.underApproximationBuildTime.stop();
1279 printUpdateStopwatch.stop();
1280 STORM_LOG_INFO("Finished exploring under-approximation MDP.\nStart analysis...\n");
1281 unfoldingStatus = Status::ModelExplorationFinished;
1282 statistics.underApproximationCheckTime.start();
1283 underApproximation->computeValuesOfExploredMdp(env, min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize);
1284 statistics.underApproximationCheckTime.stop();
1285 if (underApproximation->getExploredMdp()->getStateLabeling().getStates("truncated").getNumberOfSetBits() > 0) {
1286 statistics.nrTruncatedStates = underApproximation->getExploredMdp()->getStateLabeling().getStates("truncated").getNumberOfSetBits();
1287 }
1288 // don't overwrite statistics of a previous, successful computation
1289 if (!storm::utility::resources::isTerminate() || !statistics.underApproximationStates) {
1290 statistics.underApproximationStates = underApproximation->getExploredMdp()->getNumberOfStates();
1291 }
1292 return fixPoint;
1293}
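// Editor's note: a minimal usage sketch for this checker (illustrative; assumes `pomdp` is a
// std::shared_ptr to a sparse POMDP, `formula` is a parsed property, and `options` and `env` are
// default-constructed):
//
//   storm::pomdp::modelchecker::BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<double>>
//       checker(pomdp, options);
//   auto result = checker.check(env, formula, env);
//   std::cout << "Value in [" << result.lowerBound << ", " << result.upperBound << "]" << '\n';
//   checker.printStatisticsToStream(std::cout);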
1294
1295template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1296void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::clipToGrid(uint64_t clippingStateId, bool computeRewards, bool min,
1297 std::shared_ptr<BeliefManagerType>& beliefManager,
1298 std::shared_ptr<ExplorerType>& beliefExplorer) {
1299 // Add all transitions to states which are already in the MDP, clip all others to a grid
1300 // To make the resulting MDP smaller, we eliminate intermediate successor states when clipping is applied
1301 for (uint64_t action = 0, numActions = beliefManager->getBeliefNumberOfChoices(clippingStateId); action < numActions; ++action) {
1302 auto rewardBound = utility::zero<BeliefValueType>();
1303 auto successors = beliefManager->expand(clippingStateId, action);
1304 auto absDelta = utility::zero<BeliefValueType>();
1305 for (auto const& successor : successors) {
1306 // Add the transition if the successor is already in the explored space.
1307 // We can add the transitions directly, as there is at most one successor per observation;
1308 // therefore, no belief can be clipped to an already added successor.
1309 bool added = beliefExplorer->addTransitionToBelief(action, successor.first, successor.second, true);
1310 if (!added) {
1311 // The successor is not in the explored space. Clip it
1312 statistics.nrClippingAttempts = statistics.nrClippingAttempts.value() + 1;
1313 auto clipping = beliefManager->clipBeliefToGrid(
1314 successor.first, options.clippingGridRes, computeRewards ? beliefExplorer->getStateExtremeBoundIsInfinite() : storm::storage::BitVector());
1315 if (clipping.isClippable) {
1316 // The belief is not on the grid and there is a candidate with finite reward
1317 statistics.nrClippedStates = statistics.nrClippedStates.value() + 1;
1318 // Transition probability to candidate is (probability to successor) * (clipping transition probability)
1319 BeliefValueType transitionProb =
1320 (utility::one<BeliefValueType>() - clipping.delta) * utility::convertNumber<BeliefValueType>(successor.second);
1321 beliefExplorer->addTransitionToBelief(action, clipping.targetBelief, utility::convertNumber<BeliefMDPType>(transitionProb), false);
1322 // Collect weighted clipping values
1323 absDelta += clipping.delta * utility::convertNumber<BeliefValueType>(successor.second);
1324 if (computeRewards) {
1325 // collect cumulative reward bounds
1326 auto localRew = utility::zero<BeliefValueType>();
1327 for (auto const& deltaValue : clipping.deltaValues) {
1328 localRew += deltaValue.second *
1329 utility::convertNumber<BeliefValueType>((beliefExplorer->getExtremeValueBoundAtPOMDPState(deltaValue.first)));
1330 }
1331 if (localRew == utility::infinity<BeliefValueType>()) {
1332 STORM_LOG_WARN("Infinite reward in clipping!");
1333 }
1334 rewardBound += localRew * utility::convertNumber<BeliefValueType>(successor.second);
1335 }
1336 } else if (clipping.onGrid) {
1337 // If the belief is not clippable, but on the grid, it may need to be explored, too
1338 beliefExplorer->addTransitionToBelief(action, successor.first, successor.second, false);
1339 } else {
1340 // Otherwise, the reward for all candidates is infinite, so clipping does not make sense. Cut the belief off instead.
1341 absDelta += utility::convertNumber<BeliefValueType>(successor.second);
1342 rewardBound += utility::convertNumber<BeliefValueType>(successor.second) *
1343 utility::convertNumber<BeliefValueType>(min ? beliefExplorer->computeUpperValueBoundAtBelief(successor.first)
1344 : beliefExplorer->computeLowerValueBoundAtBelief(successor.first));
1345 }
1346 }
1347 }
1348 // Add the collected clipping transition if necessary
1349 if (absDelta != utility::zero<BeliefValueType>()) {
1350 if (computeRewards) {
1351 if (rewardBound == utility::infinity<BeliefValueType>()) {
1352 // If the reward is infinite, add a transition to the sink state to collect infinite reward
1353 beliefExplorer->addTransitionsToExtraStates(action, utility::zero<BeliefMDPType>(), utility::convertNumber<BeliefMDPType>(absDelta));
1354 } else {
1355 beliefExplorer->addTransitionsToExtraStates(action, utility::convertNumber<BeliefMDPType>(absDelta));
1356 BeliefValueType totalRewardVal = rewardBound / absDelta;
1357 beliefExplorer->addClippingRewardToCurrentState(action, utility::convertNumber<BeliefMDPType>(totalRewardVal));
1358 }
1359 } else {
1360 beliefExplorer->addTransitionsToExtraStates(action, utility::zero<BeliefMDPType>(), utility::convertNumber<BeliefMDPType>(absDelta));
1361 }
1362 }
1363 if (computeRewards) {
1364 beliefExplorer->computeRewardAtCurrentState(action);
1365 }
1366 }
1367}
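// Editor's note (worked example for the clipping arithmetic above, illustrative): suppose an action
// reaches an off-grid successor with probability 0.4 and clipBeliefToGrid returns delta = 0.25 for a
// grid candidate. The candidate then receives probability (1 - 0.25) * 0.4 = 0.3, absDelta grows by
// 0.25 * 0.4 = 0.1, and in the reward case the normalized correction rewardBound / absDelta is attached
// to the transition into the extra states, compensating for the clipped-away probability mass.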
1368
1369template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1370bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::clipToGridExplicitly(uint64_t clippingStateId, bool computeRewards,
1371 std::shared_ptr<BeliefManagerType>& beliefManager,
1372 std::shared_ptr<ExplorerType>& beliefExplorer,
1373 uint64_t localActionIndex) {
1374 statistics.nrClippingAttempts = statistics.nrClippingAttempts.value() + 1;
1375 auto clipping = beliefManager->clipBeliefToGrid(clippingStateId, options.clippingGridRes,
1376 computeRewards ? beliefExplorer->getStateExtremeBoundIsInfinite() : storm::storage::BitVector());
1377 if (clipping.isClippable) {
1378 // The belief is not on the grid and there is a candidate with finite reward
1379 statistics.nrClippedStates = statistics.nrClippedStates.value() + 1;
1380 // Transition probability to candidate is clipping value
1381 BeliefValueType transitionProb = (utility::one<BeliefValueType>() - clipping.delta);
1382 beliefExplorer->addTransitionToBelief(localActionIndex, clipping.targetBelief, utility::convertNumber<BeliefMDPType>(transitionProb), false);
1383 beliefExplorer->markAsGridBelief(clipping.targetBelief);
1384 if (computeRewards) {
1385 // collect cumulative reward bounds
1386 auto reward = utility::zero<BeliefValueType>();
1387 for (auto const& deltaValue : clipping.deltaValues) {
1388 reward += deltaValue.second * utility::convertNumber<BeliefValueType>((beliefExplorer->getExtremeValueBoundAtPOMDPState(deltaValue.first)));
1389 }
1390 if (reward == utility::infinity<BeliefValueType>()) {
1391 STORM_LOG_WARN("Infinite reward in clipping!");
1392 // If the reward is infinite, add a transition to the sink state to collect infinite reward in our semantics
1393 beliefExplorer->addTransitionsToExtraStates(localActionIndex, utility::zero<BeliefMDPType>(),
1394 utility::convertNumber<BeliefMDPType>(clipping.delta));
1395 } else {
1396 beliefExplorer->addTransitionsToExtraStates(localActionIndex, utility::convertNumber<BeliefMDPType>(clipping.delta));
1397 BeliefValueType totalRewardVal = reward / clipping.delta;
1398 beliefExplorer->addClippingRewardToCurrentState(localActionIndex, utility::convertNumber<BeliefMDPType>(totalRewardVal));
1399 }
1400 } else {
1401 beliefExplorer->addTransitionsToExtraStates(localActionIndex, utility::zero<BeliefMDPType>(),
1402 utility::convertNumber<BeliefMDPType>(clipping.delta));
1403 }
1404 beliefExplorer->addChoiceLabelToCurrentState(localActionIndex, "clip");
1405 return true;
1406 } else {
1407 if (clipping.onGrid) {
1408 // If the belief is not clippable, but on the grid, it may need to be explored, too
1409 beliefExplorer->markAsGridBelief(clippingStateId);
1410 }
1411 }
1412 return false;
1413}
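// Editor's note (illustrative): in contrast to clipToGrid, this variant clips the current belief itself
// rather than its successors. With delta = 0.2, the fresh "clip" action moves to the grid candidate with
// probability 0.8 and to the extra states with probability 0.2, and the candidate is marked as a grid
// belief so that it is not clipped again later.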
1414
1415template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1416void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::setUnfoldingControl(
1417 storm::pomdp::modelchecker::BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::UnfoldingControl newUnfoldingControl) {
1418 unfoldingControl = newUnfoldingControl;
1419}
1420
1421template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1423 STORM_LOG_TRACE("PAUSE COMMAND ISSUED");
1424 setUnfoldingControl(UnfoldingControl::Pause);
1425}
1426
1427template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1429 STORM_LOG_TRACE("CONTINUATION COMMAND ISSUED");
1430 setUnfoldingControl(UnfoldingControl::Run);
1431}
1432
1433template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1435 STORM_LOG_TRACE("TERMINATION COMMAND ISSUED");
1436 setUnfoldingControl(UnfoldingControl::Terminate);
1437}
1438
1439template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1440bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::isResultReady() {
1441 return unfoldingStatus == Status::ResultAvailable || unfoldingStatus == Status::Converged;
1442}
1443
1444template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1445bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::hasConverged() {
1446 return unfoldingStatus == Status::Converged;
1447}
1448
1449template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1450bool BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::isExploring() {
1451 return unfoldingStatus == Status::Exploring;
1452}
1453
1454template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1455std::shared_ptr<storm::builder::BeliefMdpExplorer<PomdpModelType, BeliefValueType>>
1456BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getInteractiveBeliefExplorer() {
1457 return interactiveUnderApproximationExplorer;
1458}
1459
1460template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1461void BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::setFMSchedValueList(
1462 std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> valueList) {
1463 interactiveUnderApproximationExplorer->setFMSchedValueList(valueList);
1464}
1465
1466template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1467BeliefValueType BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::rateObservation(
1468 typename ExplorerType::SuccessorObservationInformation const& info, BeliefValueType const& observationResolution, BeliefValueType const& maxResolution) {
1469 auto n = storm::utility::convertNumber<BeliefValueType, uint64_t>(info.support.size());
1470 auto one = storm::utility::one<BeliefValueType>();
1471 if (storm::utility::isOne(n)) {
1472 // If the belief is Dirac, it has to be approximated precisely.
1473 // In this case, we return the best possible rating
1474 return one;
1475 } else {
1476 // Create the rating for this observation at this choice from the given info
1477 auto obsChoiceRating = storm::utility::convertNumber<BeliefValueType, ValueType>(info.maxProbabilityToSuccessorWithObs / info.observationProbability);
1478 // At this point, obsChoiceRating is the largest triangulation weight (which ranges from 1/n to 1).
1479 // Normalize the rating so that it ranges from 0 to 1, where
1480 // 0 means that the actual belief lies in the middle of the triangulating simplex (i.e. a "bad" approximation) and 1 means that the belief is precisely
1481 // approximated.
1482 obsChoiceRating = (obsChoiceRating * n - one) / (n - one);
1483 // Scale the ratings with the resolutions, so that low resolutions get a lower rating (and are thus more likely to be refined)
1484 obsChoiceRating *= observationResolution / maxResolution;
1485 return obsChoiceRating;
1486 }
1487}
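// Editor's note (worked example, illustrative): for a successor belief with support size n = 4 whose
// largest triangulation weight is 0.7, the normalized rating is (0.7 * 4 - 1) / (4 - 1) = 0.6. If the
// observation's resolution is 8 and the maximal resolution is 12, the final rating is 0.6 * 8 / 12 = 0.4;
// lower ratings make an observation more likely to be picked for refinement.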
1488
1489template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1490std::vector<BeliefValueType> BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getObservationRatings(
1491 std::shared_ptr<ExplorerType> const& overApproximation, std::vector<BeliefValueType> const& observationResolutionVector) {
1492 uint64_t numMdpStates = overApproximation->getExploredMdp()->getNumberOfStates();
1493 auto const& choiceIndices = overApproximation->getExploredMdp()->getNondeterministicChoiceIndices();
1494 BeliefValueType maxResolution = *std::max_element(observationResolutionVector.begin(), observationResolutionVector.end());
1495
1496 std::vector<BeliefValueType> resultingRatings(pomdp().getNrObservations(), storm::utility::one<BeliefValueType>());
1497
1498 std::map<uint32_t, typename ExplorerType::SuccessorObservationInformation> gatheredSuccessorObservations; // Declare here to avoid reallocations
1499 for (uint64_t mdpState = 0; mdpState < numMdpStates; ++mdpState) {
1500 // Check whether this state is reached under an optimal scheduler.
1501 // The heuristic assumes that the remaining states are not relevant for the observation score.
1502 if (overApproximation->stateIsOptimalSchedulerReachable(mdpState)) {
1503 for (uint64_t mdpChoice = choiceIndices[mdpState]; mdpChoice < choiceIndices[mdpState + 1]; ++mdpChoice) {
1504 // Similarly, only optimal actions are relevant
1505 if (overApproximation->actionIsOptimal(mdpChoice)) {
1506 // score the observations for this choice
1507 gatheredSuccessorObservations.clear();
1508 overApproximation->gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
1509 for (auto const& obsInfo : gatheredSuccessorObservations) {
1510 auto const& obs = obsInfo.first;
1511 BeliefValueType obsChoiceRating = rateObservation(obsInfo.second, observationResolutionVector[obs], maxResolution);
1512
1513 // The rating of the observation will be the minimum over all choice-based observation ratings
1514 resultingRatings[obs] = std::min(resultingRatings[obs], obsChoiceRating);
1515 }
1516 }
1517 }
1518 }
1519 }
1520 return resultingRatings;
1521}
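// Editor's note (illustrative): the final rating of an observation is the minimum over all of its
// choice-based ratings, e.g. ratings {1.0, 0.35, 0.8} collapse to 0.35, so a single badly approximated
// occurrence along an optimal policy suffices to flag the observation for refinement.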
1522
1523template<typename PomdpModelType, typename BeliefValueType, typename BeliefMDPType>
1524typename PomdpModelType::ValueType BeliefExplorationPomdpModelChecker<PomdpModelType, BeliefValueType, BeliefMDPType>::getGap(
1525 typename PomdpModelType::ValueType const& l, typename PomdpModelType::ValueType const& u) {
1526 STORM_LOG_ASSERT(l >= storm::utility::zero<typename PomdpModelType::ValueType>() && u >= storm::utility::zero<typename PomdpModelType::ValueType>(),
1527 "Gap computation currently does not handle negative values.");
1528 if (storm::utility::isInfinity(u)) {
1529 if (storm::utility::isInfinity(l)) {
1530 return storm::utility::zero<typename PomdpModelType::ValueType>();
1531 } else {
1532 return u;
1533 }
1534 } else if (storm::utility::isZero(u)) {
1535 STORM_LOG_ASSERT(storm::utility::isZero(l), "Upper bound is zero but lower bound is " << l << ".");
1536 return u;
1537 } else {
1538 STORM_LOG_ASSERT(!storm::utility::isInfinity(l), "Lower bound is infinity, but upper bound is " << u << ".");
1539 // get the relative gap
1540 return storm::utility::abs<typename PomdpModelType::ValueType>(u - l) * storm::utility::convertNumber<typename PomdpModelType::ValueType, uint64_t>(2) /
1541 (l + u);
1542 }
1543}
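// Editor's note (worked example, illustrative): getGap(0.4, 0.6) = |0.6 - 0.4| * 2 / (0.4 + 0.6) = 0.4,
// i.e. the absolute gap normalized by the average of the bounds; getGap(0, 0) is 0, and a finite lower
// bound paired with an infinite upper bound yields an infinite gap.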
1544
1545/* Template Instantiations */
1546
1547template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<double>>;
1548
1549template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<double>, storm::RationalNumber>;
1550
1551template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<storm::RationalNumber>, double>;
1552
1553template class BeliefExplorationPomdpModelChecker<storm::models::sparse::Pomdp<storm::RationalNumber>>;
1554
1555} // namespace modelchecker
1556} // namespace pomdp
1557} // namespace storm