Storm
A Modern Probabilistic Model Checker
BeliefMdpExplorer.cpp
#include "storm/builder/BeliefMdpExplorer.h"

#include "storm/utility/graph.h"

namespace storm {
namespace builder {
template<typename PomdpType, typename BeliefValueType>
BeliefMdpExplorer<PomdpType, BeliefValueType>::SuccessorObservationInformation::SuccessorObservationInformation(ValueType const &obsProb,
                                                                                                                ValueType const &maxProb, uint64_t const &count)
    : observationProbability(obsProb), maxProbabilityToSuccessorWithObs(maxProb), successorWithObsCount(count) {
    // Intentionally left empty.
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::SuccessorObservationInformation::join(SuccessorObservationInformation other) {
    observationProbability += other.observationProbability;
    maxProbabilityToSuccessorWithObs = std::max(maxProbabilityToSuccessorWithObs, other.maxProbabilityToSuccessorWithObs);
    successorWithObsCount += other.successorWithObsCount;
}

template<typename PomdpType, typename BeliefValueType>
BeliefMdpExplorer<PomdpType, BeliefValueType>::BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager,
                                                                 storm::pomdp::storage::PreprocessingPomdpValueBounds<ValueType> const &pomdpValueBounds,
                                                                 ExplorationHeuristic explorationHeuristic)
    : beliefManager(beliefManager), pomdpValueBounds(pomdpValueBounds), explHeuristic(explorationHeuristic), status(Status::Uninitialized) {
    // Intentionally left empty
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::setFMSchedValueList(std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> valueList) {
    fmSchedulerValueList = valueList;
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::startNewExploration(std::optional<ValueType> extraTargetStateValue,
                                                                        std::optional<ValueType> extraBottomStateValue) {
    status = Status::Exploring;
    // Reset data from potential previous explorations
    prio = storm::utility::zero<ValueType>();
    nextId = 0;
    mdpStateToBeliefIdMap.clear();
    beliefIdToMdpStateMap.clear();
    exploredBeliefIds.clear();
    exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false);
    mdpStatesToExplorePrioState.clear();
    mdpStatesToExploreStatePrio.clear();
    stateRemapping.clear();
    lowerValueBounds.clear();
    upperValueBounds.clear();
    values.clear();
    exploredMdpTransitions.clear();
    exploredChoiceIndices.clear();
    previousChoiceIndices.clear();
    probabilityEstimation.clear();
    mdpActionRewards.clear();
    targetStates.clear();
    truncatedStates.clear();
    clippedStates.clear();
    delayedExplorationChoices.clear();
    clippingTransitionRewards.clear();
    mdpStateToChoiceLabelsMap.clear();
    optimalChoices = std::nullopt;
    optimalChoicesReachableMdpStates = std::nullopt;
    scheduler = nullptr;
    exploredMdp = nullptr;
    internalAddRowGroupIndex();  // Mark the start of the first row group

    // Add some states with special treatment (if requested)
    if (extraBottomStateValue) {
        currentMdpState = getCurrentNumberOfMdpStates();
        extraBottomState = currentMdpState;
        mdpStateToBeliefIdMap.push_back(beliefManager->noId());
        probabilityEstimation.push_back(storm::utility::zero<ValueType>());
        insertValueHints(extraBottomStateValue.value(), extraBottomStateValue.value());

        internalAddTransition(getStartOfCurrentRowGroup(), extraBottomState.value(), storm::utility::one<ValueType>());
        mdpStateToChoiceLabelsMap[getStartOfCurrentRowGroup()][0] = "loop";
        internalAddRowGroupIndex();
        ++nextId;
    } else {
        extraBottomState = std::nullopt;
    }
    if (extraTargetStateValue) {
        currentMdpState = getCurrentNumberOfMdpStates();
        extraTargetState = currentMdpState;
        mdpStateToBeliefIdMap.push_back(beliefManager->noId());
        probabilityEstimation.push_back(storm::utility::zero<ValueType>());
        insertValueHints(extraTargetStateValue.value(), extraTargetStateValue.value());

        internalAddTransition(getStartOfCurrentRowGroup(), extraTargetState.value(), storm::utility::one<ValueType>());
        mdpStateToChoiceLabelsMap[getStartOfCurrentRowGroup()][0] = "loop";
        internalAddRowGroupIndex();

        targetStates.grow(getCurrentNumberOfMdpStates(), false);
        targetStates.set(extraTargetState.value(), true);
        ++nextId;
    } else {
        extraTargetState = std::nullopt;
    }
    currentMdpState = noState();

    // Set up the initial state.
    initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief());
}
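
// --- Illustrative example (not part of the original file) ---------------------
// A minimal sketch of how a client typically drives the exploration API above.
// The callback `expandBelief` is a hypothetical stand-in for the algorithm-specific
// step that enumerates a belief's actions and adds their transitions (e.g., via
// addTransitionToBelief or addTransitionsToExtraStates).
template<typename Explorer, typename ExpandCallback>
void exampleExplorationLoop(Explorer &explorer, ExpandCallback &&expandBelief) {
    // Fresh exploration with a dedicated extra target state (value 1) and extra bottom state (value 0).
    explorer.startNewExploration(storm::utility::one<typename Explorer::ValueType>(),
                                 storm::utility::zero<typename Explorer::ValueType>());
    while (explorer.hasUnexploredState()) {
        auto beliefId = explorer.exploreNextState();  // pops the highest-priority queued state
        expandBelief(explorer, beliefId);
    }
    explorer.finishExploration();  // assembles the sparse MDP from the gathered data
}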

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::restartExploration() {
    STORM_LOG_ASSERT(status == Status::ModelChecked || status == Status::ModelFinished, "Method call is invalid in current status.");
    status = Status::Exploring;
    // We will not erase old states during the exploration phase, so most state-based data (like mappings between MDP and belief states) remain valid.
    prio = storm::utility::zero<ValueType>();
    stateRemapping.clear();
    exploredBeliefIds.clear();
    exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(), false);
    exploredMdpTransitions.clear();
    exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices());
    clippingTransitionRewards.clear();
    previousChoiceIndices = exploredMdp->getNondeterministicChoiceIndices();
    exploredChoiceIndices = exploredMdp->getNondeterministicChoiceIndices();
    mdpActionRewards.clear();
    probabilityEstimation.clear();
    if (exploredMdp->hasRewardModel()) {
        // Can be overwritten during exploration
        mdpActionRewards = exploredMdp->getUniqueRewardModel().getStateActionRewardVector();
    }
    targetStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false);
    truncatedStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false);
    clippedStates = storm::storage::BitVector(getCurrentNumberOfMdpStates(), false);
    delayedExplorationChoices.clear();
    mdpStatesToExplorePrioState.clear();
    mdpStatesToExploreStatePrio.clear();

    // The extra states are not changed
    if (extraBottomState) {
        currentMdpState = extraBottomState.value();
        restoreOldBehaviorAtCurrentState(0);
    }
    if (extraTargetState) {
        currentMdpState = extraTargetState.value();
        restoreOldBehaviorAtCurrentState(0);
        targetStates.set(extraTargetState.value(), true);
    }
    currentMdpState = noState();

    // Set up the initial state.
    initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief());
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::storeExplorationState() {
    explorationStorage.storedMdpStateToBeliefIdMap = std::vector<BeliefId>(mdpStateToBeliefIdMap);
    explorationStorage.storedBeliefIdToMdpStateMap = std::map<BeliefId, MdpStateType>(beliefIdToMdpStateMap);
    explorationStorage.storedExploredBeliefIds = storm::storage::BitVector(exploredBeliefIds);
    explorationStorage.storedMdpStateToChoiceLabelsMap = std::map<BeliefId, std::map<uint64_t, std::string>>(mdpStateToChoiceLabelsMap);
    explorationStorage.storedMdpStatesToExplorePrioState = std::multimap<ValueType, uint64_t>(mdpStatesToExplorePrioState);
    explorationStorage.storedMdpStatesToExploreStatePrio = std::map<uint64_t, ValueType>(mdpStatesToExploreStatePrio);
    explorationStorage.storedProbabilityEstimation = std::vector<ValueType>(probabilityEstimation);
    explorationStorage.storedExploredMdpTransitions = std::vector<std::map<MdpStateType, ValueType>>(exploredMdpTransitions);
    explorationStorage.storedExploredChoiceIndices = std::vector<MdpStateType>(exploredChoiceIndices);
    explorationStorage.storedMdpActionRewards = std::vector<ValueType>(mdpActionRewards);
    explorationStorage.storedClippingTransitionRewards = std::map<MdpStateType, ValueType>(clippingTransitionRewards);
    explorationStorage.storedCurrentMdpState = currentMdpState;
    explorationStorage.storedStateRemapping = std::map<MdpStateType, MdpStateType>(stateRemapping);
    explorationStorage.storedNextId = nextId;
    explorationStorage.storedPrio = ValueType(prio);
    explorationStorage.storedLowerValueBounds = std::vector<ValueType>(lowerValueBounds);
    explorationStorage.storedUpperValueBounds = std::vector<ValueType>(upperValueBounds);
    explorationStorage.storedValues = std::vector<ValueType>(values);

    explorationStorage.storedTargetStates = storm::storage::BitVector(targetStates);
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::restoreExplorationState() {
    mdpStateToBeliefIdMap = std::vector<BeliefId>(explorationStorage.storedMdpStateToBeliefIdMap);
    beliefIdToMdpStateMap = std::map<BeliefId, MdpStateType>(explorationStorage.storedBeliefIdToMdpStateMap);
    exploredBeliefIds = storm::storage::BitVector(explorationStorage.storedExploredBeliefIds);
    mdpStateToChoiceLabelsMap = std::map<BeliefId, std::map<uint64_t, std::string>>(explorationStorage.storedMdpStateToChoiceLabelsMap);
    mdpStatesToExplorePrioState = std::multimap<ValueType, uint64_t>(explorationStorage.storedMdpStatesToExplorePrioState);
    mdpStatesToExploreStatePrio = std::map<uint64_t, ValueType>(explorationStorage.storedMdpStatesToExploreStatePrio);
    probabilityEstimation = std::vector<ValueType>(explorationStorage.storedProbabilityEstimation);
    exploredMdpTransitions = std::vector<std::map<MdpStateType, ValueType>>(explorationStorage.storedExploredMdpTransitions);
    exploredChoiceIndices = std::vector<MdpStateType>(explorationStorage.storedExploredChoiceIndices);
    mdpActionRewards = std::vector<ValueType>(explorationStorage.storedMdpActionRewards);
    clippingTransitionRewards = std::map<MdpStateType, ValueType>(explorationStorage.storedClippingTransitionRewards);
    currentMdpState = explorationStorage.storedCurrentMdpState;
    stateRemapping = std::map<MdpStateType, MdpStateType>(explorationStorage.storedStateRemapping);
    nextId = explorationStorage.storedNextId;
    prio = ValueType(explorationStorage.storedPrio);
    lowerValueBounds = explorationStorage.storedLowerValueBounds;
    upperValueBounds = explorationStorage.storedUpperValueBounds;
    values = explorationStorage.storedValues;
    status = Status::Exploring;
    targetStates = explorationStorage.storedTargetStates;

    truncatedStates.clear();
    clippedStates.clear();
    delayedExplorationChoices.clear();
    optimalChoices = std::nullopt;
    optimalChoicesReachableMdpStates = std::nullopt;
    exploredMdp = nullptr;
    scheduler = nullptr;
}

template<typename PomdpType, typename BeliefValueType>
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::hasUnexploredState() const {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    return !mdpStatesToExploreStatePrio.empty();
}

template<typename PomdpType, typename BeliefValueType>
std::vector<uint64_t> BeliefMdpExplorer<PomdpType, BeliefValueType>::getUnexploredStates() {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    std::vector<uint64_t> res;
    res.reserve(mdpStatesToExploreStatePrio.size());
    for (auto const &entry : mdpStatesToExploreStatePrio) {
        res.push_back(entry.first);
    }
    return res;
}

template<typename PomdpType, typename BeliefValueType>
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::BeliefId BeliefMdpExplorer<PomdpType, BeliefValueType>::exploreNextState() {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    // Mark the end of the previously explored row group.
    if (currentMdpState != noState() && mdpStatesToExplorePrioState.rbegin()->second == exploredChoiceIndices.size()) {
        internalAddRowGroupIndex();
    }

    // Pop from the queue.
    currentMdpState = mdpStatesToExplorePrioState.rbegin()->second;
    auto currprio = mdpStatesToExplorePrioState.rbegin()->first;
    auto range = mdpStatesToExplorePrioState.equal_range(currprio);
    for (auto i = range.first; i != range.second; ++i) {
        if (i->second == currentMdpState) {
            mdpStatesToExplorePrioState.erase(i);
            break;
        }
    }
    mdpStatesToExploreStatePrio.erase(currentMdpState);
    if (currentMdpState != nextId && !currentStateHasOldBehavior()) {
        stateRemapping[currentMdpState] = nextId;
        STORM_LOG_DEBUG("Explore state " << currentMdpState << " [Bel " << getCurrentBeliefId() << " " << beliefManager->toString(getCurrentBeliefId())
                                         << "] as state with ID " << nextId << " (Prio: " << storm::utility::to_string(currprio) << ")");
    } else {
        STORM_LOG_DEBUG("Explore state " << currentMdpState << " [Bel " << getCurrentBeliefId() << " " << beliefManager->toString(getCurrentBeliefId()) << "]"
                                         << " (Prio: " << storm::utility::to_string(currprio) << ")");
    }

    if (!currentStateHasOldBehavior()) {
        ++nextId;
    }
    if (explHeuristic == ExplorationHeuristic::ProbabilityPrio) {
        probabilityEstimation.push_back(currprio);
    }

    return mdpStateToBeliefIdMap[currentMdpState];
}
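
// --- Illustrative example (not part of the original file) ---------------------
// The exploration queue above is a pair of synchronized containers: a multimap from
// priority to state and a map from state to priority. Removing one specific
// (priority, state) pair from the multimap uses the equal_range idiom seen in
// exploreNextState, since several states may share the same priority.
template<typename ValueType>
void exampleEraseQueueEntry(std::multimap<ValueType, uint64_t> &prioToState, std::map<uint64_t, ValueType> &stateToPrio, uint64_t state) {
    auto range = prioToState.equal_range(stateToPrio.at(state));  // all entries with this state's priority
    for (auto it = range.first; it != range.second; ++it) {
        if (it->second == state) {
            prioToState.erase(it);  // erase invalidates only the erased iterator
            break;
        }
    }
    stateToPrio.erase(state);
}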

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::addTransitionsToExtraStates(uint64_t const &localActionIndex, ValueType const &targetStateValue,
                                                                                ValueType const &bottomStateValue) {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < previousChoiceIndices[currentMdpState + 1] - previousChoiceIndices[currentMdpState] ||
                         getCurrentStateWasTruncated(),
                     "Action index " << localActionIndex << " was not valid at non-truncated state " << currentMdpState << " of the previously explored MDP.");
    uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
    if (!storm::utility::isZero(bottomStateValue)) {
        STORM_LOG_ASSERT(extraBottomState.has_value(), "Requested a transition to the extra bottom state but there is none.");
        internalAddTransition(row, extraBottomState.value(), bottomStateValue);
    }
    if (!storm::utility::isZero(targetStateValue)) {
        STORM_LOG_ASSERT(extraTargetState.has_value(), "Requested a transition to the extra target state but there is none.");
        internalAddTransition(row, extraTargetState.value(), targetStateValue);
    }
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::addSelfloopTransition(uint64_t const &localActionIndex, ValueType const &value) {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < previousChoiceIndices[currentMdpState + 1] - previousChoiceIndices[currentMdpState] ||
                         getCurrentStateWasTruncated(),
                     "Action index " << localActionIndex << " was not valid at non-truncated state " << currentMdpState << " of the previously explored MDP.");
    uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
    internalAddTransition(row, getCurrentMdpState(), value);
}

template<typename PomdpType, typename BeliefValueType>
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::addTransitionToBelief(uint64_t const &localActionIndex, BeliefId const &transitionTarget,
                                                                          ValueType const &value, bool ignoreNewBeliefs) {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < previousChoiceIndices[currentMdpState + 1] - previousChoiceIndices[currentMdpState] ||
                         getCurrentStateWasTruncated(),
                     "Action index " << localActionIndex << " was not valid at non-truncated state " << currentMdpState << " of the previously explored MDP.");

    MdpStateType column;
    if (ignoreNewBeliefs) {
        column = getExploredMdpState(transitionTarget);
        if (column == noState()) {
            return false;
        }
    } else {
        column = getOrAddMdpState(transitionTarget, value);
    }
    if (getCurrentMdpState() == exploredChoiceIndices.size()) {
        internalAddRowGroupIndex();
    }
    uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
    internalAddTransition(row, column, value);
    return true;
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::computeRewardAtCurrentState(uint64_t const &localActionIndex, ValueType extraReward) {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    if (getCurrentNumberOfMdpChoices() > mdpActionRewards.size()) {
        mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
    }
    uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
    mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward;
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::addRewardToCurrentState(uint64_t const &localActionIndex, ValueType rewardValue) {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    if (getCurrentNumberOfMdpChoices() > mdpActionRewards.size()) {
        mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
    }
    uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
    mdpActionRewards[row] = rewardValue;
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::addClippingRewardToCurrentState(uint64_t const &localActionIndex, ValueType rewardValue) {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
    clippingTransitionRewards[row] = rewardValue;
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::setCurrentStateIsTarget() {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    targetStates.grow(getCurrentNumberOfMdpStates(), false);
    targetStates.set(getCurrentMdpState(), true);
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::setCurrentStateIsTruncated() {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    truncatedStates.grow(getCurrentNumberOfMdpStates(), false);
    truncatedStates.set(getCurrentMdpState(), true);
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::setCurrentStateIsClipped() {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    setCurrentStateIsTruncated();
    clippedStates.grow(getCurrentNumberOfMdpStates(), false);
    clippedStates.set(getCurrentMdpState(), true);
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::setCurrentChoiceIsDelayed(uint64_t const &localActionIndex) {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    delayedExplorationChoices.grow(getCurrentNumberOfMdpChoices(), false);
    delayedExplorationChoices.set(getStartOfCurrentRowGroup() + localActionIndex, true);
}

template<typename PomdpType, typename BeliefValueType>
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::currentStateHasOldBehavior() const {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateHasOldBehavior' called but there is no current state.");
    return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates();
}

template<typename PomdpType, typename BeliefValueType>
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::getCurrentStateWasTruncated() const {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'getCurrentStateWasTruncated' called but there is no current state.");
    STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'getCurrentStateWasTruncated' called but the current state has no old behavior.");
    STORM_LOG_ASSERT(exploredMdp, "No 'old' MDP available.");
    return exploredMdp->getStateLabeling().getStateHasLabel("truncated", getCurrentMdpState());
}

template<typename PomdpType, typename BeliefValueType>
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::getCurrentStateWasClipped() const {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'getCurrentStateWasClipped' called but there is no current state.");
    STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'getCurrentStateWasClipped' called but the current state has no old behavior.");
    STORM_LOG_ASSERT(exploredMdp, "No 'old' MDP available.");
    return exploredMdp->getStateLabeling().getStateHasLabel("clipped", getCurrentMdpState());
}

template<typename PomdpType, typename BeliefValueType>
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::stateIsOptimalSchedulerReachable(MdpStateType mdpState) const {
    STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(optimalChoicesReachableMdpStates.has_value(),
                     "Method 'stateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before.");
    return optimalChoicesReachableMdpStates->get(mdpState);
}

template<typename PomdpType, typename BeliefValueType>
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::actionIsOptimal(uint64_t const &globalActionIndex) const {
    STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(optimalChoices.has_value(), "Method 'actionIsOptimal' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before.");
    return optimalChoices->get(globalActionIndex);
}

template<typename PomdpType, typename BeliefValueType>
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::currentStateIsOptimalSchedulerReachable() const {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'currentStateIsOptimalSchedulerReachable' called but there is no current state.");
    STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'currentStateIsOptimalSchedulerReachable' called but the current state has no old behavior.");
    STORM_LOG_ASSERT(optimalChoicesReachableMdpStates.has_value(),
                     "Method 'currentStateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before.");
    return optimalChoicesReachableMdpStates->get(getCurrentMdpState());
}

template<typename PomdpType, typename BeliefValueType>
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::actionAtCurrentStateWasOptimal(uint64_t const &localActionIndex) const {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'actionAtCurrentStateWasOptimal' called but there is no current state.");
    STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'actionAtCurrentStateWasOptimal' called but the current state has no old behavior.");
    STORM_LOG_ASSERT(optimalChoices.has_value(),
                     "Method 'actionAtCurrentStateWasOptimal' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before.");
    uint64_t choice = previousChoiceIndices.at(getCurrentMdpState()) + localActionIndex;
    return optimalChoices->get(choice);
}

template<typename PomdpType, typename BeliefValueType>
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::getCurrentStateActionExplorationWasDelayed(uint64_t const &localActionIndex) const {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(getCurrentMdpState() != noState(), "Method 'getCurrentStateActionExplorationWasDelayed' called but there is no current state.");
    STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method 'getCurrentStateActionExplorationWasDelayed' called but the current state has no old behavior.");
    STORM_LOG_ASSERT(exploredMdp, "No 'old' MDP available.");
    uint64_t choice = exploredMdp->getNondeterministicChoiceIndices()[getCurrentMdpState()] + localActionIndex;
    return exploredMdp->hasChoiceLabeling() && exploredMdp->getChoiceLabeling().getLabels().count("delayed") > 0 &&
           exploredMdp->getChoiceLabeling().getChoiceHasLabel("delayed", choice);
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::restoreOldBehaviorAtCurrentState(uint64_t const &localActionIndex) {
    STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Cannot restore old behavior as the current state does not have any.");
    STORM_LOG_ASSERT(localActionIndex < previousChoiceIndices[currentMdpState + 1] - previousChoiceIndices[currentMdpState],
                     "Action index " << localActionIndex << " was not valid at state " << currentMdpState << " of the previously explored MDP.");

    if (getCurrentMdpState() == exploredChoiceIndices.size()) {
        internalAddRowGroupIndex();
    }

    assert(getCurrentMdpState() < previousChoiceIndices.size());
    assert(getCurrentMdpState() < exploredChoiceIndices.size());
    uint64_t oldChoiceIndex = previousChoiceIndices.at(getCurrentMdpState()) + localActionIndex;
    uint64_t newChoiceIndex = exploredChoiceIndices.at(getCurrentMdpState()) + localActionIndex;

    // Insert the transitions
    for (auto const &transition : exploredMdp->getTransitionMatrix().getRow(oldChoiceIndex)) {
        internalAddTransition(newChoiceIndex, transition.getColumn(), transition.getValue());
        // Check whether exploration is needed
        auto beliefId = getBeliefId(transition.getColumn());
        if (beliefId != beliefManager->noId()) {  // Not the extra target or bottom state
            if (!exploredBeliefIds.get(beliefId)) {
                // This belief needs exploration
                exploredBeliefIds.set(beliefId, true);
                ValueType currentPrio;
                switch (explHeuristic) {
                    case ExplorationHeuristic::BreadthFirst:
                        currentPrio = prio;
                        prio = prio - storm::utility::one<ValueType>();
                        break;
                    case ExplorationHeuristic::LowerBoundPrio:
                        currentPrio = getLowerValueBoundAtCurrentState();
                        break;
                    case ExplorationHeuristic::UpperBoundPrio:
                        currentPrio = getUpperValueBoundAtCurrentState();
                        break;
                    case ExplorationHeuristic::GapPrio:
                        currentPrio = getUpperValueBoundAtCurrentState() - getLowerValueBoundAtCurrentState();
                        break;
                    case ExplorationHeuristic::ProbabilityPrio:
                        if (getCurrentMdpState() != noState()) {
                            currentPrio = probabilityEstimation[getCurrentMdpState()] * transition.getValue();
                        } else {
                            currentPrio = storm::utility::one<ValueType>();
                        }
                        break;
                    default:
                        STORM_LOG_THROW(false, storm::exceptions::NotImplementedException, "Other heuristics not implemented yet");
                }
                mdpStatesToExploreStatePrio[transition.getColumn()] = currentPrio;
                mdpStatesToExplorePrioState.emplace(currentPrio, transition.getColumn());
            }
        }
    }

    // Nothing needs to be done for rewards since the vector is already initialized with the "old" values.
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::finishExploration() {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing the exploration is not possible if there are still unexplored states.");

    // Complete the exploration
    // Finish the last row grouping in case the last explored state was new
    if (!currentStateHasOldBehavior() || exploredChoiceIndices.back() < getCurrentNumberOfMdpChoices()) {
        internalAddRowGroupIndex();
    }
    // Resize state- and choice-based vectors to the correct size
    targetStates.resize(getCurrentNumberOfMdpStates(), false);
    truncatedStates.resize(getCurrentNumberOfMdpStates(), false);
    clippedStates.resize(getCurrentNumberOfMdpStates(), false);
    if (!mdpActionRewards.empty()) {
        mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
    }

    // We are not exploring anymore
    currentMdpState = noState();

    // If this was a restarted exploration, we might still have unexplored states (which were only reachable and explored in a previous build).
    // We get rid of these before rebuilding the model.
    if (exploredMdp) {
        dropUnexploredStates();
    }

    // The potentially computed optimal choices and the set of states that are reachable under these choices are not valid anymore.
    optimalChoices = std::nullopt;
    optimalChoicesReachableMdpStates = std::nullopt;

    // Apply the state remapping to the belief-state maps
    if (!stateRemapping.empty()) {
        std::vector<BeliefId> remappedStateToBeliefIdMap(mdpStateToBeliefIdMap);
        for (auto const &entry : stateRemapping) {
            remappedStateToBeliefIdMap[entry.second] = mdpStateToBeliefIdMap[entry.first];
        }
        mdpStateToBeliefIdMap = remappedStateToBeliefIdMap;
        for (auto const &beliefMdpState : beliefIdToMdpStateMap) {
            if (stateRemapping.find(beliefMdpState.second) != stateRemapping.end()) {
                beliefIdToMdpStateMap[beliefMdpState.first] = stateRemapping[beliefMdpState.second];
            }
        }
        if (!mdpStateToChoiceLabelsMap.empty()) {
            std::map<BeliefId, std::map<uint64_t, std::string>> temp(mdpStateToChoiceLabelsMap);
            for (auto const &entry : stateRemapping) {
                temp[entry.second] = mdpStateToChoiceLabelsMap[entry.first];
            }
            mdpStateToChoiceLabelsMap = temp;
        }
    }

    // Create the transition matrix
    uint64_t entryCount = 0;
    for (auto const &row : exploredMdpTransitions) {
        entryCount += row.size();
    }
    storm::storage::SparseMatrixBuilder<ValueType> builder(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(), entryCount, true, true,
                                                           getCurrentNumberOfMdpStates());
    for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) {
        uint64_t rowIndex = exploredChoiceIndices[groupIndex];
        uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1];
        builder.newRowGroup(rowIndex);
        for (; rowIndex < groupEnd; ++rowIndex) {
            for (auto const &entry : exploredMdpTransitions[rowIndex]) {
                if (stateRemapping.find(entry.first) == stateRemapping.end()) {
                    builder.addNextValue(rowIndex, entry.first, entry.second);
                } else {
                    builder.addNextValue(rowIndex, stateRemapping[entry.first], entry.second);
                }
            }
        }
    }
    auto mdpTransitionMatrix = builder.build();

    // Create a standard labeling
    storm::models::sparse::StateLabeling mdpLabeling(getCurrentNumberOfMdpStates());
    mdpLabeling.addLabel("init");
    mdpLabeling.addLabelToState("init", initialMdpState);
    targetStates.resize(getCurrentNumberOfMdpStates(), false);
    mdpLabeling.addLabel("target", std::move(targetStates));
    truncatedStates.resize(getCurrentNumberOfMdpStates(), false);
    mdpLabeling.addLabel("truncated", std::move(truncatedStates));
    clippedStates.resize(getCurrentNumberOfMdpStates(), false);
    mdpLabeling.addLabel("clipped", std::move(clippedStates));

    for (uint64_t state = 0; state < getCurrentNumberOfMdpStates(); ++state) {
        if (state == extraBottomState || state == extraTargetState) {
            if (!mdpLabeling.containsLabel("__extra")) {
                mdpLabeling.addLabel("__extra");
            }
            mdpLabeling.addLabelToState("__extra", state);
        } else {
            STORM_LOG_DEBUG("Observation of MDP state " << state << " : " << beliefManager->getObservationLabel(mdpStateToBeliefIdMap[state]) << "\n");
            std::string obsLabel = beliefManager->getObservationLabel(mdpStateToBeliefIdMap[state]);
            uint32_t obsId = beliefManager->getBeliefObservation(mdpStateToBeliefIdMap[state]);
            if (!obsLabel.empty()) {
                if (!mdpLabeling.containsLabel(obsLabel)) {
                    mdpLabeling.addLabel(obsLabel);
                }
                mdpLabeling.addLabelToState(obsLabel, state);
            } else if (mdpStateToBeliefIdMap[state] != beliefManager->noId()) {
                std::string obsIdLabel = "obs_" + std::to_string(obsId);
                if (!mdpLabeling.containsLabel(obsIdLabel)) {
                    mdpLabeling.addLabel(obsIdLabel);
                }
                mdpLabeling.addLabelToState(obsIdLabel, state);
            }
        }
    }

    // Create a standard reward model (if rewards are available)
    std::unordered_map<std::string, storm::models::sparse::StandardRewardModel<ValueType>> mdpRewardModels;
    if (!mdpActionRewards.empty()) {
        mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
        if (!clippingTransitionRewards.empty()) {
            storm::storage::SparseMatrixBuilder<ValueType> rewardBuilder(getCurrentNumberOfMdpChoices(), getCurrentNumberOfMdpStates(),
                                                                         clippingTransitionRewards.size(), true, true, getCurrentNumberOfMdpStates());
            for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) {
                uint64_t rowIndex = exploredChoiceIndices[groupIndex];
                uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1];
                rewardBuilder.newRowGroup(rowIndex);
                for (; rowIndex < groupEnd; ++rowIndex) {
                    if (clippingTransitionRewards.find(rowIndex) != clippingTransitionRewards.end()) {
                        STORM_LOG_ASSERT(extraTargetState.has_value(), "Requested a transition to the extra target state but there is none.");
                        rewardBuilder.addNextValue(rowIndex, extraTargetState.value(), clippingTransitionRewards[rowIndex]);
                    }
                }
            }
            auto transitionRewardMatrix = rewardBuilder.build();
            mdpRewardModels.emplace("default", storm::models::sparse::StandardRewardModel<ValueType>(
                                                   std::optional<std::vector<ValueType>>(), std::move(mdpActionRewards), std::move(transitionRewardMatrix)));
        } else {
            mdpRewardModels.emplace(
                "default", storm::models::sparse::StandardRewardModel<ValueType>(std::optional<std::vector<ValueType>>(), std::move(mdpActionRewards)));
        }
    }

    // Create model components
    storm::storage::sparse::ModelComponents<ValueType> modelComponents(std::move(mdpTransitionMatrix), std::move(mdpLabeling), std::move(mdpRewardModels));

    // Potentially create a choice labeling
    if (!mdpStateToChoiceLabelsMap.empty()) {
        modelComponents.choiceLabeling = storm::models::sparse::ChoiceLabeling(getCurrentNumberOfMdpChoices());
        for (auto const &stateMap : mdpStateToChoiceLabelsMap) {
            auto rowGroup = stateMap.first;
            for (auto const &actionLabel : stateMap.second) {
                if (!modelComponents.choiceLabeling->containsLabel(actionLabel.second)) {
                    modelComponents.choiceLabeling->addLabel(actionLabel.second);
                }
                modelComponents.choiceLabeling->addLabelToChoice(actionLabel.second, exploredChoiceIndices.at(rowGroup) + actionLabel.first);
            }
        }
    }

    if (!delayedExplorationChoices.empty()) {
        modelComponents.choiceLabeling = storm::models::sparse::ChoiceLabeling(getCurrentNumberOfMdpChoices());
        delayedExplorationChoices.resize(getCurrentNumberOfMdpChoices(), false);
        modelComponents.choiceLabeling->addLabel("delayed", std::move(delayedExplorationChoices));
    }

    // Create the final model.
    exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents));
    status = Status::ModelFinished;
    STORM_LOG_DEBUG("Explored MDP with " << exploredMdp->getNumberOfStates() << " states (" << clippedStates.getNumberOfSetBits()
                                         << " of which were clipped and " << truncatedStates.getNumberOfSetBits() - clippedStates.getNumberOfSetBits()
                                         << " of which were flagged as truncated).");
}
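
// --- Illustrative example (not part of the original file) ---------------------
// The matrix assembly above in miniature: row groups encode nondeterminism.
// newRowGroup(r) opens the action block of the next state at global row r, and the
// entries of each row must be added with increasing column indices.
template<typename ValueType>
storm::storage::SparseMatrix<ValueType> exampleTwoStateMdpMatrix() {
    // 3 rows, 2 columns, 4 entries, custom row grouping with 2 groups.
    storm::storage::SparseMatrixBuilder<ValueType> builder(3, 2, 4, true, true, 2);
    builder.newRowGroup(0);  // state 0 has two actions (rows 0 and 1)
    builder.addNextValue(0, 0, storm::utility::one<ValueType>());               // action a: self-loop
    builder.addNextValue(1, 0, storm::utility::convertNumber<ValueType>(0.5));  // action b: 0.5 to state 0 ...
    builder.addNextValue(1, 1, storm::utility::convertNumber<ValueType>(0.5));  // ... and 0.5 to state 1
    builder.newRowGroup(2);  // state 1 has a single action (row 2)
    builder.addNextValue(2, 1, storm::utility::one<ValueType>());
    return builder.build();
}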

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::dropUnexploredStates() {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(!hasUnexploredState(), "Finishing the exploration is not possible if there are still unexplored states.");

    STORM_LOG_ASSERT(exploredMdp, "Method called although no 'old' MDP is available.");
    // Find the states (and corresponding choices) that were not explored.
    // These correspond to "empty" MDP transitions.
    storm::storage::BitVector relevantMdpStates(getCurrentNumberOfMdpStates(), true), relevantMdpChoices(getCurrentNumberOfMdpChoices(), true);
    std::vector<MdpStateType> toRelevantStateIndexMap(getCurrentNumberOfMdpStates(), noState());
    MdpStateType nextRelevantIndex = 0;
    for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) {
        uint64_t rowIndex = exploredChoiceIndices[groupIndex];
        // Check the first row in the group
        if (exploredMdpTransitions[rowIndex].empty()) {
            relevantMdpChoices.set(rowIndex, false);
            relevantMdpStates.set(groupIndex, false);
        } else {
            toRelevantStateIndexMap[groupIndex] = nextRelevantIndex;
            ++nextRelevantIndex;
        }
        uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1];
        // Process the remaining rows in the group
        for (++rowIndex; rowIndex < groupEnd; ++rowIndex) {
            // Assert that all actions at the current state were consistently explored or unexplored.
            STORM_LOG_ASSERT(exploredMdpTransitions[rowIndex].empty() != relevantMdpStates.get(groupIndex),
                             "Actions at 'old' MDP state " << groupIndex << " were only partly explored.");
            if (exploredMdpTransitions[rowIndex].empty()) {
                relevantMdpChoices.set(rowIndex, false);
            }
        }
    }

    if (relevantMdpStates.full()) {
        // All states are relevant, so there is nothing to do.
        return;
    }

    nextId -= (relevantMdpStates.size() - relevantMdpStates.getNumberOfSetBits());

    // Translate the various components to the "new" MDP state set
    storm::utility::vector::filterVectorInPlace(mdpStateToBeliefIdMap, relevantMdpStates);
    {  // beliefIdToMdpStateMap
        for (auto belIdToMdpStateIt = beliefIdToMdpStateMap.begin(); belIdToMdpStateIt != beliefIdToMdpStateMap.end();) {
            if (relevantMdpStates.get(belIdToMdpStateIt->second)) {
                // Translate the current entry and move on to the next one.
                belIdToMdpStateIt->second = toRelevantStateIndexMap[belIdToMdpStateIt->second];
                ++belIdToMdpStateIt;
            } else {
                STORM_LOG_ASSERT(!exploredBeliefIds.get(belIdToMdpStateIt->first),
                                 "Inconsistent exploration information: An unexplored MDP state corresponds to an explored belief id.");
                // Delete the current entry and move on to the next one.
                // This works because std::map::erase does not invalidate other iterators within the map!
                beliefIdToMdpStateMap.erase(belIdToMdpStateIt++);
            }
        }
    }
    {  // exploredMdpTransitions
        storm::utility::vector::filterVectorInPlace(exploredMdpTransitions, relevantMdpChoices);
        // Adjust column indices. Unfortunately, the fastest way seems to be to "rebuild" the map.
        // It might pay off to do this when building the matrix.
        for (auto &transitions : exploredMdpTransitions) {
            std::map<MdpStateType, ValueType> newTransitions;
            for (auto const &entry : transitions) {
                STORM_LOG_ASSERT(relevantMdpStates.get(entry.first), "Relevant state has a transition to an irrelevant state.");
                newTransitions.emplace_hint(newTransitions.end(), toRelevantStateIndexMap[entry.first], entry.second);
            }
            transitions = std::move(newTransitions);
        }
    }
    {  // exploredChoiceIndices
        MdpStateType newState = 0;
        assert(exploredChoiceIndices[0] == 0u);
        // Loop invariant: all indices up to exploredChoiceIndices[newState] consider the new row indices and all other entries are not touched.
        for (auto const oldState : relevantMdpStates) {
            if (oldState != newState) {
                assert(oldState > newState);
                uint64_t groupSize = getRowGroupSizeOfState(oldState);
                exploredChoiceIndices.at(newState + 1) = exploredChoiceIndices.at(newState) + groupSize;
            }
            ++newState;
        }
        exploredChoiceIndices.resize(newState + 1);
    }
    if (!mdpActionRewards.empty()) {
        storm::utility::vector::filterVectorInPlace(mdpActionRewards, relevantMdpChoices);
    }
    if (extraBottomState) {
        extraBottomState = toRelevantStateIndexMap[extraBottomState.value()];
    }
    if (extraTargetState) {
        extraTargetState = toRelevantStateIndexMap[extraTargetState.value()];
    }
    targetStates = targetStates % relevantMdpStates;
    truncatedStates = truncatedStates % relevantMdpStates;
    clippedStates = clippedStates % relevantMdpStates;
    initialMdpState = toRelevantStateIndexMap[initialMdpState];

    storm::utility::vector::filterVectorInPlace(lowerValueBounds, relevantMdpStates);
    storm::utility::vector::filterVectorInPlace(upperValueBounds, relevantMdpStates);
    storm::utility::vector::filterVectorInPlace(values, relevantMdpStates);

    {  // mdpStateToChoiceLabelsMap
        if (!mdpStateToChoiceLabelsMap.empty()) {
            auto temp = std::map<BeliefId, std::map<uint64_t, std::string>>();
            for (auto const relevantState : relevantMdpStates) {
                temp[toRelevantStateIndexMap[relevantState]] = mdpStateToChoiceLabelsMap[relevantState];
            }
            mdpStateToChoiceLabelsMap = temp;
        }
    }
}

template<typename PomdpType, typename BeliefValueType>
std::shared_ptr<storm::models::sparse::Mdp<typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType>>
BeliefMdpExplorer<PomdpType, BeliefValueType>::getExploredMdp() const {
    STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(exploredMdp, "Tried to get the explored MDP but exploration was not finished yet.");
    return exploredMdp;
}

template<typename PomdpType, typename BeliefValueType>
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getCurrentNumberOfMdpStates() const {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    return mdpStateToBeliefIdMap.size();
}

template<typename PomdpType, typename BeliefValueType>
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getCurrentNumberOfMdpChoices() const {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    return exploredMdpTransitions.size();
}

template<typename PomdpType, typename BeliefValueType>
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getStartOfCurrentRowGroup() const {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    assert(getCurrentMdpState() < exploredChoiceIndices.size());
    return exploredChoiceIndices.at(getCurrentMdpState());
}

template<typename PomdpType, typename BeliefValueType>
uint64_t BeliefMdpExplorer<PomdpType, BeliefValueType>::getSizeOfCurrentRowGroup() const {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    assert(getCurrentMdpState() < exploredChoiceIndices.size() - 1);
    return exploredChoiceIndices.at(getCurrentMdpState() + 1) - exploredChoiceIndices.at(getCurrentMdpState());
}

template<typename PomdpType, typename BeliefValueType>
uint64_t BeliefMdpExplorer<PomdpType, BeliefValueType>::getRowGroupSizeOfState(uint64_t state) const {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    assert(state < exploredChoiceIndices.size());
    if (state < exploredChoiceIndices.size() - 1) {
        return exploredChoiceIndices.at(state + 1) - exploredChoiceIndices.at(state);
    } else if (state == exploredChoiceIndices.size() - 1) {
        return exploredMdpTransitions.size() - exploredChoiceIndices.at(state);
    } else {
        return 0;
    }
}

template<typename PomdpType, typename BeliefValueType>
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::needsActionAdjustment(uint64_t numActionsNeeded) {
    return (currentStateHasOldBehavior() && getCurrentStateWasTruncated() && getCurrentMdpState() < exploredChoiceIndices.size() - 1 &&
            getSizeOfCurrentRowGroup() != numActionsNeeded);
}

template<typename PomdpType, typename BeliefValueType>
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType BeliefMdpExplorer<PomdpType, BeliefValueType>::getLowerValueBoundAtCurrentState() const {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    return lowerValueBounds[getCurrentMdpState()];
}

template<typename PomdpType, typename BeliefValueType>
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType BeliefMdpExplorer<PomdpType, BeliefValueType>::getUpperValueBoundAtCurrentState() const {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    return upperValueBounds[getCurrentMdpState()];
}

template<typename PomdpType, typename BeliefValueType>
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType BeliefMdpExplorer<PomdpType, BeliefValueType>::computeLowerValueBoundAtBelief(
    BeliefId const &beliefId) const {
    STORM_LOG_ASSERT(!pomdpValueBounds.lower.empty(), "Requested lower value bounds but none were available.");
    auto it = pomdpValueBounds.lower.begin();
    ValueType result = beliefManager->getWeightedSum(beliefId, *it);
    for (++it; it != pomdpValueBounds.lower.end(); ++it) {
        result = std::max(result, beliefManager->getWeightedSum(beliefId, *it));
    }
    return result;
}

template<typename PomdpType, typename BeliefValueType>
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType BeliefMdpExplorer<PomdpType, BeliefValueType>::computeUpperValueBoundAtBelief(
    BeliefId const &beliefId) const {
    STORM_LOG_ASSERT(!pomdpValueBounds.upper.empty(), "Requested upper value bounds but none were available.");
    auto it = pomdpValueBounds.upper.begin();
    ValueType result = beliefManager->getWeightedSum(beliefId, *it);
    for (++it; it != pomdpValueBounds.upper.end(); ++it) {
        result = std::min(result, beliefManager->getWeightedSum(beliefId, *it));
    }
    return result;
}

template<typename PomdpType, typename BeliefValueType>
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType BeliefMdpExplorer<PomdpType, BeliefValueType>::computeLowerValueBoundForScheduler(
    BeliefId const &beliefId, uint64_t schedulerId) const {
    STORM_LOG_ASSERT(!pomdpValueBounds.lower.empty(), "Requested lower value bounds but none were available.");
    STORM_LOG_ASSERT(pomdpValueBounds.lower.size() > schedulerId, "Requested lower value bound for scheduler with ID " << schedulerId << " not available.");
    return beliefManager->getWeightedSum(beliefId, pomdpValueBounds.lower[schedulerId]);
}

template<typename PomdpType, typename BeliefValueType>
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType BeliefMdpExplorer<PomdpType, BeliefValueType>::computeUpperValueBoundForScheduler(
    BeliefId const &beliefId, uint64_t schedulerId) const {
    STORM_LOG_ASSERT(!pomdpValueBounds.upper.empty(), "Requested upper value bounds but none were available.");
    STORM_LOG_ASSERT(pomdpValueBounds.upper.size() > schedulerId, "Requested upper value bound for scheduler with ID " << schedulerId << " not available.");
    return beliefManager->getWeightedSum(beliefId, pomdpValueBounds.upper[schedulerId]);
}

template<typename PomdpType, typename BeliefValueType>
std::pair<bool, typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType>
BeliefMdpExplorer<PomdpType, BeliefValueType>::computeFMSchedulerValueForMemoryNode(BeliefId const &beliefId, uint64_t memoryNode) const {
    STORM_LOG_ASSERT(!fmSchedulerValueList.empty(), "Requested finite-memory scheduler value bounds but none were available.");
    auto obs = beliefManager->getBeliefObservation(beliefId);
    STORM_LOG_ASSERT(fmSchedulerValueList.size() > obs, "Requested value bound for observation " << obs << " not available.");
    STORM_LOG_ASSERT(fmSchedulerValueList.at(obs).size() > memoryNode,
                     "Requested value bound for observation " << obs << " and memory node " << memoryNode << " not available.");
    return beliefManager->getWeightedSum(beliefId, fmSchedulerValueList.at(obs).at(memoryNode));
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::computeValuesOfExploredMdp(storm::Environment const &env,
                                                                               storm::solver::OptimizationDirection const &dir) {
    STORM_LOG_ASSERT(status == Status::ModelFinished, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(exploredMdp, "Tried to compute values but the MDP is not explored.");
    auto property = createStandardProperty(dir, exploredMdp->hasRewardModel());
    auto task = createStandardCheckTask(property);

    std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(env, exploredMdp, task));
    if (res) {
        values = std::move(res->asExplicitQuantitativeCheckResult<ValueType>().getValueVector());
        scheduler = std::make_shared<storm::storage::Scheduler<ValueType>>(res->asExplicitQuantitativeCheckResult<ValueType>().getScheduler());
        STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(lowerValueBounds, values, std::less_equal<ValueType>()),
                                  "Computed values are smaller than the lower bound.");
        STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(upperValueBounds, values, std::greater_equal<ValueType>()),
                                  "Computed values are larger than the upper bound.");
    } else {
        STORM_LOG_ERROR("No result obtained while checking.");
    }
    status = Status::ModelChecked;
}
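
// --- Illustrative example (not part of the original file) ---------------------
// A minimal sketch of the check phase as a client sees it, assuming exploration has
// already been finished. `env` carries the solver settings; direction Maximize
// yields Pmax=? / Rmax=? over the "target" label (see createStandardProperty below).
template<typename Explorer>
typename Explorer::ValueType exampleCheckExploredMdp(Explorer &explorer, storm::Environment const &env) {
    explorer.computeValuesOfExploredMdp(env, storm::solver::OptimizationDirection::Maximize);
    return explorer.getComputedValueAtInitialState();  // value of the initial belief state
}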

template<typename PomdpType, typename BeliefValueType>
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::hasComputedValues() const {
    return status == Status::ModelChecked;
}

template<typename PomdpType, typename BeliefValueType>
std::vector<typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType> const &BeliefMdpExplorer<PomdpType, BeliefValueType>::getValuesOfExploredMdp()
    const {
    STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status.");
    return values;
}

template<typename PomdpType, typename BeliefValueType>
const std::shared_ptr<storm::storage::Scheduler<typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType>> &
BeliefMdpExplorer<PomdpType, BeliefValueType>::getSchedulerForExploredMdp() const {
    STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status.");
    return scheduler;
}

template<typename PomdpType, typename BeliefValueType>
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType BeliefMdpExplorer<PomdpType, BeliefValueType>::getComputedValueAtInitialState() const {
    STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(exploredMdp, "Tried to get a value but no MDP was explored.");
    return getValuesOfExploredMdp()[exploredMdp->getInitialStates().getNextSetIndex(0)];
}

template<typename PomdpType, typename BeliefValueType>
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::BeliefId BeliefMdpExplorer<PomdpType, BeliefValueType>::getBeliefId(
    MdpStateType exploredMdpState) const {
    STORM_LOG_ASSERT(status != Status::Uninitialized, "Method call is invalid in current status.");
    return mdpStateToBeliefIdMap[exploredMdpState];
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::gatherSuccessorObservationInformationAtCurrentState(
    uint64_t localActionIndex, std::map<uint32_t, SuccessorObservationInformation> &gatheredSuccessorObservations) {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior.");
    uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex;
    gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::gatherSuccessorObservationInformationAtMdpChoice(
    uint64_t mdpChoice, std::map<uint32_t, SuccessorObservationInformation> &gatheredSuccessorObservations) {
    STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP has been explored before.");
    for (auto const &entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) {
        auto const &beliefId = getBeliefId(entry.getColumn());
        if (beliefId != beliefManager->noId()) {
            auto const &obs = beliefManager->getBeliefObservation(beliefId);
            SuccessorObservationInformation info(entry.getValue(), entry.getValue(), 1);
            auto obsInsertion = gatheredSuccessorObservations.emplace(obs, info);
            if (!obsInsertion.second) {
                // There already is an entry for this observation, so join the two information structs.
                obsInsertion.first->second.join(info);
            }
            beliefManager->joinSupport(beliefId, obsInsertion.first->second.support);
        }
    }
}
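
// --- Illustrative example (not part of the original file) ---------------------
// A sketch of reading back the per-observation successor statistics gathered above,
// here for a (hypothetical) first action of the current state.
template<typename Explorer>
void examplePrintSuccessorObservations(Explorer &explorer) {
    std::map<uint32_t, typename Explorer::SuccessorObservationInformation> infos;
    explorer.gatherSuccessorObservationInformationAtCurrentState(0, infos);
    for (auto const &entry : infos) {
        // entry.first is the observation id; observationProbability accumulates the
        // probability mass of all successors carrying that observation.
        STORM_LOG_DEBUG("Observation " << entry.first << " is reached with probability " << entry.second.observationProbability);
    }
}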

template<typename PomdpType, typename BeliefValueType>
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::currentStateHasSuccessorObservationInObservationSet(uint64_t localActionIndex,
                                                                                                        storm::storage::BitVector const &observationSet) {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(currentStateHasOldBehavior(), "Method call is invalid since the current state has no old behavior.");
    uint64_t mdpChoice = previousChoiceIndices.at(getCurrentMdpState()) + localActionIndex;
    return std::any_of(exploredMdp->getTransitionMatrix().getRow(mdpChoice).begin(), exploredMdp->getTransitionMatrix().getRow(mdpChoice).end(),
                       [this, &observationSet](typename storm::storage::MatrixEntry<uint_fast64_t, ValueType> i) {
                           return observationSet.get(beliefManager->getBeliefObservation(getBeliefId(i.getColumn())));
                       });
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::takeCurrentValuesAsUpperBounds() {
    STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status.");
    upperValueBounds = values;
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::takeCurrentValuesAsLowerBounds() {
    STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status.");
    lowerValueBounds = values;
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::computeOptimalChoicesAndReachableMdpStates(ValueType const &ancillaryChoicesEpsilon,
                                                                                               bool relativeDifference) {
    STORM_LOG_ASSERT(status == Status::ModelChecked, "Method call is invalid in current status.");
    STORM_LOG_ASSERT(exploredMdp, "Method call is invalid if no MDP is available.");
    STORM_LOG_ASSERT(!optimalChoices.has_value(), "Tried to compute the optimal scheduler but this has already been done before.");
    STORM_LOG_ASSERT(!optimalChoicesReachableMdpStates.has_value(),
                     "Tried to compute the states that are reachable under an optimal scheduler but this has already been done before.");

    // First, find the choices that are optimal
    optimalChoices = storm::storage::BitVector(exploredMdp->getNumberOfChoices(), false);
    auto const &choiceIndices = exploredMdp->getNondeterministicChoiceIndices();
    auto const &transitions = exploredMdp->getTransitionMatrix();
    auto const &targetStatesExploredMDP = exploredMdp->getStates("target");
    for (uint64_t mdpState = 0; mdpState < exploredMdp->getNumberOfStates(); ++mdpState) {
        if (targetStatesExploredMDP.get(mdpState)) {
            // Target states can be skipped.
            continue;
        } else {
            auto const &stateValue = values[mdpState];
            for (uint64_t globalChoice = choiceIndices[mdpState]; globalChoice < choiceIndices[mdpState + 1]; ++globalChoice) {
                ValueType choiceValue = transitions.multiplyRowWithVector(globalChoice, values);
                if (exploredMdp->hasRewardModel()) {
                    choiceValue += exploredMdp->getUniqueRewardModel().getStateActionReward(globalChoice);
                }
                auto absDiff = storm::utility::abs<ValueType>((choiceValue - stateValue));
                if ((relativeDifference && absDiff <= ancillaryChoicesEpsilon * stateValue) || (!relativeDifference && absDiff <= ancillaryChoicesEpsilon)) {
                    optimalChoices->set(globalChoice, true);
                }
            }
            STORM_LOG_ASSERT(optimalChoices->getNextSetIndex(choiceIndices[mdpState]) < optimalChoices->size(), "Could not find an optimal choice.");
        }
    }

    // Then, find the states that are reachable via these choices
    optimalChoicesReachableMdpStates = storm::utility::graph::getReachableStates(transitions, exploredMdp->getInitialStates(), ~targetStatesExploredMDP,
                                                                                 targetStatesExploredMDP, false, 0, optimalChoices.value());
}

template<typename PomdpType, typename BeliefValueType>
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::beliefHasMdpState(BeliefId const &beliefId) const {
    return getExploredMdpState(beliefId) != noState();
}

template<typename PomdpType, typename BeliefValueType>
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::noState() const {
    return std::numeric_limits<MdpStateType>::max();
}

template<typename PomdpType, typename BeliefValueType>
std::shared_ptr<storm::logic::Formula const> BeliefMdpExplorer<PomdpType, BeliefValueType>::createStandardProperty(
    storm::solver::OptimizationDirection const &dir, bool computeRewards) {
    std::string propertyString = computeRewards ? "R" : "P";
    propertyString += storm::solver::minimize(dir) ? "min" : "max";
    propertyString += "=? [F \"target\"]";
    std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
    return storm::api::extractFormulasFromProperties(propertyVector).front();
}
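
// For illustration (not part of the original file): the strings assembled above are
// ordinary Storm properties over the "target" label, for example
//   computeRewards = false, dir = Maximize  ->  "Pmax=? [F \"target\"]"
//   computeRewards = true,  dir = Minimize  ->  "Rmin=? [F \"target\"]"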

template<typename PomdpType, typename BeliefValueType>
storm::modelchecker::CheckTask<storm::logic::Formula, typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType>
BeliefMdpExplorer<PomdpType, BeliefValueType>::createStandardCheckTask(std::shared_ptr<storm::logic::Formula const> &property) {
    // Note: The property should not run out of scope after calling this because the task only stores the property by reference.
    // Therefore, this method needs the property by reference (and not const reference).
    auto task = storm::api::createTask<ValueType>(property, false);
    storm::modelchecker::ExplicitModelCheckerHint<ValueType> hint;
    hint.setResultHint(values);
    auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
    task.setHint(hintPtr);
    task.setProduceSchedulers();
    return task;
}

template<typename PomdpType, typename BeliefValueType>
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getCurrentMdpState() const {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    if (stateRemapping.find(currentMdpState) != stateRemapping.end()) {
        return stateRemapping.at(currentMdpState);
    } else {
        return currentMdpState;
    }
}

template<typename PomdpType, typename BeliefValueType>
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getCurrentBeliefId() const {
    STORM_LOG_ASSERT(status == Status::Exploring, "Method call is invalid in current status.");
    return getBeliefId(currentMdpState);
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::internalAddTransition(uint64_t const &row, MdpStateType const &column, ValueType const &value) {
    STORM_LOG_ASSERT(row <= exploredMdpTransitions.size(), "Skipped at least one row.");
    if (row == exploredMdpTransitions.size()) {
        exploredMdpTransitions.emplace_back();
    }
    STORM_LOG_ASSERT(exploredMdpTransitions[row].count(column) == 0, "Trying to insert multiple transitions to the same state.");
    exploredMdpTransitions[row][column] = value;
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::internalAddRowGroupIndex() {
    exploredChoiceIndices.push_back(getCurrentNumberOfMdpChoices());
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::markAsGridBelief(BeliefId const &beliefId) {
    gridBeliefs.insert(beliefId);
}

template<typename PomdpType, typename BeliefValueType>
bool BeliefMdpExplorer<PomdpType, BeliefValueType>::isMarkedAsGridBelief(BeliefId const &beliefId) {
    return gridBeliefs.count(beliefId) > 0;
}

template<typename PomdpType, typename BeliefValueType>
typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getExploredMdpState(
    BeliefId const &beliefId) const {
    if (beliefId < exploredBeliefIds.size() && exploredBeliefIds.get(beliefId)) {
        return beliefIdToMdpStateMap.at(beliefId);
    } else {
        return noState();
    }
}

template<typename PomdpType, typename BeliefValueType>
void BeliefMdpExplorer<PomdpType, BeliefValueType>::insertValueHints(ValueType const &lowerBound, ValueType const &upperBound) {
    lowerValueBounds.push_back(lowerBound);
    upperValueBounds.push_back(upperBound);
    // Take the middle value as a hint
    values.push_back((lowerBound + upperBound) / storm::utility::convertNumber<ValueType, uint64_t>(2));
    STORM_LOG_ASSERT(lowerValueBounds.size() == getCurrentNumberOfMdpStates(), "Value vectors have a different size than the number of available states.");
    STORM_LOG_ASSERT(lowerValueBounds.size() == upperValueBounds.size() && values.size() == upperValueBounds.size(), "Value vectors have inconsistent sizes.");
}
1154
1155template<typename PomdpType, typename BeliefValueType>
1156typename BeliefMdpExplorer<PomdpType, BeliefValueType>::MdpStateType BeliefMdpExplorer<PomdpType, BeliefValueType>::getOrAddMdpState(
1157 BeliefId const &beliefId, ValueType const &transitionValue) {
1158 exploredBeliefIds.grow(beliefId + 1, false);
1159 if (exploredBeliefIds.get(beliefId)) {
1160 if (explHeuristic == ExplorationHeuristic::ProbabilityPrio &&
1161 mdpStatesToExploreStatePrio.find(beliefIdToMdpStateMap[beliefId]) != mdpStatesToExploreStatePrio.end()) {
1162 // We check if the value is higher than the current priority and update if necessary
1163 auto newPrio = probabilityEstimation[getCurrentMdpState()] * transitionValue;
1164 if (newPrio > mdpStatesToExploreStatePrio[beliefIdToMdpStateMap[beliefId]]) {
1165 // Erase the state from the "queue" map and re-insert it with the new value
1166 auto range = mdpStatesToExplorePrioState.equal_range(mdpStatesToExploreStatePrio[beliefIdToMdpStateMap[beliefId]]);
1167 for (auto i = range.first; i != range.second; ++i) {
1168 if (i->second == beliefIdToMdpStateMap[beliefId]) {
1169 mdpStatesToExplorePrioState.erase(i);
1170 break;
1171 }
1172 }
1173 mdpStatesToExplorePrioState.emplace(newPrio, beliefIdToMdpStateMap[beliefId]);
1174 mdpStatesToExploreStatePrio[beliefIdToMdpStateMap[beliefId]] = newPrio;
1175 }
1176 }
1177 return beliefIdToMdpStateMap[beliefId];
1178 } else {
1179 // This state needs exploration
1180 exploredBeliefIds.set(beliefId, true);
1181
1182 // If this is a restart of the exploration, we might still have an MDP state for this belief
1183 if (exploredMdp) {
1184 auto findRes = beliefIdToMdpStateMap.find(beliefId);
1185 if (findRes != beliefIdToMdpStateMap.end()) {
1186 ValueType currentPrio;
1187 switch (explHeuristic) {
1188 case ExplorationHeuristic::BreadthFirst:
1189 currentPrio = prio;
1190 prio = prio - storm::utility::one<ValueType>();
1191 break;
1192 case ExplorationHeuristic::LowerBoundPrio:
1193 currentPrio = getLowerValueBoundAtCurrentState();
1194 break;
1195 case ExplorationHeuristic::UpperBoundPrio:
1196 currentPrio = getUpperValueBoundAtCurrentState();
1197 break;
1198 case ExplorationHeuristic::GapPrio:
1199 currentPrio = getUpperValueBoundAtCurrentState() - getLowerValueBoundAtCurrentState();
1200 break;
1201 case ExplorationHeuristic::ProbabilityPrio:
1202 if (getCurrentMdpState() != noState()) {
1203 currentPrio = probabilityEstimation[getCurrentMdpState()] * transitionValue;
1204 } else {
1205 currentPrio = storm::utility::one<ValueType>();
1206 }
1207 break;
1208 default:
1209 STORM_LOG_THROW(false, storm::exceptions::NotImplementedException, "Other heuristics not implemented yet");
1210 }
1211 mdpStatesToExploreStatePrio[findRes->second] = currentPrio;
1212 mdpStatesToExplorePrioState.emplace(currentPrio, findRes->second);
1213 return findRes->second;
1214 }
1215 }
1216 // At this point we need to add a new MDP state
1217 MdpStateType result = getCurrentNumberOfMdpStates();
1218 assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size());
1219 mdpStateToBeliefIdMap.push_back(beliefId);
1220 beliefIdToMdpStateMap[beliefId] = result;
1221 insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId));
1222 ValueType currentPrio;
1223 switch (explHeuristic) {
1224 case ExplorationHeuristic::BreadthFirst:
1225 currentPrio = prio;
1226 prio = prio - storm::utility::one<ValueType>();
1227 break;
1228 case ExplorationHeuristic::LowerBoundPrio:
1229 currentPrio = getLowerValueBoundAtCurrentState();
1230 break;
1231 case ExplorationHeuristic::UpperBoundPrio:
1232 currentPrio = getUpperValueBoundAtCurrentState();
1233 break;
1234 case ExplorationHeuristic::GapPrio:
1235 currentPrio = getUpperValueBoundAtCurrentState() - getLowerValueBoundAtCurrentState();
1236 break;
1237 case ExplorationHeuristic::ProbabilityPrio:
1238 if (getCurrentMdpState() != noState()) {
1239 currentPrio = probabilityEstimation[getCurrentMdpState()] * transitionValue;
1240 } else {
1241 currentPrio = storm::utility::one<ValueType>();
1242 }
1243 break;
1244 default:
1245 STORM_LOG_THROW(false, storm::exceptions::NotImplementedException, "Other heuristics not implemented yet");
1246 }
1247 mdpStatesToExploreStatePrio[result] = currentPrio;
1248 mdpStatesToExplorePrioState.emplace(currentPrio, result);
1249 return result;
1250 }
1251}
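// The two members mdpStatesToExplorePrioState (priority -> state multimap) and
// mdpStatesToExploreStatePrio (state -> priority map) together implement an updatable
// priority queue for the exploration frontier: the map yields a state's current priority,
// and equal_range on the multimap locates the stale entry to erase before re-inserting the
// state with its new priority. A minimal stand-alone sketch of the same pattern (names and
// the ordering direction are illustrative, not taken from this file):
//
//     std::multimap<double, uint64_t, std::greater<>> queue;  // priority -> state
//     std::map<uint64_t, double> prioOf;                      // state -> priority
//     auto updatePriority = [&](uint64_t state, double newPrio) {
//         auto range = queue.equal_range(prioOf[state]);
//         for (auto it = range.first; it != range.second; ++it) {
//             if (it->second == state) {
//                 queue.erase(it);  // drop the stale queue entry
//                 break;
//             }
//         }
//         queue.emplace(newPrio, state);
//         prioOf[state] = newPrio;
//     };
//
// Under the ProbabilityPrio heuristic, the priority of a successor is the estimated
// probability of reaching the current state multiplied by the transition probability, so
// more likely beliefs are explored first.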
1252
1253template<typename PomdpType, typename BeliefValueType>
1254void BeliefMdpExplorer<PomdpType, BeliefValueType>::addChoiceLabelToCurrentState(uint64_t const &localActionIndex, std::string const &label) {
1255 mdpStateToChoiceLabelsMap[currentMdpState][localActionIndex] = label;
1256}
1257
1258template<typename PomdpType, typename BeliefValueType>
1259std::vector<typename BeliefMdpExplorer<PomdpType, BeliefValueType>::BeliefId> BeliefMdpExplorer<PomdpType, BeliefValueType>::getBeliefsInMdp() {
1260 return mdpStateToBeliefIdMap;
1261}
1262
1263template<typename PomdpType, typename BeliefValueType>
1264std::vector<typename BeliefMdpExplorer<PomdpType, BeliefValueType>::BeliefId> BeliefMdpExplorer<PomdpType, BeliefValueType>::getBeliefsWithObservationInMdp(
1265 uint32_t obs) const {
1266 std::vector<BeliefId> res;
1267 for (auto const &belief : mdpStateToBeliefIdMap) {
1268 if (belief != beliefManager->noId()) {
1269 if (beliefManager->getBeliefObservation(belief) == obs) {
1270 res.push_back(belief);
1271 }
1272 }
1273 }
1274 return res;
1275}
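// Entries equal to beliefManager->noId() belong to auxiliary MDP states without an
// associated belief (such as the extra bottom and target states) and are skipped here.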
1276
1277template<typename PomdpType, typename BeliefValueType>
1278typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType BeliefMdpExplorer<PomdpType, BeliefValueType>::getTrivialUpperBoundAtPOMDPState(
1279 uint64_t const &pomdpState) {
1280 return pomdpValueBounds.getSmallestUpperBound(pomdpState);
1281}
1282
1283template<typename PomdpType, typename BeliefValueType>
1284typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType BeliefMdpExplorer<PomdpType, BeliefValueType>::getTrivialLowerBoundAtPOMDPState(
1285 uint64_t const &pomdpState) {
1286 return pomdpValueBounds.getHighestLowerBound(pomdpState);
1287}
1288
1289template<typename PomdpType, typename BeliefValueType>
1290void BeliefMdpExplorer<PomdpType, BeliefValueType>::setExtremeValueBound(storm::pomdp::storage::ExtremePOMDPValueBound<ValueType> valueBound) {
1291 extremeValueBound = valueBound;
1292}
1293
1294template<typename PomdpType, typename BeliefValueType>
1295void BeliefMdpExplorer<PomdpType, BeliefValueType>::setFMSchedValueList(std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> valueList) {
1296 fmSchedulerValueList = valueList;
1297}
1298
1299template<typename PomdpType, typename BeliefValueType>
1300uint64_t BeliefMdpExplorer<PomdpType, BeliefValueType>::getNrOfMemoryNodesForObservation(uint32_t observation) const {
1301 return fmSchedulerValueList.at(observation).size();
1302}
1303
1304template<typename PomdpType, typename BeliefValueType>
1309
1310template<typename PomdpType, typename BeliefValueType>
1314
1315template<typename PomdpType, typename BeliefValueType>
1316uint64_t BeliefMdpExplorer<PomdpType, BeliefValueType>::getNrSchedulersForUpperBounds() {
1317 return pomdpValueBounds.upper.size();
1318}
1319
1320template<typename PomdpType, typename BeliefValueType>
1321uint64_t BeliefMdpExplorer<PomdpType, BeliefValueType>::getNrSchedulersForLowerBounds() {
1322 return pomdpValueBounds.lower.size();
1323}
1324
1325template<typename PomdpType, typename BeliefValueType>
1326storm::storage::Scheduler<typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType> BeliefMdpExplorer<PomdpType, BeliefValueType>::getLowerValueBoundScheduler(
1327 uint64_t schedulerId) const {
1328 STORM_LOG_ASSERT(!pomdpValueBounds.lowerSchedulers.empty(), "Requested lower bound scheduler but none were available.");
1329 STORM_LOG_ASSERT(pomdpValueBounds.lowerSchedulers.size() > schedulerId,
1330 "Requested lower value bound scheduler with ID " << schedulerId << " not available.");
1331 return pomdpValueBounds.lowerSchedulers[schedulerId];
1332}
1333
1334template<typename PomdpType, typename BeliefValueType>
1335storm::storage::Scheduler<typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType> BeliefMdpExplorer<PomdpType, BeliefValueType>::getUpperValueBoundScheduler(
1336 uint64_t schedulerId) const {
1337 STORM_LOG_ASSERT(!pomdpValueBounds.upperSchedulers.empty(), "Requested upper bound scheduler but none were available.");
1338 STORM_LOG_ASSERT(pomdpValueBounds.upperSchedulers.size() > schedulerId,
1339 "Requested upper value bound scheduler with ID " << schedulerId << " not available.");
1340 return pomdpValueBounds.upperSchedulers[schedulerId];
1341}
1342
1343template<typename PomdpType, typename BeliefValueType>
1344std::vector<storm::storage::Scheduler<typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType>>
1345BeliefMdpExplorer<PomdpType, BeliefValueType>::getLowerValueBoundSchedulers() const {
1346 STORM_LOG_ASSERT(!pomdpValueBounds.lowerSchedulers.empty(), "Requested lower bound schedulers but none were available.");
1347 return pomdpValueBounds.lowerSchedulers;
1348}
1349
1350template<typename PomdpType, typename BeliefValueType>
1351std::vector<storm::storage::Scheduler<typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType>>
1352BeliefMdpExplorer<PomdpType, BeliefValueType>::getUpperValueBoundSchedulers() const {
1353 STORM_LOG_ASSERT(!pomdpValueBounds.upperSchedulers.empty(), "Requested upper bound schedulers but none were available.");
1354 return pomdpValueBounds.upperSchedulers;
1355}
1356
1357template<typename PomdpType, typename BeliefValueType>
1358bool BeliefMdpExplorer<PomdpType, BeliefValueType>::hasFMSchedulerValues() const {
1360 return !fmSchedulerValueList.empty();
1361}
1362
1363template<typename PomdpType, typename BeliefValueType>
1364std::vector<BeliefValueType> BeliefMdpExplorer<PomdpType, BeliefValueType>::computeProductWithSparseMatrix(
1365 BeliefId const &beliefId, storm::storage::SparseMatrix<BeliefValueType> &matrix) const {
1366 return beliefManager->computeMatrixBeliefProduct(beliefId, matrix);
1367}
1368
1369template<typename PomdpType, typename BeliefValueType>
1370void BeliefMdpExplorer<PomdpType, BeliefValueType>::adjustActions(uint64_t totalNumberOfActions) {
1371 uint64_t currentRowGroupSize = getSizeOfCurrentRowGroup();
1372 assert(totalNumberOfActions != currentRowGroupSize);
1373 if (totalNumberOfActions > currentRowGroupSize) {
1374 uint64_t numberOfActionsToAdd = totalNumberOfActions - currentRowGroupSize;
1375 exploredMdpTransitions.insert(exploredMdpTransitions.begin() + (exploredChoiceIndices[getCurrentMdpState() + 1]), numberOfActionsToAdd,
1376 std::map<MdpStateType, ValueType>());
1377 for (uint64_t i = getCurrentMdpState() + 1; i < exploredChoiceIndices.size(); i++) {
1378 exploredChoiceIndices[i] += numberOfActionsToAdd;
1379 }
1380 return;
1381 }
1382 if (totalNumberOfActions < currentRowGroupSize) {
1383 uint64_t numberOfActionsToRemove = currentRowGroupSize - totalNumberOfActions;
1384 exploredMdpTransitions.erase(exploredMdpTransitions.begin() + (exploredChoiceIndices[getCurrentMdpState() + 1]) - numberOfActionsToRemove,
1385 exploredMdpTransitions.begin() + (exploredChoiceIndices[getCurrentMdpState() + 1]));
1386 for (uint64_t i = getCurrentMdpState() + 1; i < exploredChoiceIndices.size(); i++) {
1387 exploredChoiceIndices[i] -= numberOfActionsToRemove;
1388 }
1389 return;
1390 }
1391}
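// Inserting or erasing rows in the middle of exploredMdpTransitions shifts all subsequent
// row groups, so every entry of exploredChoiceIndices after the current state is adjusted
// by the same offset. Illustrative example (hypothetical numbers): with
// exploredChoiceIndices = [0, 2, 5] and the current state 0 grown from two to three
// actions, one empty row is inserted at position 2 and the vector becomes [0, 3, 6].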
1392
1393template class BeliefMdpExplorer<storm::models::sparse::Pomdp<double>>;
1394
1395template class BeliefMdpExplorer<storm::models::sparse::Pomdp<double>, storm::RationalNumber>;
1396
1397template class BeliefMdpExplorer<storm::models::sparse::Pomdp<storm::RationalNumber>>;
1398
1400} // namespace builder
1401} // namespace storm