22template<
typename PomdpType,
typename BeliefValueType>
24 ValueType const &maxProb, uint64_t
const &count)
25 : observationProbability(obsProb), maxProbabilityToSuccessorWithObs(maxProb), successorWithObsCount(count) {
29template<
typename PomdpType,
typename BeliefValueType>
37template<
typename PomdpType,
typename BeliefValueType>
41 : beliefManager(beliefManager), pomdpValueBounds(pomdpValueBounds), explHeuristic(explorationHeuristic), status(
Status::
Uninitialized) {
45template<
typename PomdpType,
typename BeliefValueType>
47 return *beliefManager;
// NOTE(review): damaged extraction — signature line missing; braces dropped.
// Fragment: presumably startNewExploration(extraTargetStateValue, extraBottomStateValue)
// — resets all exploration bookkeeping, optionally creates dedicated absorbing
// bottom/target states whose single choice is labelled "loop", then enqueues the
// initial belief. Confirm the parameter order against the original header.
50template<
typename PomdpType,
typename BeliefValueType>
52 std::optional<ValueType> extraBottomStateValue) {
53 status = Status::Exploring;
55 prio = storm::utility::zero<ValueType>();
// Reset every per-exploration data structure.
57 mdpStateToBeliefIdMap.clear();
58 beliefIdToMdpStateMap.clear();
59 exploredBeliefIds.clear();
60 exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(),
false);
61 mdpStatesToExplorePrioState.clear();
62 mdpStatesToExploreStatePrio.clear();
63 stateRemapping.clear();
64 lowerValueBounds.clear();
65 upperValueBounds.clear();
67 exploredMdpTransitions.clear();
68 exploredChoiceIndices.clear();
69 previousChoiceIndices.clear();
70 probabilityEstimation.clear();
71 mdpActionRewards.clear();
73 truncatedStates.clear();
74 clippedStates.clear();
75 delayedExplorationChoices.clear();
76 clippingTransitionRewards.clear();
77 mdpStateToChoiceLabelsMap.clear();
78 optimalChoices = std::nullopt;
79 optimalChoicesReachableMdpStates = std::nullopt;
81 exploredMdp =
nullptr;
82 internalAddRowGroupIndex();
// Optionally create an absorbing "bottom" state with the given value hint and a
// self-loop choice labelled "loop".
85 if (extraBottomStateValue) {
86 currentMdpState = getCurrentNumberOfMdpStates();
87 extraBottomState = currentMdpState;
88 mdpStateToBeliefIdMap.push_back(beliefManager->noId());
89 probabilityEstimation.push_back(storm::utility::zero<ValueType>());
90 insertValueHints(extraBottomStateValue.value(), extraBottomStateValue.value());
92 internalAddTransition(getStartOfCurrentRowGroup(), extraBottomState.value(), storm::utility::one<ValueType>());
93 mdpStateToChoiceLabelsMap[getStartOfCurrentRowGroup()][0] =
"loop";
94 internalAddRowGroupIndex();
97 extraBottomState = std::nullopt;
// Optionally create an absorbing "target" state, additionally marked in the
// targetStates bit vector.
99 if (extraTargetStateValue) {
100 currentMdpState = getCurrentNumberOfMdpStates();
101 extraTargetState = currentMdpState;
102 mdpStateToBeliefIdMap.push_back(beliefManager->noId());
103 probabilityEstimation.push_back(storm::utility::zero<ValueType>());
104 insertValueHints(extraTargetStateValue.value(), extraTargetStateValue.value());
106 internalAddTransition(getStartOfCurrentRowGroup(), extraTargetState.value(), storm::utility::one<ValueType>());
107 mdpStateToChoiceLabelsMap[getStartOfCurrentRowGroup()][0] =
"loop";
108 internalAddRowGroupIndex();
110 targetStates.grow(getCurrentNumberOfMdpStates(),
false);
111 targetStates.set(extraTargetState.value(),
true);
114 extraTargetState = std::nullopt;
116 currentMdpState = noState();
// Seed the exploration with the initial belief.
119 initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief());
// NOTE(review): damaged extraction — signature line missing; braces dropped.
// Fragment: presumably a restart-exploration routine — requires a previously
// checked/finished model, resets queues/remapping while reusing choice indices
// and rewards from the existing exploredMdp, and restores the extra bottom/target
// states' old behavior before re-seeding with the initial belief.
122template<
typename PomdpType,
typename BeliefValueType>
124 STORM_LOG_ASSERT(status == Status::ModelChecked || status == Status::ModelFinished,
"Method call is invalid in current status.");
125 status = Status::Exploring;
127 prio = storm::utility::zero<ValueType>();
128 stateRemapping.clear();
129 exploredBeliefIds.clear();
130 exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(),
false);
131 exploredMdpTransitions.clear();
132 exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices());
133 clippingTransitionRewards.clear();
// Both index vectors start from the old MDP's row-group layout.
134 previousChoiceIndices = exploredMdp->getNondeterministicChoiceIndices();
135 exploredChoiceIndices = exploredMdp->getNondeterministicChoiceIndices();
136 mdpActionRewards.clear();
137 probabilityEstimation.clear();
138 if (exploredMdp->hasRewardModel()) {
140 mdpActionRewards = exploredMdp->getUniqueRewardModel().getStateActionRewardVector();
145 delayedExplorationChoices.clear();
146 mdpStatesToExplorePrioState.clear();
147 mdpStatesToExploreStatePrio.clear();
// Keep the extra bottom/target states (if any) exactly as in the old MDP.
150 if (extraBottomState) {
151 currentMdpState = extraBottomState.value();
152 restoreOldBehaviorAtCurrentState(0);
154 if (extraTargetState) {
155 currentMdpState = extraTargetState.value();
156 restoreOldBehaviorAtCurrentState(0);
157 targetStates.set(extraTargetState.value(),
true);
159 currentMdpState = noState();
162 initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief());
// NOTE(review): damaged extraction — signature lines missing; braces dropped.
// Fragment: snapshot of the full exploration state into explorationStorage
// (deep copies via explicit copy construction of each container).
165template<
typename PomdpType,
typename BeliefValueType>
167 explorationStorage.storedMdpStateToBeliefIdMap = std::vector<BeliefId>(mdpStateToBeliefIdMap);
168 explorationStorage.storedBeliefIdToMdpStateMap = std::map<BeliefId, MdpStateType>(beliefIdToMdpStateMap);
170 explorationStorage.storedMdpStateToChoiceLabelsMap = std::map<BeliefId, std::map<uint64_t, std::string>>(mdpStateToChoiceLabelsMap);
171 explorationStorage.storedMdpStatesToExplorePrioState = std::multimap<ValueType, uint64_t>(mdpStatesToExplorePrioState);
172 explorationStorage.storedMdpStatesToExploreStatePrio = std::map<uint64_t, ValueType>(mdpStatesToExploreStatePrio);
173 explorationStorage.storedProbabilityEstimation = std::vector<ValueType>(probabilityEstimation);
174 explorationStorage.storedExploredMdpTransitions = std::vector<std::map<MdpStateType, ValueType>>(exploredMdpTransitions);
175 explorationStorage.storedExploredChoiceIndices = std::vector<MdpStateType>(exploredChoiceIndices);
176 explorationStorage.storedMdpActionRewards = std::vector<ValueType>(mdpActionRewards);
177 explorationStorage.storedClippingTransitionRewards = std::map<MdpStateType, ValueType>(clippingTransitionRewards);
178 explorationStorage.storedCurrentMdpState = currentMdpState;
179 explorationStorage.storedStateRemapping = std::map<MdpStateType, MdpStateType>(stateRemapping);
180 explorationStorage.storedNextId = nextId;
181 explorationStorage.storedPrio =
ValueType(prio);
182 explorationStorage.storedLowerValueBounds = std::vector<ValueType>(lowerValueBounds);
183 explorationStorage.storedUpperValueBounds = std::vector<ValueType>(upperValueBounds);
184 explorationStorage.storedValues = std::vector<ValueType>(values);
// Fragment: the inverse operation — restores every field from explorationStorage,
// sets status back to Exploring, and clears the derived/clipping bookkeeping
// (truncated/clipped/delayed sets, optimal-choice caches, exploredMdp).
189template<
typename PomdpType,
typename BeliefValueType>
191 mdpStateToBeliefIdMap = std::vector<BeliefId>(explorationStorage.storedMdpStateToBeliefIdMap);
192 beliefIdToMdpStateMap = std::map<BeliefId, MdpStateType>(explorationStorage.storedBeliefIdToMdpStateMap);
194 mdpStateToChoiceLabelsMap = std::map<BeliefId, std::map<uint64_t, std::string>>(explorationStorage.storedMdpStateToChoiceLabelsMap);
195 mdpStatesToExplorePrioState = std::multimap<ValueType, uint64_t>(explorationStorage.storedMdpStatesToExplorePrioState);
196 mdpStatesToExploreStatePrio = std::map<uint64_t, ValueType>(explorationStorage.storedMdpStatesToExploreStatePrio);
197 probabilityEstimation = std::vector<ValueType>(explorationStorage.storedProbabilityEstimation);
198 exploredMdpTransitions = std::vector<std::map<MdpStateType, ValueType>>(explorationStorage.storedExploredMdpTransitions);
199 exploredChoiceIndices = std::vector<MdpStateType>(explorationStorage.storedExploredChoiceIndices);
200 mdpActionRewards = std::vector<ValueType>(explorationStorage.storedMdpActionRewards);
201 clippingTransitionRewards = std::map<MdpStateType, ValueType>(explorationStorage.storedClippingTransitionRewards);
202 currentMdpState = explorationStorage.storedCurrentMdpState;
203 stateRemapping = std::map<MdpStateType, MdpStateType>(explorationStorage.storedStateRemapping);
204 nextId = explorationStorage.storedNextId;
205 prio =
ValueType(explorationStorage.storedPrio);
206 lowerValueBounds = explorationStorage.storedLowerValueBounds;
207 upperValueBounds = explorationStorage.storedUpperValueBounds;
208 values = explorationStorage.storedValues;
209 status = Status::Exploring;
210 targetStates = explorationStorage.storedTargetStates;
212 truncatedStates.clear();
213 clippedStates.clear();
214 delayedExplorationChoices.clear();
215 optimalChoices = std::nullopt;
216 optimalChoicesReachableMdpStates = std::nullopt;
217 exploredMdp =
nullptr;
// NOTE(review): damaged extraction — signature lines missing; braces dropped.
// Fragment: hasUnexploredState-style query — true while the exploration queue
// (mdpStatesToExploreStatePrio) is non-empty.
221template<
typename PomdpType,
typename BeliefValueType>
223 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
224 return !mdpStatesToExploreStatePrio.empty();
// Fragment: collects the ids of all states still queued for exploration.
227template<
typename PomdpType,
typename BeliefValueType>
229 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
230 std::vector<uint64_t> res;
231 res.reserve(mdpStatesToExploreStatePrio.size());
232 for (
auto const &entry : mdpStatesToExploreStatePrio) {
233 res.push_back(entry.first);
// Fragment: pops the highest-priority state (rbegin of the prio->state multimap)
// from both queue views, records a remapping for freshly discovered states whose
// id differs from nextId, and returns the belief id of the new current state.
238template<
typename PomdpType,
typename BeliefValueType>
240 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
242 if (currentMdpState != noState() && mdpStatesToExplorePrioState.rbegin()->second == exploredChoiceIndices.size()) {
243 internalAddRowGroupIndex();
247 currentMdpState = mdpStatesToExplorePrioState.rbegin()->second;
248 auto currprio = mdpStatesToExplorePrioState.rbegin()->first;
// equal_range is needed because several states may share the same priority;
// erase only the entry for the chosen state.
249 auto range = mdpStatesToExplorePrioState.equal_range(currprio);
250 for (
auto i = range.first; i != range.second; ++i) {
251 if (i->second == currentMdpState) {
252 mdpStatesToExplorePrioState.erase(i);
256 mdpStatesToExploreStatePrio.erase(currentMdpState);
257 if (currentMdpState != nextId && !currentStateHasOldBehavior()) {
258 stateRemapping[currentMdpState] = nextId;
259 STORM_LOG_DEBUG(
"Explore state " << currentMdpState <<
" [Bel " << getCurrentBeliefId() <<
" " << beliefManager->toString(getCurrentBeliefId())
262 STORM_LOG_DEBUG(
"Explore state " << currentMdpState <<
" [Bel " << getCurrentBeliefId() <<
" " << beliefManager->toString(getCurrentBeliefId()) <<
"]"
266 if (!currentStateHasOldBehavior()) {
// The popped priority doubles as the reachability-probability estimate of the
// fresh state.
270 probabilityEstimation.push_back(currprio);
273 return mdpStateToBeliefIdMap[currentMdpState];
// NOTE(review): damaged extraction — signature lines missing; braces dropped.
// Fragment: adds transitions from the current state's given local action to the
// extra bottom and/or target states (values presumably bottomStateValue /
// targetStateValue parameters; confirm against the original header).
276template<
typename PomdpType,
typename BeliefValueType>
279 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
280 STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < previousChoiceIndices[currentMdpState + 1] - previousChoiceIndices[currentMdpState] ||
281 getCurrentStateWasTruncated(),
282 "Action index " << localActionIndex <<
" was not valid at non-truncated state " << currentMdpState <<
" of the previously explored MDP.");
283 uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
285 STORM_LOG_ASSERT(extraBottomState.has_value(),
"Requested a transition to the extra bottom state but there is none.");
286 internalAddTransition(row, extraBottomState.value(), bottomStateValue);
289 STORM_LOG_ASSERT(extraTargetState.has_value(),
"Requested a transition to the extra target state but there is none.");
290 internalAddTransition(row, extraTargetState.value(), targetStateValue);
// Fragment: adds a self-loop transition with the given value at the current
// state's given local action.
294template<
typename PomdpType,
typename BeliefValueType>
296 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
297 STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < previousChoiceIndices[currentMdpState + 1] - previousChoiceIndices[currentMdpState] ||
298 getCurrentStateWasTruncated(),
299 "Action index " << localActionIndex <<
" was not valid at non-truncated state " << currentMdpState <<
" of the previously explored MDP.");
300 uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
301 internalAddTransition(row, getCurrentMdpState(), value);
// Fragment: adds a transition to a (possibly new) belief; with ignoreNewBeliefs
// only already-explored targets are linked (getExploredMdpState), otherwise the
// target is created on demand via getOrAddMdpState.
304template<
typename PomdpType,
typename BeliefValueType>
306 ValueType const &value,
bool ignoreNewBeliefs) {
307 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
308 STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < previousChoiceIndices[currentMdpState + 1] - previousChoiceIndices[currentMdpState] ||
309 getCurrentStateWasTruncated(),
310 "Action index " << localActionIndex <<
" was not valid at non-truncated state " << currentMdpState <<
" of the previously explored MDP.");
313 if (ignoreNewBeliefs) {
314 column = getExploredMdpState(transitionTarget);
315 if (column == noState()) {
319 column = getOrAddMdpState(transitionTarget, value);
321 if (getCurrentMdpState() == exploredChoiceIndices.size()) {
322 internalAddRowGroupIndex();
324 uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
325 internalAddTransition(row, column, value);
// NOTE(review): damaged extraction — signature lines missing; braces dropped.
// Fragment: computes the action reward for the current belief/action from the
// belief manager (plus an extraReward term) and stores it in mdpActionRewards.
329template<
typename PomdpType,
typename BeliefValueType>
331 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
332 if (getCurrentNumberOfMdpChoices() > mdpActionRewards.size()) {
333 mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
335 uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
336 mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward;
// Fragment: directly assigns a given reward value to the current action's row.
339template<
typename PomdpType,
typename BeliefValueType>
341 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
342 if (getCurrentNumberOfMdpChoices() > mdpActionRewards.size()) {
343 mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
345 uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
346 mdpActionRewards[row] = rewardValue;
// Fragment: records a clipping transition reward for the current action's row.
349template<
typename PomdpType,
typename BeliefValueType>
351 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
352 uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
353 clippingTransitionRewards[row] = rewardValue;
// Fragment: marks the current state as a target state.
356template<
typename PomdpType,
typename BeliefValueType>
358 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
359 targetStates.grow(getCurrentNumberOfMdpStates(),
false);
360 targetStates.set(getCurrentMdpState(),
true);
// Fragment: marks the current state as truncated.
363template<
typename PomdpType,
typename BeliefValueType>
365 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
366 truncatedStates.grow(getCurrentNumberOfMdpStates(),
false);
367 truncatedStates.set(getCurrentMdpState(),
true);
// Fragment: marks the current state as clipped (which implies truncated).
370template<
typename PomdpType,
typename BeliefValueType>
372 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
373 setCurrentStateIsTruncated();
374 clippedStates.grow(getCurrentNumberOfMdpStates(),
false);
375 clippedStates.set(getCurrentMdpState(),
true);
// Fragment: flags the given local action of the current state as a delayed
// exploration choice.
378template<
typename PomdpType,
typename BeliefValueType>
380 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
381 delayedExplorationChoices.grow(getCurrentNumberOfMdpChoices(),
false);
382 delayedExplorationChoices.set(getStartOfCurrentRowGroup() + localActionIndex,
true);
// NOTE(review): damaged extraction — signature lines missing; braces dropped.
// Fragment: true iff the current state already existed in the previously
// explored MDP ("old behavior").
385template<
typename PomdpType,
typename BeliefValueType>
387 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
388 STORM_LOG_ASSERT(getCurrentMdpState() != noState(),
"Method 'currentStateHasOldBehavior' called but there is no current state.");
389 return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates();
// Fragment: queries the "truncated" label of the current state in the old MDP.
// (The assert messages mention 'actionAtCurrentStateWasOptimal' — looks like a
// copy/paste artifact in the original; verify.)
392template<
typename PomdpType,
typename BeliefValueType>
394 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
395 STORM_LOG_ASSERT(getCurrentMdpState() != noState(),
"Method 'actionAtCurrentStateWasOptimal' called but there is no current state.");
396 STORM_LOG_ASSERT(currentStateHasOldBehavior(),
"Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior");
398 return exploredMdp->getStateLabeling().getStateHasLabel(
"truncated", getCurrentMdpState());
// Fragment: queries the "clipped" label of the current state in the old MDP.
401template<
typename PomdpType,
typename BeliefValueType>
403 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
404 STORM_LOG_ASSERT(getCurrentMdpState() != noState(),
"Method 'actionAtCurrentStateWasOptimal' called but there is no current state.");
405 STORM_LOG_ASSERT(currentStateHasOldBehavior(),
"Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior");
407 return exploredMdp->getStateLabeling().getStateHasLabel(
"clipped", getCurrentMdpState());
// Fragment: whether a given state is reachable under the optimal scheduler
// (requires a prior model-checking run that filled the cache).
410template<
typename PomdpType,
typename BeliefValueType>
412 STORM_LOG_ASSERT(status == Status::ModelChecked,
"Method call is invalid in current status.");
414 "Method 'stateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before.");
415 return optimalChoicesReachableMdpStates->get(mdpState);
// Fragment: whether a given global choice index is optimal.
418template<
typename PomdpType,
typename BeliefValueType>
420 STORM_LOG_ASSERT(status == Status::ModelChecked,
"Method call is invalid in current status.");
421 STORM_LOG_ASSERT(optimalChoices.has_value(),
"Method 'actionIsOptimal' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before.");
422 return optimalChoices->get(globalActionIndex);
// Fragment: same reachability query for the current state during exploration.
425template<
typename PomdpType,
typename BeliefValueType>
427 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
428 STORM_LOG_ASSERT(getCurrentMdpState() != noState(),
"Method 'currentStateIsOptimalSchedulerReachable' called but there is no current state.");
429 STORM_LOG_ASSERT(currentStateHasOldBehavior(),
"Method 'currentStateIsOptimalSchedulerReachable' called but current state has no old behavior");
431 "Method 'currentStateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before.");
432 return optimalChoicesReachableMdpStates->get(getCurrentMdpState());
// Fragment: whether the current state's given local action was optimal in the
// previous exploration (uses the OLD row-group layout, previousChoiceIndices).
435template<
typename PomdpType,
typename BeliefValueType>
437 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
438 STORM_LOG_ASSERT(getCurrentMdpState() != noState(),
"Method 'actionAtCurrentStateWasOptimal' called but there is no current state.");
439 STORM_LOG_ASSERT(currentStateHasOldBehavior(),
"Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior");
441 "Method 'currentStateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before.");
442 uint64_t choice = previousChoiceIndices.at(getCurrentMdpState()) + localActionIndex;
443 return optimalChoices->get(choice);
// Fragment: whether exploration of the given action was flagged "delayed" in the
// old MDP's choice labeling.
446template<
typename PomdpType,
typename BeliefValueType>
448 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
449 STORM_LOG_ASSERT(getCurrentMdpState() != noState(),
"Method 'actionAtCurrentStateWasOptimal' called but there is no current state.");
450 STORM_LOG_ASSERT(currentStateHasOldBehavior(),
"Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior");
452 uint64_t choice = exploredMdp->getNondeterministicChoiceIndices()[getCurrentMdpState()] + localActionIndex;
453 return exploredMdp->hasChoiceLabeling() && exploredMdp->getChoiceLabeling().getLabels().count(
"delayed") > 0 &&
454 exploredMdp->getChoiceLabeling().getChoiceHasLabel(
"delayed", choice);
<doc_update>
// NOTE(review): damaged extraction — signature line missing; braces and several
// case labels of the switch were dropped.
// Fragment: re-adds the old MDP's transitions for the current state's given
// local action, marks newly seen successor beliefs as explored, and enqueues
// successors with a priority chosen by the exploration heuristic (decreasing
// counter, lower/upper bound, gap, or probability estimate).
457template<
typename PomdpType,
typename BeliefValueType>
459 STORM_LOG_ASSERT(currentStateHasOldBehavior(),
"Cannot restore old behavior as the current state does not have any.");
460 STORM_LOG_ASSERT(localActionIndex < previousChoiceIndices[currentMdpState + 1] - previousChoiceIndices[currentMdpState],
461 "Action index " << localActionIndex <<
" was not valid at state " << currentMdpState <<
" of the previously explored MDP.");
463 if (getCurrentMdpState() == exploredChoiceIndices.size()) {
464 internalAddRowGroupIndex();
467 assert(getCurrentMdpState() < previousChoiceIndices.size());
468 assert(getCurrentMdpState() < exploredChoiceIndices.size());
// Translate the old row index into the new layout before copying transitions.
469 uint64_t oldChoiceIndex = previousChoiceIndices.at(getCurrentMdpState()) + localActionIndex;
470 uint64_t newChoiceIndex = exploredChoiceIndices.at(getCurrentMdpState()) + localActionIndex;
473 for (
auto const &transition : exploredMdp->getTransitionMatrix().getRow(oldChoiceIndex)) {
474 internalAddTransition(newChoiceIndex, transition.getColumn(), transition.getValue());
476 auto beliefId = getBeliefId(transition.getColumn());
477 if (beliefId != beliefManager->noId()) {
478 if (!exploredBeliefIds.get(beliefId)) {
480 exploredBeliefIds.set(beliefId,
true);
// Priority of the enqueued successor depends on the configured heuristic.
// (Case labels were lost in extraction — confirm the mapping in the original.)
482 switch (explHeuristic) {
485 prio = prio - storm::utility::one<ValueType>();
488 currentPrio = getLowerValueBoundAtCurrentState();
491 currentPrio = getUpperValueBoundAtCurrentState();
494 currentPrio = getUpperValueBoundAtCurrentState() - getLowerValueBoundAtCurrentState();
497 if (getCurrentMdpState() != noState()) {
498 currentPrio = probabilityEstimation[getCurrentMdpState()] * transition.getValue();
500 currentPrio = storm::utility::one<ValueType>();
504 STORM_LOG_THROW(
false, storm::exceptions::NotImplementedException,
"Other heuristics not implemented yet");
506 mdpStatesToExploreStatePrio[transition.getColumn()] = currentPrio;
507 mdpStatesToExplorePrioState.emplace(currentPrio, transition.getColumn());
// NOTE(review): damaged extraction — signature line missing; many statements are
// split mid-token (e.g. "builder." / "addNextValue(...)") and braces were dropped.
// Fragment: finalizes the exploration into a sparse MDP — pads the final row
// group, drops unexplored states, applies the state remapping to belief/label
// maps, builds the transition matrix, attaches "target"/"truncated"/"clipped"
// state labels and per-observation labels, assembles reward models (including
// clipping transition rewards routed to the extra target state), attaches choice
// labels and the "delayed" choice label, and finally constructs exploredMdp.
515template<
typename PomdpType,
typename BeliefValueType>
517 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
518 STORM_LOG_ASSERT(!hasUnexploredState(),
"Finishing exploration not possible if there are still unexplored states.");
522 if (!currentStateHasOldBehavior() || exploredChoiceIndices.back() < getCurrentNumberOfMdpChoices()) {
523 internalAddRowGroupIndex();
526 targetStates.resize(getCurrentNumberOfMdpStates(),
false);
527 truncatedStates.resize(getCurrentNumberOfMdpStates(),
false);
528 clippedStates.resize(getCurrentNumberOfMdpStates(),
false);
529 if (!mdpActionRewards.empty()) {
530 mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
534 currentMdpState = noState();
539 dropUnexploredStates();
543 optimalChoices = std::nullopt;
544 optimalChoicesReachableMdpStates = std::nullopt;
// Apply the fresh-state remapping collected during exploration to the
// belief-id and choice-label maps.
547 if (!stateRemapping.empty()) {
548 std::vector<BeliefId> remappedStateToBeliefIdMap(mdpStateToBeliefIdMap);
549 for (
auto const &entry : stateRemapping) {
550 remappedStateToBeliefIdMap[entry.second] = mdpStateToBeliefIdMap[entry.first];
552 mdpStateToBeliefIdMap = remappedStateToBeliefIdMap;
553 for (
auto const &beliefMdpState : beliefIdToMdpStateMap) {
554 if (stateRemapping.find(beliefMdpState.second) != stateRemapping.end()) {
555 beliefIdToMdpStateMap[beliefMdpState.first] = stateRemapping[beliefMdpState.second];
558 if (!mdpStateToChoiceLabelsMap.empty()) {
559 std::map<BeliefId, std::map<uint64_t, std::string>> temp(mdpStateToChoiceLabelsMap);
560 for (
auto const &entry : stateRemapping) {
561 temp[entry.second] = mdpStateToChoiceLabelsMap[entry.first];
563 mdpStateToChoiceLabelsMap = temp;
// Count entries and build the sparse transition matrix row by row.
568 uint64_t entryCount = 0;
569 for (
auto const &row : exploredMdpTransitions) {
570 entryCount += row.size();
573 getCurrentNumberOfMdpStates());
574 for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) {
575 uint64_t rowIndex = exploredChoiceIndices[groupIndex];
576 uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1];
578 for (; rowIndex < groupEnd; ++rowIndex) {
579 for (
auto const &entry : exploredMdpTransitions[rowIndex]) {
580 if (stateRemapping.find(entry.first) == stateRemapping.end()) {
581 builder.
addNextValue(rowIndex, entry.first, entry.second);
583 builder.
addNextValue(rowIndex, stateRemapping[entry.first], entry.second);
588 auto mdpTransitionMatrix = builder.
build();
// State labeling: target / truncated / clipped plus one label per observation.
594 targetStates.resize(getCurrentNumberOfMdpStates(),
false);
595 mdpLabeling.
addLabel(
"target", std::move(targetStates));
596 truncatedStates.resize(getCurrentNumberOfMdpStates(),
false);
597 mdpLabeling.
addLabel(
"truncated", std::move(truncatedStates));
598 clippedStates.resize(getCurrentNumberOfMdpStates(),
false);
599 mdpLabeling.
addLabel(
"clipped", std::move(clippedStates));
601 for (uint64_t state = 0; state < getCurrentNumberOfMdpStates(); ++state) {
602 if (state == extraBottomState || state == extraTargetState) {
608 STORM_LOG_DEBUG(
"Observation of MDP state " << state <<
" : " << beliefManager->getObservationLabel(mdpStateToBeliefIdMap[state]) <<
"\n");
609 std::string obsLabel = beliefManager->getObservationLabel(mdpStateToBeliefIdMap[state]);
610 uint32_t obsId = beliefManager->getBeliefObservation(mdpStateToBeliefIdMap[state]);
611 if (!obsLabel.empty()) {
616 }
else if (mdpStateToBeliefIdMap[state] != beliefManager->noId()) {
617 std::string obsIdLabel =
"obs_" + std::to_string(obsId);
// Reward models: state-action rewards and, if clipping was used, a transition
// reward matrix that pays the clipping reward on the jump to the extra target.
627 std::unordered_map<std::string, storm::models::sparse::StandardRewardModel<ValueType>> mdpRewardModels;
628 if (!mdpActionRewards.empty()) {
629 mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
630 if (!clippingTransitionRewards.empty()) {
632 clippingTransitionRewards.size(),
true,
true, getCurrentNumberOfMdpStates());
633 for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) {
634 uint64_t rowIndex = exploredChoiceIndices[groupIndex];
635 uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1];
637 for (; rowIndex < groupEnd; ++rowIndex) {
638 if (clippingTransitionRewards.find(rowIndex) != clippingTransitionRewards.end()) {
639 STORM_LOG_ASSERT(extraTargetState.has_value(),
"Requested a transition to the extra target state but there is none.");
640 rewardBuilder.
addNextValue(rowIndex, extraTargetState.value(), clippingTransitionRewards[rowIndex]);
644 auto transitionRewardMatrix = rewardBuilder.
build();
646 std::optional<std::vector<ValueType>>(), std::move(mdpActionRewards), std::move(transitionRewardMatrix)));
648 mdpRewardModels.emplace(
// Choice labeling: per-choice labels collected during exploration plus the
// "delayed" label for delayed exploration choices.
657 if (!mdpStateToChoiceLabelsMap.empty()) {
659 for (
auto const &stateMap : mdpStateToChoiceLabelsMap) {
660 auto rowGroup = stateMap.first;
661 for (
auto const &actionLabel : stateMap.second) {
662 if (!modelComponents.
choiceLabeling->containsLabel(actionLabel.second)) {
665 modelComponents.
choiceLabeling->addLabelToChoice(actionLabel.second, exploredChoiceIndices.at(rowGroup) + actionLabel.first);
670 if (!delayedExplorationChoices.empty()) {
672 delayedExplorationChoices.resize(getCurrentNumberOfMdpChoices(),
false);
673 modelComponents.
choiceLabeling->addLabel(
"delayed", std::move(delayedExplorationChoices));
677 exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents));
678 status = Status::ModelFinished;
679 STORM_LOG_DEBUG(
"Explored Mdp with " << exploredMdp->getNumberOfStates() <<
" states (" << clippedStates.getNumberOfSetBits()
680 <<
" of which were clipped and " << truncatedStates.getNumberOfSetBits() - clippedStates.getNumberOfSetBits()
681 <<
" of which were flagged as truncated).");
// NOTE(review): damaged extraction — signature line missing; braces dropped.
// Fragment: presumably dropUnexploredStates — identifies states whose first row
// has no explored transitions, removes them, and reindexes everything that
// refers to state ids (belief map, transitions, choice indices, extra states,
// target/truncated/clipped bit vectors, initial state, choice labels).
684template<
typename PomdpType,
typename BeliefValueType>
686 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
687 STORM_LOG_ASSERT(!hasUnexploredState(),
"Finishing exploration not possible if there are still unexplored states.");
689 STORM_LOG_ASSERT(exploredMdp,
"Method called although no 'old' MDP is available.");
// Mark relevant (fully explored) states/choices and build old->new index map.
692 storm::storage::BitVector relevantMdpStates(getCurrentNumberOfMdpStates(),
true), relevantMdpChoices(getCurrentNumberOfMdpChoices(),
true);
693 std::vector<MdpStateType> toRelevantStateIndexMap(getCurrentNumberOfMdpStates(), noState());
695 for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) {
696 uint64_t rowIndex = exploredChoiceIndices[groupIndex];
698 if (exploredMdpTransitions[rowIndex].empty()) {
699 relevantMdpChoices.
set(rowIndex,
false);
700 relevantMdpStates.set(groupIndex,
false);
702 toRelevantStateIndexMap[groupIndex] = nextRelevantIndex;
705 uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1];
707 for (++rowIndex; rowIndex < groupEnd; ++rowIndex) {
// A state must be either fully explored or fully unexplored.
709 STORM_LOG_ASSERT(exploredMdpTransitions[rowIndex].empty() != relevantMdpStates.get(groupIndex),
710 "Actions at 'old' MDP state " << groupIndex <<
" were only partly explored.");
711 if (exploredMdpTransitions[rowIndex].empty()) {
712 relevantMdpChoices.
set(rowIndex,
false);
// Nothing to drop if every state is relevant.
717 if (relevantMdpStates.full()) {
722 nextId -= (relevantMdpStates.size() - relevantMdpStates.getNumberOfSetBits());
// Rewrite or erase belief-id -> state entries for dropped states.
727 for (
auto belIdToMdpStateIt = beliefIdToMdpStateMap.begin(); belIdToMdpStateIt != beliefIdToMdpStateMap.end();) {
728 if (relevantMdpStates.get(belIdToMdpStateIt->second)) {
730 belIdToMdpStateIt->second = toRelevantStateIndexMap[belIdToMdpStateIt->second];
734 "Inconsistent exploration information: Unexplored MDPState corresponds to explored beliefId");
737 beliefIdToMdpStateMap.erase(belIdToMdpStateIt++);
// Remap transition target columns to the new state indices.
745 for (
auto &transitions : exploredMdpTransitions) {
746 std::map<MdpStateType, ValueType> newTransitions;
747 for (
auto const &entry : transitions) {
748 STORM_LOG_ASSERT(relevantMdpStates.get(entry.first),
"Relevant state has transition to irrelevant state.");
749 newTransitions.emplace_hint(newTransitions.end(), toRelevantStateIndexMap[entry.first], entry.second);
751 transitions = std::move(newTransitions);
// Compact the row-group index vector to the surviving states.
756 assert(exploredChoiceIndices[0] == 0u);
758 for (
auto const oldState : relevantMdpStates) {
759 if (oldState != newState) {
760 assert(oldState > newState);
761 uint64_t groupSize = getRowGroupSizeOfState(oldState);
762 exploredChoiceIndices.at(newState + 1) = exploredChoiceIndices.at(newState) + groupSize;
766 exploredChoiceIndices.resize(newState + 1);
768 if (!mdpActionRewards.empty()) {
771 if (extraBottomState) {
772 extraBottomState = toRelevantStateIndexMap[extraBottomState.value()];
774 if (extraTargetState) {
775 extraTargetState = toRelevantStateIndexMap[extraTargetState.value()];
// operator% filters a BitVector through another (keep only relevant positions).
777 targetStates = targetStates % relevantMdpStates;
778 truncatedStates = truncatedStates % relevantMdpStates;
779 clippedStates = clippedStates % relevantMdpStates;
780 initialMdpState = toRelevantStateIndexMap[initialMdpState];
787 if (!mdpStateToChoiceLabelsMap.empty()) {
788 auto temp = std::map<BeliefId, std::map<uint64_t, std::string>>();
789 for (
auto const relevantState : relevantMdpStates) {
790 temp[toRelevantStateIndexMap[relevantState]] = mdpStateToChoiceLabelsMap[relevantState];
792 mdpStateToChoiceLabelsMap = temp;
// NOTE(review): damaged extraction — signature lines missing; braces dropped.
// Fragment: returns the explored MDP (asserts exploration finished/checked).
// The class name BeliefMdpExplorer<PomdpType, BeliefValueType> is visible in the
// return type below.
797template<
typename PomdpType,
typename BeliefValueType>
798std::shared_ptr<storm::models::sparse::Mdp<typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType>>
800 STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked,
"Method call is invalid in current status.");
801 STORM_LOG_ASSERT(exploredMdp,
"Tried to get the explored MDP but exploration was not finished yet.");
// Fragment: current number of MDP states = size of the state->belief map.
805template<
typename PomdpType,
typename BeliefValueType>
807 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
808 return mdpStateToBeliefIdMap.size();
// Fragment: current number of MDP choices = number of transition rows.
811template<
typename PomdpType,
typename BeliefValueType>
813 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
814 return exploredMdpTransitions.size();
// Fragment: first row index of the current state's row group.
817template<
typename PomdpType,
typename BeliefValueType>
819 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
820 assert(getCurrentMdpState() < exploredChoiceIndices.size());
821 return exploredChoiceIndices.at(getCurrentMdpState());
// Fragment: size of the current state's row group.
824template<
typename PomdpType,
typename BeliefValueType>
826 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
827 assert(getCurrentMdpState() < exploredChoiceIndices.size() - 1);
828 return exploredChoiceIndices.at(getCurrentMdpState() + 1) - exploredChoiceIndices.at(getCurrentMdpState());
// Fragment: row-group size of an arbitrary state; the last group's size is
// derived from the total row count.
831template<
typename PomdpType,
typename BeliefValueType>
833 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
834 assert(state < exploredChoiceIndices.size());
835 if (state < exploredChoiceIndices.size() - 1) {
836 return exploredChoiceIndices.at(state + 1) - exploredChoiceIndices.at(state);
837 }
else if (state == exploredChoiceIndices.size() - 1) {
838 return exploredMdpTransitions.size() - exploredChoiceIndices.at(state);
// Fragment: predicate comparing the current (truncated, old-behavior) state's
// row-group size against a required action count.
844template<
typename PomdpType,
typename BeliefValueType>
846 return (currentStateHasOldBehavior() && getCurrentStateWasTruncated() && getCurrentMdpState() < exploredChoiceIndices.size() - 1 &&
847 getSizeOfCurrentRowGroup() != numActionsNeeded);
// Fragment: lower value bound of the current state.
850template<
typename PomdpType,
typename BeliefValueType>
852 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
853 return lowerValueBounds[getCurrentMdpState()];
// Fragment: upper value bound of the current state.
856template<
typename PomdpType,
typename BeliefValueType>
858 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
859 return upperValueBounds[getCurrentMdpState()];
// Fragment: lower bound at a belief = maximum weighted sum over all stored
// lower-bound value vectors.
862template<
typename PomdpType,
typename BeliefValueType>
865 STORM_LOG_ASSERT(!pomdpValueBounds.lower.empty(),
"Requested lower value bounds but none were available.");
866 auto it = pomdpValueBounds.lower.begin();
867 ValueType result = beliefManager->getWeightedSum(beliefId, *it);
868 for (++it; it != pomdpValueBounds.lower.end(); ++it) {
869 result = std::max(result, beliefManager->getWeightedSum(beliefId, *it));
// Fragment: upper bound at a belief = minimum weighted sum over all stored
// upper-bound value vectors.
874template<
typename PomdpType,
typename BeliefValueType>
877 STORM_LOG_ASSERT(!pomdpValueBounds.upper.empty(),
"Requested upper value bounds but none were available.");
878 auto it = pomdpValueBounds.upper.begin();
879 ValueType result = beliefManager->getWeightedSum(beliefId, *it);
880 for (++it; it != pomdpValueBounds.upper.end(); ++it) {
881 result = std::min(result, beliefManager->getWeightedSum(beliefId, *it));
// Fragment: lower bound at a belief under a specific scheduler id.
886template<
typename PomdpType,
typename BeliefValueType>
888 BeliefId const &beliefId, uint64_t schedulerId)
const {
889 STORM_LOG_ASSERT(!pomdpValueBounds.lower.empty(),
"Requested lower value bounds but none were available.");
890 STORM_LOG_ASSERT(pomdpValueBounds.lower.size() > schedulerId,
"Requested lower value bound for scheduler with ID " << schedulerId <<
" not available.");
891 return beliefManager->getWeightedSum(beliefId, pomdpValueBounds.lower[schedulerId]);
// Fragment: upper bound at a belief under a specific scheduler id.
894template<
typename PomdpType,
typename BeliefValueType>
896 BeliefId const &beliefId, uint64_t schedulerId)
const {
897 STORM_LOG_ASSERT(!pomdpValueBounds.upper.empty(),
"Requested upper value bounds but none were available.");
898 STORM_LOG_ASSERT(pomdpValueBounds.upper.size() > schedulerId,
"Requested upper value bound for scheduler with ID " << schedulerId <<
" not available.");
899 return beliefManager->getWeightedSum(beliefId, pomdpValueBounds.upper[schedulerId]);
// Fragment: value of a finite-memory scheduler at a belief, indexed by the
// belief's observation and a memory node.
902template<
typename PomdpType,
typename BeliefValueType>
903std::pair<bool, typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType>
905 STORM_LOG_ASSERT(!fmSchedulerValueList.empty(),
"Requested finite memory scheduler value bounds but none were available.");
906 auto obs = beliefManager->getBeliefObservation(beliefId);
907 STORM_LOG_ASSERT(fmSchedulerValueList.size() > obs,
"Requested value bound for observation " << obs <<
" not available.");
909 "Requested value bound for observation " << obs <<
" and memory node " << memoryNode <<
" not available.");
910 return beliefManager->getWeightedSum(beliefId, fmSchedulerValueList.at(obs).at(memoryNode));
913template<
typename PomdpType,
typename BeliefValueType>
915 STORM_LOG_ASSERT(status == Status::ModelFinished,
"Method call is invalid in current status.");
916 STORM_LOG_ASSERT(exploredMdp,
"Tried to compute values but the MDP is not explored");
917 auto property = createStandardProperty(dir, exploredMdp->hasRewardModel());
918 auto task = createStandardCheckTask(property);
920 std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(env, exploredMdp, task));
922 values = std::move(res->asExplicitQuantitativeCheckResult<
ValueType>().getValueVector());
923 scheduler = std::make_shared<storm::storage::Scheduler<ValueType>>(res->asExplicitQuantitativeCheckResult<
ValueType>().getScheduler());
925 "Computed values are smaller than the lower bound.");
927 "Computed values are larger than the upper bound.");
932 status = Status::ModelChecked;
935template<
typename PomdpType,
typename BeliefValueType>
937 return status == Status::ModelChecked;
940template<
typename PomdpType,
typename BeliefValueType>
943 STORM_LOG_ASSERT(status == Status::ModelChecked,
"Method call is invalid in current status.");
947template<
typename PomdpType,
typename BeliefValueType>
948const std::shared_ptr<storm::storage::Scheduler<typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType>> &
950 STORM_LOG_ASSERT(status == Status::ModelChecked,
"Method call is invalid in current status.");
954template<
typename PomdpType,
typename BeliefValueType>
956 STORM_LOG_ASSERT(status == Status::ModelChecked,
"Method call is invalid in current status.");
957 STORM_LOG_ASSERT(exploredMdp,
"Tried to get a value but no MDP was explored.");
958 return getValuesOfExploredMdp()[exploredMdp->getInitialStates().getNextSetIndex(0)];
961template<
typename PomdpType,
typename BeliefValueType>
964 STORM_LOG_ASSERT(status != Status::Uninitialized,
"Method call is invalid in current status.");
965 return mdpStateToBeliefIdMap[exploredMdpState];
968template<
typename PomdpType,
typename BeliefValueType>
970 uint64_t localActionIndex, std::map<uint32_t, SuccessorObservationInformation> &gatheredSuccessorObservations) {
971 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
972 STORM_LOG_ASSERT(currentStateHasOldBehavior(),
"Method call is invalid since the current state has no old behavior");
973 uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex;
974 gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
977template<
typename PomdpType,
typename BeliefValueType>
979 uint64_t mdpChoice, std::map<uint32_t, SuccessorObservationInformation> &gatheredSuccessorObservations) {
980 STORM_LOG_ASSERT(exploredMdp,
"Method call is invalid if no MDP has been explored before");
981 for (
auto const &entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) {
982 auto const &beliefId = getBeliefId(entry.getColumn());
983 if (beliefId != beliefManager->noId()) {
984 auto const &obs = beliefManager->getBeliefObservation(beliefId);
986 auto obsInsertion = gatheredSuccessorObservations.emplace(obs, info);
987 if (!obsInsertion.second) {
989 obsInsertion.first->second.join(info);
991 beliefManager->joinSupport(beliefId, obsInsertion.first->second.support);
996template<
typename PomdpType,
typename BeliefValueType>
999 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
1000 STORM_LOG_ASSERT(currentStateHasOldBehavior(),
"Method call is invalid since the current state has no old behavior");
1001 uint64_t mdpChoice = previousChoiceIndices.at(getCurrentMdpState()) + localActionIndex;
1002 return std::any_of(exploredMdp->getTransitionMatrix().getRow(mdpChoice).begin(), exploredMdp->getTransitionMatrix().getRow(mdpChoice).end(),
1004 return observationSet.get(beliefManager->getBeliefObservation(getBeliefId(i.getColumn())));
1008template<
typename PomdpType,
typename BeliefValueType>
1010 STORM_LOG_ASSERT(status == Status::ModelChecked,
"Method call is invalid in current status.");
1011 upperValueBounds = values;
1014template<
typename PomdpType,
typename BeliefValueType>
1016 STORM_LOG_ASSERT(status == Status::ModelChecked,
"Method call is invalid in current status.");
1017 lowerValueBounds = values;
1020template<
typename PomdpType,
typename BeliefValueType>
1022 bool relativeDifference) {
1023 STORM_LOG_ASSERT(status == Status::ModelChecked,
"Method call is invalid in current status.");
1024 STORM_LOG_ASSERT(exploredMdp,
"Method call is invalid in if no MDP is available.");
1025 STORM_LOG_ASSERT(!optimalChoices.has_value(),
"Tried to compute optimal scheduler but this has already been done before.");
1027 "Tried to compute states that are reachable under an optimal scheduler but this has already been done before.");
1031 auto const &choiceIndices = exploredMdp->getNondeterministicChoiceIndices();
1032 auto const &transitions = exploredMdp->getTransitionMatrix();
1033 auto const &targetStatesExploredMDP = exploredMdp->getStates(
"target");
1034 for (uint64_t mdpState = 0; mdpState < exploredMdp->getNumberOfStates(); ++mdpState) {
1035 if (targetStatesExploredMDP.get(mdpState)) {
1039 auto const &stateValue = values[mdpState];
1040 for (uint64_t globalChoice = choiceIndices[mdpState]; globalChoice < choiceIndices[mdpState + 1]; ++globalChoice) {
1041 ValueType choiceValue = transitions.multiplyRowWithVector(globalChoice, values);
1042 if (exploredMdp->hasRewardModel()) {
1043 choiceValue += exploredMdp->getUniqueRewardModel().getStateActionReward(globalChoice);
1045 auto absDiff = storm::utility::abs<ValueType>((choiceValue - stateValue));
1046 if ((relativeDifference && absDiff <= ancillaryChoicesEpsilon * stateValue) || (!relativeDifference && absDiff <= ancillaryChoicesEpsilon)) {
1047 optimalChoices->set(globalChoice,
true);
1050 STORM_LOG_ASSERT(optimalChoices->getNextSetIndex(choiceIndices[mdpState]) < optimalChoices->size(),
"Could not find an optimal choice.");
1056 targetStatesExploredMDP,
false, 0, optimalChoices.value());
1059template<
typename PomdpType,
typename BeliefValueType>
1061 return getExploredMdpState(beliefId) != noState();
1064template<
typename PomdpType,
typename BeliefValueType>
1066 return std::numeric_limits<MdpStateType>::max();
1069template<
typename PomdpType,
typename BeliefValueType>
1070std::shared_ptr<storm::logic::Formula const> BeliefMdpExplorer<PomdpType, BeliefValueType>::createStandardProperty(
1072 std::string propertyString = computeRewards ?
"R" :
"P";
1074 propertyString +=
"=? [F \"target\"]";
1079template<
typename PomdpType,
typename BeliefValueType>
1081BeliefMdpExplorer<PomdpType, BeliefValueType>::createStandardCheckTask(std::shared_ptr<storm::logic::Formula const> &property) {
1084 auto task = storm::api::createTask<ValueType>(property,
false);
1086 hint.setResultHint(values);
1087 auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
1088 task.setHint(hintPtr);
1089 task.setProduceSchedulers();
1093template<
typename PomdpType,
typename BeliefValueType>
1095 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
1096 if (stateRemapping.find(currentMdpState) != stateRemapping.end()) {
1097 return stateRemapping.at(currentMdpState);
1099 return currentMdpState;
1103template<
typename PomdpType,
typename BeliefValueType>
1105 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
1106 return getBeliefId(currentMdpState);
1109template<
typename PomdpType,
typename BeliefValueType>
1110void BeliefMdpExplorer<PomdpType, BeliefValueType>::internalAddTransition(uint64_t
const &row, MdpStateType
const &column, ValueType
const &value) {
1111 STORM_LOG_ASSERT(row <= exploredMdpTransitions.size(),
"Skipped at least one row.");
1112 if (row == exploredMdpTransitions.size()) {
1113 exploredMdpTransitions.emplace_back();
1115 STORM_LOG_ASSERT(exploredMdpTransitions[row].
count(column) == 0,
"Trying to insert multiple transitions to the same state.");
1116 exploredMdpTransitions[row][column] = value;
1119template<
typename PomdpType,
typename BeliefValueType>
1120void BeliefMdpExplorer<PomdpType, BeliefValueType>::internalAddRowGroupIndex() {
1121 exploredChoiceIndices.push_back(getCurrentNumberOfMdpChoices());
1124template<
typename PomdpType,
typename BeliefValueType>
1126 gridBeliefs.insert(beliefId);
1129template<
typename PomdpType,
typename BeliefValueType>
1131 return gridBeliefs.count(beliefId) > 0;
1134template<
typename PomdpType,
typename BeliefValueType>
1137 if (beliefId < exploredBeliefIds.size() && exploredBeliefIds.get(beliefId)) {
1138 return beliefIdToMdpStateMap.at(beliefId);
1144template<
typename PomdpType,
typename BeliefValueType>
1146 lowerValueBounds.push_back(lowerBound);
1147 upperValueBounds.push_back(upperBound);
1149 values.push_back((lowerBound + upperBound) / storm::utility::convertNumber<ValueType, uint64_t>(2));
1150 STORM_LOG_ASSERT(lowerValueBounds.size() == getCurrentNumberOfMdpStates(),
"Value vectors have different size then number of available states.");
1151 STORM_LOG_ASSERT(lowerValueBounds.size() == upperValueBounds.size() && values.size() == upperValueBounds.size(),
"Value vectors have inconsistent size.");
1154template<
typename PomdpType,
typename BeliefValueType>
1156 BeliefId
const &beliefId, ValueType
const &transitionValue) {
1157 exploredBeliefIds.grow(beliefId + 1,
false);
1158 if (exploredBeliefIds.get(beliefId)) {
1160 mdpStatesToExploreStatePrio.find(beliefIdToMdpStateMap[beliefId]) != mdpStatesToExploreStatePrio.end()) {
1162 auto newPrio = probabilityEstimation[getCurrentMdpState()] * transitionValue;
1163 if (newPrio > mdpStatesToExploreStatePrio[beliefIdToMdpStateMap[beliefId]]) {
1165 auto range = mdpStatesToExplorePrioState.equal_range(mdpStatesToExploreStatePrio[beliefIdToMdpStateMap[beliefId]]);
1166 for (
auto i = range.first; i != range.second; ++i) {
1167 if (i->second == beliefIdToMdpStateMap[beliefId]) {
1168 mdpStatesToExplorePrioState.erase(i);
1172 mdpStatesToExplorePrioState.emplace(newPrio, beliefIdToMdpStateMap[beliefId]);
1173 mdpStatesToExploreStatePrio[beliefIdToMdpStateMap[beliefId]] = newPrio;
1176 return beliefIdToMdpStateMap[beliefId];
1179 exploredBeliefIds.set(beliefId,
true);
1183 auto findRes = beliefIdToMdpStateMap.find(beliefId);
1184 if (findRes != beliefIdToMdpStateMap.end()) {
1186 switch (explHeuristic) {
1189 prio = prio - storm::utility::one<ValueType>();
1192 currentPrio = getLowerValueBoundAtCurrentState();
1195 currentPrio = getUpperValueBoundAtCurrentState();
1198 currentPrio = getUpperValueBoundAtCurrentState() - getLowerValueBoundAtCurrentState();
1201 if (getCurrentMdpState() != noState()) {
1202 currentPrio = probabilityEstimation[getCurrentMdpState()] * transitionValue;
1204 currentPrio = storm::utility::one<ValueType>();
1208 STORM_LOG_THROW(
false, storm::exceptions::NotImplementedException,
"Other heuristics not implemented yet");
1210 mdpStatesToExploreStatePrio[findRes->second] = currentPrio;
1211 mdpStatesToExplorePrioState.emplace(currentPrio, findRes->second);
1212 return findRes->second;
1216 MdpStateType result = getCurrentNumberOfMdpStates();
1217 assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size());
1218 mdpStateToBeliefIdMap.push_back(beliefId);
1219 beliefIdToMdpStateMap[beliefId] = result;
1220 insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId));
1222 switch (explHeuristic) {
1225 prio = prio - storm::utility::one<ValueType>();
1228 currentPrio = getLowerValueBoundAtCurrentState();
1231 currentPrio = getUpperValueBoundAtCurrentState();
1234 currentPrio = getUpperValueBoundAtCurrentState() - getLowerValueBoundAtCurrentState();
1237 if (getCurrentMdpState() != noState()) {
1238 currentPrio = probabilityEstimation[getCurrentMdpState()] * transitionValue;
1240 currentPrio = storm::utility::one<ValueType>();
1244 STORM_LOG_THROW(
false, storm::exceptions::NotImplementedException,
"Other heuristics not implemented yet");
1246 mdpStatesToExploreStatePrio[result] = currentPrio;
1247 mdpStatesToExplorePrioState.emplace(currentPrio, result);
1252template<
typename PomdpType,
typename BeliefValueType>
1254 mdpStateToChoiceLabelsMap[currentMdpState][localActionIndex] = label;
1257template<
typename PomdpType,
typename BeliefValueType>
1259 return mdpStateToBeliefIdMap;
1262template<
typename PomdpType,
typename BeliefValueType>
1264 uint32_t obs)
const {
1265 std::vector<BeliefId> res;
1266 for (
auto const &belief : mdpStateToBeliefIdMap) {
1267 if (belief != beliefManager->noId()) {
1268 if (beliefManager->getBeliefObservation(belief) == obs) {
1269 res.push_back(belief);
1276template<
typename PomdpType,
typename BeliefValueType>
1278 uint64_t
const &pomdpState) {
1279 return pomdpValueBounds.getSmallestUpperBound(pomdpState);
1282template<
typename PomdpType,
typename BeliefValueType>
1284 uint64_t
const &pomdpState) {
1285 return pomdpValueBounds.getHighestLowerBound(pomdpState);
1288template<
typename PomdpType,
typename BeliefValueType>
1290 extremeValueBound = valueBound;
1293template<
typename PomdpType,
typename BeliefValueType>
1295 fmSchedulerValueList = valueList;
1298template<
typename PomdpType,
typename BeliefValueType>
1300 return fmSchedulerValueList.at(observation).size();
1303template<
typename PomdpType,
typename BeliefValueType>
1305 const uint64_t &pomdpState) {
1309template<
typename PomdpType,
typename BeliefValueType>
1311 return extremeValueBound.isInfinite;
1314template<
typename PomdpType,
typename BeliefValueType>
1316 return pomdpValueBounds.upper.
size();
1319template<
typename PomdpType,
typename BeliefValueType>
1321 return pomdpValueBounds.lower.size();
1324template<
typename PomdpType,
typename BeliefValueType>
1327 STORM_LOG_ASSERT(!pomdpValueBounds.lowerSchedulers.empty(),
"Requested lower bound scheduler but none were available.");
1329 "Requested lower value bound scheduler with ID " << schedulerId <<
" not available.");
1330 return pomdpValueBounds.lowerSchedulers[schedulerId];
1333template<
typename PomdpType,
typename BeliefValueType>
1336 STORM_LOG_ASSERT(!pomdpValueBounds.upperSchedulers.empty(),
"Requested upper bound scheduler but none were available.");
1338 "Requested upper value bound scheduler with ID " << schedulerId <<
" not available.");
1339 return pomdpValueBounds.upperSchedulers[schedulerId];
1342template<
typename PomdpType,
typename BeliefValueType>
1343std::vector<storm::storage::Scheduler<typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType>>
1345 STORM_LOG_ASSERT(!pomdpValueBounds.lowerSchedulers.empty(),
"Requested lower bound schedulers but none were available.");
1346 return pomdpValueBounds.lowerSchedulers;
1349template<
typename PomdpType,
typename BeliefValueType>
1350std::vector<storm::storage::Scheduler<typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType>>
1352 STORM_LOG_ASSERT(!pomdpValueBounds.upperSchedulers.empty(),
"Requested upper bound schedulers but none were available.");
1353 return pomdpValueBounds.upperSchedulers;
1356template<
typename PomdpType,
typename BeliefValueType>
1359 return !fmSchedulerValueList.empty();
1362template<
typename PomdpType,
typename BeliefValueType>
1365 return beliefManager->computeMatrixBeliefProduct(beliefId, matrix);
1368template<
typename PomdpType,
typename BeliefValueType>
1370 uint64_t currentRowGroupSize = getSizeOfCurrentRowGroup();
1371 assert(totalNumberOfActions != currentRowGroupSize);
1372 if (totalNumberOfActions > currentRowGroupSize) {
1373 uint64_t numberOfActionsToAdd = totalNumberOfActions - currentRowGroupSize;
1374 exploredMdpTransitions.insert(exploredMdpTransitions.begin() + (exploredChoiceIndices[getCurrentMdpState() + 1]), numberOfActionsToAdd,
1375 std::map<MdpStateType, ValueType>());
1376 for (uint64_t i = getCurrentMdpState() + 1; i < exploredChoiceIndices.size(); i++) {
1377 exploredChoiceIndices[i] += numberOfActionsToAdd;
1381 if (totalNumberOfActions < currentRowGroupSize) {
1382 uint64_t numberOfActionsToRemove = currentRowGroupSize - totalNumberOfActions;
1383 exploredMdpTransitions.erase(exploredMdpTransitions.begin() + (exploredChoiceIndices[getCurrentMdpState() + 1]) - numberOfActionsToRemove,
1384 exploredMdpTransitions.begin() + (exploredChoiceIndices[getCurrentMdpState() + 1]));
1385 for (uint64_t i = getCurrentMdpState() + 1; i < exploredChoiceIndices.size(); i++) {
1386 exploredChoiceIndices[i] -= numberOfActionsToRemove;
MdpStateType getExploredMdpState(BeliefId const &beliefId) const
void storeExplorationState()
void addClippingRewardToCurrentState(uint64_t const &localActionIndex, ValueType rewardValue)
ValueType computeUpperValueBoundAtBelief(BeliefId const &beliefId) const
uint64_t getNrSchedulersForLowerBounds()
MdpStateType getCurrentNumberOfMdpStates() const
ValueType const & getComputedValueAtInitialState() const
void restartExploration()
Restarts the exploration to allow re-exploring each state.
bool currentStateHasSuccessorObservationInObservationSet(uint64_t localActionIndex, storm::storage::BitVector const &observationSet)
ValueType getTrivialLowerBoundAtPOMDPState(uint64_t const &pomdpState)
void setFMSchedValueList(std::vector< std::vector< std::unordered_map< uint64_t, ValueType > > > valueList)
ValueType getTrivialUpperBoundAtPOMDPState(uint64_t const &pomdpState)
void addTransitionsToExtraStates(uint64_t const &localActionIndex, ValueType const &targetStateValue=storm::utility::zero< ValueType >(), ValueType const &bottomStateValue=storm::utility::zero< ValueType >())
bool beliefHasMdpState(BeliefId const &beliefId) const
std::vector< ValueType > const & getValuesOfExploredMdp() const
storm::storage::BitVector getStateExtremeBoundIsInfinite()
void setCurrentStateIsClipped()
void setCurrentStateIsTarget()
storm::storage::Scheduler< ValueType > getLowerValueBoundScheduler(uint64_t schedulerId) const
void restoreExplorationState()
void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map< uint32_t, SuccessorObservationInformation > &gatheredSuccessorObservations)
uint64_t getSizeOfCurrentRowGroup() const
void takeCurrentValuesAsLowerBounds()
bool hasComputedValues() const
void startNewExploration(std::optional< ValueType > extraTargetStateValue=boost::none, std::optional< ValueType > extraBottomStateValue=std::nullopt)
bool getCurrentStateWasTruncated() const
BeliefMdpExplorer(std::shared_ptr< BeliefManagerType > beliefManager, storm::pomdp::storage::PreprocessingPomdpValueBounds< ValueType > const &pomdpValueBounds, ExplorationHeuristic explorationHeuristic=ExplorationHeuristic::BreadthFirst)
bool actionIsOptimal(uint64_t const &globalActionIndex) const
Retrieves whether the given action at the current state was optimal in the most recent check.
bool needsActionAdjustment(uint64_t numActionsNeeded)
std::vector< BeliefId > getBeliefsWithObservationInMdp(uint32_t obs) const
ValueType getExtremeValueBoundAtPOMDPState(uint64_t const &pomdpState)
MdpStateType getBeliefId(MdpStateType exploredMdpState) const
void computeValuesOfExploredMdp(storm::Environment const &env, storm::solver::OptimizationDirection const &dir)
std::vector< BeliefId > getBeliefsInMdp()
std::shared_ptr< storm::models::sparse::Mdp< ValueType > > getExploredMdp() const
storm::storage::Scheduler< ValueType > getUpperValueBoundScheduler(uint64_t schedulerId) const
std::vector< storm::storage::Scheduler< ValueType > > getLowerValueBoundSchedulers() const
bool getCurrentStateActionExplorationWasDelayed(uint64_t const &localActionIndex) const
BeliefManagerType const & getBeliefManager() const
PomdpType::ValueType ValueType
bool hasUnexploredState() const
void dropUnexploredStates()
void setCurrentStateIsTruncated()
std::pair< bool, ValueType > computeFMSchedulerValueForMemoryNode(BeliefId const &beliefId, uint64_t memoryNode) const
void setExtremeValueBound(storm::pomdp::storage::ExtremePOMDPValueBound< ValueType > valueBound)
bool hasFMSchedulerValues() const
void addChoiceLabelToCurrentState(uint64_t const &localActionIndex, std::string const &label)
std::vector< uint64_t > getUnexploredStates()
void restoreOldBehaviorAtCurrentState(uint64_t const &localActionIndex)
Inserts transitions and rewards at the given action as in the MDP of the previous exploration.
ValueType computeLowerValueBoundAtBelief(BeliefId const &beliefId) const
uint64_t getRowGroupSizeOfState(uint64_t state) const
bool currentStateHasOldBehavior() const
const std::shared_ptr< storm::storage::Scheduler< BeliefMdpExplorer< PomdpType, BeliefValueType >::ValueType > > & getSchedulerForExploredMdp() const
MdpStateType getStartOfCurrentRowGroup() const
ValueType computeLowerValueBoundForScheduler(BeliefId const &beliefId, uint64_t schedulerId) const
bool currentStateIsOptimalSchedulerReachable() const
Retrieves whether the current state can be reached under a scheduler that was optimal in the most rec...
void addRewardToCurrentState(uint64_t const &localActionIndex, ValueType rewardValue)
Adds the provided reward value to the given action of the current state.
void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map< uint32_t, SuccessorObservationInformation > &gatheredSuccessorObservations)
uint64_t getNrOfMemoryNodesForObservation(uint32_t observation) const
void computeRewardAtCurrentState(uint64_t const &localActionIndex, ValueType extraReward=storm::utility::zero< ValueType >())
void computeOptimalChoicesAndReachableMdpStates(ValueType const &ancillaryChoicesEpsilon, bool relativeDifference)
Computes the set of states that are reachable via a path that is consistent with an optimal MDP sched...
uint64_t getNrSchedulersForUpperBounds()
bool isMarkedAsGridBelief(BeliefId const &beliefId)
ValueType getUpperValueBoundAtCurrentState() const
BeliefId exploreNextState()
void markAsGridBelief(BeliefId const &beliefId)
ValueType computeUpperValueBoundForScheduler(BeliefId const &beliefId, uint64_t schedulerId) const
void adjustActions(uint64_t totalNumberOfActions)
bool actionAtCurrentStateWasOptimal(uint64_t const &localActionIndex) const
Retrieves whether the given action at the current state was optimal in the most recent check.
void setCurrentChoiceIsDelayed(uint64_t const &localActionIndex)
std::vector< BeliefValueType > computeProductWithSparseMatrix(BeliefId const &beliefId, storm::storage::SparseMatrix< BeliefValueType > &matrix) const
void addSelfloopTransition(uint64_t const &localActionIndex=0, ValueType const &value=storm::utility::one< ValueType >())
std::vector< storm::storage::Scheduler< ValueType > > getUpperValueBoundSchedulers() const
ValueType getLowerValueBoundAtCurrentState() const
bool getCurrentStateWasClipped() const
bool addTransitionToBelief(uint64_t const &localActionIndex, BeliefId const &transitionTarget, ValueType const &value, bool ignoreNewBeliefs)
Adds the next transition to the given successor belief.
void takeCurrentValuesAsUpperBounds()
bool stateIsOptimalSchedulerReachable(MdpStateType mdpState) const
Retrieves whether the current state can be reached under an optimal scheduler This requires a previou...
BeliefManagerType::BeliefId BeliefId
MdpStateType getCurrentNumberOfMdpChoices() const
This class contains information that might accelerate the model checking process.
This class manages the labeling of the choice space with a number of (atomic) labels.
void addLabel(std::string const &label)
Adds a new label to the labelings.
bool containsLabel(std::string const &label) const
Checks whether a label is registered within this labeling.
This class manages the labeling of the state space with a number of (atomic) labels.
void addLabelToState(std::string const &label, storm::storage::sparse::state_type state)
Adds a label to a given state.
A bit vector that is internally represented as a vector of 64-bit values.
void set(uint64_t index, bool value=true)
Sets the given truth value at the given index.
size_t size() const
Retrieves the number of bits this bit vector can store.
This class defines which action is chosen in a particular state of a non-deterministic model.
A class that can be used to build a sparse matrix by adding value by value.
void addNextValue(index_type row, index_type column, value_type const &value)
Sets the matrix entry at the given row and column to the given value.
void newRowGroup(index_type startingRow)
Starts a new row group in the matrix.
SparseMatrix< value_type > build(index_type overriddenRowCount=0, index_type overriddenColumnCount=0, index_type overriddenRowGroupCount=0)
A class that holds a possibly non-square matrix in the compressed row storage format.
#define STORM_LOG_DEBUG(message)
#define STORM_LOG_ERROR(message)
#define STORM_LOG_ASSERT(cond, message)
#define STORM_LOG_THROW(cond, exception, message)
#define STORM_LOG_WARN_COND_DEBUG(cond, message)
std::vector< storm::jani::Property > parseProperties(storm::parser::FormulaParser &formulaParser, std::string const &inputString, boost::optional< std::set< std::string > > const &propertyFilter)
std::vector< std::shared_ptr< storm::logic::Formula const > > extractFormulasFromProperties(std::vector< storm::jani::Property > const &properties)
SFTBDDChecker::ValueType ValueType
std::pair< storm::RationalNumber, storm::RationalNumber > count(std::vector< storm::storage::BitVector > const &origSets, std::vector< storm::storage::BitVector > const &intersects, std::vector< storm::storage::BitVector > const &intersectsInfo, storm::RationalNumber val, bool plus, uint64_t remdepth)
bool constexpr minimize(OptimizationDirection d)
storm::storage::BitVector getReachableStates(storm::storage::SparseMatrix< T > const &transitionMatrix, storm::storage::BitVector const &initialStates, storm::storage::BitVector const &constraintStates, storm::storage::BitVector const &targetStates, bool useStepBound, uint_fast64_t maximalSteps, boost::optional< storm::storage::BitVector > const &choiceFilter)
Performs a forward depth-first search through the underlying graph structure to identify the states t...
bool isTerminate()
Check whether the program should terminate (due to some abort signal).
bool compareElementWise(std::vector< T > const &left, std::vector< T > const &right, Comparator comp=std::less< T >())
void filterVectorInPlace(std::vector< Type > &v, storm::storage::BitVector const &filter)
bool isZero(ValueType const &a)
std::string to_string(ValueType const &value)
Struct to store the extreme bound values needed for the reward correction values when clipping is use...
ValueType getValueForState(uint64_t const &state)
Get the extreme bound value for a given state.
Struct for storing precomputed values bounding the actual values on the POMDP.
std::optional< storm::models::sparse::ChoiceLabeling > choiceLabeling