22template<
typename PomdpType,
typename BeliefValueType>
24 ValueType const &maxProb, uint64_t
const &count)
25 : observationProbability(obsProb), maxProbabilityToSuccessorWithObs(maxProb), successorWithObsCount(count) {
29template<
typename PomdpType,
typename BeliefValueType>
37template<
typename PomdpType,
typename BeliefValueType>
41 : beliefManager(beliefManager), pomdpValueBounds(pomdpValueBounds), explHeuristic(explorationHeuristic), status(
Status::
Uninitialized) {
45template<
typename PomdpType,
typename BeliefValueType>
47 return *beliefManager;
50template<
typename PomdpType,
typename BeliefValueType>
52 std::optional<ValueType> extraBottomStateValue) {
53 status = Status::Exploring;
55 prio = storm::utility::zero<ValueType>();
57 mdpStateToBeliefIdMap.clear();
58 beliefIdToMdpStateMap.clear();
59 exploredBeliefIds.clear();
60 exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(),
false);
61 mdpStatesToExplorePrioState.clear();
62 mdpStatesToExploreStatePrio.clear();
63 stateRemapping.clear();
64 lowerValueBounds.clear();
65 upperValueBounds.clear();
67 exploredMdpTransitions.clear();
68 exploredChoiceIndices.clear();
69 previousChoiceIndices.clear();
70 probabilityEstimation.clear();
71 mdpActionRewards.clear();
73 truncatedStates.clear();
74 clippedStates.clear();
75 delayedExplorationChoices.clear();
76 clippingTransitionRewards.clear();
77 mdpStateToChoiceLabelsMap.clear();
78 optimalChoices = std::nullopt;
79 optimalChoicesReachableMdpStates = std::nullopt;
81 exploredMdp =
nullptr;
82 internalAddRowGroupIndex();
85 if (extraBottomStateValue) {
86 currentMdpState = getCurrentNumberOfMdpStates();
87 extraBottomState = currentMdpState;
88 mdpStateToBeliefIdMap.push_back(beliefManager->noId());
89 probabilityEstimation.push_back(storm::utility::zero<ValueType>());
90 insertValueHints(extraBottomStateValue.value(), extraBottomStateValue.value());
92 internalAddTransition(getStartOfCurrentRowGroup(), extraBottomState.value(), storm::utility::one<ValueType>());
93 mdpStateToChoiceLabelsMap[getStartOfCurrentRowGroup()][0] =
"loop";
94 internalAddRowGroupIndex();
97 extraBottomState = std::nullopt;
99 if (extraTargetStateValue) {
100 currentMdpState = getCurrentNumberOfMdpStates();
101 extraTargetState = currentMdpState;
102 mdpStateToBeliefIdMap.push_back(beliefManager->noId());
103 probabilityEstimation.push_back(storm::utility::zero<ValueType>());
104 insertValueHints(extraTargetStateValue.value(), extraTargetStateValue.value());
106 internalAddTransition(getStartOfCurrentRowGroup(), extraTargetState.value(), storm::utility::one<ValueType>());
107 mdpStateToChoiceLabelsMap[getStartOfCurrentRowGroup()][0] =
"loop";
108 internalAddRowGroupIndex();
110 targetStates.grow(getCurrentNumberOfMdpStates(),
false);
111 targetStates.set(extraTargetState.value(),
true);
114 extraTargetState = std::nullopt;
116 currentMdpState = noState();
119 initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief());
122template<
typename PomdpType,
typename BeliefValueType>
124 STORM_LOG_ASSERT(status == Status::ModelChecked || status == Status::ModelFinished,
"Method call is invalid in current status.");
125 status = Status::Exploring;
127 prio = storm::utility::zero<ValueType>();
128 stateRemapping.clear();
129 exploredBeliefIds.clear();
130 exploredBeliefIds.grow(beliefManager->getNumberOfBeliefIds(),
false);
131 exploredMdpTransitions.clear();
132 exploredMdpTransitions.resize(exploredMdp->getNumberOfChoices());
133 clippingTransitionRewards.clear();
134 previousChoiceIndices = exploredMdp->getNondeterministicChoiceIndices();
135 exploredChoiceIndices = exploredMdp->getNondeterministicChoiceIndices();
136 mdpActionRewards.clear();
137 probabilityEstimation.clear();
138 if (exploredMdp->hasRewardModel()) {
140 mdpActionRewards = exploredMdp->getUniqueRewardModel().getStateActionRewardVector();
145 delayedExplorationChoices.clear();
146 mdpStatesToExplorePrioState.clear();
147 mdpStatesToExploreStatePrio.clear();
150 if (extraBottomState) {
151 currentMdpState = extraBottomState.value();
152 restoreOldBehaviorAtCurrentState(0);
154 if (extraTargetState) {
155 currentMdpState = extraTargetState.value();
156 restoreOldBehaviorAtCurrentState(0);
157 targetStates.set(extraTargetState.value(),
true);
159 currentMdpState = noState();
162 initialMdpState = getOrAddMdpState(beliefManager->getInitialBelief());
165template<
typename PomdpType,
typename BeliefValueType>
167 explorationStorage.storedMdpStateToBeliefIdMap = std::vector<BeliefId>(mdpStateToBeliefIdMap);
168 explorationStorage.storedBeliefIdToMdpStateMap = std::map<BeliefId, MdpStateType>(beliefIdToMdpStateMap);
170 explorationStorage.storedMdpStateToChoiceLabelsMap = std::map<BeliefId, std::map<uint64_t, std::string>>(mdpStateToChoiceLabelsMap);
171 explorationStorage.storedMdpStatesToExplorePrioState = std::multimap<ValueType, uint64_t>(mdpStatesToExplorePrioState);
172 explorationStorage.storedMdpStatesToExploreStatePrio = std::map<uint64_t, ValueType>(mdpStatesToExploreStatePrio);
173 explorationStorage.storedProbabilityEstimation = std::vector<ValueType>(probabilityEstimation);
174 explorationStorage.storedExploredMdpTransitions = std::vector<std::map<MdpStateType, ValueType>>(exploredMdpTransitions);
175 explorationStorage.storedExploredChoiceIndices = std::vector<MdpStateType>(exploredChoiceIndices);
176 explorationStorage.storedMdpActionRewards = std::vector<ValueType>(mdpActionRewards);
177 explorationStorage.storedClippingTransitionRewards = std::map<MdpStateType, ValueType>(clippingTransitionRewards);
178 explorationStorage.storedCurrentMdpState = currentMdpState;
179 explorationStorage.storedStateRemapping = std::map<MdpStateType, MdpStateType>(stateRemapping);
180 explorationStorage.storedNextId = nextId;
181 explorationStorage.storedPrio =
ValueType(prio);
182 explorationStorage.storedLowerValueBounds = std::vector<ValueType>(lowerValueBounds);
183 explorationStorage.storedUpperValueBounds = std::vector<ValueType>(upperValueBounds);
184 explorationStorage.storedValues = std::vector<ValueType>(values);
189template<
typename PomdpType,
typename BeliefValueType>
191 mdpStateToBeliefIdMap = std::vector<BeliefId>(explorationStorage.storedMdpStateToBeliefIdMap);
192 beliefIdToMdpStateMap = std::map<BeliefId, MdpStateType>(explorationStorage.storedBeliefIdToMdpStateMap);
194 mdpStateToChoiceLabelsMap = std::map<BeliefId, std::map<uint64_t, std::string>>(explorationStorage.storedMdpStateToChoiceLabelsMap);
195 mdpStatesToExplorePrioState = std::multimap<ValueType, uint64_t>(explorationStorage.storedMdpStatesToExplorePrioState);
196 mdpStatesToExploreStatePrio = std::map<uint64_t, ValueType>(explorationStorage.storedMdpStatesToExploreStatePrio);
197 probabilityEstimation = std::vector<ValueType>(explorationStorage.storedProbabilityEstimation);
198 exploredMdpTransitions = std::vector<std::map<MdpStateType, ValueType>>(explorationStorage.storedExploredMdpTransitions);
199 exploredChoiceIndices = std::vector<MdpStateType>(explorationStorage.storedExploredChoiceIndices);
200 mdpActionRewards = std::vector<ValueType>(explorationStorage.storedMdpActionRewards);
201 clippingTransitionRewards = std::map<MdpStateType, ValueType>(explorationStorage.storedClippingTransitionRewards);
202 currentMdpState = explorationStorage.storedCurrentMdpState;
203 stateRemapping = std::map<MdpStateType, MdpStateType>(explorationStorage.storedStateRemapping);
204 nextId = explorationStorage.storedNextId;
205 prio =
ValueType(explorationStorage.storedPrio);
206 lowerValueBounds = explorationStorage.storedLowerValueBounds;
207 upperValueBounds = explorationStorage.storedUpperValueBounds;
208 values = explorationStorage.storedValues;
209 status = Status::Exploring;
210 targetStates = explorationStorage.storedTargetStates;
212 truncatedStates.clear();
213 clippedStates.clear();
214 delayedExplorationChoices.clear();
215 optimalChoices = std::nullopt;
216 optimalChoicesReachableMdpStates = std::nullopt;
217 exploredMdp =
nullptr;
221template<
typename PomdpType,
typename BeliefValueType>
223 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
224 return !mdpStatesToExploreStatePrio.empty();
227template<
typename PomdpType,
typename BeliefValueType>
229 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
230 std::vector<uint64_t> res;
231 res.reserve(mdpStatesToExploreStatePrio.size());
232 for (
auto const &entry : mdpStatesToExploreStatePrio) {
233 res.push_back(entry.first);
238template<
typename PomdpType,
typename BeliefValueType>
240 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
242 if (currentMdpState != noState() && mdpStatesToExplorePrioState.rbegin()->second == exploredChoiceIndices.size()) {
243 internalAddRowGroupIndex();
247 currentMdpState = mdpStatesToExplorePrioState.rbegin()->second;
248 auto currprio = mdpStatesToExplorePrioState.rbegin()->first;
249 auto range = mdpStatesToExplorePrioState.equal_range(currprio);
250 for (
auto i = range.first; i != range.second; ++i) {
251 if (i->second == currentMdpState) {
252 mdpStatesToExplorePrioState.erase(i);
256 mdpStatesToExploreStatePrio.erase(currentMdpState);
257 if (currentMdpState != nextId && !currentStateHasOldBehavior()) {
258 stateRemapping[currentMdpState] = nextId;
259 STORM_LOG_DEBUG(
"Explore state " << currentMdpState <<
" [Bel " << getCurrentBeliefId() <<
" " << beliefManager->toString(getCurrentBeliefId())
262 STORM_LOG_DEBUG(
"Explore state " << currentMdpState <<
" [Bel " << getCurrentBeliefId() <<
" " << beliefManager->toString(getCurrentBeliefId()) <<
"]"
266 if (!currentStateHasOldBehavior()) {
270 probabilityEstimation.push_back(currprio);
273 return mdpStateToBeliefIdMap[currentMdpState];
276template<
typename PomdpType,
typename BeliefValueType>
279 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
280 STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < previousChoiceIndices[currentMdpState + 1] - previousChoiceIndices[currentMdpState] ||
281 getCurrentStateWasTruncated(),
282 "Action index " << localActionIndex <<
" was not valid at non-truncated state " << currentMdpState <<
" of the previously explored MDP.");
283 uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
285 STORM_LOG_ASSERT(extraBottomState.has_value(),
"Requested a transition to the extra bottom state but there is none.");
286 internalAddTransition(row, extraBottomState.value(), bottomStateValue);
289 STORM_LOG_ASSERT(extraTargetState.has_value(),
"Requested a transition to the extra target state but there is none.");
290 internalAddTransition(row, extraTargetState.value(), targetStateValue);
294template<
typename PomdpType,
typename BeliefValueType>
296 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
297 STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < previousChoiceIndices[currentMdpState + 1] - previousChoiceIndices[currentMdpState] ||
298 getCurrentStateWasTruncated(),
299 "Action index " << localActionIndex <<
" was not valid at non-truncated state " << currentMdpState <<
" of the previously explored MDP.");
300 uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
301 internalAddTransition(row, getCurrentMdpState(), value);
304template<
typename PomdpType,
typename BeliefValueType>
306 ValueType const &value,
bool ignoreNewBeliefs) {
307 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
308 STORM_LOG_ASSERT(!currentStateHasOldBehavior() || localActionIndex < previousChoiceIndices[currentMdpState + 1] - previousChoiceIndices[currentMdpState] ||
309 getCurrentStateWasTruncated(),
310 "Action index " << localActionIndex <<
" was not valid at non-truncated state " << currentMdpState <<
" of the previously explored MDP.");
313 if (ignoreNewBeliefs) {
314 column = getExploredMdpState(transitionTarget);
315 if (column == noState()) {
319 column = getOrAddMdpState(transitionTarget, value);
321 if (getCurrentMdpState() == exploredChoiceIndices.size()) {
322 internalAddRowGroupIndex();
324 uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
325 internalAddTransition(row, column, value);
329template<
typename PomdpType,
typename BeliefValueType>
331 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
332 if (getCurrentNumberOfMdpChoices() > mdpActionRewards.size()) {
333 mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
335 uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
336 mdpActionRewards[row] = beliefManager->getBeliefActionReward(getCurrentBeliefId(), localActionIndex) + extraReward;
339template<
typename PomdpType,
typename BeliefValueType>
341 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
342 if (getCurrentNumberOfMdpChoices() > mdpActionRewards.size()) {
343 mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
345 uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
346 mdpActionRewards[row] = rewardValue;
349template<
typename PomdpType,
typename BeliefValueType>
351 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
352 uint64_t row = getStartOfCurrentRowGroup() + localActionIndex;
353 clippingTransitionRewards[row] = rewardValue;
356template<
typename PomdpType,
typename BeliefValueType>
358 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
359 targetStates.grow(getCurrentNumberOfMdpStates(),
false);
360 targetStates.set(getCurrentMdpState(),
true);
363template<
typename PomdpType,
typename BeliefValueType>
365 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
366 truncatedStates.grow(getCurrentNumberOfMdpStates(),
false);
367 truncatedStates.set(getCurrentMdpState(),
true);
370template<
typename PomdpType,
typename BeliefValueType>
372 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
373 setCurrentStateIsTruncated();
374 clippedStates.grow(getCurrentNumberOfMdpStates(),
false);
375 clippedStates.set(getCurrentMdpState(),
true);
378template<
typename PomdpType,
typename BeliefValueType>
380 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
381 delayedExplorationChoices.grow(getCurrentNumberOfMdpChoices(),
false);
382 delayedExplorationChoices.set(getStartOfCurrentRowGroup() + localActionIndex,
true);
385template<
typename PomdpType,
typename BeliefValueType>
387 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
388 STORM_LOG_ASSERT(getCurrentMdpState() != noState(),
"Method 'currentStateHasOldBehavior' called but there is no current state.");
389 return exploredMdp && getCurrentMdpState() < exploredMdp->getNumberOfStates();
392template<
typename PomdpType,
typename BeliefValueType>
394 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
395 STORM_LOG_ASSERT(getCurrentMdpState() != noState(),
"Method 'actionAtCurrentStateWasOptimal' called but there is no current state.");
396 STORM_LOG_ASSERT(currentStateHasOldBehavior(),
"Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior");
398 return exploredMdp->getStateLabeling().getStateHasLabel(
"truncated", getCurrentMdpState());
401template<
typename PomdpType,
typename BeliefValueType>
403 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
404 STORM_LOG_ASSERT(getCurrentMdpState() != noState(),
"Method 'actionAtCurrentStateWasOptimal' called but there is no current state.");
405 STORM_LOG_ASSERT(currentStateHasOldBehavior(),
"Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior");
407 return exploredMdp->getStateLabeling().getStateHasLabel(
"clipped", getCurrentMdpState());
410template<
typename PomdpType,
typename BeliefValueType>
412 STORM_LOG_ASSERT(status == Status::ModelChecked,
"Method call is invalid in current status.");
414 "Method 'stateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before.");
415 return optimalChoicesReachableMdpStates->get(mdpState);
418template<
typename PomdpType,
typename BeliefValueType>
420 STORM_LOG_ASSERT(status == Status::ModelChecked,
"Method call is invalid in current status.");
421 STORM_LOG_ASSERT(optimalChoices.has_value(),
"Method 'actionIsOptimal' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before.");
422 return optimalChoices->get(globalActionIndex);
425template<
typename PomdpType,
typename BeliefValueType>
427 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
428 STORM_LOG_ASSERT(getCurrentMdpState() != noState(),
"Method 'currentStateIsOptimalSchedulerReachable' called but there is no current state.");
429 STORM_LOG_ASSERT(currentStateHasOldBehavior(),
"Method 'currentStateIsOptimalSchedulerReachable' called but current state has no old behavior");
431 "Method 'currentStateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before.");
432 return optimalChoicesReachableMdpStates->get(getCurrentMdpState());
435template<
typename PomdpType,
typename BeliefValueType>
437 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
438 STORM_LOG_ASSERT(getCurrentMdpState() != noState(),
"Method 'actionAtCurrentStateWasOptimal' called but there is no current state.");
439 STORM_LOG_ASSERT(currentStateHasOldBehavior(),
"Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior");
441 "Method 'currentStateIsOptimalSchedulerReachable' called but 'computeOptimalChoicesAndReachableMdpStates' was not called before.");
442 uint64_t choice = previousChoiceIndices.at(getCurrentMdpState()) + localActionIndex;
443 return optimalChoices->get(choice);
446template<
typename PomdpType,
typename BeliefValueType>
448 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
449 STORM_LOG_ASSERT(getCurrentMdpState() != noState(),
"Method 'actionAtCurrentStateWasOptimal' called but there is no current state.");
450 STORM_LOG_ASSERT(currentStateHasOldBehavior(),
"Method 'actionAtCurrentStateWasOptimal' called but current state has no old behavior");
452 uint64_t choice = exploredMdp->getNondeterministicChoiceIndices()[getCurrentMdpState()] + localActionIndex;
453 return exploredMdp->hasChoiceLabeling() && exploredMdp->getChoiceLabeling().getLabels().count(
"delayed") > 0 &&
454 exploredMdp->getChoiceLabeling().getChoiceHasLabel(
"delayed", choice);
457template<
typename PomdpType,
typename BeliefValueType>
459 STORM_LOG_ASSERT(currentStateHasOldBehavior(),
"Cannot restore old behavior as the current state does not have any.");
460 STORM_LOG_ASSERT(localActionIndex < previousChoiceIndices[currentMdpState + 1] - previousChoiceIndices[currentMdpState],
461 "Action index " << localActionIndex <<
" was not valid at state " << currentMdpState <<
" of the previously explored MDP.");
463 if (getCurrentMdpState() == exploredChoiceIndices.size()) {
464 internalAddRowGroupIndex();
467 assert(getCurrentMdpState() < previousChoiceIndices.size());
468 assert(getCurrentMdpState() < exploredChoiceIndices.size());
469 uint64_t oldChoiceIndex = previousChoiceIndices.at(getCurrentMdpState()) + localActionIndex;
470 uint64_t newChoiceIndex = exploredChoiceIndices.at(getCurrentMdpState()) + localActionIndex;
473 for (
auto const &transition : exploredMdp->getTransitionMatrix().getRow(oldChoiceIndex)) {
474 internalAddTransition(newChoiceIndex, transition.getColumn(), transition.getValue());
476 auto beliefId = getBeliefId(transition.getColumn());
477 if (beliefId != beliefManager->noId()) {
478 if (!exploredBeliefIds.get(beliefId)) {
480 exploredBeliefIds.set(beliefId,
true);
482 switch (explHeuristic) {
485 prio = prio - storm::utility::one<ValueType>();
488 currentPrio = getLowerValueBoundAtCurrentState();
491 currentPrio = getUpperValueBoundAtCurrentState();
494 currentPrio = getUpperValueBoundAtCurrentState() - getLowerValueBoundAtCurrentState();
497 if (getCurrentMdpState() != noState()) {
498 currentPrio = probabilityEstimation[getCurrentMdpState()] * transition.getValue();
500 currentPrio = storm::utility::one<ValueType>();
504 STORM_LOG_THROW(
false, storm::exceptions::NotImplementedException,
"Other heuristics not implemented yet");
506 mdpStatesToExploreStatePrio[transition.getColumn()] = currentPrio;
507 mdpStatesToExplorePrioState.emplace(currentPrio, transition.getColumn());
515template<
typename PomdpType,
typename BeliefValueType>
517 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
518 STORM_LOG_ASSERT(!hasUnexploredState(),
"Finishing exploration not possible if there are still unexplored states.");
522 if (!currentStateHasOldBehavior() || exploredChoiceIndices.back() < getCurrentNumberOfMdpChoices()) {
523 internalAddRowGroupIndex();
526 targetStates.resize(getCurrentNumberOfMdpStates(),
false);
527 truncatedStates.resize(getCurrentNumberOfMdpStates(),
false);
528 clippedStates.resize(getCurrentNumberOfMdpStates(),
false);
529 if (!mdpActionRewards.empty()) {
530 mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
534 currentMdpState = noState();
539 dropUnexploredStates();
543 optimalChoices = std::nullopt;
544 optimalChoicesReachableMdpStates = std::nullopt;
547 if (!stateRemapping.empty()) {
548 std::vector<BeliefId> remappedStateToBeliefIdMap(mdpStateToBeliefIdMap);
549 for (
auto const &entry : stateRemapping) {
550 remappedStateToBeliefIdMap[entry.second] = mdpStateToBeliefIdMap[entry.first];
552 mdpStateToBeliefIdMap = remappedStateToBeliefIdMap;
553 for (
auto const &beliefMdpState : beliefIdToMdpStateMap) {
554 if (stateRemapping.find(beliefMdpState.second) != stateRemapping.end()) {
555 beliefIdToMdpStateMap[beliefMdpState.first] = stateRemapping[beliefMdpState.second];
558 if (!mdpStateToChoiceLabelsMap.empty()) {
559 std::map<BeliefId, std::map<uint64_t, std::string>> temp(mdpStateToChoiceLabelsMap);
560 for (
auto const &entry : stateRemapping) {
561 temp[entry.second] = mdpStateToChoiceLabelsMap[entry.first];
563 mdpStateToChoiceLabelsMap = temp;
568 uint64_t entryCount = 0;
569 for (
auto const &row : exploredMdpTransitions) {
570 entryCount += row.size();
573 getCurrentNumberOfMdpStates());
574 for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) {
575 uint64_t rowIndex = exploredChoiceIndices[groupIndex];
576 uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1];
578 for (; rowIndex < groupEnd; ++rowIndex) {
579 for (
auto const &entry : exploredMdpTransitions[rowIndex]) {
580 if (stateRemapping.find(entry.first) == stateRemapping.end()) {
581 builder.
addNextValue(rowIndex, entry.first, entry.second);
583 builder.
addNextValue(rowIndex, stateRemapping[entry.first], entry.second);
588 auto mdpTransitionMatrix = builder.
build();
594 targetStates.resize(getCurrentNumberOfMdpStates(),
false);
595 mdpLabeling.
addLabel(
"target", std::move(targetStates));
596 truncatedStates.resize(getCurrentNumberOfMdpStates(),
false);
597 mdpLabeling.
addLabel(
"truncated", std::move(truncatedStates));
598 clippedStates.resize(getCurrentNumberOfMdpStates(),
false);
599 mdpLabeling.
addLabel(
"clipped", std::move(clippedStates));
601 for (uint64_t state = 0; state < getCurrentNumberOfMdpStates(); ++state) {
602 if (state == extraBottomState || state == extraTargetState) {
608 STORM_LOG_DEBUG(
"Observation of MDP state " << state <<
" : " << beliefManager->getObservationLabel(mdpStateToBeliefIdMap[state]) <<
"\n");
609 std::string obsLabel = beliefManager->getObservationLabel(mdpStateToBeliefIdMap[state]);
610 uint32_t obsId = beliefManager->getBeliefObservation(mdpStateToBeliefIdMap[state]);
611 if (!obsLabel.empty()) {
616 }
else if (mdpStateToBeliefIdMap[state] != beliefManager->noId()) {
617 std::string obsIdLabel =
"obs_" + std::to_string(obsId);
627 std::unordered_map<std::string, storm::models::sparse::StandardRewardModel<ValueType>> mdpRewardModels;
628 if (!mdpActionRewards.empty()) {
629 mdpActionRewards.resize(getCurrentNumberOfMdpChoices(), storm::utility::zero<ValueType>());
630 if (!clippingTransitionRewards.empty()) {
632 clippingTransitionRewards.size(),
true,
true, getCurrentNumberOfMdpStates());
633 for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) {
634 uint64_t rowIndex = exploredChoiceIndices[groupIndex];
635 uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1];
637 for (; rowIndex < groupEnd; ++rowIndex) {
638 if (clippingTransitionRewards.find(rowIndex) != clippingTransitionRewards.end()) {
639 STORM_LOG_ASSERT(extraTargetState.has_value(),
"Requested a transition to the extra target state but there is none.");
640 rewardBuilder.
addNextValue(rowIndex, extraTargetState.value(), clippingTransitionRewards[rowIndex]);
644 auto transitionRewardMatrix = rewardBuilder.
build();
646 std::optional<std::vector<ValueType>>(), std::move(mdpActionRewards), std::move(transitionRewardMatrix)));
648 mdpRewardModels.emplace(
657 if (!mdpStateToChoiceLabelsMap.empty()) {
659 for (
auto const &stateMap : mdpStateToChoiceLabelsMap) {
660 auto rowGroup = stateMap.first;
661 for (
auto const &actionLabel : stateMap.second) {
662 if (!modelComponents.
choiceLabeling->containsLabel(actionLabel.second)) {
665 modelComponents.
choiceLabeling->addLabelToChoice(actionLabel.second, exploredChoiceIndices.at(rowGroup) + actionLabel.first);
670 if (!delayedExplorationChoices.empty()) {
672 delayedExplorationChoices.resize(getCurrentNumberOfMdpChoices(),
false);
673 modelComponents.
choiceLabeling->addLabel(
"delayed", std::move(delayedExplorationChoices));
677 exploredMdp = std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(modelComponents));
678 status = Status::ModelFinished;
679 STORM_LOG_DEBUG(
"Explored Mdp with " << exploredMdp->getNumberOfStates() <<
" states (" << clippedStates.getNumberOfSetBits()
680 <<
" of which were clipped and " << truncatedStates.getNumberOfSetBits() - clippedStates.getNumberOfSetBits()
681 <<
" of which were flagged as truncated).");
684template<
typename PomdpType,
typename BeliefValueType>
686 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
687 STORM_LOG_ASSERT(!hasUnexploredState(),
"Finishing exploration not possible if there are still unexplored states.");
689 STORM_LOG_ASSERT(exploredMdp,
"Method called although no 'old' MDP is available.");
692 storm::storage::BitVector relevantMdpStates(getCurrentNumberOfMdpStates(),
true), relevantMdpChoices(getCurrentNumberOfMdpChoices(),
true);
693 std::vector<MdpStateType> toRelevantStateIndexMap(getCurrentNumberOfMdpStates(), noState());
695 for (uint64_t groupIndex = 0; groupIndex < exploredChoiceIndices.size() - 1; ++groupIndex) {
696 uint64_t rowIndex = exploredChoiceIndices[groupIndex];
698 if (exploredMdpTransitions[rowIndex].empty()) {
699 relevantMdpChoices.
set(rowIndex,
false);
700 relevantMdpStates.set(groupIndex,
false);
702 toRelevantStateIndexMap[groupIndex] = nextRelevantIndex;
705 uint64_t groupEnd = exploredChoiceIndices[groupIndex + 1];
707 for (++rowIndex; rowIndex < groupEnd; ++rowIndex) {
709 STORM_LOG_ASSERT(exploredMdpTransitions[rowIndex].empty() != relevantMdpStates.get(groupIndex),
710 "Actions at 'old' MDP state " << groupIndex <<
" were only partly explored.");
711 if (exploredMdpTransitions[rowIndex].empty()) {
712 relevantMdpChoices.
set(rowIndex,
false);
717 if (relevantMdpStates.full()) {
722 nextId -= (relevantMdpStates.size() - relevantMdpStates.getNumberOfSetBits());
727 for (
auto belIdToMdpStateIt = beliefIdToMdpStateMap.begin(); belIdToMdpStateIt != beliefIdToMdpStateMap.end();) {
728 if (relevantMdpStates.get(belIdToMdpStateIt->second)) {
730 belIdToMdpStateIt->second = toRelevantStateIndexMap[belIdToMdpStateIt->second];
734 "Inconsistent exploration information: Unexplored MDPState corresponds to explored beliefId");
737 beliefIdToMdpStateMap.erase(belIdToMdpStateIt++);
745 for (
auto &transitions : exploredMdpTransitions) {
746 std::map<MdpStateType, ValueType> newTransitions;
747 for (
auto const &entry : transitions) {
748 STORM_LOG_ASSERT(relevantMdpStates.get(entry.first),
"Relevant state has transition to irrelevant state.");
749 newTransitions.emplace_hint(newTransitions.end(), toRelevantStateIndexMap[entry.first], entry.second);
751 transitions = std::move(newTransitions);
756 assert(exploredChoiceIndices[0] == 0u);
758 for (
auto const oldState : relevantMdpStates) {
759 if (oldState != newState) {
760 assert(oldState > newState);
761 uint64_t groupSize = getRowGroupSizeOfState(oldState);
762 exploredChoiceIndices.at(newState + 1) = exploredChoiceIndices.at(newState) + groupSize;
766 exploredChoiceIndices.resize(newState + 1);
768 if (!mdpActionRewards.empty()) {
771 if (extraBottomState) {
772 extraBottomState = toRelevantStateIndexMap[extraBottomState.value()];
774 if (extraTargetState) {
775 extraTargetState = toRelevantStateIndexMap[extraTargetState.value()];
777 targetStates = targetStates % relevantMdpStates;
778 truncatedStates = truncatedStates % relevantMdpStates;
779 clippedStates = clippedStates % relevantMdpStates;
780 initialMdpState = toRelevantStateIndexMap[initialMdpState];
787 if (!mdpStateToChoiceLabelsMap.empty()) {
788 auto temp = std::map<BeliefId, std::map<uint64_t, std::string>>();
789 for (
auto const relevantState : relevantMdpStates) {
790 temp[toRelevantStateIndexMap[relevantState]] = mdpStateToChoiceLabelsMap[relevantState];
792 mdpStateToChoiceLabelsMap = temp;
797template<
typename PomdpType,
typename BeliefValueType>
798std::shared_ptr<storm::models::sparse::Mdp<typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType>>
800 STORM_LOG_ASSERT(status == Status::ModelFinished || status == Status::ModelChecked,
"Method call is invalid in current status.");
801 STORM_LOG_ASSERT(exploredMdp,
"Tried to get the explored MDP but exploration was not finished yet.");
805template<
typename PomdpType,
typename BeliefValueType>
807 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
808 return mdpStateToBeliefIdMap.size();
811template<
typename PomdpType,
typename BeliefValueType>
813 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
814 return exploredMdpTransitions.size();
817template<
typename PomdpType,
typename BeliefValueType>
819 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
820 assert(getCurrentMdpState() < exploredChoiceIndices.size());
821 return exploredChoiceIndices.at(getCurrentMdpState());
824template<
typename PomdpType,
typename BeliefValueType>
826 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
827 assert(getCurrentMdpState() < exploredChoiceIndices.size() - 1);
828 return exploredChoiceIndices.at(getCurrentMdpState() + 1) - exploredChoiceIndices.at(getCurrentMdpState());
831template<
typename PomdpType,
typename BeliefValueType>
833 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
834 assert(state < exploredChoiceIndices.size());
835 if (state < exploredChoiceIndices.size() - 1) {
836 return exploredChoiceIndices.at(state + 1) - exploredChoiceIndices.at(state);
837 }
else if (state == exploredChoiceIndices.size() - 1) {
838 return exploredMdpTransitions.size() - exploredChoiceIndices.at(state);
844template<
typename PomdpType,
typename BeliefValueType>
846 return (currentStateHasOldBehavior() && getCurrentStateWasTruncated() && getCurrentMdpState() < exploredChoiceIndices.size() - 1 &&
847 getSizeOfCurrentRowGroup() != numActionsNeeded);
850template<
typename PomdpType,
typename BeliefValueType>
852 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
853 return lowerValueBounds[getCurrentMdpState()];
856template<
typename PomdpType,
typename BeliefValueType>
858 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
859 return upperValueBounds[getCurrentMdpState()];
862template<
typename PomdpType,
typename BeliefValueType>
865 STORM_LOG_ASSERT(!pomdpValueBounds.lower.empty(),
"Requested lower value bounds but none were available.");
866 auto it = pomdpValueBounds.lower.begin();
867 ValueType result = beliefManager->getWeightedSum(beliefId, *it);
868 for (++it; it != pomdpValueBounds.lower.end(); ++it) {
869 result = std::max(result, beliefManager->getWeightedSum(beliefId, *it));
874template<
typename PomdpType,
typename BeliefValueType>
877 STORM_LOG_ASSERT(!pomdpValueBounds.upper.empty(),
"Requested upper value bounds but none were available.");
878 auto it = pomdpValueBounds.upper.begin();
879 ValueType result = beliefManager->getWeightedSum(beliefId, *it);
880 for (++it; it != pomdpValueBounds.upper.end(); ++it) {
881 result = std::min(result, beliefManager->getWeightedSum(beliefId, *it));
886template<
typename PomdpType,
typename BeliefValueType>
888 BeliefId const &beliefId, uint64_t schedulerId)
const {
889 STORM_LOG_ASSERT(!pomdpValueBounds.lower.empty(),
"Requested lower value bounds but none were available.");
890 STORM_LOG_ASSERT(pomdpValueBounds.lower.size() > schedulerId,
"Requested lower value bound for scheduler with ID " << schedulerId <<
" not available.");
891 return beliefManager->getWeightedSum(beliefId, pomdpValueBounds.lower[schedulerId]);
895template<
typename PomdpType,
typename BeliefValueType>
897 BeliefId const &beliefId, uint64_t schedulerId)
const {
898 STORM_LOG_ASSERT(!pomdpValueBounds.upper.empty(),
"Requested upper value bounds but none were available.");
899 STORM_LOG_ASSERT(pomdpValueBounds.upper.size() > schedulerId,
"Requested upper value bound for scheduler with ID " << schedulerId <<
" not available.");
900 return beliefManager->getWeightedSum(beliefId, pomdpValueBounds.upper[schedulerId]);
903template<
typename PomdpType,
typename BeliefValueType>
904std::pair<bool, typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType>
906 STORM_LOG_ASSERT(!fmSchedulerValueList.empty(),
"Requested finite memory scheduler value bounds but none were available.");
907 auto obs = beliefManager->getBeliefObservation(beliefId);
908 STORM_LOG_ASSERT(fmSchedulerValueList.size() > obs,
"Requested value bound for observation " << obs <<
" not available.");
910 "Requested value bound for observation " << obs <<
" and memory node " << memoryNode <<
" not available.");
911 return beliefManager->getWeightedSum(beliefId, fmSchedulerValueList.at(obs).at(memoryNode));
914template<
typename PomdpType,
typename BeliefValueType>
916 STORM_LOG_ASSERT(status == Status::ModelFinished,
"Method call is invalid in current status.");
917 STORM_LOG_ASSERT(exploredMdp,
"Tried to compute values but the MDP is not explored");
918 auto property = createStandardProperty(dir, exploredMdp->hasRewardModel());
919 auto task = createStandardCheckTask(property);
921 std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(env, exploredMdp, task));
923 values = std::move(res->asExplicitQuantitativeCheckResult<
ValueType>().getValueVector());
924 scheduler = std::make_shared<storm::storage::Scheduler<ValueType>>(res->asExplicitQuantitativeCheckResult<
ValueType>().getScheduler());
926 "Computed values are smaller than the lower bound.");
928 "Computed values are larger than the upper bound.");
933 status = Status::ModelChecked;
936template<
typename PomdpType,
typename BeliefValueType>
938 return status == Status::ModelChecked;
941template<
typename PomdpType,
typename BeliefValueType>
944 STORM_LOG_ASSERT(status == Status::ModelChecked,
"Method call is invalid in current status.");
948template<
typename PomdpType,
typename BeliefValueType>
949const std::shared_ptr<storm::storage::Scheduler<typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType>> &
951 STORM_LOG_ASSERT(status == Status::ModelChecked,
"Method call is invalid in current status.");
955template<
typename PomdpType,
typename BeliefValueType>
957 STORM_LOG_ASSERT(status == Status::ModelChecked,
"Method call is invalid in current status.");
958 STORM_LOG_ASSERT(exploredMdp,
"Tried to get a value but no MDP was explored.");
959 return getValuesOfExploredMdp()[exploredMdp->getInitialStates().getNextSetIndex(0)];
962template<
typename PomdpType,
typename BeliefValueType>
965 STORM_LOG_ASSERT(status != Status::Uninitialized,
"Method call is invalid in current status.");
966 return mdpStateToBeliefIdMap[exploredMdpState];
969template<
typename PomdpType,
typename BeliefValueType>
971 uint64_t localActionIndex, std::map<uint32_t, SuccessorObservationInformation> &gatheredSuccessorObservations) {
972 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
973 STORM_LOG_ASSERT(currentStateHasOldBehavior(),
"Method call is invalid since the current state has no old behavior");
974 uint64_t mdpChoice = getStartOfCurrentRowGroup() + localActionIndex;
975 gatherSuccessorObservationInformationAtMdpChoice(mdpChoice, gatheredSuccessorObservations);
978template<
typename PomdpType,
typename BeliefValueType>
980 uint64_t mdpChoice, std::map<uint32_t, SuccessorObservationInformation> &gatheredSuccessorObservations) {
981 STORM_LOG_ASSERT(exploredMdp,
"Method call is invalid if no MDP has been explored before");
982 for (
auto const &entry : exploredMdp->getTransitionMatrix().getRow(mdpChoice)) {
983 auto const &beliefId = getBeliefId(entry.getColumn());
984 if (beliefId != beliefManager->noId()) {
985 auto const &obs = beliefManager->getBeliefObservation(beliefId);
987 auto obsInsertion = gatheredSuccessorObservations.emplace(obs, info);
988 if (!obsInsertion.second) {
990 obsInsertion.first->second.join(info);
992 beliefManager->joinSupport(beliefId, obsInsertion.first->second.support);
997template<
typename PomdpType,
typename BeliefValueType>
1000 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
1001 STORM_LOG_ASSERT(currentStateHasOldBehavior(),
"Method call is invalid since the current state has no old behavior");
1002 uint64_t mdpChoice = previousChoiceIndices.at(getCurrentMdpState()) + localActionIndex;
1003 return std::any_of(exploredMdp->getTransitionMatrix().getRow(mdpChoice).begin(), exploredMdp->getTransitionMatrix().getRow(mdpChoice).end(),
1005 return observationSet.get(beliefManager->getBeliefObservation(getBeliefId(i.getColumn())));
1009template<
typename PomdpType,
typename BeliefValueType>
1011 STORM_LOG_ASSERT(status == Status::ModelChecked,
"Method call is invalid in current status.");
1012 upperValueBounds = values;
1015template<
typename PomdpType,
typename BeliefValueType>
1017 STORM_LOG_ASSERT(status == Status::ModelChecked,
"Method call is invalid in current status.");
1018 lowerValueBounds = values;
1021template<
typename PomdpType,
typename BeliefValueType>
1023 bool relativeDifference) {
1024 STORM_LOG_ASSERT(status == Status::ModelChecked,
"Method call is invalid in current status.");
1025 STORM_LOG_ASSERT(exploredMdp,
"Method call is invalid in if no MDP is available.");
1026 STORM_LOG_ASSERT(!optimalChoices.has_value(),
"Tried to compute optimal scheduler but this has already been done before.");
1028 "Tried to compute states that are reachable under an optimal scheduler but this has already been done before.");
1032 auto const &choiceIndices = exploredMdp->getNondeterministicChoiceIndices();
1033 auto const &transitions = exploredMdp->getTransitionMatrix();
1034 auto const &targetStatesExploredMDP = exploredMdp->getStates(
"target");
1035 for (uint64_t mdpState = 0; mdpState < exploredMdp->getNumberOfStates(); ++mdpState) {
1036 if (targetStatesExploredMDP.get(mdpState)) {
1040 auto const &stateValue = values[mdpState];
1041 for (uint64_t globalChoice = choiceIndices[mdpState]; globalChoice < choiceIndices[mdpState + 1]; ++globalChoice) {
1042 ValueType choiceValue = transitions.multiplyRowWithVector(globalChoice, values);
1043 if (exploredMdp->hasRewardModel()) {
1044 choiceValue += exploredMdp->getUniqueRewardModel().getStateActionReward(globalChoice);
1046 auto absDiff = storm::utility::abs<ValueType>((choiceValue - stateValue));
1047 if ((relativeDifference && absDiff <= ancillaryChoicesEpsilon * stateValue) || (!relativeDifference && absDiff <= ancillaryChoicesEpsilon)) {
1048 optimalChoices->set(globalChoice,
true);
1051 STORM_LOG_ASSERT(optimalChoices->getNextSetIndex(choiceIndices[mdpState]) < optimalChoices->size(),
"Could not find an optimal choice.");
1057 targetStatesExploredMDP,
false, 0, optimalChoices.value());
1060template<
typename PomdpType,
typename BeliefValueType>
1062 return getExploredMdpState(beliefId) != noState();
1065template<
typename PomdpType,
typename BeliefValueType>
1067 return std::numeric_limits<MdpStateType>::max();
1070template<
typename PomdpType,
typename BeliefValueType>
1071std::shared_ptr<storm::logic::Formula const> BeliefMdpExplorer<PomdpType, BeliefValueType>::createStandardProperty(
1073 std::string propertyString = computeRewards ?
"R" :
"P";
1075 propertyString +=
"=? [F \"target\"]";
1080template<
typename PomdpType,
typename BeliefValueType>
1082BeliefMdpExplorer<PomdpType, BeliefValueType>::createStandardCheckTask(std::shared_ptr<storm::logic::Formula const> &property) {
1085 auto task = storm::api::createTask<ValueType>(property,
false);
1087 hint.setResultHint(values);
1088 auto hintPtr = std::make_shared<storm::modelchecker::ExplicitModelCheckerHint<ValueType>>(hint);
1089 task.setHint(hintPtr);
1090 task.setProduceSchedulers();
1094template<
typename PomdpType,
typename BeliefValueType>
1096 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
1097 if (stateRemapping.find(currentMdpState) != stateRemapping.end()) {
1098 return stateRemapping.at(currentMdpState);
1100 return currentMdpState;
1104template<
typename PomdpType,
typename BeliefValueType>
1106 STORM_LOG_ASSERT(status == Status::Exploring,
"Method call is invalid in current status.");
1107 return getBeliefId(currentMdpState);
1110template<
typename PomdpType,
typename BeliefValueType>
1111void BeliefMdpExplorer<PomdpType, BeliefValueType>::internalAddTransition(uint64_t
const &row, MdpStateType
const &column, ValueType
const &value) {
1112 STORM_LOG_ASSERT(row <= exploredMdpTransitions.size(),
"Skipped at least one row.");
1113 if (row == exploredMdpTransitions.size()) {
1114 exploredMdpTransitions.emplace_back();
1116 STORM_LOG_ASSERT(exploredMdpTransitions[row].
count(column) == 0,
"Trying to insert multiple transitions to the same state.");
1117 exploredMdpTransitions[row][column] = value;
1120template<
typename PomdpType,
typename BeliefValueType>
1121void BeliefMdpExplorer<PomdpType, BeliefValueType>::internalAddRowGroupIndex() {
1122 exploredChoiceIndices.push_back(getCurrentNumberOfMdpChoices());
1125template<
typename PomdpType,
typename BeliefValueType>
1127 gridBeliefs.insert(beliefId);
1130template<
typename PomdpType,
typename BeliefValueType>
1132 return gridBeliefs.count(beliefId) > 0;
1135template<
typename PomdpType,
typename BeliefValueType>
1138 if (beliefId < exploredBeliefIds.size() && exploredBeliefIds.get(beliefId)) {
1139 return beliefIdToMdpStateMap.at(beliefId);
1145template<
typename PomdpType,
typename BeliefValueType>
1147 lowerValueBounds.push_back(lowerBound);
1148 upperValueBounds.push_back(upperBound);
1150 values.push_back((lowerBound + upperBound) / storm::utility::convertNumber<ValueType, uint64_t>(2));
1151 STORM_LOG_ASSERT(lowerValueBounds.size() == getCurrentNumberOfMdpStates(),
"Value vectors have different size then number of available states.");
1152 STORM_LOG_ASSERT(lowerValueBounds.size() == upperValueBounds.size() && values.size() == upperValueBounds.size(),
"Value vectors have inconsistent size.");
1155template<
typename PomdpType,
typename BeliefValueType>
1157 BeliefId
const &beliefId, ValueType
const &transitionValue) {
1158 exploredBeliefIds.grow(beliefId + 1,
false);
1159 if (exploredBeliefIds.get(beliefId)) {
1161 mdpStatesToExploreStatePrio.find(beliefIdToMdpStateMap[beliefId]) != mdpStatesToExploreStatePrio.end()) {
1163 auto newPrio = probabilityEstimation[getCurrentMdpState()] * transitionValue;
1164 if (newPrio > mdpStatesToExploreStatePrio[beliefIdToMdpStateMap[beliefId]]) {
1166 auto range = mdpStatesToExplorePrioState.equal_range(mdpStatesToExploreStatePrio[beliefIdToMdpStateMap[beliefId]]);
1167 for (
auto i = range.first; i != range.second; ++i) {
1168 if (i->second == beliefIdToMdpStateMap[beliefId]) {
1169 mdpStatesToExplorePrioState.erase(i);
1173 mdpStatesToExplorePrioState.emplace(newPrio, beliefIdToMdpStateMap[beliefId]);
1174 mdpStatesToExploreStatePrio[beliefIdToMdpStateMap[beliefId]] = newPrio;
1177 return beliefIdToMdpStateMap[beliefId];
1180 exploredBeliefIds.set(beliefId,
true);
1184 auto findRes = beliefIdToMdpStateMap.find(beliefId);
1185 if (findRes != beliefIdToMdpStateMap.end()) {
1187 switch (explHeuristic) {
1190 prio = prio - storm::utility::one<ValueType>();
1193 currentPrio = getLowerValueBoundAtCurrentState();
1196 currentPrio = getUpperValueBoundAtCurrentState();
1199 currentPrio = getUpperValueBoundAtCurrentState() - getLowerValueBoundAtCurrentState();
1202 if (getCurrentMdpState() != noState()) {
1203 currentPrio = probabilityEstimation[getCurrentMdpState()] * transitionValue;
1205 currentPrio = storm::utility::one<ValueType>();
1209 STORM_LOG_THROW(
false, storm::exceptions::NotImplementedException,
"Other heuristics not implemented yet");
1211 mdpStatesToExploreStatePrio[findRes->second] = currentPrio;
1212 mdpStatesToExplorePrioState.emplace(currentPrio, findRes->second);
1213 return findRes->second;
1217 MdpStateType result = getCurrentNumberOfMdpStates();
1218 assert(getCurrentNumberOfMdpStates() == mdpStateToBeliefIdMap.size());
1219 mdpStateToBeliefIdMap.push_back(beliefId);
1220 beliefIdToMdpStateMap[beliefId] = result;
1221 insertValueHints(computeLowerValueBoundAtBelief(beliefId), computeUpperValueBoundAtBelief(beliefId));
1223 switch (explHeuristic) {
1226 prio = prio - storm::utility::one<ValueType>();
1229 currentPrio = getLowerValueBoundAtCurrentState();
1232 currentPrio = getUpperValueBoundAtCurrentState();
1235 currentPrio = getUpperValueBoundAtCurrentState() - getLowerValueBoundAtCurrentState();
1238 if (getCurrentMdpState() != noState()) {
1239 currentPrio = probabilityEstimation[getCurrentMdpState()] * transitionValue;
1241 currentPrio = storm::utility::one<ValueType>();
1245 STORM_LOG_THROW(
false, storm::exceptions::NotImplementedException,
"Other heuristics not implemented yet");
1247 mdpStatesToExploreStatePrio[result] = currentPrio;
1248 mdpStatesToExplorePrioState.emplace(currentPrio, result);
1253template<
typename PomdpType,
typename BeliefValueType>
1255 mdpStateToChoiceLabelsMap[currentMdpState][localActionIndex] = label;
1258template<
typename PomdpType,
typename BeliefValueType>
1260 return mdpStateToBeliefIdMap;
1263template<
typename PomdpType,
typename BeliefValueType>
1265 uint32_t obs)
const {
1266 std::vector<BeliefId> res;
1267 for (
auto const &belief : mdpStateToBeliefIdMap) {
1268 if (belief != beliefManager->noId()) {
1269 if (beliefManager->getBeliefObservation(belief) == obs) {
1270 res.push_back(belief);
1277template<
typename PomdpType,
typename BeliefValueType>
1279 uint64_t
const &pomdpState) {
1280 return pomdpValueBounds.getSmallestUpperBound(pomdpState);
1283template<
typename PomdpType,
typename BeliefValueType>
1285 uint64_t
const &pomdpState) {
1286 return pomdpValueBounds.getHighestLowerBound(pomdpState);
1289template<
typename PomdpType,
typename BeliefValueType>
1291 extremeValueBound = valueBound;
1294template<
typename PomdpType,
typename BeliefValueType>
1296 fmSchedulerValueList = valueList;
1299template<
typename PomdpType,
typename BeliefValueType>
1301 return fmSchedulerValueList.at(observation).size();
1304template<
typename PomdpType,
typename BeliefValueType>
1306 const uint64_t &pomdpState) {
1310template<
typename PomdpType,
typename BeliefValueType>
1312 return extremeValueBound.isInfinite;
1315template<
typename PomdpType,
typename BeliefValueType>
1317 return pomdpValueBounds.upper.
size();
1320template<
typename PomdpType,
typename BeliefValueType>
1322 return pomdpValueBounds.lower.size();
1325template<
typename PomdpType,
typename BeliefValueType>
1328 STORM_LOG_ASSERT(!pomdpValueBounds.lowerSchedulers.empty(),
"Requested lower bound scheduler but none were available.");
1330 "Requested lower value bound scheduler with ID " << schedulerId <<
" not available.");
1331 return pomdpValueBounds.lowerSchedulers[schedulerId];
1334template<
typename PomdpType,
typename BeliefValueType>
1337 STORM_LOG_ASSERT(!pomdpValueBounds.upperSchedulers.empty(),
"Requested upper bound scheduler but none were available.");
1339 "Requested upper value bound scheduler with ID " << schedulerId <<
" not available.");
1340 return pomdpValueBounds.upperSchedulers[schedulerId];
1343template<
typename PomdpType,
typename BeliefValueType>
1344std::vector<storm::storage::Scheduler<typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType>>
1346 STORM_LOG_ASSERT(!pomdpValueBounds.lowerSchedulers.empty(),
"Requested lower bound schedulers but none were available.");
1347 return pomdpValueBounds.lowerSchedulers;
1350template<
typename PomdpType,
typename BeliefValueType>
1351std::vector<storm::storage::Scheduler<typename BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType>>
1353 STORM_LOG_ASSERT(!pomdpValueBounds.upperSchedulers.empty(),
"Requested upper bound schedulers but none were available.");
1354 return pomdpValueBounds.upperSchedulers;
1357template<
typename PomdpType,
typename BeliefValueType>
1360 return !fmSchedulerValueList.empty();
1363template<
typename PomdpType,
typename BeliefValueType>
1366 return beliefManager->computeMatrixBeliefProduct(beliefId, matrix);
1369template<
typename PomdpType,
typename BeliefValueType>
1371 uint64_t currentRowGroupSize = getSizeOfCurrentRowGroup();
1372 assert(totalNumberOfActions != currentRowGroupSize);
1373 if (totalNumberOfActions > currentRowGroupSize) {
1374 uint64_t numberOfActionsToAdd = totalNumberOfActions - currentRowGroupSize;
1375 exploredMdpTransitions.insert(exploredMdpTransitions.begin() + (exploredChoiceIndices[getCurrentMdpState() + 1]), numberOfActionsToAdd,
1376 std::map<MdpStateType, ValueType>());
1377 for (uint64_t i = getCurrentMdpState() + 1; i < exploredChoiceIndices.size(); i++) {
1378 exploredChoiceIndices[i] += numberOfActionsToAdd;
1382 if (totalNumberOfActions < currentRowGroupSize) {
1383 uint64_t numberOfActionsToRemove = currentRowGroupSize - totalNumberOfActions;
1384 exploredMdpTransitions.erase(exploredMdpTransitions.begin() + (exploredChoiceIndices[getCurrentMdpState() + 1]) - numberOfActionsToRemove,
1385 exploredMdpTransitions.begin() + (exploredChoiceIndices[getCurrentMdpState() + 1]));
1386 for (uint64_t i = getCurrentMdpState() + 1; i < exploredChoiceIndices.size(); i++) {
1387 exploredChoiceIndices[i] -= numberOfActionsToRemove;
MdpStateType getExploredMdpState(BeliefId const &beliefId) const
void storeExplorationState()
void addClippingRewardToCurrentState(uint64_t const &localActionIndex, ValueType rewardValue)
ValueType computeUpperValueBoundAtBelief(BeliefId const &beliefId) const
uint64_t getNrSchedulersForLowerBounds()
MdpStateType getCurrentNumberOfMdpStates() const
ValueType const & getComputedValueAtInitialState() const
void restartExploration()
Restarts the exploration to allow re-exploring each state.
bool currentStateHasSuccessorObservationInObservationSet(uint64_t localActionIndex, storm::storage::BitVector const &observationSet)
ValueType getTrivialLowerBoundAtPOMDPState(uint64_t const &pomdpState)
void setFMSchedValueList(std::vector< std::vector< std::unordered_map< uint64_t, ValueType > > > valueList)
ValueType getTrivialUpperBoundAtPOMDPState(uint64_t const &pomdpState)
void addTransitionsToExtraStates(uint64_t const &localActionIndex, ValueType const &targetStateValue=storm::utility::zero< ValueType >(), ValueType const &bottomStateValue=storm::utility::zero< ValueType >())
bool beliefHasMdpState(BeliefId const &beliefId) const
std::vector< ValueType > const & getValuesOfExploredMdp() const
storm::storage::BitVector getStateExtremeBoundIsInfinite()
void setCurrentStateIsClipped()
void setCurrentStateIsTarget()
storm::storage::Scheduler< ValueType > getLowerValueBoundScheduler(uint64_t schedulerId) const
void restoreExplorationState()
void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map< uint32_t, SuccessorObservationInformation > &gatheredSuccessorObservations)
uint64_t getSizeOfCurrentRowGroup() const
void takeCurrentValuesAsLowerBounds()
bool hasComputedValues() const
void startNewExploration(std::optional< ValueType > extraTargetStateValue=boost::none, std::optional< ValueType > extraBottomStateValue=std::nullopt)
bool getCurrentStateWasTruncated() const
BeliefMdpExplorer(std::shared_ptr< BeliefManagerType > beliefManager, storm::pomdp::storage::PreprocessingPomdpValueBounds< ValueType > const &pomdpValueBounds, ExplorationHeuristic explorationHeuristic=ExplorationHeuristic::BreadthFirst)
bool actionIsOptimal(uint64_t const &globalActionIndex) const
Retrieves whether the given action at the current state was optimal in the most recent check.
bool needsActionAdjustment(uint64_t numActionsNeeded)
std::vector< BeliefId > getBeliefsWithObservationInMdp(uint32_t obs) const
ValueType getExtremeValueBoundAtPOMDPState(uint64_t const &pomdpState)
MdpStateType getBeliefId(MdpStateType exploredMdpState) const
void computeValuesOfExploredMdp(storm::Environment const &env, storm::solver::OptimizationDirection const &dir)
std::vector< BeliefId > getBeliefsInMdp()
std::shared_ptr< storm::models::sparse::Mdp< ValueType > > getExploredMdp() const
storm::storage::Scheduler< ValueType > getUpperValueBoundScheduler(uint64_t schedulerId) const
std::vector< storm::storage::Scheduler< ValueType > > getLowerValueBoundSchedulers() const
bool getCurrentStateActionExplorationWasDelayed(uint64_t const &localActionIndex) const
BeliefManagerType const & getBeliefManager() const
PomdpType::ValueType ValueType
bool hasUnexploredState() const
void dropUnexploredStates()
void setCurrentStateIsTruncated()
std::pair< bool, ValueType > computeFMSchedulerValueForMemoryNode(BeliefId const &beliefId, uint64_t memoryNode) const
void setExtremeValueBound(storm::pomdp::storage::ExtremePOMDPValueBound< ValueType > valueBound)
bool hasFMSchedulerValues() const
void addChoiceLabelToCurrentState(uint64_t const &localActionIndex, std::string const &label)
std::vector< uint64_t > getUnexploredStates()
void restoreOldBehaviorAtCurrentState(uint64_t const &localActionIndex)
Inserts transitions and rewards at the given action as in the MDP of the previous exploration.
ValueType computeLowerValueBoundAtBelief(BeliefId const &beliefId) const
uint64_t getRowGroupSizeOfState(uint64_t state) const
bool currentStateHasOldBehavior() const
const std::shared_ptr< storm::storage::Scheduler< BeliefMdpExplorer< PomdpType, BeliefValueType >::ValueType > > & getSchedulerForExploredMdp() const
MdpStateType getStartOfCurrentRowGroup() const
ValueType computeLowerValueBoundForScheduler(BeliefId const &beliefId, uint64_t schedulerId) const
bool currentStateIsOptimalSchedulerReachable() const
Retrieves whether the current state can be reached under a scheduler that was optimal in the most rec...
void addRewardToCurrentState(uint64_t const &localActionIndex, ValueType rewardValue)
Adds the provided reward value to the given action of the current state.
void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map< uint32_t, SuccessorObservationInformation > &gatheredSuccessorObservations)
uint64_t getNrOfMemoryNodesForObservation(uint32_t observation) const
void computeRewardAtCurrentState(uint64_t const &localActionIndex, ValueType extraReward=storm::utility::zero< ValueType >())
void computeOptimalChoicesAndReachableMdpStates(ValueType const &ancillaryChoicesEpsilon, bool relativeDifference)
Computes the set of states that are reachable via a path that is consistent with an optimal MDP sched...
uint64_t getNrSchedulersForUpperBounds()
bool isMarkedAsGridBelief(BeliefId const &beliefId)
ValueType getUpperValueBoundAtCurrentState() const
BeliefId exploreNextState()
void markAsGridBelief(BeliefId const &beliefId)
ValueType computeUpperValueBoundForScheduler(BeliefId const &beliefId, uint64_t schedulerId) const
void adjustActions(uint64_t totalNumberOfActions)
bool actionAtCurrentStateWasOptimal(uint64_t const &localActionIndex) const
Retrieves whether the given action at the current state was optimal in the most recent check.
void setCurrentChoiceIsDelayed(uint64_t const &localActionIndex)
std::vector< BeliefValueType > computeProductWithSparseMatrix(BeliefId const &beliefId, storm::storage::SparseMatrix< BeliefValueType > &matrix) const
void addSelfloopTransition(uint64_t const &localActionIndex=0, ValueType const &value=storm::utility::one< ValueType >())
std::vector< storm::storage::Scheduler< ValueType > > getUpperValueBoundSchedulers() const
ValueType getLowerValueBoundAtCurrentState() const
bool getCurrentStateWasClipped() const
bool addTransitionToBelief(uint64_t const &localActionIndex, BeliefId const &transitionTarget, ValueType const &value, bool ignoreNewBeliefs)
Adds the next transition to the given successor belief.
void takeCurrentValuesAsUpperBounds()
bool stateIsOptimalSchedulerReachable(MdpStateType mdpState) const
Retrieves whether the current state can be reached under an optimal scheduler This requires a previou...
BeliefManagerType::BeliefId BeliefId
MdpStateType getCurrentNumberOfMdpChoices() const
This class contains information that might accelerate the model checking process.
This class manages the labeling of the choice space with a number of (atomic) labels.
void addLabel(std::string const &label)
Adds a new label to the labelings.
bool containsLabel(std::string const &label) const
Checks whether a label is registered within this labeling.
This class manages the labeling of the state space with a number of (atomic) labels.
void addLabelToState(std::string const &label, storm::storage::sparse::state_type state)
Adds a label to a given state.
A bit vector that is internally represented as a vector of 64-bit values.
void set(uint_fast64_t index, bool value=true)
Sets the given truth value at the given index.
size_t size() const
Retrieves the number of bits this bit vector can store.
This class defines which action is chosen in a particular state of a non-deterministic model.
A class that can be used to build a sparse matrix by adding value by value.
void addNextValue(index_type row, index_type column, value_type const &value)
Sets the matrix entry at the given row and column to the given value.
void newRowGroup(index_type startingRow)
Starts a new row group in the matrix.
SparseMatrix< value_type > build(index_type overriddenRowCount=0, index_type overriddenColumnCount=0, index_type overriddenRowGroupCount=0)
A class that holds a possibly non-square matrix in the compressed row storage format.
#define STORM_LOG_DEBUG(message)
#define STORM_LOG_ERROR(message)
#define STORM_LOG_ASSERT(cond, message)
#define STORM_LOG_THROW(cond, exception, message)
#define STORM_LOG_WARN_COND_DEBUG(cond, message)
std::vector< storm::jani::Property > parseProperties(storm::parser::FormulaParser &formulaParser, std::string const &inputString, boost::optional< std::set< std::string > > const &propertyFilter)
std::vector< std::shared_ptr< storm::logic::Formula const > > extractFormulasFromProperties(std::vector< storm::jani::Property > const &properties)
SFTBDDChecker::ValueType ValueType
std::pair< storm::RationalNumber, storm::RationalNumber > count(std::vector< storm::storage::BitVector > const &origSets, std::vector< storm::storage::BitVector > const &intersects, std::vector< storm::storage::BitVector > const &intersectsInfo, storm::RationalNumber val, bool plus, uint64_t remdepth)
bool constexpr minimize(OptimizationDirection d)
storm::storage::BitVector getReachableStates(storm::storage::SparseMatrix< T > const &transitionMatrix, storm::storage::BitVector const &initialStates, storm::storage::BitVector const &constraintStates, storm::storage::BitVector const &targetStates, bool useStepBound, uint_fast64_t maximalSteps, boost::optional< storm::storage::BitVector > const &choiceFilter)
Performs a forward depth-first search through the underlying graph structure to identify the states t...
bool isTerminate()
Check whether the program should terminate (due to some abort signal).
bool compareElementWise(std::vector< T > const &left, std::vector< T > const &right, Comparator comp=std::less< T >())
void filterVectorInPlace(std::vector< Type > &v, storm::storage::BitVector const &filter)
bool isZero(ValueType const &a)
std::string to_string(ValueType const &value)
Struct to store the extreme bound values needed for the reward correction values when clipping is use...
ValueType getValueForState(uint64_t const &state)
Get the extreme bound value for a given state.
Struct for storing precomputed values bounding the actual values on the POMDP.
std::optional< storm::models::sparse::ChoiceLabeling > choiceLabeling