Storm
A Modern Probabilistic Model Checker
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
BeliefMdpExplorer.h
Go to the documentation of this file.
1#pragma once
2
3#include <deque>
4#include <map>
5#include <memory>
6#include <optional>
7#include <queue>
8#include <vector>
9
14
15namespace storm {
16class Environment;
17
18namespace modelchecker {
19template<typename FormulaType, typename ValueType>
20class CheckTask;
21class CheckResult;
22} // namespace modelchecker
23namespace builder {
25
26template<typename PomdpType, typename BeliefValueType = typename PomdpType::ValueType>
28 public:
29 typedef typename PomdpType::ValueType ValueType;
32 typedef uint64_t MdpStateType;
33
42
44
45 BeliefMdpExplorer(std::shared_ptr<BeliefManagerType> beliefManager, storm::pomdp::storage::PreprocessingPomdpValueBounds<ValueType> const &pomdpValueBounds,
47
49
51
52 void startNewExploration(std::optional<ValueType> extraTargetStateValue = boost::none, std::optional<ValueType> extraBottomStateValue = std::nullopt);
53
60 void restartExploration();
61
62 bool hasUnexploredState() const;
63
64 std::vector<uint64_t> getUnexploredStates();
65
67
68 void addChoiceLabelToCurrentState(uint64_t const &localActionIndex, std::string const &label);
69
70 void addTransitionsToExtraStates(uint64_t const &localActionIndex, ValueType const &targetStateValue = storm::utility::zero<ValueType>(),
71 ValueType const &bottomStateValue = storm::utility::zero<ValueType>());
72
73 void addSelfloopTransition(uint64_t const &localActionIndex = 0, ValueType const &value = storm::utility::one<ValueType>());
74
83 bool addTransitionToBelief(uint64_t const &localActionIndex, BeliefId const &transitionTarget, ValueType const &value, bool ignoreNewBeliefs);
84
85 void computeRewardAtCurrentState(uint64_t const &localActionIndex, ValueType extraReward = storm::utility::zero<ValueType>());
86
93 void addRewardToCurrentState(uint64_t const &localActionIndex, ValueType rewardValue);
94
96
98
100
101 void setCurrentChoiceIsDelayed(uint64_t const &localActionIndex);
102
103 bool currentStateHasOldBehavior() const;
104
105 bool getCurrentStateWasTruncated() const;
106
107 bool getCurrentStateWasClipped() const;
108
114
119 bool actionIsOptimal(uint64_t const &globalActionIndex) const;
120
126
131 bool actionAtCurrentStateWasOptimal(uint64_t const &localActionIndex) const;
132
133 bool getCurrentStateActionExplorationWasDelayed(uint64_t const &localActionIndex) const;
134
141 void restoreOldBehaviorAtCurrentState(uint64_t const &localActionIndex);
142
143 void finishExploration();
144
146
147 std::shared_ptr<storm::models::sparse::Mdp<ValueType>> getExploredMdp() const;
148
150
152
154
155 uint64_t getSizeOfCurrentRowGroup() const;
156
157 uint64_t getRowGroupSizeOfState(uint64_t state) const;
158
159 bool needsActionAdjustment(uint64_t numActionsNeeded);
160
162
164
166
168
169 ValueType computeLowerValueBoundForScheduler(BeliefId const &beliefId, uint64_t schedulerId) const;
170
171 ValueType computeUpperValueBoundForScheduler(BeliefId const &beliefId, uint64_t schedulerId) const;
172
173 std::pair<bool, ValueType> computeFMSchedulerValueForMemoryNode(BeliefId const &beliefId, uint64_t memoryNode) const;
174
176
178
179 std::vector<storm::storage::Scheduler<ValueType>> getUpperValueBoundSchedulers() const;
180
181 std::vector<storm::storage::Scheduler<ValueType>> getLowerValueBoundSchedulers() const;
182
184
185 bool hasComputedValues() const;
186
187 bool hasFMSchedulerValues() const;
188
189 std::vector<ValueType> const &getValuesOfExploredMdp() const;
190
192
193 MdpStateType getBeliefId(MdpStateType exploredMdpState) const;
194
195 void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex,
196 std::map<uint32_t, SuccessorObservationInformation> &gatheredSuccessorObservations);
197
199 std::map<uint32_t, SuccessorObservationInformation> &gatheredSuccessorObservations);
200
201 bool currentStateHasSuccessorObservationInObservationSet(uint64_t localActionIndex, storm::storage::BitVector const &observationSet);
202
204
206
214 void computeOptimalChoicesAndReachableMdpStates(ValueType const &ancillaryChoicesEpsilon, bool relativeDifference);
215
216 std::vector<BeliefId> getBeliefsWithObservationInMdp(uint32_t obs) const;
217
218 std::vector<BeliefId> getBeliefsInMdp();
219
220 void addClippingRewardToCurrentState(uint64_t const &localActionIndex, ValueType rewardValue);
221
222 ValueType getTrivialUpperBoundAtPOMDPState(uint64_t const &pomdpState);
223
224 ValueType getTrivialLowerBoundAtPOMDPState(uint64_t const &pomdpState);
225
227
228 ValueType getExtremeValueBoundAtPOMDPState(uint64_t const &pomdpState);
229
230 MdpStateType getExploredMdpState(BeliefId const &beliefId) const;
231
232 bool beliefHasMdpState(BeliefId const &beliefId) const;
233
235
237
239
240 void markAsGridBelief(BeliefId const &beliefId);
241
242 bool isMarkedAsGridBelief(BeliefId const &beliefId);
243
244 const std::shared_ptr<storm::storage::Scheduler<BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType>> &getSchedulerForExploredMdp() const;
245
246 void setFMSchedValueList(std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> valueList);
247
248 uint64_t getNrOfMemoryNodesForObservation(uint32_t observation) const;
249
251
253
254 void adjustActions(uint64_t totalNumberOfActions);
255
256 std::vector<BeliefValueType> computeProductWithSparseMatrix(BeliefId const &beliefId, storm::storage::SparseMatrix<BeliefValueType> &matrix) const;
257
258 private:
259 MdpStateType noState() const;
260
261 std::shared_ptr<storm::logic::Formula const> createStandardProperty(storm::solver::OptimizationDirection const &dir, bool computeRewards);
262
263 storm::modelchecker::CheckTask<storm::logic::Formula, ValueType> createStandardCheckTask(std::shared_ptr<storm::logic::Formula const> &property);
264
265 MdpStateType getCurrentMdpState() const;
266
267 MdpStateType getCurrentBeliefId() const;
268
269 void internalAddTransition(uint64_t const &row, MdpStateType const &column, ValueType const &value);
270
271 void internalAddRowGroupIndex();
272
273 void insertValueHints(ValueType const &lowerBound, ValueType const &upperBound);
274
275 MdpStateType getOrAddMdpState(BeliefId const &beliefId, ValueType const &transitionValue = storm::utility::zero<ValueType>());
276
277 // Belief state related information
278 std::shared_ptr<BeliefManagerType> beliefManager;
279 std::vector<BeliefId> mdpStateToBeliefIdMap;
280 std::map<BeliefId, MdpStateType> beliefIdToMdpStateMap;
281 storm::storage::BitVector exploredBeliefIds;
282 std::map<BeliefId, std::map<uint64_t, std::string>> mdpStateToChoiceLabelsMap;
283
284 // Exploration information
285 std::multimap<ValueType, uint64_t> mdpStatesToExplorePrioState;
286 std::map<uint64_t, ValueType> mdpStatesToExploreStatePrio;
287 std::vector<ValueType> probabilityEstimation;
288 std::vector<std::map<MdpStateType, ValueType>> exploredMdpTransitions;
289 std::vector<MdpStateType> exploredChoiceIndices;
290 std::vector<MdpStateType> previousChoiceIndices;
291 std::vector<ValueType> mdpActionRewards;
292 std::map<MdpStateType, ValueType> clippingTransitionRewards;
293 uint64_t currentMdpState;
294 std::map<MdpStateType, MdpStateType> stateRemapping;
295 uint64_t nextId;
296 ValueType prio;
297
298 // Special states and choices during exploration
299 std::optional<MdpStateType> extraTargetState;
300 std::optional<MdpStateType> extraBottomState;
301 storm::storage::BitVector targetStates;
302 storm::storage::BitVector truncatedStates;
303 storm::storage::BitVector clippedStates;
304 MdpStateType initialMdpState;
305 storm::storage::BitVector delayedExplorationChoices;
306 std::unordered_set<BeliefId> gridBeliefs;
307
308 // Final Mdp
309 std::shared_ptr<storm::models::sparse::Mdp<ValueType>> exploredMdp;
310
311 // Value and scheduler related information
314 std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> fmSchedulerValueList;
315 std::vector<ValueType> lowerValueBounds;
316 std::vector<ValueType> upperValueBounds;
317 std::vector<ValueType> values; // Contains an estimate during building and the actual result after a check has performed
318 std::optional<storm::storage::BitVector> optimalChoices;
319 std::optional<storm::storage::BitVector> optimalChoicesReachableMdpStates;
320 std::shared_ptr<storm::storage::Scheduler<ValueType>> scheduler;
321
322 // The current status of this explorer
323 ExplorationHeuristic explHeuristic;
324 Status status;
325
326 struct ExplorationStorage {
327 std::vector<BeliefId> storedMdpStateToBeliefIdMap;
328 std::map<BeliefId, MdpStateType> storedBeliefIdToMdpStateMap;
329 storm::storage::BitVector storedExploredBeliefIds;
330 std::map<BeliefId, std::map<uint64_t, std::string>> storedMdpStateToChoiceLabelsMap;
331 std::multimap<ValueType, uint64_t> storedMdpStatesToExplorePrioState;
332 std::map<uint64_t, ValueType> storedMdpStatesToExploreStatePrio;
333 std::vector<ValueType> storedProbabilityEstimation;
334 std::vector<std::map<MdpStateType, ValueType>> storedExploredMdpTransitions;
335 std::vector<MdpStateType> storedExploredChoiceIndices;
336 std::vector<ValueType> storedMdpActionRewards;
337 std::map<MdpStateType, ValueType> storedClippingTransitionRewards;
338 uint64_t storedCurrentMdpState;
339 std::map<MdpStateType, MdpStateType> storedStateRemapping;
340 uint64_t storedNextId;
341 ValueType storedPrio;
342 std::vector<ValueType> storedLowerValueBounds;
343 std::vector<ValueType> storedUpperValueBounds;
344 std::vector<ValueType> storedValues;
345 storm::storage::BitVector storedTargetStates;
346 };
347
348 ExplorationStorage explorationStorage;
349};
350} // namespace builder
351} // namespace storm
MdpStateType getExploredMdpState(BeliefId const &beliefId) const
void addClippingRewardToCurrentState(uint64_t const &localActionIndex, ValueType rewardValue)
ValueType computeUpperValueBoundAtBelief(BeliefId const &beliefId) const
MdpStateType getCurrentNumberOfMdpStates() const
ValueType const & getComputedValueAtInitialState() const
void restartExploration()
Restarts the exploration to allow re-exploring each state.
bool currentStateHasSuccessorObservationInObservationSet(uint64_t localActionIndex, storm::storage::BitVector const &observationSet)
ValueType getTrivialLowerBoundAtPOMDPState(uint64_t const &pomdpState)
void setFMSchedValueList(std::vector< std::vector< std::unordered_map< uint64_t, ValueType > > > valueList)
ValueType getTrivialUpperBoundAtPOMDPState(uint64_t const &pomdpState)
void addTransitionsToExtraStates(uint64_t const &localActionIndex, ValueType const &targetStateValue=storm::utility::zero< ValueType >(), ValueType const &bottomStateValue=storm::utility::zero< ValueType >())
bool beliefHasMdpState(BeliefId const &beliefId) const
std::vector< ValueType > const & getValuesOfExploredMdp() const
storm::storage::BitVector getStateExtremeBoundIsInfinite()
storm::storage::Scheduler< ValueType > getLowerValueBoundScheduler(uint64_t schedulerId) const
void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map< uint32_t, SuccessorObservationInformation > &gatheredSuccessorObservations)
void startNewExploration(std::optional< ValueType > extraTargetStateValue=boost::none, std::optional< ValueType > extraBottomStateValue=std::nullopt)
bool actionIsOptimal(uint64_t const &globalActionIndex) const
Retrieves whether the given action at the current state was optimal in the most recent check.
bool needsActionAdjustment(uint64_t numActionsNeeded)
std::vector< BeliefId > getBeliefsWithObservationInMdp(uint32_t obs) const
ValueType getExtremeValueBoundAtPOMDPState(uint64_t const &pomdpState)
MdpStateType getBeliefId(MdpStateType exploredMdpState) const
void computeValuesOfExploredMdp(storm::Environment const &env, storm::solver::OptimizationDirection const &dir)
std::vector< BeliefId > getBeliefsInMdp()
std::shared_ptr< storm::models::sparse::Mdp< ValueType > > getExploredMdp() const
storm::storage::Scheduler< ValueType > getUpperValueBoundScheduler(uint64_t schedulerId) const
std::vector< storm::storage::Scheduler< ValueType > > getLowerValueBoundSchedulers() const
bool getCurrentStateActionExplorationWasDelayed(uint64_t const &localActionIndex) const
BeliefManagerType const & getBeliefManager() const
storm::storage::BeliefManager< PomdpType, BeliefValueType > BeliefManagerType
std::pair< bool, ValueType > computeFMSchedulerValueForMemoryNode(BeliefId const &beliefId, uint64_t memoryNode) const
void setExtremeValueBound(storm::pomdp::storage::ExtremePOMDPValueBound< ValueType > valueBound)
void addChoiceLabelToCurrentState(uint64_t const &localActionIndex, std::string const &label)
std::vector< uint64_t > getUnexploredStates()
void restoreOldBehaviorAtCurrentState(uint64_t const &localActionIndex)
Inserts transitions and rewards at the given action as in the MDP of the previous exploration.
ValueType computeLowerValueBoundAtBelief(BeliefId const &beliefId) const
uint64_t getRowGroupSizeOfState(uint64_t state) const
const std::shared_ptr< storm::storage::Scheduler< BeliefMdpExplorer< PomdpType, BeliefValueType >::ValueType > > & getSchedulerForExploredMdp() const
MdpStateType getStartOfCurrentRowGroup() const
ValueType computeLowerValueBoundForScheduler(BeliefId const &beliefId, uint64_t schedulerId) const
bool currentStateIsOptimalSchedulerReachable() const
Retrieves whether the current state can be reached under a scheduler that was optimal in the most rec...
void addRewardToCurrentState(uint64_t const &localActionIndex, ValueType rewardValue)
Adds the provided reward value to the given action of the current state.
BeliefMdpExplorer(BeliefMdpExplorer &&other)=default
void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map< uint32_t, SuccessorObservationInformation > &gatheredSuccessorObservations)
uint64_t getNrOfMemoryNodesForObservation(uint32_t observation) const
void computeRewardAtCurrentState(uint64_t const &localActionIndex, ValueType extraReward=storm::utility::zero< ValueType >())
void computeOptimalChoicesAndReachableMdpStates(ValueType const &ancillaryChoicesEpsilon, bool relativeDifference)
Computes the set of states that are reachable via a path that is consistent with an optimal MDP sched...
bool isMarkedAsGridBelief(BeliefId const &beliefId)
void markAsGridBelief(BeliefId const &beliefId)
ValueType computeUpperValueBoundForScheduler(BeliefId const &beliefId, uint64_t schedulerId) const
void adjustActions(uint64_t totalNumberOfActions)
bool actionAtCurrentStateWasOptimal(uint64_t const &localActionIndex) const
Retrieves whether the given action at the current state was optimal in the most recent check.
void setCurrentChoiceIsDelayed(uint64_t const &localActionIndex)
std::vector< BeliefValueType > computeProductWithSparseMatrix(BeliefId const &beliefId, storm::storage::SparseMatrix< BeliefValueType > &matrix) const
void addSelfloopTransition(uint64_t const &localActionIndex=0, ValueType const &value=storm::utility::one< ValueType >())
std::vector< storm::storage::Scheduler< ValueType > > getUpperValueBoundSchedulers() const
bool addTransitionToBelief(uint64_t const &localActionIndex, BeliefId const &transitionTarget, ValueType const &value, bool ignoreNewBeliefs)
Adds the next transition to the given successor belief.
bool stateIsOptimalSchedulerReachable(MdpStateType mdpState) const
Retrieves whether the current state can be reached under an optimal scheduler This requires a previou...
BeliefManagerType::BeliefId BeliefId
MdpStateType getCurrentNumberOfMdpChoices() const
boost::container::flat_set< StateType > BeliefSupportType
A bit vector that is internally represented as a vector of 64-bit values.
Definition BitVector.h:18
This class defines which action is chosen in a particular state of a non-deterministic model.
Definition Scheduler.h:18
A class that holds a possibly non-square matrix in the compressed row storage format.
LabParser.cpp.
Definition cli.cpp:18
BeliefManagerType::BeliefSupportType support
The number of successor beliefstates with this observation.
ValueType maxProbabilityToSuccessorWithObs
The probability we move to the corresponding observation.
uint64_t successorWithObsCount
The maximal probability to move to a successor with the corresponding observation.
Struct to store the extreme bound values needed for the reward correction values when clipping is use...
Struct for storing precomputed values bounding the actual values on the POMDP.