18namespace modelchecker {
19template<
typename FormulaType,
typename ValueType>
26template<
typename PomdpType,
typename BeliefValueType =
typename PomdpType::ValueType>
52 void startNewExploration(std::optional<ValueType> extraTargetStateValue = std::nullopt, std::optional<ValueType> extraBottomStateValue = std::nullopt);
71 ValueType const &bottomStateValue = storm::utility::zero<ValueType>());
147 std::shared_ptr<storm::models::sparse::Mdp<ValueType>>
getExploredMdp()
const;
196 std::map<uint32_t, SuccessorObservationInformation> &gatheredSuccessorObservations);
199 std::map<uint32_t, SuccessorObservationInformation> &gatheredSuccessorObservations);
244 const std::shared_ptr<storm::storage::Scheduler<BeliefMdpExplorer<PomdpType, BeliefValueType>::ValueType>> &
getSchedulerForExploredMdp()
const;
246 void setFMSchedValueList(std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> valueList);
271 void internalAddRowGroupIndex();
278 std::shared_ptr<BeliefManagerType> beliefManager;
279 std::vector<BeliefId> mdpStateToBeliefIdMap;
280 std::map<BeliefId, MdpStateType> beliefIdToMdpStateMap;
282 std::map<BeliefId, std::map<uint64_t, std::string>> mdpStateToChoiceLabelsMap;
285 std::multimap<ValueType, uint64_t> mdpStatesToExplorePrioState;
286 std::map<uint64_t, ValueType> mdpStatesToExploreStatePrio;
287 std::vector<ValueType> probabilityEstimation;
288 std::vector<std::map<MdpStateType, ValueType>> exploredMdpTransitions;
289 std::vector<MdpStateType> exploredChoiceIndices;
290 std::vector<MdpStateType> previousChoiceIndices;
291 std::vector<ValueType> mdpActionRewards;
292 std::map<MdpStateType, ValueType> clippingTransitionRewards;
293 uint64_t currentMdpState;
294 std::map<MdpStateType, MdpStateType> stateRemapping;
299 std::optional<MdpStateType> extraTargetState;
300 std::optional<MdpStateType> extraBottomState;
306 std::unordered_set<BeliefId> gridBeliefs;
309 std::shared_ptr<storm::models::sparse::Mdp<ValueType>> exploredMdp;
314 std::vector<std::vector<std::unordered_map<uint64_t, ValueType>>> fmSchedulerValueList;
315 std::vector<ValueType> lowerValueBounds;
316 std::vector<ValueType> upperValueBounds;
317 std::vector<ValueType> values;
318 std::optional<storm::storage::BitVector> optimalChoices;
319 std::optional<storm::storage::BitVector> optimalChoicesReachableMdpStates;
320 std::shared_ptr<storm::storage::Scheduler<ValueType>> scheduler;
326 struct ExplorationStorage {
327 std::vector<BeliefId> storedMdpStateToBeliefIdMap;
328 std::map<BeliefId, MdpStateType> storedBeliefIdToMdpStateMap;
330 std::map<BeliefId, std::map<uint64_t, std::string>> storedMdpStateToChoiceLabelsMap;
331 std::multimap<ValueType, uint64_t> storedMdpStatesToExplorePrioState;
332 std::map<uint64_t, ValueType> storedMdpStatesToExploreStatePrio;
333 std::vector<ValueType> storedProbabilityEstimation;
334 std::vector<std::map<MdpStateType, ValueType>> storedExploredMdpTransitions;
335 std::vector<MdpStateType> storedExploredChoiceIndices;
336 std::vector<ValueType> storedMdpActionRewards;
337 std::map<MdpStateType, ValueType> storedClippingTransitionRewards;
338 uint64_t storedCurrentMdpState;
339 std::map<MdpStateType, MdpStateType> storedStateRemapping;
340 uint64_t storedNextId;
342 std::vector<ValueType> storedLowerValueBounds;
343 std::vector<ValueType> storedUpperValueBounds;
344 std::vector<ValueType> storedValues;
348 ExplorationStorage explorationStorage;
MdpStateType getExploredMdpState(BeliefId const &beliefId) const
void storeExplorationState()
void addClippingRewardToCurrentState(uint64_t const &localActionIndex, ValueType rewardValue)
ValueType computeUpperValueBoundAtBelief(BeliefId const &beliefId) const
uint64_t getNrSchedulersForLowerBounds()
MdpStateType getCurrentNumberOfMdpStates() const
ValueType const & getComputedValueAtInitialState() const
void restartExploration()
Restarts the exploration to allow re-exploring each state.
bool currentStateHasSuccessorObservationInObservationSet(uint64_t localActionIndex, storm::storage::BitVector const &observationSet)
ValueType getTrivialLowerBoundAtPOMDPState(uint64_t const &pomdpState)
void setFMSchedValueList(std::vector< std::vector< std::unordered_map< uint64_t, ValueType > > > valueList)
ValueType getTrivialUpperBoundAtPOMDPState(uint64_t const &pomdpState)
void addTransitionsToExtraStates(uint64_t const &localActionIndex, ValueType const &targetStateValue=storm::utility::zero< ValueType >(), ValueType const &bottomStateValue=storm::utility::zero< ValueType >())
bool beliefHasMdpState(BeliefId const &beliefId) const
std::vector< ValueType > const & getValuesOfExploredMdp() const
storm::storage::BitVector getStateExtremeBoundIsInfinite()
void setCurrentStateIsClipped()
void setCurrentStateIsTarget()
storm::storage::Scheduler< ValueType > getLowerValueBoundScheduler(uint64_t schedulerId) const
void restoreExplorationState()
void gatherSuccessorObservationInformationAtCurrentState(uint64_t localActionIndex, std::map< uint32_t, SuccessorObservationInformation > &gatheredSuccessorObservations)
uint64_t getSizeOfCurrentRowGroup() const
void takeCurrentValuesAsLowerBounds()
bool hasComputedValues() const
void startNewExploration(std::optional< ValueType > extraTargetStateValue=std::nullopt, std::optional< ValueType > extraBottomStateValue=std::nullopt)
bool getCurrentStateWasTruncated() const
bool actionIsOptimal(uint64_t const &globalActionIndex) const
Retrieves whether the given action at the current state was optimal in the most recent check.
bool needsActionAdjustment(uint64_t numActionsNeeded)
std::vector< BeliefId > getBeliefsWithObservationInMdp(uint32_t obs) const
ValueType getExtremeValueBoundAtPOMDPState(uint64_t const &pomdpState)
MdpStateType getBeliefId(MdpStateType exploredMdpState) const
void computeValuesOfExploredMdp(storm::Environment const &env, storm::solver::OptimizationDirection const &dir)
std::vector< BeliefId > getBeliefsInMdp()
std::shared_ptr< storm::models::sparse::Mdp< ValueType > > getExploredMdp() const
storm::storage::Scheduler< ValueType > getUpperValueBoundScheduler(uint64_t schedulerId) const
std::vector< storm::storage::Scheduler< ValueType > > getLowerValueBoundSchedulers() const
bool getCurrentStateActionExplorationWasDelayed(uint64_t const &localActionIndex) const
BeliefManagerType const & getBeliefManager() const
PomdpType::ValueType ValueType
bool hasUnexploredState() const
void dropUnexploredStates()
void setCurrentStateIsTruncated()
storm::storage::BeliefManager< PomdpType, BeliefValueType > BeliefManagerType
std::pair< bool, ValueType > computeFMSchedulerValueForMemoryNode(BeliefId const &beliefId, uint64_t memoryNode) const
void setExtremeValueBound(storm::pomdp::storage::ExtremePOMDPValueBound< ValueType > valueBound)
bool hasFMSchedulerValues() const
void addChoiceLabelToCurrentState(uint64_t const &localActionIndex, std::string const &label)
std::vector< uint64_t > getUnexploredStates()
void restoreOldBehaviorAtCurrentState(uint64_t const &localActionIndex)
Inserts transitions and rewards at the given action as in the MDP of the previous exploration.
ValueType computeLowerValueBoundAtBelief(BeliefId const &beliefId) const
uint64_t getRowGroupSizeOfState(uint64_t state) const
bool currentStateHasOldBehavior() const
const std::shared_ptr< storm::storage::Scheduler< BeliefMdpExplorer< PomdpType, BeliefValueType >::ValueType > > & getSchedulerForExploredMdp() const
MdpStateType getStartOfCurrentRowGroup() const
ValueType computeLowerValueBoundForScheduler(BeliefId const &beliefId, uint64_t schedulerId) const
bool currentStateIsOptimalSchedulerReachable() const
Retrieves whether the current state can be reached under a scheduler that was optimal in the most recent check.
void addRewardToCurrentState(uint64_t const &localActionIndex, ValueType rewardValue)
Adds the provided reward value to the given action of the current state.
BeliefMdpExplorer(BeliefMdpExplorer &&other)=default
void gatherSuccessorObservationInformationAtMdpChoice(uint64_t mdpChoice, std::map< uint32_t, SuccessorObservationInformation > &gatheredSuccessorObservations)
uint64_t getNrOfMemoryNodesForObservation(uint32_t observation) const
void computeRewardAtCurrentState(uint64_t const &localActionIndex, ValueType extraReward=storm::utility::zero< ValueType >())
void computeOptimalChoicesAndReachableMdpStates(ValueType const &ancillaryChoicesEpsilon, bool relativeDifference)
Computes the set of states that are reachable via a path that is consistent with an optimal MDP scheduler.
uint64_t getNrSchedulersForUpperBounds()
bool isMarkedAsGridBelief(BeliefId const &beliefId)
ValueType getUpperValueBoundAtCurrentState() const
BeliefId exploreNextState()
void markAsGridBelief(BeliefId const &beliefId)
ValueType computeUpperValueBoundForScheduler(BeliefId const &beliefId, uint64_t schedulerId) const
void adjustActions(uint64_t totalNumberOfActions)
bool actionAtCurrentStateWasOptimal(uint64_t const &localActionIndex) const
Retrieves whether the given action at the current state was optimal in the most recent check.
void setCurrentChoiceIsDelayed(uint64_t const &localActionIndex)
std::vector< BeliefValueType > computeProductWithSparseMatrix(BeliefId const &beliefId, storm::storage::SparseMatrix< BeliefValueType > &matrix) const
void addSelfloopTransition(uint64_t const &localActionIndex=0, ValueType const &value=storm::utility::one< ValueType >())
std::vector< storm::storage::Scheduler< ValueType > > getUpperValueBoundSchedulers() const
ValueType getLowerValueBoundAtCurrentState() const
bool getCurrentStateWasClipped() const
bool addTransitionToBelief(uint64_t const &localActionIndex, BeliefId const &transitionTarget, ValueType const &value, bool ignoreNewBeliefs)
Adds the next transition to the given successor belief.
void takeCurrentValuesAsUpperBounds()
bool stateIsOptimalSchedulerReachable(MdpStateType mdpState) const
Retrieves whether the current state can be reached under an optimal scheduler. This requires a previous call of computeOptimalChoicesAndReachableMdpStates.
BeliefManagerType::BeliefId BeliefId
MdpStateType getCurrentNumberOfMdpChoices() const
boost::container::flat_set< StateType > BeliefSupportType
A bit vector that is internally represented as a vector of 64-bit values.
This class defines which action is chosen in a particular state of a non-deterministic model.
A class that holds a possibly non-square matrix in the compressed row storage format.
Struct to store the extreme bound values needed for the reward correction values when clipping is used.
Struct for storing precomputed values bounding the actual values on the POMDP.