STORM_LOG_THROW(pomdp.isCanonic(), storm::exceptions::IllegalArgumentException, "POMDP needs to be canonic");
uint64_t nrStates = pomdp.getNumberOfStates();
std::unordered_map<uint32_t, std::vector<storm::RationalFunction>> observationChoiceWeights = getObservationChoiceWeights(applicationMode);
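// A builder for the induced transition matrix is written to further below (`smb.addNextValue`),
// but its construction falls outside this excerpt; a minimal reconstruction, assuming a
// square nrStates x nrStates matrix:
storm::storage::SparseMatrixBuilder<storm::RationalFunction> smb(nrStates, nrStates);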
for (uint64_t state = 0; state < nrStates; ++state) {
    auto const& weights = observationChoiceWeights.at(pomdp.getObservation(state));
    std::map<uint64_t, storm::RationalFunction> weightedTransitions;
    for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) {
        auto ratSum = storm::utility::zero<storm::RationalFunction>();
        uint64_t nrEntries = pomdp.getTransitionMatrix().getRow(state, action).getNumberOfEntries();
        uint64_t currEntry = 1;
        for (auto const& entry : pomdp.getTransitionMatrix().getRow(state, action)) {
            auto it = weightedTransitions.find(entry.getColumn());
            auto entryVal = storm::utility::convertNumber<storm::RationalFunction>(entry.getValue());
            // Restored from context: the running sum must include the current entry,
            // otherwise the deficiency check below could never fire correctly.
            ratSum += entryVal;
            // Make sure each row sums to exactly one: the last entry absorbs any deficiency.
            if (currEntry == nrEntries && storm::utility::one<storm::RationalFunction>() - ratSum != storm::utility::zero<storm::RationalFunction>()) {
                entryVal += (storm::utility::one<storm::RationalFunction>() - ratSum);
            }
            if (it == weightedTransitions.end()) {
                weightedTransitions[entry.getColumn()] = entryVal * weights[action];
            } else {
                it->second += entryVal * weights[action];
            }
            ++currEntry;
        }
    }
    // Write the aggregated row into the matrix (loop body reconstructed from context).
    for (auto const& entry : weightedTransitions) {
        smb.addNextValue(state, entry.first, entry.second);
    }
}
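// `modelComponents` is used below but constructed in an elided line; presumably it is
// assembled from the built matrix and the POMDP's state labeling:
storm::storage::sparse::ModelComponents<storm::RationalFunction> modelComponents(smb.build(), pomdp.getStateLabeling());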
// Lift each reward model of the POMDP to the induced model by weighting action rewards.
for (auto const& pomdpRewardModel : pomdp.getRewardModels()) {
    std::vector<storm::RationalFunction> stateRewards;
    if (pomdpRewardModel.second.hasStateRewards()) {
        stateRewards = storm::utility::vector::convertNumericVector<storm::RationalFunction>(pomdpRewardModel.second.getStateRewardVector());
    } else {
        stateRewards.resize(nrStates, storm::utility::zero<storm::RationalFunction>());
    }
    if (pomdpRewardModel.second.hasStateActionRewards()) {
        std::vector<ValueType> pomdpActionRewards = pomdpRewardModel.second.getStateActionRewardVector();
        for (uint64_t state = 0; state < nrStates; ++state) {
            auto& stateReward = stateRewards[state];
            auto const& weights = observationChoiceWeights.at(pomdp.getObservation(state));
            uint64_t offset = pomdp.getTransitionMatrix().getRowGroupIndices()[state];
            for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) {
                // Each action reward contributes proportionally to its scheduler weight.
                stateReward += storm::utility::convertNumber<storm::RationalFunction>(pomdpActionRewards[offset + action]) * weights[action];
            }
        }
    }
    // `rewardModel` is constructed in an elided line; presumably a StandardRewardModel
    // over the accumulated state rewards (a reconstruction, not verified against the source):
    storm::models::sparse::StandardRewardModel<storm::RationalFunction> rewardModel(std::move(stateRewards));
    modelComponents.rewardModels.emplace(pomdpRewardModel.first, std::move(rewardModel));
}
return std::make_shared<storm::models::sparse::Dtmc<storm::RationalFunction>>(modelComponents);