// Resolves the POMDP's nondeterminism by weighting each state's actions with
// per-observation (parametric) choice weights, producing the transition matrix
// and reward models of a storm::models::sparse::Dtmc<storm::RationalFunction>.
// NOTE(review): this is a mid-function fragment — the enclosing signature,
// several closing braces, and some statements (elided original lines, e.g.
// 103/105/107/110/112-115/117-121 and the declarations of `modelComponents`
// and `rewardModel`) are not visible here.
//
// The transformation requires a canonic POMDP so that all states with the same
// observation have the same number of choices (weights are indexed per action).
86 STORM_LOG_THROW(pomdp.isCanonic(), storm::exceptions::IllegalArgumentException,
"POMDP needs to be canonic");
89 uint64_t nrStates = pomdp.getNumberOfStates();
// One weight vector per observation id; weights[action] scales that action's
// contribution to the resolved (weighted-average) transition distribution.
90 std::unordered_map<uint32_t, std::vector<storm::RationalFunction>> observationChoiceWeights = getObservationChoiceWeights(applicationMode);
// --- Build the weighted transition rows, one DTMC row per POMDP state. ---
93 for (uint64_t state = 0; state < nrStates; ++state) {
94 auto const& weights = observationChoiceWeights.at(pomdp.getObservation(state));
// Accumulates column -> sum over actions of (probability * weight). std::map
// keeps columns sorted, as required when emitting a sparse matrix row.
95 std::map<uint64_t, storm::RationalFunction> weightedTransitions;
96 for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) {
// Running sum of this row's entry values, used below to patch rounding
// residue onto the last entry so the row sums to exactly one.
// NOTE(review): the accumulation of ratSum and the increment of currEntry
// happen on elided lines — confirm against the full source.
97 auto ratSum = storm::utility::zero<storm::RationalFunction>();
98 uint64_t nrEntries = pomdp.getTransitionMatrix().getRow(state, action).getNumberOfEntries();
99 uint64_t currEntry = 1;
100 for (
auto const& entry : pomdp.getTransitionMatrix().getRow(state, action)) {
101 auto it = weightedTransitions.find(entry.getColumn());
// Original entries are ValueType (e.g. double); lift them to RationalFunction.
102 auto entryVal = storm::utility::convertNumber<storm::RationalFunction>(entry.getValue());
// On the last entry of the row: if the converted values do not sum to one
// (numerical imprecision in the input), fold the residual 1 - ratSum into
// this entry so the resolved distribution is exactly stochastic.
104 if (currEntry == nrEntries && storm::utility::one<storm::RationalFunction>() - ratSum != storm::utility::zero<storm::RationalFunction>()) {
106 entryVal += (storm::utility::one<storm::RationalFunction>() - ratSum);
// First action reaching this column inserts; later actions accumulate.
108 if (it == weightedTransitions.end()) {
109 weightedTransitions[entry.getColumn()] = entryVal * weights[action];
111 it->second += entryVal * weights[action];
// Emit the completed weighted row.
// NOTE(review): the loop body (presumably transitionMatrixBuilder.addNextValue
// per entry) is on elided lines — not visible in this fragment.
116 for (
auto const& entry : weightedTransitions) {
// --- Translate every reward model of the POMDP onto the DTMC. ---
122 for (
auto const& pomdpRewardModel : pomdp.getRewardModels()) {
123 std::vector<storm::RationalFunction> stateRewards;
125 if (pomdpRewardModel.second.hasStateRewards()) {
126 stateRewards = storm::utility::vector::convertNumericVector<storm::RationalFunction>(pomdpRewardModel.second.getStateRewardVector());
// Ensure one (possibly zero) state reward per state, whether or not the
// POMDP model carried state rewards.
128 stateRewards.resize(nrStates, storm::utility::zero<storm::RationalFunction>());
// Action rewards cannot survive in a DTMC (no choices left): fold them into
// the state reward using the same per-observation weights as the transitions.
130 if (pomdpRewardModel.second.hasStateActionRewards()) {
131 std::vector<ValueType> pomdpActionRewards = pomdpRewardModel.second.getStateActionRewardVector();
// NOTE(review): this `state` shadows nothing live here — the transition
// loop above has closed on an elided line.
132 for (uint64_t state = 0; state < nrStates; ++state) {
133 auto& stateReward = stateRewards[state];
134 auto const& weights = observationChoiceWeights.at(pomdp.getObservation(state));
// Row-group offset maps (state, action) to the flat action-reward index.
135 uint64_t offset = pomdp.getTransitionMatrix().getRowGroupIndices()[state];
136 for (uint64_t action = 0; action < pomdp.getNumberOfChoices(state); ++action) {
// Weighted average of the action rewards under the scheduler weights.
138 stateReward += storm::utility::convertNumber<storm::RationalFunction>(pomdpActionRewards[offset + action]) * weights[action];
// Register the converted reward model under its original name.
// NOTE(review): `rewardModel` is constructed on elided lines (presumably from
// stateRewards) — confirm against the full source.
144 modelComponents.
rewardModels.emplace(pomdpRewardModel.first, std::move(rewardModel));
// Package the weighted matrix + rewards as a parametric (RationalFunction) DTMC.
150 return std::make_shared<storm::models::sparse::Dtmc<storm::RationalFunction>>(modelComponents);