Storm
A Modern Probabilistic Model Checker
Loading...
Searching...
No Matches
BeliefManager.cpp
Go to the documentation of this file.
2
9
10namespace storm {
11namespace storage {
12
13template<typename PomdpType, typename BeliefValueType, typename StateType>
17
18template<typename PomdpType, typename BeliefValueType, typename StateType>
20 : dimension(dimension), diff(std::move(diff)) {
21 // Intentionally left empty
22}
23
24template<typename PomdpType, typename BeliefValueType, typename StateType>
26 if (diff != other.diff) {
27 return diff > other.diff;
28 } else {
29 return dimension < other.dimension;
30 }
31}
32
33template<typename PomdpType, typename BeliefValueType, typename StateType>
34bool BeliefManager<PomdpType, BeliefValueType, StateType>::Belief_equal_to::operator()(const BeliefType &lhBelief, const BeliefType &rhBelief) const {
35 return lhBelief == rhBelief;
36}
37
38template<>
39bool BeliefManager<storm::models::sparse::Pomdp<double>, double, uint64_t>::Belief_equal_to::operator()(const BeliefType &lhBelief,
40 const BeliefType &rhBelief) const {
41 // If the sizes are different, we don't have to look inside the belief
42 if (lhBelief.size() != rhBelief.size()) {
43 return false;
44 }
45 // Assumes that beliefs are ordered
46 auto lhIt = lhBelief.begin();
47 auto rhIt = rhBelief.begin();
48 while (lhIt != lhBelief.end() || rhIt != rhBelief.end()) {
49 // Iterate over the entries simultaneously, beliefs not equal if they contain either different states or different values for the same state
50 if (lhIt->first != rhIt->first || std::fabs(lhIt->second - rhIt->second) > 1e-15) {
51 return false;
52 }
53 ++lhIt;
54 ++rhIt;
55 }
56 return lhIt == lhBelief.end() && rhIt == rhBelief.end();
57}
58
59template<typename PomdpType, typename BeliefValueType, typename StateType>
60std::size_t BeliefManager<PomdpType, BeliefValueType, StateType>::BeliefHash::operator()(const BeliefType &belief) const {
61 std::size_t seed = 0;
62 // Assumes that beliefs are ordered
63 for (auto const &entry : belief) {
64 boost::hash_combine(seed, entry.first);
65 boost::hash_combine(seed, entry.second);
66 }
67 return seed;
68}
69
70template<>
71std::size_t BeliefManager<storm::models::sparse::Pomdp<double>, double, uint64_t>::BeliefHash::operator()(const BeliefType &belief) const {
72 std::size_t seed = 0;
73 // Assumes that beliefs are ordered
74 for (auto const &entry : belief) {
75 boost::hash_combine(seed, entry.first);
76 boost::hash_combine(seed, round(storm::utility::convertNumber<double>(entry.second) * 1e15));
77 }
78 return seed;
79}
80
81template<typename PomdpType, typename BeliefValueType, typename StateType>
82BeliefManager<PomdpType, BeliefValueType, StateType>::BeliefManager(PomdpType const &pomdp, BeliefValueType const &precision,
83 TriangulationMode const &triangulationMode)
84 : pomdp(pomdp), triangulationMode(triangulationMode) {
86 beliefToIdMap.resize(pomdp.getNrObservations());
87 initialBeliefId = computeInitialBelief();
88}
89
90template<typename PomdpType, typename BeliefValueType, typename StateType>
91void BeliefManager<PomdpType, BeliefValueType, StateType>::setRewardModel(std::optional<std::string> rewardModelName) {
92 if (rewardModelName) {
93 auto const &rewardModel = pomdp.getRewardModel(rewardModelName.value());
94 pomdpActionRewardVector = rewardModel.getTotalRewardVector(pomdp.getTransitionMatrix());
95 } else {
96 setRewardModel(pomdp.getUniqueRewardModelName());
97 }
98}
99
100template<typename PomdpType, typename BeliefValueType, typename StateType>
102 pomdpActionRewardVector.clear();
103}
104
105template<typename PomdpType, typename BeliefValueType, typename StateType>
109
110template<typename PomdpType, typename BeliefValueType, typename StateType>
112 return isEqual(getBelief(first), getBelief(second));
113}
114
115template<typename PomdpType, typename BeliefValueType, typename StateType>
117 return toString(getBelief(beliefId));
118}
119
120template<typename PomdpType, typename BeliefValueType, typename StateType>
122 std::stringstream str;
123 str << "(\n";
124 for (uint64_t i = 0; i < t.size(); ++i) {
125 str << "\t" << t.weights[i] << " * \t" << toString(getBelief(t.gridPoints[i])) << "\n";
126 }
127 str << ")\n";
128 return str.str();
129}
130
131template<typename PomdpType, typename BeliefValueType, typename StateType>
133 BeliefId const &beliefId, std::vector<ValueType> const &summands) {
134 auto result = storm::utility::zero<ValueType>();
135 for (auto const &entry : getBelief(beliefId)) {
136 result += storm::utility::convertNumber<ValueType>(entry.second) * storm::utility::convertNumber<ValueType>(summands.at(entry.first));
137 }
138 return result;
139}
140
141template<typename PomdpType, typename BeliefValueType, typename StateType>
142std::pair<bool, typename BeliefManager<PomdpType, BeliefValueType, StateType>::ValueType> BeliefManager<PomdpType, BeliefValueType, StateType>::getWeightedSum(
143 BeliefId const &beliefId, std::unordered_map<StateType, ValueType> const &summands) {
144 bool successful = true;
145 auto result = storm::utility::zero<ValueType>();
146 for (auto const &entry : getBelief(beliefId)) {
147 auto probIter = summands.find(entry.first);
148 if (probIter != summands.end()) {
149 result += storm::utility::convertNumber<ValueType>(entry.second) * storm::utility::convertNumber<ValueType>(summands.at(entry.first));
150 } else {
151 successful = false;
152 break;
153 }
154 }
155 return {successful, result};
156}
157
158template<typename PomdpType, typename BeliefValueType, typename StateType>
162
163template<typename PomdpType, typename BeliefValueType, typename StateType>
165 BeliefId const &beliefId, uint64_t const &localActionIndex) const {
166 auto const &belief = getBelief(beliefId);
167 STORM_LOG_ASSERT(!pomdpActionRewardVector.empty(), "Requested a reward although no reward model was specified.");
168 auto result = storm::utility::zero<ValueType>();
169 auto const &choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices();
170 for (auto const &entry : belief) {
171 uint64_t choiceIndex = choiceIndices[entry.first] + localActionIndex;
172 STORM_LOG_ASSERT(choiceIndex < choiceIndices[entry.first + 1], "Invalid local action index.");
173 STORM_LOG_ASSERT(choiceIndex < pomdpActionRewardVector.size(), "Invalid choice index.");
174 result += storm::utility::convertNumber<ValueType>(entry.second) * pomdpActionRewardVector[choiceIndex];
175 }
176 return result;
177}
178
179template<typename PomdpType, typename BeliefValueType, typename StateType>
181 return getBeliefObservation(getBelief(beliefId));
182}
183
184template<typename PomdpType, typename BeliefValueType, typename StateType>
186 auto const &belief = getBelief(beliefId);
187 return pomdp.getNumberOfChoices(belief.begin()->first);
188}
189
190template<typename PomdpType, typename BeliefValueType, typename StateType>
192 BeliefId beliefId, BeliefValueType resolution) {
193 return triangulateBelief(getBelief(beliefId), resolution);
194}
195
196template<typename PomdpType, typename BeliefValueType, typename StateType>
197template<typename DistributionType>
198void BeliefManager<PomdpType, BeliefValueType, StateType>::addToDistribution(DistributionType &distr, StateType const &state, BeliefValueType const &value) {
199 auto insertionRes = distr.emplace(state, value);
200 if (!insertionRes.second) {
201 insertionRes.first->second += value;
202 }
203}
204
205template<typename PomdpType, typename BeliefValueType, typename StateType>
206template<typename DistributionType>
208 if (distr.size() == 1 && cc.isEqual(distr.begin()->second, storm::utility::one<BeliefValueType>())) {
209 // If the distribution consists of only one entry and its value is sufficiently close to 1, make it exactly 1 to avoid numerical problems
210 distr.begin()->second = storm::utility::one<BeliefValueType>();
211 }
212}
213
214template<typename PomdpType, typename BeliefValueType, typename StateType>
216 auto const &belief = getBelief(beliefId);
217 for (auto const &entry : belief) {
218 support.insert(entry.first);
219 }
220}
221
222template<typename PomdpType, typename BeliefValueType, typename StateType>
226
227template<typename PomdpType, typename BeliefValueType, typename StateType>
228std::vector<std::pair<typename BeliefManager<PomdpType, BeliefValueType, StateType>::BeliefId,
231 std::vector<BeliefValueType> const &observationResolutions) {
232 return expandInternal(beliefId, actionIndex, observationResolutions);
233}
234
235template<typename PomdpType, typename BeliefValueType, typename StateType>
236std::vector<std::pair<typename BeliefManager<PomdpType, BeliefValueType, StateType>::BeliefId,
239 std::vector<uint64_t> const &observationResolutions) {
240 return expandInternal(beliefId, actionIndex, std::nullopt, observationResolutions);
241}
242
243template<typename PomdpType, typename BeliefValueType, typename StateType>
244std::vector<std::pair<typename BeliefManager<PomdpType, BeliefValueType, StateType>::BeliefId,
247 return expandInternal(beliefId, actionIndex);
248}
249
250template<typename PomdpType, typename BeliefValueType, typename StateType>
252 BeliefId const &id) const {
253 STORM_LOG_ASSERT(id != noId(), "Tried to get a non-existent belief.");
254 STORM_LOG_ASSERT(id < getNumberOfBeliefIds(), "Belief index " << id << " is out of range.");
255 return beliefs[id];
256}
257
258template<typename PomdpType, typename BeliefValueType, typename StateType>
259typename BeliefManager<PomdpType, BeliefValueType, StateType>::BeliefId BeliefManager<PomdpType, BeliefValueType, StateType>::getId(
260 BeliefType const &belief) const {
261 uint32_t obs = getBeliefObservation(belief);
262 STORM_LOG_ASSERT(obs < beliefToIdMap.size(), "Belief has unknown observation.");
263 auto idIt = beliefToIdMap[obs].find(belief);
264 STORM_LOG_ASSERT(idIt != beliefToIdMap[obs].end(), "Unknown Belief.");
265 return idIt->second;
266}
267
268template<typename PomdpType, typename BeliefValueType, typename StateType>
269std::string BeliefManager<PomdpType, BeliefValueType, StateType>::toString(BeliefType const &belief) const {
270 std::stringstream str;
271 str << "{ ";
272 bool first = true;
273 for (auto const &entry : belief) {
274 if (first) {
275 first = false;
276 } else {
277 str << ", ";
278 }
279 str << entry.first << ": " << std::setprecision(std::numeric_limits<double>::max_digits10 + 1) << entry.second;
280 }
281 str << " }";
282 return str.str();
283}
284
285template<typename PomdpType, typename BeliefValueType, typename StateType>
286bool BeliefManager<PomdpType, BeliefValueType, StateType>::isEqual(BeliefType const &first, BeliefType const &second) const {
287 if (first.size() != second.size()) {
288 return false;
289 }
290 auto secondIt = second.begin();
291 for (auto const &firstEntry : first) {
292 if (firstEntry.first != secondIt->first) {
293 return false;
294 }
295 if (!cc.isEqual(firstEntry.second, secondIt->second)) {
296 return false;
297 }
298 ++secondIt;
299 }
300 return true;
301}
302
303template<typename PomdpType, typename BeliefValueType, typename StateType>
304bool BeliefManager<PomdpType, BeliefValueType, StateType>::assertBelief(BeliefType const &belief) const {
305 auto sum = storm::utility::zero<BeliefValueType>();
306 std::optional<uint32_t> observation;
307 for (auto const &entry : belief) {
308 if (entry.first >= pomdp.getNumberOfStates()) {
309 STORM_LOG_ERROR("Belief does refer to non-existing pomdp state " << entry.first << ".");
310 return false;
311 }
312 uint64_t entryObservation = pomdp.getObservation(entry.first);
313 if (observation) {
314 if (observation.value() != entryObservation) {
315 STORM_LOG_ERROR("Beliefsupport contains different observations.");
316 return false;
317 }
318 } else {
319 observation = entryObservation;
320 }
321 // Don't use cc for these checks, because computations with zero are usually fine
322 if (storm::utility::isZero(entry.second)) {
323 // We assume that beliefs only consider their support.
324 STORM_LOG_ERROR("Zero belief probability.");
325 return false;
326 }
327 if (entry.second < storm::utility::zero<BeliefValueType>()) {
328 STORM_LOG_ERROR("Negative belief probability.");
329 return false;
330 }
331 if (cc.isLess(storm::utility::one<BeliefValueType>(), entry.second)) {
332 STORM_LOG_ERROR("Belief probability greater than one.");
333 return false;
334 }
335 sum += entry.second;
336 }
337 if (!cc.isOne(sum)) {
338 STORM_LOG_ERROR("Belief does not sum up to one. (" << sum << " instead).");
339 return false;
340 }
341 return true;
342}
343
344template<typename PomdpType, typename BeliefValueType, typename StateType>
345bool BeliefManager<PomdpType, BeliefValueType, StateType>::assertTriangulation(BeliefType const &belief, Triangulation const &triangulation) const {
346 if (triangulation.weights.size() != triangulation.gridPoints.size()) {
347 STORM_LOG_ERROR("Number of weights and points in triangulation does not match.");
348 return false;
349 }
350 if (triangulation.size() == 0) {
351 STORM_LOG_ERROR("Empty triangulation.");
352 return false;
353 }
354 BeliefType triangulatedBelief;
355 auto weightSum = storm::utility::zero<BeliefValueType>();
356 for (uint64_t i = 0; i < triangulation.weights.size(); ++i) {
357 if (cc.isZero(triangulation.weights[i])) {
358 STORM_LOG_ERROR("Zero weight in triangulation.");
359 return false;
360 }
361 if (cc.isLess(triangulation.weights[i], storm::utility::zero<BeliefValueType>())) {
362 STORM_LOG_ERROR("Negative weight in triangulation.");
363 return false;
364 }
365 if (cc.isLess(storm::utility::one<BeliefValueType>(), triangulation.weights[i])) {
366 STORM_LOG_ERROR("Weight greater than one in triangulation.");
367 }
368 weightSum += triangulation.weights[i];
369 BeliefType const &gridPoint = getBelief(triangulation.gridPoints[i]);
370 for (auto const &pointEntry : gridPoint) {
371 BeliefValueType &triangulatedValue = triangulatedBelief.emplace(pointEntry.first, storm::utility::zero<BeliefValueType>()).first->second;
372 triangulatedValue += triangulation.weights[i] * pointEntry.second;
373 }
374 }
375 if (!cc.isOne(weightSum)) {
376 STORM_LOG_ERROR("Triangulation weights do not sum up to one.");
377 return false;
378 }
379 if (!assertBelief(triangulatedBelief)) {
380 STORM_LOG_ERROR("Triangulated belief is not a belief.");
381 }
382 if (!isEqual(belief, triangulatedBelief)) {
383 STORM_LOG_ERROR("Belief:\n\t" << toString(belief) << "\ndoes not match triangulated belief:\n\t" << toString(triangulatedBelief) << ".");
384 return false;
385 }
386 return true;
387}
388
389template<typename PomdpType, typename BeliefValueType, typename StateType>
391 STORM_LOG_ASSERT(assertBelief(belief), "Invalid belief.");
392 return pomdp.getObservation(belief.begin()->first);
393}
394
395template<typename PomdpType, typename BeliefValueType, typename StateType>
396void BeliefManager<PomdpType, BeliefValueType, StateType>::triangulateBeliefFreudenthal(BeliefType const &belief, BeliefValueType const &resolution,
397 Triangulation &result) {
398 STORM_LOG_ASSERT(resolution != 0, "Invalid resolution: 0");
399 STORM_LOG_ASSERT(storm::utility::isInteger(resolution), "Expected an integer resolution");
400 StateType numEntries = belief.size();
401 // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math)
402 // Probabilities will be triangulated to values in 0/N, 1/N, 2/N, ..., N/N
403 // Variable names are mostly based on the paper
404 // However, we speed this up a little by exploiting that belief states usually have sparse support (i.e. numEntries is much smaller than
405 // pomdp.getNumberOfStates()). Initialize diffs and the first row of the 'qs' matrix (aka v)
406 std::set<FreudenthalDiff, std::greater<>> sorted_diffs; // d (and p?) in the paper
407 std::vector<BeliefValueType> qsRow; // Row of the 'qs' matrix from the paper (initially corresponds to v
408 qsRow.reserve(numEntries);
409 std::vector<StateType> toOriginalIndicesMap; // Maps 'local' indices to the original pomdp state indices
410 toOriginalIndicesMap.reserve(numEntries);
411 BeliefValueType x = resolution;
412 for (auto const &entry : belief) {
413 qsRow.push_back(storm::utility::floor(x)); // v
414 sorted_diffs.emplace(toOriginalIndicesMap.size(), x - qsRow.back()); // x-v
415 toOriginalIndicesMap.push_back(entry.first);
416 x -= entry.second * resolution;
417 }
418 // Insert a dummy 0 column in the qs matrix so the loops below are a bit simpler
419 qsRow.push_back(storm::utility::zero<BeliefValueType>());
420
421 result.weights.reserve(numEntries);
422 result.gridPoints.reserve(numEntries);
423 auto currentSortedDiff = sorted_diffs.begin();
424 auto previousSortedDiff = sorted_diffs.end();
425 --previousSortedDiff;
426 for (StateType i = 0; i < numEntries; ++i) {
427 // Compute the weight for the grid points
428 BeliefValueType weight = previousSortedDiff->diff - currentSortedDiff->diff;
429 if (i == 0) {
430 // The first weight is a bit different
431 weight += storm::utility::one<BeliefValueType>();
432 } else {
433 // 'compute' the next row of the qs matrix
434 qsRow[previousSortedDiff->dimension] += storm::utility::one<BeliefValueType>();
435 }
436 if (!cc.isZero(weight)) {
437 result.weights.push_back(weight);
438 // Compute the grid point
439 BeliefType gridPoint;
440 for (StateType j = 0; j < numEntries; ++j) {
441 BeliefValueType gridPointEntry = qsRow[j] - qsRow[j + 1];
442 if (!cc.isZero(gridPointEntry)) {
443 gridPoint[toOriginalIndicesMap[j]] = gridPointEntry / resolution;
444 }
445 }
446 result.gridPoints.push_back(getOrAddBeliefId(gridPoint));
447 }
448 previousSortedDiff = currentSortedDiff++;
449 }
450}
451
452template<typename PomdpType, typename BeliefValueType, typename StateType>
453void BeliefManager<PomdpType, BeliefValueType, StateType>::triangulateBeliefDynamic(BeliefType const &belief, BeliefValueType const &resolution,
454 Triangulation &result) {
455 // Find the best resolution for this belief, i.e., N such that the largest distance between one of the belief values to a value in {i/N | 0 ≤ i ≤ N} is
456 // minimal
457 STORM_LOG_ASSERT(storm::utility::isInteger(resolution), "Expected an integer resolution");
458 BeliefValueType finalResolution = resolution;
459 uint64_t finalResolutionMisses = belief.size() + 1;
460 // We don't need to check resolutions that are smaller than the maximal resolution divided by 2 (as we already checked multiples of these)
461 for (BeliefValueType currResolution = resolution; currResolution > resolution / 2; --currResolution) {
462 uint64_t currResMisses = 0;
463 bool continueWithNextResolution = false;
464 for (auto const &belEntry : belief) {
465 BeliefValueType product = belEntry.second * currResolution;
466 if (!cc.isZero(product - storm::utility::round(product))) {
467 ++currResMisses;
468 if (currResMisses >= finalResolutionMisses) {
469 // This resolution is not better than a previous resolution
470 continueWithNextResolution = true;
471 break;
472 }
473 }
474 }
475 if (!continueWithNextResolution) {
476 STORM_LOG_ASSERT(currResMisses < finalResolutionMisses, "Distance for this resolution should not be larger than a previously checked one.");
477 finalResolution = currResolution;
478 finalResolutionMisses = currResMisses;
479 if (currResMisses == 0) {
480 break;
481 }
482 }
483 }
484
485 STORM_LOG_TRACE("Picking resolution " << finalResolution << " for belief " << toString(belief));
486
487 // do standard freudenthal with the found resolution
488 triangulateBeliefFreudenthal(belief, finalResolution, result);
489}
490
491template<typename PomdpType, typename BeliefValueType, typename StateType>
492typename BeliefManager<PomdpType, BeliefValueType, StateType>::Triangulation BeliefManager<PomdpType, BeliefValueType, StateType>::triangulateBelief(
493 BeliefType const &belief, BeliefValueType const &resolution) {
494 STORM_LOG_ASSERT(assertBelief(belief), "Input belief for triangulation is not valid.");
495 Triangulation result;
496 // Quickly triangulate Dirac beliefs
497 if (belief.size() == 1u) {
498 result.weights.push_back(storm::utility::one<BeliefValueType>());
499 result.gridPoints.push_back(getOrAddBeliefId(belief));
500 } else {
501 auto ceiledResolution = storm::utility::ceil<BeliefValueType>(resolution);
502 switch (triangulationMode) {
503 case TriangulationMode::Static:
504 triangulateBeliefFreudenthal(belief, ceiledResolution, result);
505 break;
506 case TriangulationMode::Dynamic:
507 triangulateBeliefDynamic(belief, ceiledResolution, result);
508 break;
509 default:
510 STORM_LOG_ASSERT(false, "Invalid triangulation mode.");
511 }
512 }
513 STORM_LOG_ASSERT(assertTriangulation(belief, result), "Incorrect triangulation: " << toString(result));
514 return result;
515}
516
517template<typename PomdpType, typename BeliefValueType, typename StateType>
518std::vector<std::pair<typename BeliefManager<PomdpType, BeliefValueType, StateType>::BeliefId,
520BeliefManager<PomdpType, BeliefValueType, StateType>::expandInternal(BeliefId const &beliefId, uint64_t actionIndex,
521 std::optional<std::vector<BeliefValueType>> const &observationTriangulationResolutions,
522 std::optional<std::vector<uint64_t>> const &observationGridClippingResolutions) {
523 std::vector<std::pair<BeliefId, ValueType>> destinations;
524
525 BeliefType belief = getBelief(beliefId);
526
527 // Find the probability we go to each observation
528 BeliefType successorObs; // This is actually not a belief but has the same type
529 for (auto const &pointEntry : belief) {
530 uint64_t state = pointEntry.first;
531 for (auto const &pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) {
532 if (!storm::utility::isZero(pomdpTransition.getValue())) {
533 auto obs = pomdp.getObservation(pomdpTransition.getColumn());
534 addToDistribution(successorObs, obs, pointEntry.second * storm::utility::convertNumber<BeliefValueType>(pomdpTransition.getValue()));
535 }
536 }
537 }
538 adjustDistribution(successorObs);
539
540 // Now for each successor observation we find and potentially triangulate the successor belief
541 for (auto const &successor : successorObs) {
542 BeliefType successorBelief;
543 for (auto const &pointEntry : belief) {
544 uint64_t state = pointEntry.first;
545 for (auto const &pomdpTransition : pomdp.getTransitionMatrix().getRow(state, actionIndex)) {
546 if (pomdp.getObservation(pomdpTransition.getColumn()) == successor.first) {
547 BeliefValueType prob = pointEntry.second * storm::utility::convertNumber<BeliefValueType>(pomdpTransition.getValue()) / successor.second;
548 addToDistribution(successorBelief, pomdpTransition.getColumn(), prob);
549 }
550 }
551 }
552 adjustDistribution(successorBelief);
553 STORM_LOG_ASSERT(assertBelief(successorBelief), "Invalid successor belief.");
554
555 // Insert the destination. We know that destinations have to be disjoint since they have different observations
556 if (observationTriangulationResolutions) {
557 Triangulation triangulation = triangulateBelief(successorBelief, observationTriangulationResolutions.value()[successor.first]);
558 for (size_t j = 0; j < triangulation.size(); ++j) {
559 // Here we additionally assume that triangulation.gridPoints does not contain the same point multiple times
560 BeliefValueType a = triangulation.weights[j] * successor.second;
561 destinations.emplace_back(triangulation.gridPoints[j], storm::utility::convertNumber<ValueType>(a));
562 }
563 } else if (observationGridClippingResolutions) {
564 BeliefClipping clipping = clipBeliefToGrid(successorBelief, observationGridClippingResolutions.value()[successor.first],
565 storm::storage::BitVector(pomdp.getNumberOfStates()));
566 if (clipping.isClippable) {
567 BeliefValueType a = (storm::utility::one<BeliefValueType>() - clipping.delta) * successor.second;
568 destinations.emplace_back(clipping.targetBelief, storm::utility::convertNumber<ValueType>(a));
569 } else {
570 // Belief on Grid
571 destinations.emplace_back(getOrAddBeliefId(successorBelief), storm::utility::convertNumber<ValueType>(successor.second));
572 }
573 } else {
574 destinations.emplace_back(getOrAddBeliefId(successorBelief), storm::utility::convertNumber<ValueType>(successor.second));
575 }
576 }
577
578 return destinations;
579}
580
581template<typename PomdpType, typename BeliefValueType, typename StateType>
583 BeliefId const &beliefId, uint64_t resolution, storm::storage::BitVector isInfinite) {
584 auto res = clipBeliefToGrid(getBelief(beliefId), resolution, isInfinite);
585 res.startingBelief = beliefId;
586 return res;
587}
588
589template<typename PomdpType, typename BeliefValueType, typename StateType>
591 BeliefType const &belief, uint64_t resolution, const storm::storage::BitVector &isInfinite) {
592 uint32_t obs = getBeliefObservation(belief);
593 STORM_LOG_ASSERT(obs < beliefToIdMap.size(), "Belief has unknown observation.");
594 if (!lpSolver) {
595 lpSolver = storm::utility::solver::getLpSolver<BeliefValueType>("POMDP LP Solver");
596 } else {
597 lpSolver->pop();
598 }
599 lpSolver->push();
600
601 std::vector<BeliefValueType> helper(belief.size(), storm::utility::zero<BeliefValueType>());
602 helper[0] = storm::utility::convertNumber<BeliefValueType>(resolution);
603 bool done = false;
604 // Set-up Variables
605 std::vector<storm::expressions::Expression> decisionVariables;
606 // Add variable for the clipping value, it is to be minimized
607 auto bigDelta = lpSolver->addBoundedContinuousVariable("D", storm::utility::zero<BeliefValueType>(), storm::utility::one<BeliefValueType>(),
608 storm::utility::one<BeliefValueType>());
609 // State clipping values
610 std::vector<storm::expressions::Expression> deltas;
611 uint64_t i = 0;
612 for (auto const &state : belief) {
613 // This is a quite dirty fix to enable GLPK for the TACAS '22 implementation without substantially changing the implementation for Gurobi.
614 if (typeid(*lpSolver) == typeid(storm::solver::GlpkLpSolver<ValueType>) && !isInfinite.empty()) {
615 if (isInfinite[state.first]) {
616 auto localDelta = lpSolver->addBoundedContinuousVariable("d_" + std::to_string(i), storm::utility::zero<BeliefValueType>(), state.second);
617 auto deltaExpr = storm::expressions::Expression(localDelta);
618 deltas.push_back(deltaExpr);
619 lpSolver->addConstraint("state_val_inf_" + std::to_string(i), deltaExpr == lpSolver->getConstant(storm::utility::zero<BeliefValueType>()));
620 }
621 } else {
622 BeliefValueType bound = state.second;
623 if (!isInfinite.empty()) {
624 bound = isInfinite[state.first] ? storm::utility::zero<BeliefValueType>() : state.second;
625 }
626 auto localDelta = lpSolver->addBoundedContinuousVariable("d_" + std::to_string(i), storm::utility::zero<BeliefValueType>(), bound);
627 deltas.push_back(storm::expressions::Expression(localDelta));
628 }
629 ++i;
630 }
631 lpSolver->update();
632 std::vector<BeliefType> gridCandidates;
633 while (!done) {
634 BeliefType candidate;
635 auto belIter = belief.begin();
636 for (uint64_t j = 0; j < belief.size() - 1; ++j) {
637 if (!cc.isEqual(helper[j] - helper[j + 1], storm::utility::zero<BeliefValueType>())) {
638 candidate[belIter->first] = (helper[j] - helper[j + 1]) / storm::utility::convertNumber<BeliefValueType>(resolution);
639 }
640 belIter++;
641 }
642 if (!cc.isEqual(helper[belief.size() - 1], storm::utility::zero<BeliefValueType>())) {
643 candidate[belIter->first] = helper[belief.size() - 1] / storm::utility::convertNumber<BeliefValueType>(resolution);
644 }
645 if (isEqual(candidate, belief)) {
646 // TODO Improve handling of successors which are already on the grid
647 return BeliefClipping{false, noId(), noId(), storm::utility::zero<BeliefValueType>(), {}, true};
648 } else {
649 gridCandidates.push_back(candidate);
650
651 // Add variables a_j
652 auto decisionVar = lpSolver->addBinaryVariable("a_" + std::to_string(gridCandidates.size() - 1));
653 decisionVariables.push_back(storm::expressions::Expression(decisionVar));
654 lpSolver->update();
655
656 i = 0;
657 for (auto const &state : belief) {
658 // Add the constraint to describe the transformation between the state values in the beliefs
659 // d_i
660 storm::expressions::Expression leftSide = deltas[i];
661 storm::expressions::Expression targetValue = lpSolver->getConstant(candidate[i]);
662 if (candidate.find(state.first) != candidate.end()) {
663 targetValue = lpSolver->getConstant(candidate.at(state.first));
664 } else {
665 targetValue = lpSolver->getConstant(storm::utility::zero<BeliefValueType>());
666 }
667
668 // b(s_i) - b_j(s_i) + D * b_j(s_i) - 1 + a_j
670 lpSolver->getConstant(state.second) - targetValue + storm::expressions::Expression(bigDelta) * targetValue -
671 lpSolver->getConstant(storm::utility::one<BeliefValueType>()) + storm::expressions::Expression(decisionVar);
672
673 // Add left >= right
674 lpSolver->addConstraint("state_eq_" + std::to_string(i) + "_" + std::to_string(gridCandidates.size() - 1), leftSide >= rightSide);
675 ++i;
676 lpSolver->update();
677 }
678 }
679 if (helper.back() == storm::utility::convertNumber<BeliefValueType>(resolution)) {
680 // If the last entry of helper is the gridResolution, we have enumerated all necessary distributions
681 done = true;
682 } else {
683 // Update helper by finding the index to increment
684 auto helperIt = helper.end() - 1;
685 while (*helperIt == *(helperIt - 1)) {
686 --helperIt;
687 }
688 STORM_LOG_ASSERT(helperIt != helper.begin(), "Error in grid clipping - index wrong");
689 // Increment the value at the index
690 *helperIt += 1;
691 // Reset all indices greater than the changed one to 0
692 ++helperIt;
693 while (helperIt != helper.end()) {
694 *helperIt = 0;
695 ++helperIt;
696 }
697 }
698 }
699
700 // Only one target belief should be chosen
701 lpSolver->addConstraint("choice", storm::expressions::sum(decisionVariables) == lpSolver->getConstant(storm::utility::one<BeliefValueType>()));
702 // Link D and d_i
703 lpSolver->addConstraint("delta", storm::expressions::Expression(bigDelta) == storm::expressions::sum(deltas));
704 // Exclude D = 0 (self-loop)
705 lpSolver->addConstraint("not_zero", storm::expressions::Expression(bigDelta) > lpSolver->getConstant(storm::utility::zero<BeliefValueType>()));
706
707 lpSolver->update();
708
709 lpSolver->optimize();
710 // Get the optimal belief for clipping
711 BeliefId targetBelief = noId();
712 // Not a belief but has the same type
713 BeliefType deltaValues;
714 auto optDelta = storm::utility::zero<BeliefValueType>();
715 auto deltaSum = storm::utility::zero<BeliefValueType>();
716 if (lpSolver->isOptimal()) {
717 optDelta = lpSolver->getObjectiveValue();
718 for (uint64_t dist = 0; dist < gridCandidates.size(); ++dist) {
719 if (lpSolver->getBinaryValue(lpSolver->getManager().getVariable("a_" + std::to_string(dist)))) {
720 targetBelief = getOrAddBeliefId(gridCandidates[dist]);
721 break;
722 }
723 }
724 i = 0;
725 for (auto const &state : belief) {
726 auto val = lpSolver->getContinuousValue(lpSolver->getManager().getVariable("d_" + std::to_string(i)));
727 if (cc.isLess(storm::utility::zero<BeliefValueType>(), val)) {
728 deltaValues.emplace(state.first, val);
729 deltaSum += val;
730 }
731 ++i;
732 }
733
734 if (cc.isEqual(optDelta, storm::utility::zero<BeliefValueType>())) {
735 // If we get an optimal value of 0, the LP solver considers two beliefs to be equal, possibly due to numerical instability
736 // For a sound result, we consider the state to not be clippable
737 STORM_LOG_WARN("LP solver returned an optimal value of 0. This should definitely not happen when using a grid");
738 STORM_LOG_WARN("Origin" << toString(belief));
739 STORM_LOG_WARN("Target [Bel " << targetBelief << "] " << toString(targetBelief));
740 return BeliefClipping{false, noId(), noId(), storm::utility::zero<BeliefValueType>(), {}, false};
741 }
742
743 if (optDelta == storm::utility::one<BeliefValueType>()) {
744 STORM_LOG_WARN("LP solver returned an optimal value of 1. Sum of state clipping values is " << deltaSum);
745 // If we get an optimal value of 1, we cannot clip the belief as by definition this would correspond to a division by 0.
746 STORM_LOG_DEBUG("Origin" << toString(belief));
747 STORM_LOG_DEBUG("Target [Bel " << targetBelief << "] " << toString(targetBelief));
748
749 if (deltaSum == storm::utility::one<BeliefValueType>()) {
750 return BeliefClipping{false, noId(), noId(), storm::utility::zero<BeliefValueType>(), {}, false};
751 }
752 optDelta = deltaSum;
753 }
754 }
755 return BeliefClipping{lpSolver->isOptimal(), noId(), targetBelief, optDelta, deltaValues, false};
756}
757
758template<typename PomdpType, typename BeliefValueType, typename StateType>
759typename BeliefManager<PomdpType, BeliefValueType, StateType>::BeliefId BeliefManager<PomdpType, BeliefValueType, StateType>::computeInitialBelief() {
760 STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2, "POMDP contains more than one initial state");
761 STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1, "POMDP does not contain an initial state");
762 BeliefType belief;
763 belief[*pomdp.getInitialStates().begin()] = storm::utility::one<BeliefValueType>();
764
765 STORM_LOG_ASSERT(assertBelief(belief), "Invalid initial belief.");
766 return getOrAddBeliefId(belief);
767}
768
769template<typename PomdpType, typename BeliefValueType, typename StateType>
770typename BeliefManager<PomdpType, BeliefValueType, StateType>::BeliefId BeliefManager<PomdpType, BeliefValueType, StateType>::getOrAddBeliefId(
771 BeliefType const &belief) {
772 uint32_t obs = getBeliefObservation(belief);
773 STORM_LOG_ASSERT(obs < beliefToIdMap.size(), "Belief has unknown observation.");
774 auto insertioRes = beliefToIdMap[obs].emplace(belief, beliefs.size());
775 if (insertioRes.second) {
776 // There actually was an insertion, so add the new belief
777 STORM_LOG_TRACE("Add Belief " << beliefs.size() << " " << toString(belief));
778 beliefs.push_back(belief);
779 }
780 // Return the id
781 return insertioRes.first->second;
782}
783template<typename PomdpType, typename BeliefValueType, typename StateType>
785 return getBelief(beliefId).begin()->first;
786}
787
788template<typename PomdpType, typename BeliefValueType, typename StateType>
790 if (pomdp.hasObservationValuations()) {
791 return pomdp.getObservationValuations().getStateInfo(getBeliefObservation(beliefId));
792 } else {
793 STORM_LOG_TRACE("Cannot get observation labels as no observation valuation has been defined for the POMDP. Return empty label instead.");
794 return "";
795 }
796}
797
798template<typename PomdpType, typename BeliefValueType, typename StateType>
799std::vector<BeliefValueType> BeliefManager<PomdpType, BeliefValueType, StateType>::getBeliefAsVector(BeliefId const &beliefId) {
800 return getBeliefAsVector(getBelief(beliefId));
801}
802
803template<typename PomdpType, typename BeliefValueType, typename StateType>
804std::vector<BeliefValueType> BeliefManager<PomdpType, BeliefValueType, StateType>::getBeliefAsVector(const BeliefType &belief) {
805 std::vector<BeliefValueType> res(pomdp.getNumberOfStates(), storm::utility::zero<BeliefValueType>());
806 for (auto const &stateprob : belief) {
807 res[stateprob.first] = stateprob.second;
808 }
809 return res;
810}
811
812template<typename PomdpType, typename BeliefValueType, typename StateType>
815 std::vector<BeliefValueType> beliefAsVector = getBeliefAsVector(beliefId);
816 std::vector<BeliefValueType> res(matrix.getRowCount());
817 matrix.multiplyWithVector(beliefAsVector, res);
818 return res;
819}
820
822
823template class BeliefManager<storm::models::sparse::Pomdp<double>, storm::RationalNumber>;
824
826
828} // namespace storage
829} // namespace storm
double round(double val, int precision)
std::vector< BeliefValueType > computeMatrixBeliefProduct(BeliefId const &beliefId, storm::storage::SparseMatrix< BeliefValueType > &matrix)
void joinSupport(BeliefId const &beliefId, BeliefSupportType &support)
boost::container::flat_set< StateType > BeliefSupportType
PomdpType::ValueType ValueType
BeliefManager(PomdpType const &pomdp, BeliefValueType const &precision, TriangulationMode const &triangulationMode)
std::vector< std::pair< BeliefId, ValueType > > expandAndTriangulate(BeliefId const &beliefId, uint64_t actionIndex, std::vector< BeliefValueType > const &observationResolutions)
boost::container::flat_map< StateType, BeliefValueType > BeliefType
uint32_t getBeliefObservation(BeliefId beliefId)
void setRewardModel(std::optional< std::string > rewardModelName=std::nullopt)
std::string toString(BeliefId const &beliefId) const
uint64_t getBeliefNumberOfChoices(BeliefId beliefId)
uint64_t getRepresentativeState(BeliefId const &beliefId)
Returns the first state in the belief as a representative.
std::vector< std::pair< BeliefId, ValueType > > expand(BeliefId const &beliefId, uint64_t actionIndex)
BeliefId const & getInitialBelief() const
std::vector< std::pair< BeliefId, ValueType > > expandAndClip(BeliefId const &beliefId, uint64_t actionIndex, std::vector< uint64_t > const &observationResolutions)
ValueType getBeliefActionReward(BeliefId const &beliefId, uint64_t const &localActionIndex) const
BeliefId getNumberOfBeliefIds() const
void addToDistribution(DistributionType &distr, StateType const &state, BeliefValueType const &value)
std::string getObservationLabel(BeliefId const &beliefId)
ValueType getWeightedSum(BeliefId const &beliefId, std::vector< ValueType > const &summands)
BeliefClipping clipBeliefToGrid(BeliefId const &beliefId, uint64_t resolution, storm::storage::BitVector isInfinite=storm::storage::BitVector())
bool isEqual(BeliefId const &first, BeliefId const &second) const
Triangulation triangulateBelief(BeliefId beliefId, BeliefValueType resolution)
A bit vector that is internally represented as a vector of 64-bit values.
Definition BitVector.h:18
bool empty() const
Retrieves whether no bits are set to true in this bit vector.
A class that holds a possibly non-square matrix in the compressed row storage format.
void multiplyWithVector(std::vector< value_type > const &vector, std::vector< value_type > &result, std::vector< value_type > const *summand=nullptr) const
Multiplies the matrix with the given vector and writes the result to the given result vector.
index_type getRowCount() const
Returns the number of rows of the matrix.
#define STORM_LOG_WARN(message)
Definition logging.h:30
#define STORM_LOG_DEBUG(message)
Definition logging.h:23
#define STORM_LOG_TRACE(message)
Definition logging.h:17
#define STORM_LOG_ERROR(message)
Definition logging.h:31
#define STORM_LOG_ASSERT(cond, message)
Definition macros.h:11
Expression sum(std::vector< storm::expressions::Expression > const &expressions)
std::string toString(PomdpMemoryPattern const &pattern)
ValueType floor(ValueType const &number)
bool isZero(ValueType const &a)
Definition constants.cpp:41
bool isInteger(ValueType const &number)
Definition constants.cpp:76
ValueType round(ValueType const &number)
LabParser.cpp.
Definition cli.cpp:18
std::vector< BeliefValueType > weights