Storm
A Modern Probabilistic Model Checker
Loading...
Searching...
No Matches
PreprocessingPomdpValueBoundsModelChecker.cpp
Go to the documentation of this file.
2#include <random>
3
6
9
15
16namespace storm {
17namespace pomdp {
18namespace modelchecker {
19template<typename ValueType>
22
23template<typename ValueType>
28
29template<typename ValueType>
35
36template<typename ValueType>
37std::vector<ValueType> PreprocessingPomdpValueBoundsModelChecker<ValueType>::getChoiceValues(std::vector<ValueType> const& stateValues,
38 std::vector<ValueType>* actionBasedRewards) {
39 std::vector<ValueType> choiceValues((pomdp.getNumberOfChoices()));
40 pomdp.getTransitionMatrix().multiplyWithVector(stateValues, choiceValues, actionBasedRewards);
41 return choiceValues;
42}
43
template<typename ValueType>
std::pair<std::vector<ValueType>, storm::storage::Scheduler<ValueType>> PreprocessingPomdpValueBoundsModelChecker<ValueType>::computeValuesForGuessedScheduler(
    storm::Environment const& env, std::vector<ValueType> const& stateValues, std::vector<ValueType>* actionBasedRewards, storm::logic::Formula const& formula,
    ValueType const& scoreThreshold, bool relativeScore) {
    // NOTE(review): the body below also uses 'info' and 'underlyingMdp', which do not appear in the visible
    // parameter list -- part of the signature seems to be missing from this extract; verify against the header.
    //
    // Heuristically derives a randomized, observation-based scheduler from (optimal, fully observable) state
    // values, applies it to the underlying MDP, and model checks the induced model. Returns the resulting
    // per-state values together with the scheduler that produced them.
    // Create some positional scheduler for the POMDP
    storm::storage::Scheduler<ValueType> pomdpScheduler(pomdp.getNumberOfStates());
    // For each state, we heuristically find a good distribution over output actions.
    auto choiceValues = getChoiceValues(stateValues, actionBasedRewards);
    auto const& choiceIndices = pomdp.getTransitionMatrix().getRowGroupIndices();
    // One distribution per observation: states sharing an observation must choose identically.
    std::vector<storm::storage::Distribution<ValueType, uint_fast64_t>> choiceDistributions(pomdp.getNrObservations());
    for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
        auto& choiceDistribution = choiceDistributions[pomdp.getObservation(state)];
        ValueType const& stateValue = stateValues[state];
        assert(stateValue >= storm::utility::zero<ValueType>());
        for (auto choice = choiceIndices[state]; choice < choiceIndices[state + 1]; ++choice) {
            ValueType const& choiceValue = choiceValues[choice];
            assert(choiceValue >= storm::utility::zero<ValueType>());
            // Rate this choice by considering the relative difference between the choice value and the (optimal) state value
            // A high score shall mean that the choice is "good"
            if (storm::utility::isInfinity(stateValue)) {
                // For infinity states, we simply distribute uniformly.
                // This case could be handled a bit more sensible
                choiceDistribution.addProbability(choice - choiceIndices[state], scoreThreshold);
            } else {
                // Signed gap to the optimum: 0 for an optimal choice, positive for a worse one.
                ValueType choiceScore = info.minimize() ? (choiceValue - stateValue) : (stateValue - choiceValue);
                if (relativeScore) {
                    // Normalize the gap by the average of state and choice value (when that average is non-zero).
                    ValueType avg = (stateValue + choiceValue) / storm::utility::convertNumber<ValueType, uint64_t>(2);
                    if (!storm::utility::isZero(avg)) {
                        choiceScore /= avg;
                    }
                }
                // Flip so that optimal choices score 1 and only choices at or above the threshold are kept.
                choiceScore = storm::utility::one<ValueType>() - choiceScore;
                if (choiceScore >= scoreThreshold) {
                    choiceDistribution.addProbability(choice - choiceIndices[state], choiceScore);
                }
            }
        }
        // If the distribution is empty, i.e. no choice has had a suitable score, distribute uniformly
        if (choiceDistribution.size() == 0) {
            for (auto choice = choiceIndices[state]; choice < choiceIndices[state + 1]; ++choice) {
                choiceDistribution.addProbability(choice - choiceIndices[state], scoreThreshold);
            }
        }
        STORM_LOG_ASSERT(choiceDistribution.size() > 0, "Empty choice distribution.");
    }
    // Normalize all distributions
    for (auto& choiceDistribution : choiceDistributions) {
        choiceDistribution.normalize();
    }
    // Set the scheduler for all states
    for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
        pomdpScheduler.setChoice(choiceDistributions[pomdp.getObservation(state)], state);
    }
    STORM_LOG_ASSERT(!pomdpScheduler.isPartialScheduler(), "Expected a fully defined scheduler.");
    // Apply the randomized scheduler to the fully observable MDP and model check the induced model.
    auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false);

    auto resultPtr = storm::api::verifyWithSparseEngine<ValueType>(env, scheduledModel, storm::api::createTask<ValueType>(formula.asSharedPointer(), false));
    STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained.");
    STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type");
    std::vector<ValueType> pomdpSchedulerResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector());
    return std::make_pair(pomdpSchedulerResult, pomdpScheduler);
}
107
template<typename ValueType>
std::pair<std::vector<ValueType>, storm::storage::Scheduler<ValueType>> PreprocessingPomdpValueBoundsModelChecker<ValueType>::computeValuesForRandomFMPolicy(
    storm::Environment const& env, storm::logic::Formula const& formula, storm::pomdp::analysis::FormulaInformation const& info, uint64_t memoryBound) {
    // Samples a uniformly random finite-memory (observation-based) policy by unfolding 'memoryBound' memory
    // states into the POMDP, model checks the induced model, and folds the unfolded values back to one value
    // per original POMDP state. Returns those values together with the sampled scheduler.
    // Consider memoryless policy on memory-unfolded POMDP
    storm::storage::Scheduler<ValueType> pomdpScheduler(pomdp.getNumberOfStates() * memoryBound);

    STORM_LOG_DEBUG("Computing the unfolding for memory bound " << memoryBound);
    // NOTE(review): 'memory' is used below but its construction is not visible in this extract (a line appears
    // to be missing) -- presumably a PomdpMemory built for 'memoryBound' memory states; verify against the original file.
    storm::transformer::PomdpMemoryUnfolder<ValueType> memoryUnfolder(pomdp, memory);
    // We keep unreachable states to not mess with the state ordering and capture potential better choices
    auto memPomdp = memoryUnfolder.transform(false);

    // Determine an observation-based policy by choosing any of the enabled actions uniformly at random
    std::vector<uint64_t> obsChoiceVector(memPomdp->getNrObservations());
    std::random_device rd;
    auto engine = std::mt19937(rd());
    for (uint64_t obs = 0; obs < memPomdp->getNrObservations(); ++obs) {
        // Query the number of choices via an arbitrary state with this observation.
        uint64_t nrChoices = memPomdp->getNumberOfChoices(memPomdp->getStatesWithObservation(obs).front());
        std::uniform_int_distribution<uint64_t> uniform_dist(0, nrChoices - 1);
        obsChoiceVector[obs] = uniform_dist(engine);
    }
    // All states sharing an observation take the same sampled action (observation-based policy).
    for (uint64_t state = 0; state < memPomdp->getNumberOfStates(); ++state) {
        pomdpScheduler.setChoice(obsChoiceVector[memPomdp->getObservation(state)], state);
    }

    // Model check the DTMC resulting from the policy
    auto underlyingMdp =
        std::make_shared<storm::models::sparse::Mdp<ValueType>>(memPomdp->getTransitionMatrix(), memPomdp->getStateLabeling(), memPomdp->getRewardModels());
    auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false);
    auto resultPtr = storm::api::verifyWithSparseEngine<ValueType>(env, scheduledModel, storm::api::createTask<ValueType>(formula.asSharedPointer(), false));
    STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained.");
    STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type");
    std::vector<ValueType> pomdpSchedulerResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector());

    // Take the optimal value in ANY of the unfolded states for a POMDP state as the resulting state value
    std::vector<ValueType> res(pomdp.getNumberOfStates(), info.minimize() ? storm::utility::infinity<ValueType>() : -storm::utility::infinity<ValueType>());
    for (uint64_t memPomdpState = 0; memPomdpState < pomdpSchedulerResult.size(); ++memPomdpState) {
        // Assumes unfolded state ids are grouped by original state, 'memoryBound' copies each -- matches the
        // unfolder's state ordering (see the "keep unreachable states" comment above); TODO confirm.
        uint64_t modelState = memPomdpState / memoryBound;
        if ((info.minimize() && pomdpSchedulerResult[memPomdpState] < res[modelState]) ||
            (!info.minimize() && pomdpSchedulerResult[memPomdpState] > res[modelState])) {
            res[modelState] = pomdpSchedulerResult[memPomdpState];
        }
    }
    return std::make_pair(res, pomdpScheduler);
}
153
template<typename ValueType>
[[maybe_unused]] std::pair<std::vector<ValueType>, storm::storage::Scheduler<ValueType>>
PreprocessingPomdpValueBoundsModelChecker<ValueType>::computeValuesForRandomMemorylessPolicy(
    std::shared_ptr<storm::models::sparse::Mdp<ValueType>> underlyingMdp) {
    // NOTE(review): 'env' and 'formula' are used below but do not appear in the visible parameter list --
    // part of the signature seems to be missing from this extract; verify against the header.
    //
    // Samples a uniformly random memoryless observation-based policy, applies it to the given underlying MDP,
    // and model checks the induced model. Returns the per-state values together with the sampled scheduler.
    storm::storage::Scheduler<ValueType> pomdpScheduler(pomdp.getNumberOfStates());
    std::vector<uint64_t> obsChoiceVector(pomdp.getNrObservations());

    // Pick one enabled action per observation uniformly at random.
    std::random_device rd;
    auto engine = std::mt19937(rd());
    for (uint64_t obs = 0; obs < pomdp.getNrObservations(); ++obs) {
        // Query the number of choices via an arbitrary state with this observation.
        uint64_t nrChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(obs).front());
        std::uniform_int_distribution<uint64_t> uniform_dist(0, nrChoices - 1);
        obsChoiceVector[obs] = uniform_dist(engine);
    }

    // All states sharing an observation take the same sampled action (observation-based policy).
    for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
        STORM_LOG_DEBUG("State " << state << " -- Random Choice " << obsChoiceVector[pomdp.getObservation(state)]);
        pomdpScheduler.setChoice(obsChoiceVector[pomdp.getObservation(state)], state);
    }

    auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false);

    auto resultPtr = storm::api::verifyWithSparseEngine<ValueType>(env, scheduledModel, storm::api::createTask<ValueType>(formula.asSharedPointer(), false));
    STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained.");
    STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type");
    std::vector<ValueType> pomdpSchedulerResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector());

    STORM_LOG_DEBUG("Initial Value for guessed Policy: " << pomdpSchedulerResult[pomdp.getInitialStates().getNextSetIndex(0)]);

    return std::make_pair(pomdpSchedulerResult, pomdpScheduler);
}
186
template<typename ValueType>
// NOTE(review): the function header (return type ValueBounds, name -- presumably getValueBounds -- and the
// parameters env/formula/info used below) is not visible in this extract; lines appear to be missing. Verify
// against the header before relying on this documentation.
//
// Computes lower and upper bounds on the POMDP values: the fully observable (MDP) result bounds the values
// from one side, and several heuristically guessed observation-based schedulers bound them from the other.
    STORM_LOG_THROW(info.isNonNestedReachabilityProbability() || info.isNonNestedExpectedRewardFormula(), storm::exceptions::NotSupportedException,
                    "The property type is not supported for this analysis.");

    // Compute the values on the fully observable MDP
    // We need an actual MDP so that we can apply schedulers below.
    // Also, the api call in the next line will require a copy anyway.
    auto underlyingMdp =
        std::make_shared<storm::models::sparse::Mdp<ValueType>>(pomdp.getTransitionMatrix(), pomdp.getStateLabeling(), pomdp.getRewardModels());
    auto resultPtr = storm::api::verifyWithSparseEngine<ValueType>(env, underlyingMdp, storm::api::createTask<ValueType>(formula.asSharedPointer(), false));
    STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained.");
    STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type");
    std::vector<ValueType> fullyObservableResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector());

    // For reward formulas, the guessing needs the per-choice reward vector; for probabilities it stays nullptr.
    std::vector<ValueType> actionBasedRewards;
    std::vector<ValueType>* actionBasedRewardsPtr = nullptr;
    // NOTE(review): the guard opening this block (presumably 'if (info.isNonNestedExpectedRewardFormula()) {')
    // is not visible in this extract -- a line appears to be missing here.
        actionBasedRewards = pomdp.getRewardModel(info.getRewardModelName()).getTotalRewardVector(pomdp.getTransitionMatrix());
        actionBasedRewardsPtr = &actionBasedRewards;
    }
    // Guess schedulers with a few different (threshold, relative-score) parameter combinations.
    std::vector<std::vector<ValueType>> guessedSchedulerValues;
    std::vector<storm::storage::Scheduler<ValueType>> guessedSchedulers;
    std::shared_ptr<std::pair<std::vector<ValueType>, storm::storage::Scheduler<ValueType>>> guessedSchedulerPair;
    std::vector<std::pair<double, bool>> guessParameters({{0.875, false}, {0.875, true}, {0.75, false}, {0.75, true}});
    for (auto const& pars : guessParameters) {
        guessedSchedulerPair = std::make_shared<std::pair<std::vector<ValueType>, storm::storage::Scheduler<ValueType>>>(
            computeValuesForGuessedScheduler(env, fullyObservableResult, actionBasedRewardsPtr, formula, info, underlyingMdp,
                                             storm::utility::convertNumber<ValueType>(pars.first), pars.second));
        guessedSchedulerValues.push_back(guessedSchedulerPair->first);
        guessedSchedulers.push_back(guessedSchedulerPair->second);
    }

    // compute the 'best' guess and do a few iterations on it
    uint64_t bestGuess = 0;
    // Rank guesses by the sum of their state values (smaller is better when minimizing, larger when maximizing).
    ValueType bestGuessSum = std::accumulate(guessedSchedulerValues.front().begin(), guessedSchedulerValues.front().end(), storm::utility::zero<ValueType>());
    for (uint64_t guess = 1; guess < guessedSchedulerValues.size(); ++guess) {
        ValueType guessSum = std::accumulate(guessedSchedulerValues[guess].begin(), guessedSchedulerValues[guess].end(), storm::utility::zero<ValueType>());
        if ((info.minimize() && guessSum < bestGuessSum) || (info.maximize() && guessSum > bestGuessSum)) {
            bestGuess = guess;
            bestGuessSum = guessSum;
        }
    }
    // Iterate on the best guess three times, each time feeding the previously obtained values back in.
    guessedSchedulerPair = std::make_shared<std::pair<std::vector<ValueType>, storm::storage::Scheduler<ValueType>>>(
        computeValuesForGuessedScheduler(env, guessedSchedulerValues[bestGuess], actionBasedRewardsPtr, formula, info, underlyingMdp,
                                         storm::utility::convertNumber<ValueType>(guessParameters[bestGuess].first), guessParameters[bestGuess].second));
    guessedSchedulerValues.push_back(guessedSchedulerPair->first);
    guessedSchedulers.push_back(guessedSchedulerPair->second);
    guessedSchedulerPair = std::make_shared<std::pair<std::vector<ValueType>, storm::storage::Scheduler<ValueType>>>(
        computeValuesForGuessedScheduler(env, guessedSchedulerValues.back(), actionBasedRewardsPtr, formula, info, underlyingMdp,
                                         storm::utility::convertNumber<ValueType>(guessParameters[bestGuess].first), guessParameters[bestGuess].second));
    guessedSchedulerValues.push_back(guessedSchedulerPair->first);
    guessedSchedulers.push_back(guessedSchedulerPair->second);
    guessedSchedulerPair = std::make_shared<std::pair<std::vector<ValueType>, storm::storage::Scheduler<ValueType>>>(
        computeValuesForGuessedScheduler(env, guessedSchedulerValues.back(), actionBasedRewardsPtr, formula, info, underlyingMdp,
                                         storm::utility::convertNumber<ValueType>(guessParameters[bestGuess].first), guessParameters[bestGuess].second));
    guessedSchedulerValues.push_back(guessedSchedulerPair->first);
    guessedSchedulers.push_back(guessedSchedulerPair->second);

    // Check if one of the guesses is worse than one of the others (and potentially delete it)
    // Avoid deleting entries during the loop to ensure that indices remain valid
    storm::storage::BitVector keptGuesses(guessedSchedulerValues.size(), true);
    for (uint64_t i = 0; i < guessedSchedulerValues.size() - 1; ++i) {
        if (!keptGuesses.get(i)) {
            continue;
        }
        for (uint64_t j = i + 1; j < guessedSchedulerValues.size(); ++j) {
            if (!keptGuesses.get(j)) {
                continue;
            }
            // Element-wise dominance check: drop the guess whose bound is (weakly) dominated by the other.
            if (storm::utility::vector::compareElementWise(guessedSchedulerValues[i], guessedSchedulerValues[j], std::less_equal<ValueType>())) {
                if (info.minimize()) {
                    // In this case we are guessing upper bounds (and smaller upper bounds are better)
                    keptGuesses.set(j, false);
                } else {
                    // In this case we are guessing lower bounds (and larger lower bounds are better)
                    keptGuesses.set(i, false);
                    break;
                }
            } else if (storm::utility::vector::compareElementWise(guessedSchedulerValues[j], guessedSchedulerValues[i], std::less_equal<ValueType>())) {
                if (info.minimize()) {
                    keptGuesses.set(i, false);
                    break;
                } else {
                    keptGuesses.set(j, false);
                }
            }
        }
    }
    STORM_LOG_INFO("Keeping scheduler guesses " << keptGuesses);
    storm::utility::vector::filterVectorInPlace(guessedSchedulerValues, keptGuesses);
    // Keep the schedulers in sync with the filtered value vectors.
    std::vector<storm::storage::Scheduler<ValueType>> filteredSchedulers;
    for (uint64_t i = 0; i < guessedSchedulers.size(); ++i) {
        if (keptGuesses[i]) {
            filteredSchedulers.push_back(guessedSchedulers[i]);
        }
    }

    // Finally prepare the result
    // When minimizing, the MDP value is a lower bound and the scheduler guesses are upper bounds; when
    // maximizing it is the other way around.
    ValueBounds result;
    if (info.minimize()) {
        result.lower.push_back(std::move(fullyObservableResult));
        result.upper = std::move(guessedSchedulerValues);
        result.upperSchedulers = filteredSchedulers;
    } else {
        result.lower = std::move(guessedSchedulerValues);
        result.upper.push_back(std::move(fullyObservableResult));
        result.lowerSchedulers = filteredSchedulers;
    }
    STORM_LOG_WARN_COND_DEBUG(storm::utility::vector::compareElementWise(result.lower.front(), result.upper.front(), std::less_equal<ValueType>()),
                              "Lower bound is larger than upper bound");
    return result;
}
301
302template<typename ValueType>
308
309template<typename ValueType>
314
template<typename ValueType>
// NOTE(review): the function header (return type -- presumably the ExtremeValueBound struct -- name, and the
// parameters env/formula/info used below) is not visible in this extract; lines appear to be missing. Verify
// against the header before relying on this documentation.
//
// Computes a bound on the reward values by model checking the same reward formula with the OPPOSITE
// optimization direction on the fully observable MDP.
    STORM_LOG_THROW(info.isNonNestedExpectedRewardFormula(), storm::exceptions::NotSupportedException, "The property type is not supported for this analysis.");

    // Compute the values for the opposite direction on the fully observable MDP
    // We need an actual MDP so that we can apply schedulers below.
    // Also, the api call in the next line will require a copy anyway.
    // NOTE(review): the declaration of 'newFormula' (presumably a copy of formula.asRewardOperatorFormula())
    // is not visible here -- a line appears to be missing.
    if (formula.asOperatorFormula().getOptimalityType() == storm::solver::OptimizationDirection::Maximize) {
        newFormula.setOptimalityType(storm::solver::OptimizationDirection::Minimize);
    } else {
        newFormula.setOptimalityType(storm::solver::OptimizationDirection::Maximize);
    }
    auto formulaPtr = std::make_shared<storm::logic::RewardOperatorFormula>(newFormula);
    auto underlyingMdp =
        std::make_shared<storm::models::sparse::Mdp<ValueType>>(pomdp.getTransitionMatrix(), pomdp.getStateLabeling(), pomdp.getRewardModels());
    auto resultPtr = storm::api::verifyWithSparseEngine<ValueType>(env, underlyingMdp, storm::api::createTask<ValueType>(formulaPtr, false));
    STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained.");
    STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type");
    std::vector<ValueType> resultVec = std::move(resultPtr->template asExplicitQuantitativeCheckResult<ValueType>().getValueVector());
    // NOTE(review): the declaration of 'res' (the result struct) is not visible here -- a line appears to be missing.
    // The bound is a minimum when the original query maximizes, and vice versa.
    if (info.minimize()) {
        res.min = false;
    } else {
        res.min = true;
    }
    res.values = std::move(resultVec);
    return res;
}
346
348
350} // namespace modelchecker
351} // namespace pomdp
352} // namespace storm
RewardOperatorFormula & asRewardOperatorFormula()
Definition Formula.cpp:461
OperatorFormula & asOperatorFormula()
Definition Formula.cpp:469
std::shared_ptr< Formula const > asSharedPointer()
Definition Formula.cpp:548
void setOptimalityType(storm::solver::OptimizationDirection newOptimalityType)
storm::solver::OptimizationDirection const & getOptimalityType() const
This class represents a (discrete-time) Markov decision process.
Definition Mdp.h:14
This class represents a partially observable Markov decision process.
Definition Pomdp.h:15
PreprocessingPomdpValueBoundsModelChecker(storm::models::sparse::Pomdp< ValueType > const &pomdp)
A bit vector that is internally represented as a vector of 64-bit values.
Definition BitVector.h:18
void set(uint_fast64_t index, bool value=true)
Sets the given truth value at the given index.
bool get(uint_fast64_t index) const
Retrieves the truth value of the bit at the given index and performs a bound check.
PomdpMemory build(PomdpMemoryPattern pattern, uint64_t numStates) const
This class defines which action is chosen in a particular state of a non-deterministic model.
Definition Scheduler.h:18
#define STORM_LOG_INFO(message)
Definition logging.h:29
#define STORM_LOG_DEBUG(message)
Definition logging.h:23
#define STORM_LOG_ASSERT(cond, message)
Definition macros.h:11
#define STORM_LOG_THROW(cond, exception, message)
Definition macros.h:30
#define STORM_LOG_WARN_COND_DEBUG(cond, message)
Definition macros.h:18
SFTBDDChecker::ValueType ValueType
FormulaInformation getFormulaInformation(PomdpType const &pomdp, storm::logic::ProbabilityOperatorFormula const &formula)
bool compareElementWise(std::vector< T > const &left, std::vector< T > const &right, Comparator comp=std::less< T >())
Definition vector.h:177
storm::storage::BitVector filterInfinity(std::vector< T > const &values)
Retrieves a bit vector containing all the indices for which the value at this position is equal to infinity.
Definition vector.h:585
void filterVectorInPlace(std::vector< Type > &v, storm::storage::BitVector const &filter)
Definition vector.h:1224
bool isZero(ValueType const &a)
Definition constants.cpp:41
ValueType infinity()
Definition constants.cpp:31
bool isInfinity(ValueType const &a)
Definition constants.cpp:71
LabParser.cpp.
Definition cli.cpp:18
Struct to store the extreme bound values needed for the reward correction values when clipping is use...
Struct for storing precomputed values bounding the actual values on the POMDP.
std::vector< storm::storage::Scheduler< ValueType > > lowerSchedulers
std::vector< storm::storage::Scheduler< ValueType > > upperSchedulers