Storm 1.11.1.1
A Modern Probabilistic Model Checker
Loading...
Searching...
No Matches
ArchiveReader.cpp
Go to the documentation of this file.
2
3#include <array>
4#include <cstring>
5#include <fstream>
6
10
13
14namespace storm::io {
15
16#ifdef STORM_HAVE_LIBARCHIVE
20void checkResult(archive* arch, auto resultCode) {
21 static_assert(ARCHIVE_OK == 0, "Expected that return value >= 0 means a valid result");
22 STORM_LOG_THROW(arch != nullptr, storm::exceptions::FileIoException, "Unexpected result: Archive not loaded.");
23 STORM_LOG_THROW(std::cmp_greater_equal(resultCode, ARCHIVE_WARN), storm::exceptions::FileIoException,
24 "Unexpected result from archive: " << archive_error_string(arch) << ".");
25 STORM_LOG_WARN_COND(std::cmp_greater_equal(resultCode, ARCHIVE_OK), "Unexpected result from archive: " << archive_error_string(arch) << ".");
26}
27
28void ArchiveReader::ArchiveDeleter::operator()(archive* arch) const noexcept {
29 if (arch) {
30 // archives created for reading OR writing can be freed by archive_free()
31 archive_free(arch);
32 }
33}
34
35ArchiveReader::ArchiveReadEntry::ArchiveReadEntry(archive_entry* currentEntry, archive* archive) : _currentEntry(currentEntry), _archive(archive) {
36 STORM_LOG_ASSERT(_currentEntry, "No valid entry loaded.");
37}
38#endif
39
40std::filesystem::path ArchiveReader::ArchiveReadEntry::name() const {
41#ifdef STORM_HAVE_LIBARCHIVE
42 STORM_LOG_THROW(_currentEntry, storm::exceptions::FileIoException, "No valid entry loaded.");
43 std::filesystem::path result;
44 char const* path = archive_entry_pathname(_currentEntry);
45 if (path) {
46 result = path;
47 }
48 return result;
49#else
50 STORM_LOG_THROW(false, storm::exceptions::MissingLibraryException, "Reading archives is not supported. Storm is compiled without LibArchive.");
51#endif
52}
53
// NOTE(review): the signature line of this definition (original line 54) is absent from this listing;
// the listing's footer mentions `bool isDir() const` — confirm against the original file.
// With libarchive: throws FileIoException if no entry is loaded, otherwise reports whether the
// file type of the current entry equals AE_IFDIR.
// Without libarchive: always throws MissingLibraryException.
55#ifdef STORM_HAVE_LIBARCHIVE
56 STORM_LOG_THROW(_currentEntry, storm::exceptions::FileIoException, "No valid entry loaded.");
57 return archive_entry_filetype(_currentEntry) == AE_IFDIR;
58#else
59 STORM_LOG_THROW(false, storm::exceptions::MissingLibraryException, "Reading archives is not supported. Storm is compiled without LibArchive.");
60#endif
61}
62
63template<typename T>
65
// Reads the data of the current archive entry chunk-wise and returns it as a Vec<T>.
// For T == bool the data is stored bucket-wise into the container (see IsBitVector below); for all other
// arithmetic types the bytes are reinterpreted as values of type T, byte-swapped if Endianness is not native.
// Throws FileIoException if no entry is loaded, if the reported entry size is not a multiple of sizeof(DataType),
// or if bytes remain in the buffer once no more data can be read.
// Throws MissingLibraryException when built without libarchive.
// NOTE(review): the signature line (original line 68) as well as original lines 99 and 137 are absent from this
// listing — confirm against the original file before editing the code below.
66template<typename T, std::endian Endianness>
67 requires(std::is_arithmetic_v<T>)
69#ifdef STORM_HAVE_LIBARCHIVE
70 using BucketType = decltype(std::declval<storm::storage::BitVector&>().getBucket({}));
71 constexpr bool IsBitVector = std::is_same_v<T, bool>;
72 using DataType = std::conditional_t<IsBitVector, BucketType, T>; // for BitVectors, we use uint64_t as the underlying type
73 constexpr bool NativeEndianness = Endianness == std::endian::native;
74 STORM_LOG_THROW(_currentEntry, storm::exceptions::FileIoException, "No valid entry loaded.");
75
76 // Prepare the vector to store the data, using given size (if available)
77 Vec<T> content;
78 auto entrySize = archive_entry_size(_currentEntry);
79 checkResult(_archive, entrySize);
// A negative size is treated as zero from here on.
80 entrySize = std::max<decltype(entrySize)>(entrySize, 0);
81 STORM_LOG_THROW((entrySize % sizeof(DataType) == 0), storm::exceptions::FileIoException,
82 "Archive entry '" << name() << "' can not be extracted as vector of a " << sizeof(DataType) << "-bytes type: File size " << entrySize
83 << " bytes is not a multiple of " << sizeof(DataType) << " bytes.");
84 if constexpr (IsBitVector) {
85 content.resize(entrySize * 8); // 8 bits in a byte
86 } else {
87 // For other types, we reserve the number of elements
88 content.reserve(entrySize / sizeof(DataType));
89 }
90
91 [[maybe_unused]] uint64_t bucketCount = 0; // only used for BitVector content
92 // Helper function to add data to the content
93 auto append = [&content, &bucketCount](std::ranges::input_range auto&& data) {
94 if constexpr (IsBitVector) {
// NOTE(review): bucketCount counts buckets (it is used as bucket index below and multiplied by
// sizeof(BucketType) * 8 when the final size is computed), yet it is added here to a number of bits.
// Presumably `(bucketCount + data.size()) * sizeof(BucketType) * 8` was intended — confirm.
95 content.grow(bucketCount + data.size() * sizeof(BucketType) * 8); // 8 bits in a byte
96 for (auto bits : data) {
97 content.setBucket(
98 bucketCount,
// NOTE(review): original line 99 is missing here; presumably it opens a call that reverses the bit order of `bits`
// (the listing's footer mentions `reverseBits`) — confirm.
100 bits)); // Our bit vectors store the items in reverse order, i.e., the first item is indicated by the most significant bit
101 ++bucketCount;
102 }
103 } else {
104 (void)bucketCount; // silences unused lambda capture warning
105 content.insert(content.end(), data.begin(), data.end());
106 }
107 };
108
109 static_assert(BufferSize % sizeof(DataType) == 0, "Buffer size should be a multiple of sizeof(DataType).");
110 std::array<char, BufferSize> buffer;
111 auto bytesRead = archive_read_data(_archive, buffer.data(), BufferSize);
112 checkResult(_archive, bytesRead);
// NOTE(review): checkResult() only warns for values in [ARCHIVE_WARN, ARCHIVE_OK). If archive_read_data() can
// return such a negative value, the unsigned arithmetic on bytesRead below would misbehave — confirm.
113 while (true) {
114 // process the current buffer contents
115 uint64_t const numValues = bytesRead / sizeof(DataType); // number of values that we can now append
116 if constexpr (NativeEndianness || sizeof(DataType) == 1) {
117 append(std::span<const DataType>(reinterpret_cast<const DataType*>(buffer.data()), numValues));
118 } else {
119 append(std::span<const DataType>(reinterpret_cast<const DataType*>(buffer.data()), numValues) |
120 std::ranges::views::transform(storm::utility::byteSwap<DataType>));
121 }
122
123 // put the next chunk into the buffer
124 uint64_t offsetBytes = bytesRead % sizeof(DataType); // number of bytes that could not be processed in this round
125 if (offsetBytes > 0 && numValues > 0) {
126 // if some of the bytes could not be processed, we copy them to the beginning of the buffer for the next read
127 // the copy is always safe (i.e. no overlap of source and destination) as implied by the asserted expressions below
128 STORM_LOG_ASSERT(static_cast<uint64_t>(bytesRead) == numValues * sizeof(DataType) + offsetBytes,
129 "Unsafe copy."); // by def. of bytesRead and numValues
130 STORM_LOG_ASSERT(static_cast<uint64_t>(bytesRead - offsetBytes) > offsetBytes,
131 "Unsafe copy."); // because numValues > 0 and sizeof(DataType) > offsetBytes
132 std::copy(buffer.data() + bytesRead - offsetBytes, buffer.data() + bytesRead, buffer.data());
133 }
// The next chunk is written behind the unprocessed leftover bytes at the start of the buffer.
134 bytesRead = archive_read_data(_archive, buffer.data() + offsetBytes, BufferSize - offsetBytes);
135 checkResult(_archive, bytesRead);
136 if (bytesRead == 0) {
// NOTE(review): original line 137 is missing here; the following lines look like the arguments of a
// STORM_LOG_THROW that rejects leftover bytes at the end of the data — confirm.
138 offsetBytes == 0, storm::exceptions::FileIoException,
139 "Archive entry could not be extracted as vector of a " << sizeof(DataType) << "-bytes type: " << offsetBytes << " bytes left in the buffer.");
140 break; // no more data to read
141 }
142 bytesRead += offsetBytes; // actual number of bytes to process in the buffer
143 }
144
145 // Resize the content to the actual size
146 if constexpr (IsBitVector) {
147 content.resize(bucketCount * sizeof(BucketType) * 8); // 8 bits in a byte
148 } else {
149 content.shrink_to_fit();
150 }
151
152 // We have read the data, i.e. it is no longer readable
153 _archive = nullptr;
154 return content;
155#else
156 STORM_LOG_THROW(false, storm::exceptions::MissingLibraryException, "Reading archives is not supported. Storm is compiled without LibArchive.");
157#endif
158}
159
// NOTE(review): the signature line of this definition (original line 160) is absent from this listing;
// the listing's footer mentions `std::string toString()` — confirm against the original file.
// With libarchive: reads the data of the current entry in chunks of BufferSize bytes and returns it as a string.
// Throws FileIoException if reading fails (negative return value of archive_read_data()).
// Without libarchive: always throws MissingLibraryException.
// NOTE(review): unlike the sibling accessors, `_currentEntry` is passed to archive_entry_size() without a
// preceding "No valid entry loaded." check — confirm whether that is intended.
161#ifdef STORM_HAVE_LIBARCHIVE
162 // Prepare the vector to store the data, using given size (if available)
163 std::string content;
164 auto const entrySize = archive_entry_size(_currentEntry);
165 checkResult(_archive, entrySize);
// A negative size results in no reservation (clamped to zero).
166 content.reserve(std::max<decltype(entrySize)>(entrySize, 0));
167
168 std::array<char, BufferSize> buffer;
169 la_ssize_t bytesRead = 0;
// Append chunk by chunk until archive_read_data() returns zero or a negative value.
170 while ((bytesRead = archive_read_data(_archive, buffer.data(), BufferSize)) > 0) {
171 content.append(buffer.data(), bytesRead);
172 }
173 STORM_LOG_THROW(bytesRead >= 0, storm::exceptions::FileIoException, "Failed to read data from archive. " << archive_error_string(_archive) << ".");
174 content.shrink_to_fit();
175
176 // We have read the data, i.e. it is no longer readable
177 _archive = nullptr;
178 return content;
179#else
180 STORM_LOG_THROW(false, storm::exceptions::MissingLibraryException, "Reading archives is not supported. Storm is compiled without LibArchive.");
181#endif
182}
183
184#ifdef STORM_HAVE_LIBARCHIVE
185ArchiveReader::Iterator::Iterator(std::filesystem::path const& filename) : _archive(archive_read_new(), ArchiveDeleter{}), _currentEntry(nullptr) {
186 STORM_LOG_THROW(_archive, storm::exceptions::FileIoException, "Failed to create archive reader.");
187 // Enable all filters (e.g., gzip, bzip2, xz) and all formats (tar, zip, etc.)
188 checkResult(_archive.get(), archive_read_support_filter_all(_archive.get()));
189 checkResult(_archive.get(), archive_read_support_format_all(_archive.get()));
190 // A typical block size of 10240 is recommended by libarchive documentation
191 checkResult(_archive.get(), archive_read_open_filename(_archive.get(), filename.c_str(), 10240));
192 ++*this; // Move to the first entry
193}
194#endif
195
196bool ArchiveReader::Iterator::operator==(Iterator const& other) const {
197#ifdef STORM_HAVE_LIBARCHIVE
198 return _currentEntry == other._currentEntry;
199#else
200 STORM_LOG_THROW(false, storm::exceptions::MissingLibraryException, "Reading archives is not supported. Storm is compiled without LibArchive.");
201#endif
202}
203
204bool ArchiveReader::Iterator::operator!=(Iterator const& other) const {
205#ifdef STORM_HAVE_LIBARCHIVE
206 return _currentEntry != other._currentEntry;
207#else
208 STORM_LOG_THROW(false, storm::exceptions::MissingLibraryException, "Reading archives is not supported. Storm is compiled without LibArchive.");
209#endif
210}
211
215typename ArchiveReader::Iterator& ArchiveReader::Iterator::operator++() {
216#ifdef STORM_HAVE_LIBARCHIVE
217 int r = archive_read_next_header(_archive.get(), &_currentEntry);
218 if (r == ARCHIVE_EOF) {
219 // End of archive
220 _currentEntry = nullptr;
221 _archive.reset();
222 } else {
223 checkResult(_archive.get(), r);
224 }
225 return *this;
226#else
227 STORM_LOG_THROW(false, storm::exceptions::MissingLibraryException, "Reading archives is not supported. Storm is compiled without LibArchive.");
228#endif
229}
230
231typename ArchiveReader::ArchiveReadEntry ArchiveReader::Iterator::operator*() const {
232#ifdef STORM_HAVE_LIBARCHIVE
233 return ArchiveReadEntry(_currentEntry, _archive.get());
234#else
235 STORM_LOG_THROW(false, storm::exceptions::MissingLibraryException, "Reading archives is not supported. Storm is compiled without LibArchive.");
236#endif
237}
238
// NOTE(review): the signature line of this definition (original line 239) is absent from this listing;
// the body reads a path-like object named `file` and returns bool — confirm against the original file.
// Returns false if `file` is not a regular file.
// With libarchive: tries to open the file and read the first header; the result is true iff that yields
// ARCHIVE_OK, ARCHIVE_EOF or ARCHIVE_WARN.
// Without libarchive: compares the first (up to 512) bytes of the file against known archive signatures.
240 if (!std::filesystem::is_regular_file(file)) {
241 return false;
242 }
243
244#ifdef STORM_HAVE_LIBARCHIVE
245 // Try to open the archive
246 struct archive* a = archive_read_new();
247 if (!a) {
248 return false;
249 }
// The return values of the two support_*_all calls are not inspected here.
250 archive_read_support_filter_all(a);
251 archive_read_support_format_all(a);
252 if (archive_read_open_filename(a, file.string().c_str(), 10240) != ARCHIVE_OK) {
// Release the handle before bailing out.
253 archive_read_free(a);
254 return false;
255 }
256 struct archive_entry* entry = nullptr;
257 auto const r = archive_read_next_header(a, &entry);
258 bool const result = (r == ARCHIVE_OK || r == ARCHIVE_EOF || r == ARCHIVE_WARN);
259 archive_read_free(a);
260 return result;
261#else
262 // read magic bytes
263 std::ifstream in(file, std::ios::binary);
264 if (!in) {
265 return false;
266 }
267 // 512 bytes is enough for signatures below (including tar header).
268 std::array<unsigned char, 512> buf;
269 in.read(reinterpret_cast<char*>(buf.data()), buf.size());
// Only the bytes that were actually read are inspected.
270 std::streamsize const n = in.gcount();
271 if (n <= 0) {
272 return false;
273 }
274 std::span<unsigned char> bytes(buf.data(), static_cast<std::size_t>(n));
// Checks whether the bytes read contain `magic` at position `offset`; false if too few bytes were read.
275 auto starts_with_bytes = [&bytes](std::initializer_list<unsigned char> magic, std::size_t offset = 0) -> bool {
276 return bytes.size() >= magic.size() + offset && std::equal(magic.begin(), magic.end(), bytes.begin() + offset);
277 };
278
279 // see https://en.wikipedia.org/wiki/List_of_file_signatures
280 std::initializer_list<unsigned char> const gz{0x1F, 0x8B}, // Gzip
281 xz{0xFD, '7', 'z', 'X', 'Z', 0x00}, // XZ
282 tar{'u', 's', 't', 'a', 'r'}, // TAR, at offset 257
283 zip{0x50, 0x4B, 0x03, 0x04}, zipEmpty{0x50, 0x4B, 0x05, 0x06}, zipSpanned{0x50, 0x4B, 0x07, 0x08}; // Zip
284 return starts_with_bytes(gz) || starts_with_bytes(xz) || starts_with_bytes(tar, 257) || starts_with_bytes(zip) || starts_with_bytes(zipEmpty) ||
285 starts_with_bytes(zipSpanned);
286#endif
287}
288
289ArchiveReader::ArchiveReader(std::filesystem::path const& file) : file(file) {};
290
// NOTE(review): the signature line of this definition (original line 291) is absent from this listing;
// presumably this is ArchiveReader::begin() — confirm against the original file.
// With libarchive: returns an Iterator constructed from the stored path `file`.
// Without libarchive: always throws MissingLibraryException.
292#ifdef STORM_HAVE_LIBARCHIVE
293 return Iterator(file);
294#else
295 STORM_LOG_THROW(false, storm::exceptions::MissingLibraryException, "Reading archives is not supported. Storm is compiled without LibArchive.");
296#endif
297}
298
// NOTE(review): the signature line of this definition (original line 299) is absent from this listing;
// presumably this is ArchiveReader::end() — confirm against the original file.
// With libarchive: returns a default-constructed Iterator.
// Without libarchive: always throws MissingLibraryException.
300#ifdef STORM_HAVE_LIBARCHIVE
301 return Iterator();
302#else
303 STORM_LOG_THROW(false, storm::exceptions::MissingLibraryException, "Reading archives is not supported. Storm is compiled without LibArchive.");
304#endif
305}
306
307ArchiveReader openArchive(std::filesystem::path const& file) {
308 return ArchiveReader(file);
309}
310
// Explicit instantiations of toVector for the element types listed below, each with little-endian input data.
// NOTE(review): presumably required because the template is defined in this translation unit only — confirm.
311template Vec<char> ArchiveReader::ArchiveReadEntry::toVector<char, std::endian::little>();
312template Vec<bool> ArchiveReader::ArchiveReadEntry::toVector<bool, std::endian::little>();
313template Vec<uint32_t> ArchiveReader::ArchiveReadEntry::toVector<uint32_t, std::endian::little>();
314template Vec<uint64_t> ArchiveReader::ArchiveReadEntry::toVector<uint64_t, std::endian::little>();
315template Vec<int64_t> ArchiveReader::ArchiveReadEntry::toVector<int64_t, std::endian::little>();
316template Vec<double> ArchiveReader::ArchiveReadEntry::toVector<double, std::endian::little>();
317
318} // namespace storm::io
PositionIteratorType Iterator
Object that reads the archive entry.
VectorType< T > toVector()
Extracts the current entry’s data as a vector of the given type.
bool isDir() const
std::conditional_t< std::is_same_v< T, bool >, storm::storage::BitVector, std::vector< T > > VectorType
std::string toString()
Extracts the current entry’s data as a string.
std::filesystem::path name() const
Get the current entry’s path (filename) inside the archive.
ArchiveReader(std::filesystem::path const &file)
#define STORM_LOG_ASSERT(cond, message)
Definition macros.h:11
#define STORM_LOG_WARN_COND(cond, message)
Definition macros.h:38
#define STORM_LOG_THROW(cond, exception, message)
Definition macros.h:30
ArchiveReader openArchive(std::filesystem::path const &file)
Reads an archive file.
typename ArchiveReader::ArchiveReadEntry::VectorType< T > Vec
T reverseBits(T const t)
Swaps the bits in the bit representation of the given value.