Skip to content

Commit a7ed5bf

Browse files
miguelportilla authored and seelabs committed
Improve shards file exception handling
1 parent a73372c commit a7ed5bf

4 files changed

Lines changed: 218 additions & 154 deletions

File tree

src/ripple/nodestore/impl/DatabaseShardImp.cpp

Lines changed: 91 additions & 78 deletions
Original file line number | Diff line number | Diff line change
@@ -92,67 +92,77 @@ DatabaseShardImp::init()
9292
return true;
9393
}
9494

95-
// Find shards
96-
for (auto const& d : directory_iterator(dir_))
95+
try
9796
{
98-
if (!is_directory(d))
99-
continue;
97+
// Find shards
98+
for (auto const& d : directory_iterator(dir_))
99+
{
100+
if (!is_directory(d))
101+
continue;
100102

101-
// Validate shard directory name is numeric
102-
auto dirName = d.path().stem().string();
103-
if (!std::all_of(
104-
dirName.begin(),
105-
dirName.end(),
106-
[](auto c){
103+
// Validate shard directory name is numeric
104+
auto dirName = d.path().stem().string();
105+
if (!std::all_of(
106+
dirName.begin(),
107+
dirName.end(),
108+
[](auto c) {
107109
return ::isdigit(static_cast<unsigned char>(c));
108110
}))
109-
{
110-
continue;
111-
}
112-
113-
auto const shardIndex {std::stoul(dirName)};
114-
if (shardIndex < earliestShardIndex())
115-
{
116-
JLOG(j_.fatal()) <<
117-
"Invalid shard index " << shardIndex <<
118-
". Earliest shard index " << earliestShardIndex();
119-
return false;
120-
}
111+
{
112+
continue;
113+
}
121114

122-
// Check if a previous import failed
123-
if (is_regular_file(dir_ / std::to_string(shardIndex) /
124-
importMarker_))
125-
{
126-
JLOG(j_.warn()) <<
127-
"shard " << shardIndex <<
128-
" previously failed import, removing";
129-
if (!this->remove(dir_ / std::to_string(shardIndex)))
115+
auto const shardIndex {std::stoul(dirName)};
116+
if (shardIndex < earliestShardIndex())
117+
{
118+
JLOG(j_.fatal()) <<
119+
"Invalid shard index " << shardIndex <<
120+
". Earliest shard index " << earliestShardIndex();
130121
return false;
131-
continue;
132-
}
122+
}
133123

134-
auto shard = std::make_unique<Shard>(
135-
*this, shardIndex, cacheSz_, cacheAge_, j_);
136-
if (!shard->open(config_, scheduler_))
137-
return false;
138-
usedDiskSpace_ += shard->fileSize();
139-
if (shard->complete())
140-
complete_.emplace(shard->index(), std::move(shard));
141-
else
142-
{
143-
if (incomplete_)
124+
// Check if a previous import failed
125+
if (is_regular_file(
126+
dir_ / std::to_string(shardIndex) / importMarker_))
144127
{
145-
JLOG(j_.fatal()) <<
146-
"More than one control file found";
128+
JLOG(j_.warn()) <<
129+
"shard " << shardIndex <<
130+
" previously failed import, removing";
131+
remove_all(dir_ / std::to_string(shardIndex));
132+
continue;
133+
}
134+
135+
auto shard {std::make_unique<Shard>(
136+
*this, shardIndex, cacheSz_, cacheAge_, j_)};
137+
if (!shard->open(config_, scheduler_))
147138
return false;
139+
140+
usedDiskSpace_ += shard->fileSize();
141+
if (shard->complete())
142+
complete_.emplace(shard->index(), std::move(shard));
143+
else
144+
{
145+
if (incomplete_)
146+
{
147+
JLOG(j_.fatal()) <<
148+
"More than one control file found";
149+
return false;
150+
}
151+
incomplete_ = std::move(shard);
148152
}
149-
incomplete_ = std::move(shard);
150153
}
151154
}
155+
catch (std::exception const& e)
156+
{
157+
JLOG(j_.error()) <<
158+
"exception: " << e.what();
159+
return false;
160+
}
161+
152162
if (!incomplete_ && complete_.empty())
153163
{
154164
// New Shard Store, calculate file descriptor requirements
155-
if (maxDiskSpace_ > space(dir_).free)
165+
if (maxDiskSpace_ > available())
156166
{
157167
JLOG(j_.error()) <<
158168
"Insufficient disk space";
@@ -185,7 +195,7 @@ DatabaseShardImp::prepareLedger(std::uint32_t validLedgerSeq)
185195
canAdd_ = false;
186196
return boost::none;
187197
}
188-
if (avgShardSz_ > boost::filesystem::space(dir_).free)
198+
if (avgShardSz_ > available())
189199
{
190200
JLOG(j_.error()) <<
191201
"Insufficient disk space";
@@ -211,9 +221,9 @@ DatabaseShardImp::prepareLedger(std::uint32_t validLedgerSeq)
211221
if (!incomplete_->open(config_, scheduler_))
212222
{
213223
incomplete_.reset();
214-
this->remove(dir_ / std::to_string(*shardIndex));
215224
return boost::none;
216225
}
226+
217227
return incomplete_->prepare();
218228
}
219229

@@ -254,6 +264,7 @@ DatabaseShardImp::prepareShard(std::uint32_t shardIndex)
254264
{
255265
return false;
256266
}
267+
257268
if (complete_.find(shardIndex) != complete_.end())
258269
{
259270
JLOG(j_.debug()) <<
@@ -287,7 +298,7 @@ DatabaseShardImp::prepareShard(std::uint32_t shardIndex)
287298
"Exceeds maximum size";
288299
return false;
289300
}
290-
if (sz > space(dir_).free)
301+
if (sz > available())
291302
{
292303
JLOG(j_.error()) <<
293304
"Insufficient disk space";
@@ -321,10 +332,19 @@ DatabaseShardImp::importShard(std::uint32_t shardIndex,
321332
boost::filesystem::path const& srcDir, bool validate)
322333
{
323334
using namespace boost::filesystem;
324-
if (!is_directory(srcDir) || is_empty(srcDir))
335+
try
336+
{
337+
if (!is_directory(srcDir) || is_empty(srcDir))
338+
{
339+
JLOG(j_.error()) <<
340+
"Invalid source directory " << srcDir.string();
341+
return false;
342+
}
343+
}
344+
catch (std::exception const& e)
325345
{
326346
JLOG(j_.error()) <<
327-
"Invalid source directory " << srcDir.string();
347+
"exception: " << e.what();
328348
return false;
329349
}
330350

@@ -334,12 +354,10 @@ DatabaseShardImp::importShard(std::uint32_t shardIndex,
334354
{
335355
rename(src, dst);
336356
}
337-
catch (const filesystem_error& e)
357+
catch (std::exception const& e)
338358
{
339359
JLOG(j_.error()) <<
340-
"rename " << src.string() <<
341-
" to " << dst.string() <<
342-
": Exception, " << e.code().message();
360+
"exception: " << e.what();
343361
return false;
344362
}
345363
return true;
@@ -367,27 +385,25 @@ DatabaseShardImp::importShard(std::uint32_t shardIndex,
367385
*this, shardIndex, cacheSz_, cacheAge_, j_)};
368386
auto fail = [&](std::string msg)
369387
{
370-
if (!msg.empty())
371-
{
372-
JLOG(j_.error()) << msg;
373-
}
388+
JLOG(j_.error()) << msg;
374389
shard.release();
375390
move(dstDir, srcDir);
376391
return false;
377392
};
393+
378394
if (!shard->open(config_, scheduler_))
379-
return fail({});
395+
return fail("Failure");
380396
if (!shard->complete())
381397
return fail("Incomplete shard");
382398

383-
// Verify database integrity
384399
try
385400
{
401+
// Verify database integrity
386402
shard->getBackend()->verify();
387403
}
388404
catch (std::exception const& e)
389405
{
390-
return fail(std::string("Verify: Exception, ") + e.what());
406+
return fail(std::string("exception: ") + e.what());
391407
}
392408

393409
// Validate shard ledgers
@@ -397,14 +413,14 @@ DatabaseShardImp::importShard(std::uint32_t shardIndex,
397413
// so the database can fetch data from it
398414
it->second = shard.get();
399415
l.unlock();
400-
auto valid {shard->validate(app_)};
416+
auto const valid {shard->validate(app_)};
401417
l.lock();
402418
if (!valid)
403419
{
404420
it = preShards_.find(shardIndex);
405421
if(it != preShards_.end())
406422
it->second = nullptr;
407-
return fail({});
423+
return fail("failed validation");
408424
}
409425
}
410426

@@ -634,7 +650,7 @@ DatabaseShardImp::import(Database& source)
634650
canAdd_ = false;
635651
break;
636652
}
637-
if (avgShardSz_ > boost::filesystem::space(dir_).free)
653+
if (avgShardSz_ > available())
638654
{
639655
JLOG(j_.error()) <<
640656
"Insufficient disk space";
@@ -686,7 +702,6 @@ DatabaseShardImp::import(Database& source)
686702
if (!shard->open(config_, scheduler_))
687703
{
688704
shard.reset();
689-
this->remove(shardDir);
690705
continue;
691706
}
692707

@@ -699,7 +714,7 @@ DatabaseShardImp::import(Database& source)
699714
"shard " << shardIndex <<
700715
" unable to create temp marker file";
701716
shard.reset();
702-
this->remove(shardDir);
717+
removeAll(shardDir, j_);
703718
continue;
704719
}
705720
ofs.close();
@@ -727,7 +742,7 @@ DatabaseShardImp::import(Database& source)
727742
JLOG(j_.debug()) <<
728743
"shard " << shardIndex <<
729744
" successfully imported";
730-
this->remove(markerFile);
745+
removeAll(markerFile, j_);
731746
break;
732747
}
733748
}
@@ -738,7 +753,7 @@ DatabaseShardImp::import(Database& source)
738753
"shard " << shardIndex <<
739754
" failed to import";
740755
shard.reset();
741-
this->remove(shardDir);
756+
removeAll(shardDir, j_);
742757
}
743758
}
744759

@@ -1070,7 +1085,7 @@ DatabaseShardImp::updateStats(std::lock_guard<std::mutex>&)
10701085
else
10711086
{
10721087
auto const sz = maxDiskSpace_ - usedDiskSpace_;
1073-
if (sz > space(dir_).free)
1088+
if (sz > available())
10741089
{
10751090
JLOG(j_.warn()) <<
10761091
"Max Shard Store size exceeds "
@@ -1110,21 +1125,19 @@ DatabaseShardImp::selectCache(std::uint32_t seq)
11101125
return {};
11111126
}
11121127

1113-
bool
1114-
DatabaseShardImp::remove(boost::filesystem::path const& path)
1128+
std::uint64_t
1129+
DatabaseShardImp::available() const
11151130
{
11161131
try
11171132
{
1118-
boost::filesystem::remove_all(path);
1133+
return boost::filesystem::space(dir_).available;
11191134
}
1120-
catch (const boost::filesystem::filesystem_error& e)
1135+
catch (std::exception const& e)
11211136
{
11221137
JLOG(j_.error()) <<
1123-
"remove_all " << path.string() <<
1124-
": Exception, " << e.code().message();
1125-
return false;
1138+
"exception: " << e.what();
1139+
return 0;
11261140
}
1127-
return true;
11281141
}
11291142

11301143
} // NodeStore

src/ripple/nodestore/impl/DatabaseShardImp.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -241,8 +241,9 @@ class DatabaseShardImp : public DatabaseShard
241241
1, static_cast<int>(complete_.size() + (incomplete_ ? 1 : 0))));
242242
}
243243

244-
bool
245-
remove(boost::filesystem::path const& path);
244+
// Returns available storage space
245+
std::uint64_t
246+
available() const;
246247
};
247248

248249
} // NodeStore

0 commit comments

Comments (0)