-
Notifications
You must be signed in to change notification settings - Fork 400
New incremental cleanup strategy: 63%-88% faster #2903
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,6 +22,7 @@ | |
|
|
||
| #include <chrono> | ||
| #include <mutex> | ||
| #include <algorithm> | ||
| #include <array> | ||
| #include <cstdlib> | ||
| #include <stdexcept> | ||
|
|
@@ -130,6 +131,9 @@ struct Store::Private { | |
| Option<const std::string&> target_mdir, | ||
| Option<Flags> new_flags, | ||
| MoveOptions opts); | ||
|
|
||
| bool remove_message_by_id_unlocked(Store::Id id); | ||
|
|
||
| XapianDb xapian_db_; | ||
| Config config_; | ||
| ContactsCache contacts_cache_; | ||
|
|
@@ -430,17 +434,66 @@ Store::remove_messages(const std::vector<Store::Id>& ids) | |
|
|
||
| xapian_db().request_transaction(); | ||
|
|
||
| for (auto&& id : ids) { | ||
| if (const auto msg = priv_->find_message_unlocked(id); !msg) { | ||
| mu_warning("cannot find document {} for deletion", id); | ||
| } else { | ||
| for (auto&& label: msg->labels()) | ||
| priv_->labels_cache_.decrease(label); | ||
| xapian_db().delete_document(id); | ||
| } | ||
| for (auto&& id : ids) | ||
| priv_->remove_message_by_id_unlocked(id); | ||
|
|
||
| xapian_db().request_commit(true/*force*/); | ||
| } | ||
|
|
||
| bool | ||
| Store::Private::remove_message_by_id_unlocked(Store::Id id) | ||
| { | ||
| if (auto msg = labels_cache_.empty() ? Nothing : find_message_unlocked(id); msg) | ||
| for (auto&& label: msg->labels()) | ||
| labels_cache_.decrease(label); | ||
|
|
||
| if (!xapian_db_.delete_document(id)) { | ||
| mu_warning("cannot find document {} for deletion", id); | ||
| return false; | ||
| } | ||
|
|
||
| return true; | ||
| } | ||
|
|
||
| size_t | ||
| Store::remove_messages_by_term(std::span<const std::string> terms, | ||
| std::function<void (size_t)> progress_fn) | ||
| { | ||
| std::unique_lock lock{priv_->lock_}; | ||
| size_t nr_removed = 0; | ||
| std::vector<Xapian::Query> qvec; | ||
| std::vector<Store::Id> ids_to_remove; | ||
|
|
||
| xapian_db().request_transaction(); | ||
|
|
||
| while (!terms.empty()) { | ||
| auto chunk = terms.subspan(0, std::min<size_t>(terms.size(), 1024)); | ||
| terms = terms.subspan(chunk.size()); | ||
| qvec.clear(); | ||
| qvec.reserve(chunk.size()); | ||
| std::ranges::copy(chunk, std::back_inserter(qvec)); | ||
| auto enq = xapian_db().enquire(); | ||
| enq.set_weighting_scheme(Xapian::BoolWeight()); // No score | ||
| enq.set_docid_order(Xapian::Enquire::ASCENDING); | ||
| enq.set_query(Xapian::Query{Xapian::Query::OP_OR, qvec.begin(), qvec.end()}); | ||
| auto mset = enq.get_mset(0, xapian_db().size()); | ||
| // Work around Xapian MSetIterator trait mismatch: | ||
| // https://trac.xapian.org/ticket/850 | ||
| ids_to_remove.reserve(ids_to_remove.size() + mset.size()); | ||
| for (auto it = mset.begin(); it != mset.end(); ++it) | ||
| ids_to_remove.push_back(*it); | ||
| } | ||
|
|
||
| // Sort the IDs to remove to make Xapian tree traversal easier | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To what extent are we depending on Xapian implementation details here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Xapian contractually iterates over terms in ascending byte-lexicographic order, so it's only natural to suppose that its storage engines in general will get the best locality accessing terms in this order. We're allowed to delete in any order we want, so if we have to choose an order, this one seems reasonable. There's no functional dependency on Xapian internals --- and sadly, Xapian has no public bulk delete API, AFAIK https://xapian.org/docs/apidoc/html/classXapian_1_1Database.html#abf8de9d7fe351a347e7fa9af605a71bb |
||
| std::ranges::sort(ids_to_remove); | ||
| for (Id id : ids_to_remove) { | ||
| nr_removed += priv_->remove_message_by_id_unlocked(id); | ||
| if (nr_removed % 500 == 0) | ||
| progress_fn(nr_removed); | ||
| } | ||
|
|
||
| xapian_db().request_commit(true/*force*/); | ||
| return nr_removed; | ||
| } | ||
|
|
||
| Option<Message> | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This loop could use a comment, i.e., a few lines on what we're doing here.