Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 33 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,15 @@ set(SOURCE_FILES
${SYNC_DIR}/IBLTMultiset.cpp
${SYNC_DIR}/IBLTSync.cpp
${SYNC_DIR}/IBLTSync_Multiset.cpp
${SYNC_DIR}/IBLTSync_Adaptive.cpp
${SYNC_DIR}/IBLTSync_Adaptive_PartialDecode.cpp
${SYNC_DIR}/IBLTSetOfSets.cpp
${SYNC_DIR}/Compact2DBitArray.cpp
${SYNC_DIR}/Cuckoo.cpp
${SYNC_DIR}/CuckooSync.cpp
${SYNC_DIR}/FullSync.cpp
${SYNC_DIR}/BloomFilterSync.cpp
${SYNC_DIR}/BloomFilter.cpp
${SYNC_DIR}/BloomFilterSync.cpp
${SYNC_DIR}/BloomFilter.cpp
${SYNC_DIR}/MET_IBLTSync.cpp
${SYNC_DIR}/MET_IBLT.cpp

Expand Down Expand Up @@ -120,14 +122,16 @@ set(HEADERS
${SYNC_DIR_INC}/IBLTSetOfSets.h
${SYNC_DIR_INC}/IBLTSync_HalfRound.h
${SYNC_DIR_INC}/IBLTSync_Multiset.h
${SYNC_DIR_INC}/IBLTSync_Adaptive.h
${SYNC_DIR_INC}/IBLTSync_Adaptive_PartialDecode.h
${SYNC_DIR_INC}/Compact2DBitArray.h
${SYNC_DIR_INC}/Cuckoo.h
${SYNC_DIR_INC}/CuckooSync.h
${SYNC_DIR_INC}/InterCPISync.h
${SYNC_DIR_INC}/PrioCPISync.h
${SYNC_DIR_INC}/ProbCPISync.h
${SYNC_DIR_INC}/BloomFilterSync.h
${SYNC_DIR_INC}/BloomFilter.h
${SYNC_DIR_INC}/BloomFilterSync.h
${SYNC_DIR_INC}/BloomFilter.h
${SYNC_DIR_INC}/MET_IBLTSync.h
${SYNC_DIR_INC}/MET_IBLT.h

Expand All @@ -138,6 +142,30 @@ set(HEADERS
${SYNC_BENCH_INC}/FromFileGen.h
)

if(NOT DEFINED includedir)
find_path(NTL_INCLUDE_PATH NTL
PATHS
/usr/include
/usr/local/include
/opt/local/include
/opt/homebrew/include
)
if(NTL_INCLUDE_PATH)
get_filename_component(includedir "${NTL_INCLUDE_PATH}" DIRECTORY)
else()
set(includedir "/usr/local")
endif()
endif()

# Users may need to update these paths
include_directories(${includedir}/include)
link_directories(${includedir}/lib)
link_directories(${includedir}/lib64)


include_directories(${CMAKE_INSTALL_INCLUDEDIR})
link_directories(${CMAKE_INSTALL_LIBDIR})

# Add gensync library
add_library(gensync STATIC ${SOURCE_FILES} ${HEADERS})
target_include_directories(gensync PUBLIC ${INCLUDE_DIR})
Expand Down Expand Up @@ -218,4 +246,4 @@ set(CPAKC_PACKAGE_VENDOR "NISLAB")
set(CPACK_PACKAGING_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}-${CPACK_PACKAGE_RELEASE}.${CMAKE_SYSTEM_PROCESSOR}")
set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST /usr/local /usr/local/lib64 /usr/local/lib /usr/local/include)
include(CPack)
include(CPack)
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,10 @@ sync succeeded.
* The [Multi-Edge-Type IBLT](https://arxiv.org/pdf/2211.05472) is an IBLT-based set reconciliation protocol that does not require estimation of the size of the set-difference. This is due to the scalable nature of the MET-IBLT data structure. The protocol works by iteratively adding and exchanging parts ("types") of the MET-IBLT data structure between the client and server until all differing elements are peeled/decoded.
* Bloom Filter Sync
* The [Bloom Filter](https://dl.acm.org/doi/pdf/10.1145/362686.362692) is a space-efficient probabilistic data structure for testing set membership. The protocol enables set reconciliation by exchanging filters and transferring only elements which are detected as most likely missing from the other party's set.
* Adaptive-IBLT Sync
* Perform IBLT-based set reconciliation by first attempting to decode the IBLT with an initial number of cells. If decoding fails, the client adaptively doubles the number of cells and retries. This process repeats until successful decoding occurs. The server remains passive, waiting to receive each new IBLT and attempting to decode it.
* Adaptive-IBLT Sync(Partial Decoding)
* An optimization over Adaptive-IBLTSync that reuses successfully decoded elements from previous rounds to reduce redundant communication. When decoding fails, the server sends back only the indices of cells that were successfully peeled. The client uses this information to avoid re-sending already known elements in the next IBLT.
* **Included Sync Protocols (Set of Sets):**
* IBLT Set of Sets
* Sync using the protocol described [here](https://dl.acm.org/doi/abs/10.1145/3196959.3196988). This sync serializes an IBLT containing a child set into a bitstring where it is then treated as an element of a larger IBLT. Each host recovers the IBLT containing the serialized IBLTs and deserializes each one. A matching procedure is then used to determine which child sets should sync with each other and which elements they need. If this sync is two way this info is then sent back to the peer node. The number of differences in each child IBLT may not be larger than the total number of sets being synced
Expand Down
4 changes: 4 additions & 0 deletions include/GenSync/Aux/ConstantsAndTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,5 +60,9 @@ static const int SYNC_SUCCESS = 0; /** Exit status when synchronization succeeds
static const int SYNC_FAILURE = -1; /** Exit status when synchronization fails. */
static const int GENERAL_ERROR = -2; /** Exit status for a general error. */

// ... Constants for synchronization configuration
const long DEFAULT_NUM_HASHES = 4;
const long DEFAULT_NUM_HASH_CHECK = 11;

#endif /* TYPES_H */

32 changes: 30 additions & 2 deletions include/GenSync/Communicants/Communicant.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,14 @@ class Communicant {
*/
void commSend(const ZZ &num, Nullable<size_t> size = NOT_SET<size_t>());

/**
* Sends a vec_ZZ.
* @require must have called EstablishModSend/EstablishModRecv before any of these functions will work.
* @param vec A vector of ZZ's
* @see commSend(const char *str) for more details.
* */
void commSend(const vec_ZZ &vec);

// Specialized send functions for specific data types
/**
* Sends a *positive* ZZ_p over the line
Expand Down Expand Up @@ -263,6 +271,14 @@ class Communicant {
*/
void commSend(const Cuckoo &cf);

/**
* Sends a vector of ZZ.
* @require must have called EstablishModSend/EstablishModRecv before any of these functions will work.
* @param vec A vector of ZZ
* @see commSend(const char *str) for more details.
*/
void commSend(const vector<ZZ> &vec);

/**
* Receives up to MAX_BUF_SIZE characters from the socket.
* This is the primitive receive method that all other methods call.
Expand Down Expand Up @@ -324,8 +340,18 @@ class Communicant {
*/
ZZ_p commRecv_ZZ_p();

vec_ZZ_p
commRecv_vec_ZZ_p(); /** @require must have called EstablishModSend/EstablishModRecv before any of these functions will work. */
/**
* Specialized receive functions for specific data types.
* @require must have called EstablishMod before any of these functions will work.
*/
vec_ZZ commRecv_vec_ZZ();

/**
* Specialized receive functions for specific data types.
* @require must have called EstablishMod before any of these functions will work.
*/
vec_ZZ_p commRecv_vec_ZZ_p();

long commRecv_long();

int commRecv_int();
Expand All @@ -334,6 +360,8 @@ class Communicant {

byte commRecv_byte();

vector<ZZ> commRecv_vector_ZZ();

/**
* Receives a GenIBLT.
* @param size The size of the GenIBLT to be received. Must be >0 or NOT_SET.
Expand Down
2 changes: 2 additions & 0 deletions include/GenSync/Syncs/GenSync.h
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,8 @@ class GenSync {
CuckooSync,
BloomFilterSync,
MET_IBLTSync,
IBLTSync_Adaptive,
IBLTSync_Adaptive_PartialDecode,
END // one after the end of iterable options
};

Expand Down
70 changes: 70 additions & 0 deletions include/GenSync/Syncs/IBLTSync_Adaptive.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/* This code is part of the GenSync project developed at Boston University. Please see the README for use and references. */

/**
* The {@link IBLTSync_Adaptive} method syncs with another {@link IBLTSync_Adaptive} method by sending an IBLT containing
* its set. Upon receiving this IBLT, the server performs a subtract operation on both IBLTs
* and uses the resulting IBLT to calculate the symmetric set difference. These differences
* are then sent back to the client. If the decoding process failed, we construct an IBLT with doubled size
* (which is the optimal strategy with respect to overhead over the "clairvoyant" IBLT) and re-run the synchronization.
* There is an adjustable probability that the sync will fail to recover all differences.
*
* Created by Xingyu Chen on 4/27/25.
*/

#ifndef GENSYNC_IBLTSYNC_ADAPTIVE_H
#define GENSYNC_IBLTSYNC_ADAPTIVE_H

#include <GenSync/Aux/SyncMethod.h>
#include <GenSync/Syncs/IBLT.h>

/**
* IBLTSync_Adaptive class dynamically grows the IBLT if reconciliation fails.
*/
class IBLTSync_Adaptive : public SyncMethod
{
public:
/**
* Constructor.
* @param initExpected The initial guess of set difference size between client and server
* @param eltSize The size of elements being sent over between client and server
*/
explicit IBLTSync_Adaptive(size_t initExpected, size_t eltSize);

~IBLTSync_Adaptive() override;

// Implemented parent class methods
bool SyncClient(const shared_ptr<Communicant>& commSync,
list<shared_ptr<DataObject>> &selfMinusOther,
list<shared_ptr<DataObject>> &otherMinusSelf) override;

bool SyncServer(const shared_ptr<Communicant>& commSync,
list<shared_ptr<DataObject>> &selfMinusOther,
list<shared_ptr<DataObject>> &otherMinusSelf) override;

/**
* Helper Function for constructing IBLT and inserting elements.
* @param currentExpected The current guess of set difference size between client and server
* @param elementSize The size of elements being sent over between client and server
*/
IBLT buildIBLT(size_t currentExpected, size_t elementSize);

bool addElem(shared_ptr<DataObject> datum) override;
bool delElem(shared_ptr<DataObject> datum) override;

string getName() override;

/* Getters for the parameters set in the constructor */
size_t getInitExpNumElems() const {return initExpNumElems;}
size_t getElementSize() const {return elementSize;}
private:
// Initial value of estimated number of difference
size_t initExpNumElems;

// Size of elements
size_t elementSize;

// Number of elements
size_t elementCount = 0;
};

#endif //GENSYNC_IBLTSYNC_ADAPTIVE_H
80 changes: 80 additions & 0 deletions include/GenSync/Syncs/IBLTSync_Adaptive_PartialDecode.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/**
* The {@link IBLTSync_Adaptive_PartialDecode} method syncs with another {@link IBLTSync_Adaptive_PartialDecode} method by sending an IBLT containing
* its set. Upon receiving this IBLT, the server performs a subtract operation on both IBLTs
* and uses the resulting IBLT to calculate the symmetric set difference. These differences
* are then sent back to the client. If the decoding process failed, we construct an new IBLT.
* The new IBLT is built with doubled size minus the number of peeled elements and we do not insert peeled elements.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe a note that doubling in size is the optimal strategy with respect to overhead over the "clairvoyant" IBLT?

* There is an adjustable probability that the sync will fail to recover all differences.
* This type of syncs is a variance of {@link IBLTSync_Adaptive}.
*
* Created by Xingyu Chen on 6/5/25.
*/

#ifndef GENSYNC_IBLTSYNC_ADAPTIVE_PARTIALDECODE_H
#define GENSYNC_IBLTSYNC_ADAPTIVE_PARTIALDECODE_H

#include <GenSync/Aux/SyncMethod.h>
#include <GenSync/Syncs/IBLT.h>
#include <unordered_set>

/**
* IBLTSync_Adaptive_PartialDecode class dynamically grows the IBLT if reconciliation fails, it records the indices of peeled elements
* and do not insert them into the following generated IBLTs.
*/
class IBLTSync_Adaptive_PartialDecode : public SyncMethod
{
private:
// Initial value of estimated number of difference
size_t initExpNumElems;

// Size of elements
size_t elementSize;

// Number of elements
size_t elementCount = 0;

// Provides a hash function for NTL::ZZ by converting the value to a string and hashing that.
struct HashZZ {
size_t operator()(const ZZ& z) const {
return hash<string>()(zzToString(z));
}
};

public:
/**
* Constructor.
* @param initExpected The initial guess of set difference size between client and server
* @param eltSize The size of elements being sent over between client and server
*/
explicit IBLTSync_Adaptive_PartialDecode(size_t initExpected, size_t eltSize);

~IBLTSync_Adaptive_PartialDecode() override;

// Implemented parent class methods
bool SyncClient(const shared_ptr<Communicant>& commSync,
list<shared_ptr<DataObject>> &selfMinusOther,
list<shared_ptr<DataObject>> &otherMinusSelf) override;

bool SyncServer(const shared_ptr<Communicant>& commSync,
list<shared_ptr<DataObject>> &selfMinusOther,
list<shared_ptr<DataObject>> &otherMinusSelf) override;

/**
* Helper Function for constructing IBLT and inserting elements that have not been peeled.
* @param currentExpected The current guess of set difference size between client and server
* @param elementSize The size of elements being sent over between client and server
* @param peeledKeys The set of keys
*/
IBLT buildIBLTwithUnpeeledElements(size_t currentExpected, size_t elementSize, unordered_set<ZZ, HashZZ> peeledKeys);

bool addElem(shared_ptr<DataObject> datum) override;
bool delElem(shared_ptr<DataObject> datum) override;

string getName() override;

/* Getters for the parameters set in the constructor */
size_t getInitExpNumElems() const {return initExpNumElems;}
size_t getElementSize() const {return elementSize;}
};

#endif //GENSYNC_IBLTSYNC_ADAPTIVE_PARTIALDECODE_H
20 changes: 19 additions & 1 deletion src/Benchmarks/BenchParams.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
#include <GenSync/Syncs/CuckooSync.h>
#include <GenSync/Syncs/BloomFilterSync.h>
#include <GenSync/Syncs/MET_IBLTSync.h>
#include <GenSync/Syncs/IBLTSync_Adaptive.h>
#include <GenSync/Syncs/IBLTSync_Adaptive_PartialDecode.h>

const char BenchParams::KEYVAL_SEP = ':';
const string BenchParams::FILEPATH_SEP = "/"; // TODO: we currently don't compile for _WIN32!
Expand Down Expand Up @@ -182,7 +184,9 @@ inline shared_ptr<Params> decideBenchParams(GenSync::SyncProtocol syncProtocol,
} else if (syncProtocol == GenSync::SyncProtocol::IBLTSync
|| syncProtocol == GenSync::SyncProtocol::OneWayIBLTSync
|| syncProtocol == GenSync::SyncProtocol::IBLTSetOfSets
|| syncProtocol == GenSync::SyncProtocol::IBLTSync_Multiset) {
|| syncProtocol == GenSync::SyncProtocol::IBLTSync_Multiset
|| syncProtocol == GenSync::SyncProtocol::IBLTSync_Adaptive
|| syncProtocol == GenSync::SyncProtocol::IBLTSync_Adaptive_PartialDecode) {
auto par = make_shared<IBLTParams>();
is >> *par;
return par;
Expand Down Expand Up @@ -377,6 +381,20 @@ BenchParams::BenchParams(SyncMethod& meth) :
return;
}

auto iblt_adaptive = dynamic_cast<IBLTSync_Adaptive*>(&meth);
if (iblt_adaptive) {
syncProtocol = GenSync::SyncProtocol::IBLTSync_Adaptive;
syncParams = make_shared<IBLTParams>(iblt_adaptive->getInitExpNumElems(), iblt_adaptive->getElementSize());
return;
}

auto iblt_adaptive_partialdecode = dynamic_cast<IBLTSync_Adaptive_PartialDecode*>(&meth);
if (iblt_adaptive_partialdecode) {
syncProtocol = GenSync::SyncProtocol::IBLTSync_Adaptive_PartialDecode;
syncParams = make_shared<IBLTParams>(iblt_adaptive_partialdecode->getInitExpNumElems(), iblt_adaptive_partialdecode->getElementSize());
return;
}

throw runtime_error("The SyncMethod is not known to BenchParams");
}

Expand Down
Loading