-
Notifications
You must be signed in to change notification settings - Fork 93
Update NO_REPEAT_RANDOM to use Sample Without Replacement (2nd Try) #853
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
85eed22
d245579
3c1970f
d4cae8d
4e35982
1966c66
ba51d03
5ad0dd8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -135,19 +135,44 @@ class BitSet { | |
| // Read-only indexed access: true when bit number `bit` is set (32 bits per word of bits_). | ||
| bool operator[](size_t bit) const { | ||
|   const size_t word = bit / 32; | ||
|   return (bits_[word] & (1UL << (bit % 32))) != 0; | ||
| } | ||
| // Returns true when bit number `bit` is set. | ||
| bool get(size_t bit) const { | ||
|   const unsigned long mask = 1UL << (bit % 32); | ||
|   return (bits_[bit / 32] & mask) != 0; | ||
| } | ||
| // Sets bit number `bit`. | ||
| void set(size_t bit) { | ||
|   const size_t word = bit / 32; | ||
|   bits_[word] = bits_[word] | (1UL << (bit % 32)); | ||
| } | ||
| // Sets every bit in the inclusive range [first, last]. | ||
| void set_subset(size_t first, size_t last) { | ||
|   const BitSet<SIZE> up_to_last = ~(BitSet<SIZE>::fill() << (last + 1)); | ||
|   const BitSet<SIZE> from_first = BitSet<SIZE>::fill() << first; | ||
|   *this |= up_to_last & from_first; | ||
| } | ||
| // Clears bit number `bit`. | ||
| void clear(size_t bit) { | ||
|   const size_t word = bit / 32; | ||
|   bits_[word] = bits_[word] & ~(1UL << (bit % 32)); | ||
| } | ||
| // Clears every bit in the inclusive range [first, last]. | ||
| void clear_subset(size_t first, size_t last) { | ||
|   const BitSet<SIZE> above_last = BitSet<SIZE>::fill() << (last + 1); | ||
|   const BitSet<SIZE> below_first = ~(BitSet<SIZE>::fill() << first); | ||
|   *this &= above_last | below_first; | ||
| } | ||
| // Clears every bit by zeroing each storage word. | ||
| void clear() { | ||
|   size_t remaining = NELEM(bits_); | ||
|   while (remaining-- > 0) bits_[remaining] = 0; | ||
| } | ||
| // Create a BitSet whose lowest `n` bits are set and all other bits are clear. | ||
| // n == 0 yields an empty set; n larger than SIZE is clamped to SIZE. | ||
| static BitSet<SIZE> fill(size_t n = SIZE) { | ||
|   BitSet<SIZE> ret; | ||
|   // Zero explicitly so the result does not depend on what the default constructor leaves in bits_. | ||
|   ret.clear(); | ||
|   if (n > SIZE) n = SIZE; | ||
|   // Without this guard (n - 1) wraps around for n == 0 and the loop below runs far past bits_. | ||
|   if (n == 0) return ret; | ||
|   const size_t top_word = (n - 1) >> 5; | ||
|   for (size_t i = 0; i <= top_word; i++) { | ||
|     ret.bits_[i] = ~uint32_t(0); | ||
|   } | ||
|   // Trim the highest word when n is not a multiple of 32. | ||
|   if ((n & 31) != 0) ret.bits_[top_word] >>= 32 - (n & 31); | ||
|   return ret; | ||
| } | ||
| // Returns storage word number `word`, or 0 when the index lies outside bits_. | ||
| uint32_t get_word(int word) const { | ||
|   const bool in_range = word >= 0 && word < (int)NELEM(bits_); | ||
|   if (!in_range) return 0; | ||
|   return bits_[word]; | ||
| } | ||
| // Returns the 32 bits that start at bit position `pos`. | ||
| // The value is assembled from two neighbouring words; words outside bits_ read as 0 via get_word(). | ||
| uint32_t get32(int pos) const { | ||
|   const int word = pos >> 5; | ||
|   const uint64_t high = get_word(word + 1); | ||
|   const uint64_t low = get_word(word); | ||
|   const uint64_t joined = (high << 32) | low; | ||
|   return joined >> (pos & 31); | ||
| } | ||
| // Counts the set bits across all storage words. | ||
| size_t popcount() const { | ||
|   size_t total = 0; | ||
|   size_t word = 0; | ||
|   while (word < NELEM(bits_)) { | ||
|     total += __builtin_popcount(bits_[word]); | ||
|     word++; | ||
|   } | ||
|   return total; | ||
| } | ||
| // Counts the set bits within the inclusive range [first, last]. | ||
| size_t popcount_subset(size_t first, size_t last) const { | ||
|   const BitSet<SIZE> up_to_last = ~(BitSet<SIZE>::fill() << (last + 1)); | ||
|   const BitSet<SIZE> from_first = BitSet<SIZE>::fill() << first; | ||
|   const BitSet<SIZE> range_mask = up_to_last & from_first; | ||
|   return (range_mask & *this).popcount(); | ||
| } | ||
| size_t next(size_t bit) const { | ||
| for (size_t i = 1; i <= SIZE; i++) { | ||
| size_t j = (bit + i) % SIZE; | ||
|
|
@@ -172,15 +197,68 @@ class BitSet { | |
| } | ||
| return 0; | ||
| } | ||
| // Scans [first, last] and returns the index of the set bit selected by the zero-based | ||
| // counter `bit` (bit <= 0 selects the first set bit). Returns 0 when the range holds | ||
| // too few set bits. | ||
| size_t nth_subset(int bit, size_t first, size_t last) const { | ||
|   int to_skip = bit; | ||
|   for (size_t pos = first; pos <= last; pos++) { | ||
|     if (!get(pos)) continue; | ||
|     if (to_skip <= 0) return pos; | ||
|     to_skip--; | ||
|   } | ||
|   return 0; | ||
| } | ||
| void operator>>=(int bits) { | ||
| if (!bits) return; | ||
| for (int i = 0; i < (int)SIZE; i++) { | ||
| if (i + bits < (int)SIZE && get(i + bits)) { | ||
| set(i); | ||
| } else { | ||
| clear(i); | ||
| } | ||
| for (size_t i = 0; i < NELEM(bits_); i++) bits_[i] = get32(i * 32 + bits); | ||
| } | ||
| // Returns a copy shifted toward bit 0: each result word is read with get32() at an | ||
| // offset `bits` higher, and reads past the last storage word yield 0. | ||
| BitSet<SIZE> operator>>(int bits) const { | ||
|   BitSet<SIZE> shifted; | ||
|   size_t word = 0; | ||
|   while (word < NELEM(bits_)) { | ||
|     shifted.bits_[word] = get32(word * 32 + bits); | ||
|     word++; | ||
|   } | ||
|   return shifted; | ||
| } | ||
| void operator<<=(int bits) { | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
We should probably improve these while we're at it, something like:
uint32_t get_word(int word) {
if (word < 0 || word >= (int) NELEM(bits_)) return 0;
return bits_[word];
}
uint32_t get32(int pos) {
uint64_t tmp = get_word(1 + (pos >> 5));
tmp <<= 32;
tmp |= get_word(pos >> 5);
return tmp >> (pos & 31);
}
BitSet<SIZE> operator>>(int bits) const {
BitSet<SIZE> ret;
for (int i = 0; i < SIZE; i++) ret.bits_[i]= get32(i*32 + bits);
return ret;
}
`<<` would be basically the same, but with a `-` instead of the `+` inside the get32() call. |
||
| if (!bits) return; | ||
| for (int i = NELEM(bits_) - 1; i >= 0; i--) bits_[i] = get32(i * 32 - bits); | ||
| } | ||
| // Returns a copy shifted away from bit 0: each result word is read with get32() at an | ||
| // offset `bits` lower, and reads below word 0 yield 0. | ||
| BitSet<SIZE> operator<<(int bits) const { | ||
|   BitSet<SIZE> shifted; | ||
|   int word = NELEM(bits_); | ||
|   while (--word >= 0) { | ||
|     shifted.bits_[word] = get32(word * 32 - bits); | ||
|   } | ||
|   return shifted; | ||
| } | ||
| // Returns the bitwise complement. When SIZE is not a multiple of 32, the unused high | ||
| // bits of the last word are masked back to 0 after inverting. | ||
| BitSet<SIZE> operator~() const { | ||
|   BitSet<SIZE> inverted = *this; | ||
|   size_t word = 0; | ||
|   while (word < NELEM(bits_)) { | ||
|     inverted.bits_[word] = ~inverted.bits_[word]; | ||
|     word++; | ||
|   } | ||
|   if ((SIZE & 31) != 0) { | ||
|     inverted.bits_[SIZE >> 5] &= ~uint32_t(0) >> (32 - (SIZE & 31)); | ||
|   } | ||
|   return inverted; | ||
| } | ||
| // In-place intersection: ANDs every storage word with the matching word of `other`. | ||
| void operator&=(const BitSet<SIZE>& other) { | ||
|   size_t word = 0; | ||
|   while (word < NELEM(bits_)) { | ||
|     bits_[word] = bits_[word] & other.bits_[word]; | ||
|     word++; | ||
|   } | ||
| } | ||
| // Returns the intersection of this set and `other` without modifying either. | ||
| BitSet<SIZE> operator&(const BitSet<SIZE>& other) const { | ||
|   BitSet<SIZE> intersection = *this; | ||
|   intersection &= other; | ||
|   return intersection; | ||
| } | ||
| // In-place union: ORs every storage word with the matching word of `other`. | ||
| void operator|=(const BitSet<SIZE>& other) { | ||
|   size_t word = 0; | ||
|   while (word < NELEM(bits_)) { | ||
|     bits_[word] = bits_[word] | other.bits_[word]; | ||
|     word++; | ||
|   } | ||
| } | ||
| // Returns the union of this set and `other` without modifying either. | ||
| BitSet<SIZE> operator|(const BitSet<SIZE>& other) const { | ||
|   BitSet<SIZE> combined = *this; | ||
|   combined |= other; | ||
|   return combined; | ||
| } | ||
| // Writes the set to STDOUT as "BitSet<SIZE>: " followed by one digit per bit, | ||
| // highest bit index first, then a newline. | ||
| void print() const { | ||
|   STDOUT << "BitSet<" << SIZE << ">: "; | ||
|   for (size_t remaining = SIZE; remaining > 0; --remaining) { | ||
|     STDOUT << get(remaining - 1); | ||
|   } | ||
|   STDOUT << "\n"; | ||
| } | ||
|
|
||
| private: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,18 @@ | |
| #include "../common/atomic.h" | ||
| #include "../common/file_reader.h" | ||
|
|
||
| #ifdef RANDOM_SAMPLE_WITHOUT_REPLACEMENT | ||
| #include "../common/arg_parser.h" | ||
|
|
||
| #ifndef SAMPLE_WITHOUT_REPLACEMENT_BUFFER_SIZE_BITS | ||
| #define SAMPLE_WITHOUT_REPLACEMENT_BUFFER_SIZE_BITS 32 | ||
| #endif | ||
|
|
||
| // Cap the buffer at 256 bits. The macro is already defined at this point, so it must be | ||
| // undefined before it can be given a different value. | ||
| #if SAMPLE_WITHOUT_REPLACEMENT_BUFFER_SIZE_BITS > 256 | ||
| #undef SAMPLE_WITHOUT_REPLACEMENT_BUFFER_SIZE_BITS | ||
| #define SAMPLE_WITHOUT_REPLACEMENT_BUFFER_SIZE_BITS 256 | ||
| #endif | ||
| #endif | ||
|
|
||
| class Effect; | ||
| Effect* all_effects = NULL; | ||
|
|
||
|
|
@@ -175,6 +187,13 @@ class Effect { | |
| paired_ = false; | ||
| #ifdef KILL_OLD_PLAYERS | ||
| killable_ = false; | ||
| #endif | ||
| #ifdef NO_REPEAT_RANDOM | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
If we're going to have this code (which I'm not entirely convinced is a good idea), then we should use a new define to enable it. Maybe the NO_REPEAT_RANDOM_BUFFER_SIZE_BITS define? |
||
| #ifdef RANDOM_SAMPLE_WITHOUT_REPLACEMENT | ||
| available_.clear(); | ||
| #endif | ||
| last_ = -1; | ||
| last_subid_ = -1; | ||
| #endif | ||
| } | ||
| static int altnum(const char* s) { | ||
|
|
@@ -379,6 +398,13 @@ class Effect { | |
| } | ||
|
|
||
| #ifdef NO_REPEAT_RANDOM | ||
|
|
||
| #ifdef RANDOM_SAMPLE_WITHOUT_REPLACEMENT | ||
| // Storage for random sampling without replacement. | ||
| // Each bit tracks a specific File. `1` marks as available to select. | ||
| BitSet<SAMPLE_WITHOUT_REPLACEMENT_BUFFER_SIZE_BITS> available_; | ||
| #endif | ||
|
|
||
| int16_t last_ = -1; | ||
| int16_t last_subid_ = -1; | ||
|
|
||
|
|
@@ -406,6 +432,32 @@ class Effect { | |
|
|
||
| int random_subid(int filenum) { | ||
| if (!sub_files_) return 0; | ||
| #ifdef NO_REPEAT_RANDOM | ||
| #ifdef RANDOM_SAMPLE_WITHOUT_REPLACEMENT | ||
| const size_t total_files = files_found() * number_of_subfiles(); | ||
| if (total_files > 2 && total_files <= SAMPLE_WITHOUT_REPLACEMENT_BUFFER_SIZE_BITS) { | ||
| const size_t first = filenum * sub_files_; | ||
| const size_t last = first + files_found(); | ||
|
|
||
| uint8_t n = available_.popcount_subset(first, last); | ||
| if (!n) { | ||
| available_.set_subset(first, last); | ||
| n = sub_files_; | ||
| } | ||
| if (n == sub_files_ && filenum == last_ && last_subid_ >= 0) n--; | ||
|
|
||
| size_t ret = available_.nth_subset(RANDOMIZE(n, -1), first, last); | ||
| if (ret == last_ * sub_files_ + last_subid_) { | ||
| ret = available_.popcount_subset(ret, last) > 1 ? available_.next(ret) : available_.prev(ret); | ||
| } | ||
|
|
||
| available_.clear(ret); | ||
| last_ = ret / sub_files_; | ||
| last_subid_ = ret % sub_files_; | ||
| return last_subid_; | ||
| } | ||
| #endif | ||
| #endif | ||
| int ret = RANDOMIZE(sub_files_, last_ == filenum ? last_subid_ : -1); | ||
| #ifdef NO_REPEAT_RANDOM | ||
| last_subid_ = ret; | ||
|
|
@@ -430,6 +482,28 @@ class Effect { | |
| (file_type_ == FileType::SOUND || paired_)) { | ||
| n = std::min<int>(SaberBase::sound_number, num_files - 1); | ||
| } else { | ||
| #ifdef NO_REPEAT_RANDOM | ||
| #ifdef RANDOM_SAMPLE_WITHOUT_REPLACEMENT | ||
| const size_t total_files = files_found() * number_of_subfiles(); | ||
| if (total_files > 2 && total_files <= SAMPLE_WITHOUT_REPLACEMENT_BUFFER_SIZE_BITS) { | ||
| uint8_t n = available_.popcount(); | ||
| if (!n) { | ||
| available_ = BitSet<SAMPLE_WITHOUT_REPLACEMENT_BUFFER_SIZE_BITS>::fill(total_files); | ||
| n = total_files; | ||
| } | ||
| if (n == total_files && (last_ >= 0 || last_subid_ >= 0)) n--; | ||
|
|
||
| size_t ret = available_.nth(RANDOMIZE(n, -1)); | ||
| const int16_t last = sub_files_ ? last_ * sub_files_ + last_subid_ : last_; | ||
| if (ret == last) ret = available_.next(ret); | ||
|
|
||
| available_.clear(ret); | ||
| last_ = sub_files_ ? ret / sub_files_ : ret; | ||
| if (sub_files_) last_subid_ = ret % sub_files_; | ||
| return FileID(this, last_, sub_files_ ? last_subid_ : 0); | ||
| } | ||
| #endif | ||
| #endif | ||
| n = RANDOMIZE(num_files, last_); | ||
| } | ||
| int subid = random_subid(n); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm wondering if it would be easier (and faster) to do all this subset stuff with ands and ors.
Basically something like:
Bitset subset = ~(Bitset::All() << last) & (Bitset::All() << first); size_t popcount_subset = (subset & bitset).popcount();