Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions VERSIONS
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,13 @@ multitrait branch:
whenever a mutationEffect() callback is added, removed, rescheduled, or activated/deactivated in a way that affects the current tick (invalidating all affected trait values)
whenever the tick counter advances such that a previously active mutationEffect() callback becomes inactive, or vice versa (invalidating all affected trait values)
whenever the tick counter is changed in script to a new arbitrary value (invalidating all trait values in all individuals in all species)
tree-sequence recording work for multitrait:
individual metadata now has a variable-length per-trait array of phenotype and offset information
mutation metadata is now moved from the mutation table's metadata column to a table placed in top-level metadata with the json+struct codec
mutation metadata now has a variable-length per-trait array of effect size and dominance information
top-level JSON metadata now has a new `traits` key (required), which lists the traits used in the tree sequence; duplicated in provenance
the top-level JSON, mutation, and individual metadata schemas are updated accordingly
pushed the .trees file version from 0.9 to 1.0


version 5.1 (Eidos version 4.1):
Expand Down
108 changes: 59 additions & 49 deletions core/slim_globals.cpp

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions core/slim_globals.h
Original file line number Diff line number Diff line change
Expand Up @@ -683,12 +683,14 @@ enum class IndDomCacheIndex : slim_trait_index_t {};
#define SLIM_TSK_INDIVIDUAL_REMEMBERED ((tsk_flags_t)(1 << 17))
#define SLIM_TSK_INDIVIDUAL_RETAINED ((tsk_flags_t)(1 << 18))

extern const std::string gSLiM_tsk_metadata_schema;
extern const std::string gSLiM_tsk_metadata_JSON_schema;
extern const std::string gSLiM_tsk_metadata_binary_schema_FORMAT;

extern const std::string gSLiM_tsk_edge_metadata_schema;
extern const std::string gSLiM_tsk_site_metadata_schema;
extern const std::string gSLiM_tsk_mutation_metadata_schema;
extern const std::string gSLiM_tsk_node_metadata_schema_FORMAT;
extern const std::string gSLiM_tsk_individual_metadata_schema;
extern const std::string gSLiM_tsk_individual_metadata_schema_FORMAT;
extern const std::string gSLiM_tsk_population_metadata_schema_PREJSON; // before SLiM 3.7
extern const std::string gSLiM_tsk_population_metadata_schema;

Expand Down
382 changes: 294 additions & 88 deletions core/species.cpp

Large diffs are not rendered by default.

34 changes: 29 additions & 5 deletions core/species.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ enum class SLiMFileFormat
// Note that these structs are packed, and so accesses to them and within them may be unaligned; we assume
// that is OK on the platforms we run on, so as to keep file sizes down.

#warning MutationMetadataRec needs to be removed!!!
typedef struct __attribute__((__packed__)) {
slim_objectid_t mutation_type_id_; // 4 bytes (int32_t): the id of the mutation type the mutation belongs to
slim_effect_t selection_coeff_; // 4 bytes (float): the mutation effect (e.g., selection coefficient)
Expand All @@ -106,6 +107,22 @@ typedef struct __attribute__((__packed__)) {
int8_t nucleotide_; // 1 byte (int8_t): the nucleotide for the mutation (0='A', 1='C', 2='G', 3='T'), or -1
} MutationMetadataRec;

// Per-trait portion of a mutation's metadata: one packed entry of three 4-byte floats (12 bytes total,
// which is verified by a static_assert further down in this header). One of these is stored per trait.
// NOTE(review): identifiers that begin with an underscore followed by an uppercase letter are reserved
// in C++; consider a different name if this type is ever renamed for other reasons.
typedef struct __attribute__((__packed__)) {
slim_effect_t effect_size_; // 4 bytes (float): the mutation effect size (e.g., selection coefficient)
slim_effect_t dominance_coeff_; // 4 bytes (float): the dominance coefficient; note that NAN indicates independent dominance
slim_effect_t hemizygous_dominance_coeff_; // 4 bytes (float): the hemizygous dominance coefficient
} _MutationPerTraitMetadata;

// Per-mutation metadata record: a fixed 24-byte head (id, type, subpop, tick, nucleotide, padding) followed
// by a per-trait array.  per_trait_ is declared with a single element, so sizeof() is 36 bytes (checked by a
// static_assert further down in this header) and covers exactly one trait; this struct is only a base, and
// the real number of per-trait entries is determined by the schema.
// NOTE(review): presumably this is the record stored in the mutation table placed in top-level metadata
// (see the VERSIONS notes) -- confirm against the writing code in species.cpp.
typedef struct __attribute__((__packed__)) {
slim_mutationid_t mutation_id_; // 8 bytes (int64_t): the SLiM id of the mutation
slim_objectid_t mutation_type_id_; // 4 bytes (int32_t): the id of the mutation type the mutation belongs to
slim_objectid_t subpop_index_; // 4 bytes (int32_t): the id of the subpopulation in which the mutation arose
slim_tick_t origin_tick_; // 4 bytes (int32_t): the tick in which the mutation arose
int8_t nucleotide_; // 1 byte (int8_t): the nucleotide for the mutation (0='A', 1='C', 2='G', 3='T'), or -1
int8_t unused_[3]; // 3 bytes (int8_t): UNUSED SPACE, PRESENTLY FOR PADDING
_MutationPerTraitMetadata per_trait_[1]; // 12 bytes per entry: 1 or more per-trait entries (count determined by the schema!)
} MutationTableMetadataRec;

typedef struct __attribute__((__packed__)) {
// BCH 12/10/2024: This metadata record is becoming a bit complicated, for multichromosome SLiM, and is now actually variable-length.
// The difficulty is that this metadata gets attached to nodes in the tree sequence, and in multichrom models the node table is
Expand All @@ -129,6 +146,11 @@ typedef struct __attribute__((__packed__)) {
// BCH 12/6/2024: type_, the chromosome type for the haplosome, has moved to top-level metadata; it is constant across a tree sequence
} HaplosomeMetadataRec;

// Per-trait portion of an individual's metadata: one packed entry of two 8-byte doubles (16 bytes total,
// which is verified by a static_assert further down in this header). One of these is stored per trait.
// NOTE(review): identifiers that begin with an underscore followed by an uppercase letter are reserved
// in C++; consider a different name if this type is ever renamed for other reasons.
typedef struct __attribute__((__packed__)) {
slim_phenotype_t phenotype_; // 8 bytes (double): the phenotypic value for a trait
slim_trait_offset_t offset_; // 8 bytes (double): the individual's offset, which is combined in when producing the trait value
} _IndividualPerTraitMetadata;

typedef struct __attribute__((__packed__)) {
slim_pedigreeid_t pedigree_id_; // 8 bytes (int64_t): the SLiM pedigree ID for this individual, assigned by pedigree rec
slim_pedigreeid_t pedigree_p1_; // 8 bytes (int64_t): the SLiM pedigree ID for this individual's parent 1
Expand All @@ -137,14 +159,18 @@ typedef struct __attribute__((__packed__)) {
slim_objectid_t subpopulation_id_; // 4 bytes (int32_t): the subpopulation the individual belongs to
int32_t sex_; // 4 bytes (int32_t): the sex of the individual, as defined by the IndividualSex enum
uint32_t flags_; // 4 bytes (uint32_t): assorted flags, see below
_IndividualPerTraitMetadata per_trait_[1]; // 16 bytes per entry: 1 or more per-trait entries (count determined by the schema!)
} IndividualMetadataRec;

#define SLIM_INDIVIDUAL_METADATA_MIGRATED 0x01 // set if the individual has migrated in this cycle

// We double-check the size of these records to make sure we understand what they contain and how they're packed
static_assert(sizeof(MutationMetadataRec) == 17, "MutationMetadataRec is not 17 bytes!");
// BCH 2/11/2026: Note that all of these metadata structs are now actually variable-length; this is just a base.
static_assert(sizeof(_MutationPerTraitMetadata) == 12, "_MutationPerTraitMetadata is not 12 bytes!");
static_assert(sizeof(MutationTableMetadataRec) == 36, "MutationTableMetadataRec is not 36 bytes!");
static_assert(sizeof(HaplosomeMetadataRec) == 9, "HaplosomeMetadataRec is not 9 bytes!"); // but its size is dynamic at runtime
static_assert(sizeof(IndividualMetadataRec) == 40, "IndividualMetadataRec is not 40 bytes!");
static_assert(sizeof(_IndividualPerTraitMetadata) == 16, "_IndividualPerTraitMetadata is not 16 bytes!");
static_assert(sizeof(IndividualMetadataRec) == 56, "IndividualMetadataRec is not 56 bytes!");

// We check endianness on the platform we're building on; we assume little-endianness in our read/write code, I think.
#if defined(__BYTE_ORDER__)
Expand Down Expand Up @@ -664,9 +690,6 @@ class Species : public EidosDictionaryUnretained
void DisconnectCopiedSharedTables(tsk_table_collection_t &p_tables); // zeroes out the shared table copies in p_tables

static void handle_error(const std::string &msg, int error) __attribute__((__noreturn__)) __attribute__((cold)) __attribute__((analyzer_noreturn));
static void MetadataForMutation(Mutation *p_mutation, MutationMetadataRec *p_metadata);
static void MetadataForSubstitution(Substitution *p_substitution, MutationMetadataRec *p_metadata);
static void MetadataForIndividual(Individual *p_individual, IndividualMetadataRec *p_metadata);
static void DerivedStatesFromAscii(tsk_table_collection_t *p_tables);
static void DerivedStatesToAscii(tsk_table_collection_t *p_tables);

Expand All @@ -678,6 +701,7 @@ class Species : public EidosDictionaryUnretained
void RecordNewHaplosome_NULL(Haplosome *p_new_haplosome);
void RecordNewDerivedState(const Haplosome *p_haplosome, slim_position_t p_position, const std::vector<Mutation *> &p_derived_mutations);
void RetractNewIndividual(void);
void MetadataForIndividual(Individual *p_individual, IndividualMetadataRec *p_metadata);
void AddIndividualsToTable(Individual * const *p_individual, size_t p_num_individuals, tsk_table_collection_t *p_tables, INDIVIDUALS_HASH *p_individuals_hash, tsk_flags_t p_flags);
void AddLiveIndividualsToIndividualsTable(tsk_table_collection_t *p_tables, INDIVIDUALS_HASH *p_individuals_hash);
void FixAliveIndividuals(tsk_table_collection_t *p_tables);
Expand Down
2 changes: 1 addition & 1 deletion core/trait.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ class Trait : public EidosDictionaryRetained
void _RecacheIndividualOffsetDistribution(void); // caches individualOffsetDistributionFixed_ and individualOffsetDistributionFixedValue_
slim_trait_offset_t _DrawIndividualOffset(void) const; // draws from the distribution defined by individualOffsetDistributionMean_ and individualOffsetDistributionSD_
// Fast path for offset draws: when the distribution has been cached as fixed, hand back the cached value
// directly; otherwise fall through to the out-of-line _DrawIndividualOffset().
inline __attribute__((always_inline)) slim_trait_offset_t DrawIndividualOffset(void) const
{
	if (individualOffsetDistributionFixed_)
		return individualOffsetDistributionFixedValue_;
	
	return _DrawIndividualOffset();
}
//inline __attribute__((always_inline)) slim_trait_offset_t IndividualOffsetDistributionMean(void) const { return individualOffsetDistributionMean_; } // a bit dangerous because of the exp() post-transform; probably nobody should use this
// Returns the stored mean parameter (individualOffsetDistributionMean_) unchanged, i.e. without any transform applied.
inline __attribute__((always_inline)) slim_trait_offset_t IndividualOffsetDistributionMean(void) const { return individualOffsetDistributionMean_; } // a bit dangerous because of the exp() post-transform; probably nobody should use this
// Returns the stored standard-deviation parameter (individualOffsetDistributionSD_) unchanged.
inline __attribute__((always_inline)) slim_trait_offset_t IndividualOffsetDistributionSD(void) const { return individualOffsetDistributionSD_; }
// Sets the individualOffsetEverOverridden_ flag to true; nothing in this method ever clears it again.
inline __attribute__((always_inline)) void IndividualOffsetChanged(void) { individualOffsetEverOverridden_ = true; }
// Reports the current value of the individualOffsetEverOverridden_ flag.
// NOTE(review): this is a read-only accessor but is not declared const, unlike the neighboring getters -- consider adding const.
inline __attribute__((always_inline)) bool IndividualOffsetEverChanged(void) { return individualOffsetEverOverridden_; }
Expand Down
13 changes: 13 additions & 0 deletions eidos/eidos_globals.h
Original file line number Diff line number Diff line change
Expand Up @@ -894,6 +894,19 @@ bool Eidos_RegexWorks(void);
// Checks that symbol_name does not contain any illegal Unicode characters; used to check identifiers, in particular
bool Eidos_ContainsIllegalUnicode(const std::string &symbol_name);

// Stores value at dest as eight bytes in little-endian order (least significant byte first), independent of the
// host's byte order; dest must have room for 8 bytes.  Adapted from tskit/test_core.c in PR
// https://github.com/tskit-dev/tskit/pull/3306
inline void Eidos_set_u64_le(uint8_t *dest, uint64_t value)
{
	// peel off the low byte each time around, then shift the next byte down into position
	for (int byte_index = 0; byte_index < 8; ++byte_index)
	{
		dest[byte_index] = static_cast<uint8_t>(value & 0xFF);
		value >>= 8;
	}
}


// *******************************************************************************************************************
//
Expand Down
80 changes: 80 additions & 0 deletions treerec/tskit/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@
#include <tskit/core.h>

#define UUID_NUM_BYTES 16
#define TSK_JSON_BINARY_HEADER_SIZE 21

static const uint8_t TSK_JSON_BINARY_MAGIC[4] = { 'J', 'B', 'L', 'B' };

#if defined(_WIN32)

Expand Down Expand Up @@ -95,6 +98,22 @@ get_random_bytes(uint8_t *buf)

#endif

/* Assemble a uint64_t from the eight bytes at p, interpreting them in
 * little-endian order (p[0] is the least significant byte). The result does
 * not depend on the byte order of the host. */
static uint64_t
tsk_load_u64_le(const uint8_t *p)
{
    uint64_t result = 0;
    int k;

    /* Start with the most significant byte and shift it upwards as each
     * less significant byte is appended below it. */
    for (k = 7; k >= 0; k--) {
        result = (result << 8) | (uint64_t) p[k];
    }
    return result;
}

/* Generate a new UUID4 using a system-generated source of randomness.
* Note that this function writes a NULL terminator to the end of this
* string, so that the total length of the buffer must be 37 bytes.
Expand All @@ -121,6 +140,67 @@ tsk_generate_uuid(char *dest, int TSK_UNUSED(flags))
out:
return ret;
}

/* Split a ``json+struct`` metadata buffer into its JSON and binary sections
 * without copying. The framing checked here is: 4 magic bytes, a 1-byte
 * version (must be 1), then two little-endian uint64 lengths (JSON, binary),
 * for a header of TSK_JSON_BINARY_HEADER_SIZE bytes, followed by the JSON
 * bytes and then the binary bytes.
 *
 * Returns 0 on success. Returns TSK_ERR_BAD_PARAM_VALUE if any pointer
 * argument is NULL, TSK_ERR_FILE_VERSION_TOO_NEW if the version byte is not 1,
 * and TSK_ERR_FILE_FORMAT if the buffer is too short, the magic bytes do not
 * match, or the declared lengths do not fit inside metadata_length. The
 * output arguments are only written on success. */
int
tsk_json_struct_metadata_get_blob(const char *metadata, tsk_size_t metadata_length,
    const char **json, tsk_size_t *json_length, const uint8_t **blob,
    tsk_size_t *blob_length)
{
    int ret = 0;
    const uint8_t *raw = (const uint8_t *) metadata;
    uint64_t json_size;
    uint64_t blob_size;
    uint64_t blob_offset;
    uint64_t required_size;

    /* Every pointer argument is mandatory. */
    if (raw == NULL || json == NULL || json_length == NULL || blob == NULL
        || blob_length == NULL) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    /* A complete header must be present and must start with the magic bytes;
     * the length test comes first so memcmp never reads a short buffer. */
    if (metadata_length < TSK_JSON_BINARY_HEADER_SIZE
        || memcmp(raw, TSK_JSON_BINARY_MAGIC, sizeof(TSK_JSON_BINARY_MAGIC)) != 0) {
        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
        goto out;
    }
    /* Only version 1 of the framing is understood. */
    if (raw[4] != 1) {
        ret = tsk_trace_error(TSK_ERR_FILE_VERSION_TOO_NEW);
        goto out;
    }

    json_size = tsk_load_u64_le(raw + 5);
    blob_size = tsk_load_u64_le(raw + 13);

    /* Guard each addition against uint64 wrap-around before performing it,
     * since both lengths come straight from the (untrusted) buffer. */
    if (json_size > UINT64_MAX - (uint64_t) TSK_JSON_BINARY_HEADER_SIZE) {
        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
        goto out;
    }
    blob_offset = (uint64_t) TSK_JSON_BINARY_HEADER_SIZE + json_size;
    if (blob_size > UINT64_MAX - blob_offset) {
        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
        goto out;
    }
    required_size = blob_offset + blob_size;

    /* The declared sections must fit inside the supplied buffer; trailing
     * bytes beyond them are tolerated. */
    if ((uint64_t) metadata_length < required_size) {
        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
        goto out;
    }

    *json = metadata + TSK_JSON_BINARY_HEADER_SIZE;
    *json_length = (tsk_size_t) json_size;
    *blob = raw + blob_offset;
    *blob_length = (tsk_size_t) blob_size;
out:
    return ret;
}
static const char *
tsk_strerror_internal(int err)
{
Expand Down
25 changes: 25 additions & 0 deletions treerec/tskit/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -1096,6 +1096,31 @@ bool tsk_isfinite(double val);
#define TSK_UUID_SIZE 36
int tsk_generate_uuid(char *dest, int flags);

/**
@brief Extract the binary payload from ``json+struct`` encoded metadata.
@rst
Metadata produced by :py:class:`tskit.metadata.JSONStructCodec` consists of a fixed-size
header followed by canonical JSON bytes and an optional binary payload. This helper
validates the framing, returning pointers to the embedded JSON and binary sections
without copying.
The output pointers reference memory owned by the caller and remain valid only while
the original metadata buffer is alive.
@endrst
@param[in] metadata Pointer to the encoded metadata bytes.
@param[in] metadata_length Number of bytes available at ``metadata``.
@param[out] json On success, set to the start of the JSON bytes.
@param[out] json_length On success, set to the JSON length in bytes.
@param[out] blob On success, set to the start of the binary payload.
@param[out] blob_length On success, set to the payload length in bytes.
@return 0 on success, or a :ref:`TSK_ERR <c_api_errors>` code on failure.
*/
int tsk_json_struct_metadata_get_blob(const char *metadata, tsk_size_t metadata_length,
const char **json, tsk_size_t *json_length, const uint8_t **blob,
tsk_size_t *blob_length);

/* TODO most of these can probably be macros so they compile out as no-ops.
* Lets do the 64 bit tsk_size_t switch first though. */
void *tsk_malloc(tsk_size_t size);
Expand Down
Loading