diff --git a/.gitignore b/.gitignore index e04d956..b5a7b79 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,33 @@ html/ *.egg-info __pycache__ dist + +# editor settings +.vscode/ +.spyproject/ + +# cmake output files from CMake.gitignore +CMakeLists.txt.user +CMakeCache.txt +CMakeFiles +CMakeScripts +Testing +Makefile +cmake_install.cmake +install_manifest.txt +compile_commands.json +CTestTestfile.cmake +_deps + +# Additional CMake files +CPackConfig.cmake +CPackSourceConfig.cmake + +# static library archives +*.a + +# compiled binaries +tests/bed_test +tests/bim_test +tests/fam_test +tests/plinkio_test diff --git a/README.md b/README.md index 22a7c59..1449901 100644 --- a/README.md +++ b/README.md @@ -20,25 +20,43 @@ Project rationales: Installing this library is easy, just **configure** and **make**. This will also install Python bindings for the active interpeter. -*NEWS* The python extension can now be installed by +*NEWS* The python extension can now be installed +[from pypi](https://pypi.org/project/plinkio/) by: pip install plinkio +To compile and install from source code: + + python setup.py build + python setup.py install + +[tox](https://pypi.org/project/tox/) is required to run the plinkio test suite. +By calling: + + tox + +in your project directory, you will download dependencies, compile the library +and run tests. If you get stuck with compiled libraries, adding the `-r` option +will reset the test environment (this means wiping out the testing environment +and re-initializing it from scratch by downloading dependencies and compiling again). + ### Installing to a standard location - mkdir build - cd build - ../configure - make && make check && sudo make install +[CMake](https://cmake.org/) is required in order to compile the C libraries (note +the final `.` after `cmake`, which stands for your current *project* local +directory): -You can also pass the --disable-tests flag to **configure** to avoid building the unit tests and the dependency to libcmockery. 
Note howerver, in this case **make check** will not do anything. + cmake . + make + make test + make install ### Installing to a custom location - mkdir build - cd build - ../configure --prefix=/path/to/plinkio - make && make check && make install + cmake -DCMAKE_INSTALL_PREFIX:PATH=/path/to/plinkio . + make + make test + make install ### Linking to your program @@ -56,7 +74,7 @@ The genotypes are coded 0, 1, 2, and 3. The numbers 0-2 represent the number of ## Using in C -For specific information look at http://mfranberg.github.com/libplinkio/index.html +For specific information look at https://mfranberg.github.io/libplinkio/index.html The following C program prints the genotypes of all individuals. Note, that it is not recommended to run this program on a big plink file since it will fill your screen with data. @@ -175,9 +193,9 @@ struct pio_locus_t size_t pio_id; /** - * Chromosome number starting from 1. + * Chromosome identifier as a string. */ - unsigned char chromosome; + char *chromosome; /** * Name of the SNP. 
@@ -302,7 +320,7 @@ class Sample: class Locus: def __init__(self, chromosome, name, position, bp_position, allele1, allele2): ## - # Chromosome number starting from 1 + # Chromosome string # self.chromosome = chromosome diff --git a/py-plinkio/cplinkio.c b/py-plinkio/cplinkio.c index bcabd07..c09f5db 100644 --- a/py-plinkio/cplinkio.c +++ b/py-plinkio/cplinkio.c @@ -393,11 +393,11 @@ int parse_locus(PyObject *py_locus, struct pio_locus_t *locus) PyObject *allele1_object; PyObject *allele2_object; + PyObject *chromosome_string; PyObject *name_string; PyObject *allele1_string; PyObject *allele2_string; - int chromosome; float position; int bp_position; @@ -410,16 +410,16 @@ int parse_locus(PyObject *py_locus, struct pio_locus_t *locus) allele1_object = PyObject_GetAttrString( py_locus, "allele1" ); allele2_object = PyObject_GetAttrString( py_locus, "allele2" ); - chromosome = PyInt_AsLong( chromosome_object ); + chromosome_string = PyObject_Str( chromosome_object ); name_string = PyObject_Str( name_object ); position = PyFloat_AsDouble( position_object ); bp_position = PyInt_AsLong( bp_position_object ); allele1_string = PyObject_Str( allele1_object ); allele2_string = PyObject_Str( allele2_object ); - if( chromosome == -1 && PyErr_Occurred( ) ) + if( chromosome_string == NULL ) { - PyErr_SetString( PyExc_TypeError, "Error chromosome field must be an integer." 
); + PyErr_SetString( PyExc_TypeError, "Error chromosome field must be a string" ); ret = 0; } else if( name_string == NULL ) @@ -449,7 +449,7 @@ int parse_locus(PyObject *py_locus, struct pio_locus_t *locus) } /* The strings wont get freed by plinkio so remove const qualifier */ - locus->chromosome = PyInt_AsLong( chromosome_object ); + locus->chromosome = (char *) PyString_AsString( chromosome_string ); locus->name = (char *) PyString_AsString( name_string ); locus->position = PyFloat_AsDouble( position_object ); locus->bp_position = PyInt_AsLong( bp_position_object ); @@ -457,6 +457,7 @@ int parse_locus(PyObject *py_locus, struct pio_locus_t *locus) locus->allele2 = (char *) PyString_AsString( allele2_string ); locus_error: + Py_DECREF( chromosome_string ); Py_DECREF( name_string ); Py_DECREF( allele1_string ); Py_DECREF( allele2_string ); @@ -639,7 +640,7 @@ plinkio_get_loci(PyObject *self, PyObject *args) { struct pio_locus_t *locus = pio_get_locus( &c_plink_file->file, i ); - PyObject *args = Py_BuildValue( "BsfLss", + PyObject *args = Py_BuildValue( "ssfLss", locus->chromosome, locus->name, locus->position, diff --git a/py-plinkio/plinkio/plinkfile.py b/py-plinkio/plinkio/plinkfile.py index b50f47b..fe8ba97 100644 --- a/py-plinkio/plinkio/plinkfile.py +++ b/py-plinkio/plinkio/plinkfile.py @@ -206,7 +206,7 @@ class Locus: def __init__(self, chromosome, name, position, bp_position, allele1, allele2): # pylint: disable = too-many-arguments ## - # Chromosome number starting from 1 + # Chromosome string # self.chromosome = chromosome diff --git a/py-plinkio/tests/write_test.py b/py-plinkio/tests/write_test.py index 866a9e7..fcbb46c 100644 --- a/py-plinkio/tests/write_test.py +++ b/py-plinkio/tests/write_test.py @@ -9,9 +9,19 @@ def test_read_write(): with tempfile.TemporaryDirectory() as temp_dir: plink_prefix = os.path.join(temp_dir, "test") - samples = [Sample("fid1", "iid1", "0", "0", 0, 0), Sample("fid2", "iid2", "0", "0", 0, 1)] - loci = [Locus(1, "chr1:1", 
1.0, 1, "A", "C"), Locus(2, "chr1:2", 2.0, 2, "G", "T")] - rows = [[0, 1], [1, 2]] + samples = [ + Sample("fid1", "iid1", "0", "0", 0, 0), + Sample("fid2", "iid2", "0", "0", 0, 1), + ] + + loci = [ + Locus("1", "chr1:1", 1.0, 1, "A", "C"), + Locus("2", "chr1:2", 2.0, 2, "G", "T"), + Locus("X", "chrX:3", 1.0, 3, "A", "G"), + Locus("Contig123456", "Contig123456:4", 1.0, 4, "T", "C"), + ] + + rows = [[0, 1], [1, 2], [1, 1], [0, 0]] writer = plinkfile.create(plink_prefix, samples) diff --git a/setup.py b/setup.py index 3712f59..af2a6a0 100644 --- a/setup.py +++ b/setup.py @@ -39,7 +39,7 @@ # Versions should comply with PEP440. For a discussion on single-sourcing # the version across setup.py and the project code, see # https://packaging.python.org/en/latest/single_source_version.html - version="0.9.8", + version="0.9.9.dev0", description="A library for parsing plink genotype files", long_description=long_description, # The project's main homepage. diff --git a/src/bim.c b/src/bim.c index aaa8174..2a720ef 100644 --- a/src/bim.c +++ b/src/bim.c @@ -38,6 +38,10 @@ utarray_locus_dtor(void *element) { struct pio_locus_t *locus = (struct pio_locus_t *) element; + if( locus->chromosome != NULL ) + { + free( locus->chromosome ); + } if( locus->name != NULL ) { free( locus->name ); @@ -103,7 +107,7 @@ bim_write(struct pio_bim_file_t *bim_file, struct pio_locus_t *locus) if( write_locus( bim_file->fp, locus ) == PIO_OK ) { locus_copy.pio_id = bim_num_loci( bim_file ); - locus_copy.chromosome = locus->chromosome; + locus_copy.chromosome = strdup( locus->chromosome ); locus_copy.name = strdup( locus->name ); locus_copy.position = locus->position; locus_copy.bp_position = locus->bp_position; diff --git a/src/bim_parse.c b/src/bim_parse.c index 37c0305..34d3742 100644 --- a/src/bim_parse.c +++ b/src/bim_parse.c @@ -99,31 +99,6 @@ parse_str(const char *field, size_t length, pio_status_t *status) } } -/** - * Parses a chromosome number and returns it. - * - * @param field Csv field. 
- * @param length Length of the field. - * @param status Status of the conversion. - * - * @return The parsed csv field, or 0 if it could - * not be parsed. - */ -static unsigned char -parse_chr(const char *field, size_t length, pio_status_t *status) -{ - char *endptr; - unsigned char chr = (unsigned char) strtol( field, &endptr, 10 ); - if( length > 0 && ( endptr == NULL || *endptr == '\0' ) ) - { - *status = PIO_OK; - return chr; - } - - *status = PIO_ERROR; - return 0; -} - /** * Parses a genetic distance (float). * @@ -204,7 +179,7 @@ new_field(void *field, size_t field_length, void *data) switch( state->field ) { case 0: - state->cur_locus.chromosome = parse_chr( buffer, field_length, &status ); + state->cur_locus.chromosome = parse_str( buffer, field_length, &status ); break; case 1: state->cur_locus.name = parse_str( buffer, field_length, &status ); @@ -286,7 +261,7 @@ pio_status_t write_locus(FILE *bim_fp, struct pio_locus_t *locus) { int bytes_written = fprintf( bim_fp, - "%d\t%s\t%f\t%lld\t%s\t%s\n", + "%s\t%s\t%f\t%lld\t%s\t%s\n", locus->chromosome, locus->name, locus->position, diff --git a/src/plinkio/bim.h b/src/plinkio/bim.h index 621a09a..93d0218 100644 --- a/src/plinkio/bim.h +++ b/src/plinkio/bim.h @@ -27,9 +27,9 @@ struct pio_locus_t size_t pio_id; /** - * Chromosome number starting from 1. + * Chromosome identifier as a string. */ - unsigned char chromosome; + char *chromosome; /** * Name of the SNP. 
diff --git a/tests/bim_test.c b/tests/bim_test.c index ba3d272..ea4c4ac 100644 --- a/tests/bim_test.c +++ b/tests/bim_test.c @@ -36,11 +36,30 @@ test_parse_position(void **state) void test_parse_chr(void **state) { - const char *TEST_STRING = "16"; + const char *TEST_STRING1 = "16"; pio_status_t status; - assert_int_equal( parse_chr( TEST_STRING, strlen( TEST_STRING ), &status ), 16 ); + char *chrom1 = parse_str( TEST_STRING1, strlen( TEST_STRING1 ), &status ); + + assert_string_equal( chrom1, TEST_STRING1 ); + assert_int_equal( status, PIO_OK ); + free(chrom1); + + const char *TEST_STRING2 = "X"; + + char *chrom2 = parse_str( TEST_STRING2, strlen( TEST_STRING2 ), &status ); + + assert_string_equal( chrom2, TEST_STRING2 ); + assert_int_equal( status, PIO_OK ); + free(chrom2); + + const char *TEST_STRING3 = "Contig123456"; + + char *chrom3 = parse_str( TEST_STRING3, strlen( TEST_STRING3 ), &status ); + + assert_string_equal( chrom3, "Contig123456" ); assert_int_equal( status, PIO_OK ); + free(chrom3); } /** @@ -58,7 +77,7 @@ test_parse_multiple_loci(void **state) assert_int_equal( bim_num_loci( &bim_file ), 2 ); locus = *bim_get_locus( &bim_file, 0 ); - assert_int_equal( locus.chromosome, 1 ); + assert_string_equal( locus.chromosome, "1" ); assert_string_equal( locus.name, "rs1" ); assert_true( fabs( locus.position - 0.0 ) <= 1e-6 ); assert_int_equal( locus.bp_position, 1234567 ); @@ -66,7 +85,7 @@ test_parse_multiple_loci(void **state) assert_string_equal( locus.allele2, "C" ); locus = *bim_get_locus( &bim_file, 1 ); - assert_int_equal( locus.chromosome, 1 ); + assert_string_equal( locus.chromosome, "1" ); assert_string_equal( locus.name, "rs2" ); assert_true( fabs( locus.position - 0.23 ) <= 1e-6 ); assert_int_equal( locus.bp_position, 7654321 );