-
Notifications
You must be signed in to change notification settings - Fork 180
BCP Out performance #705
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
BCP Out performance #705
Changes from all commits
2efa499
f905073
566731d
0572a7f
8de97ba
3dc29f9
8d33530
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -41,9 +41,11 @@ | |
| #endif | ||
|
|
||
| #include <freetds/tds.h> | ||
| #include <freetds/checks.h> | ||
| #include <freetds/iconv.h> | ||
| #include <freetds/convert.h> | ||
| #include <freetds/bytes.h> | ||
| #include <freetds/stream.h> | ||
| #include <freetds/utils/string.h> | ||
| #include <freetds/encodings.h> | ||
| #include <freetds/replacements.h> | ||
|
|
@@ -52,6 +54,8 @@ | |
| #include <syberror.h> | ||
| #include <dblib.h> | ||
|
|
||
| #define BCP_DEFAULT_DATEFMT "%Y-%m-%d %H:%M:%S.%z" | ||
|
|
||
| #define HOST_COL_CONV_ERROR 1 | ||
| #define HOST_COL_NULL_ERROR 2 | ||
|
|
||
|
|
@@ -85,6 +89,7 @@ static int rtrim_u16(uint16_t *str, int len, uint16_t space); | |
| static STATUS _bcp_read_hostfile(DBPROCESS * dbproc, FILE * hostfile, bool *row_error, bool skip); | ||
| static int _bcp_readfmt_colinfo(DBPROCESS * dbproc, char *buf, BCP_HOSTCOLINFO * ci); | ||
| static int _bcp_get_term_var(const BYTE * pdata, const BYTE * term, int term_len); | ||
| static int _bcp_strftime(TDS_UCHAR* buf, size_t maxsize, const char* format, const TDSDATEREC* dr, int prec); | ||
|
|
||
| /* | ||
| * "If a host file is being used ... the default data formats are as follows: | ||
|
|
@@ -765,9 +770,17 @@ _bcp_convert_out(DBPROCESS * dbproc, TDSCOLUMN *curcol, BCP_HOSTCOLINFO *hostcol | |
| if (is_datetime_type(srctype) && is_ascii_type(hostcol->datatype)) { | ||
| TDSDATEREC when; | ||
|
|
||
| /* Some date/time types have variable precision (e.g. MS datetime2), | ||
| * and set curcol->column_prec with that precision. | ||
| * DATETIME is printed with precision 3 by MS BCP & ASE BCP however | ||
| * curcol->column_prec is not set for that type. | ||
| * (It might be better to set column_prec at load time for all | ||
| * of the datetime types...) | ||
| */ | ||
| int prec = curcol->column_prec ? curcol->column_prec : 3; | ||
|
|
||
| tds_datecrack(srctype, src, &when); | ||
| buflen = (int)tds_strftime((TDS_CHAR *)(*p_data), 256, | ||
| bcpdatefmt, &when, 3); | ||
| buflen = _bcp_strftime(*p_data, 256, bcpdatefmt, &when, prec); | ||
| } else if (srclen == 0 && is_variable_type(curcol->column_type) | ||
| && is_ascii_type(hostcol->datatype)) { | ||
| /* | ||
|
|
@@ -847,7 +860,7 @@ bcp_cache_prefix_len(BCP_HOSTCOLINFO *hostcol, const TDSCOLUMN *curcol) | |
| } | ||
|
|
||
| static RETCODE | ||
| bcp_write_prefix(FILE *hostfile, BCP_HOSTCOLINFO *hostcol, TDSCOLUMN *curcol, int buflen) | ||
| bcp_write_prefix(TDSFILEOUTSTREAM *hoststream, BCP_HOSTCOLINFO *hostcol, TDSCOLUMN *curcol, int buflen) | ||
| { | ||
| union { | ||
| TDS_TINYINT ti; | ||
|
|
@@ -874,10 +887,7 @@ bcp_write_prefix(FILE *hostfile, BCP_HOSTCOLINFO *hostcol, TDSCOLUMN *curcol, in | |
| u.li = buflen; | ||
| break; | ||
| } | ||
| if (fwrite(&u, plen, 1, hostfile) == 1) | ||
| return SUCCEED; | ||
|
|
||
| return FAIL; | ||
| return TDS_SUCCEED(tds_fileout_stream_put(hoststream, &u, plen)) ? SUCCEED : FAIL; | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -894,7 +904,8 @@ bcp_write_prefix(FILE *hostfile, BCP_HOSTCOLINFO *hostcol, TDSCOLUMN *curcol, in | |
| static RETCODE | ||
| _bcp_exec_out(DBPROCESS * dbproc, DBINT * rows_copied) | ||
| { | ||
| FILE *hostfile = NULL; | ||
| TDSFILEOUTSTREAM hoststream; | ||
| FILE* hostfp = NULL; | ||
|
freddy77 marked this conversation as resolved.
|
||
| TDS_UCHAR *data = NULL; | ||
| int i; | ||
|
|
||
|
|
@@ -918,9 +929,13 @@ _bcp_exec_out(DBPROCESS * dbproc, DBINT * rows_copied) | |
| tds = dbproc->tds_socket; | ||
| assert(tds); | ||
|
|
||
| /* See if they want a BCP date format different from the default. | ||
| * The ASE bcp client does use the locale for bcp out; but all bcp clients | ||
| * appear to successfully read this format for input, regardless of locale. | ||
| */ | ||
| bcpdatefmt = getenv("FREEBCP_DATEFMT"); | ||
| if (!bcpdatefmt) | ||
| bcpdatefmt = "%Y-%m-%d %H:%M:%S.%z"; | ||
| if (bcpdatefmt && !strcmp(bcpdatefmt, BCP_DEFAULT_DATEFMT)) | ||
| bcpdatefmt = NULL; | ||
|
|
||
| if (dbproc->bcpinfo->direction == DB_QUERYOUT ) { | ||
| if (TDS_FAILED(tds_submit_query(tds, tds_dstr_cstr(&dbproc->bcpinfo->tablename)))) | ||
|
|
@@ -957,11 +972,11 @@ _bcp_exec_out(DBPROCESS * dbproc, DBINT * rows_copied) | |
| * TODO above we allocate many buffer just to convert and store | ||
| * to file.. avoid all that passages... | ||
| */ | ||
|
|
||
| if (!(hostfile = fopen(dbproc->hostfileinfo->hostfile, "w"))) { | ||
| if (!(hostfp = fopen(dbproc->hostfileinfo->hostfile, "w"))) { | ||
| dbperror(dbproc, SYBEBCUO, errno); | ||
| goto Cleanup; | ||
| } | ||
| tds_fileout_stream_init(&hoststream, hostfp, _IOFBF); | ||
|
|
||
| /* fetch a row of data from the server */ | ||
|
|
||
|
|
@@ -997,7 +1012,7 @@ _bcp_exec_out(DBPROCESS * dbproc, DBINT * rows_copied) | |
| } | ||
|
|
||
| /* The prefix */ | ||
| if (bcp_write_prefix(hostfile, hostcol, curcol, buflen) != SUCCEED) | ||
| if (bcp_write_prefix(&hoststream, hostcol, curcol, buflen) != SUCCEED) | ||
| goto write_error; | ||
|
|
||
| /* The data */ | ||
|
|
@@ -1006,23 +1021,25 @@ _bcp_exec_out(DBPROCESS * dbproc, DBINT * rows_copied) | |
| } | ||
|
|
||
| if (buflen > 0) { | ||
| if (fwrite(data, buflen, 1, hostfile) != 1) | ||
| if ( !TDS_SUCCEED(tds_fileout_stream_put(&hoststream, data, buflen)) ) | ||
| goto write_error; | ||
| } | ||
|
|
||
| /* The terminator */ | ||
| if (hostcol->terminator && hostcol->term_len > 0) { | ||
| if (fwrite(hostcol->terminator, hostcol->term_len, 1, hostfile) != 1) | ||
| if ( !TDS_SUCCEED(tds_fileout_stream_put(&hoststream, hostcol->terminator, hostcol->term_len)) ) | ||
| goto write_error; | ||
| } | ||
| } | ||
| rows_written++; | ||
| } | ||
| if (fclose(hostfile) != 0) { | ||
|
|
||
| tds_fileout_stream_flush(&hoststream); | ||
| if (hostfp && fclose(hostfp) != 0) { | ||
| dbperror(dbproc, SYBEBCUC, errno); | ||
| goto Cleanup; | ||
| } | ||
| hostfile = NULL; | ||
| hostfp = NULL; | ||
|
|
||
| if (row_of_query + 1 < dbproc->hostfileinfo->firstrow) { | ||
| /* | ||
|
|
@@ -1043,8 +1060,8 @@ _bcp_exec_out(DBPROCESS * dbproc, DBINT * rows_copied) | |
| dbperror(dbproc, SYBEBCWE, errno); | ||
|
|
||
| Cleanup: | ||
| if (hostfile) | ||
| fclose(hostfile); | ||
| if (hostfp) | ||
| fclose(hostfp); | ||
| free(data); | ||
| return FAIL; | ||
| } | ||
|
|
@@ -2421,3 +2438,59 @@ _bcp_free_storage(DBPROCESS * dbproc) | |
| dbproc->bcpinfo = NULL; | ||
| } | ||
|
|
||
| /** Fast strftime for default bcp format */ | ||
| static int _bcp_strftime(TDS_UCHAR* buf, size_t maxsize, const char* format, const TDSDATEREC* dr, int prec) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given that
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. I was thinking in terms of avoiding the need for a cast at the call site. |
||
| { | ||
| char* cbuf = (char*)buf; | ||
|
|
||
| if (format) | ||
| return (int)tds_strftime(cbuf, maxsize, format, dr, prec); | ||
|
|
||
| cbuf += tds_u32toafast(cbuf, dr->year); | ||
|
|
||
| *cbuf++ = '-'; | ||
| tds_02dfast(cbuf, dr->month + 1); | ||
| cbuf += 2; | ||
|
|
||
| *cbuf++ = '-'; | ||
| tds_02dfast(cbuf, dr->day); | ||
| cbuf += 2; | ||
|
|
||
| *cbuf++ = ' '; | ||
| tds_02dfast(cbuf, dr->hour); | ||
| cbuf += 2; | ||
|
|
||
| *cbuf++ = ':'; | ||
| tds_02dfast(cbuf, dr->minute); | ||
| cbuf += 2; | ||
|
|
||
| *cbuf++ = ':'; | ||
| tds_02dfast(cbuf, dr->second); | ||
| cbuf += 2; | ||
|
|
||
| if (prec > 0) | ||
| { | ||
| char ibuf[10]; | ||
| *cbuf++ = '.'; | ||
|
|
||
| /* Mask off a negative sign (input should not contain this anyway) */ | ||
| memset(ibuf, '0', tds_u32toafast_right(ibuf, dr->decimicrosecond & 0x7FFFFFFF)); | ||
| /* Skip first 3 digits here, as decimicrosecond should not exceed 9,999,999 */ | ||
| memcpy(cbuf, ibuf + 3, prec); | ||
| cbuf += prec; | ||
| } | ||
| *cbuf = '\0'; | ||
|
|
||
| #if ENABLE_EXTRA_CHECKS | ||
| { | ||
| char tbuf[256]; | ||
| tds_strftime(tbuf, sizeof tbuf, BCP_DEFAULT_DATEFMT, dr, prec); | ||
| if (strcmp(tbuf, (char*)buf)) | ||
| { | ||
| fprintf(stderr, "_bcp_strftime(%s) does not match tds_strftime(%s)\n", tbuf, (char *)buf); | ||
| tds_extra_assert(!"_bcp_strftime mismatch"); | ||
| } | ||
| } | ||
| #endif | ||
| return (int)(cbuf - (char*)buf); | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looking at the caller and to avoid "surprises" why not saying that range is 0-63 and do a
& 63intds_02dfast?I would also use an underscore before "fast", like
tds_u32toa_fast_right.Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, although it might also be surprising to another coder who assumed
02dwould mean 0-99 ! perhaps a more meaningful function name is possible.Note that
tds_u32toafast_rightdoes use the full 0-99 capability of the underlying functionThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What about
??
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK but for performance I think it would be best to to only have this on code paths where it is needed (e.g.
tds_u32toafast_rightdoes not need it since that call site guarantees the value is in range already). So perhaps it could go intds_02dfastbut not intds02dfast. And include aTDS_LIKELY.You would have noticed I had two functions
tds02dfastandtds_02dfast, this is so thattds02dfastcan be inlined inconvert.c. It would be nice to have it inlinable everywhere but I wasn't sure about the support for having inline function bodies in header files across all the compilers and linkers that we need to support.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's why I was suggesting the
v &= 63fortds_02dfast, it's just a single machine instruction with no conditions.