-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathstrcountutf832.asm
More file actions
162 lines (132 loc) · 5.69 KB
/
strcountutf832.asm
File metadata and controls
162 lines (132 loc) · 5.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
;************************* strcountutf832.asm ***********************************
; Author: Agner Fog
; Date created: 2011-07-20
; Last modified: 2013-09-11
; Description:
; size_t strcount_UTF8(const char * str);
; Counts the number of characters in a UTF-8 encoded string.
;
; This function does not check whether the string contains valid UTF-8 code; it
; simply counts all bytes except continuation bytes 10xxxxxxB.
;
; Note that the SSE4.2 version of this function may read up to 15 bytes beyond
; the end of the string. This is rarely a problem, but it can in principle
; generate a protection violation if a string is placed at the end of the data
; segment.
;
; Position-independent code is generated if POSITIONINDEPENDENT is defined.
;
; CPU dispatching included for 386 and SSE4.2 instruction sets.
;
; Copyright (c) 2011 GNU General Public License www.gnu.org/licenses
;******************************************************************************
global _strcount_UTF8: function
; Direct entries to CPU-specific versions
global _strcount_UTF8Generic: function
global _strcount_UTF8SSE42: function
; Imported from instrset32.asm:
extern _InstructionSet ; Instruction set for CPU dispatcher
section .data
; 16-byte constant holding eight (lower, upper) byte pairs 80h, BFh.
; It is loaded with movdqa by _strcount_UTF8SSE42, hence the 16-byte alignment,
; and used as the range operand of pcmpistrm.
align 16
byterange: times 8 DB 10000000b, 10111111b ; range for UTF-8 continuation bytes
section .text
;******************************************************************************
; strcount_UTF8 function
;******************************************************************************
_strcount_UTF8: ; function dispatching
; size_t strcount_UTF8(const char * str);
; Public entry point. Jumps through the pointer strcount_UTF8Dispatch, which
; initially holds the address of strcount_UTF8CPUDispatch and is overwritten by
; that routine with the address of the selected implementation.
; The stack is not modified here, so the target sees str at [esp+4].
%IFNDEF POSITIONINDEPENDENT
jmp near [strcount_UTF8Dispatch] ; Go to appropriate version, depending on instruction set
%ELSE ; Position-independent code
; edx is overwritten by the thunk; both implementations reload it from [esp+4].
call get_thunk_edx ; get reference point for position-independent code
RP1: ; reference point edx = offset RP1
; Make the following instruction with address relative to RP1:
jmp near [edx+strcount_UTF8Dispatch-RP1]
%ENDIF
;******************************************************************************
; strcount_UTF8 function SSE4.2 version
;******************************************************************************
align 16
_strcount_UTF8SSE42: ; SSE4.2 version
; size_t strcount_UTF8SSE42(const char * str);
; In:       [esp+4] = str, zero-terminated string
; Out:      eax = number of bytes before the terminator that are not
;           continuation bytes (80h..BFh)
; Clobbers: ecx, edx, xmm0, xmm1, flags
; Note:     the string is read in 16-byte blocks starting at str, so up to
;           15 bytes after the terminating zero may be read.
%IFNDEF POSITIONINDEPENDENT
        movdqa  xmm1, [byterange]       ; define range of continuation bytes to ignore
%ELSE   ; Position-independent code
        ; An absolute reference to byterange would require a load-time
        ; relocation inside the code section. Address it relative to a
        ; reference point instead, the same way the dispatcher is addressed.
        call    get_thunk_edx           ; edx = address of RP2
RP2:                                    ; reference point edx = offset RP2
        movdqa  xmm1, [edx+byterange-RP2] ; define range of continuation bytes to ignore
%ENDIF
        mov     edx, [esp+4]            ; edx = str (also discards the reference point)
        xor     ecx, ecx                ; ecx = characters counted so far
str_next:
        ; imm8 = 00110100b: unsigned bytes, range compare, invert only the
        ; valid bits, return a bit mask in xmm0. A mask bit is set for every
        ; byte before the terminator that lies outside 80h..BFh.
        pcmpistrm xmm1, [edx], 00110100b
        movd    eax, xmm0               ; mask -> eax; movd leaves the flags untouched
        jz      str_finished            ; ZF set: this block contains the terminating zero
        popcnt  eax, eax                ; characters in this block
        add     ecx, eax
        add     edx, 16                 ; advance to the next 16-byte block
        jmp     str_next
str_finished:
        popcnt  eax, eax                ; characters in the final, partial block
        add     eax, ecx                ; return total
        ret
;******************************************************************************
; strcount_UTF8 function generic
;******************************************************************************
align 8
_strcount_UTF8Generic:
; size_t strcount_UTF8Generic(const char * str);
; In:       [esp+4] = str, zero-terminated string
; Out:      eax = number of bytes before the terminator that are not
;           continuation bytes (80h..BFh)
; Clobbers: ecx, edx, flags
        xor     eax, eax                ; eax = character counter
        mov     edx, [esp+4]            ; edx = str
.next_byte:
        movzx   ecx, byte [edx]         ; ecx = one byte from string, zero-extended
        test    ecx, ecx
        jz      .done                   ; terminating zero
        ; Read as signed bytes, the continuation range 80h..BFh is -128..-65,
        ; so a non-zero byte starts a character exactly when it is > -65.
        cmp     cl, -65
        setg    cl                      ; ecx = 1 if not a continuation byte, else 0
        add     eax, ecx
        add     edx, 1                  ; next byte
        jmp     .next_byte
.done:
        ret
;_strcount_UTF8Generic end
; ********************************************************************************
%IFDEF POSITIONINDEPENDENT
; Helper for position-independent code, assembled only when
; POSITIONINDEPENDENT is defined.
; Out:      edx = return address, i.e. the address of the instruction that
;           follows the call to this thunk
; Clobbers: edx only; the stack is unchanged after return
get_thunk_edx: ; load caller address into edx for position-independent code
mov edx, [esp]
ret
%ENDIF
; ********************************************************************************
; CPU dispatching for strcount_UTF8. This is executed only once
; ********************************************************************************
strcount_UTF8CPUDispatch:
; Reached through the pointer strcount_UTF8Dispatch, whose initial value is
; this address. Calls _InstructionSet, stores the address of the selected
; implementation in strcount_UTF8Dispatch so that later calls go there
; directly, then jumps to that implementation.
; A return value below 10 from _InstructionSet selects the generic version;
; 10 or above selects the SSE4.2 version.
; NOTE(review): assumes _InstructionSet (external) returns with the stack
; balanced, so the selected version still finds str at [esp+4] - confirm.
%IFNDEF POSITIONINDEPENDENT
; get supported instruction set
call _InstructionSet
; Point to generic version of strcount_UTF8
mov ecx, _strcount_UTF8Generic
cmp eax, 10 ; check SSE4.2
jb Q100
; SSE4.2 supported
; Point to SSE4.2 version of strcount_UTF8
mov ecx, _strcount_UTF8SSE42
Q100: mov [strcount_UTF8Dispatch], ecx
; Continue in appropriate version
jmp ecx
%ELSE ; Position-independent version
; get supported instruction set
call _InstructionSet
; get_thunk_edx writes only edx, so the result in eax is still intact below
call get_thunk_edx
RP11: ; reference point edx
; Point to generic version
lea ecx, [edx+_strcount_UTF8Generic-RP11]
cmp eax, 10 ; check SSE4.2
jb Q100
; SSE4.2 supported
; Point to SSE4.2 version of strcount_UTF8
lea ecx, [edx+_strcount_UTF8SSE42-RP11]
Q100: mov [edx+strcount_UTF8Dispatch-RP11], ecx
; Continue in appropriate version
jmp ecx
%ENDIF
SECTION .data
; Pointer to appropriate versions. Initially point to dispatcher
; _strcount_UTF8 jumps through this pointer. strcount_UTF8CPUDispatch
; overwrites it with the address of the selected implementation.
strcount_UTF8Dispatch DD strcount_UTF8CPUDispatch
%IFDEF POSITIONINDEPENDENT
; Fix potential problem in Mac linker
; (two zero dwords of padding after the pointer; nothing in this file reads them)
DD 0, 0
%ENDIF
SECTION .bss
; Reserve 16 bytes of uninitialized storage. RESQ is used instead of DQ
; because initialized data in a nobits section is warned about or ignored by NASM.
; NOTE(review): nothing in this file references this storage; presumably it
; only ensures the section is not empty - confirm before removing.
resq 2