@@ -252,6 +252,33 @@ def test_refresh_control(self):
252252 self .assertIs (win .is_wintouched (), syncok )
253253 self .assertIs (stdscr .is_wintouched (), syncok )
254254
255+ # Many tests below use a common set of non-ASCII cases, each applied only
256+ # when the window encoding can represent it -- so the whole suite is meant to
257+ # be run under several locales (e.g. ISO-8859-1, ISO-8859-15, KOI8-U):
258+ # 'A'/'a' ASCII
259+ # 'é' common to the Latin encodings
260+ # '¤'/'€'/'є' byte 0xA4 in ISO-8859-1 / ISO-8859-15 / KOI8-U
261+ # Precomposed characters are used so a round-trip does not depend on the Unicode normalization form.
262+
263+ def _encodable (self , s ):
264+ # Wide characters are only supported in a locale that can encode them: report whether s is.
265+ try :
266+ s .encode (self .stdscr .encoding )
267+ except UnicodeEncodeError :
268+ return False
269+ return True
270+
271+ def _read_char (self , y , x ):
272+ # Return, as a str, the character stored in the stdscr cell at (y, x). inch()
273+ # is unusable here: on a wide build it returns the low 8 bits of the
274+ # character's code point rather than its locale-encoded byte, mangling
275+ # anything outside Latin-1. Prefer in_wch(), which reads the wide cell directly;
276+ # without it, read one cell with instr() and decode it from the window encoding.
277+ stdscr = self .stdscr
278+ if hasattr (stdscr , 'in_wch' ):
279+ return str (stdscr .in_wch (y , x ))
280+ return stdscr .instr (y , x , 1 ).decode (stdscr .encoding )
281+
255282 def test_output_character (self ):
256283 stdscr = self .stdscr
257284 encoding = stdscr .encoding
@@ -261,32 +288,98 @@ def test_output_character(self):
261288 stdscr .addch ('A' )
262289 stdscr .addch (b'A' )
263290 stdscr .addch (65 )
264- c = '\u20ac '
265- try :
266- stdscr .addch (c )
267- except UnicodeEncodeError :
268- self .assertRaises (UnicodeEncodeError , c .encode , encoding )
269- except OverflowError :
270- encoded = c .encode (encoding )
271- self .assertNotEqual (len (encoded ), 1 , repr (encoded ))
291+ # See _encodable for the character set. Each is either written (mapped
292+ # to a single byte), or raises UnicodeEncodeError (not in the encoding)
293+ # or OverflowError (a multibyte sequence, e.g. in UTF-8).
294+ for c in ('A' , '\u00e9 ' , '\u00a4 ' , '\u20ac ' , '\u0454 ' ):
295+ try :
296+ stdscr .addch (c )
297+ except UnicodeEncodeError :
298+ self .assertRaises (UnicodeEncodeError , c .encode , encoding )
299+ except OverflowError :
300+ encoded = c .encode (encoding )
301+ self .assertNotEqual (len (encoded ), 1 , repr (encoded ))
272302 stdscr .addch ('A' , curses .A_BOLD )
273303 stdscr .addch (1 , 2 , 'A' )
274304 stdscr .addch (2 , 3 , 'A' , curses .A_BOLD )
275305 self .assertIs (stdscr .is_wintouched (), True )
276306
307+ # The same characters supplied as an int chtype (a byte > 127). The
308+ # cell is read back with _read_char(), not inch(): on a wide build the
309+ # int is stored through the locale as a wide character that inch()
310+ # cannot represent for a character outside Latin-1.
311+ for c in ('é' , '¤' , '€' , 'є' ):
312+ try :
313+ b = c .encode (encoding )
314+ except UnicodeEncodeError :
315+ continue
316+ if len (b ) != 1 :
317+ continue
318+ # A wide build stores a character outside Latin-1 as a wide cell,
319+ # not as its encoded byte, so it cannot round-trip here.
320+ if ord (c ) > 0xff and hasattr (stdscr , 'get_wch' ):
321+ continue
322+ v = b [0 ]
323+ with self .subTest (c = c ):
324+ stdscr .addch (0 , 0 , v )
325+ self .assertEqual (self ._read_char (0 , 0 ), c )
326+ stdscr .addch (0 , 1 , v , curses .A_BOLD )
327+ self .assertEqual (self ._read_char (0 , 1 ), c )
328+ self .assertTrue (stdscr .inch (0 , 1 ) & curses .A_BOLD )
329+ stdscr .move (2 , 0 )
330+ stdscr .echochar (v )
331+ self .assertEqual (self ._read_char (2 , 0 ), c )
332+ # insch() round-trips a byte only where its code point equals
333+ # the byte value (Latin-1): on a wide build ncurses winsch
334+ # stores a printable byte directly as a code point instead of
335+ # decoding it through the locale.
336+ if ord (c ) < 0x100 :
337+ stdscr .insch (1 , 0 , v )
338+ self .assertEqual (self ._read_char (1 , 0 ), c )
339+
340+ # The same characters supplied as a str. Unlike the int path above, a
341+ # str is stored as a wide-character cell on a wide build, so every
342+ # encodable character round-trips, insch() included. A multibyte
343+ # character does not fit a cell on a narrow build and is skipped.
344+ wide = hasattr (stdscr , 'in_wch' )
345+ for c in ('é' , '¤' , '€' , 'є' ):
346+ if not self ._encodable (c ):
347+ continue
348+ if not wide and len (c .encode (encoding )) != 1 :
349+ continue
350+ # A wide build stores a character outside Latin-1 as a wide cell,
351+ # not as its encoded byte, so it cannot round-trip here.
352+ if ord (c ) > 0xff and hasattr (stdscr , 'get_wch' ):
353+ continue
354+ with self .subTest (c = c ):
355+ stdscr .addch (0 , 0 , c )
356+ self .assertEqual (self ._read_char (0 , 0 ), c )
357+ stdscr .addch (0 , 1 , c , curses .A_BOLD )
358+ self .assertEqual (self ._read_char (0 , 1 ), c )
359+ self .assertTrue (stdscr .inch (0 , 1 ) & curses .A_BOLD )
360+ stdscr .insch (1 , 0 , c )
361+ self .assertEqual (self ._read_char (1 , 0 ), c )
362+ stdscr .move (2 , 0 )
363+ stdscr .echochar (c )
364+ self .assertEqual (self ._read_char (2 , 0 ), c )
365+
277366 # echochar()
278367 stdscr .refresh ()
279368 stdscr .move (0 , 0 )
280369 stdscr .echochar ('A' )
281370 stdscr .echochar (b'A' )
282371 stdscr .echochar (65 )
283- with self .assertRaises ((UnicodeEncodeError , OverflowError )):
284- # Unicode is not fully supported yet, but at least it does
285- # not crash.
286- # It is supposed to fail because either the character is
287- # not encodable with the current encoding, or it is encoded to
288- # a multibyte sequence.
289- stdscr .echochar ('\u0114 ' )
372+ # See _encodable for the character set; as in the addch() loop above.
373+ for c in ('A' , '\u00e9 ' , '\u00a4 ' , '\u20ac ' , '\u0454 ' ):
374+ try :
375+ stdscr .echochar (c )
376+ except UnicodeEncodeError :
377+ # The character is not encodable with the current encoding.
378+ self .assertRaises (UnicodeEncodeError , c .encode , encoding )
379+ except OverflowError :
380+ # The character is encoded to a multibyte sequence.
381+ encoded = c .encode (encoding )
382+ self .assertNotEqual (len (encoded ), 1 , repr (encoded ))
290383 stdscr .echochar ('A' , curses .A_BOLD )
291384 self .assertIs (stdscr .is_wintouched (), False )
292385
@@ -296,14 +389,18 @@ def test_output_string(self):
296389 # addstr()/insstr()
297390 for func in [stdscr .addstr , stdscr .insstr ]:
298391 with self .subTest (func .__qualname__ ):
299- stdscr .move (0 , 0 )
300392 func ('abcd' )
301393 func (b'abcd' )
302- s = 'àßçđ'
303- try :
304- func (s )
305- except UnicodeEncodeError :
306- self .assertRaises (UnicodeEncodeError , s .encode , encoding )
394+ # Common and encoding-distinctive strings (see _encodable for the
395+ # 0xA4 set); 'àßçđ' is UTF-8-only. Each is written if the
396+ # encoding allows, else raises UnicodeEncodeError.
397+ for s in ('soupçon' , 'àßçđ' , 'soupçon ¤' , 'soupçon €' , 'дякую' ):
398+ stdscr .move (0 , 0 )
399+ try :
400+ func (s )
401+ except UnicodeEncodeError :
402+ self .assertRaises (UnicodeEncodeError , s .encode , encoding )
403+ stdscr .move (0 , 0 )
307404 func ('abcd' , curses .A_BOLD )
308405 func (1 , 2 , 'abcd' )
309406 func (2 , 3 , 'abcd' , curses .A_BOLD )
@@ -314,11 +411,14 @@ def test_output_string(self):
314411 stdscr .move (0 , 0 )
315412 func ('1234' , 3 )
316413 func (b'1234' , 3 )
317- s = '\u0661 \u0662 \u0663 \u0664 '
318- try :
319- func (s , 3 )
320- except UnicodeEncodeError :
321- self .assertRaises (UnicodeEncodeError , s .encode , encoding )
414+ # As above (see _encodable); Arabic-Indic digits are UTF-8-only.
415+ for s in ('caf\u00e9 ' , '\u0661 \u0662 \u0663 \u0664 ' , 'caf\u00e9 \u00a4 ' , 'caf\u00e9 \u20ac ' , '\u0434 \u044f \u043a \u0443 \u044e ' ):
416+ stdscr .move (0 , 0 )
417+ try :
418+ func (s , 3 )
419+ except UnicodeEncodeError :
420+ self .assertRaises (UnicodeEncodeError , s .encode , encoding )
421+ stdscr .move (0 , 0 )
322422 func ('1234' , 5 )
323423 func ('1234' , 3 , curses .A_BOLD )
324424 func (1 , 2 , '1234' , 3 )
@@ -408,6 +508,24 @@ def test_read_from_window(self):
408508 self .assertEqual (stdscr .instr (0 , 2 , 4 ), b'BCD ' )
409509 self .assertRaises (ValueError , stdscr .instr , - 2 )
410510 self .assertRaises (ValueError , stdscr .instr , 0 , 2 , - 2 )
511+ # A non-ASCII character of an 8-bit locale reads back as its encoded
512+ # byte (see _encodable for the set). instr() returns the locale bytes
513+ # for any single-byte character; inch() packs the text into a chtype, so
514+ # on a wide build it only round-trips a Latin-1 codepoint (byte ==
515+ # codepoint).
516+ encoding = stdscr .encoding
517+ for ch in ('A' , 'é' , '¤' , '€' , 'є' ):
518+ try :
519+ b = ch .encode (encoding )
520+ except UnicodeEncodeError :
521+ continue
522+ if len (b ) != 1 :
523+ continue
524+ with self .subTest (ch = ch ):
525+ stdscr .addstr (2 , 0 , ch )
526+ self .assertEqual (stdscr .instr (2 , 0 , 1 ), b )
527+ if ord (ch ) < 0x100 :
528+ self .assertEqual (stdscr .inch (2 , 0 ) & curses .A_CHARTEXT , b [0 ])
411529
412530 def test_coordinate_errors (self ):
413531 # Addressing a cell outside the window raises curses.error.
@@ -445,6 +563,10 @@ def test_getch(self):
445563 self .assertEqual (win .getch (), b'm' [0 ])
446564 self .assertEqual (win .getch (), b'\n ' [0 ])
447565
566+ # A key value > 127 is delivered unchanged (it is not locale text).
567+ curses .ungetch (0xE9 )
568+ self .assertEqual (win .getch (), 0xE9 )
569+
448570 def test_getstr (self ):
449571 win = curses .newwin (5 , 12 , 5 , 2 )
450572 curses .echo ()
@@ -617,6 +739,33 @@ def test_background(self):
617739 self .assertEqual (win .inch (0 , 0 ), b'L' [0 ] | curses .A_REVERSE )
618740 self .assertEqual (win .inch (0 , 5 ), b'#' [0 ] | curses .A_REVERSE )
619741
742+ # A non-ASCII background character of an 8-bit locale reads back as its
743+ # encoded byte. See _encodable for the character set.
744+ win .bkgd (' ' )
745+ encoding = win .encoding
746+ for ch in ('é' , '¤' , '€' , 'є' ):
747+ try :
748+ b = ch .encode (encoding )
749+ except UnicodeEncodeError :
750+ continue
751+ if len (b ) != 1 :
752+ continue
753+ # A wide build stores a character outside Latin-1 as a wide cell,
754+ # not as its encoded byte, so it cannot round-trip here.
755+ if ord (ch ) > 0xff and hasattr (win , 'get_wch' ):
756+ continue
757+ with self .subTest (ch = ch ):
758+ win .bkgd (ch )
759+ self .assertEqual (win .getbkgd (), b [0 ])
760+ if ord (ch ) < 0x100 :
761+ # The same byte given as an int. A wide build stores it
762+ # through the locale, so only a Latin-1 byte round-trips.
763+ win .bkgd (' ' )
764+ win .bkgdset (b [0 ])
765+ self .assertEqual (win .getbkgd (), b [0 ])
766+ win .bkgd (b [0 ])
767+ self .assertEqual (win .getbkgd (), b [0 ])
768+
620769 def test_overlay (self ):
621770 srcwin = curses .newwin (5 , 18 , 3 , 4 )
622771 lorem_ipsum (srcwin )
@@ -709,6 +858,16 @@ def test_borders_and_lines(self):
709858 win .border (65 , 66 )
710859 win .border (65 )
711860 win .border ()
861+ # With no arguments, border() fills the edges with ACS line and corner
862+ # characters.
863+ chartext = curses .A_CHARTEXT
864+ maxy , maxx = win .getmaxyx ()
865+ self .assertEqual (win .inch (0 , 0 ) & chartext , curses .ACS_ULCORNER & chartext )
866+ self .assertEqual (win .inch (0 , maxx - 1 ) & chartext , curses .ACS_URCORNER & chartext )
867+ self .assertEqual (win .inch (maxy - 1 , 0 ) & chartext , curses .ACS_LLCORNER & chartext )
868+ self .assertEqual (win .inch (maxy - 1 , maxx - 1 ) & chartext , curses .ACS_LRCORNER & chartext )
869+ self .assertEqual (win .inch (0 , 1 ) & chartext , curses .ACS_HLINE & chartext )
870+ self .assertEqual (win .inch (1 , 0 ) & chartext , curses .ACS_VLINE & chartext )
712871
713872 win .box (':' , '~' )
714873 self .assertEqual (win .instr (0 , 1 , 8 ), b'~~~~~~~~' )
@@ -719,6 +878,11 @@ def test_borders_and_lines(self):
719878 self .assertRaises (TypeError , win .box , 65 , 66 , 67 )
720879 self .assertRaises (TypeError , win .box , 65 )
721880 win .box ()
881+ # With no arguments, box() likewise draws ACS corners and lines.
882+ self .assertEqual (win .inch (0 , 0 ) & chartext , curses .ACS_ULCORNER & chartext )
883+ self .assertEqual (win .inch (0 , maxx - 1 ) & chartext , curses .ACS_URCORNER & chartext )
884+ self .assertEqual (win .inch (0 , 1 ) & chartext , curses .ACS_HLINE & chartext )
885+ self .assertEqual (win .inch (1 , 0 ) & chartext , curses .ACS_VLINE & chartext )
722886
723887 win .move (1 , 2 )
724888 win .hline ('-' , 5 )
@@ -740,6 +904,43 @@ def test_borders_and_lines(self):
740904 self .assertEqual (win .inch (2 , 1 ), b';' [0 ] | curses .A_STANDOUT )
741905 self .assertEqual (win .inch (3 , 1 ), b'a' [0 ])
742906
907+ # A border or line character of an 8-bit locale round-trips as its
908+ # encoded byte. See _encodable for the character set.
909+ encoding = win .encoding
910+ for ch in ('é' , '¤' , '€' , 'є' ):
911+ try :
912+ b = ch .encode (encoding )
913+ except UnicodeEncodeError :
914+ continue
915+ if len (b ) != 1 :
916+ continue
917+ # A wide build stores a character outside Latin-1 as a wide cell,
918+ # not as its encoded byte, so it cannot round-trip here.
919+ if ord (ch ) > 0xff and hasattr (win , 'get_wch' ):
920+ continue
921+ with self .subTest (ch = ch ):
922+ win .erase ()
923+ win .hline (2 , 0 , ch , 5 )
924+ self .assertEqual (win .instr (2 , 0 , 5 ), b * 5 )
925+ win .vline (0 , 0 , ch , 3 )
926+ self .assertEqual (win .instr (0 , 0 , 1 ), b )
927+ self .assertEqual (win .instr (1 , 0 , 1 ), b )
928+ win .border (ch , ch , ch , ch , ch , ch , ch , ch )
929+ self .assertEqual (win .instr (0 , 0 ), b * maxx )
930+ if ord (ch ) < 0x100 :
931+ # The same byte given as an int. A wide build stores it
932+ # through the locale, so only a Latin-1 byte round-trips.
933+ v = b [0 ]
934+ win .erase ()
935+ win .hline (2 , 0 , v , 5 )
936+ self .assertEqual (win .instr (2 , 0 , 5 ), b * 5 )
937+ win .vline (0 , 0 , v , 3 )
938+ self .assertEqual (win .instr (1 , 0 , 1 ), b )
939+ win .border (v , v , v , v , v , v , v , v )
940+ self .assertEqual (win .instr (0 , 0 ), b * maxx )
941+ win .box (v , v )
942+ self .assertEqual (win .instr (0 , 1 , 1 ), b )
943+
743944 def test_unctrl (self ):
744945 # TODO: wunctrl()
745946 self .assertEqual (curses .unctrl (b'A' ), b'A' )
@@ -748,6 +949,19 @@ def test_unctrl(self):
748949 self .assertEqual (curses .unctrl (b'\n ' ), b'^J' )
749950 self .assertEqual (curses .unctrl ('\n ' ), b'^J' )
750951 self .assertEqual (curses .unctrl (10 ), b'^J' )
952+ # A printable non-ASCII byte of an 8-bit locale is returned unchanged.
953+ # See _encodable for the character set.
954+ encoding = self .stdscr .encoding
955+ for ch in ('é' , '¤' , '€' , 'є' ):
956+ try :
957+ b = ch .encode (encoding )
958+ except UnicodeEncodeError :
959+ continue
960+ if len (b ) != 1 :
961+ continue
962+ with self .subTest (ch = ch ):
963+ self .assertEqual (curses .unctrl (ch ), b )
964+ self .assertEqual (curses .unctrl (b [0 ]), b ) # the byte as an int
751965 self .assertRaises (TypeError , curses .unctrl , b'' )
752966 self .assertRaises (TypeError , curses .unctrl , b'AB' )
753967 self .assertRaises (TypeError , curses .unctrl , '' )
@@ -1459,7 +1673,8 @@ def test_issue6243(self):
14591673 def test_unget_wch (self ):
14601674 stdscr = self .stdscr
14611675 encoding = stdscr .encoding
1462- for ch in ('a' , '\xe9 ' , '\u20ac ' , '\U0010FFFF ' ):
1676+ # See _encodable for the character set, plus a non-BMP character.
1677+ for ch in ('a' , '\xe9 ' , '\xa4 ' , '\u20ac ' , '\u0454 ' , '\U0010FFFF ' ):
14631678 try :
14641679 ch .encode (encoding )
14651680 except UnicodeEncodeError :
0 commit comments