Eiffel Media API
Overview Cluster Class Classes Index      Previous Next      Top Features

kernel.unicode

Class UC_UTF8_ROUTINES


Direct ancestors

UC_IMPORTED_UNICODE_ROUTINES, KL_IMPORTED_STRING_ROUTINES, KL_IMPORTED_INTEGER_ROUTINES, KL_IMPORTED_ANY_ROUTINES, UC_STRING_HANDLER

Features

Invariants

indexing

description

UTF-8 encoding routines

library

Gobo Eiffel Kernel Library

copyright

Copyright (c) 2001, Eric Bezault and others

license

Eiffel Forum License v2 (see forum.txt)

date

$Date: 2005/06/29 13:19:29 $

revision

$Revision: 1.11 $

class

UC_UTF8_ROUTINES

inherit

UC_IMPORTED_UNICODE_ROUTINES
KL_IMPORTED_STRING_ROUTINES
KL_IMPORTED_INTEGER_ROUTINES
KL_IMPORTED_ANY_ROUTINES
UC_STRING_HANDLER

feature -- Access

any_: KL_ANY_ROUTINES

-- Routines that ought to be in class ANY

-- (From KL_IMPORTED_ANY_ROUTINES)

ensure
any_routines_not_void: Result /= Void
integer_: KL_INTEGER_ROUTINES

-- Routines that ought to be in class INTEGER

-- (From KL_IMPORTED_INTEGER_ROUTINES)

ensure
integer_routines_not_void: Result /= Void
string_: KL_STRING_ROUTINES

-- Routines that ought to be in class STRING

-- (From KL_IMPORTED_STRING_ROUTINES)

ensure
string_routines_not_void: Result /= Void
encoded_first_value (a_byte: CHARACTER): INTEGER

-- Value encoded in first byte

require
is_encoded_first_byte: is_encoded_first_byte (a_byte)
ensure
value_positive: Result >= 0
value_small_enough: Result < 128
encoded_next_value (a_byte: CHARACTER): INTEGER

-- Value encoded in one of the next bytes

require
is_encoded_next_byte: is_encoded_next_byte (a_byte)
ensure
value_positive: Result >= 0
value_small_enough: Result < 64
unicode: UC_UNICODE_ROUTINES

-- Unicode routines

-- (From UC_IMPORTED_UNICODE_ROUTINES)

ensure
unicode_not_void: Result /= Void

feature -- Measurement

character_byte_count (c: CHARACTER): INTEGER

-- Number of bytes needed to encode character
-- c with the UTF-8 encoding

ensure
character_byte_count_large_enough: Result >= 1
character_byte_count_small_enough: Result <= 6
code_byte_count (a_code: INTEGER): INTEGER

-- Number of bytes needed to encode unicode character
-- of code a_code with the UTF-8 encoding

require
valid_code: unicode.valid_code (a_code)
ensure
code_byte_count_large_enough: Result >= 1
code_byte_count_small_enough: Result <= 6
encoded_byte_count (a_byte: CHARACTER): INTEGER

-- Number of bytes which were necessary to encode
-- the unicode character whose first byte is a_byte

require
is_encoded_first_byte: is_encoded_first_byte (a_byte)
ensure
encoded_byte_code_large_enough: Result >= 1
encoded_byte_code_small_enough: Result <= 6
substring_byte_count (a_string: STRING; start_index, end_index: INTEGER): INTEGER

-- Number of bytes needed to encode characters of
-- a_string between start_index and end_index
-- inclusive with the UTF-8 encoding

require
a_string_not_void: a_string /= Void
valid_start_index: 1 <= start_index
valid_end_index: end_index <= a_string.count
meaningful_interval: start_index <= end_index + 1
ensure
substring_byte_count_positive: Result >= 0

feature -- Status report

is_encoded_first_byte (a_byte: CHARACTER): BOOLEAN

-- Is a_byte the first byte in UTF-8 encoding?

is_encoded_next_byte (a_byte: CHARACTER): BOOLEAN

-- Is a_byte one of the next bytes in UTF-8 encoding?

is_endian_detection_character (a_first, a_second, a_third: CHARACTER): BOOLEAN

-- Is this three-byte sequence the UTF-8 encoded Byte Order Mark (BOM)?

ensure
result_start: Result implies is_endian_detection_character_start (a_first, a_second)
is_endian_detection_character_start (a_first, a_second: CHARACTER): BOOLEAN

-- Are these characters the first two bytes of the UTF-8 encoded Byte Order Mark (BOM)?

valid_utf8 (a_string: STRING): BOOLEAN

-- Are the bytes in a_string a valid UTF-8 encoding?

require
a_string_not_void: a_string /= Void
a_string_is_string: ANY_.same_types (a_string, "")

feature -- Element change

append_code_to_utf8 (a_utf8: STRING; a_code: INTEGER)

-- Add UTF-8 encoded character of code a_code
-- at the end of a_utf8.

require
a_utf8_not_void: a_utf8 /= Void
a_utf8_is_string: ANY_.same_types (a_utf8, "")
a_utf8_valid: valid_utf8 (a_utf8)
valid_code: unicode.valid_code (a_code)
ensure
a_utf8_valid: valid_utf8 (a_utf8)

feature -- Conversion

to_utf8 (a_string: STRING): STRING

-- New STRING made up of bytes corresponding to
-- the UTF-8 representation of a_string

require
a_string_not_void: a_string /= Void
ensure
to_utf8_not_void: Result /= Void
string_type: ANY_.same_types (Result, "")
valid_utf8: valid_utf8 (Result)

invariant


-- From ANY
reflexive_equality: standard_is_equal (Current)
reflexive_conformance: conforms_to (Current)

end