Eiffel Media API
Overview Cluster Class Classes Index      Previous Next      Top Features

kernel.unicode

Class UC_UTF16_ROUTINES


Direct ancestors

KL_IMPORTED_ANY_ROUTINES

Features

Invariants

indexing

description

UTF-16 encoding routines

library

Gobo Eiffel Kernel Library

copyright

Copyright (c) 2002, Eric Bezault and others

license

Eiffel Forum License v2 (see forum.txt)

date

$Date: 2005/09/23 13:22:11 $

revision

$Revision: 1.6 $

class

UC_UTF16_ROUTINES

inherit

KL_IMPORTED_ANY_ROUTINES

feature -- Access

any_: KL_ANY_ROUTINES

-- Routines that ought to be in class ANY

-- (From KL_IMPORTED_ANY_ROUTINES)

ensure
any_routines_not_void: Result /= Void

feature -- Status report

valid_utf16 (a_string: STRING): BOOLEAN

-- Are the bytes in a_string a valid UTF-16 encoding?
-- 'a_string' has one byte per character.
-- Default to big endian when no BOM.

require
a_string_not_void: a_string /= Void
a_string_is_string: ANY_.same_types (a_string, "")
ensure
empty_is_true: a_string.count = 0 implies Result
utf16_even_count: Result implies ((a_string.count \\ 2) = 0)

feature -- Endian-ness detection

is_endian_detection_character (a_byte, other_byte: INTEGER): BOOLEAN

-- Can these two bytes represent ZERO WIDTH NO-BREAK SPACE (the byte order mark)?
-- (It has to be Unicode character 0xFEFF, because 0xFFFE is not a valid character.)

require
a_byte_is_byte: is_byte (a_byte)
other_byte_is_byte: is_byte (other_byte)
ensure
definition: Result = (a_byte.min (other_byte) = Hex_fe and a_byte.max (other_byte) = Hex_ff)
is_endian_detection_character_least_first (first, second: INTEGER): BOOLEAN

-- Do the two bytes first and second represent the character
-- 0xFEFF with first being the least significant byte?

require
a_byte_is_byte: is_byte (first)
other_byte_is_byte: is_byte (second)
ensure
definition: Result = (is_endian_detection_character (first, second) and (first = Hex_ff))
is_endian_detection_character_most_first (first, second: INTEGER): BOOLEAN

-- Do the two bytes first and second represent the character
-- 0xFEFF with first being the most significant byte?

require
a_byte_is_byte: is_byte (first)
other_byte_is_byte: is_byte (second)
ensure
definition: Result = (is_endian_detection_character (first, second) and (first = Hex_fe))

feature -- Surrogate

is_byte (a: INTEGER): BOOLEAN

-- Is a a byte?

ensure
definition: Result = (a >= 0 and a < Hex_100)
is_high_surrogate (a_most: INTEGER): BOOLEAN

-- Is this a high surrogate character?

require
byte: is_byte (a_most)
is_low_surrogate (a_most: INTEGER): BOOLEAN

-- Is this a low surrogate character?

require
byte: is_byte (a_most)
is_surrogate (a_most: INTEGER): BOOLEAN

-- Is this a surrogate character (high or low)?

require
byte: is_byte (a_most)
least_10_bits (msb, lsb: INTEGER): INTEGER

-- Least significant 10 bits of a UTF-16 byte pair

require
msb_byte: is_byte (msb)
lsb_byte: is_byte (lsb)
surrogate: is_surrogate (msb)
ensure
ten_bits: Result >= 0 and Result < Hex_400
surrogate (a_high_10: INTEGER; a_low_10: INTEGER): INTEGER

-- Surrogate from high and low values

require
high_10: a_high_10 >= 0 and a_high_10 < 1024
low_10: a_low_10 >= 0 and a_low_10 < 1024
ensure
more_than_16bits: Result >= Hex_10000
surrogate_from_bytes (a_high_most, a_high_least, a_low_most, a_low_least: INTEGER): INTEGER

-- Surrogate from bytes

require
surrogate_high: is_high_surrogate (a_high_most)
high_least_byte: is_byte (a_high_least)
surrogate_low: is_low_surrogate (a_low_most)
low_least_byte: is_byte (a_low_least)
ensure
more_than_16bits: Result >= Hex_10000

invariant


-- From ANY
reflexive_equality: standard_is_equal (Current)
reflexive_conformance: conforms_to (Current)

end