Logo Search packages:      
Sourcecode: inkscape version File versions

enum CRStatus cr_utils_utf8_str_len_as_ucs4 ( const guchar *  a_in_start,
const guchar *  a_in_end,
gulong *  a_len 
)

Given a UTF-8 string buffer, calculates the length this string would have if it were encoded in UCS-4 (i.e. the number of characters it contains).

Parameters:
a_in_start a pointer to the beginning of the input UTF-8 string.
a_in_end a pointer to the end of the input UTF-8 string (points to the last byte of the buffer).
a_len out parameter the calculated length.
Returns:
CR_OK upon successful completion, an error code otherwise.

Definition at line 69 of file cr-utils.c.

Referenced by cr_utils_utf8_str_to_ucs1(), and cr_utils_utf8_str_to_ucs4().

{
        /*
         *Counts the characters encoded in the utf8 buffer
         *[a_in_start, a_in_end] (a_in_end is inclusive: it points
         *to the last byte of the buffer) and stores that count
         *in *a_len.
         *
         *Returns CR_OK on success, CR_BAD_PARAM_ERROR if any
         *argument is NULL, and CR_ENCODING_ERROR if the buffer holds
         *an invalid lead byte, an invalid continuation byte, or a
         *multi-byte sequence that is cut short by the end of the
         *buffer. On error *a_len is left at 0.
         */
        const guchar *byte_ptr = NULL;
        gulong len = 0;

        g_return_val_if_fail (a_in_start && a_in_end && a_len,
                              CR_BAD_PARAM_ERROR);
        *a_len = 0;

        for (byte_ptr = a_in_start; byte_ptr <= a_in_end; byte_ptr++) {
                /*
                 *total number of bytes (lead byte included)
                 *used to encode the current character.
                 */
                gint nb_bytes_2_decode = 0;

                if (*byte_ptr <= 0x7F) {
                        /*
                         *7 bits long char
                         *encoded over 1 byte:
                         * 0xxx xxxx
                         */
                        nb_bytes_2_decode = 1;

                } else if ((*byte_ptr & 0xE0) == 0xC0) {
                        /*
                         *up to 11 bits long char.
                         *encoded over 2 bytes:
                         *110x xxxx  10xx xxxx
                         */
                        nb_bytes_2_decode = 2;

                } else if ((*byte_ptr & 0xF0) == 0xE0) {
                        /*
                         *up to 16 bit long char
                         *encoded over 3 bytes:
                         *1110 xxxx  10xx xxxx  10xx xxxx
                         */
                        nb_bytes_2_decode = 3;

                } else if ((*byte_ptr & 0xF8) == 0xF0) {
                        /*
                         *up to 21 bits long char
                         *encoded over 4 bytes:
                         *1111 0xxx  10xx xxxx  10xx xxxx  10xx xxxx
                         */
                        nb_bytes_2_decode = 4;

                } else if ((*byte_ptr & 0xFC) == 0xF8) {
                        /*
                         *up to 26 bits long char
                         *encoded over 5 bytes.
                         *1111 10xx  10xx xxxx  10xx xxxx  
                         *10xx xxxx  10xx xxxx
                         */
                        nb_bytes_2_decode = 5;

                } else if ((*byte_ptr & 0xFE) == 0xFC) {
                        /*
                         *up to 31 bits long char
                         *encoded over 6 bytes:
                         *1111 110x  10xx xxxx  10xx xxxx  
                         *10xx xxxx  10xx xxxx  10xx xxxx
                         */
                        nb_bytes_2_decode = 6;

                } else {
                        /*
                         *BAD ENCODING
                         */
                        return CR_ENCODING_ERROR;
                }

                /*
                 *Walk over the remaining byte(s) (if any)
                 *of the current character and validate them.
                 *The decoded value itself is not needed to
                 *compute the length, so it is not assembled.
                 */
                for (; nb_bytes_2_decode > 1; nb_bytes_2_decode--) {
                        /*
                         *The sequence announces more bytes than
                         *the buffer holds: refuse to read past
                         *a_in_end.
                         */
                        if (byte_ptr >= a_in_end) {
                                return CR_ENCODING_ERROR;
                        }

                        /*move to the next byte */
                        byte_ptr++;

                        /*byte pattern must be: 10xx xxxx */
                        if ((*byte_ptr & 0xC0) != 0x80) {
                                return CR_ENCODING_ERROR;
                        }
                }

                len++;
        }

        *a_len = len;

        return CR_OK;
}


Generated by  Doxygen 1.6.0   Back to index