Logo Search packages:      
Sourcecode: inkscape version File versions

enum CRStatus cr_utils_utf8_to_ucs1 ( const guchar *  a_in,
gulong *  a_in_len,
guchar *  a_out,
gulong *  a_out_len 
)

Converts an utf8 buffer into an ucs1 buffer. The caller must know the size of the resulting converted buffer, and allocated it prior to calling this function.

Parameters:
a_in the input utf8 buffer to convert.
a_in_len in/out parameter. The size of the input utf8 buffer. After return, points to the number of bytes consumed by the function even in case of encoding error.
a_out out parameter. Points to the resulting buffer. Must be allocated by the caller. If the size of a_out is shorter than its required size, this function converts what it can and return a successfull status.
a_out_len in/out parameter. The size of the output buffer. After return, points to the number of bytes consumed even in case of encoding error.
Returns:
CR_OK upon successfull completion, an error code otherwise.

Definition at line 1007 of file cr-utils.c.

{
        gulong in_index = 0,
                out_index = 0,
                in_len = 0,
                out_len = 0;
        enum CRStatus status = CR_OK;

        /*
         *to store the final decoded 
         *unicode char
         */
        guint32 c = 0;

        g_return_val_if_fail (a_in && a_in_len
                              && a_out && a_out_len, CR_BAD_PARAM_ERROR);

        if (*a_in_len < 1) {
                status = CR_OK;
                goto end;
        }

        in_len = *a_in_len;
        out_len = *a_out_len;

        for (in_index = 0, out_index = 0;
             (in_index < in_len) && (out_index < out_len);
             in_index++, out_index++) {
                gint nb_bytes_2_decode = 0;

                if (a_in[in_index] <= 0x7F) {
                        /*
                         *7 bits long char
                         *encoded over 1 byte:
                         * 0xxx xxxx
                         */
                        c = a_in[in_index];
                        nb_bytes_2_decode = 1;

                } else if ((a_in[in_index] & 0xE0) == 0xC0) {
                        /*
                         *up to 11 bits long char.
                         *encoded over 2 bytes:
                         *110x xxxx  10xx xxxx
                         */
                        c = a_in[in_index] & 0x1F;
                        nb_bytes_2_decode = 2;

                } else if ((a_in[in_index] & 0xF0) == 0xE0) {
                        /*
                         *up to 16 bit long char
                         *encoded over 3 bytes:
                         *1110 xxxx  10xx xxxx  10xx xxxx
                         */
                        c = a_in[in_index] & 0x0F;
                        nb_bytes_2_decode = 3;

                } else if ((a_in[in_index] & 0xF8) == 0xF0) {
                        /*
                         *up to 21 bits long char
                         *encoded over 4 bytes:
                         *1111 0xxx  10xx xxxx  10xx xxxx  10xx xxxx
                         */
                        c = a_in[in_index] & 0x7;
                        nb_bytes_2_decode = 4;

                } else if ((a_in[in_index] & 0xFC) == 0xF8) {
                        /*
                         *up to 26 bits long char
                         *encoded over 5 bytes.
                         *1111 10xx  10xx xxxx  10xx xxxx  
                         *10xx xxxx  10xx xxxx
                         */
                        c = a_in[in_index] & 3;
                        nb_bytes_2_decode = 5;

                } else if ((a_in[in_index] & 0xFE) == 0xFC) {
                        /*
                         *up to 31 bits long char
                         *encoded over 6 bytes:
                         *1111 110x  10xx xxxx  10xx xxxx  
                         *10xx xxxx  10xx xxxx  10xx xxxx
                         */
                        c = a_in[in_index] & 1;
                        nb_bytes_2_decode = 6;

                } else {
                        /*BAD ENCODING */
                        status = CR_ENCODING_ERROR;
                        goto end;
                }

                /*
                 *Go and decode the remaining byte(s)
                 *(if any) to get the current character.
                 */
                if (in_index + nb_bytes_2_decode - 1 >= in_len) {
                        status = CR_OK;
                        goto end;
                }

                for (; nb_bytes_2_decode > 1; nb_bytes_2_decode--) {
                        /*decode the next byte */
                        in_index++;

                        /*byte pattern must be: 10xx xxxx */
                        if ((a_in[in_index] & 0xC0) != 0x80) {
                                status = CR_ENCODING_ERROR;
                                goto end;
                        }

                        c = (c << 6) | (a_in[in_index] & 0x3F);
                }

                /*
                 *The decoded ucs4 char is now
                 *in c.
                 */

                if (c > 0xFF) {
                        status = CR_ENCODING_ERROR;
                        goto end;
                }

                a_out[out_index] = c;
        }

      end:
        *a_out_len = out_index;
        *a_in_len = in_index;

        return CR_OK;
}


Generated by  Doxygen 1.6.0   Back to index