mirror of
https://github.com/cookiengineer/audacity
synced 2025-05-05 06:09:47 +02:00
675 lines
18 KiB
C
675 lines
18 KiB
C
/* -*- Mode: c; c-basic-offset: 2 -*-
|
|
*
|
|
* rdf_utf8.c - RDF UTF8 / Unicode chars helper routines Implementation
|
|
*
|
|
* Copyright (C) 2000-2008, David Beckett http://www.dajobe.org/
|
|
* Copyright (C) 2000-2004, University of Bristol, UK http://www.bristol.ac.uk/
|
|
*
|
|
* This package is Free Software and part of Redland http://librdf.org/
|
|
*
|
|
* It is licensed under the following three licenses as alternatives:
|
|
* 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
|
|
* 2. GNU General Public License (GPL) V2 or any newer version
|
|
* 3. Apache License, V2.0 or any newer version
|
|
*
|
|
* You may not use this file except in compliance with at least one of
|
|
* the above three licenses.
|
|
*
|
|
* See LICENSE.html or LICENSE.txt at the top of this package for the
|
|
* complete terms and further detail along with the license texts for
|
|
* the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
|
|
*
|
|
*
|
|
*/
|
|
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include <rdf_config.h>
|
|
#endif
|
|
|
|
#ifdef WIN32
|
|
#include <win32_rdf_config.h>
|
|
#endif
|
|
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <ctype.h> /* for isprint() */
|
|
|
|
#include <redland.h>
|
|
#include <rdf_utf8.h>
|
|
|
|
|
|
#ifndef STANDALONE
|
|
|
|
/* UTF-8 encoding of 32 bit Unicode chars
|
|
*
|
|
* Characters 0x00000000 to 0x0000007f are US-ASCII
|
|
* Characters 0x00000080 to 0x000000ff are ISO Latin 1 (ISO 8859-1)
|
|
*
|
|
* incoming char| outgoing
|
|
* bytes | bits | representation
|
|
* ==================================================
|
|
* 1 | 7 | 0xxxxxxx
|
|
* 2 | 11 | 110xxxxx 10xxxxxx
|
|
* 3 | 16 | 1110xxxx 10xxxxxx 10xxxxxx
|
|
* 4 | 21 | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
|
* 5 | 26 | 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
|
* 6 | 31 | 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
|
*
|
|
* The first byte of a multi-byte sequence is always in the range 0xC0-0xFD
|
|
* Further bytes are all in the range 0x80-0xBF
|
|
* No byte is ever 0xFE or 0xFF
|
|
*
|
|
*/
|
|
|
|
/*
|
|
* Unicode 3.0 Corrigendum #1: UTF-8 Shortest Form
|
|
* http://www.unicode.org/versions/corrigendum1.html
|
|
*
|
|
* C12
|
|
*
|
|
* (a) When a process generates data in a Unicode Transformation
|
|
* Format, it shall not emit ill-formed code unit sequences.
|
|
*
|
|
* (b) When a process interprets data in a Unicode Transformation
|
|
* Format, it shall treat illegal code unit sequences as an error
|
|
* condition.
|
|
*
|
|
* (c) A conformant process shall not interpret illegal UTF code unit
|
|
* sequences as characters.
|
|
*
|
|
* (d) Irregular UTF code unit sequences shall not be used for
|
|
* encoding any other information.
|
|
*
|
|
*
|
|
* My Summary: never encode non-shortest form UTF-8 sequences - they
* are illegal sequences. Do not accept them on decoding.
|
|
*
|
|
* Table 3.1B. Legal UTF-8 Byte Sequences
|
|
* Code Points 1st Byte 2nd Byte 3rd Byte 4th Byte
|
|
* U+0000..U+007F 00..7F
|
|
* U+0080..U+07FF C2..DF 80..BF
|
|
* U+0800..U+0FFF E0 A0..BF 80..BF
|
|
* U+1000..U+FFFF E1..EF 80..BF 80..BF
|
|
* U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
|
|
* U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
|
|
* U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
|
|
*
|
|
*/
|
|
|
|
|
|
/**
|
|
* librdf_unicode_char_to_utf8:
|
|
* @c: Unicode character
|
|
* @output: UTF-8 string buffer or NULL
|
|
* @length: buffer size
|
|
*
|
|
* Convert a Unicode character to UTF-8 encoding.
|
|
*
|
|
* If buffer is NULL, then will calculate the length rather than
|
|
* perform it. This can be used by the caller to allocate space
|
|
* and then re-call this function with the new buffer.
|
|
*
|
|
* Return value: bytes written to output buffer or <0 on failure
|
|
**/
|
|
int
|
|
librdf_unicode_char_to_utf8(librdf_unichar c, byte *output, int length)
|
|
{
|
|
int size=0;
|
|
|
|
/* check for illegal code positions:
|
|
* U+D800 to U+DFFF (UTF-16 surrogates)
|
|
* U+FFFE and U+FFFF
|
|
*/
|
|
if((c > 0xD7FF && c < 0xE000) || c == 0xFFFE || c == 0xFFFF)
|
|
return -1;
|
|
|
|
/* Unicode 3.2 only defines U+0000 to U+10FFFF and UTF-8 encodings of it */
|
|
if(c > 0x10ffff)
|
|
return -1;
|
|
|
|
if (c < 0x00000080)
|
|
size=1;
|
|
else if (c < 0x00000800)
|
|
size=2;
|
|
else if (c < 0x00010000)
|
|
size=3;
|
|
else
|
|
size=4;
|
|
|
|
/* when no buffer given, return size */
|
|
if(!output)
|
|
return size;
|
|
|
|
if(size > length)
|
|
return -1;
|
|
|
|
switch(size) {
|
|
case 4:
|
|
output[3]=0x80 | (c & 0x3F);
|
|
c= c >> 6;
|
|
/* set bit 4 (bits 7,6,5,4 less 7,6,5 set below) on last byte */
|
|
c |= 0x10000; /* 0x10000 = 0x10 << 12 */
|
|
/* FALLTHROUGH */
|
|
case 3:
|
|
output[2]=0x80 | (c & 0x3F);
|
|
c= c >> 6;
|
|
/* set bit 5 (bits 7,6,5 less 7,6 set below) on last byte */
|
|
c |= 0x800; /* 0x800 = 0x20 << 6 */
|
|
/* FALLTHROUGH */
|
|
case 2:
|
|
output[1]=0x80 | (c & 0x3F);
|
|
c= c >> 6;
|
|
/* set bits 7,6 on last byte */
|
|
c |= 0xc0;
|
|
/* FALLTHROUGH */
|
|
case 1:
|
|
output[0]=c;
|
|
}
|
|
|
|
return size;
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
* librdf_utf8_to_unicode_char:
|
|
* @output: Pointer to the Unicode character or NULL
|
|
* @input: UTF-8 string buffer
|
|
* @length: buffer size
|
|
*
|
|
* Convert an UTF-8 encoded buffer to a Unicode character.
|
|
*
|
|
* If output is NULL, then will calculate the number of bytes that
|
|
* will be used from the input buffer and not perform the conversion.
|
|
*
|
|
* Return value: bytes used from input buffer or <0 on failure
|
|
**/
|
|
int
|
|
librdf_utf8_to_unicode_char(librdf_unichar *output, const byte *input, int length)
|
|
{
|
|
byte in;
|
|
int size;
|
|
librdf_unichar c=0;
|
|
|
|
if(length < 1)
|
|
return -1;
|
|
|
|
in=*input++;
|
|
if((in & 0x80) == 0) { /* First byte 00..7F */
|
|
size=1;
|
|
c= in & 0x7f;
|
|
} else if((in & 0xe0) == 0xc0) { /* First byte C0..DF */
|
|
size=2;
|
|
c= in & 0x1f;
|
|
} else if((in & 0xf0) == 0xe0) { /* First byte E0..EF */
|
|
size=3;
|
|
c= in & 0x0f;
|
|
} else if((in & 0xf8) == 0xf0) { /* First byte F0..F7 */
|
|
size=4;
|
|
c = in & 0x07;
|
|
} else /* First byte anything else: 80..BF F8..FF - illegal */
|
|
return -1;
|
|
|
|
|
|
if(!output)
|
|
return size;
|
|
|
|
if(length < size)
|
|
return -1;
|
|
|
|
switch(size) {
|
|
case 4:
|
|
in=*input++ & 0x3f;
|
|
c= c << 6;
|
|
c |= in;
|
|
/* FALLTHROUGH */
|
|
case 3:
|
|
in=*input++ & 0x3f;
|
|
c= c << 6;
|
|
c |= in;
|
|
/* FALLTHROUGH */
|
|
case 2:
|
|
in=*input++ & 0x3f;
|
|
c= c << 6;
|
|
c |= in;
|
|
/* FALLTHROUGH */
|
|
default:
|
|
break;
|
|
}
|
|
|
|
|
|
/* check for overlong UTF-8 sequences */
|
|
switch(size) {
|
|
case 2:
|
|
if(c < 0x00000080)
|
|
return -2;
|
|
break;
|
|
case 3:
|
|
if(c < 0x00000800)
|
|
return -2;
|
|
break;
|
|
case 4:
|
|
if(c < 0x00010000)
|
|
return -2;
|
|
break;
|
|
|
|
default: /* 1 */
|
|
break;
|
|
}
|
|
|
|
|
|
/* check for illegal code positions:
|
|
* U+D800 to U+DFFF (UTF-16 surrogates)
|
|
* U+FFFE and U+FFFF
|
|
*/
|
|
if((c > 0xD7FF && c < 0xE000) || c == 0xFFFE || c == 0xFFFF)
|
|
return -1;
|
|
|
|
/* Unicode 3.2 only defines U+0000 to U+10FFFF and UTF-8 encodings of it */
|
|
/* of course this makes some 4 byte forms illegal */
|
|
if(c > 0x10ffff)
|
|
return -1;
|
|
|
|
*output=c;
|
|
|
|
return size;
|
|
}
|
|
|
|
|
|
/**
|
|
* librdf_utf8_to_latin1:
|
|
* @input: UTF-8 string buffer
|
|
* @length: buffer size
|
|
* @output_length: Pointer to variable to store resulting string length or NULL
|
|
*
|
|
* Convert a UTF-8 string to ISO Latin-1.
|
|
*
|
|
* Converts the given UTF-8 string to the ISO Latin-1 subset of
|
|
* Unicode (characters 0x00-0xff), discarding any out of range
|
|
* characters.
|
|
*
|
|
* If the output_length pointer is not NULL, the returned string
|
|
* length will be stored there.
|
|
*
|
|
* Return value: pointer to new ISO Latin-1 string or NULL on failure
|
|
**/
|
|
byte*
|
|
librdf_utf8_to_latin1(const byte *input, int length, int *output_length)
|
|
{
|
|
int utf8_char_length=0;
|
|
int utf8_byte_length=0;
|
|
int i;
|
|
int j;
|
|
byte *output;
|
|
|
|
i=0;
|
|
while(input[i]) {
|
|
int size=librdf_utf8_to_unicode_char(NULL, &input[i], length-i);
|
|
if(size <= 0)
|
|
return NULL;
|
|
utf8_char_length++;
|
|
i+= size;
|
|
}
|
|
|
|
/* This is a maximal length; since chars may be discarded, the
|
|
* actual length of the resulting can be shorter
|
|
*/
|
|
utf8_byte_length=i;
|
|
|
|
|
|
output=(byte*)LIBRDF_MALLOC(byte_string, utf8_byte_length+1);
|
|
if(!output)
|
|
return NULL;
|
|
|
|
|
|
i=0; j=0;
|
|
while(i < utf8_byte_length) {
|
|
librdf_unichar c;
|
|
int size=librdf_utf8_to_unicode_char(&c, &input[i], length-i);
|
|
if(size <= 0)
|
|
return NULL;
|
|
if(c < 0x100) /* Discards characters! */
|
|
output[j++]=c;
|
|
i+= size;
|
|
}
|
|
output[j]='\0';
|
|
|
|
if(output_length)
|
|
*output_length=j;
|
|
|
|
return output;
|
|
}
|
|
|
|
|
|
/**
|
|
* librdf_latin1_to_utf8:
|
|
* @input: ISO Latin-1 string buffer
|
|
* @length: buffer size
|
|
* @output_length: Pointer to variable to store resulting string length or NULL
|
|
*
|
|
* Convert an ISO Latin-1 encoded string to UTF-8.
|
|
*
|
|
* Converts the given ISO Latin-1 string to an UTF-8 encoded string
|
|
* representing the same content. This is lossless.
|
|
*
|
|
* If the output_length pointer is not NULL, the returned string
|
|
* length will be stored there.
|
|
*
|
|
* Return value: pointer to new UTF-8 string or NULL on failure
|
|
**/
|
|
byte*
|
|
librdf_latin1_to_utf8(const byte *input, int length, int *output_length)
|
|
{
|
|
int utf8_length=0;
|
|
int i;
|
|
int j;
|
|
byte *output;
|
|
|
|
for(i=0; input[i]; i++) {
|
|
int size=librdf_unicode_char_to_utf8(input[i], NULL, length-i);
|
|
if(size <= 0)
|
|
return NULL;
|
|
utf8_length += size;
|
|
}
|
|
|
|
output=(byte*)LIBRDF_MALLOC(byte_string, utf8_length+1);
|
|
if(!output)
|
|
return NULL;
|
|
|
|
|
|
j=0;
|
|
for(i=0; input[i]; i++) {
|
|
int size=librdf_unicode_char_to_utf8(input[i], &output[j], length-i);
|
|
if(size <= 0)
|
|
return NULL;
|
|
j+= size;
|
|
}
|
|
output[j]='\0';
|
|
|
|
if(output_length)
|
|
*output_length=j;
|
|
|
|
return output;
|
|
}
|
|
|
|
|
|
/**
|
|
* librdf_utf8_print:
|
|
* @input: UTF-8 string buffer
|
|
* @length: buffer size
|
|
* @stream: FILE* stream
|
|
*
|
|
* Print a UTF-8 string to a stream.
|
|
*
|
|
* Pretty prints the UTF-8 string in a pseudo-C character
|
|
* format like \u<emphasis>hex digits</emphasis> when the characters fail
|
|
* the isprint() test.
|
|
**/
|
|
void
|
|
librdf_utf8_print(const byte *input, int length, FILE *stream)
|
|
{
|
|
int i=0;
|
|
|
|
while(i<length && *input) {
|
|
librdf_unichar c;
|
|
int size=librdf_utf8_to_unicode_char(&c, input, length-i);
|
|
if(size <= 0)
|
|
return;
|
|
if(c < 0x100) {
|
|
if(isprint(c))
|
|
fputc(c, stream);
|
|
else
|
|
fprintf(stream, "\\u%02X", c);
|
|
} else if (c < 0x10000)
|
|
fprintf(stream, "\\u%04X", c);
|
|
else
|
|
fprintf(stream, "\\U%08X", c);
|
|
input += size;
|
|
i += size;
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
/* TEST CODE */
|
|
|
|
|
|
#ifdef STANDALONE
|
|
|
|
/* static prototypes */
|
|
void librdf_bad_string_print(const byte *input, int length, FILE *stream);
|
|
int main(int argc, char *argv[]);
|
|
|
|
void
|
|
librdf_bad_string_print(const byte *input, int length, FILE *stream)
|
|
{
|
|
while(*input && length>0) {
|
|
char c=*input;
|
|
if(isprint(c))
|
|
fputc(c, stream);
|
|
else
|
|
fprintf(stream, "\\x%02X", (c & 0xff));
|
|
input++;
|
|
length--;
|
|
}
|
|
}
|
|
|
|
|
|
int
|
|
main(int argc, char *argv[])
|
|
{
|
|
const char *program=librdf_basename((const char*)argv[0]);
|
|
librdf_unichar c;
|
|
struct tv {
|
|
const byte *string;
|
|
const int length;
|
|
const librdf_unichar result;
|
|
};
|
|
struct tv *t;
|
|
struct tv test_values[]={
|
|
/* what is the capital of England? 'E' */
|
|
{(const byte*)"E", 1, 'E'},
|
|
/* latin small letter e with acute, U+00E9 ISOlat1 */
|
|
{(const byte*)"\xc3\xa9", 2, 0xE9},
|
|
/* euro sign, U+20AC NEW */
|
|
{(const byte*)"\xe2\x82\xac", 3, 0x20AC},
|
|
/* unknown char - U+1FFFFF (21 bits) */
|
|
|
|
/* First possible sequence of a certain length */
|
|
{(const byte*)"\x00", 1, 0x00000000},
|
|
{(const byte*)"\xc2\x80", 2, 0x00000080},
|
|
{(const byte*)"\xe0\xa0\x80", 3, 0x00000800},
|
|
{(const byte*)"\xf0\x90\x80\x80", 4, 0x00010000},
|
|
|
|
/* Last possible sequence of a certain length */
|
|
{(const byte*)"\x7f", 1, 0x0000007F},
|
|
{(const byte*)"\xdf\xbf", 2, 0x000007FF},
|
|
{(const byte*)"\xef\xbf\xbd", 3, 0x0000FFFD}, /*no FFFE-FFFF */
|
|
{(const byte*)"\xf4\x8f\xbf\xbf", 4, 0x0010FFFF},
|
|
|
|
/* Boundary conditions */
|
|
{(const byte*)"\xed\x9f\xbf", 3, 0x0000D7FF},
|
|
{(const byte*)"\xee\x80\x80", 3, 0x0000E000},
|
|
{(const byte*)"\xef\xbf\xbd", 3, 0x0000FFFD},
|
|
{(const byte*)"\xf4\x8f\xbf\xbf", 4, 0x0010FFFF},
|
|
|
|
{NULL, 0, 0}
|
|
};
|
|
struct tv bad_test_values[]={
|
|
/* Sequences that cannot appear in UTF-8 */
|
|
{(const byte*)"\xfe", 1, 0x000000FE},
|
|
{(const byte*)"\xff", 1, 0x000000FF},
|
|
{(const byte*)"\xef\xbf\xbe", 3, 0x0000FFFE},
|
|
{(const byte*)"\xef\xbf\xbf", 3, 0x0000FFFF},
|
|
|
|
/* Minumum (ASCII NUL) overlong sequences */
|
|
{(const byte*)"\xc0\x80", 2, 0x00000000},
|
|
{(const byte*)"\xe0\x80\x80", 3, 0x00000000},
|
|
{(const byte*)"\xf0\x80\x80\x80", 4, 0x00000000},
|
|
|
|
/* Maximum overlong sequences */
|
|
{(const byte*)"\xc1\xbf", 2, 0x0000007F},
|
|
{(const byte*)"\xe0\x9f\xbf", 3, 0x000007FF},
|
|
{(const byte*)"\xf0\x8f\xbf\xbf", 4, 0x0000FFFF},
|
|
|
|
/* Beyond U+10FFFF */
|
|
{(const byte*)"\xf4\x90\x80\x80", 4, 0x00110000},
|
|
|
|
{NULL, 0, 0}
|
|
};
|
|
|
|
const byte test_utf8_string[]="Lib" "\xc3\xa9" "ration costs " "\xe2\x82\xac" "3.50";
|
|
int test_utf8_string_length=strlen((const char*)test_utf8_string);
|
|
const byte result_latin1_string[]="Lib" "\xe9" "ration costs 3.50";
|
|
int result_latin1_string_length=strlen((const char*)result_latin1_string);
|
|
const byte result_utf8_string[]="Lib" "\xc3\xa9" "ration costs 3.50";
|
|
int result_utf8_string_length=strlen((const char*)result_utf8_string);
|
|
|
|
int i;
|
|
byte *latin1_string;
|
|
int latin1_string_length;
|
|
byte *utf8_string;
|
|
int utf8_string_length;
|
|
int failures=0;
|
|
int verbose=0;
|
|
|
|
for(i=0; (t=&test_values[i]) && t->string; i++) {
|
|
int size;
|
|
const byte *buffer=t->string;
|
|
int length=t->length;
|
|
#define OUT_BUFFER_SIZE 6
|
|
byte out_buffer[OUT_BUFFER_SIZE];
|
|
|
|
size=librdf_utf8_to_unicode_char(&c, buffer, length);
|
|
if(size < 0) {
|
|
fprintf(stderr, "%s: librdf_utf8_to_unicode_char FAILED to convert UTF-8 string '", program);
|
|
librdf_bad_string_print(buffer, length, stderr);
|
|
fprintf(stderr, "' (length %d) to Unicode\n", length);
|
|
failures++;
|
|
continue;
|
|
}
|
|
if(c != t->result) {
|
|
fprintf(stderr, "%s: librdf_utf8_to_unicode_char FAILED conversion of UTF-8 string '", program);
|
|
librdf_bad_string_print(buffer, size, stderr);
|
|
fprintf(stderr, "' to Unicode char U+%04X, expected U+%04X\n",
|
|
(u32)c, (u32)t->result);
|
|
failures++;
|
|
continue;
|
|
}
|
|
|
|
if(verbose) {
|
|
fprintf(stderr, "%s: librdf_utf8_to_unicode_char converted UTF-8 string '", program);
|
|
librdf_utf8_print(buffer, size, stderr);
|
|
fprintf(stderr, "' to Unicode char U+%04X correctly\n", (u32)c);
|
|
}
|
|
|
|
size=librdf_unicode_char_to_utf8(t->result, out_buffer, OUT_BUFFER_SIZE);
|
|
if(size <= 0) {
|
|
fprintf(stderr, "%s: librdf_unicode_char_to_utf8 FAILED to convert U+%04X to UTF-8 string\n", program, (u32)t->result);
|
|
failures++;
|
|
continue;
|
|
}
|
|
|
|
if(memcmp(out_buffer, buffer, length)) {
|
|
fprintf(stderr, "%s: librdf_unicode_char_to_utf8 FAILED conversion U+%04X to UTF-8 - returned '", program, (u32)t->result);
|
|
librdf_utf8_print(buffer, size, stderr);
|
|
fputs("', expected '", stderr);
|
|
librdf_utf8_print(out_buffer, t->length, stderr);
|
|
fputs("'\n", stderr);
|
|
failures++;
|
|
continue;
|
|
}
|
|
|
|
if(verbose) {
|
|
fprintf(stderr, "%s: librdf_unicode_char_to_utf8 converted U+%04X to UTF-8 string '", program, (u32)t->result);
|
|
librdf_utf8_print(out_buffer, size, stderr);
|
|
fputs("' correctly\n", stderr);
|
|
}
|
|
}
|
|
|
|
|
|
/* Check for failures */
|
|
for(i=0; (t=&bad_test_values[i]) && t->string; i++) {
|
|
int size;
|
|
const byte *buffer=t->string;
|
|
int length=t->length;
|
|
|
|
size=librdf_utf8_to_unicode_char(&c, buffer, length);
|
|
if(size >= 0) {
|
|
fprintf(stderr, "%s: librdf_utf8_to_unicode_char SUCCEEDED when it should have failed to convert UTF-8 string '", program);
|
|
librdf_bad_string_print(buffer, length, stderr);
|
|
fprintf(stderr, "' (length %d) to Unicode\n", length);
|
|
failures++;
|
|
continue;
|
|
}
|
|
if(verbose) {
|
|
fprintf(stderr, "%s: librdf_utf8_to_unicode_char failed as expected converting bad UTF-8 string '", program);
|
|
librdf_bad_string_print(buffer, length, stderr);
|
|
fprintf(stderr, "' (length %d) to Unicode\n", length);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
latin1_string=librdf_utf8_to_latin1(test_utf8_string,
|
|
test_utf8_string_length,
|
|
&latin1_string_length);
|
|
if(!latin1_string) {
|
|
fprintf(stderr, "%s: librdf_utf8_to_latin1 FAILED to convert UTF-8 string '", program);
|
|
librdf_bad_string_print(test_utf8_string, test_utf8_string_length, stderr);
|
|
fputs("' to Latin-1\n", stderr);
|
|
failures++;
|
|
}
|
|
|
|
if(memcmp(latin1_string, result_latin1_string, result_latin1_string_length)) {
|
|
fprintf(stderr, "%s: librdf_utf8_to_latin1 FAILED to convert UTF-8 string '", program);
|
|
librdf_utf8_print(test_utf8_string, test_utf8_string_length, stderr);
|
|
fprintf(stderr, "' to Latin-1 - returned '%s' but expected '%s'\n",
|
|
latin1_string, result_latin1_string);
|
|
failures++;
|
|
}
|
|
|
|
if(verbose) {
|
|
fprintf(stderr, "%s: librdf_utf8_to_latin1 converted UTF-8 string '",
|
|
program);
|
|
librdf_utf8_print(test_utf8_string, test_utf8_string_length, stderr);
|
|
fprintf(stderr, "' to Latin-1 string '%s' OK\n", latin1_string);
|
|
}
|
|
|
|
|
|
utf8_string=librdf_latin1_to_utf8(latin1_string, latin1_string_length,
|
|
&utf8_string_length);
|
|
if(!utf8_string) {
|
|
fprintf(stderr, "%s: librdf_latin1_to_utf8 FAILED to convert Latin-1 string '%s' to UTF-8\n", program, latin1_string);
|
|
failures++;
|
|
}
|
|
|
|
if(memcmp(utf8_string, result_utf8_string, result_utf8_string_length)) {
|
|
fprintf(stderr, "%s: librdf_latin1_to_utf8 FAILED to convert Latin-1 string '%s' to UTF-8 - returned '", program, latin1_string);
|
|
librdf_utf8_print(utf8_string, utf8_string_length, stderr);
|
|
fputs("' but expected '", stderr);
|
|
librdf_utf8_print(result_utf8_string, result_utf8_string_length, stderr);
|
|
fputs("'\n", stderr);
|
|
failures++;
|
|
}
|
|
|
|
if(verbose) {
|
|
fprintf(stderr, "%s: librdf_latin1_to_utf8 converted Latin-1 string '%s' to UTF-8 string '", program, latin1_string);
|
|
librdf_utf8_print(utf8_string, utf8_string_length, stderr);
|
|
fputs("' OK\n", stderr);
|
|
}
|
|
|
|
LIBRDF_FREE(cstring, latin1_string);
|
|
LIBRDF_FREE(cstring, utf8_string);
|
|
|
|
#ifdef LIBRDF_MEMORY_DEBUG
|
|
librdf_memory_report(stderr);
|
|
#endif
|
|
|
|
return failures;
|
|
}
|
|
|
|
#endif
|