INTEGRATION: CWS swqcore06 (1.6.26); FILE MERGED
2005/02/04 17:52:41 dvo 1.6.26.1: #i39255# fix byte order mark (BOM) recognition; add UTF-8 BOM
This commit is contained in:
@@ -2,9 +2,9 @@
|
|||||||
*
|
*
|
||||||
* $RCSfile: xml2utf.cxx,v $
|
* $RCSfile: xml2utf.cxx,v $
|
||||||
*
|
*
|
||||||
* $Revision: 1.6 $
|
* $Revision: 1.7 $
|
||||||
*
|
*
|
||||||
* last change: $Author: hr $ $Date: 2004-02-04 13:40:37 $
|
* last change: $Author: vg $ $Date: 2005-02-22 10:06:12 $
|
||||||
*
|
*
|
||||||
* The Contents of this file are made available subject to the terms of
|
* The Contents of this file are made available subject to the terms of
|
||||||
* either of the following licenses
|
* either of the following licenses
|
||||||
@@ -257,7 +257,7 @@ sal_Bool XMLFile2UTFConverter::isEncodingRecognizable( const Sequence< sal_Int8
|
|||||||
|
|
||||||
sal_Bool XMLFile2UTFConverter::scanForEncoding( Sequence< sal_Int8 > &seq )
|
sal_Bool XMLFile2UTFConverter::scanForEncoding( Sequence< sal_Int8 > &seq )
|
||||||
{
|
{
|
||||||
const sal_Int8 *pSource = seq.getConstArray();
|
const sal_uInt8 *pSource = reinterpret_cast<const sal_uInt8*>( seq.getConstArray() );
|
||||||
sal_Bool bReturn = sal_True;
|
sal_Bool bReturn = sal_True;
|
||||||
|
|
||||||
if( seq.getLength() < 4 ) {
|
if( seq.getLength() < 4 ) {
|
||||||
@@ -299,14 +299,14 @@ sal_Bool XMLFile2UTFConverter::scanForEncoding( Sequence< sal_Int8 > &seq )
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if( 0xFE == static_cast<unsigned char> (pSource[0]) &&
|
else if( 0xFE == pSource[0] &&
|
||||||
0xFF == static_cast<unsigned char> (pSource[1]) ) {
|
0xFF == pSource[1] ) {
|
||||||
// UTF-16 big endian
|
// UTF-16 big endian
|
||||||
// conversion is done so that encoding information can be easily extracted
|
// conversion is done so that encoding information can be easily extracted
|
||||||
m_sEncoding = "utf-16";
|
m_sEncoding = "utf-16";
|
||||||
}
|
}
|
||||||
else if( 0xFF == static_cast<unsigned char> (pSource[0]) &&
|
else if( 0xFF == pSource[0] &&
|
||||||
0xFE == static_cast<unsigned char> (pSource[1]) ) {
|
0xFE == pSource[1] ) {
|
||||||
// UTF-16 little endian
|
// UTF-16 little endian
|
||||||
// conversion is done so that encoding information can be easily extracted
|
// conversion is done so that encoding information can be easily extracted
|
||||||
m_sEncoding = "utf-16";
|
m_sEncoding = "utf-16";
|
||||||
@@ -334,6 +334,16 @@ sal_Bool XMLFile2UTFConverter::scanForEncoding( Sequence< sal_Int8 > &seq )
|
|||||||
|
|
||||||
m_sEncoding = "utf-16";
|
m_sEncoding = "utf-16";
|
||||||
}
|
}
|
||||||
|
else if( 0xEF == pSource[0] &&
|
||||||
|
0xBB == pSource[1] &&
|
||||||
|
0xBF == pSource[2] )
|
||||||
|
{
|
||||||
|
// UTF-8 BOM (byte order mark): it identifies the data as UTF-8; it does not indicate a byte order
|
||||||
|
// Strip the three BOM bytes from the front of the sequence.
|
||||||
|
memmove( seq.getArray(), &( seq.getArray()[3] ), seq.getLength()-3 );
|
||||||
|
seq.realloc( seq.getLength() - 3 );
|
||||||
|
m_sEncoding = "utf-8";
|
||||||
|
}
|
||||||
else if( 0x00 == pSource[0] && 0x00 == pSource[1] && 0x00 == pSource[2] && 0x3c == pSource[3] ) {
|
else if( 0x00 == pSource[0] && 0x00 == pSource[1] && 0x00 == pSource[2] && 0x3c == pSource[3] ) {
|
||||||
// UCS-4 big endian
|
// UCS-4 big endian
|
||||||
m_sEncoding = "ucs-4";
|
m_sEncoding = "ucs-4";
|
||||||
|
Reference in New Issue
Block a user