mirror of
https://bitbucket.org/smil3y/kdelibs.git
synced 2025-02-23 18:32:49 +00:00
kdecore: remove redundant KEncodingDetector class
Use QTextCodec::codecForUtfText() or QTextCodec::codecForText() instead.
Signed-off-by: Ivailo Monev <xakepa10@gmail.com>
This commit is contained in:
parent
894a2d52bb
commit
718ccc145f
11 changed files with 66 additions and 2280 deletions
|
@ -181,8 +181,6 @@ set(kdecore_LIB_SRCS
|
|||
|
||||
localization/kcatalog.cpp
|
||||
localization/kcharsets.cpp
|
||||
localization/kencodingdetector.cpp
|
||||
localization/guess_ja.cpp
|
||||
localization/klocale.cpp
|
||||
localization/klocale_kde.cpp
|
||||
localization/klocalizedstring.cpp
|
||||
|
@ -337,7 +335,6 @@ install(
|
|||
#services/kserviceoffer.h: do not install, internal API
|
||||
services/kplugininfo.h
|
||||
localization/kcharsets.h
|
||||
localization/kencodingdetector.h
|
||||
localization/klocale.h
|
||||
localization/klocalizedstring.h
|
||||
sycoca/kprotocolinfo.h
|
||||
|
|
|
@ -1,376 +0,0 @@
|
|||
/*
|
||||
* This file is part of the KDE libraries
|
||||
*
|
||||
* Copyright 2000-2003 Shiro Kawai <shiro@acm.org>, All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the authors nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
/*
|
||||
* original code is here.
|
||||
* http://cvs.sourceforge.net/viewcvs.py/gauche/Gauche/ext/charconv/guess.c?view=markup
|
||||
*/
|
||||
|
||||
/*
|
||||
* QTextCodec::heuristicContentMatch() could be used here instead,
|
||||
* but its detection fails, so it is not useful.
|
||||
*/
|
||||
#include "guess_ja_p.h"
|
||||
|
||||
/* DFA tables */
|
||||
const dfa_table guess_eucj_st[] = {
|
||||
{ /* state init */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 2,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -1,
|
||||
},
|
||||
{ /* state jis0201_kana */
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
},
|
||||
{ /* state jis0213_1 */
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, -1,
|
||||
},
|
||||
{ /* state jis0213_2 */
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, -1,
|
||||
},
|
||||
};
|
||||
|
||||
/*
 * Arc table of the EUC-JP automaton.
 *
 * Every entry is { next state, score factor }.  The non-negative values
 * stored in guess_eucj_st are indices into this array.
 * State numbers: 0 = init, 1 = jis0201_kana, 2 = jis0213_1, 3 = jis0213_2.
 */
guess_arc guess_eucj_ar[7] = {
    /* arcs leaving "init" */
    { 0, 1.0  },    /* 0: init -> init          (0x00-0x7f) */
    { 1, 0.8  },    /* 1: init -> jis0201_kana  (0x8e)      */
    { 3, 0.95 },    /* 2: init -> jis0213_2     (0x8f)      */
    { 2, 1.0  },    /* 3: init -> jis0213_1     (0xa1-0xfe) */

    /* arcs returning to "init" after the trailing byte */
    { 0, 1.0  },    /* 4: jis0201_kana -> init */
    { 0, 1.0  },    /* 5: jis0213_1    -> init */
    { 0, 1.0  },    /* 6: jis0213_2    -> init */
};
|
||||
|
||||
const dfa_table guess_sjis_st[] = {
|
||||
{ /* state init */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
-1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4,
|
||||
},
|
||||
{ /* state jis0213 */
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, -1,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, -1, -1, -1,
|
||||
},
|
||||
};
|
||||
|
||||
/*
 * Arc table of the Shift_JIS automaton.
 *
 * Every entry is { next state, score factor }.  The non-negative values
 * stored in guess_sjis_st are indices into this array.
 * State numbers: 0 = init, 1 = jis0213.
 */
guess_arc guess_sjis_ar[6] = {
    /* arcs leaving "init" */
    { 0, 1.0  },    /* 0: init -> init     (0x00-0x7f)             */
    { 1, 1.0  },    /* 1: init -> jis0213  (0x81-0x9f, 0xe1-0xef)  */
    { 0, 0.8  },    /* 2: init -> init     (0xa1-0xdf)             */
    { 1, 0.95 },    /* 3: init -> jis0213  (0xf0-0xfc)             */
    { 0, 0.8  },    /* 4: init -> init     (0xfd-0xff)             */

    /* arc returning to "init" after the trailing byte */
    { 0, 1.0  },    /* 5: jis0213 -> init */
};
|
||||
|
||||
const dfa_table guess_utf8_st[] = {
|
||||
{ /* state init */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, -1, -1,
|
||||
},
|
||||
{ /* state 1byte_more */
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
},
|
||||
{ /* state 2byte_more */
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
},
|
||||
{ /* state 3byte_more */
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
},
|
||||
{ /* state 4byte_more */
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
},
|
||||
{ /* state 5byte_more */
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
},
|
||||
};
|
||||
|
||||
/*
 * Arc table of the UTF-8 automaton.
 *
 * Every entry is { next state, score factor }.  The non-negative values
 * stored in guess_utf8_st are indices into this array.
 * State numbers: 0 = init, N = "N byte(s) more" for N in 1..5.
 * All factors are 1.0, so this automaton never lowers its score.
 */
guess_arc guess_utf8_ar[11] = {
    /* arcs leaving "init", selected by the lead byte */
    { 0, 1.0 },     /*  0: init -> init        (0x00-0x7f) */
    { 1, 1.0 },     /*  1: init -> 1byte_more  (0xc2-0xdf) */
    { 2, 1.0 },     /*  2: init -> 2byte_more  (0xe0-0xef) */
    { 3, 1.0 },     /*  3: init -> 3byte_more  (0xf0-0xf7) */
    { 4, 1.0 },     /*  4: init -> 4byte_more  (0xf8-0xfb) */
    { 5, 1.0 },     /*  5: init -> 5byte_more  (0xfc-0xfd) */

    /* arcs taken on a continuation byte (0x80-0xbf) */
    { 0, 1.0 },     /*  6: 1byte_more -> init       */
    { 1, 1.0 },     /*  7: 2byte_more -> 1byte_more */
    { 2, 1.0 },     /*  8: 3byte_more -> 2byte_more */
    { 3, 1.0 },     /*  9: 4byte_more -> 3byte_more */
    { 4, 1.0 },     /* 10: 5byte_more -> 4byte_more */
};
|
||||
|
||||
/* Guessing Routine */
|
||||
enum JapaneseCode::Type JapaneseCode::guess_jp(const char *buf, int buflen)
|
||||
{
|
||||
int i;
|
||||
guess_dfa *top = 0;
|
||||
|
||||
for (i=0; i<buflen; i++) {
|
||||
int c = (unsigned char)buf[i];
|
||||
|
||||
/* special treatment of jis escape sequence */
|
||||
if (c == 0x1b || last_JIS_escape) {
|
||||
if (i < buflen-1) {
|
||||
if (last_JIS_escape)
|
||||
c = (unsigned char)buf[i];
|
||||
else
|
||||
c = (unsigned char)buf[++i];
|
||||
last_JIS_escape = false;
|
||||
|
||||
if (c == '$' || c == '(') {
|
||||
return JapaneseCode::JIS;
|
||||
}
|
||||
} else {
|
||||
last_JIS_escape = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (DFA_ALIVE(eucj)) {
|
||||
if (!DFA_ALIVE(sjis) && !DFA_ALIVE(utf8)) return JapaneseCode::EUC;
|
||||
DFA_NEXT(eucj, c);
|
||||
}
|
||||
if (DFA_ALIVE(sjis)) {
|
||||
if (!DFA_ALIVE(eucj) && !DFA_ALIVE(utf8)) return JapaneseCode::SJIS;
|
||||
DFA_NEXT(sjis, c);
|
||||
}
|
||||
if (DFA_ALIVE(utf8)) {
|
||||
if (!DFA_ALIVE(sjis) && !DFA_ALIVE(eucj)) return JapaneseCode::UTF8;
|
||||
DFA_NEXT(utf8, c);
|
||||
}
|
||||
|
||||
if (!DFA_ALIVE(eucj) && !DFA_ALIVE(sjis) && !DFA_ALIVE(utf8)) {
|
||||
/* we ran out the possibilities */
|
||||
return JapaneseCode::ASCII;
|
||||
}
|
||||
}
|
||||
|
||||
/* ascii code check */
|
||||
if (eucj->score == 1.0 && sjis->score == 1.0 && utf8->score == 1.0)
|
||||
return JapaneseCode::ASCII;
|
||||
|
||||
/* Now, we have ambigous code. Pick the highest score. If more than
|
||||
one candidate tie, pick the default encoding. */
|
||||
if (DFA_ALIVE(eucj)) top = eucj;
|
||||
if (DFA_ALIVE(utf8)) {
|
||||
if (top) {
|
||||
if (top->score < utf8->score) top = utf8;
|
||||
} else {
|
||||
top = utf8;
|
||||
}
|
||||
}
|
||||
if (DFA_ALIVE(sjis)) {
|
||||
if (top) {
|
||||
if (top->score <= sjis->score) top = sjis;
|
||||
} else {
|
||||
top = sjis;
|
||||
}
|
||||
}
|
||||
|
||||
if (top == eucj) return JapaneseCode::EUC;
|
||||
if (top == utf8) return JapaneseCode::UTF8;
|
||||
if (top == sjis) return JapaneseCode::SJIS;
|
||||
|
||||
return JapaneseCode::ASCII;
|
||||
}
|
|
@ -1,120 +0,0 @@
|
|||
/*
|
||||
* This file is part of the KDE libraries
|
||||
*
|
||||
* Copyright 2000-2003 Shiro Kawai <shiro@acm.org>, All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the authors nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
/*
|
||||
* original code is here.
|
||||
* http://cvs.sourceforge.net/viewcvs.py/gauche/Gauche/ext/charconv/guess.c?view=markup
|
||||
*/
|
||||
#ifndef GUESS_JA_H
|
||||
#define GUESS_JA_H
|
||||
|
||||
#include <qglobal.h>
|
||||
#ifdef SOLARIS
|
||||
#undef UNICODE
|
||||
#endif
|
||||
|
||||
/*
 * One transition of an encoding automaton: the state to enter and the
 * factor by which the automaton's running score is multiplied.
 */
class guess_arc {
public:
    unsigned int next;  /* index of the state entered by this arc */
    double score;       /* factor applied to the running score */
};
|
||||
|
||||
typedef signed char dfa_table[256];
|
||||
|
||||
/* DFA tables declared in guess_ja.cpp */
|
||||
extern const dfa_table guess_eucj_st[];
|
||||
extern guess_arc guess_eucj_ar[7];
|
||||
extern const dfa_table guess_sjis_st[];
|
||||
extern guess_arc guess_sjis_ar[6];
|
||||
extern const dfa_table guess_utf8_st[];
|
||||
extern guess_arc guess_utf8_ar[11];
|
||||
|
||||
class guess_dfa {
|
||||
public:
|
||||
const dfa_table *states;
|
||||
const guess_arc *arcs;
|
||||
int state;
|
||||
double score;
|
||||
|
||||
guess_dfa (const dfa_table stable[], const guess_arc *atable) :
|
||||
states(stable), arcs(atable)
|
||||
{
|
||||
state = 0;
|
||||
score = 1.0;
|
||||
}
|
||||
};
|
||||
|
||||
class JapaneseCode
|
||||
{
|
||||
public:
|
||||
enum Type {ASCII, JIS, EUC, SJIS, UNICODE, UTF8 };
|
||||
enum Type guess_jp(const char* buf, int buflen);
|
||||
|
||||
JapaneseCode () {
|
||||
eucj = new guess_dfa(guess_eucj_st, guess_eucj_ar);
|
||||
sjis = new guess_dfa(guess_sjis_st, guess_sjis_ar);
|
||||
utf8 = new guess_dfa(guess_utf8_st, guess_utf8_ar);
|
||||
last_JIS_escape = false;
|
||||
}
|
||||
|
||||
~JapaneseCode () {
|
||||
delete eucj;
|
||||
delete sjis;
|
||||
delete utf8;
|
||||
}
|
||||
|
||||
protected:
|
||||
guess_dfa *eucj;
|
||||
guess_dfa *sjis;
|
||||
guess_dfa *utf8;
|
||||
|
||||
bool last_JIS_escape;
|
||||
};
|
||||
|
||||
/*
 * Advance the automaton `dfa` (a pointer expression) by one input value `ch`.
 *
 * Nothing happens when the automaton is already dead (state < 0).  Otherwise
 * the row of the current state is looked up at index `ch`: a negative entry
 * kills the automaton, any other entry selects the arc whose `next` becomes
 * the new state and whose `score` is multiplied into the running score.
 *
 * `ch` is used directly as an array index, so it must be in the range 0..255.
 * Both arguments are parenthesized so that expressions such as `&obj` can be
 * passed; `dfa` is evaluated more than once and must have no side effects.
 */
#define DFA_NEXT(dfa, ch)                                               \
    do {                                                                \
        if ((dfa)->state >= 0) {                                        \
            const int arc__ = (dfa)->states[(dfa)->state][(ch)];        \
            if (arc__ < 0) {                                            \
                (dfa)->state = -1;                                      \
            } else {                                                    \
                (dfa)->state = (dfa)->arcs[arc__].next;                 \
                (dfa)->score *= (dfa)->arcs[arc__].score;               \
            }                                                           \
        }                                                               \
    } while (0)
|
||||
|
||||
/* True while the automaton `dfa` (a pointer expression) has not rejected its
   input, i.e. its state is non-negative.  The argument is parenthesized so
   that expressions such as `&obj` can be passed. */
#define DFA_ALIVE(dfa) ((dfa)->state >= 0)
|
||||
|
||||
#endif /* GUESS_JA_H */
|
File diff suppressed because it is too large
Load diff
|
@ -1,219 +0,0 @@
|
|||
/*
|
||||
This file is part of the KDE libraries
|
||||
|
||||
Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
|
||||
Copyright (C) 2007 Nick Shaforostoff (shafff@ukr.net)
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public License
|
||||
along with this library; see the file COPYING.LIB. If not, write to
|
||||
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
Boston, MA 02110-1301, USA.
|
||||
|
||||
*/
|
||||
#ifndef KENCODINGDETECTOR_H
|
||||
#define KENCODINGDETECTOR_H
|
||||
|
||||
#include <kdecore_export.h>
|
||||
#include <QtCore/qstring.h>
|
||||
|
||||
#include <QTextCodec>
|
||||
#include <QTextDecoder>
|
||||
class KEncodingDetectorPrivate;
|
||||
|
||||
/**
|
||||
* @short Provides encoding detection capabilities.
|
||||
*
|
||||
* Searches for encoding declaration inside raw data -- meta and xml tags.
|
||||
* In the case it can't find it, uses heuristics for specified language.
|
||||
*
|
||||
* If it finds unicode BOM marks, it changes encoding regardless of what the user has told
|
||||
*
|
||||
* Intended lifetime of the object: one instance per document.
|
||||
*
|
||||
* Typical use:
|
||||
* \code
|
||||
* QByteArray data;
|
||||
* ...
|
||||
* KEncodingDetector detector;
|
||||
* detector.setAutoDetectLanguage(KEncodingDetector::Cyrillic);
|
||||
* QString out=detector.decode(data);
|
||||
* \endcode
|
||||
*
|
||||
*
|
||||
* Do not mix decode() with decodeWithBuffering()
|
||||
*
|
||||
* @short Guess encoding of char array
|
||||
*
|
||||
*/
|
||||
class KDECORE_EXPORT KEncodingDetector
|
||||
{
|
||||
public:
|
||||
enum EncodingChoiceSource
|
||||
{
|
||||
DefaultEncoding,
|
||||
AutoDetectedEncoding,
|
||||
BOM,
|
||||
EncodingFromXMLHeader,
|
||||
EncodingFromMetaTag,
|
||||
EncodingFromHTTPHeader,
|
||||
UserChosenEncoding
|
||||
};
|
||||
|
||||
enum AutoDetectScript
|
||||
{
|
||||
None,
|
||||
SemiautomaticDetection,
|
||||
Arabic,
|
||||
Baltic,
|
||||
CentralEuropean,
|
||||
ChineseSimplified,
|
||||
ChineseTraditional,
|
||||
Cyrillic,
|
||||
Greek,
|
||||
Hebrew,
|
||||
Japanese,
|
||||
Korean,
|
||||
NorthernSaami,
|
||||
SouthEasternEurope,
|
||||
Thai,
|
||||
Turkish,
|
||||
Unicode,
|
||||
WesternEuropean
|
||||
};
|
||||
|
||||
/**
|
||||
* Default codec is latin1 (as html spec says), EncodingChoiceSource is default, AutoDetectScript=Semiautomatic
|
||||
*/
|
||||
KEncodingDetector();
|
||||
|
||||
/**
|
||||
* Allows to set Default codec, EncodingChoiceSource, AutoDetectScript
|
||||
*/
|
||||
KEncodingDetector(QTextCodec* codec, EncodingChoiceSource source, AutoDetectScript script=None);
|
||||
~KEncodingDetector();
|
||||
|
||||
//const QTextCodec* codec() const;
|
||||
|
||||
/**
|
||||
* @returns true if specified encoding was recognized
|
||||
*/
|
||||
bool setEncoding(const char *encoding, EncodingChoiceSource type);
|
||||
|
||||
/**
|
||||
* Convenience method.
|
||||
* @returns mime name of detected encoding
|
||||
*/
|
||||
const char* encoding() const;
|
||||
|
||||
bool visuallyOrdered() const;
|
||||
|
||||
// void setAutoDetectLanguage( const QString& );
|
||||
// const QString& autoDetectLanguage() const;
|
||||
|
||||
void setAutoDetectLanguage( AutoDetectScript );
|
||||
AutoDetectScript autoDetectLanguage() const;
|
||||
|
||||
EncodingChoiceSource encodingChoiceSource() const;
|
||||
|
||||
/**
|
||||
* The main class method
|
||||
*
|
||||
* Calls protected analyze() only the first time of the whole object life
|
||||
*
|
||||
* Replaces all null chars with spaces.
|
||||
*/
|
||||
QString decode(const char *data, int len);
|
||||
QString decode(const QByteArray &data);
|
||||
|
||||
//* You don't need to call analyze() if you use this method.
|
||||
/**
|
||||
* Convenience method that uses buffering. It waits for full html head to be buffered
|
||||
* (i.e. calls analyze every time until it returns true).
|
||||
*
|
||||
* Replaces all null chars with spaces.
|
||||
*
|
||||
* @returns Decoded data, or empty string, if there was not enough data for accurate detection
|
||||
* @see flush()
|
||||
*/
|
||||
QString decodeWithBuffering(const char *data, int len);
|
||||
|
||||
/**
|
||||
* This method checks whether invalid characters were found
|
||||
* during a decoding operation.
|
||||
*
|
||||
* Note that this bit is never reset once invalid characters have been found.
|
||||
* To force a reset, either change the encoding using setEncoding() or call
|
||||
* resetDecoder()
|
||||
*
|
||||
* @returns a boolean reflecting said state.
|
||||
* @since 4.3
|
||||
* @see resetDecoder() setEncoding()
|
||||
*/
|
||||
bool decodedInvalidCharacters() const;
|
||||
|
||||
/**
|
||||
* Resets the decoder. Any stateful decoding information (such as resulting from previous calls
|
||||
* to decodeWithBuffering()) will be lost.
|
||||
* Will Reset the state of decodedInvalidCharacters() as a side effect.
|
||||
*
|
||||
* @since 4.3
|
||||
* @see decodeWithBuffering() decodedInvalidCharacters()
|
||||
*
|
||||
*/
|
||||
void resetDecoder();
|
||||
|
||||
/**
|
||||
* Convenience method to be used with decodeForHtml. Flushes buffer.
|
||||
* @see decodeForHtml()
|
||||
*/
|
||||
QString flush();
|
||||
|
||||
/**
|
||||
* Takes lang name _after_ it were i18n()'ed
|
||||
*/
|
||||
static AutoDetectScript scriptForName(const QString& lang);
|
||||
static QString nameForScript(AutoDetectScript);
|
||||
static bool hasAutoDetectionForScript(AutoDetectScript);
|
||||
|
||||
protected:
|
||||
/**
|
||||
* This nice method will kill all 0 bytes (or double bytes)
|
||||
* and remember if this was a binary or not ;)
|
||||
*/
|
||||
bool processNull(char* data,int length);
|
||||
|
||||
/**
|
||||
* Check if we are really utf8. Taken from kate
|
||||
*
|
||||
* @returns true if current encoding is utf8 and the text cannot be in this encoding
|
||||
*
|
||||
* Please somebody read http://de.wikipedia.org/wiki/UTF-8 and check this code...
|
||||
*/
|
||||
bool errorsIfUtf8 (const char* data, int length);
|
||||
|
||||
/**
|
||||
* Analyze text data.
|
||||
* @returns true if there was enough data for accurate detection
|
||||
*/
|
||||
bool analyze (const char *data, int len);
|
||||
|
||||
/**
|
||||
* @returns QTextDecoder for detected encoding
|
||||
*/
|
||||
QTextDecoder* decoder();
|
||||
|
||||
private:
|
||||
KEncodingDetectorPrivate* const d;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -57,7 +57,6 @@ KDECORE_UNIT_TESTS(
|
|||
kconfigafterkglobaltest2
|
||||
ksycocathreadtest
|
||||
kdebug_unittest
|
||||
kencodingdetectortest
|
||||
qcoreapptest
|
||||
kdebug_qcoreapptest
|
||||
kmimetype_nomimetypes
|
||||
|
|
|
@ -1,89 +0,0 @@
|
|||
/* This file is part of the KDE libraries
|
||||
Copyright (c) 2009 Germain Garand <germain@ebooksfrance.org>
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public
|
||||
License version 2 as published by the Free Software Foundation.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public License
|
||||
along with this library; see the file COPYING.LIB. If not, write to
|
||||
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "kencodingdetectortest.h"
|
||||
#include "qtest_kde.h"
|
||||
#include <kdebug.h>
|
||||
#include <kencodingdetector.h>
|
||||
|
||||
#include "moc_kencodingdetectortest.cpp"
|
||||
|
||||
static const char data1[] = "this should decode correctly";
|
||||
static const char data2[] = "this is an invalid utf-8 byte: \xBF and another one: \xBE";
|
||||
|
||||
static KEncodingDetector* ed = 0;
|
||||
|
||||
void KEncodingDetectorTest::initTestCase()
|
||||
{
|
||||
ed = new KEncodingDetector();
|
||||
}
|
||||
|
||||
void KEncodingDetectorTest::testSetEncoding()
|
||||
{
|
||||
QCOMPARE(ed->setEncoding( "iso8859-1", KEncodingDetector::UserChosenEncoding ), true);
|
||||
QCOMPARE(ed->setEncoding( "utf-8", KEncodingDetector::UserChosenEncoding ), true);
|
||||
}
|
||||
|
||||
void KEncodingDetectorTest::testDecode()
|
||||
{
|
||||
QString s = ed->decode( data1, sizeof(data1)-1);
|
||||
QCOMPARE(ed->decodedInvalidCharacters(), false);
|
||||
QString s2 = ed->decode( data2, sizeof(data2)-1);
|
||||
QCOMPARE(ed->decodedInvalidCharacters(), true);
|
||||
QCOMPARE( s == data1, true );
|
||||
|
||||
ed->resetDecoder();
|
||||
QCOMPARE(ed->decodedInvalidCharacters(), false);
|
||||
|
||||
// set to automatic detection
|
||||
ed->setEncoding( "", KEncodingDetector::DefaultEncoding );
|
||||
|
||||
// decodeWithBuffering should just accumulate the buffer here,
|
||||
// waiting for some HTML/XML encoding tags
|
||||
s = ed->decodeWithBuffering(data2, sizeof data2 -1);
|
||||
|
||||
// shouldn't even decode anything yet, so:
|
||||
QCOMPARE(s.isEmpty(), true);
|
||||
QCOMPARE(ed->decodedInvalidCharacters(), false);
|
||||
|
||||
// force encoding, as the high bytes must have switched the encoding
|
||||
// to anything *but* utf-8
|
||||
QCOMPARE(QString("utf-8").startsWith(ed->encoding(), Qt::CaseInsensitive), false);
|
||||
ed->setEncoding( "utf-8", KEncodingDetector::UserChosenEncoding );
|
||||
QCOMPARE(QString("utf-8").startsWith(ed->encoding(), Qt::CaseInsensitive), true);
|
||||
|
||||
// force decoding now
|
||||
s = ed->flush();
|
||||
QCOMPARE(s.isEmpty(), false);
|
||||
QCOMPARE(ed->decodedInvalidCharacters(), true);
|
||||
|
||||
// now check that resetDecoder() empties the buffer
|
||||
s2 = ed->decodeWithBuffering(data1, sizeof data1 -1);
|
||||
ed->resetDecoder();
|
||||
s2 = ed->flush();
|
||||
QCOMPARE(s2.isEmpty(), true);
|
||||
|
||||
// check that buffered decoding with non-overridable specified codec decodes right away
|
||||
ed->setEncoding( "utf-8", KEncodingDetector::EncodingFromHTTPHeader );
|
||||
s = ed->decodeWithBuffering(data2, sizeof data2 -1);
|
||||
|
||||
QCOMPARE( s.isEmpty(), false );
|
||||
QCOMPARE( ed->decodedInvalidCharacters(), true );
|
||||
}
|
||||
|
||||
QTEST_KDEMAIN_CORE(KEncodingDetectorTest)
|
|
@ -1,33 +0,0 @@
|
|||
/* This file is part of the KDE libraries
|
||||
Copyright (c) 2009 Germain Garand <germain@ebooksfrance.org>
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public
|
||||
License version 2 as published by the Free Software Foundation.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public License
|
||||
along with this library; see the file COPYING.LIB. If not, write to
|
||||
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KENCODINGDETECTORTEST_H
|
||||
#define KENCODINGDETECTORTEST_H
|
||||
|
||||
#include <QtCore/QObject>
|
||||
|
||||
class KEncodingDetectorTest : public QObject
|
||||
{
|
||||
Q_OBJECT
|
||||
private Q_SLOTS:
|
||||
void initTestCase();
|
||||
void testSetEncoding();
|
||||
void testDecode();
|
||||
};
|
||||
|
||||
#endif // KENCODINGDETECTORTEST_H
|
|
@ -194,7 +194,6 @@ namespace KStringHandler
|
|||
|
||||
@param str the input string
|
||||
@return the (hopefully correctly guessed) QString representation of @p str
|
||||
@see KEncodingDetector
|
||||
|
||||
*/
|
||||
KDECORE_EXPORT QString from8Bit( const char *str );
|
||||
|
|
|
@ -45,39 +45,37 @@ class KCodecAction::Private
|
|||
public:
|
||||
Private(KCodecAction *parent)
|
||||
: q(parent),
|
||||
defaultAction(0),
|
||||
currentSubAction(0)
|
||||
{
|
||||
}
|
||||
|
||||
void init(bool);
|
||||
void init();
|
||||
|
||||
void _k_subActionTriggered(QAction*);
|
||||
|
||||
KCodecAction *q;
|
||||
QAction *defaultAction;
|
||||
QAction *currentSubAction;
|
||||
};
|
||||
|
||||
KCodecAction::KCodecAction(QObject *parent,bool showAutoOptions)
|
||||
KCodecAction::KCodecAction(QObject *parent)
|
||||
: KSelectAction(parent)
|
||||
, d(new Private(this))
|
||||
{
|
||||
d->init(showAutoOptions);
|
||||
d->init();
|
||||
}
|
||||
|
||||
KCodecAction::KCodecAction(const QString &text, QObject *parent,bool showAutoOptions)
|
||||
KCodecAction::KCodecAction(const QString &text, QObject *parent)
|
||||
: KSelectAction(text, parent)
|
||||
, d(new Private(this))
|
||||
{
|
||||
d->init(showAutoOptions);
|
||||
d->init();
|
||||
}
|
||||
|
||||
KCodecAction::KCodecAction(const KIcon &icon, const QString &text, QObject *parent,bool showAutoOptions)
|
||||
KCodecAction::KCodecAction(const KIcon &icon, const QString &text, QObject *parent)
|
||||
: KSelectAction(icon, text, parent)
|
||||
, d(new Private(this))
|
||||
{
|
||||
d->init(showAutoOptions);
|
||||
d->init();
|
||||
}
|
||||
|
||||
KCodecAction::~KCodecAction()
|
||||
|
@ -85,26 +83,13 @@ KCodecAction::~KCodecAction()
|
|||
delete d;
|
||||
}
|
||||
|
||||
void KCodecAction::Private::init(bool showAutoOptions)
|
||||
void KCodecAction::Private::init()
|
||||
{
|
||||
q->setToolBarMode(MenuMode);
|
||||
defaultAction = q->addAction(i18nc("Encodings menu", "Default"));
|
||||
|
||||
int i;
|
||||
foreach(const QStringList &encodingsForScript, KGlobal::charsets()->encodingsByScript())
|
||||
{
|
||||
KSelectAction* tmp = new KSelectAction(encodingsForScript.at(0),q);
|
||||
if (showAutoOptions)
|
||||
{
|
||||
KEncodingDetector::AutoDetectScript scri=KEncodingDetector::scriptForName(encodingsForScript.at(0));
|
||||
if (KEncodingDetector::hasAutoDetectionForScript(scri))
|
||||
{
|
||||
tmp->addAction(i18nc("Encodings menu","Autodetect"))->setData(QVariant((uint)scri));
|
||||
tmp->menu()->addSeparator();
|
||||
}
|
||||
}
|
||||
for (i=1; i<encodingsForScript.size(); ++i)
|
||||
{
|
||||
foreach(const QStringList &encodingsForScript, KGlobal::charsets()->encodingsByScript()) {
|
||||
KSelectAction* tmp = new KSelectAction(encodingsForScript.at(0), q);
|
||||
for (int i = 1; i<encodingsForScript.size(); ++i) {
|
||||
tmp->addAction(encodingsForScript.at(i));
|
||||
}
|
||||
q->connect(tmp,SIGNAL(triggered(QAction*)),q,SLOT(_k_subActionTriggered(QAction*)));
|
||||
|
@ -122,26 +107,23 @@ int KCodecAction::mibForName(const QString &codecName, bool *ok) const
|
|||
int mib = MIB_DEFAULT;
|
||||
KCharsets *charsets = KGlobal::charsets();
|
||||
|
||||
if (codecName == d->defaultAction->text())
|
||||
success = true;
|
||||
else
|
||||
{
|
||||
QTextCodec *codec = charsets->codecForName(codecName, success);
|
||||
if (!success)
|
||||
{
|
||||
if (!success) {
|
||||
// Maybe we got a description name instead
|
||||
codec = charsets->codecForName(charsets->encodingForName(codecName), success);
|
||||
}
|
||||
|
||||
if (codec)
|
||||
if (codec) {
|
||||
mib = codec->mibEnum();
|
||||
}
|
||||
|
||||
if (ok)
|
||||
if (ok) {
|
||||
*ok = success;
|
||||
}
|
||||
|
||||
if (success)
|
||||
if (success) {
|
||||
return mib;
|
||||
}
|
||||
|
||||
kWarning() << "Invalid codec name: " << codecName;
|
||||
return MIB_DEFAULT;
|
||||
|
@ -149,43 +131,24 @@ int KCodecAction::mibForName(const QString &codecName, bool *ok) const
|
|||
|
||||
QTextCodec *KCodecAction::codecForMib(int mib) const
|
||||
{
|
||||
if (mib == MIB_DEFAULT)
|
||||
{
|
||||
// FIXME offer to change the default codec
|
||||
if (mib == MIB_DEFAULT) {
|
||||
return QTextCodec::codecForLocale();
|
||||
}
|
||||
else
|
||||
return QTextCodec::codecForMib(mib);
|
||||
}
|
||||
|
||||
void KCodecAction::actionTriggered(QAction *action)
|
||||
{
|
||||
//we don't want to emit any signals from top-level items
|
||||
//except for the default one
|
||||
if (action==d->defaultAction)
|
||||
{
|
||||
emit triggered(KEncodingDetector::SemiautomaticDetection);
|
||||
emit defaultItemTriggered();
|
||||
}
|
||||
}
|
||||
|
||||
void KCodecAction::Private::_k_subActionTriggered(QAction *action)
|
||||
{
|
||||
if (currentSubAction==action)
|
||||
if (currentSubAction == action) {
|
||||
return;
|
||||
currentSubAction=action;
|
||||
}
|
||||
currentSubAction = action;
|
||||
bool ok = false;
|
||||
int mib = q->mibForName(action->text(), &ok);
|
||||
if (ok)
|
||||
{
|
||||
if (ok) {
|
||||
emit q->triggered(action->text());
|
||||
emit q->triggered(q->codecForMib(mib));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!action->data().isNull())
|
||||
emit q->triggered((KEncodingDetector::AutoDetectScript) action->data().toUInt());
|
||||
}
|
||||
}
|
||||
|
||||
QTextCodec *KCodecAction::currentCodec() const
|
||||
|
@ -195,21 +158,15 @@ QTextCodec *KCodecAction::currentCodec() const
|
|||
|
||||
bool KCodecAction::setCurrentCodec( QTextCodec *codec )
|
||||
{
|
||||
if (!codec)
|
||||
if (!codec) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int i,j;
|
||||
for (i=0;i<actions().size();++i)
|
||||
{
|
||||
if (actions().at(i)->menu())
|
||||
{
|
||||
for (j=0;j<actions().at(i)->menu()->actions().size();++j)
|
||||
{
|
||||
if (!j && !actions().at(i)->menu()->actions().at(j)->data().isNull())
|
||||
continue;
|
||||
if (codec==KGlobal::charsets()->codecForName(actions().at(i)->menu()->actions().at(j)->text()))
|
||||
{
|
||||
d->currentSubAction=actions().at(i)->menu()->actions().at(j);
|
||||
for (int i = 0; i <actions().size(); ++i) {
|
||||
if (actions().at(i)->menu()) {
|
||||
for (int j = 0; j < actions().at(i)->menu()->actions().size(); ++j) {
|
||||
if (codec == KGlobal::charsets()->codecForName(actions().at(i)->menu()->actions().at(j)->text())) {
|
||||
d->currentSubAction = actions().at(i)->menu()->actions().at(j);
|
||||
d->currentSubAction->trigger();
|
||||
return true;
|
||||
}
|
||||
|
@ -225,7 +182,7 @@ QString KCodecAction::currentCodecName() const
|
|||
return d->currentSubAction->text();
|
||||
}
|
||||
|
||||
bool KCodecAction::setCurrentCodec( const QString &codecName )
|
||||
bool KCodecAction::setCurrentCodec(const QString &codecName)
|
||||
{
|
||||
return setCurrentCodec(KGlobal::charsets()->codecForName(codecName));
|
||||
}
|
||||
|
@ -235,47 +192,9 @@ int KCodecAction::currentCodecMib() const
|
|||
return mibForName(currentCodecName());
|
||||
}
|
||||
|
||||
bool KCodecAction::setCurrentCodec( int mib )
|
||||
bool KCodecAction::setCurrentCodec(int mib)
|
||||
{
|
||||
if (mib == MIB_DEFAULT)
|
||||
return setCurrentAction(d->defaultAction);
|
||||
else
|
||||
return setCurrentCodec(codecForMib(mib));
|
||||
}
|
||||
|
||||
KEncodingDetector::AutoDetectScript KCodecAction::currentAutoDetectScript() const
|
||||
{
|
||||
return d->currentSubAction->data().isNull()?
|
||||
KEncodingDetector::None :
|
||||
(KEncodingDetector::AutoDetectScript)d->currentSubAction->data().toUInt();
|
||||
}
|
||||
|
||||
bool KCodecAction::setCurrentAutoDetectScript(KEncodingDetector::AutoDetectScript scri)
|
||||
{
|
||||
if (scri==KEncodingDetector::SemiautomaticDetection)
|
||||
{
|
||||
d->currentSubAction=d->defaultAction;
|
||||
d->currentSubAction->trigger();
|
||||
return true;
|
||||
}
|
||||
|
||||
int i;
|
||||
for (i=0;i<actions().size();++i)
|
||||
{
|
||||
if (actions().at(i)->menu())
|
||||
{
|
||||
if (!actions().at(i)->menu()->actions().isEmpty()
|
||||
&&!actions().at(i)->menu()->actions().at(0)->data().isNull()
|
||||
&&actions().at(i)->menu()->actions().at(0)->data().toUInt()==(uint)scri
|
||||
)
|
||||
{
|
||||
d->currentSubAction=actions().at(i)->menu()->actions().at(0);
|
||||
d->currentSubAction->trigger();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
#include "moc_kcodecaction.cpp"
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
#ifndef KCODECACTION_H
|
||||
#define KCODECACTION_H
|
||||
|
||||
#include <kencodingdetector.h>
|
||||
#include <QTextCodec>
|
||||
#include <kselectaction.h>
|
||||
|
||||
/**
|
||||
|
@ -35,8 +35,7 @@
|
|||
*
|
||||
* This action shows up a submenu with a list of the available codecs on the system.
|
||||
*/
|
||||
class KDEUI_EXPORT KCodecAction
|
||||
: public KSelectAction
|
||||
class KDEUI_EXPORT KCodecAction : public KSelectAction
|
||||
{
|
||||
Q_OBJECT
|
||||
|
||||
|
@ -44,11 +43,9 @@ class KDEUI_EXPORT KCodecAction
|
|||
Q_PROPERTY(int codecMib READ currentCodecMib)
|
||||
|
||||
public:
|
||||
explicit KCodecAction(QObject *parent,bool showAutoOptions=false);
|
||||
|
||||
KCodecAction(const QString &text, QObject *parent,bool showAutoOptions=false);
|
||||
|
||||
KCodecAction(const KIcon &icon, const QString &text, QObject *parent,bool showAutoOptions=false);
|
||||
explicit KCodecAction(QObject *parent);
|
||||
KCodecAction(const QString &text, QObject *parent);
|
||||
KCodecAction(const KIcon &icon, const QString &text, QObject *parent);
|
||||
|
||||
virtual ~KCodecAction();
|
||||
|
||||
|
@ -65,20 +62,6 @@ public:
|
|||
int currentCodecMib() const;
|
||||
bool setCurrentCodec(int mib);
|
||||
|
||||
/**
|
||||
* Applicable only if showAutoOptions in c'tor was true
|
||||
*
|
||||
* @returns KEncodingDetector::None if specific encoding is selected, not autodetection, otherwise... you know it!
|
||||
*/
|
||||
KEncodingDetector::AutoDetectScript currentAutoDetectScript() const;
|
||||
/**
|
||||
* Applicable only if showAutoOptions in c'tor was true
|
||||
*
|
||||
* KEncodingDetector::SemiautomaticDetection means 'Default' item
|
||||
*/
|
||||
bool setCurrentAutoDetectScript(KEncodingDetector::AutoDetectScript);
|
||||
|
||||
|
||||
Q_SIGNALS:
|
||||
/**
|
||||
* Specific (proper) codec was selected
|
||||
|
@ -87,23 +70,6 @@ Q_SIGNALS:
|
|||
*/
|
||||
void triggered(QTextCodec *codec);
|
||||
|
||||
/**
|
||||
* Autodetection has been selected.
|
||||
* emits KEncodingDetector::SemiautomaticDetection if Default was selected.
|
||||
*
|
||||
* Applicable only if showAutoOptions in c'tor was true
|
||||
*/
|
||||
void triggered(KEncodingDetector::AutoDetectScript);
|
||||
|
||||
/**
|
||||
* If showAutoOptions==true, then better handle triggered(KEncodingDetector::AutoDetectScript) signal
|
||||
*/
|
||||
void defaultItemTriggered();
|
||||
|
||||
|
||||
protected Q_SLOTS:
|
||||
virtual void actionTriggered(QAction*);
|
||||
|
||||
protected:
|
||||
using KSelectAction::triggered;
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue