mirror of
https://bitbucket.org/smil3y/kdelibs.git
synced 2025-02-24 10:52:49 +00:00
126 lines
3.9 KiB
C++
126 lines
3.9 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* -*- C++ -*-
|
|
* Copyright (C) 1998 <developer@mozilla.org>
|
|
*
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining
|
|
* a copy of this software and associated documentation files (the
|
|
* "Software"), to deal in the Software without restriction, including
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
* the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included
|
|
* in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#ifndef __JPCNTX_H__
|
|
#define __JPCNTX_H__
|
|
|
|
#include "kdemacros.h"
|
|
|
|
// Number of frequency categories a two-character sequence can be assigned to;
// it is also the length of the per-category counter array mRelSample.
#define NUM_OF_CATEGORY 6

// GotEnoughData() reports true once more than this many sequences were counted.
#define ENOUGH_REL_THRESHOLD 100
// HandleOneChar() marks the analysis as done once more than this many
// sequences were counted.
#define MAX_REL_THRESHOLD 1000
|
|
namespace kencodingprober {
|
|
// Hiragana frequency category table. It is indexed as [previous][current],
// where both indices are character orders in the range 0..82, and each entry
// is the category of that two-character sequence. Only declared here; the
// definition lives elsewhere.
extern const char jp2CharContext[83][83];
|
|
|
|
class KDE_NO_EXPORT JapaneseContextAnalysis
|
|
{
|
|
public:
|
|
JapaneseContextAnalysis() {Reset();};
|
|
virtual ~JapaneseContextAnalysis() {};
|
|
|
|
void HandleData(const char* aBuf, unsigned int aLen);
|
|
|
|
void HandleOneChar(const char* aStr, unsigned int aCharLen)
|
|
{
|
|
int order;
|
|
|
|
//if we received enough data, stop here
|
|
if (mTotalRel > MAX_REL_THRESHOLD) mDone = true;
|
|
if (mDone) return;
|
|
|
|
//Only 2-bytes characters are of our interest
|
|
order = (aCharLen == 2) ? GetOrder(aStr) : -1;
|
|
if (order != -1 && mLastCharOrder != -1)
|
|
{
|
|
mTotalRel++;
|
|
//count this sequence to its category counter
|
|
mRelSample[(int)jp2CharContext[mLastCharOrder][order]]++;
|
|
}
|
|
mLastCharOrder = order;
|
|
};
|
|
|
|
float GetConfidence();
|
|
void Reset(void);
|
|
void SetOpion(){};
|
|
bool GotEnoughData() {return mTotalRel > ENOUGH_REL_THRESHOLD;};
|
|
|
|
protected:
|
|
virtual int GetOrder(const char* str, unsigned int *charLen) = 0;
|
|
virtual int GetOrder(const char* str) = 0;
|
|
|
|
//category counters, each interger counts sequence in its category
|
|
unsigned int mRelSample[NUM_OF_CATEGORY];
|
|
|
|
//total sequence received
|
|
unsigned int mTotalRel;
|
|
|
|
//The order of previous char
|
|
int mLastCharOrder;
|
|
|
|
//if last byte in current buffer is not the last byte of a character, we
|
|
//need to know how many byte to skip in next buffer.
|
|
unsigned int mNeedToSkipCharNum;
|
|
|
|
//If this flag is set to true, detection is done and conclusion has been made
|
|
bool mDone;
|
|
};
|
|
|
|
|
|
// Context analyser whose GetOrder() recognises hiragana encoded with the
// lead byte '\202' (0x82) and a second byte in 0x9f..0xf1.
class KDE_NO_EXPORT SJISContextAnalysis : public JapaneseContextAnalysis
{
protected:
  // Overload with a charLen out-parameter; implemented out of line.
  int GetOrder(const char* str, unsigned int *charLen);

  // Returns the order (0..82) of the two-byte character at str, or -1 when
  // it is not a hiragana character. The second byte is read only after the
  // first byte has matched.
  int GetOrder(const char* str)
  {
    // We are only interested in hiragana, so the first byte must be '\202'.
    if (*str != '\202') {
      return -1;
    }

    const unsigned char secondByte = static_cast<unsigned char>(str[1]);
    const unsigned char firstHiragana = 0x9f;
    const unsigned char lastHiragana = 0xf1;
    if (secondByte < firstHiragana || secondByte > lastHiragana) {
      return -1;
    }
    return secondByte - firstHiragana;
  }
};
|
|
|
|
// Context analyser whose GetOrder() recognises hiragana encoded with the
// lead byte '\244' (0xa4) and a second byte in 0xa1..0xf3.
class KDE_NO_EXPORT EUCJPContextAnalysis : public JapaneseContextAnalysis
{
protected:
  // Overload with a charLen out-parameter; implemented out of line.
  int GetOrder(const char* str, unsigned int *charLen);

  // Returns the order (0..82) of the two-byte character at str, or -1 when
  // it is not a hiragana character. The second byte is read only after the
  // first byte has matched.
  int GetOrder(const char* str)
  {
    // We are only interested in hiragana, so the first byte must be '\244'.
    if (*str != '\244') {
      return -1;
    }

    const unsigned char secondByte = static_cast<unsigned char>(str[1]);
    const unsigned char firstHiragana = 0xa1;
    const unsigned char lastHiragana = 0xf3;
    if (secondByte < firstHiragana || secondByte > lastHiragana) {
      return -1;
    }
    return secondByte - firstHiragana;
  }
};
|
|
}
|
|
#endif /* __JPCNTX_H__ */
|
|
|