kdelibs/kdecore/localization/probers/JpCntx.h
2014-11-13 01:04:59 +02:00

126 lines
3.9 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* -*- C++ -*-
* Copyright (C) 1998 <developer@mozilla.org>
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef __JPCNTX_H__
#define __JPCNTX_H__
#include "kdemacros.h"
//Number of frequency categories; this is the size of the mRelSample counter array
#define NUM_OF_CATEGORY 6
//GotEnoughData() reports true once more than this many sequences have been counted
#define ENOUGH_REL_THRESHOLD 100
//HandleOneChar() flags the analysis as done once more than this many sequences have been counted
#define MAX_REL_THRESHOLD 1000
namespace kencodingprober {
//Hiragana frequency category table. HandleOneChar() indexes it as
//[order of previous char][order of current char] and uses the stored value
//as an index into the mRelSample category counters.
extern const char jp2CharContext[83][83];
// Base class of the Japanese context analysers. It counts, per frequency
// category, how often two consecutive characters recognised by GetOrder()
// follow each other. Subclasses supply the encoding-specific GetOrder().
class KDE_NO_EXPORT JapaneseContextAnalysis
{
public:
  JapaneseContextAnalysis() { Reset(); }
  virtual ~JapaneseContextAnalysis() {}

  void HandleData(const char* aBuf, unsigned int aLen);

  // Feed a single character (aCharLen bytes starting at aStr) to the analyser.
  void HandleOneChar(const char* aStr, unsigned int aCharLen)
  {
    // Once enough sequences have been collected the analysis is finished.
    if (mTotalRel > MAX_REL_THRESHOLD)
      mDone = true;
    if (mDone)
      return;

    // Only two-byte characters are looked up; everything else has no order.
    int order = -1;
    if (aCharLen == 2)
      order = GetOrder(aStr);

    const bool havePair = (order != -1) && (mLastCharOrder != -1);
    if (havePair)
    {
      mTotalRel++;
      // Credit this (previous, current) sequence to its category counter.
      const int category = (int)jp2CharContext[mLastCharOrder][order];
      mRelSample[category]++;
    }

    mLastCharOrder = order;
  }

  float GetConfidence();
  void Reset(void);
  void SetOpion() {}

  // True once more than ENOUGH_REL_THRESHOLD sequences have been counted.
  bool GotEnoughData() { return mTotalRel > ENOUGH_REL_THRESHOLD; }

protected:
  virtual int GetOrder(const char* str, unsigned int *charLen) = 0;
  virtual int GetOrder(const char* str) = 0;

  // Category counters: each integer counts the sequences seen in its category.
  unsigned int mRelSample[NUM_OF_CATEGORY];

  // Total number of sequences received.
  unsigned int mTotalRel;

  // Order of the previous character (-1 when it had none).
  int mLastCharOrder;

  // If the last byte of the current buffer is not the last byte of a
  // character, this is how many bytes must be skipped in the next buffer.
  unsigned int mNeedToSkipCharNum;

  // Set to true when detection is done and a conclusion has been made.
  bool mDone;
};
// Context analyser whose single-argument GetOrder() recognises the two-byte
// sequences with lead byte '\202' and second byte in 0x9f..0xf1.
class KDE_NO_EXPORT SJISContextAnalysis : public JapaneseContextAnalysis
{
  //SJISContextAnalysis(){};
protected:
  int GetOrder(const char* str, unsigned int *charLen);

  // Returns the zero-based order of the character at str, or -1 when the
  // character is not one of the hiragana this analyser cares about.
  int GetOrder(const char* str)
  {
    // Only hiragana are of interest, so the first byte has to be '\202'.
    if (*str != '\202')
      return -1;

    const unsigned char trail = static_cast<unsigned char>(*(str + 1));
    if (trail < 0x9f || trail > 0xf1)
      return -1;

    return trail - 0x9f;
  }
};
// Context analyser whose single-argument GetOrder() recognises the two-byte
// sequences with lead byte '\244' and second byte in 0xa1..0xf3.
class KDE_NO_EXPORT EUCJPContextAnalysis : public JapaneseContextAnalysis
{
protected:
  int GetOrder(const char* str, unsigned int *charLen);

  // Returns the zero-based order of the character at str, or -1 when the
  // character is not one of the hiragana this analyser cares about.
  int GetOrder(const char* str)
  {
    // Only hiragana are of interest, so the first byte has to be '\244'.
    if (*str != '\244')
      return -1;

    const unsigned char trail = static_cast<unsigned char>(*(str + 1));
    if (trail < 0xa1 || trail > 0xf3)
      return -1;

    return trail - 0xa1;
  }
};
}
#endif /* __JPCNTX_H__ */