mirror of
https://bitbucket.org/smil3y/kdelibs.git
synced 2025-02-24 19:02:48 +00:00
114 lines
4.2 KiB
C
114 lines
4.2 KiB
C
![]() |
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* -*- C++ -*-
* Copyright (C) 1998 <developer@mozilla.org>
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
|
||
|
|
||
|
#ifndef NSSBCHARSETPROBER_H
|
||
|
#define NSSBCHARSETPROBER_H
|
||
|
|
||
|
#include "nsCharSetProber.h"
|
||
|
|
||
|
// Tuning constants shared with the single-byte prober implementation.
// They stay preprocessor macros so existing users of these names keep working.

// Dimension of the square precedence matrix ([SAMPLE_SIZE][SAMPLE_SIZE]).
#define SAMPLE_SIZE 64
#define SB_ENOUGH_REL_THRESHOLD 1024
// Float literals with the same values as the former (float)0.95 / (float)0.05 casts.
#define POSITIVE_SHORTCUT_THRESHOLD 0.95f
#define NEGATIVE_SHORTCUT_THRESHOLD 0.05f
#define SYMBOL_CAT_ORDER 250
// Number of sequence categories; sizes the per-category counter array.
#define NUMBER_OF_SEQ_CAT 4
// Highest and lowest category indices.
#define POSITIVE_CAT (NUMBER_OF_SEQ_CAT - 1)
#define NEGATIVE_CAT 0
|
||
|
|
||
|
namespace kencodingprober {
|
||
|
// Language model for one single-byte charset. It is a plain aggregate, so
// it can still be brace-initialized in member order.
struct SequenceModel
{
    // [256] table used to find a char's order
    const unsigned char *charToOrderMap;
    // [SAMPLE_SIZE][SAMPLE_SIZE] table used to find a 2-char sequence's frequency
    const char *precedenceMatrix;
    // = freqSeqs / totalSeqs
    float mTypicalPositiveRatio;
    // says if this script contains English characters (not implemented)
    bool keepEnglishLetter;
    // charset name string associated with this model
    const char *charsetName;
};
|
||
|
|
||
|
|
||
|
/**
 * Charset prober driven by a single SequenceModel.
 *
 * Only the interface and the bookkeeping members are declared here;
 * GetCharSetName(), HandleData(), Reset() and GetConfidence() are defined
 * out of line.
 */
class KDE_NO_EXPORT nsSingleByteCharSetProber : public nsCharSetProber
{
public:
    // Probes with `model`, without pair reversal and without a name prober.
    // Only the pointer is stored, so the model must stay valid for as long
    // as the prober is used.
    // Note: Reset() is called from the constructor, so an override in a
    // derived class would not be reached by this call.
    nsSingleByteCharSetProber(const SequenceModel *model)
        : mModel(model), mReversed(false), mNameProber(0)
    {
        Reset();
    }

    // Same as above, but the caller chooses whether every pair is reversed
    // in the model lookup and may pass an auxiliary name prober (0 for none).
    nsSingleByteCharSetProber(const SequenceModel *model, bool reversed, nsCharSetProber *nameProber)
        : mModel(model), mReversed(reversed), mNameProber(nameProber)
    {
        Reset();
    }

    virtual const char *GetCharSetName();
    virtual nsProbingState HandleData(const char *aBuf, unsigned int aLen);
    // Returns the current probing state.
    virtual nsProbingState GetState(void) { return mState; }
    virtual void Reset(void);
    virtual float GetConfidence(void);
    // Intentionally empty. The spelling "SetOpion" is kept on purpose:
    // presumably it matches a virtual declared in nsCharSetProber -- confirm
    // against the base class before renaming.
    virtual void SetOpion() {}

    // This feature is not implemented yet. Every current language model
    // sets this parameter to false, and no one looks at the parameter or
    // calls this method.
    // Moreover, the nsSBCSGroupProber which calls the HandleData of this
    // prober has a hard-coded call to FilterWithoutEnglishLetters, which
    // gets rid of the English letters.
    bool KeepEnglishLetters() const { return mModel->keepEnglishLetter; } // (not implemented)

#ifdef DEBUG_PROBE
    virtual void DumpStatus();
#endif

protected:
    nsProbingState mState;
    const SequenceModel *mModel;
    const bool mReversed; // true if we need to reverse every pair in the model lookup

    // char order of last character
    unsigned char mLastOrder;

    unsigned int mTotalSeqs;
    unsigned int mSeqCounters[NUMBER_OF_SEQ_CAT];

    unsigned int mTotalChar;
    // characters that fall in our sampling range
    unsigned int mFreqChar;

    // Optional auxiliary prober for name decision. Created and destroyed by the GroupProber.
    nsCharSetProber *mNameProber;
};
|
||
|
|
||
|
|
||
|
// Language models available to nsSingleByteCharSetProber. These are
// declarations only; the objects are not defined in this header.
extern SequenceModel Koi8rModel;
extern SequenceModel Win1251Model;
extern SequenceModel Latin5Model;
extern SequenceModel MacCyrillicModel;
extern SequenceModel Ibm866Model;
extern SequenceModel Ibm855Model;
extern SequenceModel Latin7Model;
extern SequenceModel Win1253Model;
extern SequenceModel Latin5BulgarianModel;
extern SequenceModel Win1251BulgarianModel;
extern SequenceModel Latin2HungarianModel;
extern SequenceModel Win1250HungarianModel;
extern SequenceModel Win1255Model;
|
||
|
}
|
||
|
#endif /* NSSBCHARSETPROBER_H */
|
||
|
|