/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* -*- C++ -*- * Copyright (C) 1998 * * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef NSSBCHARSETPROBER_H #define NSSBCHARSETPROBER_H #include "nsCharSetProber.h" #define SAMPLE_SIZE 64 #define SB_ENOUGH_REL_THRESHOLD 1024 #define POSITIVE_SHORTCUT_THRESHOLD (float)0.95 #define NEGATIVE_SHORTCUT_THRESHOLD (float)0.05 #define SYMBOL_CAT_ORDER 250 #define NUMBER_OF_SEQ_CAT 4 #define POSITIVE_CAT (NUMBER_OF_SEQ_CAT-1) #define NEGATIVE_CAT 0 namespace kencodingprober { typedef struct { const unsigned char *charToOrderMap; // [256] table use to find a char's order const char *precedenceMatrix; // [SAMPLE_SIZE][SAMPLE_SIZE]; table to find a 2-char sequence's frequency float mTypicalPositiveRatio; // = freqSeqs / totalSeqs bool keepEnglishLetter; // says if this script contains English characters (not implemented) const char* charsetName; } SequenceModel; class KDE_NO_EXPORT nsSingleByteCharSetProber : public nsCharSetProber{ public: nsSingleByteCharSetProber(SequenceModel *model) :mModel(model), mReversed(false), mNameProber(0) { Reset(); } nsSingleByteCharSetProber(SequenceModel *model, bool reversed, nsCharSetProber* nameProber) :mModel(model), mReversed(reversed), mNameProber(nameProber) { Reset(); } virtual const char* GetCharSetName(); virtual nsProbingState HandleData(const char* aBuf, unsigned int aLen); virtual nsProbingState GetState(void) {return mState;}; virtual void Reset(void); virtual float GetConfidence(void); virtual void SetOpion() {}; // This feature is not implemented yet. any current language model // contain this parameter as false. No one is looking at this // parameter or calling this method. // Moreover, the nsSBCSGroupProber which calls the HandleData of this // prober has a hard-coded call to FilterWithoutEnglishLetters which gets rid // of the English letters. bool KeepEnglishLetters() {return mModel->keepEnglishLetter;}; // (not implemented) #ifdef DEBUG_PROBE virtual void DumpStatus(); #endif protected: nsProbingState mState; const SequenceModel *mModel; const bool mReversed; // true if we need to reverse every pair in the model lookup //char order of last character unsigned char mLastOrder; unsigned int mTotalSeqs; unsigned int mSeqCounters[NUMBER_OF_SEQ_CAT]; unsigned int mTotalChar; //characters that fall in our sampling range unsigned int mFreqChar; // Optional auxiliary prober for name decision. created and destroyed by the GroupProber nsCharSetProber* mNameProber; }; extern SequenceModel Koi8rModel; extern SequenceModel Win1251Model; extern SequenceModel Latin5Model; extern SequenceModel MacCyrillicModel; extern SequenceModel Ibm866Model; extern SequenceModel Ibm855Model; extern SequenceModel Latin7Model; extern SequenceModel Win1253Model; extern SequenceModel Latin5BulgarianModel; extern SequenceModel Win1251BulgarianModel; extern SequenceModel Latin2HungarianModel; extern SequenceModel Win1250HungarianModel; extern SequenceModel Win1255Model; } #endif /* NSSBCHARSETPROBER_H */