mirror of
https://bitbucket.org/smil3y/kdelibs.git
synced 2025-02-24 10:52:49 +00:00
217 lines
6 KiB
C++
217 lines
6 KiB
C++
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* -*- C++ -*-
|
|
* Copyright (C) 1998 <developer@mozilla.org>
|
|
*
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining
|
|
* a copy of this software and associated documentation files (the
|
|
* "Software"), to deal in the Software without restriction, including
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
* the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included
|
|
* in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "nsSBCSGroupProber.h"
|
|
|
|
#include "nsSBCharSetProber.h"
|
|
#include "nsHebrewProber.h"
|
|
#include "UnicodeGroupProber.h"
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
namespace kencodingprober {
|
|
// Builds the full set of child probers and puts the group into its
// initial detecting state.
nsSBCSGroupProber::nsSBCSGroupProber()
{
  // The Hebrew helper prober and its two model probers live in three
  // consecutive slots; naming them keeps the wiring below in sync.
  const unsigned int hebrewSlot  = 10;
  const unsigned int logicalSlot = 11;
  const unsigned int visualSlot  = 12;

  // One single-byte prober per language model in slots 0-9.
  mProbers[0] = new nsSingleByteCharSetProber(&Win1251Model);
  mProbers[1] = new nsSingleByteCharSetProber(&Koi8rModel);
  mProbers[2] = new nsSingleByteCharSetProber(&Latin5Model);
  mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicModel);
  mProbers[4] = new nsSingleByteCharSetProber(&Ibm866Model);
  mProbers[5] = new nsSingleByteCharSetProber(&Ibm855Model);
  mProbers[6] = new nsSingleByteCharSetProber(&Latin7Model);
  mProbers[7] = new nsSingleByteCharSetProber(&Win1253Model);
  mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
  mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);

  // Both Win1255 model probers are handed the Hebrew prober at construction.
  nsHebrewProber *hebrew = new nsHebrewProber();
  mProbers[hebrewSlot]  = hebrew;
  mProbers[logicalSlot] = new nsSingleByteCharSetProber(&Win1255Model, false, hebrew); // Logical Hebrew
  mProbers[visualSlot]  = new nsSingleByteCharSetProber(&Win1255Model, true, hebrew);  // Visual Hebrew
  mProbers[13] = new UnicodeGroupProber();

  const bool hebrewIncomplete =
      !mProbers[hebrewSlot] || !mProbers[logicalSlot] || !mProbers[visualSlot];
  if (hebrewIncomplete)
  {
    // At least one of the three is missing: give up on Hebrew probing
    // altogether and leave all three slots empty.
    for (unsigned int slot = hebrewSlot; slot <= visualSlot; ++slot)
    {
      delete mProbers[slot];
      mProbers[slot] = 0;
    }
  }
  else
  {
    // Let the Hebrew prober know about its logical and visual model probers.
    hebrew->SetModelProbers(mProbers[logicalSlot], mProbers[visualSlot]);
  }

  // latin2 stays disabled until latin1 is available; otherwise all latin1
  // text would be detected as latin2 because of their similarity.
  //mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
  //mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel);

  Reset();
}
|
|
|
|
// Releases every child prober held in mProbers, in slot order.
// Empty (null) slots are harmless: deleting a null pointer is a no-op.
nsSBCSGroupProber::~nsSBCSGroupProber()
{
  unsigned int slot = 0;
  while (slot < NUM_OF_SBCS_PROBERS)
  {
    delete mProbers[slot];
    ++slot;
  }
}
|
|
|
|
|
|
const char* nsSBCSGroupProber::GetCharSetName()
|
|
{
|
|
//if we have no answer yet
|
|
if (mBestGuess == -1)
|
|
{
|
|
GetConfidence();
|
|
//no charset seems positive
|
|
if (mBestGuess == -1)
|
|
//we will use default.
|
|
mBestGuess = 0;
|
|
}
|
|
return mProbers[mBestGuess]->GetCharSetName();
|
|
}
|
|
|
|
void nsSBCSGroupProber::Reset(void)
|
|
{
|
|
mActiveNum = 0;
|
|
for (unsigned int i = 0; i < NUM_OF_SBCS_PROBERS; i++)
|
|
{
|
|
if (mProbers[i]) // not null
|
|
{
|
|
mProbers[i]->Reset();
|
|
mIsActive[i] = true;
|
|
++mActiveNum;
|
|
}
|
|
else
|
|
mIsActive[i] = false;
|
|
}
|
|
mBestGuess = -1;
|
|
mState = eDetecting;
|
|
}
|
|
|
|
|
|
// Feeds a chunk of input to every active child prober and returns the
// group's (possibly updated) state.
//
// aBuf/aLen describe the raw input. It is first passed through
// FilterWithoutEnglishLetters(); only the filtered buffer is shown to the
// child probers. This is done regardless of each prober's
// KeepEnglishLetters setting since, as of now, no prober here recognizes
// a language with English characters.
nsProbingState nsSBCSGroupProber::HandleData(const char* aBuf, unsigned int aLen)
{
  char *filtered = 0;
  unsigned int filteredLen = 0;

  const bool filterOk = FilterWithoutEnglishLetters(aBuf, aLen, &filtered, filteredLen);

  // Probe only when filtering succeeded and left something to look at.
  if (filterOk && filteredLen != 0)
  {
    for (unsigned int slot = 0; slot < NUM_OF_SBCS_PROBERS; ++slot)
    {
      if (!mIsActive[slot])
        continue;

      const nsProbingState verdict = mProbers[slot]->HandleData(filtered, filteredLen);

      if (verdict == eFoundIt)
      {
        // A child is certain: adopt its answer and stop probing.
        mBestGuess = slot;
        mState = eFoundIt;
        break;
      }

      if (verdict == eNotMe)
      {
        // This child ruled itself out; retire it.
        mIsActive[slot] = false;
        mActiveNum--;
        if (mActiveNum <= 0)
        {
          // Nobody is left, so the whole group is ruled out.
          mState = eNotMe;
          break;
        }
      }
    }
  }

  // The filtered buffer is released on every path (free(0) is a no-op).
  free(filtered);

  return mState;
}
|
|
|
|
float nsSBCSGroupProber::GetConfidence(void)
|
|
{
|
|
unsigned int i;
|
|
float bestConf = 0.0, cf;
|
|
|
|
switch (mState)
|
|
{
|
|
case eFoundIt:
|
|
return (float)0.99; //sure yes
|
|
case eNotMe:
|
|
return (float)0.01; //sure no
|
|
default:
|
|
for (i = 0; i < NUM_OF_SBCS_PROBERS; ++i)
|
|
{
|
|
if (!mIsActive[i])
|
|
continue;
|
|
cf = mProbers[i]->GetConfidence();
|
|
if (bestConf < cf)
|
|
{
|
|
bestConf = cf;
|
|
mBestGuess = i;
|
|
}
|
|
}
|
|
}
|
|
return bestConf;
|
|
}
|
|
|
|
#ifdef DEBUG_PROBE
|
|
void nsSBCSGroupProber::DumpStatus()
|
|
{
|
|
unsigned int i;
|
|
float cf;
|
|
|
|
cf = GetConfidence();
|
|
printf(" SBCS Group Prober --------begin status \r\n");
|
|
for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
|
|
{
|
|
if (!mIsActive[i])
|
|
printf(" inactive: [%s] (i.e. confidence is too low).\r\n", mProbers[i]->GetCharSetName());
|
|
else
|
|
mProbers[i]->DumpStatus();
|
|
}
|
|
printf(" SBCS Group found best match [%s] confidence %f.\r\n",
|
|
mProbers[mBestGuess]->GetCharSetName(), cf);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
|