kdelibs/kdecore/localization/probers/nsSBCSGroupProber.cpp
2014-11-13 01:04:59 +02:00

217 lines
6 KiB
C++

/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* -*- C++ -*-
* Copyright (C) 1998 <developer@mozilla.org>
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "nsSBCSGroupProber.h"
#include "nsSBCharSetProber.h"
#include "nsHebrewProber.h"
#include "UnicodeGroupProber.h"
#include <stdio.h>
#include <stdlib.h>
namespace kencodingprober {
// Creates every prober owned by this group and puts the group into its
// initial detecting state via Reset().
//
// Slot layout of mProbers:
//   0..9   one nsSingleByteCharSetProber per language model
//   10     the nsHebrewProber
//   11,12  the two Win1255 model probers (logical / visual Hebrew)
//   13     a UnicodeGroupProber
nsSBCSGroupProber::nsSBCSGroupProber()
{
    // Plain model-driven probers.
    mProbers[0] = new nsSingleByteCharSetProber(&Win1251Model);
    mProbers[1] = new nsSingleByteCharSetProber(&Koi8rModel);
    mProbers[2] = new nsSingleByteCharSetProber(&Latin5Model);
    mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicModel);
    mProbers[4] = new nsSingleByteCharSetProber(&Ibm866Model);
    mProbers[5] = new nsSingleByteCharSetProber(&Ibm855Model);
    mProbers[6] = new nsSingleByteCharSetProber(&Latin7Model);
    mProbers[7] = new nsSingleByteCharSetProber(&Win1253Model);
    mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
    mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);

    // Hebrew uses three cooperating probers. The nsHebrewProber is handed to
    // both Win1255 model probers at construction and is told about them
    // further down. The slot numbers 10, 11 and 12 are hard-coded here and in
    // the wiring / clean-up code below: change them together.
    nsHebrewProber *hebrewHelper = new nsHebrewProber();
    mProbers[10] = hebrewHelper;
    mProbers[11] = new nsSingleByteCharSetProber(&Win1255Model, false, hebrewHelper); // Logical Hebrew
    mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, true, hebrewHelper); // Visual Hebrew

    mProbers[13] = new UnicodeGroupProber();

    const bool hebrewIncomplete = !mProbers[10] || !mProbers[11] || !mProbers[12];
    if (hebrewIncomplete)
    {
        // At least one of the three is missing: give up on Hebrew entirely
        // by destroying and nulling all three slots.
        for (unsigned int slot = 10; slot <= 12; ++slot)
        {
            delete mProbers[slot];
            mProbers[slot] = 0;
        }
    }
    else
    {
        // All three exist: let the Hebrew prober know its logical and
        // visual model probers.
        hebrewHelper->SetModelProbers(mProbers[11], mProbers[12]);
    }

    // latin2 stays disabled until latin1 is available; otherwise all latin1
    // text would be detected as latin2 because the two are so similar.
    //mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
    //mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel);

    Reset();
}
// Destroys every prober held in mProbers, in ascending slot order.
// Slots the constructor nulled need no special case: deleting a null
// pointer is a no-op.
nsSBCSGroupProber::~nsSBCSGroupProber()
{
    unsigned int slot = 0;
    while (slot < NUM_OF_SBCS_PROBERS)
    {
        delete mProbers[slot];
        ++slot;
    }
}
// Returns the charset name reported by the best-guess prober.
//
// If no best guess is recorded yet (mBestGuess == -1), GetConfidence() is
// run to try to establish one. If that still yields nothing, slot 0 is
// adopted as the default and stored in mBestGuess.
const char* nsSBCSGroupProber::GetCharSetName()
{
    // Fast path: an answer is already known.
    if (mBestGuess != -1)
        return mProbers[mBestGuess]->GetCharSetName();

    // Let the confidence scan pick a candidate.
    GetConfidence();

    // No charset seems positive: fall back to the default slot.
    if (mBestGuess == -1)
        mBestGuess = 0;

    return mProbers[mBestGuess]->GetCharSetName();
}
// Returns the group to its initial detecting state.
//
// Every existing prober is reset and marked active; null slots are marked
// inactive. mActiveNum ends up as the number of existing probers, the best
// guess is cleared (-1) and the state becomes eDetecting.
void nsSBCSGroupProber::Reset(void)
{
    mActiveNum = 0;
    for (unsigned int slot = 0; slot < NUM_OF_SBCS_PROBERS; slot++)
    {
        if (!mProbers[slot])
        {
            // Nothing lives in this slot, so it can never take part.
            mIsActive[slot] = false;
            continue;
        }

        mProbers[slot]->Reset();
        mIsActive[slot] = true;
        ++mActiveNum;
    }

    mBestGuess = -1;
    mState = eDetecting;
}
// Feeds one chunk of input to every active prober and returns the group
// state afterwards.
//
// aBuf / aLen describe the raw input. It is first passed through
// FilterWithoutEnglishLetters(); the probers only ever see the filtered
// buffer. If filtering fails or leaves nothing, no prober is called and
// the current state is returned unchanged.
//
// The filter is applied regardless of each prober's KeepEnglishLetters
// setting because, as of now, no prober in this group recognizes a
// language that uses English characters.
//
// The first prober that reports eFoundIt becomes the best guess and moves
// the group to eFoundIt. A prober that reports eNotMe is deactivated; once
// none remain active the group moves to eNotMe.
nsProbingState nsSBCSGroupProber::HandleData(const char* aBuf, unsigned int aLen)
{
    char *filtered = 0;
    unsigned int filteredLen = 0;

    const bool haveInput =
        FilterWithoutEnglishLetters(aBuf, aLen, &filtered, filteredLen) && filteredLen != 0;

    if (haveInput)
    {
        for (unsigned int slot = 0; slot < NUM_OF_SBCS_PROBERS; ++slot)
        {
            if (!mIsActive[slot])
                continue;

            const nsProbingState outcome = mProbers[slot]->HandleData(filtered, filteredLen);

            if (outcome == eFoundIt)
            {
                // A prober is certain: adopt its answer and stop.
                mBestGuess = slot;
                mState = eFoundIt;
                break;
            }

            if (outcome != eNotMe)
                continue; // still undecided, keep it in the running

            // This prober ruled itself out.
            mIsActive[slot] = false;
            mActiveNum--;
            if (mActiveNum <= 0)
            {
                // Nobody left to ask.
                mState = eNotMe;
                break;
            }
        }
    }

    // The filtered buffer is released here on every path; free(0) is harmless
    // when the filter produced no buffer.
    free(filtered);
    return mState;
}
// Reports how confident the group is in its current best guess.
//
// Returns 0.99 when the group state is eFoundIt and 0.01 when it is eNotMe.
// In any other state the active probers are polled: the highest confidence
// is returned and the slot that produced it is stored in mBestGuess. If no
// active prober reports a confidence above zero, 0 is returned and
// mBestGuess is left as it was.
float nsSBCSGroupProber::GetConfidence(void)
{
    if (mState == eFoundIt)
        return static_cast<float>(0.99); // sure yes

    if (mState == eNotMe)
        return static_cast<float>(0.01); // sure no

    float highest = 0.0;
    for (unsigned int slot = 0; slot < NUM_OF_SBCS_PROBERS; ++slot)
    {
        if (!mIsActive[slot])
            continue;

        const float candidate = mProbers[slot]->GetConfidence();
        if (highest < candidate)
        {
            highest = candidate;
            mBestGuess = slot;
        }
    }
    return highest;
}
#ifdef DEBUG_PROBE
void nsSBCSGroupProber::DumpStatus()
{
unsigned int i;
float cf;
cf = GetConfidence();
printf(" SBCS Group Prober --------begin status \r\n");
for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
{
if (!mIsActive[i])
printf(" inactive: [%s] (i.e. confidence is too low).\r\n", mProbers[i]->GetCharSetName());
else
mProbers[i]->DumpStatus();
}
printf(" SBCS Group found best match [%s] confidence %f.\r\n",
mProbers[mBestGuess]->GetCharSetName(), cf);
}
#endif
}