mirror of
synced 2025-02-24 19:02:48 +00:00
1442 lines
54 KiB
1442 lines
54 KiB
/* This file is part of the KDE libraries
Copyright (C) 2007 Chusslove Illich <caslav.ilic@gmx.net>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public License
along with this library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/
#include <kuitsemantics_p.h>
#include <config.h>
#include <QHash>
#include <QSet>
#include <QRegExp>
#include <QStack>
#include <QXmlStreamReader>
#include <QDir>
#include <QtAlgorithms>
#include <kdebug.h>
#include <kglobal.h>
#include <kcatalog_p.h>
#include <klocale.h>
// Truncates string, for output of long messages.
// (But don't truncate too much otherwise it's impossible to determine
// which message is faulty if many messages start with the same beginning).
//
// Returns str unchanged when it is at most 80 characters long, otherwise
// its first 80 characters followed by "...".
static QString shorten (const QString &str)
{
    const int maxlen = 80;
    if (str.length() <= maxlen) {
        return str;
    }
    return str.left(maxlen).append(QLatin1String("..."));
}
// Custom entity resolver for QXmlStreamReader.
class KuitEntityResolver : public QXmlStreamEntityResolver
void setEntities (const QHash<QString, QString> &entities)
entityMap = entities;
QString resolveUndeclaredEntity (const QString &name)
QString value = entityMap.value(name);
// This will return empty string if the entity name is not known,
// which will make QXmlStreamReader signal unknown entity error.
return value;
QHash<QString, QString> entityMap;
// -----------------------------------------------------------------------------
// All the tag, attribute, and context marker element enums.
// Enumerations for tags, tag attributes, semantic roles, interface subcues
// and visual formats. Every enumeration starts with None (value 0), which
// stands for "not given / unknown".
namespace Kuit {

    namespace Tag { // tag names
        typedef enum {
            None,
            TopLong, TopShort,
            Title, Subtitle, Para, List, Item, Note, Warning, Link,
            Filename, Application, Command, Resource,
            Emphasis, Email, Numid, Envar, Message, Nl,
            NumIntg, NumReal // internal helpers for numbers, not part of DTD
        } Var;
    }

    namespace Att { // tag attribute names
        // None must stay 0: the attribute set key (attSetKey) of a set
        // holding only None must equal the key of the empty set.
        // Values must also stay below 10, because attSetKey places each
        // attribute in its own decimal position.
        typedef enum {
            None,
            Ctx, Url, Address, Section, Label, Strong,
            Width, Fill // internal helpers for numbers, not part of DTD
        } Var;
    }

    namespace Rol { // semantic roles
        typedef enum {
            None,
            Action, Title, Option, Label, Item, Info
        } Var;
    }

    namespace Cue { // interface subcues
        typedef enum {
            None,
            Button, Inmenu, Intoolbar,
            Window, Menu, Tab, Group, Column, Row,
            Slider, Spinbox, Listbox, Textbox, Chooser,
            Check, Radio,
            Inlistbox, Intable, Inrange, Intext,
            Tooltip, Whatsthis, Status, Progress, Tipoftheday, Credit, Shell
        } Var;
    }

    namespace Fmt { // visual formats
        typedef enum {
            None, Plain, Rich, Term
        } Var;
    }

    typedef Tag::Var TagVar;
    typedef Att::Var AttVar;
    typedef Rol::Var RolVar;
    typedef Cue::Var CueVar;
    typedef Fmt::Var FmtVar;
}
// -----------------------------------------------------------------------------
// All the global data.
// Lookup tables shared by all formatters; filled once by the constructor.
class KuitSemanticsStaticData
{
    public:

    // Name -> enum value lookups.
    QHash<QString, Kuit::TagVar> knownTags;
    QHash<QString, Kuit::AttVar> knownAtts;
    QHash<QString, Kuit::FmtVar> knownFmts;
    QHash<QString, Kuit::RolVar> knownRols;
    QHash<QString, Kuit::CueVar> knownCues;

    // Allowed subtags and attributes per tag, allowed subcues per role.
    QHash<Kuit::TagVar, QSet<Kuit::TagVar> > tagSubs;
    QHash<Kuit::TagVar, QSet<Kuit::AttVar> > tagAtts;
    QHash<Kuit::RolVar, QSet<Kuit::CueVar> > rolCues;

    // Default visual format per role and subcue
    // (Cue::None holds the default for the role alone).
    QHash<Kuit::RolVar, QHash<Kuit::CueVar, Kuit::FmtVar> > defFmts;

    // Enum value -> tag name.
    QHash<Kuit::TagVar, QString> tagNames;

    // Names of HTML tags known to Qt's rich text engine.
    QSet<QString> qtHtmlTagNames;

    // Number of leading newlines for tags that format with them.
    QHash<Kuit::TagVar, int> leadingNewlines;

    // XML entities: name -> text, and text -> name for the default ones.
    QHash<QString, QString> xmlEntities;
    QHash<QString, QString> xmlEntitiesInverse;
    KuitEntityResolver xmlEntityResolver;

    KuitSemanticsStaticData ();
};
// Fills all the lookup tables: known tag/attribute/format/role/subcue names,
// allowed subtags and attributes per tag, default formats per role and
// subcue, HTML tag names of Qt's rich text engine, leading newline counts,
// and XML entity mappings.
KuitSemanticsStaticData::KuitSemanticsStaticData ()
{
    // Setup known tag names, attributes, and subtags.
    // A "lax" version of the DTD.
    #undef SETUP_TAG
    #define SETUP_TAG(tag, name, atts, subs) do { \
        knownTags.insert(QString::fromLatin1(name), Kuit::Tag::tag); \
        tagNames.insert(Kuit::Tag::tag, QString::fromLatin1(name)); \
        { \
            using namespace Kuit::Att; \
            tagAtts[Kuit::Tag::tag] << atts; \
        } \
        { \
            using namespace Kuit::Tag; \
            tagSubs[Kuit::Tag::tag] << subs << NumIntg << NumReal; \
        } \
    } while (0)

    #undef INLINES
    #define INLINES \
        Filename << Link << Application << Command << Resource << \
        Emphasis << Email << Numid << Envar << Nl

    SETUP_TAG(TopLong, "kuit", Ctx, Title << Subtitle << Para);
    SETUP_TAG(TopShort, "kuil", Ctx, INLINES << Note << Warning << Message);

    SETUP_TAG(Title, "title", None, INLINES);
    SETUP_TAG(Subtitle, "subtitle", None, INLINES);
    SETUP_TAG(Para, "para", None,
              INLINES << Note << Warning << Message << List);
    SETUP_TAG(List, "list", None, Item);
    SETUP_TAG(Item, "item", None, INLINES << Note << Warning << Message);

    SETUP_TAG(Note, "note", Label, INLINES);
    SETUP_TAG(Warning, "warning", Label, INLINES);
    SETUP_TAG(Filename, "filename", None, Envar);
    SETUP_TAG(Link, "link", Url, None);
    SETUP_TAG(Application, "application", None, None);
    SETUP_TAG(Command, "command", Section, None);
    SETUP_TAG(Resource, "resource", None, None);
    SETUP_TAG(Emphasis, "emphasis", Strong, None);
    SETUP_TAG(Email, "email", Address, None);
    SETUP_TAG(Envar, "envar", None, None);
    SETUP_TAG(Message, "message", None, None);
    SETUP_TAG(Numid, "numid", None, None);
    SETUP_TAG(Nl, "nl", None, None);
    // Internal, not part of DTD.
    SETUP_TAG(NumIntg, KUIT_NUMINTG, Width << Fill, None);
    SETUP_TAG(NumReal, KUIT_NUMREAL, Width << Fill, None);

    // Setup known attribute names.
    #undef SETUP_ATT
    #define SETUP_ATT(att, name) do { \
        knownAtts.insert(QString::fromLatin1(name), Kuit::Att::att); \
    } while (0)
    SETUP_ATT(Ctx, "ctx");
    SETUP_ATT(Url, "url");
    SETUP_ATT(Address, "address");
    SETUP_ATT(Section, "section");
    SETUP_ATT(Label, "label");
    SETUP_ATT(Strong, "strong");
    // Internal, not part of DTD.
    SETUP_ATT(Width, "width");
    SETUP_ATT(Fill, "fill");

    // Setup known format names.
    #undef SETUP_FMT
    #define SETUP_FMT(fmt, name) do { \
        knownFmts.insert(QString::fromLatin1(name), Kuit::Fmt::fmt); \
    } while (0)
    SETUP_FMT(Plain, "plain");
    SETUP_FMT(Rich, "rich");
    SETUP_FMT(Term, "term");

    // Setup known role names, their default format and subcues.
    #undef SETUP_ROL
    #define SETUP_ROL(rol, name, fmt, cues) do { \
        knownRols.insert(QString::fromLatin1(name), Kuit::Rol::rol); \
        defFmts[Kuit::Rol::rol][Kuit::Cue::None] = Kuit::Fmt::fmt; \
        { \
            using namespace Kuit::Cue; \
            rolCues[Kuit::Rol::rol] << cues; \
        } \
    } while (0)
    SETUP_ROL(Action, "action", Plain,
              Button << Inmenu << Intoolbar);
    SETUP_ROL(Title, "title", Plain,
              Window << Menu << Tab << Group << Column << Row);
    SETUP_ROL(Label, "label", Plain,
              Slider << Spinbox << Listbox << Textbox << Chooser);
    SETUP_ROL(Option, "option", Plain,
              Check << Radio);
    SETUP_ROL(Item, "item", Plain,
              Inmenu << Inlistbox << Intable << Inrange << Intext);
    SETUP_ROL(Info, "info", Rich,
              Tooltip << Whatsthis << Kuit::Cue::Status << Progress
              << Tipoftheday << Credit << Shell);

    // Setup override formats by subcue.
    #undef SETUP_ROLCUEFMT
    #define SETUP_ROLCUEFMT(rol, cue, fmt) do { \
        defFmts[Kuit::Rol::rol][Kuit::Cue::cue] = Kuit::Fmt::fmt; \
    } while (0)
    SETUP_ROLCUEFMT(Info, Status, Plain);
    SETUP_ROLCUEFMT(Info, Progress, Plain);
    SETUP_ROLCUEFMT(Info, Credit, Plain);
    SETUP_ROLCUEFMT(Info, Shell, Term);

    // Setup known subcue names.
    #undef SETUP_CUE
    #define SETUP_CUE(cue, name) do { \
        knownCues.insert(QString::fromLatin1(name), Kuit::Cue::cue); \
    } while (0)
    SETUP_CUE(Button, "button");
    SETUP_CUE(Inmenu, "inmenu");
    SETUP_CUE(Intoolbar, "intoolbar");
    SETUP_CUE(Window, "window");
    SETUP_CUE(Menu, "menu");
    SETUP_CUE(Tab, "tab");
    SETUP_CUE(Group, "group");
    SETUP_CUE(Column, "column");
    SETUP_CUE(Row, "row");
    SETUP_CUE(Slider, "slider");
    SETUP_CUE(Spinbox, "spinbox");
    SETUP_CUE(Listbox, "listbox");
    SETUP_CUE(Textbox, "textbox");
    SETUP_CUE(Chooser, "chooser");
    SETUP_CUE(Check, "check");
    SETUP_CUE(Radio, "radio");
    SETUP_CUE(Inlistbox, "inlistbox");
    SETUP_CUE(Intable, "intable");
    SETUP_CUE(Inrange, "inrange");
    SETUP_CUE(Intext, "intext");
    SETUP_CUE(Tooltip, "tooltip");
    SETUP_CUE(Whatsthis, "whatsthis");
    SETUP_CUE(Status, "status");
    SETUP_CUE(Progress, "progress");
    SETUP_CUE(Tipoftheday, "tipoftheday");
    SETUP_CUE(Credit, "credit");
    SETUP_CUE(Shell, "shell");

    // Collect all Qt's rich text engine HTML tags, for some checks later.
    // ("cite", not "cita": the name must match the tag Qt actually supports,
    // otherwise <cite> is not recognized as an HTML tag.)
    qtHtmlTagNames << QLatin1String("a") << QLatin1String("address")
                   << QLatin1String("b") << QLatin1String("big")
                   << QLatin1String("blockquote") << QLatin1String("body")
                   << QLatin1String("br") << QLatin1String("center")
                   << QLatin1String("cite") << QLatin1String("code")
                   << QLatin1String("dd") << QLatin1String("dfn")
                   << QLatin1String("div") << QLatin1String("dl")
                   << QLatin1String("dt") << QLatin1String("em")
                   << QLatin1String("font") << QLatin1String("h1")
                   << QLatin1String("h2") << QLatin1String("h3")
                   << QLatin1String("h4") << QLatin1String("h5")
                   << QLatin1String("h6") << QLatin1String("head")
                   << QLatin1String("hr") << QLatin1String("html")
                   << QLatin1String("i") << QLatin1String("img")
                   << QLatin1String("kbd") << QLatin1String("meta")
                   << QLatin1String("li") << QLatin1String("nobr")
                   << QLatin1String("ol") << QLatin1String("p")
                   << QLatin1String("pre") << QLatin1String("qt")
                   << QLatin1String("s") << QLatin1String("samp")
                   << QLatin1String("small") << QLatin1String("span")
                   << QLatin1String("strong") << QLatin1String("sup")
                   << QLatin1String("sub") << QLatin1String("table")
                   << QLatin1String("tbody") << QLatin1String("td")
                   << QLatin1String("tfoot") << QLatin1String("th")
                   << QLatin1String("thead") << QLatin1String("title")
                   << QLatin1String("tr") << QLatin1String("tt")
                   << QLatin1String("u") << QLatin1String("ul")
                   << QLatin1String("var");

    // Tags that format with number of leading newlines.
    #undef SETUP_TAG_NL
    #define SETUP_TAG_NL(tag, nlead) do { \
        leadingNewlines.insert(Kuit::Tag::tag, nlead); \
    } while (0)
    SETUP_TAG_NL(Title, 2);
    SETUP_TAG_NL(Subtitle, 2);
    SETUP_TAG_NL(Para, 2);
    SETUP_TAG_NL(List, 1);

    // Default XML entities, direct and inverse mapping.
    xmlEntities[QString::fromLatin1("lt")] = QString(QLatin1Char('<'));
    xmlEntities[QString::fromLatin1("gt")] = QString(QLatin1Char('>'));
    xmlEntities[QString::fromLatin1("amp")] = QString(QLatin1Char('&'));
    xmlEntities[QString::fromLatin1("apos")] = QString(QLatin1Char('\''));
    xmlEntities[QString::fromLatin1("quot")] = QString(QLatin1Char('"'));
    xmlEntitiesInverse[QString(QLatin1Char('<'))] = QString::fromLatin1("lt");
    xmlEntitiesInverse[QString(QLatin1Char('>'))] = QString::fromLatin1("gt");
    xmlEntitiesInverse[QString(QLatin1Char('&'))] = QString::fromLatin1("amp");
    xmlEntitiesInverse[QString(QLatin1Char('\''))] = QString::fromLatin1("apos");
    xmlEntitiesInverse[QString(QLatin1Char('"'))] = QString::fromLatin1("quot");

    // Custom XML entities.
    xmlEntities[QString::fromLatin1("nbsp")] = QString(QChar(0xa0));

    // Hand the complete entity map to the resolver; without this
    // the resolver would not know any entity.
    xmlEntityResolver.setEntities(xmlEntities);
}
// Global instance of the static data; the functions in this file obtain it
// through the semanticsStaticData accessor.
K_GLOBAL_STATIC(KuitSemanticsStaticData, semanticsStaticData)
// -----------------------------------------------------------------------------
// The KuitSemanticsPrivate methods, they do the work.
class KuitSemanticsPrivate
KuitSemanticsPrivate (const QString &lang_);
QString format (const QString &text, const QString &ctxt) const;
// Get metatranslation (formatting patterns, etc.)
QString metaTr (const char *ctxt, const char *id) const;
// Set visual formatting patterns for text in semantic tags.
void setFormattingPatterns ();
// Compute integer hash key from the set of attributes.
static int attSetKey (const QSet<Kuit::AttVar> &aset = QSet<Kuit::AttVar>());
// Determine visual format by parsing the context marker.
static Kuit::FmtVar formatFromContextMarker (const QString &ctxmark,
const QString &text);
// Determine visual format by parsing tags.
static Kuit::FmtVar formatFromTags (const QString &text);
// Apply appropriate top tag is to the text.
static QString equipTopTag (const QString &text, Kuit::TagVar &toptag);
// Formats the semantic into visual text.
QString semanticToVisualText (const QString &text,
Kuit::FmtVar fmtExp,
Kuit::FmtVar fmtImp) const;
// Final touches to the formatted text.
QString finalizeVisualText (const QString &final,
Kuit::FmtVar fmt,
bool hadQtTag = false,
bool hadAnyHtmlTag = false) const;
// In case of markup errors, try to make result not look too bad.
QString salvageMarkup (const QString &text, Kuit::FmtVar fmt) const;
// Data for XML parsing state.
class OpenEl
typedef enum { Proper, Ignored, Dropout } Handling;
Kuit::TagVar tag;
QString name;
QHash<Kuit::AttVar, QString> avals;
int akey;
QString astr;
Handling handling;
QString formattedText;
// Gather data about current element for the parse state.
KuitSemanticsPrivate::OpenEl parseOpenEl (const QXmlStreamReader &xml,
Kuit::TagVar etag,
const QString &text) const;
// Select visual pattern for given tag+attributes+format combination.
QString visualPattern (Kuit::TagVar tag, int akey, Kuit::FmtVar fmt) const;
// Format text of the element.
QString formatSubText (const QString &ptext, const OpenEl &oel,
Kuit::FmtVar fmt, int numctx) const;
// Count number of newlines at start and at end of text.
static void countWrappingNewlines (const QString &ptext,
int &numle, int &numtr);
// Modifies text for some tags.
QString modifyTagText (const QString &text, Kuit::TagVar tag,
const QHash<Kuit::AttVar, QString> &avals,
int numctx, Kuit::FmtVar fmt) const;
QString m_lang;
QHash<int, // attribute set key
QHash<Kuit::FmtVar, QString> > > m_patterns;
// For fetching metatranslations.
KCatalog *m_metaCat;
// Creates the formatter for the given language and loads its formatting
// patterns from the "kdelibs4" message catalog.
KuitSemanticsPrivate::KuitSemanticsPrivate (const QString &lang)
: m_lang(lang), m_metaCat(NULL)
{
    // NOTE: This function draws translation from raw message catalogs
    // because full i18n system is not available at this point (this
    // function is called within the initialization of the i18n system),
    // Also, pattern/transformation strings are "metastrings", not
    // fully proper i18n strings on their own.

    m_metaCat = new KCatalog(QString::fromLatin1("kdelibs4"), lang);

    // Get formatting patterns for all tag/att/fmt combinations.
    // This must happen while the catalog is alive, since metaTr() falls
    // back to untranslated patterns once m_metaCat is NULL.
    setFormattingPatterns();

    // Catalog not needed any more.
    delete m_metaCat;
    m_metaCat = NULL;
}
// Returns the metatranslation of the message id in the context ctxt,
// taken from the metatranslation catalog. When no catalog is loaded,
// the untranslated id is returned.
QString KuitSemanticsPrivate::metaTr (const char *ctxt, const char *id) const
{
    if (m_metaCat == NULL) {
        return QString::fromLatin1(id);
    }
    return m_metaCat->translate(ctxt, id);
}
// Fills m_patterns with the visual pattern of each tag / attribute set /
// format combination; the pattern text of every entry is fetched
// through metaTr().
// NOTE(review): in this copy of the file many SET_PATTERN invocations below
// have lost their context and/or pattern string arguments, and the function
// has lost its enclosing braces -- restore them from the upstream source
// before building.
void KuitSemanticsPrivate::setFormattingPatterns ()
using namespace Kuit;
// Macro to expedite setting the patterns.
// It stores the pattern under m_patterns[tag][attSetKey(atts)][fmt]; the last
// macro argument is expected to expand to the "context, message" pair
// passed on to metaTr().
#define SET_PATTERN(tag, atts, fmt, ctxt_ptrn) do { \
QSet<AttVar> aset; \
aset << atts; \
int akey = attSetKey(aset); \
QString pattern = metaTr(ctxt_ptrn); \
m_patterns[tag][akey][fmt] = pattern; \
/* Make Term pattern same as Plain, unless explicitly given. */ \
if (fmt == Fmt::Plain && !m_patterns[tag][akey].contains(Fmt::Term)) { \
m_patterns[tag][akey][Fmt::Term] = pattern; \
} \
} while (0)
// Normal I18N_NOOP2 removes context, but below we need both.
#undef I18N_NOOP2
#define I18N_NOOP2(ctxt, msg) ctxt, msg
// Some of the formatting patterns are intentionally not exposed for
// localization.
#undef XXXX_NOOP2
#define XXXX_NOOP2(ctxt, msg) ctxt, msg
// NOTE: The following "i18n:" comments are oddly placed in order that
// xgettext extracts them properly.
// -------> Title
SET_PATTERN(Tag::Title, Att::None, Fmt::Plain,
// i18n: The following messages, with msgctxt "@tag/modifier",
// are KUIT patterns for formatting the text found inside semantic tags.
// For review of the KUIT semantic markup, see the article on Techbase:
// http://techbase.kde.org/Development/Tutorials/Localization/i18n_Semantics
// The "/modifier" tells if the pattern is used for plain text, or rich text
// which can use HTML tags.
// You may be in general satisfied with the patterns as they are in the
// original. Some things you may think about changing:
// - the proper quotes, those used in msgid are English-standard
// - the <i> and <b> tags, does your language script work well with them?
"== %1 =="));
SET_PATTERN(Tag::Title, Att::None, Fmt::Rich,
// i18n: KUIT pattern, see the comment to the first of these entries above.
// -------> Subtitle
SET_PATTERN(Tag::Subtitle, Att::None, Fmt::Plain,
// i18n: KUIT pattern, see the comment to the first of these entries above.
"~ %1 ~"));
SET_PATTERN(Tag::Subtitle, Att::None, Fmt::Rich,
// i18n: KUIT pattern, see the comment to the first of these entries above.
// -------> Para
SET_PATTERN(Tag::Para, Att::None, Fmt::Plain,
// i18n: KUIT pattern, see the comment to the first of these entries above.
SET_PATTERN(Tag::Para, Att::None, Fmt::Rich,
// i18n: KUIT pattern, see the comment to the first of these entries above.
// -------> List
SET_PATTERN(Tag::List, Att::None, Fmt::Plain,
// i18n: KUIT pattern, see the comment to the first of these entries above.
SET_PATTERN(Tag::List, Att::None, Fmt::Rich,
// i18n: KUIT pattern, see the comment to the first of these entries above.
// -------> Item
SET_PATTERN(Tag::Item, Att::None, Fmt::Plain,
// i18n: KUIT pattern, see the comment to the first of these entries above.
" * %1"));
SET_PATTERN(Tag::Item, Att::None, Fmt::Rich,
// i18n: KUIT pattern, see the comment to the first of these entries above.
// -------> Note
SET_PATTERN(Tag::Note, Att::None, Fmt::Plain,
// i18n: KUIT pattern, see the comment to the first of these entries above.
"Note: %1"));
SET_PATTERN(Tag::Note, Att::None, Fmt::Rich,
// i18n: KUIT pattern, see the comment to the first of these entries above.
"<i>Note</i>: %1"));
SET_PATTERN(Tag::Note, Att::Label, Fmt::Plain,
"%1 is the note label, %2 is the text",
// i18n: KUIT pattern, see the comment to the first of these entries above.
"%1: %2"));
SET_PATTERN(Tag::Note, Att::Label, Fmt::Rich,
"%1 is the note label, %2 is the text",
// i18n: KUIT pattern, see the comment to the first of these entries above.
"<i>%1</i>: %2"));
// -------> Warning
SET_PATTERN(Tag::Warning, Att::None, Fmt::Plain,
// i18n: KUIT pattern, see the comment to the first of these entries above.
"WARNING: %1"));
SET_PATTERN(Tag::Warning, Att::None, Fmt::Rich,
// i18n: KUIT pattern, see the comment to the first of these entries above.
"<b>Warning</b>: %1"));
SET_PATTERN(Tag::Warning, Att::Label, Fmt::Plain,
"%1 is the warning label, %2 is the text",
// i18n: KUIT pattern, see the comment to the first of these entries above.
"%1: %2"));
SET_PATTERN(Tag::Warning, Att::Label, Fmt::Rich,
"%1 is the warning label, %2 is the text",
// i18n: KUIT pattern, see the comment to the first of these entries above.
"<b>%1</b>: %2"));
// -------> Link
SET_PATTERN(Tag::Link, Att::None, Fmt::Plain,
// i18n: KUIT pattern, see the comment to the first of these entries above.
SET_PATTERN(Tag::Link, Att::None, Fmt::Rich,
// i18n: KUIT pattern, see the comment to the first of these entries above.
"<a href=\"%1\">%1</a>"));
SET_PATTERN(Tag::Link, Att::Url, Fmt::Plain,
"%1 is the URL, %2 is the descriptive text",
// i18n: KUIT pattern, see the comment to the first of these entries above.
"%2 (%1)"));
SET_PATTERN(Tag::Link, Att::Url, Fmt::Rich,
"%1 is the URL, %2 is the descriptive text",
// i18n: KUIT pattern, see the comment to the first of these entries above.
"<a href=\"%1\">%2</a>"));
// -------> Filename
SET_PATTERN(Tag::Filename, Att::None, Fmt::Plain,
// i18n: KUIT pattern, see the comment to the first of these entries above.
SET_PATTERN(Tag::Filename, Att::None, Fmt::Rich,
// i18n: KUIT pattern, see the comment to the first of these entries above.
// -------> Application
SET_PATTERN(Tag::Application, Att::None, Fmt::Plain,
// i18n: KUIT pattern, see the comment to the first of these entries above.
SET_PATTERN(Tag::Application, Att::None, Fmt::Rich,
// i18n: KUIT pattern, see the comment to the first of these entries above.
// -------> Command
SET_PATTERN(Tag::Command, Att::None, Fmt::Plain,
// i18n: KUIT pattern, see the comment to the first of these entries above.
SET_PATTERN(Tag::Command, Att::None, Fmt::Rich,
// i18n: KUIT pattern, see the comment to the first of these entries above.
SET_PATTERN(Tag::Command, Att::Section, Fmt::Plain,
"%1 is the command name, %2 is its man section",
// i18n: KUIT pattern, see the comment to the first of these entries above.
SET_PATTERN(Tag::Command, Att::Section, Fmt::Rich,
"%1 is the command name, %2 is its man section",
// i18n: KUIT pattern, see the comment to the first of these entries above.
// -------> Resource
SET_PATTERN(Tag::Resource, Att::None, Fmt::Plain,
// i18n: KUIT pattern, see the comment to the first of these entries above.
SET_PATTERN(Tag::Resource, Att::None, Fmt::Rich,
// i18n: KUIT pattern, see the comment to the first of these entries above.
// -------> Emphasis
SET_PATTERN(Tag::Emphasis, Att::None, Fmt::Plain,
// i18n: KUIT pattern, see the comment to the first of these entries above.
SET_PATTERN(Tag::Emphasis, Att::None, Fmt::Rich,
// i18n: KUIT pattern, see the comment to the first of these entries above.
SET_PATTERN(Tag::Emphasis, Att::Strong, Fmt::Plain,
// i18n: KUIT pattern, see the comment to the first of these entries above.
SET_PATTERN(Tag::Emphasis, Att::Strong, Fmt::Rich,
// i18n: KUIT pattern, see the comment to the first of these entries above.
// -------> Email
SET_PATTERN(Tag::Email, Att::None, Fmt::Plain,
// i18n: KUIT pattern, see the comment to the first of these entries above.
SET_PATTERN(Tag::Email, Att::None, Fmt::Rich,
// i18n: KUIT pattern, see the comment to the first of these entries above.
"<<a href=\"mailto:%1\">%1</a>>"));
SET_PATTERN(Tag::Email, Att::Address, Fmt::Plain,
"%1 is name, %2 is address",
// i18n: KUIT pattern, see the comment to the first of these entries above.
"%1 <%2>"));
SET_PATTERN(Tag::Email, Att::Address, Fmt::Rich,
"%1 is name, %2 is address",
// i18n: KUIT pattern, see the comment to the first of these entries above.
"<a href=\"mailto:%2\">%1</a>"));
// -------> Envar
SET_PATTERN(Tag::Envar, Att::None, Fmt::Plain,
// i18n: KUIT pattern, see the comment to the first of these entries above.
SET_PATTERN(Tag::Envar, Att::None, Fmt::Rich,
// i18n: KUIT pattern, see the comment to the first of these entries above.
// -------> Message
SET_PATTERN(Tag::Message, Att::None, Fmt::Plain,
// i18n: KUIT pattern, see the comment to the first of these entries above.
SET_PATTERN(Tag::Message, Att::None, Fmt::Rich,
// i18n: KUIT pattern, see the comment to the first of these entries above.
// -------> Nl
SET_PATTERN(Tag::Nl, Att::None, Fmt::Plain,
// i18n: KUIT pattern, see the comment to the first of these entries above.
SET_PATTERN(Tag::Nl, Att::None, Fmt::Rich,
// i18n: KUIT pattern, see the comment to the first of these entries above.
// Formats the semantic markup in text into visual text.
//   text: the message text, possibly containing KUIT and/or HTML tags
//   ctxt: the message context, possibly starting with a context marker
// Text without any tag only gets the final touches applied. If the markup
// cannot be processed, the result of salvageMarkup() is returned instead.
QString KuitSemanticsPrivate::format (const QString &text,
                                      const QString &ctxt) const
{
    // Parse context marker to determine format.
    Kuit::FmtVar fmtExplicit = formatFromContextMarker(ctxt, text);

    // Quick check: are there any tags at all?
    if (text.indexOf(QLatin1Char('<')) < 0) {
        return finalizeVisualText(text, fmtExplicit);
    }

    // If format not explicitly given, heuristically determine
    // implicit format based on presence or lack of HTML tags.
    Kuit::FmtVar fmtImplicit = fmtExplicit;
    if (fmtExplicit == Kuit::Fmt::None) {
        fmtImplicit = formatFromTags(text);
    }

    // Decide on the top tag, either TopLong or TopShort,
    // and wrap the text with it.
    Kuit::TagVar toptag;
    QString wtext = equipTopTag(text, toptag);

    // Format the text.
    QString ftext = semanticToVisualText(wtext, fmtExplicit, fmtImplicit);
    if (ftext.isEmpty()) { // error while processing markup
        return salvageMarkup(text, fmtImplicit);
    }
    return ftext;
}
int KuitSemanticsPrivate::attSetKey (const QSet<Kuit::AttVar> &aset)
QList<Kuit::AttVar> alist = aset.toList();
int key = 0;
int tenp = 1;
foreach (const Kuit::AttVar &att, alist) {
key += att * tenp;
tenp *= 10;
return key;
// Determines the visual format from the context marker.
//   ctxmark_: the context string, possibly starting with a marker
//   text: the message text, used only in debug messages
// Returns the format named in the marker when it is known; otherwise the
// default format for the role/subcue combination, then for the role alone,
// and Fmt::None when the role has no default.
Kuit::FmtVar KuitSemanticsPrivate::formatFromContextMarker (
    const QString &ctxmark_, const QString &text)
{
    #ifdef NDEBUG
    Q_UNUSED(text); // referenced only by the debug messages below
    #endif

    KuitSemanticsStaticData *s = semanticsStaticData;

    // Semantic context marker is in the form @rolname:cuename/fmtname,
    // and must start just after any leading whitespace in the context string.
    QString rolname;
    QString fmtname;
    QString cuename;
    QString ctxmark = ctxmark_.trimmed();
    if (ctxmark.startsWith(QLatin1Char('@'))) { // found context marker
        static QRegExp staticWsRx(QString::fromLatin1("\\s"));
        QRegExp wsRx = staticWsRx; // for thread-safety
        // The marker runs from just after '@' to the first whitespace,
        // or to the end of the string if there is no whitespace.
        const int pws = wsRx.indexIn(ctxmark);
        ctxmark = pws >= 0 ? ctxmark.mid(1, pws - 1) : ctxmark.mid(1);

        // Possible visual format.
        int pfmt = ctxmark.indexOf(QLatin1Char('/'));
        if (pfmt >= 0) {
            fmtname = ctxmark.mid(pfmt + 1);
            ctxmark = ctxmark.left(pfmt);
        }

        // Possible interface subcue.
        int pcue = ctxmark.indexOf(QLatin1Char(':'));
        if (pcue >= 0) {
            cuename = ctxmark.mid(pcue + 1);
            ctxmark = ctxmark.left(pcue);
        }

        // Semantic role.
        rolname = ctxmark;
    }
    // Names remain empty if marker was not found, which is ok.

    // Normalize names.
    rolname = rolname.trimmed().toLower();
    cuename = cuename.trimmed().toLower();
    fmtname = fmtname.trimmed().toLower();

    // Set role from name.
    Kuit::RolVar rol;
    if (s->knownRols.contains(rolname)) { // known role
        rol = s->knownRols[rolname];
    }
    else { // unknown role
        rol = Kuit::Rol::None;
        if (!rolname.isEmpty()) {
            kDebug(173) << QString::fromLatin1("Unknown semantic role '@%1' in "
                                               "context marker for message {%2}.")
                                              .arg(rolname, shorten(text));
        }
    }

    // Set subcue from name.
    Kuit::CueVar cue;
    if (s->knownCues.contains(cuename)) { // known subcue
        cue = s->knownCues[cuename];
    }
    else { // unknown or not given subcue
        cue = Kuit::Cue::None;
        if (!cuename.isEmpty()) {
            kDebug(173) << QString::fromLatin1("Unknown interface subcue ':%1' in "
                                               "context marker for message {%2}.")
                                              .arg(cuename, shorten(text));
        }
    }

    // Set format from name, or by derivation from contex/subcue.
    Kuit::FmtVar fmt;
    if (s->knownFmts.contains(fmtname)) { // known format
        fmt = s->knownFmts[fmtname];
    }
    else { // unknown or not given format
        // Check first if there is a format defined for role/subcue
        // combination, than for role only, then default to none.
        if (s->defFmts.contains(rol)) {
            if (s->defFmts[rol].contains(cue)) {
                fmt = s->defFmts[rol][cue];
            }
            else {
                fmt = s->defFmts[rol][Kuit::Cue::None];
            }
        }
        else {
            fmt = Kuit::Fmt::None;
        }

        if (!fmtname.isEmpty()) {
            kDebug(173) << QString::fromLatin1("Unknown visual format '/%1' in "
                                               "context marker for message {%2}.")
                                              .arg(fmtname, shorten(text));
        }
    }

    return fmt;
}
// Determines the visual format from the tags found in the text:
// Fmt::Rich as soon as any tag name is one of the HTML tags collected in
// qtHtmlTagNames, Fmt::Plain otherwise.
Kuit::FmtVar KuitSemanticsPrivate::formatFromTags (const QString &text)
{
    KuitSemanticsStaticData *s = semanticsStaticData;
    static QRegExp staticTagRx(QString::fromLatin1("<\\s*(\\w+)[^>]*>"));
    QRegExp tagRx = staticTagRx; // for thread-safety
    int p = tagRx.indexIn(text);
    while (p >= 0) {
        QString tagname = tagRx.cap(1).toLower();
        if (s->qtHtmlTagNames.contains(tagname)) {
            return Kuit::Fmt::Rich;
        }
        // Continue searching just after the tag that was found.
        p = tagRx.indexIn(text, p + tagRx.matchedLength());
    }
    return Kuit::Fmt::Plain;
}
// Decides on the top tag for the text and makes sure the text is wrapped
// with it.
//   text_: the message text
//   toptag: set to the chosen top tag (TopLong or TopShort)
// Returns the original text wrapped in the chosen top tag; if the text
// already opens with a top tag, it is returned without wrapping (and with
// a leading <qt>/<html> tag removed, if there was one).
QString KuitSemanticsPrivate::equipTopTag (const QString &text_,
                                           Kuit::TagVar &toptag)
{
    KuitSemanticsStaticData *s = semanticsStaticData;

    // Unless the text opens either with TopLong or TopShort tags,
    // make a guess: if it opens with one of Title, Subtitle, Para,
    // consider it TopLong, otherwise TopShort.
    static QRegExp staticOpensWithTagRx(QString::fromLatin1("^\\s*<\\s*(\\w+)[^>]*>"));
    QRegExp opensWithTagRx = staticOpensWithTagRx; // for thread-safety
    bool explicitTopTag = false;

    QString text = text_;
    int p = opensWithTagRx.indexIn(text);

    // <qt> or <html> tag are to be ignored for deciding the top tag.
    if (p >= 0) {
        QString fullmatch = opensWithTagRx.cap(0);
        QString tagname = opensWithTagRx.cap(1).toLower();
        if (tagname == QLatin1String("qt") || tagname == QLatin1String("html")) {
            // Kill the tag and see if there is another one following,
            // for primary check below.
            text = text.mid(fullmatch.length());
            p = opensWithTagRx.indexIn(text);
        }
    }

    // Check the first non-<qt>/<html> tag.
    if (p >= 0) { // opens with a tag
        QString tagname = opensWithTagRx.cap(1).toLower();
        if (s->knownTags.contains(tagname)) { // a known tag
            Kuit::TagVar tag = s->knownTags[tagname];
            if (   tag == Kuit::Tag::TopLong
                || tag == Kuit::Tag::TopShort) { // explicitly given top tag
                toptag = tag;
                explicitTopTag = true;
            }
            else if (   tag == Kuit::Tag::Para
                     || tag == Kuit::Tag::Title
                     || tag == Kuit::Tag::Subtitle) { // one of long text tags
                toptag = Kuit::Tag::TopLong;
            }
            else { // not one of long text tags
                toptag = Kuit::Tag::TopShort;
            }
        }
        else { // not a KUIT tag
            toptag = Kuit::Tag::TopShort;
        }
    }
    else { // doesn't open with a tag
        toptag = Kuit::Tag::TopShort;
    }

    // Wrap text with top tag if not explicitly given.
    if (!explicitTopTag) {
        return   QLatin1Char('<') + s->tagNames[toptag] + QLatin1Char('>')
               + text_ // original text, not the one possibly stripped above
               + QLatin1String("</") + s->tagNames[toptag] + QLatin1Char('>');
    }
    else {
        return text;
    }
}
// Regular expression fragment for the inside of an XML entity reference
// (named, decimal, or hexadecimal), without the leading '&' and trailing ';'.
#define ENTITY_SUBRX "[a-z]+|#[0-9]+|#x[0-9a-fA-F]+"
// Formats the semantic markup in text_ into visual text, by parsing it with
// QXmlStreamReader while keeping a stack of the open elements.
//   fmtExp_: format given explicitly by the context marker
//   fmtImp_: format used for formatting the sub-texts of elements
// Returns the finalized text of the top element once it gets closed,
// or an empty string if the reader reports a markup error.
// NOTE(review): in this copy of the file the function has lost its braces
// and several statements (see the notes below) -- restore them from the
// upstream source before building.
QString KuitSemanticsPrivate::semanticToVisualText (const QString &text_,
Kuit::FmtVar fmtExp_,
Kuit::FmtVar fmtImp_) const
KuitSemanticsStaticData *s = semanticsStaticData;
// Replace &-shortcut marker with "&", not to confuse the parser;
// but do not touch & which forms an XML entity as it is.
// NOTE(review): the statement handling the "not an entity" case and the
// one appending the rest of the text after the loop are missing here;
// presumably the bare '&' gets escaped as an entity -- confirm upstream.
// NOTE(review): restRx is a shared static QRegExp matched in place, unlike
// formatFromTags() which matches on a copy for thread-safety.
QString original = text_;
QString text;
int p = original.indexOf(QLatin1Char('&'));
while (p >= 0) {
text.append(original.mid(0, p + 1));
original.remove(0, p + 1);
static QRegExp restRx(QString::fromLatin1("^(" ENTITY_SUBRX ");"));
if (original.indexOf(restRx) != 0) { // not an entity
p = original.indexOf(QLatin1Char('&'));
// Parse state: formats (may be overridden by the top tag's ctx attribute),
// numeric context counter, flags for encountered HTML tags, and the stack
// of currently open elements.
Kuit::FmtVar fmtExp = fmtExp_;
Kuit::FmtVar fmtImp = fmtImp_;
int numCtx = 0;
bool hadQtTag = false;
bool hadAnyHtmlTag = false;
QStack<OpenEl> openEls;
QXmlStreamReader xml(text);
QStringRef lastElementName;
// NOTE(review): no statement advancing the reader is visible inside this
// loop; presumably it was lost together with the braces.
while (!xml.atEnd()) {
if (xml.isStartElement()) {
lastElementName = xml.name();
// Find first proper enclosing element tag.
Kuit::TagVar etag = Kuit::Tag::None;
for (int i = openEls.size() - 1; i >= 0; --i) {
if (openEls[i].handling == OpenEl::Proper) {
etag = openEls[i].tag;
// Collect data about this element.
OpenEl oel = parseOpenEl(xml, etag, text);
if (oel.name == QLatin1String("qt") || oel.name == QLatin1String("html")) {
hadQtTag = true;
if (s->qtHtmlTagNames.contains(oel.name)) {
hadAnyHtmlTag = true;
// If this is top tag, check if it overrides the context marker
// by its ctx attribute.
if (openEls.isEmpty() && oel.avals.contains(Kuit::Att::Ctx)) {
// Resolve format override.
fmtExp = formatFromContextMarker(oel.avals[Kuit::Att::Ctx], text);
fmtImp = fmtExp;
// Record the new element on the parse stack.
// NOTE(review): the statement pushing oel onto openEls is missing here.
// Update numeric context.
// NOTE(review): the body of the following condition is missing here.
if (oel.tag == Kuit::Tag::Numid) {
else if (xml.isEndElement()) {
// Get closed element data.
OpenEl oel = openEls.pop();
// If this was closing of the top element, we're done.
if (openEls.isEmpty()) {
// Return with final touches applied.
return finalizeVisualText(oel.formattedText, fmtExp,
hadQtTag, hadAnyHtmlTag);
// Append formatted text segment.
QString pt = openEls.top().formattedText; // preceding text
openEls.top().formattedText += formatSubText(pt, oel, fmtImp, numCtx);
// Update numeric context.
// NOTE(review): the body of the following condition is missing here.
if (oel.tag == Kuit::Tag::Numid) {
else if (xml.isCharacters()) {
// Stream reader will automatically resolve default XML entities,
// which is not desired in this case, as the final text may
// be rich. Convert them back into entities.
// (This local text shadows the function-level text within this branch.)
QString text = xml.text().toString();
QString ntext;
foreach (const QChar &c, text) {
if (s->xmlEntitiesInverse.contains(c)) {
const QString entname = s->xmlEntitiesInverse[c];
ntext += QLatin1Char('&') + entname + QLatin1Char(';');
} else {
ntext += c;
openEls.top().formattedText += ntext;
// On a parse error report the message and the last start tag seen,
// and signal the failure to the caller by an empty string.
if (xml.hasError()) {
kDebug(173) << QString::fromLatin1("Markup error in message {%1}: %2. Last tag parsed: %3")
.arg(shorten(text), xml.errorString(), lastElementName.toString());
return QString();
// Cannot reach here.
return text;
// Gathers data about the element the reader is currently positioned at.
//   xml: the reader, positioned at a start element
//   etag: tag of the nearest proper enclosing element, Tag::None if none
//   text: the whole message text, used only in debug messages
// Returns the element's name, attribute string, and handling mode:
//   Proper  - known KUIT element allowed inside etag
//   Dropout - KUIT element not allowed inside etag, or a qt/html tag
//   Ignored - any other element, left in verbatim
// For KUIT elements the tag, attribute values and attribute set key are
// resolved too; for the others tag is Tag::None and the key is 0.
KuitSemanticsPrivate::OpenEl
KuitSemanticsPrivate::parseOpenEl (const QXmlStreamReader &xml,
                                   Kuit::TagVar etag,
                                   const QString &text) const
{
    #ifdef NDEBUG
    Q_UNUSED(text); // referenced only by the debug messages below
    #endif

    KuitSemanticsStaticData *s = semanticsStaticData;

    OpenEl oel;
    // Give tag and key defined values: the caller inspects oel.tag for
    // every element, not only for known KUIT elements.
    oel.tag = Kuit::Tag::None;
    oel.akey = 0;
    oel.name = xml.name().toString().toLower();

    // Collect attribute names and values, and format attribute string.
    QStringList attnams, attvals;
    foreach (const QXmlStreamAttribute &xatt, xml.attributes()) {
        attnams += xatt.name().toString().toLower();
        attvals += xatt.value().toString();
        // Quote the value with whichever quote character it does not contain.
        QChar qc = attvals.last().indexOf(QLatin1Char('\'')) < 0 ? QLatin1Char('\'') : QLatin1Char('"');
        oel.astr += QLatin1Char(' ') + attnams.last() + QLatin1Char('=') + qc + attvals.last() + qc;
    }

    if (s->knownTags.contains(oel.name)) { // known KUIT element
        oel.tag = s->knownTags[oel.name];

        // If this element can be contained within enclosing element,
        // mark it proper, otherwise mark it for removal.
        if (etag == Kuit::Tag::None || s->tagSubs[etag].contains(oel.tag)) {
            oel.handling = OpenEl::Proper;
        }
        else {
            oel.handling = OpenEl::Dropout;
            kDebug(173) << QString::fromLatin1("Tag '%1' cannot be subtag of '%2' "
                                               "in message {%3}.")
                                              .arg(s->tagNames[oel.tag], s->tagNames[etag],
                                                   shorten(text));
        }

        // Resolve attributes and compute attribute set key.
        QSet<Kuit::AttVar> attset;
        for (int i = 0; i < attnams.size(); ++i) {
            if (s->knownAtts.contains(attnams[i])) {
                Kuit::AttVar att = s->knownAtts[attnams[i]];
                if (s->tagAtts[oel.tag].contains(att)) {
                    attset << att;
                    oel.avals[att] = attvals[i];
                }
                else {
                    kDebug(173) << QString::fromLatin1("Attribute '%1' cannot be used in "
                                                       "tag '%2' in message {%3}.")
                                                      .arg(attnams[i], oel.name,
                                                           shorten(text));
                }
            }
            else {
                kDebug(173) << QString::fromLatin1("Unknown semantic tag attribute '%1' "
                                                   "in message {%2}.")
                                                  .arg(attnams[i], shorten(text));
            }
        }
        oel.akey = attSetKey(attset);
    }
    else if (oel.name == QLatin1String("qt") || oel.name == QLatin1String("html")) {
        // Drop qt/html tags (gets added in the end).
        oel.handling = OpenEl::Dropout;
    }
    else { // other element, leave it in verbatim
        oel.handling = OpenEl::Ignored;
        if (!s->qtHtmlTagNames.contains(oel.name)) {
            kDebug(173) << QString::fromLatin1("Tag '%1' is neither semantic nor HTML in "
                                               "message {%2}.")
                                              .arg(oel.name, shorten(text));
        }
    }

    return oel;
}
// Selects the formatting pattern for an element.
//
// tag  - the element's tag
// akey - key of the element's attribute set
// fmt  - the visual format
//
// Returns the pattern stored in m_patterns for exactly this
// (tag, akey, fmt) combination, or the plain substitution pattern "%1"
// if there is no such entry.
QString KuitSemanticsPrivate::visualPattern (Kuit::TagVar tag, int akey,
                                             Kuit::FmtVar fmt) const
{
    // Default pattern: simple substitution.
    QString pattern = QString::fromLatin1("%1");

    // See if there is a pattern specifically for this element.
    if (   m_patterns.contains(tag)
        && m_patterns[tag].contains(akey)
        && m_patterns[tag][akey].contains(fmt))
    {
        pattern = m_patterns[tag][akey][fmt];
    }

    return pattern;
}
// Produces the final text of a closed element, ready to be appended
// to the text of its parent.
//
// ptext  - text of the parent element preceding this element
// oel    - the closed element, with its inner text already formatted
// fmt    - the visual format used to select the pattern
// numctx - numeric context, passed on to modifyTagText()
//
// Depending on the element's handling mode, returns:
// - Proper:  inner text substituted into the element's visual pattern,
//            prefixed with newlines if the tag requires them,
// - Ignored: the element written back as markup around its inner text,
// - Dropout: the inner text alone.
QString KuitSemanticsPrivate::formatSubText (const QString &ptext,
                                             const OpenEl &oel,
                                             Kuit::FmtVar fmt,
                                             int numctx) const
{
    KuitSemanticsStaticData *s = semanticsStaticData;

    if (oel.handling == OpenEl::Proper) {
        // Select formatting pattern.
        QString pattern = visualPattern(oel.tag, oel.akey, fmt);

        // Some tags modify their text.
        QString mtext = modifyTagText(oel.formattedText, oel.tag, oel.avals,
                                      numctx, fmt);

        using namespace Kuit;

        // Format text according to pattern.
        // Tags carrying a relevant attribute use two-placeholder patterns;
        // the order of the arguments differs from tag to tag.
        QString ftext;
        if (oel.tag == Tag::Link && oel.avals.contains(Att::Url)) {
            ftext = pattern.arg(oel.avals[Att::Url], mtext);
        }
        else if (oel.tag == Tag::Command && oel.avals.contains(Att::Section)) {
            ftext = pattern.arg(mtext, oel.avals[Att::Section]);
        }
        else if (oel.tag == Tag::Email && oel.avals.contains(Att::Address)) {
            ftext = pattern.arg(mtext, oel.avals[Att::Address]);
        }
        else if (oel.tag == Tag::Note && oel.avals.contains(Att::Label)) {
            ftext = pattern.arg(oel.avals[Att::Label], mtext);
        }
        else if (oel.tag == Tag::Warning && oel.avals.contains(Att::Label)) {
            ftext = pattern.arg(oel.avals[Att::Label], mtext);
        }
        else {
            ftext = pattern.arg(mtext);
        }

        // Handle leading newlines, if this is not start of the text
        // (ptext is the preceding text).
        if (!ptext.isEmpty() && s->leadingNewlines.contains(oel.tag)) {
            // Count number of present newlines.
            int pnumle, pnumtr, fnumle, fnumtr;
            countWrappingNewlines(ptext, pnumle, pnumtr);
            countWrappingNewlines(ftext, fnumle, fnumtr);
            // Number of leading newlines already present.
            int numle = pnumtr + fnumle;
            // The required extra newlines.
            QString strle;
            if (numle < s->leadingNewlines[oel.tag]) {
                strle = QString(s->leadingNewlines[oel.tag] - numle, QLatin1Char('\n'));
            }
            ftext = strle + ftext;
        }

        return ftext;
    }
    else if (oel.handling == OpenEl::Ignored) {
        if (oel.name == QLatin1String("br") || oel.name == QLatin1String("hr")) {
            // Close these tags in-place (just for looks).
            return QLatin1Char('<') + oel.name + QLatin1String("/>");
        }
        else {
            return   QLatin1Char('<') + oel.name + oel.astr + QLatin1Char('>')
                   + oel.formattedText
                   + QLatin1String("</") + oel.name + QLatin1Char('>');
        }
    }
    else { // oel.handling == OpenEl::Dropout
        return oel.formattedText;
    }
}
void KuitSemanticsPrivate::countWrappingNewlines (const QString &text,
int &numle, int &numtr)
int len = text.length();
// Number of newlines at start of text.
numle = 0;
while (numle < len && text[numle] == QLatin1Char('\n')) {
// Number of newlines at end of text.
numtr = 0;
while (numtr < len && text[len - numtr - 1] == QLatin1Char('\n')) {
// Applies tag-specific modification to the inner text of an element.
//
// text   - inner text of the element
// tag    - the element's tag
// avals  - the element's attribute values
// numctx - numeric context; values below 1 mean the element is not
//          within a numeric-id context
// fmt    - the visual format (not consulted by this function)
//
// Returns:
// - for integer and real number tags outside of numeric-id context, the
//   text passed through the locale's formatNumber() and padded to the
//   width given by the width attribute (default 0) with the first
//   character of the fill attribute (default space),
// - for the filename tag, the text with native directory separators,
// - otherwise the text unchanged.
QString KuitSemanticsPrivate::modifyTagText (const QString &text,
                                             Kuit::TagVar tag,
                                             const QHash<Kuit::AttVar, QString> &avals,
                                             int numctx,
                                             Kuit::FmtVar fmt) const
{
    // numctx < 1 means that the number is not in numeric-id context.
    if (   (tag == Kuit::Tag::NumIntg || tag == Kuit::Tag::NumReal)
        && numctx < 1)
    {
        int fieldWidth = avals.value(Kuit::Att::Width, QString(QLatin1Char('0'))).toInt();
        const QString fillStr = avals.value(Kuit::Att::Fill, QString(QLatin1Char(' ')));
        // An explicitly empty fill attribute falls back to space.
        const QChar fillChar = !fillStr.isEmpty() ? fillStr[0] : QChar::fromLatin1(' ');
        return QString::fromLatin1("%1").arg(KGlobal::locale()->formatNumber(text, false), fieldWidth, fillChar);
    }
    else if (tag == Kuit::Tag::Filename) {
        return QDir::toNativeSeparators(text);
    }

    // Fell through, no modification.
    return text;
}
// Applies the final touches to a fully formatted message.
//
// final         - the formatted text of the top element
// fmt           - the visual format
// hadQtTag      - whether a top rich-text tag was seen in the message
// hadAnyHtmlTag - whether any HTML tag was seen in the message
//
// If the format is not rich and no HTML tag was seen, XML entities are
// resolved into characters; entities that cannot be resolved are left
// in the text as they are. If the format is rich or a top rich-text tag
// was seen, the text is wrapped into <html>...</html>.
QString KuitSemanticsPrivate::finalizeVisualText (const QString &final,
                                                  Kuit::FmtVar fmt,
                                                  bool hadQtTag,
                                                  bool hadAnyHtmlTag) const
{
    KuitSemanticsStaticData *s = semanticsStaticData;

    QString text = final;

    // Resolve XML entities if format explicitly not rich
    // and no HTML tag encountered.
    if (fmt != Kuit::Fmt::Rich && !hadAnyHtmlTag)
    {
        static QRegExp staticEntRx(QLatin1String("&(" ENTITY_SUBRX ");"));
        // We have to have a local copy here, otherwise this function
        // will not be thread safe because QRegExp is not thread safe.
        QRegExp entRx = staticEntRx;
        int p = entRx.indexIn(text);
        QString plain;
        // Each pass moves the text before the entity into the result,
        // removes it together with the entity from the remaining text,
        // and then appends what the entity resolves to.
        while (p >= 0) {
            QString ent = entRx.capturedTexts().at(1);
            plain.append(text.mid(0, p));
            // Entity length plus the enclosing '&' and ';'.
            text.remove(0, p + ent.length() + 2);
            if (ent.startsWith(QLatin1Char('#'))) { // numeric character entity
                // NOTE(review): indexing ent[1] assumes ENTITY_SUBRX never
                // matches a lone '#' -- confirm against its definition.
                QChar c;
                bool ok;
                if (ent[1] == QLatin1Char('x')) {
                    c = QChar(ent.mid(2).toInt(&ok, 16));
                } else {
                    c = QChar(ent.mid(1).toInt(&ok, 10));
                }
                if (ok) {
                    plain.append(c);
                } else { // unknown Unicode point, leave as is
                    plain.append(QLatin1Char('&') + ent + QLatin1Char(';'));
                }
            }
            else if (s->xmlEntities.contains(ent)) { // known entity
                plain.append(s->xmlEntities[ent]);
            } else { // unknown entity, just leave as is
                plain.append(QLatin1Char('&') + ent + QLatin1Char(';'));
            }
            p = entRx.indexIn(text);
        }
        // Whatever follows the last entity.
        plain.append(text);
        text = plain;
    }

    // Add top rich tag if format explicitly rich or such tag encountered.
    if (fmt == Kuit::Fmt::Rich || hadQtTag) {
        text = QString::fromLatin1("<html>") + text + QLatin1String("</html>");
    }

    return text;
}
// Resolves KUIT tags in a text by regular expression matching, without
// parsing the text as XML.
//
// text_ - the text to process
// fmt   - the visual format used to select patterns
//
// Works in two passes: first over elements with content, recursing into
// the content of each match, then over content-less elements. A known
// KUIT tag is replaced by its visual pattern for the empty attribute set
// (attributes are ignored); any other tag is kept in the text as it was.
//
// Returns the processed text.
QString KuitSemanticsPrivate::salvageMarkup (const QString &text_,
                                             Kuit::FmtVar fmt) const
{
    KuitSemanticsStaticData *s = semanticsStaticData;
    QString text = text_;
    QString ntext;
    int pos;

    // Resolve KUIT tags simple-mindedly.

    // - tags with content
    static QRegExp staticWrapRx(QLatin1String("(<\\s*(\\w+)\\b([^>]*)>)(.*)(<\\s*/\\s*\\2\\s*>)"));
    QRegExp wrapRx = staticWrapRx; // for thread-safety
    // Match up to the nearest closing tag, so that sibling elements
    // with the same tag are not lumped into a single match.
    wrapRx.setMinimal(true);
    pos = 0;
    while (true) {
        int previousPos = pos;
        pos = wrapRx.indexIn(text, previousPos);
        if (pos < 0) {
            // No more matches, take over the rest of the text.
            ntext += text.mid(previousPos);
            break;
        }
        ntext += text.mid(previousPos, pos - previousPos);
        const QStringList capts = wrapRx.capturedTexts();
        QString tagname = capts[2].toLower();
        QString content = salvageMarkup(capts[4], fmt);
        if (s->knownTags.contains(tagname)) {
            // Select formatting pattern.
            // TODO: Do not ignore attributes (in capts[3]).
            QString pattern = visualPattern(s->knownTags[tagname], 0, fmt);
            ntext += pattern.arg(content);
        } else {
            ntext += capts[1] + content + capts[5];
        }
        pos += wrapRx.matchedLength();
    }
    text = ntext;

    // - content-less tags
    static QRegExp staticNowrRx(QLatin1String("<\\s*(\\w+)\\b([^>]*)/\\s*>"));
    QRegExp nowrRx = staticNowrRx; // for thread-safety
    pos = 0;
    // Start the second pass from empty output; the result of the
    // first pass is now in text.
    ntext.clear();
    while (true) {
        int previousPos = pos;
        pos = nowrRx.indexIn(text, previousPos);
        if (pos < 0) {
            // No more matches, take over the rest of the text.
            ntext += text.mid(previousPos);
            break;
        }
        ntext += text.mid(previousPos, pos - previousPos);
        const QStringList capts = nowrRx.capturedTexts();
        QString tagname = capts[1].toLower();
        if (s->knownTags.contains(tagname)) {
            QString pattern = visualPattern(s->knownTags[tagname], 0, fmt);
            ntext += pattern.arg(QString());
        } else {
            ntext += capts[0];
        }
        pos += nowrRx.matchedLength();
    }
    text = ntext;

    return text;
}
// -----------------------------------------------------------------------------
// The KuitSemantics methods, only delegate to KuitSemanticsPrivate.
// Creates the semantics object for the given language; all the work
// is done by the private implementation object allocated here.
KuitSemantics::KuitSemantics (const QString &lang)
: d(new KuitSemanticsPrivate(lang))
{
}
// Releases the private implementation object.
KuitSemantics::~KuitSemantics ()
{
    delete d;
}
// Formats the text with the given context string; forwards both
// unchanged to the private implementation and returns its result.
QString KuitSemantics::format (const QString &text, const QString &ctxt) const
{
    return d->format(text, ctxt);
}