Commit b0534242 authored by twanvl

Moved part of keyword matching into a separate function; it was way too long

parent c4a752a3
......@@ -393,6 +393,7 @@ void dump(int i, KeywordTrie* t) {
String KeywordDatabase::expand(const String& text,
const ScriptValueP& expand_default,
const ScriptValueP& combine_script,
bool case_sensitive,
Context& ctx) const {
assert(combine_script);
......@@ -408,19 +409,19 @@ String KeywordDatabase::expand(const String& text,
}
// Remove all old reminder texts
String s = remove_tag_contents(text, _("<atom-reminder"));
s = remove_tag_contents(s, _("<atom-keyword")); // OLD, TODO: REMOVEME
s = remove_tag_contents(s, _("<atom-kwpph>"));
s = remove_tag(s, _("<keyword-param"));
s = remove_tag(s, _("<param-"));
String untagged = untag_no_escape(s);
String tagged = remove_tag_contents(text, _("<atom-reminder"));
tagged = remove_tag_contents(tagged, _("<atom-keyword")); // OLD, TODO: REMOVEME
tagged = remove_tag_contents(tagged, _("<atom-kwpph>"));
tagged = remove_tag(tagged, _("<keyword-param"));
tagged = remove_tag(tagged, _("<param-"));
String untagged = untag_no_escape(tagged);
if (!root) return s;
if (!root) return tagged;
String result;
// Find keywords
while (!s.empty()) {
while (!tagged.empty()) {
vector<KeywordTrie*> current; // current location(s) in the trie
vector<KeywordTrie*> next; // location(s) after this step
set<const Keyword*> used; // keywords already investigated
......@@ -433,20 +434,20 @@ String KeywordDatabase::expand(const String& text,
// - 'a' = reminder text in default state, hidden
// - 'A' = reminder text in default state, shown
for (size_t i = 0 ; i < s.size() ;) {
Char c = s.GetChar(i);
for (size_t i = 0 ; i < tagged.size() ;) {
Char c = tagged.GetChar(i);
// tag?
if (c == _('<')) {
if (is_substr(s, i, _("<kw-")) && i + 4 < s.size()) {
expand_type = s.GetChar(i + 4); // <kw-?>
s = s.erase(i, skip_tag(s,i)-i); // remove the tag from the string
} else if (is_substr(s, i, _("</kw-"))) {
if (is_substr(tagged, i, _("<kw-")) && i + 4 < tagged.size()) {
expand_type = tagged.GetChar(i + 4); // <kw-?>
tagged = tagged.erase(i, skip_tag(tagged,i)-i); // remove the tag from the string
} else if (is_substr(tagged, i, _("</kw-"))) {
expand_type = 'a';
s = s.erase(i, skip_tag(s,i)-i); // remove the tag from the string
} else if (is_substr(s, i, _("<atom"))) {
i = match_close_tag_end(s, i); // skip <atom>s
tagged = tagged.erase(i, skip_tag(tagged,i)-i); // remove the tag from the string
} else if (is_substr(tagged, i, _("<atom"))) {
i = match_close_tag_end(tagged, i); // skip <atom>s
} else {
i = skip_tag(s, i);
i = skip_tag(tagged, i);
}
continue;
} else {
......@@ -473,32 +474,69 @@ String KeywordDatabase::expand(const String& text,
FOR_EACH(n, current) {
FOR_EACH(f, n->finished) {
const Keyword* kw = f;
if (used.insert(kw).second) {
// we have found a possible match, which we have not seen before
assert(kw->match_re.IsValid());
if (used.insert(kw).second) {
// we have found a possible match, for a keyword which we have not seen before
if (tryExpand(*kw, i, tagged, untagged, result, expand_type,
expand_default, combine_script, case_sensitive, ctx,
stat, stat_key))
{
// it matches
used.clear();
expand_type = _('a');
goto matched_keyword;
}
}
}
}
}
// Remainder of the string
result += tagged;
tagged.clear();
matched_keyword:;
}
return result;
}
// try to match it against the *untagged* string
if (kw->match_re.Matches(untagged)) {
// Everything before the keyword
bool KeywordDatabase::tryExpand(const Keyword& kw,
size_t expand_type_known_upto,
String& tagged,
String& untagged,
String& result,
char expand_type,
const ScriptValueP& expand_default,
const ScriptValueP& combine_script,
bool case_sensitive,
Context& ctx,
KeywordUsageStatistics* stat,
Value* stat_key) const
{
// try to match regex against the *untagged* string
if (!kw.match_re.Matches(untagged)) return false;
// Find match position
size_t start_u, len_u;
kw->match_re.GetMatch(&start_u, &len_u, 0);
size_t start = untagged_to_index(s, start_u, true),
end = untagged_to_index(s, start_u + len_u, true);
if (start == end) continue; // don't match empty keywords
kw.match_re.GetMatch(&start_u, &len_u, 0);
size_t start = untagged_to_index(tagged, start_u, true),
end = untagged_to_index(tagged, start_u + len_u, true);
if (start == end) return false; // don't match empty keywords
// copy text before keyword
result += remove_tag(s.substr(0, start), _("<kw-"));
result += remove_tag(tagged.substr(0, start), _("<kw-"));
// a part of s has not been searched for <kw- tags
// a part of tagged has not been searched for <kw- tags
// this can happen when the trie incorrectly matches too early
for (size_t j = i+1 ; j < start ;) {
Char c = s.GetChar(j);
for (size_t j = expand_type_known_upto+1 ; j < start ;) {
Char c = tagged.GetChar(j);
if (c == _('<')) {
if (is_substr(s, j, _("<kw-")) && j + 4 < s.size()) {
expand_type = s.GetChar(j + 4); // <kw-?>
} else if (is_substr(s, j, _("</kw-"))) {
if (is_substr(tagged, j, _("<kw-")) && j + 4 < tagged.size()) {
expand_type = tagged.GetChar(j + 4); // <kw-?>
} else if (is_substr(tagged, j, _("</kw-"))) {
expand_type = 'a';
}
j = skip_tag(s, j);
j = skip_tag(tagged, j);
} else {
++j;
}
......@@ -506,22 +544,22 @@ String KeywordDatabase::expand(const String& text,
// Split the keyword, set parameters in context
String total; // the total keyword
size_t match_count = kw->match_re.GetMatchCount();
assert(match_count - 1 == 1 + 2 * kw->parameters.size());
size_t match_count = kw.match_re.GetMatchCount();
assert(match_count - 1 == 1 + 2 * kw.parameters.size());
for (size_t j = 1 ; j < match_count ; ++j) {
// we start counting at 1, so
// j = 1 mod 2 -> text
// j = 0 mod 2 -> parameter #((j-1)/2) == (j/2-1)
size_t start_u, len_u;
kw->match_re.GetMatch(&start_u, &len_u, j);
kw.match_re.GetMatch(&start_u, &len_u, j);
// note: start_u can be (uint)-1 when len_u == 0
size_t part_end = len_u > 0 ? untagged_to_index(s, start_u + len_u, true) : start;
String part = s.substr(start, part_end - start);
size_t part_end = len_u > 0 ? untagged_to_index(tagged, start_u + len_u, true) : start;
String part = tagged.substr(start, part_end - start);
// strip left over </kw tags
part = remove_tag(part,_("</kw-"));
if ((j % 2) == 0) {
// parameter
KeywordParam& kwp = *kw->parameters[j/2-1];
KeywordParam& kwp = *kw.parameters[j/2-1];
String param = untagged.substr(start_u, len_u); // untagged version
// strip separator_before
String separator_before, separator_after;
......@@ -579,7 +617,7 @@ String KeywordDatabase::expand(const String& text,
total += part;
start = part_end;
}
ctx.setVariable(_("mode"), to_script(kw->mode));
ctx.setVariable(_("mode"), to_script(kw.mode));
// Show reminder text?
bool expand = expand_type == _('1');
......@@ -593,9 +631,9 @@ String KeywordDatabase::expand(const String& text,
if (expand) {
String reminder;
try {
reminder = kw->reminder.invoke(ctx)->toString();
reminder = kw.reminder.invoke(ctx)->toString();
} catch (const Error& e) {
handle_error(_ERROR_2_("in keyword reminder", e.what(), kw->keyword), true, false);
handle_error(_ERROR_2_("in keyword reminder", e.what(), kw.keyword), true, false);
}
ctx.setVariable(_("keyword"), to_script(total));
ctx.setVariable(_("reminder"), to_script(reminder));
......@@ -610,28 +648,14 @@ String KeywordDatabase::expand(const String& text,
// Add to usage statistics
if (stat && stat_key) {
stat->push_back(make_pair(stat_key, kw));
stat->push_back(make_pair(stat_key, &kw));
}
// After keyword
s = s.substr(end);
tagged = tagged.substr(end);
untagged = untagged.substr(start_u + len_u);
used.clear();
expand_type = _('a');
goto matched_keyword;
}
}
}
}
}
// Remainder of the string
result += s; s.clear();
matched_keyword:;
}
return result;
return true;
}
// ----------------------------------------------------------------------------- : KeywordParamValue
......
......@@ -152,12 +152,23 @@ class KeywordDatabase {
/// Expand/update all keywords in the given string.
/** Old reminder texts and keyword/parameter tags are first removed from the text,
* then keywords are searched for and expanded one at a time.
*
* @param text the (tagged) string in which keywords are expanded
* @param expand_default script function indicating whether reminder text should be shown by default
* @param combine_script script function to combine keyword and reminder text in some way; must not be null (asserted)
* @param case_sensitive case sensitive matching of keywords?
* @param ctx context for evaluation of scripts
* @return the resulting string; if the database has no keyword trie (root is null)
*         this is the cleaned-up text without any keyword expansion
*/
String expand(const String& text, const ScriptValueP& expand_default, const ScriptValueP& combine_script, Context& ctx) const;
String expand(const String& text, const ScriptValueP& expand_default, const ScriptValueP& combine_script, bool case_sensitive, Context& ctx) const;
private:
KeywordTrie* root; ///< Data structure for finding keywords
/// (try to) expand a single keyword
/** The keyword's regular expression (kw.match_re) is matched against the untagged string.
* If the keyword matches:
* - add the result to out (after the text that precedes the keyword)
* - advance the tagged and untagged string by dropping a part from the front
* - return true
* If the regular expression does not match, or the match is empty, nothing is changed and false is returned.
*
* @param kw the keyword to try
* @param pos position in tagged up to which <kw-?> tags have already been taken into account for
*        expand_type; tags between pos+1 and the start of the match are scanned by this function.
*        (This parameter is named expand_type_known_upto in the definition.)
* @param tagged the remaining text, including tags; on a match it is replaced by the part after the keyword
* @param untagged the remaining text without tags; on a match it is advanced past the keyword as well
* @param out string the output is appended to (named result in the definition)
* @param expand_type character from the enclosing <kw-?> tag ('a' outside such a tag), passed by value
* @param expand_default see expand()
* @param combine_script see expand()
* @param case_sensitive see expand()
* @param ctx context for evaluation of scripts; keyword variables such as "mode" are set in it
* @param stat usage statistics to add to, may be null
* @param stat_key key under which the keyword is recorded in stat; nothing is recorded unless both
*        stat and stat_key are non-null
* @return true if the keyword matched and was expanded
*/
bool tryExpand(const Keyword& kw, size_t pos, String& tagged, String& untagged, String& out, char expand_type,
const ScriptValueP& expand_default, const ScriptValueP& combine_script, bool case_sensitive, Context& ctx,
KeywordUsageStatistics* stat, Value* stat_key) const;
};
// ----------------------------------------------------------------------------- : Processing parameters
......
......@@ -553,7 +553,7 @@ SCRIPT_RULE_2_N_DEP(expand_keywords, ScriptValueP, _("default expand"), default_
SCRIPT_OPTIONAL_PARAM_C_(CardP, card);
WITH_DYNAMIC_ARG(keyword_usage_statistics, card ? &card->keyword_usage : nullptr);
try {
SCRIPT_RETURN(db.expand(input, default_expand, combine, ctx));
SCRIPT_RETURN(db.expand(input, default_expand, combine, true, ctx));
} catch (const Error& e) {
throw ScriptError(_ERROR_2_("in function", e.what(), _("expand_keywords")));
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment