Commit 49a4cff3 authored by twanvl's avatar twanvl

Added break_text function

parent 05304df1
Function: break_text
--Usage--
> break_text(some_string, match: regular expression, in_context: regular expression)
> break_rule(match: ..., in_context: ...)(some_string)
Break text by only keeping the parts of the input that match the regular expression.
The function returns a [[type:list]] of parts.
If @in_context@ is given, the context must also match the string where the match is represented as <tt>&lt;match></tt>.
This function is available in [[script:rule form]].
When @break_text@ is used many times with the same arguments, the rule form is more efficient, because the regular expression is only compiled once.
--Filter vs. break--
The function @filter_text@ is very similar to @break_text@; instead of returning a list, it concatenates the matching parts into a single string.
So for example where @break_text@ would return @["a","b","c"]@, @filter_text@ would return @"abc"@.
In fact, @filter_text@ could be implemented as
> filter_text := { for part in break_text() do part }
--Parameters--
! Parameter Type Description
| @input@ [[type:string]] String to break into parts.
| @match@ [[type:regex]] Regular expression to match.
| @in_context@ [[type:regex]] (optional) Context to match
--Examples--
> break_text(match: "a", "banana") == ["a","a","a"]
> break_text(match: "na|.", "banana") == ["b","a","na","na"]
> break_text(match: "ap", "banana") == []
>
> f := break_rule(match: "xx+")
> f("xyzxxxxyyzz") == ["xxxx"]
--See also--
| [[fun:filter_text|filter_text / filter_rule]]
Keep only the text matching a regular expression.
...@@ -9,7 +9,7 @@ Filter text by only keeping the parts of the input that match the regular expres ...@@ -9,7 +9,7 @@ Filter text by only keeping the parts of the input that match the regular expres
If @in_context@ is given, the context must also match the string where the match is represented as <tt>&lt;match></tt>. If @in_context@ is given, the context must also match the string where the match is represented as <tt>&lt;match></tt>.
This function is available in [[script:rule form]]. This function is available in [[script:rule form]].
When the filter is used many times the rule form is more efficient, because the regular expression is only compiled once. When the @filter_text@ is used many times the rule form is more efficient, because the regular expression is only compiled once.
--Parameters-- --Parameters--
! Parameter Type Description ! Parameter Type Description
...@@ -26,5 +26,7 @@ When the filter is used many times the rule form is more efficient, because the ...@@ -26,5 +26,7 @@ When the filter is used many times the rule form is more efficient, because the
> f("xyzxxyyzz") == "xx" > f("xyzxxyyzz") == "xx"
--See also-- --See also--
| [[fun:break_text|break_text / break_rule]]
Break text into parts each matching a regular expression.
| [[fun:replace|replace / replace_rule]] | [[fun:replace|replace / replace_rule]]
Replace text matching a regular expression. Replace text matching a regular expression.
...@@ -16,6 +16,8 @@ These functions are built into the program, other [[type:function]]s can be defi ...@@ -16,6 +16,8 @@ These functions are built into the program, other [[type:function]]s can be defi
Replace text matching a regular expression. Replace text matching a regular expression.
| [[fun:filter_text|filter_text / filter_rule]] | [[fun:filter_text|filter_text / filter_rule]]
Keep only the text matching a regular expression. Keep only the text matching a regular expression.
| [[fun:break_text|break_text / break_rule]]
Break text into parts each matching a regular expression.
| [[fun:sort_text|sort_text / sort_rule]] | [[fun:sort_text|sort_text / sort_rule]]
Sort the letters in a string using a custom order. Sort the letters in a string using a custom order.
| [[fun:contains]] Does a string contain another one? | [[fun:contains]] Does a string contain another one?
......
//+----------------------------------------------------------------------------+
//| Description: Magic Set Editor - Program to make Magic (tm) cards |
//| Copyright: (C) 2001 - 2008 Twan van Laarhoven and "coppro" |
//| License: GNU General Public License 2 or later (see file COPYING) |
//+----------------------------------------------------------------------------+
#ifndef HEADER_DATA_ACTION_GENERIC
#define HEADER_DATA_ACTION_GENERIC
/** @file data/action/generic.hpp
*
* Generic action stuff
*/
// ----------------------------------------------------------------------------- : Includes
#include <util/prec.hpp>
#include <util/action_stack.hpp>
// ----------------------------------------------------------------------------- : Generic add/remove action
/// Selects whether a GenericAddAction records an addition (ADD) or a removal (REMOVE)
enum AddingOrRemoving {ADD, REMOVE};
/// Adding or removing some objects from a vector
/** The constructors only record what has to change, as a list of (position, item) steps;
 * they take the container by const reference.
 * perform() applies or reverts the recorded steps on the container that is passed to it.
 */
template <typename T>
class GenericAddAction {
public:
/// Record adding a single item at the end of container, or removing that item from it
GenericAddAction(AddingOrRemoving, const T& item, const vector<T>& container);
/// Record adding several items at the end of container, or removing those items from it
GenericAddAction(AddingOrRemoving, const vector<T>& items, const vector<T>& container);
/// Name of this action, built from the type name of the first recorded item
String getName() const;
/// Insert or erase the recorded items in container; which of the two depends on adding and to_undo
void perform(vector<T>& container, bool to_undo) const;
/// A step of removing/adding
struct Step {
inline Step(size_t pos, const T& item) : pos(pos), item(item) {}
size_t pos; ///< Index in the container at which the item is inserted/erased
T item; ///< Copy of the item that is added/removed
};
bool adding; ///< Were objects added? (as opposed to removed)
vector<Step> steps; ///< Added/removed objects, sorted by ascending pos
};
// ----------------------------------------------------------------------------- : Implementation
/// Is there an element in items that compares equal to item?
template <typename T>
bool contains(const vector<T>& items, const T& item) {
	for (typename vector<T>::const_iterator it = items.begin() ; it != items.end() ; ++it) {
		if (*it == item) return true;
	}
	return false;
}
/// Record adding or removing a single item.
/** When adding, the item is placed at the current end of the container.
 *  When removing, the position of the first element equal to item is recorded;
 *  an InternalError is thrown if there is no such element.
 */
template <typename T>
GenericAddAction<T>::GenericAddAction(AddingOrRemoving ar, const T& item, const vector<T>& container)
	: adding(ar == ADD)
{
	if (adding) {
		// new items go at the end
		steps.push_back(Step(container.size(), item));
		return;
	}
	// removing: locate the first occurrence of the item
	typename vector<T>::const_iterator found = find(container.begin(), container.end(), item);
	if (found == container.end()) {
		throw InternalError(_("Item to remove not found in container"));
	}
	steps.push_back(Step(static_cast<size_t>(found - container.begin()), item));
}
/// Record adding or removing several items at once.
/** When adding, the items are placed after the current end of the container, in the given order.
 *  When removing, every element of the container that equals one of the items is recorded,
 *  in ascending position order (perform() erases them in reverse).
 *  Throws InternalError if the number of recorded elements differs from the number of items,
 *  i.e. when not every item could be matched to exactly one element of the container.
 */
template <typename T>
GenericAddAction<T>::GenericAddAction(AddingOrRemoving ar, const vector<T>& items, const vector<T>& container)
	: adding(ar == ADD)
{
	if (ar == ADD) {
		size_t pos = container.size();
		// 'typename' is required here: vector<T>::const_iterator is a dependent type
		for (typename vector<T>::const_iterator it = items.begin() ; it != items.end() ; ++it) {
			steps.push_back(Step(pos++, *it));
		}
	} else {
		// Scan the whole container; do not stop at the first hit,
		// otherwise only one of the items would ever be removed.
		for (size_t pos = 0 ; pos < container.size() ; ++pos) {
			if (contains(items, container[pos])) {
				steps.push_back(Step(pos, container[pos]));
			}
		}
		if (steps.size() != items.size()) {
			throw InternalError(_("Item to remove not found in container"));
		}
	}
}
/// Name of the action: "add object"/"remove object" applied to the (pluralized) type name.
/** The type name is taken from the first recorded step; an "s" is appended
 *  unless exactly one step was recorded.
 */
template <typename T>
String GenericAddAction<T>::getName() const {
	const bool singular = (steps.size() == 1);
	String type = type_name(steps.front().item) + (singular ? _("") : _("s"));
	if (adding) {
		return _ACTION_1_("add object", type);
	}
	return _ACTION_1_("remove object", type);
}
/// Apply the recorded steps to container.
/** Items are inserted when (adding != to_undo), otherwise they are erased.
 *  Insertion walks the steps front to back, erasure back to front.
 */
template <typename T>
void GenericAddAction<T>::perform(vector<T>& container, bool to_undo) const {
	const bool erase_items = (adding == to_undo);
	if (erase_items) {
		// highest position first, so the positions of the remaining steps stay valid
		for (typename vector<Step>::const_reverse_iterator it = steps.rbegin() ; it != steps.rend() ; ++it) {
			assert(it->pos < container.size());
			container.erase(container.begin() + it->pos);
		}
		return;
	}
	// (re)insert, lowest position first; this mirrors the erasure order above
	for (typename vector<Step>::const_iterator it = steps.begin() ; it != steps.end() ; ++it) {
		assert(it->pos <= container.size());
		container.insert(container.begin() + it->pos, it->item);
	}
}
// ----------------------------------------------------------------------------- : EOF
#endif
...@@ -447,7 +447,7 @@ ScriptValueP replace_rule(Context& ctx) { ...@@ -447,7 +447,7 @@ ScriptValueP replace_rule(Context& ctx) {
ret->replacement = replace->toString(); ret->replacement = replace->toString();
} }
// in_context // in_context
SCRIPT_OPTIONAL_PARAM_N(String, _("in context"), in_context) { SCRIPT_OPTIONAL_PARAM_C(String, in_context) {
if (!ret->context.Compile(in_context, wxRE_ADVANCED)) { if (!ret->context.Compile(in_context, wxRE_ADVANCED)) {
throw ScriptError(_("Error while compiling regular expression: '")+in_context+_("'")); throw ScriptError(_("Error while compiling regular expression: '")+in_context+_("'"));
} }
...@@ -479,7 +479,7 @@ class ScriptFilterRule : public ScriptValue { ...@@ -479,7 +479,7 @@ class ScriptFilterRule : public ScriptValue {
bool ok = regex.GetMatch(&start, &len, 0); bool ok = regex.GetMatch(&start, &len, 0);
assert(ok); assert(ok);
String inside = input.substr(start, len); // the match String inside = input.substr(start, len); // the match
String next_input = input.substr(start + len); // everything after the match String next_input = input.substr(start + len); // everything after the match
if (!context.IsValid() || context.Matches(input.substr(0,start) + _("<match>") + next_input)) { if (!context.IsValid() || context.Matches(input.substr(0,start) + _("<match>") + next_input)) {
// no context or context match // no context or context match
ret += inside; ret += inside;
...@@ -497,7 +497,7 @@ class ScriptFilterRule : public ScriptValue { ...@@ -497,7 +497,7 @@ class ScriptFilterRule : public ScriptValue {
ScriptValueP filter_rule(Context& ctx) { ScriptValueP filter_rule(Context& ctx) {
// cached? // cached?
SCRIPT_PARAM_C(String, match); SCRIPT_PARAM_C(String, match);
SCRIPT_PARAM_DEFAULT_N(String, _("in context"), in_context, String()); SCRIPT_PARAM_DEFAULT_C(String, in_context, String());
// cache // cache
const int CACHE_SIZE = 6; const int CACHE_SIZE = 6;
...@@ -540,6 +540,59 @@ SCRIPT_FUNCTION(filter_text) { ...@@ -540,6 +540,59 @@ SCRIPT_FUNCTION(filter_text) {
return filter_rule(ctx)->eval(ctx); return filter_rule(ctx)->eval(ctx);
} }
// ----------------------------------------------------------------------------- : Rules : regex filter/break
/// Script function that breaks a string into the list of parts matching a regular expression
class ScriptBreakRule : public ScriptValue {
  public:
	virtual ScriptType type() const { return SCRIPT_FUNCTION; }
	virtual String typeName() const { return _("break_rule"); }
	/// Collect the successive matches of regex in the 'input' parameter.
	/** The input is scanned from left to right; after each match the scan continues with
	 *  the text following that match.
	 *  If a context regex was compiled, a match is only kept when the context matches the
	 *  remaining input with the match replaced by "<match>".
	 *  Zero-length matches are never added to the result.
	 *  @return a ScriptCustomCollection holding the kept matches, in order.
	 */
	virtual ScriptValueP eval(Context& ctx) const {
		SCRIPT_PARAM_C(String, input);
		intrusive_ptr<ScriptCustomCollection> ret(new ScriptCustomCollection);
		while (regex.Matches(input)) {
			// match, append to result
			// initialized, so the values are defined even if the assert is compiled out
			size_t start = 0, len = 0;
			bool ok = regex.GetMatch(&start, &len, 0);
			assert(ok);
			if (len == 0) {
				// A zero-length match consumes nothing: continuing with the same text would
				// find the same match again and never terminate. Step over one character.
				if (start >= input.size()) break; // nothing left to step over
				input = input.substr(start + 1);
				continue;
			}
			String inside     = input.substr(start, len);  // the match
			String next_input = input.substr(start + len); // everything after the match
			if (!context.IsValid() || context.Matches(input.substr(0,start) + _("<match>") + next_input)) {
				// no context or context match
				ret->value.push_back(to_script(inside));
			}
			input = next_input;
		}
		return ret;
	}
	wxRegEx regex;   ///< Regex to match
	wxRegEx context; ///< Match only in a given context, optional
};
// Build a ScriptBreakRule from the script parameters 'match' (required)
// and 'in_context' (optional); both are compiled as advanced regular expressions.
// Throws a ScriptError when either expression fails to compile.
ScriptValueP break_rule(Context& ctx) {
	SCRIPT_PARAM_C(String, match);
	intrusive_ptr<ScriptBreakRule> rule(new ScriptBreakRule);
	// the expression to break on
	const bool match_compiled = rule->regex.Compile(match, wxRE_ADVANCED);
	if (!match_compiled) {
		throw ScriptError(_("Error while compiling regular expression: '")+match+_("'"));
	}
	// the context, only when given
	SCRIPT_OPTIONAL_PARAM_C(String, in_context) {
		const bool context_compiled = rule->context.Compile(in_context, wxRE_ADVANCED);
		if (!context_compiled) {
			throw ScriptError(_("Error while compiling regular expression: '")+in_context+_("'"));
		}
	}
	return rule;
}
// Script function that yields the break rule itself, to be applied to an input later
SCRIPT_FUNCTION(break_rule) {
	ScriptValueP rule = break_rule(ctx);
	return rule;
}
// Script function that builds a break rule and immediately evaluates it in the same context
SCRIPT_FUNCTION(break_text) {
	ScriptValueP rule = break_rule(ctx);
	return rule->eval(ctx);
}
// ----------------------------------------------------------------------------- : Rules : regex match // ----------------------------------------------------------------------------- : Rules : regex match
class ScriptMatchRule : public ScriptValue { class ScriptMatchRule : public ScriptValue {
...@@ -657,10 +710,12 @@ void init_script_basic_functions(Context& ctx) { ...@@ -657,10 +710,12 @@ void init_script_basic_functions(Context& ctx) {
// advanced string rules/functions // advanced string rules/functions
ctx.setVariable(_("replace"), script_replace); ctx.setVariable(_("replace"), script_replace);
ctx.setVariable(_("filter text"), script_filter_text); ctx.setVariable(_("filter text"), script_filter_text);
ctx.setVariable(_("break text"), script_break_text);
ctx.setVariable(_("match"), script_match); ctx.setVariable(_("match"), script_match);
ctx.setVariable(_("sort text"), script_sort_text); ctx.setVariable(_("sort text"), script_sort_text);
ctx.setVariable(_("replace rule"), script_replace_rule); ctx.setVariable(_("replace rule"), script_replace_rule);
ctx.setVariable(_("filter rule"), script_filter_rule); ctx.setVariable(_("filter rule"), script_filter_rule);
ctx.setVariable(_("break rule"), script_break_rule);
ctx.setVariable(_("match rule"), script_match_rule); ctx.setVariable(_("match rule"), script_match_rule);
ctx.setVariable(_("sort rule"), script_sort_rule); ctx.setVariable(_("sort rule"), script_sort_rule);
} }
...@@ -136,6 +136,8 @@ inline Type from_script(const ScriptValueP& v, Variable var) { ...@@ -136,6 +136,8 @@ inline Type from_script(const ScriptValueP& v, Variable var) {
#define SCRIPT_PARAM_DEFAULT_N(Type, str, name, def) \ #define SCRIPT_PARAM_DEFAULT_N(Type, str, name, def) \
ScriptValueP name##_ = ctx.getVariableOpt(str); \ ScriptValueP name##_ = ctx.getVariableOpt(str); \
Type name = name##_ ? from_script<Type>(name##_, str) : def Type name = name##_ ? from_script<Type>(name##_, str) : def
// Same as SCRIPT_PARAM_DEFAULT_N, for a parameter that is looked up by its SCRIPT_VAR_ constant.
// Declares a variable 'name' of the given Type; 'def' is its value when the parameter is not set.
// Note: the default must be forwarded as 'def'; forwarding 'name' would initialize the
// variable from itself and silently ignore the default supplied by the caller.
#define SCRIPT_PARAM_DEFAULT_C(Type, name, def) \
	SCRIPT_PARAM_DEFAULT_N(Type, SCRIPT_VAR_ ## name, name, def)
// ----------------------------------------------------------------------------- : Rules // ----------------------------------------------------------------------------- : Rules
......
...@@ -111,6 +111,7 @@ enum Variable ...@@ -111,6 +111,7 @@ enum Variable
, SCRIPT_VAR_in , SCRIPT_VAR_in
, SCRIPT_VAR_match , SCRIPT_VAR_match
, SCRIPT_VAR_replace , SCRIPT_VAR_replace
, SCRIPT_VAR_in_context
, SCRIPT_VAR_order , SCRIPT_VAR_order
, SCRIPT_VAR_filter , SCRIPT_VAR_filter
, SCRIPT_VAR_choice , SCRIPT_VAR_choice
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment