Commit b4ce292c authored by twanvl's avatar twanvl

Simple spelling checker using the Hunspell library.

This time adding the source files :)

The checker is used (experimentally) by the magic game file.
parent e6183d2d
......@@ -4,6 +4,7 @@
languages := [
English: [
code : "en",
spellcheck_code : "en_US",
pt_separator : "/",
type_separator : " — ",
subtype_separator : " ",
......@@ -17,6 +18,7 @@ languages := [
Français: [
code : "fr",
spellcheck_code : "", # TODO: get dictionary
pt_separator : "/",
type_separator : " : "
subtype_separator : "<atom-sep> et </atom-sep>",
......
......@@ -322,8 +322,8 @@ mana_context :=
|adds?|pay(ed)?[ ](with|using)
)
([ ]either)? # pay either X or Y
([ ]<sym[^>]*>[STQXYZIWUBRG0-9/|]+</sym[^>]*>,)* # pay X, Y or Z
([ ]<sym[^>]*>[STQXYZIWUBRG0-9/|]+</sym[^>]*>[ ](and|or|and/or))* # pay X or Y
([ ](<sym[^>]*>)?[STQXYZIWUBRG0-9/|]+(</sym[^>]*>)?,)* # pay X, Y or Z
([ ](<sym[^>]*>)?[STQXYZIWUBRG0-9/|]+(</sym[^>]*>)?[ ](and|or|and/or))* # pay X or Y
[ ]<match>
([,.)]|$ # (end of word)
|[ ][^ .,]*$ # still typing...
......@@ -343,6 +343,7 @@ text_filter :=
# step 1 : remove all automatic tags
remove_tag@(tag: "<sym-auto>") +
remove_tag@(tag: "<i-auto>") +
remove_tag@(tag: "<error-spelling") +
# step 2 : reminder text for keywords
expand_keywords@(
condition: {
......@@ -392,7 +393,7 @@ text_filter :=
replace: {"<nosym>" + mana_filter_t() + "</nosym>"} ) +
# step 5 : add mana & tap symbols
replace@(
match: "\b[STQXYZIWUBRG0-9/|]+\b",
match: "\\b[STQXYZIWUBRG0-9/|]+\\b",
in_context: mana_context,
replace: {"<sym-auto>" + mana_filter_t() + "</sym-auto>"} ) +
# step 5b : add explicit mana symbols
......@@ -410,7 +411,9 @@ text_filter :=
+ "([[:lower:]])" # match this
+ "(?![)])", # not followed by this
replace: { _1 + to_upper(_2) }) +
curly_quotes
curly_quotes +
# step 9 : spellcheck
{ check_spelling(language:language().spellcheck_code) }
############################################################## Other boxes
......@@ -423,7 +426,9 @@ flavor_text_filter :=
# step 2 : surround by <i> tags
{ "<i-flavor>" + input + "</i-flavor>" } +
# curly quotes
curly_quotes
curly_quotes +
# spellcheck
{ check_spelling(language:language().spellcheck_code) }
# Move the cursor past the separator in the p/t and type boxes
type_over_pt := replace@(match:"/$", replace:"")
......
//+----------------------------------------------------------------------------+
//| Description: Magic Set Editor - Program to make Magic (tm) cards |
//| Copyright: (C) 2001 - 2008 Twan van Laarhoven and "coppro" |
//| License: GNU General Public License 2 or later (see file COPYING) |
//+----------------------------------------------------------------------------+
// ----------------------------------------------------------------------------- : Includes
#include <util/prec.hpp>
#include <script/functions/functions.hpp>
#include <script/functions/util.hpp>
#include <util/spell_checker.hpp>
#include <util/tagged_string.hpp>
// ----------------------------------------------------------------------------- : Functions
void check_word(const String& input, String& out, size_t start, size_t end, SpellChecker& checker, bool must_be_empty) {
if (start >= end) return;
String word = untag(input.substr(start,end-start));
// TODO: handle keywords and cardname references
bool error = !word.empty() && (must_be_empty || !checker.spell_with_punctuation(word));
if (error) out += _("<error-spelling>");
out.append(input, start, end-start);
if (error) out += _("</error-spelling>");
}
/// Script function: mark the misspelled words in a tagged text with <error-spelling> tags.
/** Script parameters:
 *    language  name of the dictionary to use; when empty no checking is done
 *    input     the tagged text to check
 *
 *  Returns the text, with each misspelled word wrapped in <error-spelling>...</error-spelling>.
 *  Words are separated by whitespace. <sym...> elements are copied without being checked, and
 *  text that is attached to a symbol without whitespace in between is marked as an error.
 *  Error tags that are already in the input are removed first.
 */
SCRIPT_FUNCTION(check_spelling) {
    SCRIPT_PARAM(String,language);
    SCRIPT_PARAM(String,input);
    // remove old spelling error tags
    input = remove_tag(input, _("<error-spelling"));
    if (language.empty()) {
        // no language -> no spell checking.
        // The result replaces the text, so the text itself must be returned, not a boolean.
        SCRIPT_RETURN(input);
    }
    SpellChecker& checker = SpellChecker::get(language);
    // now walk over the words in the input, and mark misspellings
    String result;
    size_t word_start = 0, pos = 0;
    bool must_be_empty = false; // must this word be empty?
    while (pos < input.size()) {
        Char c = input.GetChar(pos);
        if (c == _('<')) {
            if (is_substr(input,pos,_("<sym"))) {
                // before symbols should be empty
                check_word(input,result, word_start,pos, checker, true);
                // don't spellcheck symbols, copy the whole element as it is
                word_start = pos;
                pos = min(input.size(), match_close_tag_end(input,pos));
                result.append(input, word_start, pos-word_start);
                word_start = pos;
                must_be_empty = true; // need a space after symbols
            } else {
                // other tags stay part of the current word, check_word untags before the lookup
                pos = skip_tag(input,pos);
            }
        } else if (isSpace(c)) {
            // word boundary -> check word
            check_word(input,result, word_start,pos, checker, must_be_empty);
            // next
            result += c;
            pos++;
            word_start = pos;
            must_be_empty = false;
        } else {
            pos++;
        }
    }
    // last word
    check_word(input,result, word_start,input.size(), checker, must_be_empty);
    // done
    SCRIPT_RETURN(result);
}
/// Script function: is a single word spelled correctly?
/** Script parameters:
 *    language  name of the dictionary to use; when empty every word is accepted
 *    input     the word to look up
 *
 *  Returns a boolean.
 */
SCRIPT_FUNCTION(check_spelling_word) {
    SCRIPT_PARAM(String,language);
    SCRIPT_PARAM(String,input);
    // no language -> no spell checking, so nothing can be wrong
    if (language.empty()) {
        SCRIPT_RETURN(true);
    }
    SCRIPT_RETURN(SpellChecker::get(language).spell(input));
}
// ----------------------------------------------------------------------------- : Init
/// Add the spell checking script functions to the given script context.
/** Each function is stored as a variable under the name given as the first argument.
 *  NOTE(review): the names are written with spaces here, while scripts call check_spelling(...)
 *  with an underscore; presumably variable names are normalized elsewhere -- confirm.
 */
void init_script_spelling_functions(Context& ctx) {
ctx.setVariable(_("check spelling"), script_check_spelling);
ctx.setVariable(_("check spelling word"), script_check_spelling_word);
}
//+----------------------------------------------------------------------------+
//| Description: Magic Set Editor - Program to make Magic (tm) cards |
//| Copyright: (C) 2001 - 2008 Twan van Laarhoven and "coppro" |
//| License: GNU General Public License 2 or later (see file COPYING) |
//+----------------------------------------------------------------------------+
// ----------------------------------------------------------------------------- : Includes
#include <util/prec.hpp>
#include <util/spell_checker.hpp>
#include <util/io/package_manager.hpp>
// ----------------------------------------------------------------------------- : Spell checker : construction
/// Cache of the spell checkers created so far, keyed by the language name passed to get()
map<String,SpellCheckerP> SpellChecker::spellers;
/// Get the (cached) spell checker for a language, loading its dictionary on first use.
/** The files <language>.aff and <language>.dic are looked up in the directory returned by
 *  getDictionaryDir(true) first, and in the one returned by getDictionaryDir(false) second.
 *  Both files must exist in the same directory.
 *
 *  @throws Error when neither directory contains the dictionary
 */
SpellChecker& SpellChecker::get(const String& language) {
    SpellCheckerP& cached = spellers[language];
    if (cached) return *cached;
    // candidate directories, in order of preference
    const String directories[2] = { package_manager.getDictionaryDir(true)
                                  , package_manager.getDictionaryDir(false) };
    // file names of the affix file and the word list
    const String aff_name = language + _(".aff");
    const String dic_name = language + _(".dic");
    for (size_t i = 0 ; i < 2 ; ++i) {
        const String aff_file = directories[i] + aff_name;
        const String dic_file = directories[i] + dic_name;
        if (wxFileExists(aff_file) && wxFileExists(dic_file)) {
            // the converted paths only have to live for the duration of the constructor call
            cached = SpellCheckerP(new SpellChecker(aff_file.mb_str(), dic_file.mb_str()));
            return *cached;
        }
    }
    throw Error(_("Dictionary not found for language: ") + language);
}
/// Construct a spell checker from an affix file and a dictionary file.
/** @param aff_path  path of the affix file, passed on to the Hunspell constructor
 *  @param dic_path  path of the dictionary file, passed on to the Hunspell constructor
 *
 *  The `encoding` converter is built from the name returned by get_dic_encoding()
 *  (presumably the character set declared by the loaded dictionary -- see the Hunspell API).
 */
SpellChecker::SpellChecker(const char* aff_path, const char* dic_path)
: Hunspell(aff_path,dic_path)
, encoding(String(get_dic_encoding(), IF_UNICODE(wxConvLibc, wxSTRING_MAXLEN)))
{}
/// Forget all cached spell checkers by emptying the `spellers` map.
/** References obtained earlier from get() must not be used afterwards
 *  (assuming the map held the last reference to each checker).
 */
void SpellChecker::destroy() {
spellers.clear();
}
// ----------------------------------------------------------------------------- : Spell checker : use
bool SpellChecker::spell(const String& word) {
return Hunspell::spell(word.mb_str(encoding));
}
/// Punctuation that may be attached to the front of a word; skipped before the dictionary lookup
const String word_start = String(_("[({\"\'")) + LEFT_SINGLE_QUOTE + LEFT_DOUBLE_QUOTE;
/// Punctuation that may be attached to the end of a word; skipped before the dictionary lookup.
/** Includes '!' and '?', so that a word at the end of an exclamation or a question
 *  is looked up without the sentence punctuation. */
const String word_end = String(_("])}.,;:!?\"\'")) + RIGHT_SINGLE_QUOTE + RIGHT_DOUBLE_QUOTE;
bool SpellChecker::spell_with_punctuation(const String& word) {
size_t first = word.find_first_not_of(word_start);
size_t last = word.find_last_not_of(word_end);
if (first > last) return false; // just punctuation is incorrect
return spell(word.substr(first, last-first+1));
}
//+----------------------------------------------------------------------------+
//| Description: Magic Set Editor - Program to make Magic (tm) cards |
//| Copyright: (C) 2001 - 2008 Twan van Laarhoven and "coppro" |
//| License: GNU General Public License 2 or later (see file COPYING) |
//+----------------------------------------------------------------------------+
#ifndef HEADER_UTIL_SPELL_CHECKER
#define HEADER_UTIL_SPELL_CHECKER
// ----------------------------------------------------------------------------- : Includes
#include <util/prec.hpp>
#include "hunspell.hxx"
DECLARE_POINTER_TYPE(SpellChecker);
// ----------------------------------------------------------------------------- : Spell checker
/// A spelling checker for a particular language
/** Extends Hunspell with functions that accept the application's String type.
 *  Objects can not be constructed directly (the constructor is private), use get() instead.
 */
class SpellChecker : public Hunspell, public IntrusivePtrBase<SpellChecker> {
public:
/// Get a SpellChecker object for the given language.
/** The object is created the first time a language is requested, and cached after that.
 *  Throws an Error if no dictionary for the language is found.
 *  Note: This is not threadsafe yet */
static SpellChecker& get(const String& language);
/// Destroy all cached SpellChecker objects
static void destroy();
/// Check the spelling of a single word
/** Note: this hides the spell function inherited from Hunspell */
bool spell(const String& word);
/// Check the spelling of a single word, ignore punctuation
/** Punctuation at the start and at the end of the word is not passed to the dictionary */
bool spell_with_punctuation(const String& word);
private:
/// Convert between String and dictionary encoding
wxCSConv encoding;
/// Load a dictionary from the given affix and dictionary files, only used by get()
SpellChecker(const char* aff_path, const char* dic_path);
static map<String,SpellCheckerP> spellers; ///< Cached checkers for each language
};
// ----------------------------------------------------------------------------- : EOF
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment