mailing-list for TeXmacs Users

Text archives Help


Re: [TeXmacs] spell check


Chronological Thread 
  • From: François Poulain <address@hidden>
  • To: address@hidden
  • Subject: Re: [TeXmacs] spell check
  • Date: Mon, 2 Apr 2012 18:55:37 +0200

Le Mon, 2 Apr 2012 21:39:08 +0700 (NOVT),
"Andrey G. Grozin" <address@hidden> a écrit :

> On Mon, 2 Apr 2012, François Poulain wrote:
> > Anyway, is there *any* reason to use ispell nowaday ?
> No. It should be deprecated in favour of aspell.

OK. Here is a patch for this, which should handle internationalization
correctly (the aspell way).

I also have to add support for Unicode languages (Asian languages and,
in the future, Russian).

Also, I guess "english" is the language used as the "default value"; in
that case, I do not pass a language setting to aspell, which will then
use the locale configuration.

> A crasy idea: maybe it's better to use the enchant library
> (http://www.abisource.com/projects/enchant/)? It's an abstraction
> level over aspell, ispell, and some other spell checkers currently
> used by AbiWord (maybe by some other projects too, I'm not sure).
> Then there will be no need to start an aspell process and communicate
> with it, just call some enchant functions.

No idea about this. :)

François

--
François Poulain <address@hidden>
diff --git a/src/Data/String/analyze.hpp b/src/Data/String/analyze.hpp
index e326a62..0b9c484 100644
--- a/src/Data/String/analyze.hpp
+++ b/src/Data/String/analyze.hpp
@@ -38,6 +38,10 @@ string upcase_all (string s);
 string locase_all (string s);
 string string_union (string s1, string s2);
 string string_minus (string s1, string s2);
+string utf8_to_t2a (string s);
+string t2a_to_utf8 (string s);
+string utf8_to_cork (string s);
+string cork_to_utf8 (string s);
 string iso_to_koi8 (string s);
 string koi8_to_iso (string s);
 string iso_to_koi8uk (string s);
diff --git a/src/Data/String/converter.cpp b/src/Data/String/converter.cpp
index d4328b8..9e15780 100644
--- a/src/Data/String/converter.cpp
+++ b/src/Data/String/converter.cpp
@@ -209,6 +209,23 @@ cork_to_utf8 (string input) {
 }
 
 string
+utf8_to_t2a (string input) { // converts input piece by piece with the "UTF-8" -> "T2A" converter
+  converter conv= load_converter ("UTF-8", "T2A");
+  int start, i, n= N(input);
+  string output;
+  for (i=0; i<n; ) { // no increment here: i is presumably advanced by decode_from_utf8 below
+    start= i;
+    unsigned int code= decode_from_utf8 (input, i);
+    string s= input (start, i); // the slice of input consumed by the decode call
+    string r= apply (conv, s);
+    if (r == s && code >= 256) // converter returned the slice unchanged and the code is >= 256
+      r= "<#" * as_hexadecimal (code) * ">"; // emit "<#", the code in hexadecimal, ">" instead
+    output << r;
+  }
+  return output;
+}
+
+string
 t2a_to_utf8 (string input) {
   converter conv= load_converter ("T2A", "UTF-8");
   int start= 0, i, n= N(input);
diff --git a/src/Plugins/Ispell/ispell.cpp b/src/Plugins/Ispell/ispell.cpp
index eb5d21c..0712227 100644
--- a/src/Plugins/Ispell/ispell.cpp
+++ b/src/Plugins/Ispell/ispell.cpp
@@ -15,10 +15,11 @@
 #include "tm_link.hpp"
 #include "convert.hpp"
 
-string ispell_dictionary (string lang);
-string ispell_extra_args (string lang);
+// string ispell_dictionary (string lang);
+// string ispell_extra_args (string lang);
 string ispell_encode (string lan, string s);
 string ispell_decode (string lan, string s);
+string ietf_languages_code (string s);
 
 /******************************************************************************
 * The connection resource
@@ -58,27 +59,29 @@ ispeller_rep::start () {
         cmd= "\"C:\\Program Files\\Aspell\\bin\\aspell.exe\"";
       else
 #endif
-        cmd= "ispell";
-    cmd << " -a -d " * ispell_dictionary (lan) * ispell_extra_args (lan);
+        return "Error: Aspell is not installed";
+    cmd << " -a --encoding=utf-8 ";
+    if (ietf_languages_code (lan) != "")
+      cmd << "-l " << ietf_languages_code (lan);
 #endif
     ln= make_pipe_link (cmd);
   }
   if (ln->alive) return "ok";
   string message= ln->start ();
-  if (DEBUG_IO) cout << "Ispell] Received " << message << "\n";
+  if (DEBUG_IO) cout << "Aspell] Received " << message << "\n";
   if (starts (message, "Error: ")) {
     if (ln->alive) ln->stop ();
     return message;
   }
   message= retrieve ();
-  if (DEBUG_IO) cout << "Ispell] Received " << message << "\n";
+  if (DEBUG_IO) cout << "Aspell] Received " << message << "\n";
 #ifdef OS_WIN32
   if (search_forwards (message, 0, "@(#)")) return "ok";
 #else
   if (starts (message, "@(#)")) return "ok";
 #endif
   if (ln->alive) ln->stop ();
-  return "Error: no dictionary for#" * lan;
+  return "Error: no dictionary for " * lan;
 }
 
 string
@@ -114,6 +117,7 @@ ispeller_rep::send (string cmd) {
 * Ispell dictionaries
 ******************************************************************************/
 
+/*
 static hashmap<string,string> the_dict ("");
 
 static void
@@ -124,8 +128,39 @@ init_dictionary (string lang, string dict) {
       exists ("/usr/lib/aspell/" * dict * ".multi"))
     the_dict (lang)= dict;
 }
+*/
 
 string
+ietf_languages_code (string lang) {
+  // Maps a TeXmacs language name to the ll_CC code given to aspell with -l.
+  if (lang == "british")    return "en_GB";
+  if (lang == "bulgarian")  return "bg_BG";
+  if (lang == "chinese")    return "zh_CN";
+  if (lang == "czech")      return "cs_CZ";
+  if (lang == "danish")     return "da_DK";
+  if (lang == "dutch")      return "nl_NL";
+  if (lang == "finnish")    return "fi_FI";
+  if (lang == "french")     return "fr_FR";
+  if (lang == "german")     return "de_DE";
+  if (lang == "hungarian")  return "hu_HU";
+  if (lang == "italian")    return "it_IT";
+  if (lang == "japanese")   return "ja_JP";
+  if (lang == "korean")     return "ko_KR";
+  if (lang == "polish")     return "pl_PL";
+  if (lang == "portuguese") return "pt_PT";
+  if (lang == "romanian")   return "ro_RO";
+  if (lang == "russian")    return "ru_RU";
+  if (lang == "slovene")    return "sl_SI";
+  if (lang == "spanish")    return "es_ES";
+  if (lang == "swedish")    return "sv_SE";
+  if (lang == "taiwanese")  return "zh_TW";
+  if (lang == "ukrainian")  return "uk_UA";
+  // "english" (seemingly the default value) and unknown names give "", so
+  // the caller passes no -l option and aspell uses the locale configuration.
+  return "";
+}
+
+/*
+string
 ispell_dictionary (string lang) {
   if (N(the_dict) == 0) {
     init_dictionary ("english", "english");
@@ -158,18 +193,23 @@ ispell_dictionary (string lang) {
   if (the_dict->contains (lang)) return the_dict [lang];
   return lang;
 }
+*/
 
 /******************************************************************************
 * Language dependent arguments to ispell
 ******************************************************************************/
 
+/*
 string
 ispell_extra_args (string lan) {
   if (lan == "german")
-    return " -T latin1";
+    return " -T latin1 ";
+  else if (lan == "italian")
+    return " --encoding=iso-8859-1 ";
   else
     return "";
 }
+*/
 
 /******************************************************************************
 * Internationalization
@@ -177,34 +217,18 @@ ispell_extra_args (string lan) {
 
 string
 ispell_encode (string lan, string s) {
-  if ((lan == "czech") || (lan == "hungarian") ||
-      (lan == "polish") || (lan == "slovene"))
-    return cork_to_il2 (s);
-  else if ((lan == "bulgarian") || (lan == "russian"))
-    return koi8_to_iso (s);
-  else if (lan == "ukrainian")
-    return koi8uk_to_iso (s);
-  else if (lan == "spanish")
-    return spanish_to_ispanish (s);
-  else if (lan == "german")
-    return german_to_igerman (s);
-  else return s;
+  // TODO: write tm_unicode to utf8 converter
+  if (lan == "bulgarian" || lan == "russian" || lan == "ukrainian")
+    return t2a_to_utf8 (s); // these three languages go through the T2A -> UTF-8 converter
+  else return cork_to_utf8 (s); // every other language goes through the Cork -> UTF-8 converter
 }
 
 string
 ispell_decode (string lan, string s) {
-  if ((lan == "czech") || (lan == "hungarian") ||
-      (lan == "polish") || (lan == "slovene"))
-    return il2_to_cork (s);
-  else if ((lan == "bulgarian") || (lan == "russian"))
-    return iso_to_koi8 (s);
-  else if (lan == "ukrainian")
-    return iso_to_koi8uk (s);
-  else if (lan == "spanish")
-    return ispanish_to_spanish (s);
-  else if (lan == "german")
-    return igerman_to_german (s);
-  else return s;
+  // TODO: write utf8 to tm_unicode converter
+  if (lan == "bulgarian" || lan == "russian" || lan == "ukrainian")
+    return utf8_to_t2a (s); // these three languages go through the UTF-8 -> T2A converter
+  return utf8_to_cork (s); // every other language goes through the UTF-8 -> Cork converter
 }
 
 /******************************************************************************
@@ -262,7 +286,7 @@ ispell_eval (string lan, string s) {
 
 string
 ispell_start (string lan) {
-  if (DEBUG_IO) cout << "Ispell] Start " << lan << "\n";
+  if (DEBUG_IO) cout << "Aspell] Start " << lan << "\n";
   ispeller sc= ispeller (lan);
   if (is_nil (sc)) sc= tm_new<ispeller_rep> (lan);
   return sc->start ();
@@ -270,7 +294,7 @@ ispell_start (string lan) {
 
 tree
 ispell_check (string lan, string s) {
-  if (DEBUG_IO) cout << "Ispell] Check " << s << "\n";
+  if (DEBUG_IO) cout << "Aspell] Check " << s << "\n";
   ispeller sc= ispeller (lan);
   if (is_nil (sc) || (!sc->ln->alive)) {
     string message= ispell_start (lan);
@@ -283,18 +307,18 @@ ispell_check (string lan, string s) {
 
 void
 ispell_accept (string lan, string s) { // passes "@" followed by s to ispell_send for language lan
-  if (DEBUG_IO) cout << "Ispell] Accept " << s << "\n";
+  if (DEBUG_IO) cout << "Aspell] Accept " << s << "\n";
   ispell_send (lan, "@" * s); // NOTE(review): "@word" is presumably the pipe-mode "accept word" command -- confirm
 }
 
 void
 ispell_insert (string lan, string s) { // passes "*" followed by s to ispell_send for language lan
-  if (DEBUG_IO) cout << "Ispell] Insert " << s << "\n";
+  if (DEBUG_IO) cout << "Aspell] Insert " << s << "\n";
   ispell_send (lan, "*" * s); // NOTE(review): "*word" is presumably the pipe-mode "add to personal dictionary" command -- confirm
 }
 
 void
 ispell_done (string lan) { // passes "#" to ispell_send for language lan
-  if (DEBUG_IO) cout << "Ispell] End " << lan << "\n";
+  if (DEBUG_IO) cout << "Aspell] End " << lan << "\n";
   ispell_send (lan, "#"); // NOTE(review): "#" is presumably the pipe-mode "save personal dictionary" command -- confirm
 }



Archive powered by MHonArc 2.6.19.

Top of page