9 files changed, 215 insertions, 50 deletions
diff --git a/src/mnemonics/electrum-words.cpp b/src/mnemonics/electrum-words.cpp
index b204e81cf..ffa82b21e 100644
--- a/src/mnemonics/electrum-words.cpp
+++ b/src/mnemonics/electrum-words.cpp
@@ -61,7 +61,6 @@
 
 namespace
 {
-  const int seed_length = 24;
 
   /*!
    * \brief Finds the word list that contains the seed words and puts the indices
@@ -69,11 +68,11 @@ namespace
    * \param  seed            List of words to match.
    * \param  has_checksum    If word list passed checksum test, we need to only do a prefix check.
    * \param  matched_indices The indices where the seed words were found are added to this.
+   * \param  language        Language instance pointer to write to after it is found.
    * \return                 true if all the words were present in some language false if not.
    */
   bool find_seed_language(const std::vector<std::string> &seed,
-    bool has_checksum, std::vector<uint32_t> &matched_indices, uint32_t &word_list_length,
-    std::string &language_name)
+    bool has_checksum, std::vector<uint32_t> &matched_indices, Language::Base **language)
   {
     // If there's a new language added, add an instance of it here.
     std::vector<Language::Base*> language_instances({
@@ -83,18 +82,6 @@ namespace
       Language::Singleton<Language::Japanese>::instance(),
       Language::Singleton<Language::OldEnglish>::instance()
     });
-    // To hold trimmed seed words in case of a checksum being present.
-    std::vector<std::string> trimmed_seed;
-    if (has_checksum)
-    {
-      // If it had a checksum, we'll just compare the unique prefix
-      // So we create a list of trimmed seed words
-      for (std::vector<std::string>::const_iterator it = seed.begin(); it != seed.end(); it++)
-      {
-        trimmed_seed.push_back(it->length() > Language::unique_prefix_length ?
-          it->substr(0, Language::unique_prefix_length) : *it);
-      }
-    }
 
     // Iterate through all the languages and find a match
     for (std::vector<Language::Base*>::iterator it1 = language_instances.begin();
@@ -104,23 +91,22 @@ namespace
       const std::unordered_map<std::string, uint32_t> &trimmed_word_map = (*it1)->get_trimmed_word_map();
       // To iterate through seed words
       std::vector<std::string>::const_iterator it2;
-      // To iterate through trimmed seed words
-      std::vector<std::string>::iterator it3;
       bool full_match = true;
 
+      std::string trimmed_word;
       // Iterate through all the words and see if they're all present
-      for (it2 = seed.begin(), it3 = trimmed_seed.begin();
-        it2 != seed.end(); it2++, it3++)
+      for (it2 = seed.begin(); it2 != seed.end(); it2++)
       {
         if (has_checksum)
         {
+          trimmed_word = it2->substr(0, (*it1)->get_unique_prefix_length());
           // Use the trimmed words and map
-          if (trimmed_word_map.count(*it3) == 0)
+          if (trimmed_word_map.count(trimmed_word) == 0)
           {
             full_match = false;
             break;
           }
-          matched_indices.push_back(trimmed_word_map.at(*it3));
+          matched_indices.push_back(trimmed_word_map.at(trimmed_word));
         }
         else
         {
@@ -134,8 +120,7 @@ namespace
       }
       if (full_match)
       {
-        word_list_length = (*it1)->get_word_list().size();
-        language_name = (*it1)->get_language_name();
+        *language = *it1;
         return true;
       }
       // Some didn't match. Clear the index array.
@@ -146,18 +131,20 @@ namespace
 
   /*!
    * \brief Creates a checksum index in the word list array on the list of words.
-   * \param  word_list Vector of words
-   * \return           Checksum index
+   * \param  word_list            Vector of words
+   * \param unique_prefix_length  the prefix length of each word to use for checksum
+   * \return                      Checksum index
    */
-  uint32_t create_checksum_index(const std::vector<std::string> &word_list)
+  uint32_t create_checksum_index(const std::vector<std::string> &word_list,
+    uint32_t unique_prefix_length)
   {
     std::string trimmed_words = "";
 
     for (std::vector<std::string>::const_iterator it = word_list.begin(); it != word_list.end(); it++)
     {
-      if (it->length() > 4)
+      if (it->length() > unique_prefix_length)
       {
-        trimmed_words += it->substr(0, Language::unique_prefix_length);
+        trimmed_words += it->substr(0, unique_prefix_length);
       }
       else
       {
@@ -166,25 +153,26 @@ namespace
     }
     boost::crc_32_type result;
     result.process_bytes(trimmed_words.data(), trimmed_words.length());
-    return result.checksum() % seed_length;
+    return result.checksum() % crypto::ElectrumWords::seed_length;
   }
 
   /*!
    * \brief Does the checksum test on the seed passed.
-   * \param seed    Vector of seed words
-   * \return        True if the test passed false if not.
+   * \param seed                  Seed words with the checksum word last; taken by value, so the pop_back below only affects this copy.
+   * \param unique_prefix_length  Words longer than this are cut to this many leading chars (std::string::substr) before comparing.
+   * \return                      True if the trimmed last word equals the trimmed word at the computed checksum index, false if not.
    */
-  bool checksum_test(std::vector<std::string> seed)
+  bool checksum_test(std::vector<std::string> seed, uint32_t unique_prefix_length)
   {
     // The last word is the checksum.
     std::string last_word = seed.back();
     seed.pop_back();
 
-    std::string checksum = seed[create_checksum_index(seed)];
+    std::string checksum = seed[create_checksum_index(seed, unique_prefix_length)];
 
-    std::string trimmed_checksum = checksum.length() > 4 ? checksum.substr(0, Language::unique_prefix_length) :
+    std::string trimmed_checksum = checksum.length() > unique_prefix_length ? checksum.substr(0, unique_prefix_length) :
       checksum;
-    std::string trimmed_last_word = checksum.length() > 4 ? last_word.substr(0, Language::unique_prefix_length) :
+    std::string trimmed_last_word = last_word.length() > unique_prefix_length ? last_word.substr(0, unique_prefix_length) :
       last_word;
     return trimmed_checksum == trimmed_last_word;
   }
@@ -211,11 +199,12 @@ namespace crypto
      * \param  language_name   Language of the seed as found gets written here.
      * \return                 false if not a multiple of 3 words, or if word is not in the words list
      */
-    bool words_to_bytes(const std::string& words, crypto::secret_key& dst,
+    bool words_to_bytes(std::string words, crypto::secret_key& dst,
       std::string &language_name)
     {
       std::vector<std::string> seed;
 
+      boost::algorithm::trim(words);
       boost::split(seed, words, boost::is_any_of(" "));
 
       // error on non-compliant word list
@@ -227,22 +216,25 @@ namespace crypto
 
       // If it is seed with a checksum.
       bool has_checksum = seed.size() == (seed_length + 1);
+
+      std::vector<uint32_t> matched_indices;
+      Language::Base *language;
+      if (!find_seed_language(seed, has_checksum, matched_indices, &language))
+      {
+        return false;
+      }
+      language_name = language->get_language_name();
+      uint32_t word_list_length = language->get_word_list().size();
+
       if (has_checksum)
       {
-        if (!checksum_test(seed))
+        if (!checksum_test(seed, language->get_unique_prefix_length()))
         {
           // Checksum fail
           return false;
         }
         seed.pop_back();
       }
-      
-      std::vector<uint32_t> matched_indices;
-      uint32_t word_list_length = 0;
-      if (!find_seed_language(seed, has_checksum, matched_indices, word_list_length, language_name))
-      {
-        return false;
-      }
 
       for (unsigned int i=0; i < seed.size() / 3; i++)
       {
@@ -335,7 +327,7 @@ namespace crypto
       }
 
       words.pop_back();
-      words += (' ' + words_store[create_checksum_index(words_store)]);
+      words += (' ' + words_store[create_checksum_index(words_store, language->get_unique_prefix_length())]);
       return false;
     }
 
diff --git a/src/mnemonics/electrum-words.h b/src/mnemonics/electrum-words.h
index b822e7740..b75de30e9 100644
--- a/src/mnemonics/electrum-words.h
+++ b/src/mnemonics/electrum-words.h
@@ -59,6 +59,7 @@ namespace crypto
   namespace ElectrumWords
   {
 
+    const int seed_length = 24;
     const std::string old_language_name = "OldEnglish";
     /*!
      * \brief Converts seed words to bytes (secret key).
@@ -67,7 +68,7 @@ namespace crypto
      * \param  language_name   Language of the seed as found gets written here.
      * \return                 false if not a multiple of 3 words, or if word is not in the words list
      */
-    bool words_to_bytes(const std::string& words, crypto::secret_key& dst,
+    bool words_to_bytes(std::string words, crypto::secret_key& dst,
       std::string &language_name);
 
     /*!
diff --git a/src/mnemonics/english.h b/src/mnemonics/english.h
index ae39d44fd..8ca0a3fe3 100644
--- a/src/mnemonics/english.h
+++ b/src/mnemonics/english.h
@@ -1681,6 +1681,7 @@ namespace Language
         "zones",
         "zoom"
       });
+      unique_prefix_length = 3;
       word_map = new std::unordered_map<std::string, uint32_t>;
       trimmed_word_map = new std::unordered_map<std::string, uint32_t>;
       language_name = "English";
diff --git a/src/mnemonics/japanese.h b/src/mnemonics/japanese.h
index 22c7a53ba..fc3d1ee74 100644
--- a/src/mnemonics/japanese.h
+++ b/src/mnemonics/japanese.h
@@ -1681,6 +1681,7 @@ namespace Language
         "びじゅつかん",
         "ひしょ"
       });
+      unique_prefix_length = 4;
       word_map = new std::unordered_map<std::string, uint32_t>;
       trimmed_word_map = new std::unordered_map<std::string, uint32_t>;
       language_name = "Japanese";
diff --git a/src/mnemonics/language_base.h b/src/mnemonics/language_base.h
index 0f1062b09..06815e39d 100644
--- a/src/mnemonics/language_base.h
+++ b/src/mnemonics/language_base.h
@@ -45,7 +45,6 @@
  */
 namespace Language
 {
-  const int unique_prefix_length = 4; /*!< Length of the prefix of all words guaranteed to be unique */
   /*!
    * \class Base
    * \brief A base language class which all languages have to inherit from for
@@ -58,7 +57,7 @@ namespace Language
     std::unordered_map<std::string, uint32_t> *word_map; /*!< hash table to find word's index */
     std::unordered_map<std::string, uint32_t> *trimmed_word_map; /*!< hash table to find word's trimmed index */
     std::string language_name; /*!< Name of language */
-    int trim_length; /*!< Number of unique starting characters to trim the wordlist to when matching */
+    uint32_t unique_prefix_length; /*!< Number of unique starting characters to trim the wordlist to when matching */
     /*!
      * \brief Populates the word maps after the list is ready.
      */
@@ -85,6 +84,7 @@ namespace Language
       word_list = new std::vector<std::string>;
       word_map = new std::unordered_map<std::string, uint32_t>;
       trimmed_word_map = new std::unordered_map<std::string, uint32_t>;
+      unique_prefix_length = 4;
     }
     /*!
      * \brief Returns a pointer to the word list.
@@ -122,9 +122,9 @@ namespace Language
      * \brief Returns the number of unique starting characters to be used for matching.
      * \return Number of unique starting characters.
      */
-    int get_trim_length() const
+    uint32_t get_unique_prefix_length() const
     {
-      return trim_length;
+      return unique_prefix_length;
     }
   };
 }
diff --git a/src/mnemonics/old_english.h b/src/mnemonics/old_english.h
index 09ac37e66..b91a593b6 100644
--- a/src/mnemonics/old_english.h
+++ b/src/mnemonics/old_english.h
@@ -1681,6 +1681,7 @@ namespace Language
         "weapon",
         "weary"
       });
+      unique_prefix_length = 4;
       word_map = new std::unordered_map<std::string, uint32_t>;
       trimmed_word_map = new std::unordered_map<std::string, uint32_t>;
       language_name = "OldEnglish";
diff --git a/src/mnemonics/portuguese.h b/src/mnemonics/portuguese.h
index c040a0415..6a90659e0 100644
--- a/src/mnemonics/portuguese.h
+++ b/src/mnemonics/portuguese.h
@@ -1679,6 +1679,7 @@ namespace Language
         "zenite",
         "zumbi"
       });
+      unique_prefix_length = 4;
       word_map = new std::unordered_map<std::string, uint32_t>;
       trimmed_word_map = new std::unordered_map<std::string, uint32_t>;
       language_name = "Portuguese";
diff --git a/src/mnemonics/spanish.h b/src/mnemonics/spanish.h
index 8d695a4b1..f71627086 100644
--- a/src/mnemonics/spanish.h
+++ b/src/mnemonics/spanish.h
@@ -1681,6 +1681,7 @@ namespace Language
         "ritmo",
         "rito"
       });
+      unique_prefix_length = 4;
       word_map = new std::unordered_map<std::string, uint32_t>;
       trimmed_word_map = new std::unordered_map<std::string, uint32_t>;
       language_name = "Spanish";
diff --git a/tests/unit_tests/mnemonics.cpp b/tests/unit_tests/mnemonics.cpp
new file mode 100644
index 000000000..70e8a413f
--- /dev/null
+++ b/tests/unit_tests/mnemonics.cpp
@@ -0,0 +1,167 @@
+// Copyright (c) 2014, The Monero Project
+// 
+// All rights reserved.
+// 
+// Redistribution and use in source and binary forms, with or without modification, are
+// permitted provided that the following conditions are met:
+// 
+// 1. Redistributions of source code must retain the above copyright notice, this list of
+//    conditions and the following disclaimer.
+// 
+// 2. Redistributions in binary form must reproduce the above copyright notice, this list
+//    of conditions and the following disclaimer in the documentation and/or other
+//    materials provided with the distribution.
+// 
+// 3. Neither the name of the copyright holder nor the names of its contributors may be
+//    used to endorse or promote products derived from this software without specific
+//    prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "gtest/gtest.h"
+#include "mnemonics/electrum-words.h"
+#include "crypto/crypto.h"
+#include <stdlib.h>
+#include <vector>
+#include <time.h>
+#include <iostream>
+#include <boost/algorithm/string.hpp>
+#include "mnemonics/english.h"
+#include "mnemonics/spanish.h"
+#include "mnemonics/portuguese.h"
+#include "mnemonics/japanese.h"
+#include "mnemonics/old_english.h"
+#include "mnemonics/language_base.h"
+#include "mnemonics/singleton.h"
+
+namespace
+{
+  /*!
+   * \brief Returns a pseudo-random index from 0 to max-1, computed as rand() % max.
+   * \param  max Range maximum (exclusive); used as a modulus, so it must not be zero.
+   * \return     rand() % max converted to uint32_t.
+   */
+  uint32_t get_random_index(int max)
+  {
+    return rand() % max;
+  }
+
+  /*!
+   * \brief Prints the seed words to std::cout on one line, each word followed by a space.
+   * \param seed Words to print; std::endl is written after the last word.
+   */
+  void print_seed(const std::vector<std::string> &seed)
+  {
+    for (std::vector<std::string>::const_iterator it = seed.begin(); it != seed.end(); it++)
+    {
+      std::cout << *it << " ";
+    }
+    std::cout << std::endl;
+  }
+
+  /*!
+   * \brief Asserts (ASSERT_STREQ) that two vectors hold equal strings, element by element.
+   * \param expected expected vector
+   * \param present  current vector; sizes are not compared here, the loop stops at the end of the shorter one.
+   */
+  void compare_vectors(const std::vector<std::string> &expected, const std::vector<std::string> &present)
+  {
+    std::vector<std::string>::const_iterator it1, it2;
+    for (it1 = expected.begin(), it2 = present.begin(); it1 != expected.end() && it2 != present.end();
+      it1++, it2++)
+    {
+      ASSERT_STREQ(it1->c_str(), it2->c_str());
+    }
+  }
+
+  /*!
+   * \brief Round-trips a random seed through words_to_bytes and bytes_to_words, first without and then with the checksum word.
+   * \param language A Language instance to test; its word list supplies the random seed words.
+   */
+  void test_language(const Language::Base &language)
+  {
+    const std::vector<std::string> &word_list = language.get_word_list();
+    std::string seed = "", return_seed = "";
+    // Generate a random seed of seed_length words without checksum; each word is appended with a trailing space
+    for (int ii = 0; ii < crypto::ElectrumWords::seed_length; ii++)
+    {
+      seed += (word_list[get_random_index(word_list.size())] + ' ');
+    }
+    seed.pop_back(); // drop the trailing space left by the loop
+    std::cout << "Test seed without checksum:\n";
+    std::cout << seed << std::endl;
+
+    crypto::secret_key key;
+    std::string language_name;
+    bool res;
+    std::vector<std::string> seed_vector, return_seed_vector;
+    std::string checksum_word;
+
+    // Convert it to secret key and check that the detected language is the one under test
+    res = crypto::ElectrumWords::words_to_bytes(seed, key, language_name);
+    ASSERT_EQ(true, res);
+    std::cout << "Detected language: " << language_name << std::endl;
+    ASSERT_STREQ(language.get_language_name().c_str(), language_name.c_str());
+
+    // Convert the secret key back to seed (the return value of bytes_to_words is not captured)
+    crypto::ElectrumWords::bytes_to_words(key, return_seed, language.get_language_name());
+    ASSERT_EQ(true, res); // NOTE(review): res still holds the words_to_bytes result from above
+    std::cout << "Returned seed:\n";
+    std::cout << return_seed << std::endl;
+    boost::split(seed_vector, seed, boost::is_any_of(" "));
+    boost::split(return_seed_vector, return_seed, boost::is_any_of(" "));
+
+    // Extract the checksum word (the last word of the returned seed)
+    checksum_word = return_seed_vector.back();
+    return_seed_vector.pop_back();
+    ASSERT_EQ(seed_vector.size(), return_seed_vector.size());
+    // Ensure that the rest of it is same
+    compare_vectors(seed_vector, return_seed_vector);
+
+    // Append the checksum word to repeat the entire process with a seed with checksum
+    seed += (" " + checksum_word);
+    std::cout << "Test seed with checksum:\n";
+    std::cout << seed << std::endl;
+    res = crypto::ElectrumWords::words_to_bytes(seed, key, language_name);
+    ASSERT_EQ(true, res);
+    std::cout << "Detected language: " << language_name << std::endl;
+    ASSERT_STREQ(language.get_language_name().c_str(), language_name.c_str());
+
+    return_seed = "";
+    crypto::ElectrumWords::bytes_to_words(key, return_seed, language.get_language_name());
+    ASSERT_EQ(true, res); // NOTE(review): again re-checks the words_to_bytes result, not bytes_to_words
+    std::cout << "Returned seed:\n";
+    std::cout << return_seed << std::endl;
+
+    seed_vector.clear();
+    return_seed_vector.clear();
+    boost::split(seed_vector, seed, boost::is_any_of(" "));
+    boost::split(return_seed_vector, return_seed, boost::is_any_of(" "));
+    ASSERT_EQ(seed_vector.size(), return_seed_vector.size());
+    compare_vectors(seed_vector, return_seed_vector);
+  }
+}
+
+TEST(mnemonics, all_languages)
+{
+  srand(time(NULL)); // seed rand() from the current time, so the generated seeds differ between runs
+  std::vector<Language::Base*> languages({
+    Language::Singleton<Language::English>::instance(),
+    Language::Singleton<Language::Spanish>::instance(),
+    Language::Singleton<Language::Portuguese>::instance(),
+    Language::Singleton<Language::Japanese>::instance(), // OldEnglish is not in this list
+  });
+
+  for (std::vector<Language::Base*>::iterator it = languages.begin(); it != languages.end(); it++)
+  {
+    test_language(*(*it)); // run the round-trip test once per language instance
+  }
+}