Merge pull request #404 from kdmukai/entropy_to_mnemonic_cli

[New Feature] Entropy-to-mnemonic CLI utility
SeedSigner · Apr 12, 2024 · c12f64e · c12f64e
2 parents 69cb6ac + c4aafd0
commit c12f64e
Show file tree

Hide file tree

Showing 5 changed files with 351 additions and 33 deletions.
diff --git a/docs/dice_verification.md b/docs/dice_verification.md
@@ -205,3 +205,70 @@ We double-checked in two different web tools implementing different methods for
 
 So congratulations if the fingerprints, zpubs and addresses all match up in your example so you can be much more confident that nothing is wrong with your generated seed.
 
+---
+
+# Command Line Tool
+_(for more advanced/Python-savvy users)_
+
+Run the exact same SeedSigner mnemonic generation code from the command line to quickly test and externally verify the results.
+
+Create a Python virtualenv (outside the scope of this doc) and install dependencies:
+```bash
+pip3 install embit
+
+# Install the main project code to make it importable
+pip3 install -e .
+```
+
+
+Then run the utility script with `-h` to view the usage instructions:
+```bash
+cd tools
+python3 mnemonic.py -h
+```
+
+```
+    Verify SeedSigner's dice rolls and coin flip entropy-to-mnemonic conversion via this tool.
+
+    Compare its results against iancoleman.io/bip39 and bitcoiner.guide/seed
+
+    Usage:
+        # 50 dice rolls / 12-word mnemonic
+        python3 mnemonic.py dice 5624433434...
+        
+        # 99 dice rolls / 24-word mnemonic
+        python3 mnemonic.py dice 6151463561...
+
+        # 50 dice rolls, entered as 0-5 / 12-word mnemonic
+        python3 mnemonic.py --zero-indexed-dice dice 5135535514...
+
+        # 128 coin flips / 12-word mnemonic
+        python3 mnemonic.py coins 1111100111...
+
+        # 256 coin flips / 24-word mnemonic
+        python3 mnemonic.py coins 0010111010...
+
+        # GENERATE 50 random dice rolls / 12-word mnemonic
+        python3 mnemonic.py dice rand12
+
+        # GENERATE 99 random dice rolls / 24-word mnemonic
+        python3 mnemonic.py dice rand24
+
+        # GENERATE 99 random dice rolls, entered as 0-5 / 24-word mnemonic
+        python3 mnemonic.py --zero-indexed-dice dice rand24
+
+        # GENERATE 128 random coin flips / 12-word mnemonic
+        python3 mnemonic.py coins rand12
+
+        # GENERATE 256 random coin flips / 24-word mnemonic
+        python3 mnemonic.py coins rand24
+```
+
+### How to get the same results in iancoleman.io
+Always specify your expected length in the "Mnemonic Length" dropdown list (defaults to "Use Raw Entropy (3 words per 32 bits)").
+
+Dice Rolls: Do NOT use the "Dice [1-6]" option; select "Base 10 [0-9]" or "Hex [0-9A-F]"
+
+Zero-indexed dice rolls: Select "Base 6 [0-5]", "Base 10 [0-9]", or "Hex [0-9A-F]"
+
+Coin Flips: Select "Binary [0-1]", "Base 6 [0-5]", "Base 10 [0-9]", or "Hex [0-9A-F]"
diff --git a/src/seedsigner/helpers/mnemonic_generation.py b/src/seedsigner/helpers/mnemonic_generation.py
@@ -2,21 +2,40 @@
 import unicodedata
 
 from embit import bip39
-from embit.bip39 import mnemonic_to_bytes, mnemonic_from_bytes
-from typing import List
+from seedsigner.models.settings_definition import SettingsConstants
+from seedsigner.models.seed import Seed
 
+"""
+    This is SeedSigner's internal mnemonic generation utility.
+     
+    It can also be run as an independently-executable CLI to facilitate external
+    verification of SeedSigner's results for a given input entropy.
 
+    see: docs/dice_verification.md (the "Command Line Tool" section).
+"""
 
-def calculate_checksum(mnemonic: list, wordlist_language_code: str) -> List[str]:
+DICE__NUM_ROLLS__12WORD = 50
+DICE__NUM_ROLLS__24WORD = 99
+
+
+
+def calculate_checksum(mnemonic: list | str, wordlist_language_code: str = SettingsConstants.WORDLIST_LANGUAGE__ENGLISH) -> list[str]:
     """
         Provide 12- or 24-word mnemonic, returns complete mnemonic w/checksum as a list.
 
+        Mnemonic may be a list of words or a string of words separated by spaces or commas.
+
         If 11- or 23-words are provided, append word `0000` to end of list as temp final
         word.
     """
-    from seedsigner.models.seed import Seed
+    if type(mnemonic) == str:
+        import re
+        # split on commas or spaces
+        mnemonic = re.findall(r'[^,\s]+', mnemonic)
+
     if len(mnemonic) in [11, 23]:
-        mnemonic.append(Seed.get_wordlist(wordlist_language_code)[0])
+        temp_final_word = Seed.get_wordlist(wordlist_language_code)[0]
+        mnemonic.append(temp_final_word)
 
     if len(mnemonic) not in [12, 24]:
         raise Exception("Pass in a 12- or 24-word mnemonic")
@@ -37,28 +56,68 @@ def calculate_checksum(mnemonic: list, wordlist_language_code: str) -> List[str]
 
 
 
-def generate_mnemonic_from_bytes(entropy_bytes) -> List[str]:
-    return bip39.mnemonic_from_bytes(entropy_bytes).split()
+def generate_mnemonic_from_bytes(entropy_bytes, wordlist_language_code: str = SettingsConstants.WORDLIST_LANGUAGE__ENGLISH) -> list[str]:
+    return bip39.mnemonic_from_bytes(entropy_bytes, wordlist=Seed.get_wordlist(wordlist_language_code)).split()
+
 
 
+def generate_mnemonic_from_dice(roll_data: str, wordlist_language_code: str = SettingsConstants.WORDLIST_LANGUAGE__ENGLISH) -> list[str]:
+    """
+        Takes a string of 50 or 99 dice rolls and returns a 12- or 24-word mnemonic.
 
-def generate_mnemonic_from_dice(roll_data: str) -> List[str]:
+        Uses the iancoleman.io/bip39 and bitcoiner.guide/seed "Base 10" or "Hex" mode approach:
+        * dice rolls are treated as string data.
+        * hashed via SHA256.
+
+        Important note: This method is NOT compatible with iancoleman's "Dice" mode.
+    """
     entropy_bytes = hashlib.sha256(roll_data.encode()).digest()
 
-    if len(roll_data) == 50:
+    if len(roll_data) == DICE__NUM_ROLLS__12WORD:
         # 12-word mnemonic; only use 128bits / 16 bytes
         entropy_bytes = entropy_bytes[:16]
 
     # Return as a list
-    return bip39.mnemonic_from_bytes(entropy_bytes).split()
+    return bip39.mnemonic_from_bytes(entropy_bytes, wordlist=Seed.get_wordlist(wordlist_language_code)).split()
+
+
+
+def generate_mnemonic_from_coin_flips(coin_flips: str, wordlist_language_code: str = SettingsConstants.WORDLIST_LANGUAGE__ENGLISH) -> list[str]:
+    """
+        Takes a string of 128 or 256 0s and 1s and returns a 12- or 24-word mnemonic.
+
+        Uses the iancoleman.io/bip39 and bitcoiner.guide/seed "Binary" mode approach:
+        * binary digit stream is treated as string data.
+        * hashed via SHA256.
+    """
+    entropy_bytes = hashlib.sha256(coin_flips.encode()).digest()
+
+    if len(coin_flips) == 128:
+        # 12-word mnemonic; only use 128bits / 16 bytes
+        entropy_bytes = entropy_bytes[:16]
+
+    # Return as a list
+    return bip39.mnemonic_from_bytes(entropy_bytes, wordlist=Seed.get_wordlist(wordlist_language_code)).split()
+
+
+
+def get_partial_final_word(coin_flips: str, wordlist_language_code: str = SettingsConstants.WORDLIST_LANGUAGE__ENGLISH) -> str:
+    """ Look up the partial final word for the given coin flips.
+        7 coin flips: 0101010 + **** where the final 4 bits will be replaced with the checksum
+        3 coin flips: 010 + ******** where the final 8 bits will be replaced with the checksum
+    """
+    binary_string = coin_flips + "0" * (11 - len(coin_flips))
+    wordlist_index = int(binary_string, 2)
+
+    return Seed.get_wordlist(wordlist_language_code)[wordlist_index]
 
 
 
 # Note: This currently isn't being used since we're now chaining hashed bytes for the
 #   image-based entropy and aren't just ingesting a single image.
-def generate_mnemonic_from_image(image) -> List[str]:
+def generate_mnemonic_from_image(image, wordlist_language_code: str = SettingsConstants.WORDLIST_LANGUAGE__ENGLISH) -> list[str]:
     import hashlib
     hash = hashlib.sha256(image.tobytes())
 
     # Return as a list
-    return bip39.mnemonic_from_bytes(hash.digest()).split()
+    return bip39.mnemonic_from_bytes(hash.digest(), wordlist=Seed.get_wordlist(wordlist_language_code)).split()
diff --git a/src/seedsigner/views/tools_views.py b/src/seedsigner/views/tools_views.py
@@ -188,8 +188,8 @@ def run(self):
 ****************************************************************************"""
 class ToolsDiceEntropyMnemonicLengthView(View):
     def run(self):
-        TWELVE = "12 words (50 rolls)"
-        TWENTY_FOUR = "24 words (99 rolls)"
+        TWELVE = f"12 words ({mnemonic_generation.DICE__NUM_ROLLS__12WORD} rolls)"
+        TWENTY_FOUR = f"24 words ({mnemonic_generation.DICE__NUM_ROLLS__24WORD} rolls)"
 
         button_data = [TWELVE, TWENTY_FOUR]
         selected_menu_num = ButtonListScreen(
@@ -203,10 +203,10 @@ def run(self):
             return Destination(BackStackView)
 
         elif button_data[selected_menu_num] == TWELVE:
-            return Destination(ToolsDiceEntropyEntryView, view_args=dict(total_rolls=50))
+            return Destination(ToolsDiceEntropyEntryView, view_args=dict(total_rolls=mnemonic_generation.DICE__NUM_ROLLS__12WORD))
 
         elif button_data[selected_menu_num] == TWENTY_FOUR:
-            return Destination(ToolsDiceEntropyEntryView, view_args=dict(total_rolls=99))
+            return Destination(ToolsDiceEntropyEntryView, view_args=dict(total_rolls=mnemonic_generation.DICE__NUM_ROLLS__24WORD))
 
 
 

diff --git a/tests/test_mnemonic_generation.py b/tests/test_mnemonic_generation.py
@@ -12,35 +12,58 @@ def test_dice_rolls():
     dice_rolls = ""
     for i in range(0, 99):
         # Do not need truly rigorous random for this test
-        dice_rolls += str(random.randint(0, 5))
+        dice_rolls += str(random.randint(1, 6))
 
     mnemonic = mnemonic_generation.generate_mnemonic_from_dice(dice_rolls)
+
     assert len(mnemonic) == 24
     assert bip39.mnemonic_is_valid(" ".join(mnemonic))
 
     dice_rolls = ""
-    for i in range(0, 50):
+    for i in range(0, mnemonic_generation.DICE__NUM_ROLLS__12WORD):
         # Do not need truly rigorous random for this test
-        dice_rolls += str(random.randint(0, 5))
+        dice_rolls += str(random.randint(1, 6))
 
     mnemonic = mnemonic_generation.generate_mnemonic_from_dice(dice_rolls)
     assert len(mnemonic) == 12
     assert bip39.mnemonic_is_valid(" ".join(mnemonic))
 
 
 
-def test_calculate_checksum():
-    """ Given an 11-word or 23-word mnemonic, the calculated checksum should yield a
+def test_calculate_checksum_input_type():
+    """
+        Given an 11-word or 23-word mnemonic, the calculated checksum should yield a
         valid complete mnemonic.
+        
+        calculate_checksum should accept the mnemonic as:
+        * a list of strings
+        * string: "A B C", "A, B, C", "A,B,C"
     """
     # Test mnemonics from https://iancoleman.io/bip39/
+    def _try_all_input_formats(partial_mnemonic: str):
+        # List of strings
+        mnemonic = mnemonic_generation.calculate_checksum(partial_mnemonic.split(" "))
+        assert bip39.mnemonic_is_valid(" ".join(mnemonic))
+
+        # Comma-separated string
+        mnemonic = mnemonic_generation.calculate_checksum(partial_mnemonic.replace(" ", ","))
+        assert bip39.mnemonic_is_valid(" ".join(mnemonic))
+
+        # Comma-separated string w/space
+        mnemonic = mnemonic_generation.calculate_checksum(partial_mnemonic.replace(" ", ", "))
+        assert bip39.mnemonic_is_valid(" ".join(mnemonic))
+
+        # Space-separated string
+        mnemonic = mnemonic_generation.calculate_checksum(partial_mnemonic)
+        assert bip39.mnemonic_is_valid(" ".join(mnemonic))
+
     partial_mnemonic = "crawl focus rescue cable view pledge rather dinner cousin unfair day"
-    mnemonic = mnemonic_generation.calculate_checksum(partial_mnemonic.split(" "), wordlist_language_code=SettingsConstants.WORDLIST_LANGUAGE__ENGLISH)
-    assert bip39.mnemonic_is_valid(" ".join(mnemonic))
+    _try_all_input_formats(partial_mnemonic)
 
     partial_mnemonic = "bubble father debate ankle injury fence mesh evolve section wet coyote violin pyramid flower rent arrow round clutch myth safe base skin mobile"
-    mnemonic = mnemonic_generation.calculate_checksum(partial_mnemonic.split(" "), wordlist_language_code=SettingsConstants.WORDLIST_LANGUAGE__ENGLISH)
-    assert bip39.mnemonic_is_valid(" ".join(mnemonic))
+    _try_all_input_formats(partial_mnemonic)
+
+
 
 
 def test_calculate_checksum_invalid_mnemonics():
@@ -50,25 +73,25 @@ def test_calculate_checksum_invalid_mnemonics():
     with pytest.raises(Exception) as e:
         # Mnemonic is too short: 10 words instead of 11
         partial_mnemonic = "abandon " * 9 + "about"
-        mnemonic_generation.calculate_checksum(partial_mnemonic.split(" "), wordlist_language_code=SettingsConstants.WORDLIST_LANGUAGE__ENGLISH)
+        mnemonic_generation.calculate_checksum(partial_mnemonic)
     assert "12- or 24-word" in str(e)
 
     with pytest.raises(Exception) as e:
         # Valid mnemonic but unsupported length
         mnemonic = "devote myth base logic dust horse nut collect buddy element eyebrow visit empty dress jungle"
-        mnemonic_generation.calculate_checksum(mnemonic.split(" "), wordlist_language_code=SettingsConstants.WORDLIST_LANGUAGE__ENGLISH)
+        mnemonic_generation.calculate_checksum(mnemonic)
     assert "12- or 24-word" in str(e)
 
     with pytest.raises(Exception) as e:
         # Mnemonic is too short: 22 words instead of 23
         partial_mnemonic = "abandon " * 21 + "about"
-        mnemonic_generation.calculate_checksum(partial_mnemonic.split(" "), wordlist_language_code=SettingsConstants.WORDLIST_LANGUAGE__ENGLISH)
+        mnemonic_generation.calculate_checksum(partial_mnemonic)
     assert "12- or 24-word" in str(e)
 
     with pytest.raises(ValueError) as e:
         # Invalid BIP-39 word
         partial_mnemonic = "foobar " * 11 + "about"
-        mnemonic_generation.calculate_checksum(partial_mnemonic.split(" "), wordlist_language_code=SettingsConstants.WORDLIST_LANGUAGE__ENGLISH)
+        mnemonic_generation.calculate_checksum(partial_mnemonic)
     assert "not in the dictionary" in str(e)
 
 
@@ -78,17 +101,17 @@ def test_calculate_checksum_with_default_final_word():
         the mnemonic.
     """
     partial_mnemonic = "crawl focus rescue cable view pledge rather dinner cousin unfair day"
-    mnemonic1 = mnemonic_generation.calculate_checksum(partial_mnemonic.split(" "), wordlist_language_code=SettingsConstants.WORDLIST_LANGUAGE__ENGLISH)
+    mnemonic1 = mnemonic_generation.calculate_checksum(partial_mnemonic)
 
     partial_mnemonic += " abandon"
-    mnemonic2 = mnemonic_generation.calculate_checksum(partial_mnemonic.split(" "), wordlist_language_code=SettingsConstants.WORDLIST_LANGUAGE__ENGLISH)
+    mnemonic2 = mnemonic_generation.calculate_checksum(partial_mnemonic)
     assert mnemonic1 == mnemonic2
 
     partial_mnemonic = "bubble father debate ankle injury fence mesh evolve section wet coyote violin pyramid flower rent arrow round clutch myth safe base skin mobile"
-    mnemonic1 = mnemonic_generation.calculate_checksum(partial_mnemonic.split(" "), wordlist_language_code=SettingsConstants.WORDLIST_LANGUAGE__ENGLISH)
+    mnemonic1 = mnemonic_generation.calculate_checksum(partial_mnemonic)
 
     partial_mnemonic += " abandon"
-    mnemonic2 = mnemonic_generation.calculate_checksum(partial_mnemonic.split(" "), wordlist_language_code=SettingsConstants.WORDLIST_LANGUAGE__ENGLISH)
+    mnemonic2 = mnemonic_generation.calculate_checksum(partial_mnemonic)
     assert mnemonic1 == mnemonic2