diff --git a/talon/quotations.py b/talon/quotations.py
index 60163109..f540a729 100644
--- a/talon/quotations.py
+++ b/talon/quotations.py
@@ -25,9 +25,9 @@
 RE_FWD = re.compile("^[-]+[ ]*Forwarded message[ ]*[-]+$", re.I | re.M)
 
 RE_ON_DATE_SMB_WROTE = re.compile(
-    u'(-*[>]?[ ]?({0})[ ].*({1})(.*\n){{0,2}}.*({2}):?-*)'.format(
+    '(-*[>]?[ ]?({0})[ ].*({1})(.*\n){{0,2}}.*({2}):?-*)'.format(
         # Beginning of the line
-        u'|'.join((
+        '|'.join((
             # English
             'On',
             # French
@@ -39,25 +39,25 @@
             # German
             'Am',
             # Norwegian
-            u'På',
+            'På',
             # Swedish, Danish
             'Den',
         )),
         # Date and sender separator
-        u'|'.join((
+        '|'.join((
             # most languages separate date and sender address by comma
             ',',
             # polish date and sender address separator
-            u'użytkownik'
+            'użytkownik'
         )),
         # Ending of the line
-        u'|'.join((
+        '|'.join((
             # English
             'wrote', 'sent',
             # French
-            u'a écrit',
+            'a écrit',
             # Polish
-            u'napisał',
+            'napisał',
             # Dutch
             'schreef','verzond','geschreven',
             # German
@@ -68,15 +68,15 @@
     ))
 
 # Special case for languages where text is translated like this: 'on {date} wrote {somebody}:'
 RE_ON_DATE_WROTE_SMB = re.compile(
-    u'(-*[>]?[ ]?({0})[ ].*(.*\n){{0,2}}.*({1})[ ]*.*:)'.format(
+    '(-*[>]?[ ]?({0})[ ].*(.*\n){{0,2}}.*({1})[ ]*.*:)'.format(
         # Beginning of the line
-        u'|'.join((
+        '|'.join((
             'Op',
             #German
             'Am'
         )),
         # Ending of the line
-        u'|'.join((
+        '|'.join((
             # Dutch
             'schreef','verzond','geschreven',
             # German
@@ -86,7 +86,7 @@
 )
 
 RE_QUOTATION = re.compile(
-    r'''
+    rb'''
     (
         # quotation border: splitter line or a number of quotation marker lines
         (?:
@@ -107,7 +107,7 @@
     ''', re.VERBOSE)
 
 RE_EMPTY_QUOTATION = re.compile(
-    r'''
+    rb'''
     (
         # quotation border: splitter line or a number of quotation marker lines
         (?:
@@ -121,26 +121,26 @@
 
 # ------Original Message------ or ---- Reply Message ----
 # With variations in other languages.
-RE_ORIGINAL_MESSAGE = re.compile(u'[\s]*[-]+[ ]*({})[ ]*[-]+'.format(
-    u'|'.join((
+RE_ORIGINAL_MESSAGE = re.compile('[\s]*[-]+[ ]*({})[ ]*[-]+'.format(
+    '|'.join((
         # English
         'Original Message', 'Reply Message',
         # German
-        u'Ursprüngliche Nachricht', 'Antwort Nachricht',
+        'Ursprüngliche Nachricht', 'Antwort Nachricht',
         # Danish
         'Oprindelig meddelelse',
     ))), re.I)
 
-RE_FROM_COLON_OR_DATE_COLON = re.compile(u'(_+\r?\n)?[\s]*(:?[*]?{})[\s]?:[*]?.*'.format(
-    u'|'.join((
+RE_FROM_COLON_OR_DATE_COLON = re.compile('(_+\r?\n)?[\s]*(:?[*]?{})[\s]?:[*]? .*'.format(
+    '|'.join((
         # "From" in different languages.
-        'From', 'Van', 'De', 'Von', 'Fra', u'Från',
+        'From', 'Van', 'De', 'Von', 'Fra', 'Från',
         # "Date" in different languages.
-        'Date', 'Datum', u'Envoyé', 'Skickat', 'Sendt',
+        'Date', 'Datum', 'Envoyé', 'Skickat', 'Sendt',
     ))), re.I)
 
 # ---- John Smith wrote ----
-RE_ANDROID_WROTE = re.compile(u'[\s]*[-]+.*({})[ ]*[-]+'.format(
+RE_ANDROID_WROTE = re.compile('[\s]*[-]+.*({})[ ]*[-]+'.format(
     u'|'.join((
         # English
         'wrote'
@@ -183,6 +183,7 @@
 RE_HEADER = re.compile(": ")
 
+
 def extract_from(msg_body, content_type='text/plain'):
     try:
         if content_type == 'text/plain':
@@ -221,15 +222,15 @@ def mark_message_lines(lines):
     >>> mark_message_lines(['answer', 'From: foo@bar.com', '', '> question'])
     'tsem'
     """
-    markers = ['e' for _ in lines]
+    markers = [b'e' for _ in lines]
     i = 0
     while i < len(lines):
         if not lines[i].strip():
-            markers[i] = 'e'  # empty line
+            markers[i] = b'e'  # empty line
         elif QUOT_PATTERN.match(lines[i]):
-            markers[i] = 'm'  # line with quotation marker
+            markers[i] = b'm'  # line with quotation marker
         elif RE_FWD.match(lines[i]):
-            markers[i] = 'f'  # ---- Forwarded message ----
+            markers[i] = b'f'  # ---- Forwarded message ----
         else:
             # in case splitter is spread across several lines
             splitter = is_splitter('\n'.join(lines[i:i + SPLITTER_MAX_LINES]))
@@ -238,16 +239,16 @@ def mark_message_lines(lines):
                 # append as many splitter markers as lines in splitter
                 splitter_lines = splitter.group().splitlines()
                 for j in range(len(splitter_lines)):
-                    markers[i + j] = 's'
+                    markers[i + j] = b's'
 
                 # skip splitter lines
                 i += len(splitter_lines) - 1
             else:
                 # probably the line from the last message in the conversation
-                markers[i] = 't'
+                markers[i] = b't'
 
         i += 1
 
-    return ''.join(markers)
+    return b''.join(markers)
 
 
 def process_marked_lines(lines, markers, return_flags=[False, -1, -1]):
@@ -261,19 +262,18 @@ def process_marked_lines(lines, markers, return_flags=[False, -1, -1]):
     return_flags = [were_lines_deleted, first_deleted_line, last_deleted_line]
     """
-    markers = ''.join(markers)
     # if there are no splitter there should be no markers
-    if 's' not in markers and not re.search('(me*){3}', markers):
-        markers = markers.replace('m', 't')
+    if b's' not in markers and not re.search(b'(me*){3}', markers):
+        markers = markers.replace(b'm', b't')
 
-    if re.match('[te]*f', markers):
+    if re.match(b'[te]*f', markers):
         return_flags[:] = [False, -1, -1]
         return lines
 
     # inlined reply
     # use lookbehind assertions to find overlapping entries e.g.
     # for 'mtmtm' both 't' entries should be found
-    for inline_reply in re.finditer('(?<=m)e*((?:t+e*)+)m', markers):
+    for inline_reply in re.finditer(b'(?<=m)e*((?:t+e*)+)m', markers):
         # long links could break sequence of quotation lines but they shouldn't
         # be considered an inline reply
         links = (
@@ -284,7 +284,7 @@ def process_marked_lines(lines, markers, return_flags=[False, -1, -1]):
         return lines
 
     # cut out text lines coming after splitter if there are no markers there
-    quotation = re.search('(se*)+((t|f)+e*)+', markers)
+    quotation = re.search(b'(se*)+((t|f)+e*)+', markers)
     if quotation:
         return_flags[:] = [True, quotation.start(), len(lines)]
         return lines[:quotation.start()]
@@ -411,7 +411,6 @@ def extract_from_html(msg_body):
 
     return result
 
-
 def _extract_from_html(msg_body):
     """
     Extract not quoted message from provided html message body
@@ -489,7 +488,7 @@ def _extract_from_html(msg_body):
     if _readable_text_empty(html_tree_copy):
         return msg_body
 
-    return html.tostring(html_tree_copy)
+    return _html_tostring(html_tree_copy)
 
 
 def split_emails(msg):
@@ -525,43 +524,46 @@ def _mark_quoted_email_splitlines(markers, lines):
     """
     # Create a list of markers to easily alter specific characters
     markerlist = list(markers)
+
     for i, line in enumerate(lines):
-        if markerlist[i] != 'm':
+        if markerlist[i] != b'm'[0]:
             continue
         for pattern in SPLITTER_PATTERNS:
             matcher = re.search(pattern, line)
             if matcher:
-                markerlist[i] = 's'
+                markerlist[i] = b's'[0]
                 break
 
-    return "".join(markerlist)
+    return bytes(markerlist)
 
 
 def _correct_splitlines_in_headers(markers, lines):
     """
     Corrects markers by removing splitlines deemed to be inside header blocks.
     """
-    updated_markers = ""
+    updated_markers = b""
     i = 0
     in_header_block = False
 
     for m in markers:
         # Only set in_header_block flag when we hit an 's' and line is a header
-        if m == 's':
+        m = bytes([m])
+        if m == b"s":
             if not in_header_block:
                 if bool(re.search(RE_HEADER, lines[i])):
                     in_header_block = True
         else:
             if QUOT_PATTERN.match(lines[i]):
-                m = 'm'
+                m = b"m"
             else:
-                m = 't'
 
+                m = b"t"
 
             # If the line is not a header line, set in_header_block false.
             if not bool(re.search(RE_HEADER, lines[i])):
                 in_header_block = False
 
         # Add the marker to the new updated markers string.
+        print(updated_markers, m)
         updated_markers += m
         i += 1
 
@@ -598,3 +600,6 @@ def register_xpath_extensions():
     ns.prefix = 'mg'
     ns['text_content'] = text_content
     ns['tail'] = tail
+
+def _html_tostring(html_tree):
+    return html.tostring(html_tree).decode('utf-8')
diff --git a/talon/signature/__init__.py b/talon/signature/__init__.py
index fc60e1d8..6398f522 100644
--- a/talon/signature/__init__.py
+++ b/talon/signature/__init__.py
@@ -35,5 +35,6 @@
 
 
 def initialize():
+    print(EXTRACTOR_FILENAME)
     extraction.EXTRACTOR = classifier.load(EXTRACTOR_FILENAME,
                                            EXTRACTOR_DATA)
diff --git a/talon/signature/bruteforce.py b/talon/signature/bruteforce.py
index 7f666bd9..bfc72f2d 100644
--- a/talon/signature/bruteforce.py
+++ b/talon/signature/bruteforce.py
@@ -50,7 +50,7 @@
 # c - could be signature line
 # d - line starts with dashes (could be signature or list item)
 # l - long line
-RE_SIGNATURE_CANDIDATE = re.compile(r'''
+RE_SIGNATURE_CANDIDATE = re.compile(br'''
     (?P<candidate>c+d)[^d]
     |
     (?P<candidate>c+d)$
@@ -163,16 +163,16 @@ def _mark_candidate_indexes(lines, candidate):
     'cdc'
     """
     # at first consider everything to be potential signature lines
-    markers = bytearray('c'*len(candidate))
+    markers = bytearray('c'*len(candidate), 'utf-8')
 
     # mark lines starting from bottom up
     for i, line_idx in reversed(list(enumerate(candidate))):
         if len(lines[line_idx].strip()) > TOO_LONG_SIGNATURE_LINE:
-            markers[i] = 'l'
+            markers[i] = ord(b'l')
         else:
             line = lines[line_idx].strip()
             if line.startswith('-') and line.strip("-"):
-                markers[i] = 'd'
+                markers[i] = ord(b'd')
 
     return markers
diff --git a/talon/signature/data/classifier b/talon/signature/data/classifier
index 1c3a4b08..4ee71eec 100644
Binary files a/talon/signature/data/classifier and b/talon/signature/data/classifier differ
diff --git a/talon/signature/data/classifier_01.npy b/talon/signature/data/classifier_01.npy
index 11d13026..ea117aac 100644
Binary files a/talon/signature/data/classifier_01.npy and b/talon/signature/data/classifier_01.npy differ
diff --git a/talon/signature/data/classifier_02.npy b/talon/signature/data/classifier_02.npy
index 2cec7290..11d13026 100644
Binary files a/talon/signature/data/classifier_02.npy and b/talon/signature/data/classifier_02.npy differ
diff --git a/talon/signature/data/classifier_03.npy b/talon/signature/data/classifier_03.npy
index e5762ae5..77af8e37 100644
Binary files a/talon/signature/data/classifier_03.npy and b/talon/signature/data/classifier_03.npy differ
diff --git a/talon/signature/extraction.py b/talon/signature/extraction.py
index 32591717..20263285 100644
--- a/talon/signature/extraction.py
+++ b/talon/signature/extraction.py
@@ -18,7 +18,7 @@
 
 # regex signature pattern for reversed lines
 # assumes that all long lines have been excluded
-RE_REVERSE_SIGNATURE = re.compile(r'''
+RE_REVERSE_SIGNATURE = re.compile(br'''
 # signature should consists of blocks like this
 (?:
    # it could end with empty line
@@ -81,7 +81,7 @@ def _mark_lines(lines, sender):
     candidate = get_signature_candidate(lines)
 
     # at first consider everything to be text no signature
-    markers = bytearray('t'*len(lines))
+    markers = bytearray('t'*len(lines), 'utf-8')
 
     # mark lines starting from bottom up
     # mark only lines that belong to candidate
@@ -92,9 +92,9 @@ def _mark_lines(lines, sender):
         # relative to lines not candidate
         j = len(lines) - len(candidate) + i
         if not line.strip():
-            markers[j] = 'e'
+            markers[j] = ord(b'e')
         elif is_signature_line(line, sender, EXTRACTOR):
-            markers[j] = 's'
+            markers[j] = ord(b's')
 
     return markers
diff --git a/talon/signature/learning/dataset.py b/talon/signature/learning/dataset.py
index 308995be..63c0489a 100644
--- a/talon/signature/learning/dataset.py
+++ b/talon/signature/learning/dataset.py
@@ -61,7 +61,7 @@ def parse_msg_sender(filename, sender_known=True):
     if os.path.isfile(filename) and not is_sender_filename(filename):
         with open(filename) as f:
             msg = f.read()
-            sender = u''
+            sender = ''
             if sender_known:
                 sender_filename = build_sender_filename(filename)
                 if os.path.exists(sender_filename):
@@ -124,9 +124,9 @@ def build_detection_dataset(folder, dataset_filename,
     """
     if os.path.exists(dataset_filename):
         os.remove(dataset_filename)
-    build_detection_class(os.path.join(folder, u'P'),
+    build_detection_class(os.path.join(folder, 'P'),
                           dataset_filename, 1)
-    build_detection_class(os.path.join(folder, u'N'),
+    build_detection_class(os.path.join(folder, 'N'),
                           dataset_filename, -1)
diff --git a/talon/utils.py b/talon/utils.py
index e6c884bf..a4dfb3bd 100644
--- a/talon/utils.py
+++ b/talon/utils.py
@@ -7,6 +7,7 @@
 import cchardet
 import regex as re
 
+import lxml.html
 from lxml.html import html5parser
 from lxml.cssselect import CSSSelector
 
@@ -37,7 +38,7 @@ def safe_format(format_string, *args, **kwargs):
 
     # ignore other errors
     except:
-        return u''
+        return ''
 
 
 def to_unicode(str_or_unicode, precise=False):
@@ -177,11 +178,13 @@ def html_to_text(string):
 def html_fromstring(s):
     """Parse html tree from string. Return None if the string can't be parsed.
     """
+    if isinstance(s, bytes):
+        s = s.decode()
     try:
         if html_too_big(s):
             return None
 
-        return html5parser.fromstring(s, parser=_html5lib_parser())
+        return lxml.html.document_fromstring(s, ensure_head_body=True)  #html5parser.fromstring(s, parser=_html5lib_parser())
     except Exception:
         pass
 
@@ -189,11 +192,13 @@ def html_document_fromstring(s):
     """Parse html tree from string. Return None if the string can't be parsed.
     """
+    if isinstance(s, bytes):
+        s = s.decode()
     try:
         if html_too_big(s):
             return None
 
-        return html5parser.document_fromstring(s, parser=_html5lib_parser())
+        return lxml.html.document_fromstring(s, ensure_head_body=True)  #html5parser.document_fromstring(s, parser=_html5lib_parser())
     except Exception:
         pass
diff --git a/tests/html_quotations_test.py b/tests/html_quotations_test.py
index b78409bf..3f7c4389 100644
--- a/tests/html_quotations_test.py
+++ b/tests/html_quotations_test.py
@@ -27,7 +27,7 @@ def test_quotation_splitter_inside_blockquote():
 
 </blockquote>"""
 
-    eq_("<html>Reply</html>",
+    eq_("<html><head></head><body>Reply</body></html>",
         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
 
 
@@ -44,7 +44,7 @@ def test_quotation_splitter_outside_blockquote():
 </blockquote>
 """
-    eq_("<html>Reply</html>",
+    eq_("<html><head></head><body>Reply</body></html>",
         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
 
 
@@ -62,7 +62,7 @@ def test_regular_blockquote():
 </blockquote>
 """
-    eq_("<html>Reply<blockquote>Regular</blockquote></html>",
+    eq_("<html><head></head><body>Reply<blockquote>Regular</blockquote></body></html>",
         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
 
 
@@ -129,7 +129,7 @@ def test_gmail_quote():
 
 </div>"""
 
-    eq_("<html>Reply</html>",
+    eq_("<html><head></head><body>Reply</body></html>",
         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
 
 
@@ -140,7 +140,7 @@ def test_gmail_quote_compact():
                '<div>Test</div>' \
                '</div>' \
                '</div>'
-    eq_("<html>Reply</html>",
+    eq_("<html><head></head><body>Reply</body></html>",
         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
 
 
@@ -157,7 +157,7 @@ def test_gmail_quote_blockquote():
 
 
 def test_unicode_in_reply():
-    msg_body = u"""Reply \xa0 \xa0 Text<br>
+    msg_body = """Reply \xa0 \xa0 Text<br>
 
 <div>
 
@@ -165,9 +165,9 @@ def test_unicode_in_reply():
 
 <blockquote>
 Quote
-</blockquote>""".encode("utf-8")
+</blockquote>"""
 
-    eq_("<html>Reply&#160;&#160;Text<br><div><br></div>"
-        "</html>",
+    eq_("<html><head></head><body>Reply&#160;&#160;Text<br><div><br></div>"
+        "</body></html>",
         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
@@ -298,7 +298,7 @@ def test_from_block_and_quotations_in_separate_divs():
   </div>
 </div>
 '''
-    eq_('<html>Reply<div><hr></div></html>',
+    eq_('<html><head></head><body>Reply<div><hr></div></body></html>',
         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
 
 
@@ -373,9 +373,9 @@ def test_CRLF():
 """
     msg_body = msg_body.replace('\n', '\r\n')
     extracted = quotations.extract_from_html(msg_body)
-    assert_false(symbol in extracted)
-    # Keep new lines otherwise "My reply" becomes one word - "Myreply"
-    eq_("My\nreply\n", extracted)
+    assert_false(symbol in extracted)
+    # Keep new lines otherwise "My reply" becomes one word - "Myreply"
+    eq_("<html><head></head><body>My\nreply\n</body></html>", extracted)
 
 
 def test_gmail_forwarded_msg():
diff --git a/tests/signature/bruteforce_test.py b/tests/signature/bruteforce_test.py
index 382615bb..65b53242 100644
--- a/tests/signature/bruteforce_test.py
+++ b/tests/signature/bruteforce_test.py
@@ -128,10 +128,10 @@ def test_blackberry_signature():
     eq_(('Heeyyoooo.', msg_body[len('Heeyyoooo.\n'):]),
         bruteforce.extract_signature(msg_body))
 
-    msg_body = u"""Blah
+    msg_body = """Blah
 
 Enviado desde mi oficina móvil BlackBerry® de Telcel"""
 
-    eq_(('Blah', u'Enviado desde mi oficina móvil BlackBerry® de Telcel'),
+    eq_(('Blah', 'Enviado desde mi oficina móvil BlackBerry® de Telcel'),
         bruteforce.extract_signature(msg_body))
 
 
@@ -200,14 +200,14 @@ def test_get_signature_candidate():
 def test_mark_candidate_indexes():
     with patch.object(bruteforce, 'TOO_LONG_SIGNATURE_LINE', 3):
         # spaces are not considered when checking line length
-        eq_('clc',
+        eq_(b'clc',
             bruteforce._mark_candidate_indexes(
                 ['BR,  ', 'long', 'Bob'],
                 [0, 1, 2]))
 
         # only candidate lines are marked
         # if line has only dashes it's a candidate line
-        eq_('ccdc',
+        eq_(b'ccdc',
             bruteforce._mark_candidate_indexes(
                 ['-', 'long', '-', '- i', 'Bob'],
                 [0, 2, 3, 4]))
@@ -216,20 +216,20 @@ def test_process_marked_candidate_indexes():
     eq_([2, 13, 15],
         bruteforce._process_marked_candidate_indexes(
-            [2, 13, 15], 'dcc'))
+            [2, 13, 15], b'dcc'))
 
     eq_([15],
         bruteforce._process_marked_candidate_indexes(
-            [2, 13, 15], 'ddc'))
+            [2, 13, 15], b'ddc'))
 
     eq_([13, 15],
         bruteforce._process_marked_candidate_indexes(
-            [13, 15], 'cc'))
+            [13, 15], b'cc'))
 
     eq_([15],
         bruteforce._process_marked_candidate_indexes(
-            [15], 'lc'))
+            [15], b'lc'))
 
     eq_([15],
         bruteforce._process_marked_candidate_indexes(
-            [13, 15], 'ld'))
+            [13, 15], b'ld'))
diff --git a/tests/signature/extraction_test.py b/tests/signature/extraction_test.py
index b5703031..1720100e 100644
--- a/tests/signature/extraction_test.py
+++ b/tests/signature/extraction_test.py
@@ -127,7 +127,7 @@ def test_handles_unicode():
 @patch.object(signature.extraction, 'has_signature')
 def test_signature_extract_crash(has_signature):
     has_signature.side_effect = Exception('Bam!')
-    msg_body = u'Blah\r\n--\r\n\r\nСергей'
+    msg_body = 'Blah\r\n--\r\n\r\nСергей'
     eq_((msg_body, None), signature.extract(msg_body, 'Сергей'))
 
 
@@ -135,7 +135,7 @@ def test_mark_lines():
     with patch.object(bruteforce, 'SIGNATURE_MAX_LINES', 2):
         # we analyse the 2nd line as well though it's the 6th line
         # (starting from the bottom) because we don't count empty line
-        eq_('ttset',
+        eq_(b'ttset',
             e._mark_lines(['Bob Smith',
                            'Bob Smith',
                            'Bob Smith',
@@ -145,7 +145,7 @@ def test_mark_lines():
     with patch.object(bruteforce, 'SIGNATURE_MAX_LINES', 3):
         # we don't analyse the 1st line because
         # signature cant start from the 1st line
-        eq_('tset',
+        eq_(b'tset',
             e._mark_lines(['Bob Smith',
                            'Bob Smith',
                            '',
@@ -154,20 +154,20 @@ def test_mark_lines():
 
 def test_process_marked_lines():
     # no signature found
-    eq_((list(range(5)), None), e._process_marked_lines(list(range(5)), 'telt'))
+    eq_((list(range(5)), None), e._process_marked_lines(list(range(5)), b'telt'))
 
     # signature in the middle of the text
-    eq_((list(range(9)), None), e._process_marked_lines(list(range(9)), 'tesestelt'))
+    eq_((list(range(9)), None), e._process_marked_lines(list(range(9)), b'tesestelt'))
 
     # long line splits signature
     eq_((list(range(7)), [7, 8]),
-        e._process_marked_lines(list(range(9)), 'tsslsless'))
+        e._process_marked_lines(list(range(9)), b'tsslsless'))
 
     eq_((list(range(20)), [20]),
-        e._process_marked_lines(list(range(21)), 'ttttttstttesllelelets'))
+        e._process_marked_lines(list(range(21)), b'ttttttstttesllelelets'))
 
     # some signature lines could be identified as text
-    eq_(([0], list(range(1, 9))), e._process_marked_lines(list(range(9)), 'tsetetest'))
+    eq_(([0], list(range(1, 9))), e._process_marked_lines(list(range(9)), b'tsetetest'))
 
     eq_(([], list(range(5))),
-        e._process_marked_lines(list(range(5)), "ststt"))
+        e._process_marked_lines(list(range(5)), b"ststt"))
diff --git a/tests/signature/learning/dataset_test.py b/tests/signature/learning/dataset_test.py
index 8e152753..5f84bac3 100644
--- a/tests/signature/learning/dataset_test.py
+++ b/tests/signature/learning/dataset_test.py
@@ -32,7 +32,7 @@ def test_parse_msg_sender():
     # if the message sender is stored in a separate file
     sender, msg = d.parse_msg_sender(MSG_FILENAME_WITH_BODY_SUFFIX)
     with open(MSG_FILENAME_WITH_BODY_SUFFIX) as f:
-        eq_(sender, u"john@example.com")
+        eq_(sender, "john@example.com")
         eq_(msg, f.read())
 
 
diff --git a/tests/signature/learning/helpers_test.py b/tests/signature/learning/helpers_test.py
index d9e7b866..2870c822 100644
--- a/tests/signature/learning/helpers_test.py
+++ b/tests/signature/learning/helpers_test.py
@@ -151,7 +151,7 @@ def test_extract_names():
         ['David', 'DECOSTER', 'Domicile']
     }
 
-    for sender, expected_names in senders_names.items():
+    for sender, expected_names in list(senders_names.items()):
         extracted_names = h.extract_names(sender)
         # check that extracted names could be compiled
         try:
diff --git a/tests/text_quotations_test.py b/tests/text_quotations_test.py
index 7a81c994..d6722369 100644
--- a/tests/text_quotations_test.py
+++ b/tests/text_quotations_test.py
@@ -54,7 +54,7 @@ def test_pattern_on_date_wrote_somebody():
         """Lorem
 
 Op 13-02-2014 3:18 schreef Julius Caesar <pantheon@rome.com>:
-
+ 
 Veniam laborum mlkshk kale chips authentic. Normcore mumblecore laboris, fanny pack readymade eu blog chia pop-up freegan enim master cleanse.
 """))
 
@@ -137,7 +137,7 @@ def test_reply_and_quotation_splitter_share_line():
 
 
 def _check_pattern_original_message(original_message_indicator):
-    msg_body = u"""Test reply
+    msg_body = """Test reply
 
 -----{}-----
 
@@ -145,12 +145,13 @@ def _check_pattern_original_message(original_message_indicator):
     eq_('Test reply', quotations.extract_from_plain(
         msg_body.format(six.text_type(original_message_indicator))))
 
+
 def test_english_original_message():
     _check_pattern_original_message('Original Message')
     _check_pattern_original_message('Reply Message')
 
 def test_german_original_message():
-    _check_pattern_original_message(u'Ursprüngliche Nachricht')
+    _check_pattern_original_message('Ursprüngliche Nachricht')
     _check_pattern_original_message('Antwort Nachricht')
 
 def test_danish_original_message():
@@ -256,7 +257,7 @@ def test_with_indent():
 
 ------On 12/29/1987 17:32 PM, Julius Caesar wrote-----
 
-Brunch mumblecore pug Marfa tofu, irure taxidermy hoodie readymade pariatur.
+Brunch mumblecore pug Marfa tofu, irure taxidermy hoodie readymade pariatur. 
 """
     eq_("YOLO salvia cillum kogi typewriter mumblecore cardigan skateboard Austin.",
         quotations.extract_from_plain(msg_body))
@@ -267,8 +268,8 @@ def test_short_quotation_with_newline():
 On Tue, Jan 27, 2015 at 12:42 PM -0800, "Company" <christine.XXX@XXX.com> wrote:
 
 Hi Mark,
-Blah blah?  
-Thanks,Christine  
+Blah blah? 
+Thanks,Christine
 
 On Jan 27, 2015, at 11:55 AM, Mark XXX wrote:
 
@@ -312,7 +313,7 @@ def test_german_from_block():
 
 def test_french_multiline_from_block():
     eq_('Lorem ipsum', quotations.extract_from_plain(
-        u"""Lorem ipsum
+        """Lorem ipsum
 
 De : Brendan xxx [mailto:brendan.xxx@xxx.com]
 Envoyé : vendredi 23 janvier 2015 16:39
@@ -324,7 +325,7 @@ def test_french_multiline_from_block():
 
 def test_french_from_block():
     eq_('Lorem ipsum', quotations.extract_from_plain(
-        u"""Lorem ipsum
+        """Lorem ipsum
 
 Le 23 janv. 2015 à 22:03, Brendan xxx <brendan.xxx@xxx.com<mailto:brendan.xxx@xxx.com>> a écrit:
 
@@ -332,7 +333,7 @@ def test_french_from_block():
 
 def test_polish_from_block():
     eq_('Lorem ipsum', quotations.extract_from_plain(
-        u"""Lorem ipsum
+        """Lorem ipsum
 
 W dniu 28 stycznia 2015 01:53 użytkownik Zoe xxx <zoe.xxx@xxx.com> napisał:
 
@@ -354,7 +355,7 @@ def test_danish_from_block():
 
 def test_swedish_from_block():
     eq_('Allo! Follow up MIME!', quotations.extract_from_plain(
-        u"""Allo! Follow up MIME!
+        """Allo! Follow up MIME!
 
 Från: Anno Sportel [mailto:anno.spoel@hsbcssad.com]
 Skickat: den 26 augusti 2015 14:45
 Till: Isacson Leiff
@@ -373,7 +374,7 @@ def test_swedish_from_line():
 
 def test_norwegian_from_line():
     eq_('Lorem', quotations.extract_from_plain(
-        u"""Lorem
+        """Lorem
 
 På 14 september 2015 på 02:23:18, Valentino Rudy (valentino@rudy.be) skrev:
 
 Veniam laborum mlkshk kale chips authentic. Normcore mumblecore laboris, fanny pack readymade eu blog chia pop-up freegan enim master cleanse.
@@ -381,11 +382,11 @@ def test_norwegian_from_line():
 
 def test_dutch_from_block():
     eq_('Gluten-free culpa lo-fi et nesciunt nostrud.', quotations.extract_from_plain(
-        """Gluten-free culpa lo-fi et nesciunt nostrud.
+        """Gluten-free culpa lo-fi et nesciunt nostrud. 
 
 Op 17-feb.-2015, om 13:18 heeft Julius Caesar <pantheon@rome.com> het volgende geschreven:
-
-Small batch beard laboris tempor, non listicle hella Tumblr heirloom.
+
+Small batch beard laboris tempor, non listicle hella Tumblr heirloom. 
 """))
""")) @@ -515,7 +516,7 @@ def test_mark_message_lines(): '> Hi', '', 'Signature'] - eq_('tessemet', quotations.mark_message_lines(lines)) + eq_(b'tessemet', quotations.mark_message_lines(lines)) lines = ['Just testing the email reply', '', @@ -529,41 +530,40 @@ def test_mark_message_lines(): 'wrote:', '', 'Tarmo Lehtpuu has posted the following message on'] - eq_('tettessset', quotations.mark_message_lines(lines)) + eq_(b'tettessset', quotations.mark_message_lines(lines)) def test_process_marked_lines(): # quotations and last message lines are mixed # consider all to be a last message - markers = 'tsemmtetm' - lines = [str(i) for i in range(len(markers))] + markers = b'tsemmtetm' lines = [str(i) for i in range(len(markers))] eq_(lines, quotations.process_marked_lines(lines, markers)) # no splitter => no markers - markers = 'tmm' + markers = b'tmm' lines = ['1', '2', '3'] eq_(['1', '2', '3'], quotations.process_marked_lines(lines, markers)) # text after splitter without markers is quotation - markers = 'tst' + markers = b'tst' lines = ['1', '2', '3'] eq_(['1'], quotations.process_marked_lines(lines, markers)) # message + quotation + signature - markers = 'tsmt' + markers = b'tsmt' lines = ['1', '2', '3', '4'] eq_(['1', '4'], quotations.process_marked_lines(lines, markers)) # message + + nested quotation - markers = 'tstsmt' + markers = b'tstsmt' lines = ['1', '2', '3', '4', '5', '6'] eq_(['1'], quotations.process_marked_lines(lines, markers)) # test links wrapped with paranthesis # link starts on the marker line - markers = 'tsmttem' + markers = b'tsmttem' lines = ['text', 'splitter', '>View (http://example.com', @@ -574,7 +574,7 @@ def test_process_marked_lines(): eq_(lines[:1], quotations.process_marked_lines(lines, markers)) # link starts on the new line - markers = 'tmmmtm' + markers = b'tmmmtm' lines = ['text', '>' '>', @@ -585,7 +585,7 @@ def test_process_marked_lines(): eq_(lines[:1], quotations.process_marked_lines(lines, markers)) # check all "inline" replies - markers = 'tsmtmtm' + markers = b'tsmtmtm' lines = ['text', 'splitter', '>', @@ -596,7 +596,7 @@ def test_process_marked_lines(): eq_(lines, quotations.process_marked_lines(lines, markers)) # inline reply with link not wrapped in paranthesis - markers = 'tsmtm' + markers = b'tsmtm' lines = ['text', 'splitter', '>', @@ -605,7 +605,7 @@ def test_process_marked_lines(): eq_(lines, quotations.process_marked_lines(lines, markers)) # inline reply with link wrapped in paranthesis - markers = 'tsmtm' + markers = b'tsmtm' lines = ['text', 'splitter', '>', @@ -695,8 +695,9 @@ def test_standard_replies(): with open(filename) as f: message = email.message_from_file(f) body = next(email.iterators.typed_subpart_iterator(message, subtype='plain')) - text = ''.join(body_iterator(body, True)) - + text = ''.join(email.iterators.body_line_iterator(body, True)) + if not text: + text = ''.join(email.iterators.body_line_iterator(body, False)) stripped_text = quotations.extract_from_plain(text) reply_text_fn = filename[:-4] + '_reply_text' if os.path.isfile(reply_text_fn): @@ -754,6 +755,6 @@ def test_split_email(): > > """ - expected_markers = "stttttsttttetesetesmmmmmmssmmmmmmsmmmmmmmm" + expected_markers = b"stttttsttttetesetesmmmmmmssmmmmmmsmmmmmmmm" markers = quotations.split_emails(msg) eq_(markers, expected_markers) diff --git a/tests/utils_test.py b/tests/utils_test.py index 778e858c..138338ab 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -16,15 +16,15 @@ def test_get_delimiter(): def test_unicode(): - eq_ (u'hi', 
diff --git a/tests/utils_test.py b/tests/utils_test.py
index 778e858c..138338ab 100644
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@@ -16,15 +16,15 @@ def test_get_delimiter():
 
 
 def test_unicode():
-    eq_ (u'hi', u.to_unicode('hi'))
-    eq_ (type(u.to_unicode('hi')), six.text_type )
-    eq_ (type(u.to_unicode(u'hi')), six.text_type )
-    eq_ (type(u.to_unicode('привет')), six.text_type )
-    eq_ (type(u.to_unicode(u'привет')), six.text_type )
-    eq_ (u"привет", u.to_unicode('привет'))
-    eq_ (u"привет", u.to_unicode(u'привет'))
+    eq_ ('hi', u.to_unicode('hi'))
+    eq_ (type(u.to_unicode('hi')), str )
+    eq_ (type(u.to_unicode('hi')), str )
+    eq_ (type(u.to_unicode('привет')), str )
+    eq_ (type(u.to_unicode('привет')), str )
+    eq_ ("привет", u.to_unicode('привет'))
+    eq_ ("привет", u.to_unicode('привет'))
     # some latin1 stuff
-    eq_ (u"Versión", u.to_unicode(u'Versi\xf3n'.encode('iso-8859-2'), precise=True))
+    eq_ ("Versión", u.to_unicode('Versi\xf3n', precise=True))
 
 
 def test_detect_encoding():
@@ -79,7 +79,7 @@ def test_html_to_text():
 """
     text = u.html_to_text(html)
     eq_(b"Hello world! \n\n * One! \n * Two \nHaha", text)
-    eq_(u"привет!", u.html_to_text("привет!").decode('utf8'))
+    eq_("привет!".encode('utf-8'), u.html_to_text("привет!"))
 
     html = '<body><br/><br/>Hi</body>'
     eq_ (b'Hi', u.html_to_text(html))
@@ -115,7 +115,7 @@ def test_html_to_text():
 def test_comment_no_parent():
     s = "<!-- COMMENT 1 --> no comment"
     d = u.html_document_fromstring(s)
-    eq_("no comment", u.html_tree_to_text(d))
+    eq_(b"no comment", u.html_tree_to_text(d))
 
 
 @patch.object(u.html5parser, 'fromstring', Mock(side_effect=Exception()))
@@ -156,5 +156,5 @@ def test_html_too_big():
 
 @patch.object(u, '_MAX_TAGS_COUNT', 3)
 def test_html_to_text():
-    eq_("Hello", u.html_to_text("<div>Hello</div>"))
+    eq_(b"Hello", u.html_to_text("<div>Hello</div>"))
     eq_(None, u.html_to_text("<div><span>Hi</span></div>"))