From 65f7eff8e02c7798269baeb648c9abd286fb8cdf Mon Sep 17 00:00:00 2001
From: Deyan Ginev <deyan.ginev@gmail.com>
Date: Tue, 19 Sep 2023 18:41:29 -0400
Subject: [PATCH 1/2] take extra care to catch empty lines in Mouth::readToken;
 avoid mistreating the "0" char as undef

---
 lib/LaTeXML/Core/Mouth.pm | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/lib/LaTeXML/Core/Mouth.pm b/lib/LaTeXML/Core/Mouth.pm
index f44279358..17390b10a 100644
--- a/lib/LaTeXML/Core/Mouth.pm
+++ b/lib/LaTeXML/Core/Mouth.pm
@@ -20,7 +20,7 @@ use LaTeXML::Core::Token;
 use LaTeXML::Core::Tokens;
 use LaTeXML::Util::Pathname;
 use Encode qw(decode);
-use base   qw(LaTeXML::Common::Object);
+use base qw(LaTeXML::Common::Object);
 
 our $READLINE_PROGRESS_QUANTUM = 25;
 
@@ -214,7 +214,7 @@ sub handle_escape {    # Read control sequence
   # Bit I believe that he does NOT mean within control sequences
   my $cs = "\\" . $ch;    # I need this standardized to be able to lookup tokens (A better way???)
   if ((defined $cc) && ($cc == CC_LETTER)) {    # For letter, read more letters for csname.
-    while ((($ch, $cc) = getNextChar($self)) && $ch && ($cc == CC_LETTER)) {
+    while ((($ch, $cc) = getNextChar($self)) && (length($ch) > 0) && ($cc == CC_LETTER)) {
       $cs .= $ch; }
     # We WILL skip spaces, but not till next token is read (in case catcode changes!!!!)
     $$self{skipping_spaces} = 1;
@@ -321,10 +321,10 @@ sub readToken {
       my ($ch, $cc);
       while ((($ch, $cc) = getNextChar($self)) && (defined $ch)
         && (($cc == CC_SPACE) || ($cc == CC_IGNORE))) { }
-      if ($ch && ($cc == CC_EOL)) {    # Eolch already? empty line!
-        $$self{colno} = $$self{nchars};    # ignore rest of line.
+      if ((defined $ch) && ($cc == CC_EOL)) {    # Eolch already? empty line!
+        $$self{colno} = $$self{nchars};          # ignore rest of line.
         return T_CS('\par'); }
-      elsif ($$self{colno} > $$self{nchars}) {    # Past end of line?
+      elsif (($$self{nchars} == 0) || ($$self{colno} > $$self{nchars})) {    # Past end of line?
             # If upcoming line is empty, and there is no recognizable EOL, fake one
         return T_MARKER('EOL') if $read_mode && ((!defined $eolch) || ($eolch ne "\r")); }
       else {    # Back up over peeked char
@@ -347,7 +347,6 @@ sub readToken {
     my $token = (defined $cc ? $DISPATCH[$cc] : undef);
     $token = &$token($self, $ch) if ref $token eq 'CODE';
     return $token if defined $token;    # Else, repeat till we get something or run out.
-
   }
   return; }
 

From 9f2645f1e6d85d5807545f0f669c01b55638fff0 Mon Sep 17 00:00:00 2001
From: Deyan Ginev <deyan.ginev@gmail.com>
Date: Thu, 28 Sep 2023 15:13:45 -0400
Subject: [PATCH 2/2] consistently use a defined check for char in Mouth

---
 lib/LaTeXML/Core/Mouth.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/LaTeXML/Core/Mouth.pm b/lib/LaTeXML/Core/Mouth.pm
index 17390b10a..9ff40dfdf 100644
--- a/lib/LaTeXML/Core/Mouth.pm
+++ b/lib/LaTeXML/Core/Mouth.pm
@@ -214,7 +214,7 @@ sub handle_escape {    # Read control sequence
   # Bit I believe that he does NOT mean within control sequences
   my $cs = "\\" . $ch;    # I need this standardized to be able to lookup tokens (A better way???)
   if ((defined $cc) && ($cc == CC_LETTER)) {    # For letter, read more letters for csname.
-    while ((($ch, $cc) = getNextChar($self)) && (length($ch) > 0) && ($cc == CC_LETTER)) {
+    while ((($ch, $cc) = getNextChar($self)) && (defined $ch) && ($cc == CC_LETTER)) {
       $cs .= $ch; }
     # We WILL skip spaces, but not till next token is read (in case catcode changes!!!!)
     $$self{skipping_spaces} = 1;