diff --git a/Changes b/Changes
index a75c2df..687af65 100644
--- a/Changes
+++ b/Changes
@@ -1,4 +1,6 @@
 {{$NEXT}}
+    - Delegate font encoding and hyphen processing from
+      text boxes to text style.
 
 0.8.14  2024-12-24T07:43:50+13:00
     - Handle partial and multiple space characters:
diff --git a/lib/PDF/Content/Text/Box.rakumod b/lib/PDF/Content/Text/Box.rakumod
index 8fb881a..d46a500 100644
--- a/lib/PDF/Content/Text/Box.rakumod
+++ b/lib/PDF/Content/Text/Box.rakumod
@@ -184,36 +184,6 @@ multi submethod TWEAK(:@chunks!, :$!text = @chunks».Str.join, |c) {
     self!layup: @chunks;
 }
 
-method !encode(Str:D $atom) {
-    my List $encoded;
-    my Numeric $width;
-    my Bool $shape := $!style.shape;
-    my Bool $kern = $!style.kern;
-    $kern //= True if $shape;
-    if $shape || $.script || $.lang {
-        given $.font.shape($atom, :$kern, :$.script, :$.lang) {
-            $encoded := .[0];
-            $width = .[1];
-        }
-    }
-    elsif $kern {
-        given $.font.kern($atom) {
-            $encoded := .List given .[0].list.map: {
-                .does(Numeric) ?? -$_ !! $.font.encode($_);
-            }
-            $width = .[1];
-        }
-    }
-    else {
-        $encoded := ( $.font.encode($atom), );
-        $width = $.font.stringwidth($atom);
-    }
-    $width *= $!style.font-size * $.HorizScaling / 100000;
-    $width += ($atom.chars - 1) * $.CharSpacing
-        if $.CharSpacing > -$!style.font-size;
-    ($encoded, $width);
-}
-
 method !layup(@atoms is copy) {
     my Int $i = 0;
     my Int $line-start = 0;
@@ -222,16 +192,11 @@ method !layup(@atoms is copy) {
     my Numeric $preceding-spaces = self!flush-spaces: $em-spaces, @atoms, $i;
     my $word-gap := self!word-gap;
     my $height := $!style.font-size;
-    my Numeric $hyphen-width;
     my Bool $prev-soft-hyphen;
 
     my PDF::Content::Text::Line $line .= new: :$word-gap, :$height, :$!indent;
     @!lines = $line;
 
-    sub hyphen-width {
-        $hyphen-width //= self!encode("\c[HYPHEN]")[1] || self!encode("\c[HYPHEN-MINUS]")[1];
-    }
-
     LAYUP: while $i < $n {
         my subset StrOrImage where Str | PDF::Content::XObject;
         my StrOrImage $atom = @atoms[$i++];
@@ -244,21 +209,24 @@ method !layup(@atoms is copy) {
 
         given $atom {
             when Str {
+                my $enc;
                 if $atom eq "\c[HYPHENATION POINT]" {
-                    $atom = '-';
+                    $atom = $!style.hyphen;
+                    $enc = $!style.hyphen-encoding;
+                    $word-width = $!style.hyphen-width;
                     $soft-hyphen = True;
                 }
-                elsif $!verbatim && +.match("\n", :g) -> UInt $nl {
-                    # todo: handle tabs
-                    $line-breaks = $nl;
-                    $atom = ' ' x $preceding-spaces;
-                    $word-pad = 0;
-                }
-
-                given self!encode($atom) {
-                    $word = .[0];
-                    $word-width = .[1];
+                else {
+                    if $!verbatim && +.match("\n", :g) -> UInt $nl {
+                        # todo: handle tabs
+                        $line-breaks = $nl;
+                        $atom = ' ' x $preceding-spaces;
+                        $word-pad = 0;
+                    }
+                    $enc = $!style.encode: $atom;
                 }
+                $word = $enc[0];
+                $word-width = $enc[1];
             }
             when PDF::Content::XObject {
                 $xobject = True;
@@ -269,7 +237,7 @@ method !layup(@atoms is copy) {
 
         if $!width && !$line-breaks && ($line.encoded || $line.indent) {
             my $test-width = $line.content-width + $word-pad + $word-width;
-            $test-width += hyphen-width()
+            $test-width += $!style.hyphen-width
                 if @atoms[$i] ~~ "\c[HYPHENATION POINT]";
 
             $line-breaks = $test-width > $!width
@@ -308,7 +276,7 @@ method !layup(@atoms is copy) {
             if $prev-soft-hyphen {
                 # Drop soft hyphen when line is continued
                 $line.encoded.pop;
-                $line.word-width -= hyphen-width();
+                $line.word-width -= $!style.hyphen-width;
             }
             $line.spaces[+$line.encoded] = $preceding-spaces;
             $line.decoded.push: $xobject ?? '' !! $atom;
diff --git a/lib/PDF/Content/Text/Style.rakumod b/lib/PDF/Content/Text/Style.rakumod
index 8e4b0e6..03a1b71 100644
--- a/lib/PDF/Content/Text/Style.rakumod
+++ b/lib/PDF/Content/Text/Style.rakumod
@@ -90,3 +90,69 @@ method font-height(|c) {
     $!font.height: $!font-size, |c;
 }
 
+# Encode a text atom using this style's font. Returns a two-element
+# list: the encoded content, then its width after scaling by the font
+# size and horizontal scaling and adjusting for character spacing.
+method encode(Str:D $atom) {
+    my List $encoded;
+    my Numeric $width;
+    if $.shape || $.script || $.lang {
+        # kerning is passed as on when either kern or shape is set
+        my Bool $kern = $.kern || $.shape;
+        given $!font.shape($atom, :$kern, :$.script, :$.lang) {
+            $encoded := .[0];
+            $width = .[1];
+        }
+    }
+    elsif $.kern {
+        given $!font.kern($atom) {
+            # numeric items are negated; other items are font-encoded
+            $encoded := .List given .[0].list.map: {
+                .does(Numeric) ?? -$_ !! $!font.encode($_);
+            }
+            $width = .[1];
+        }
+    }
+    else {
+        $encoded := ( $!font.encode($atom), );
+        $width = $!font.stringwidth($atom);
+    }
+    $width *= $!font-size * $.HorizScaling / 100000;
+    # character spacing is only added while it exceeds -font-size
+    $width += ($atom.chars - 1) * $.CharSpacing
+        if $.CharSpacing > -$!font-size;
+    ($encoded, $width);
+}
+
+# hyphen string and its encoding; both are assigned by !hyphen-init
+has Str $!hyphen;
+has List $!hyphen-encoding;
+
+# Select and encode the hyphen: HYPHEN is tried first, falling back to
+# HYPHEN-MINUS when the width encoded for HYPHEN is false (e.g. zero).
+# NOTE(review): the result is cached and never refreshed here; confirm
+# whether the font or font size can change after first use.
+method !hyphen-init {
+    $!hyphen = "\c[HYPHEN]";
+    $!hyphen-encoding = self.encode: $!hyphen;
+    unless $!hyphen-encoding[1] {
+        $!hyphen = "\c[HYPHEN-MINUS]";
+        $!hyphen-encoding = self.encode: $!hyphen;
+    }
+}
+
+# The hyphen string selected by !hyphen-init (initialized on demand)
+method hyphen {
+    self!hyphen-init without $!hyphen;
+    $!hyphen;
+}
+
+# The list returned by encode for the selected hyphen
+method hyphen-encoding {
+    self!hyphen-init without $!hyphen;
+    $!hyphen-encoding;
+}
+
+# Second element (the width) of the hyphen's encoding
+method hyphen-width { self.hyphen-encoding[1] }
+