Skip to content

Commit

Permalink
Better support various em space characters
Browse files Browse the repository at this point in the history
  • Loading branch information
dwarring committed Dec 23, 2024
1 parent f3b483c commit 98dae2f
Show file tree
Hide file tree
Showing 11 changed files with 47 additions and 21 deletions.
34 changes: 23 additions & 11 deletions lib/PDF/Content/Text/Box.rakumod
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ method content-height returns Numeric { @!lines».height.sum * $.leading; }
my grammar Text {
token nbsp { <[ \c[NO-BREAK SPACE] \c[NARROW NO-BREAK SPACE] \c[WORD JOINER] ]> }
token space { [\s <!after <nbsp> > | "\c[ZERO WIDTH SPACE]"]+ }
token space { [\s <!after <nbsp> > | \c[ZERO WIDTH SPACE] ]+ }
token hyphen { <[ \c[HYPHEN] \c[HYPHEN-MINUS] \c[HYPHENATION POINT] ]> }
token word { [ <!hyphen> <!space> . ]+ <[ \c[HYPHEN] \c[HYPHEN-MINUS] ]>? | <.hyphen> }
}
Expand Down Expand Up @@ -218,7 +218,8 @@ method !layup(@atoms is copy) {
my Int $i = 0;
my Int $line-start = 0;
my Int $n = +@atoms;
my UInt $preceding-spaces = self!flush-spaces: @atoms, $i;
my $em-spaces = self!word-gap($!style.scale: 1000) / self!word-gap;
my Numeric $preceding-spaces = self!flush-spaces: $em-spaces, @atoms, $i;
my $word-gap := self!word-gap;
my $height := $!style.font-size;
my Numeric $hyphen-width;
Expand Down Expand Up @@ -317,7 +318,7 @@ method !layup(@atoms is copy) {
if $height > $line.height;

$prev-soft-hyphen = $soft-hyphen;
$preceding-spaces = self!flush-spaces(@atoms, $i);
$preceding-spaces = self!flush-spaces($em-spaces, @atoms, $i);
}

if $preceding-spaces {
Expand Down Expand Up @@ -360,29 +361,40 @@ method !height-exceeded {
$!height && self.content-height > $!height;
}

method !flush-spaces(@words is raw, $i is rw) returns UInt {
# Widths of the fixed-width Unicode space characters, expressed as a
# fraction of one em. White-space characters that are not listed here
# (plain space, tab, ...) are counted as one ordinary space by the caller.
#
# The "N-per-em" spaces are 1/N of an em wide, not N ems.
# NOTE(review): EN QUAD / EM QUAD are canonically equivalent to EN SPACE /
# EM SPACE, so normalized strings should already hit those entries - confirm.
my constant %SpaceWidth = %(
    "\c[EN SPACE]"           => 1/2,
    "\c[EM SPACE]"           => 1,
    "\c[THREE-PER-EM SPACE]" => 1/3,
    "\c[FOUR-PER-EM SPACE]"  => 1/4,
    "\c[SIX-PER-EM SPACE]"   => 1/6,
    "\c[THIN SPACE]"         => .2,
    "\c[HAIR SPACE]"         => .1,
    "\c[ZERO WIDTH SPACE]"   => 0,
);

# Measures the run of breaking white-space at @words[$i], if there is one,
# and returns its width as a (possibly fractional) count of ordinary spaces.
#
#   $em-spaces - multiplier applied to the %SpaceWidth entries (which are
#                in ems) to convert them into ordinary-space units
#   @words     - the list of atoms being laid out
#   $i         - index of the current atom; advanced past the white-space
#                atom, except in verbatim mode when it contains a new-line
#
# Returns 0 when the current atom is not white-space.
method !flush-spaces($em-spaces is rw, @words is raw, $i is rw) returns Numeric:D {
    # width of a single white-space character, in ordinary spaces;
    # characters without a %SpaceWidth entry count as exactly one space
    my &space-count = -> $char {
        my $ems = %SpaceWidth{$char};
        $ems.defined ?? $ems * $em-spaces !! 1;
    }

    my $n = 0; # space count for padding purposes
    with @words[$i] {
        when /<Text::space>/ {
            if $!verbatim && (my $last-nl = .rindex("\n")).defined {
                # count only the spaces after the last new-line; when
                # squishing they are dropped, so $n stays at zero
                $n = .substr($last-nl + 1).comb.map(&space-count).sum
                    unless $!squish;
            }
            else {
                $i++;
                $n = .comb.map(&space-count).sum;
                # squishing collapses a wide run to one space, but keeps
                # narrower (fractional or zero-width) runs as measured
                $n = 1 if $n > 1 && $!squish;
            }
        }
    }
    $n;
}

# calculates actual spacing between words
method !word-gap returns Numeric {
my $word-gap = $.space-width + $.WordSpacing + $.CharSpacing;
method !word-gap($space = $.space-width) returns Numeric {
my $word-gap = $space + $.WordSpacing + $.CharSpacing;
$word-gap * $.HorizScaling / 100;
}

Expand Down
11 changes: 7 additions & 4 deletions lib/PDF/Content/Text/Line.rakumod
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ has Numeric $.word-width is rw = 0;
has Numeric $.word-gap is rw = 0;
has Numeric $.indent is rw = 0;
has Numeric $.align = 0;
has UInt @.spaces;
has Numeric @.spaces;

method content-width returns Numeric {
$!indent + $!word-width + @!spaces.sum * $!word-gap;
Expand Down Expand Up @@ -153,9 +153,12 @@ method content(:$font!, Numeric :$font-size!, :$space-pad = 0, :$TextRise = 0.0)
for ^+@!encoded -> $i {
my $spaces := @!spaces[$i];
if $spaces {
@line.push: $font.encode(Space x $spaces);
@line.push: $space-pad * $spaces
unless $space-pad =~= 0;
my UInt $whole-spaces = $spaces.floor;
my $part-spaces = $spaces - $whole-spaces;
@line.push: $font.encode(Space x $whole-spaces);
my Int $pad = round($space-pad * $spaces + -1000 * $part-spaces * $!word-gap / $font-size);
@line.push: $pad
if $pad;
}
@line.append: @!encoded[$i].list;
}
Expand Down
10 changes: 5 additions & 5 deletions lib/PDF/Content/Text/Style.rakumod
Original file line number Diff line number Diff line change
Expand Up @@ -70,19 +70,19 @@ multi method baseline-shift(Baseline $_ --> Numeric) {
#| get/set a numeric font vertical alignment offset
multi method baseline-shift is rw { $!TextRise }

method scale($v) { $v * $!font-size / $!units-per-EM; }

#| return the scaled width of spaces
method space-width {
$!space-width * $!font-size / $!units-per-EM;
}
method space-width { self.scale: $!space-width; }

#| return the scaled underline position
method underline-position {
($!font.underline-position // -100) * $!font-size / $!units-per-EM;
self.scale: ($!font.underline-position // -100)
}

#| return the scaled underline thickness
method underline-thickness {
($!font.underline-thickness // 50) * $!font-size / $!units-per-EM;
self.scale: ($!font.underline-thickness // 50)
}

#| return the scaled font height
Expand Down
Binary file modified t/pdf-text-align.pdf
Binary file not shown.
Binary file modified t/pdf-text-hyphenation.pdf
Binary file not shown.
Binary file modified t/pdf-text-indent.pdf
Binary file not shown.
Binary file modified t/pdf-text-style.pdf
Binary file not shown.
Binary file modified t/pdf-text-verbatim.pdf
Binary file not shown.
Binary file modified t/text-box-images.pdf
Binary file not shown.
Binary file modified t/text-box.pdf
Binary file not shown.
13 changes: 12 additions & 1 deletion t/text-box.t
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use v6;
use Test;
plan 14;
plan 15;
use lib 't';
use PDF::Grammar::Test :is-json-equiv;
use PDF::Content::Text::Box;
Expand Down Expand Up @@ -152,6 +152,17 @@ subtest 'zero width spaces', {
}
}

# Lays out a sample sentence containing each of the variable-width space
# characters (plus a tab, an ordinary space and a zero-width space) in a
# 400 x 100 text box and outputs it at position (100, 250).
# NOTE(review): this subtest makes no assertions; presumably the result is
# checked by inspecting the generated PDF - confirm.
subtest 'variable spaces', {
$gfx.text: {
# the sample sentence is repeated twice (x 2)
$text = "Spaces:en-space\c[EN SPACE]space tab\tem-space\c[EM SPACE]em-quad\c[EM QUAD]three\c[THREE-PER-EM SPACE]four\c[FOUR-PER-EM SPACE]six\c[SIX-PER-EM SPACE]thin\c[THIN SPACE]hair\c[HAIR SPACE]zero\c[ZERO WIDTH SPACE]. " x 2;
my $width = 400;
$height = 100;
$text-box .= new( :$text, :$font, :$font-size, :$width, :$height );
.text-position = 100, 250;
.say: $text-box;
}
}

subtest 'font loading from content stream', {
if (try require PDF::Font::Loader) === Nil {
skip 'PDF::Font::Loader is needed for this test';
Expand Down

0 comments on commit 98dae2f

Please sign in to comment.