Decode the PDF "\b" escape in Font::decodeText().

The str_replace() table in decodeText() gains a ninth pair that maps the
two-character sequence backslash + "b" to chr(8) (ASCII backspace); the
replacement values are now written as \chr() codes instead of string
literals. A new unit test, FontTest::testDecodeTextIssue597, passes a '<'
command holding that escape to decodeText() and pins the returned bytes.

Review notes on this copy of the patch:
- Line breaks and indentation were lost in extraction and are reconstructed
  here in conventional 4-space layout. Confirm context-line whitespace
  against the target file, or apply with `git apply --ignore-whitespace`.
- The "<?php" tag and the "/**" docblock opener of the new test file had been
  stripped and are restored; without them the file is not valid PHP.
- Still missing because the original text cannot be recovered from this copy:
  the docblock lines that preceded "@date", the value of "@url", the text
  after "Sébastien MALOT", and the target of "If not, see .".
  TODO: restore these from the upstream file header.
- The "index" hashes are those of the original patch and are informational.

diff --git a/src/Smalot/PdfParser/Font.php b/src/Smalot/PdfParser/Font.php
index 9e4db9fe..a00d6444 100644
--- a/src/Smalot/PdfParser/Font.php
+++ b/src/Smalot/PdfParser/Font.php
@@ -434,8 +434,8 @@ public function decodeText(array $commands): string
 
             // replace escaped chars
             $text = str_replace(
-                ['\\\\', '\(', '\)', '\n', '\r', '\t', '\f', '\ '],
-                ['\\', '(', ')', "\n", "\r", "\t", "\f", ' '],
+                ['\\\\', '\(', '\)', '\n', '\r', '\t', '\f', '\ ', '\b'],
+                [\chr(92), \chr(40), \chr(41), \chr(10), \chr(13), \chr(9), \chr(12), \chr(32), \chr(8)],
                 $text
             );
 
diff --git a/tests/PHPUnit/Unit/FontTest.php b/tests/PHPUnit/Unit/FontTest.php
new file mode 100644
index 00000000..f60818ff
--- /dev/null
+++ b/tests/PHPUnit/Unit/FontTest.php
@@ -0,0 +1,70 @@
+<?php
+
+/**
+ *
+ * @date 2023-07-19
+ *
+ * @license LGPLv3
+ *
+ * @url
+ *
+ * PdfParser is a pdf library written in PHP, extraction oriented.
+ * Copyright (C) 2017 - Sébastien MALOT
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.
+ * If not, see .
+ */
+
+namespace PHPUnitTests\Unit;
+
+use PHPUnitTests\TestCase;
+use Smalot\PdfParser\Config;
+use Smalot\PdfParser\Document;
+use Smalot\PdfParser\Font;
+use Smalot\PdfParser\PDFObject;
+
+class FontTest extends TestCase
+{
+    /**
+     * decodeText must decode \b.
+     *
+     * @see https://github.com/smalot/pdfparser/pull/597
+     */
+    public function testDecodeTextIssue597(): void
+    {
+        $config = $this->createMock(Config::class);
+        $config->method('getFontSpaceLimit')->willReturn(1);
+
+        $document = $this->createMock(Document::class);
+        $sut = new Font($document, null, null, $config);
+
+        $commands = [
+            [
+                PDFObject::TYPE => '<',
+                // PHP has no "\b" escape, so the payload is the two characters
+                // backslash + "b"; single quotes make that explicit.
+                PDFObject::COMMAND => '\b',
+            ],
+        ];
+
+        // result is a binary string and looks like: 0x3cc2ab083e
+        $result = $sut->decodeText($commands);
+
+        // check that \b is not part of the result anymore
+        self::assertFalse(strpos($result, '\b>'));
+
+        // compare result with expected value
+        self::assertSame('3cc2ab083e', bin2hex($result));
+    }
+}