Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 127 additions & 0 deletions lib/bidi.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
import bidiFactory from 'bidi-js';

// Module-wide bidi-js instance; stays null until the first getBidi() call.
let bidiInstance = null;

/**
 * Returns the shared bidi-js instance, creating it on first use so the
 * factory is only invoked when bidi processing is actually requested.
 *
 * @returns {object} The value produced by `bidiFactory()`, reused on every call.
 */
function getBidi() {
  return bidiInstance ?? (bidiInstance = bidiFactory());
}

// Inclusive code point ranges whose characters default to a right-to-left
// bidi class (R or AL) in UAX #9. Sorted by lower bound; the first entry's
// lower bound doubles as the fast-path cutoff in containsRTL().
const RTL_RANGES = [
  [0x0590, 0x05ff], // Hebrew
  [0x0600, 0x06ff], // Arabic
  [0x0700, 0x074f], // Syriac
  [0x0750, 0x077f], // Arabic Supplement
  [0x0780, 0x07bf], // Thaana
  [0x07c0, 0x07ff], // NKo
  [0x0800, 0x083f], // Samaritan
  [0x0840, 0x085f], // Mandaic
  [0x0860, 0x086f], // Syriac Supplement
  [0x0870, 0x08ff], // Arabic Extended-B and Extended-A
  [0xfb1d, 0xfb4f], // Hebrew presentation forms
  [0xfb50, 0xfdff], // Arabic presentation forms-A
  [0xfe70, 0xfefe], // Arabic presentation forms-B (U+FEFF is the BOM, not RTL)
  [0x10800, 0x10fff], // Supplementary-plane RTL scripts (Phoenician, Old Turkic, ...)
  [0x1e800, 0x1efff], // Mende Kikakui, Adlam, Arabic Mathematical Alphabetic Symbols
];

/**
 * Cheap pre-check telling whether a string holds any character from a
 * right-to-left script, i.e. whether it needs bidi processing at all.
 *
 * The string is scanned by code point rather than by UTF-16 code unit so that
 * supplementary-plane scripts (encoded as surrogate pairs) are detected too.
 *
 * @param {string} text - Text to inspect; falsy values yield `false`.
 * @returns {boolean} `true` if at least one code point falls in RTL_RANGES.
 */
export function containsRTL(text) {
  if (!text) return false;
  const lowestRtl = RTL_RANGES[0][0];
  for (let i = 0; i < text.length; i++) {
    const code = text.codePointAt(i);
    // Fast path: everything below the Hebrew block (ASCII, Latin, Greek,
    // Cyrillic, ...) can never match a range.
    if (code < lowestRtl) continue;
    // A code point above the BMP occupies two code units; skip its low
    // surrogate so it is not examined on its own.
    if (code > 0xffff) i++;
    if (RTL_RANGES.some(([lo, hi]) => code >= lo && code <= hi)) return true;
  }
  return false;
}

/**
 * Picks a base direction for `text` when the caller did not specify one.
 *
 * Text that is empty or has no RTL characters is reported as 'ltr' without
 * running the bidi algorithm. Otherwise the embedding levels are computed
 * with no explicit direction and the level of the first paragraph decides.
 *
 * @param {string} text - Text to inspect.
 * @returns {'ltr'|'rtl'} 'rtl' only when the first paragraph's level is 1.
 */
export function detectBaseDirection(text) {
  const needsBidi = Boolean(text) && containsRTL(text);
  if (!needsBidi) {
    return 'ltr';
  }

  const embedding = getBidi().getEmbeddingLevels(text);
  const firstParagraphLevel = embedding.paragraphs[0]?.level;
  if (firstParagraphLevel === 1) {
    return 'rtl';
  }
  return 'ltr';
}

/**
 * Computes the bidi embedding levels for one line of text.
 *
 * @param {string} text - The line to analyse.
 * @param {string} [baseDirection] - Passed straight through to bidi-js
 *   `getEmbeddingLevels` as the base direction.
 * @returns {{levels: *, paragraphLevel: number}} The `levels` value returned
 *   by bidi-js, plus the level of the first paragraph (0 when bidi-js reports
 *   no paragraph or no level).
 */
export function resolveLine(text, baseDirection) {
  const embedding = getBidi().getEmbeddingLevels(text, baseDirection);
  return {
    levels: embedding.levels,
    paragraphLevel: embedding.paragraphs[0]?.level ?? 0,
  };
}

/**
 * Substitutes the mirrored counterpart for every character bidi-js reports
 * in its mirrored-characters map for the given levels.
 *
 * @param {string} text - Line of text in logical order.
 * @param {*} levels - Embedding levels, forwarded to bidi-js unchanged.
 * @returns {string} `text` itself when nothing needs mirroring, otherwise a
 *   copy with the mapped positions replaced.
 */
export function applyMirroring(text, levels) {
  const mirrors = getBidi().getMirroredCharactersMap(text, levels);
  if (mirrors.size === 0) {
    return text;
  }

  // Split by UTF-16 code unit (not code point) so array positions line up
  // with the string indices used as keys in the map.
  const units = text.split('');
  for (const [position, mirroredChar] of mirrors) {
    units[position] = mirroredChar;
  }
  return units.join('');
}

/**
 * Splits the range [start, end) of `text` into maximal runs of consecutive
 * characters that share the same embedding level.
 *
 * @param {string} text - Source text; run text is sliced from it.
 * @param {ArrayLike<number>} levels - Level for each index of `text`.
 * @param {number} start - First index of the range (inclusive).
 * @param {number} end - End of the range (exclusive).
 * @returns {Array<{text: string, level: number, start: number, end: number}>}
 *   Runs in logical order; empty when `start >= end`.
 */
function segmentRuns(text, levels, start, end) {
  const runs = [];
  const pushRun = (from, to, level) => {
    runs.push({ text: text.slice(from, to), level, start: from, end: to });
  };

  let from = start;
  let currentLevel = levels[start];
  for (let pos = start + 1; pos < end; pos += 1) {
    const level = levels[pos];
    if (level === currentLevel) continue;
    // Level changed: close the run that ends just before this position.
    pushRun(from, pos, currentLevel);
    from = pos;
    currentLevel = level;
  }

  // Close the trailing run, unless the range was empty to begin with.
  if (from < end) {
    pushRun(from, end, currentLevel);
  }
  return runs;
}

/**
 * Reorders level runs from logical into visual order (UAX #9 rule L2).
 *
 * Walking from the highest level present down to level 1, every maximal
 * stretch of adjacent runs whose level is at or above the current level is
 * reversed. Only the order of the runs changes; the text inside each run is
 * left alone.
 *
 * @param {Array<{level: number}>} runs - Runs in logical order.
 * @param {number} paragraphLevel - Base level of the paragraph.
 * @returns {Array<{level: number}>} A new array holding the same run objects
 *   in visual order; the input array is not modified.
 */
function reorderRunsVisually(runs, paragraphLevel) {
  if (runs.length <= 1) return runs.slice();

  const maxLevel = runs.reduce(
    (highest, run) => (run.level > highest ? run.level : highest),
    paragraphLevel,
  );

  const result = runs.slice();
  for (let level = maxLevel; level >= 1; level--) {
    let from = 0;
    while (from < result.length) {
      if (!(result[from].level >= level)) {
        from++;
        continue;
      }

      // Extend the stretch while the following runs are at this level or above.
      let to = from + 1;
      while (to < result.length && result[to].level >= level) to++;

      // Reverse result[from..to) in place by swapping from both ends.
      for (let left = from, right = to - 1; left < right; left++, right--) {
        const held = result[left];
        result[left] = result[right];
        result[right] = held;
      }
      from = to;
    }
  }
  return result;
}

/**
 * Turns one line of text into runs listed in visual (left-to-right on the
 * page) order, each tagged with the direction it should be shaped in.
 *
 * Lines without RTL characters skip the bidi algorithm and come back as a
 * single run carrying the requested base direction.
 *
 * @param {string} text - Line of text in logical order.
 * @param {string} [baseDirection] - Base direction; anything other than
 *   'rtl' counts as 'ltr' on the fast path.
 * @returns {Array<{text: string, direction: 'ltr'|'rtl'}>} Runs in visual
 *   order; empty for empty input.
 */
export function visualRuns(text, baseDirection) {
  if (!text) return [];

  if (!containsRTL(text)) {
    const direction = baseDirection === 'rtl' ? 'rtl' : 'ltr';
    return [{ text, direction }];
  }

  const { levels, paragraphLevel } = resolveLine(text, baseDirection);
  // Mirror first so the run text already carries the swapped brackets.
  const logicalRuns = segmentRuns(
    applyMirroring(text, levels),
    levels,
    0,
    text.length,
  );

  return reorderRunsVisually(logicalRuns, paragraphLevel).map(
    ({ text: runText, level }) => ({
      text: runText,
      // Odd embedding levels are right-to-left.
      direction: level % 2 === 1 ? 'rtl' : 'ltr',
    }),
  );
}
56 changes: 40 additions & 16 deletions lib/font/embedded.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,14 @@ class EmbeddedFont extends PDFFont {
}
}

layoutRun(text, features) {
const run = this.font.layout(text, features);
layoutRun(text, features, direction) {
const run = this.font.layout(
text,
features,
undefined,
undefined,
direction,
);

// Normalize position values
for (let i = 0; i < run.positions.length; i++) {
Expand All @@ -44,30 +50,37 @@ class EmbeddedFont extends PDFFont {
return run;
}

layoutCached(text) {
layoutCached(text, direction) {
if (!this.layoutCache) {
return this.layoutRun(text);
return this.layoutRun(text, undefined, direction);
}
const key = direction ? `${direction}\0${text}` : text;
let cached;
if ((cached = this.layoutCache[text])) {
if ((cached = this.layoutCache[key])) {
return cached;
}

const run = this.layoutRun(text);
this.layoutCache[text] = run;
const run = this.layoutRun(text, undefined, direction);
this.layoutCache[key] = run;
return run;
}

layout(text, features, onlyWidth) {
layout(text, features, onlyWidth, direction) {
// Skip the cache if any user defined features are applied
if (features) {
return this.layoutRun(text, features);
return this.layoutRun(text, features, direction);
}

let glyphs = onlyWidth ? null : [];
let positions = onlyWidth ? null : [];
let advanceWidth = 0;

// For RTL, each cached chunk is itself shaped in visual order by fontkit,
// so the LAST logical chunk must appear FIRST visually. We collect the
// cached chunks in logical order and walk them in reverse when emitting.
const isRTL = direction === 'rtl';
const cachedRuns = !onlyWidth && isRTL ? [] : null;

// Split the string by words to increase cache efficiency.
// For this purpose, spaces and tabs are a good enough delimiter.
let last = 0;
Expand All @@ -78,10 +91,14 @@ class EmbeddedFont extends PDFFont {
(index === text.length && last < index) ||
((needle = text.charAt(index)), [' ', '\t'].includes(needle))
) {
const run = this.layoutCached(text.slice(last, ++index));
const run = this.layoutCached(text.slice(last, ++index), direction);
if (!onlyWidth) {
glyphs = glyphs.concat(run.glyphs);
positions = positions.concat(run.positions);
if (isRTL) {
cachedRuns.push(run);
} else {
glyphs = glyphs.concat(run.glyphs);
positions = positions.concat(run.positions);
}
}

advanceWidth += run.advanceWidth;
Expand All @@ -91,11 +108,18 @@ class EmbeddedFont extends PDFFont {
}
}

if (cachedRuns) {
for (let i = cachedRuns.length - 1; i >= 0; i--) {
glyphs = glyphs.concat(cachedRuns[i].glyphs);
positions = positions.concat(cachedRuns[i].positions);
}
}

return { glyphs, positions, advanceWidth };
}

encode(text, features) {
const { glyphs, positions } = this.layout(text, features);
encode(text, features, direction) {
const { glyphs, positions } = this.layout(text, features, false, direction);

const res = [];
for (let i = 0; i < glyphs.length; i++) {
Expand All @@ -114,8 +138,8 @@ class EmbeddedFont extends PDFFont {
return [res, positions];
}

widthOfString(string, size, features) {
const width = this.layout(string, features, true).advanceWidth;
widthOfString(string, size, features, direction) {
const width = this.layout(string, features, true, direction).advanceWidth;
const scale = size / 1000;
return width * scale;
}
Expand Down
65 changes: 62 additions & 3 deletions lib/mixins/text.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import LineWrapper from '../line_wrapper';
import PDFObject from '../object';
import { cosine, sine } from '../utils';
import { containsRTL, detectBaseDirection, visualRuns } from '../bidi';

const { number } = PDFObject;

Expand Down Expand Up @@ -63,6 +64,25 @@ export default {
text = text.replace(/\s{2,}/g, ' ');
}

// Resolve text direction. 'auto' takes the base direction that bidi-js
// detects for the first paragraph of the text; otherwise honor the user's
// explicit choice.
const requestedDirection = options.direction || 'auto';
if (requestedDirection === 'auto') {
options._resolvedDirection = detectBaseDirection(text);
} else {
options._resolvedDirection = requestedDirection;
}
options._bidiEnabled = containsRTL(text) || requestedDirection === 'rtl';

// RTL paragraphs default to right alignment unless caller specified one.
if (
options._resolvedDirection === 'rtl' &&
options.align == null &&
options.width
) {
options.align = 'right';
}

const addStructure = () => {
if (options.structParent) {
options.structParent.add(
Expand Down Expand Up @@ -112,8 +132,17 @@ export default {

widthOfString(string, options = {}) {
const horizontalScaling = options.horizontalScaling || 100;
// For strings containing RTL chars, shape with rtl direction so the font's
// GSUB/GPOS rules produce correct widths and mark positioning. Pure-LTR
// strings keep direction undefined to preserve the layout cache's hit rate.
const direction = containsRTL(string) ? 'rtl' : undefined;
return (
((this._font.widthOfString(string, this._fontSize, options.features) +
((this._font.widthOfString(
string,
this._fontSize,
options.features,
direction,
) +
(options.characterSpacing || 0) * (string.length - 1)) *
horizontalScaling) /
100
Expand Down Expand Up @@ -467,7 +496,14 @@ export default {
if (options.width) {
switch (align) {
case 'right':
textWidth = this.widthOfString(text.replace(/\s+$/, ''), options);
// For RTL paragraphs, "trailing" whitespace in logical order is
// visual-leading; trim logical-leading whitespace instead so the
// visible glyphs flush to the right margin.
if (options._resolvedDirection === 'rtl') {
textWidth = this.widthOfString(text.replace(/^\s+/, ''), options);
} else {
textWidth = this.widthOfString(text.replace(/\s+$/, ''), options);
}
x += options.lineWidth - textWidth;
break;

Expand Down Expand Up @@ -639,21 +675,32 @@ export default {
this.addContent(`${horizontalScaling} Tz`);
}

// Resolve text into visual-order encoding source. When the line contains
// any RTL characters we run UAX #9 to produce visual-order runs and shape
// each run with its own direction; pure-LTR lines take the original fast
// path so non-bidi documents pay zero extra cost.
const useBidi = options._bidiEnabled && containsRTL(text);
const baseDir = options._resolvedDirection === 'rtl' ? 'rtl' : 'ltr';
const runs = useBidi ? visualRuns(text, baseDir) : null;

// Add the actual text
// If we have a word spacing value, we need to encode each word separately
// since the normal Tw operator only works on character code 32, which isn't
// used for embedded fonts.
if (wordSpacing) {
words = text.trim().split(/\s+/);
const sourceText = useBidi ? runs.map((r) => r.text).join('') : text;
words = sourceText.trim().split(/\s+/);
wordSpacing += this.widthOfString(' ') + characterSpacing;
wordSpacing *= 1000 / this._fontSize;

encoded = [];
positions = [];
for (let word of words) {
const wordDir = containsRTL(word) ? 'rtl' : undefined;
const [encodedWord, positionsWord] = this._font.encode(
word,
options.features,
wordDir,
);
encoded = encoded.concat(encodedWord);
positions = positions.concat(positionsWord);
Expand All @@ -669,6 +716,18 @@ export default {
space.xAdvance += wordSpacing;
positions[positions.length - 1] = space;
}
} else if (useBidi) {
encoded = [];
positions = [];
for (const run of runs) {
const [encRun, posRun] = this._font.encode(
run.text,
options.features,
run.direction,
);
encoded = encoded.concat(encRun);
positions = positions.concat(posRun);
}
} else {
[encoded, positions] = this._font.encode(text, options.features);
}
Expand Down
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
"dependencies": {
"@noble/ciphers": "^1.0.0",
"@noble/hashes": "^1.6.0",
"bidi-js": "^1.0.3",
"fontkit": "^2.0.4",
"js-md5": "^0.8.3",
"linebreak": "^1.1.0",
Expand Down Expand Up @@ -81,4 +82,4 @@
"node >= v20.0.0"
],
"packageManager": "yarn@4.10.3"
}
}
Binary file added rtl-demo-2.pdf
Binary file not shown.
Binary file added rtl-demo.pdf
Binary file not shown.
Binary file added tests/fonts/AdumaLight.ttf
Binary file not shown.
Loading
Loading