class PDF::Reader::Font

Represents a single font PDF object and provides some useful methods for extracting info. Mainly used for converting text to UTF-8.

Attributes

basefont[R]
cid_default_width[R]
cid_widths[R]
descendantfonts[RW]
encoding[RW]
first_char[R]
font_descriptor[R]
last_char[R]
subtype[RW]
tounicode[RW]
widths[R]

Public Class Methods

new(ohash, obj) click to toggle source
# File lib/pdf/reader/font.rb, line 41
# Builds a font from a PDF font dictionary.
#
# ohash - object used to dereference values read from obj; kept as @ohash
# obj   - the font dictionary, read with symbol keys (e.g. :Subtype)
def initialize(ohash, obj)
  @ohash = ohash
  # extract_base_info only assigns @tounicode when a ToUnicode stream is
  # found, so start from an explicit nil.
  @tounicode = nil

  # Order matters: extract_base_info sets @subtype, which extract_type3_info
  # and build_width_calculator read; build_width_calculator also reads the
  # @font_descriptor assigned by extract_descriptor.
  extract_base_info(obj)
  extract_type3_info(obj)
  extract_descriptor(obj)
  extract_descendants(obj)
  @width_calc = build_width_calculator

  # Fall back to StandardEncoding if no encoding has been assigned by now.
  @encoding ||= PDF::Reader::Encoding.new(:StandardEncoding)
end

Public Instance Methods

glyph_width(code_point) click to toggle source

Looks up the specified code point and returns its width in (PDF) glyph space, where 1000 glyph units = 1 text space unit.

# File lib/pdf/reader/font.rb, line 68
# Returns the width of a glyph as reported by the font's width calculator.
#
# code_point - an Integer character code, or a String whose first unpacked
#              code (using the encoding's unpack directive) is looked up
#
# Results are memoised per code point in @cached_widths.
def glyph_width(code_point)
  code_point = code_point.unpack(encoding.unpack).first if code_point.is_a?(String)

  @cached_widths ||= {}
  @cached_widths[code_point] ||= @width_calc.glyph_width(code_point)
end
glyph_width_in_text_space(code_point) click to toggle source

In most cases glyph width is converted into text space with a simple divide by 1000.

However, Type3 fonts provide their own FontMatrix that’s used for the transformation.

# File lib/pdf/reader/font.rb, line 81
# Converts a glyph's width from glyph space into text space.
#
# For every subtype except :Type3 this is the glyph-space width divided by
# 1000.0. For :Type3 fonts the width is pushed through the font matrix and
# the horizontal distance from the transformed origin is returned, rounded
# to two decimal places.
def glyph_width_in_text_space(code_point)
  width = glyph_width(code_point)
  return width / 1000.0 unless @subtype == :Type3

  origin_x, = font_matrix_transform(0, 0)
  advance_x, = font_matrix_transform(width, 0)
  (advance_x - origin_x).abs.round(2)
end
to_utf8(params) click to toggle source
# File lib/pdf/reader/font.rb, line 54
# Converts params to a UTF-8 string, going through the ToUnicode CMap when
# one was loaded and through the font's encoding otherwise.
def to_utf8(params)
  return to_utf8_via_cmap(params) if @tounicode

  to_utf8_via_encoding(params)
end
unpack(data) click to toggle source
# File lib/pdf/reader/font.rb, line 62
# Splits data into an array of values using the unpack directive supplied by
# the font's encoding.
def unpack(data)
  directive = encoding.unpack
  data.unpack(directive)
end

Private Instance Methods

build_encoding(obj) click to toggle source
# File lib/pdf/reader/font.rb, line 152
# Builds the encoding object for this font from obj[:Encoding].
#
# A Symbol (a standard encoding referenced by name), a Hash or a
# PDF::Reader::Stream is handed to PDF::Reader::Encoding.new unchanged. When
# the entry is absent the default for @basefont is used.
#
# TODO pass in a standard shape, always a Hash
#
# Raises MalformedPDFError for any other type.
def build_encoding(obj)
  spec = obj[:Encoding]

  case spec
  when Symbol, Hash, PDF::Reader::Stream
    PDF::Reader::Encoding.new(spec)
  when nil
    default_encoding(@basefont)
  else
    raise MalformedPDFError, "Unexpected type for Encoding (#{spec.class})"
  end
end
build_width_calculator() click to toggle source
# File lib/pdf/reader/font.rb, line 126
# Picks the width calculator that matches @subtype (and, for Type1 and
# TrueType fonts, whether a font descriptor is present) and instantiates it
# for this font.
def build_width_calculator
  calculator_class =
    case @subtype
    when :Type0
      PDF::Reader::WidthCalculator::TypeZero
    when :Type1
      @font_descriptor.nil? ? PDF::Reader::WidthCalculator::BuiltIn : PDF::Reader::WidthCalculator::TypeOneOrThree
    when :TrueType
      # A TrueType font that isn't embedded. Most readers look for a version
      # on the local system and fall back to a substitute. For now, we go
      # straight to a substitute.
      @font_descriptor ? PDF::Reader::WidthCalculator::TrueType : PDF::Reader::WidthCalculator::BuiltIn
    when :CIDFontType0, :CIDFontType2
      PDF::Reader::WidthCalculator::Composite
    else
      # :Type3 and every unrecognised subtype share this calculator
      PDF::Reader::WidthCalculator::TypeOneOrThree
    end

  calculator_class.new(self)
end
default_encoding(font_name) click to toggle source
# File lib/pdf/reader/font.rb, line 115
# Returns the encoding to use when a font dictionary has no Encoding entry.
#
# font_name - the base font name; compared via #to_s. "Symbol" and
#             "ZapfDingbats" get their own encodings, everything else gets
#             StandardEncoding.
def default_encoding(font_name)
  encoding_name =
    case font_name.to_s
    when "Symbol"       then :SymbolEncoding
    when "ZapfDingbats" then :ZapfDingbatsEncoding
    else                     :StandardEncoding
    end

  PDF::Reader::Encoding.new(encoding_name)
end
extract_base_info(obj) click to toggle source
# File lib/pdf/reader/font.rb, line 166
# Reads the basic entries of the font dictionary into instance variables:
# subtype, base font name, encoding, simple-font widths and character range,
# CID widths, and (when present) the ToUnicode CMap.
def extract_base_info(obj)
  @subtype  = @ohash.deref_name(obj[:Subtype])
  @basefont = @ohash.deref_name(obj[:BaseFont])
  # build_encoding reads @basefont, so it has to run after the line above
  @encoding = build_encoding(obj)

  simple_widths = @ohash.deref_array_of_numbers(obj[:Widths])
  @widths = simple_widths || []
  @first_char, @last_char = %i[FirstChar LastChar].map { |key| @ohash.deref_integer(obj[key]) }

  # CID Fonts are not required to have a W or DW entry, if they don't exist,
  # the default cid width = 1000, see Section 9.7.4.1 PDF 32000-1:2008 pp 269
  cid_widths = @ohash.deref_array(obj[:W])
  @cid_widths = cid_widths || []
  default_width = @ohash.deref_number(obj[:DW])
  @cid_default_width = default_width || 1000

  # ToUnicode is optional for Type1 and Type3
  to_unicode_ref = obj[:ToUnicode]
  return unless to_unicode_ref

  cmap_stream = @ohash.deref_stream(to_unicode_ref)
  @tounicode = PDF::Reader::CMap.new(cmap_stream.unfiltered_data) if cmap_stream
end
extract_descendants(obj) click to toggle source
# File lib/pdf/reader/font.rb, line 207
# Builds a PDF::Reader::Font for each entry of obj[:DescendantFonts] and
# stores the list in @descendantfonts.
#
# per PDF 32000-1:2008 pp. 280 :DescendantFonts is:
# A one-element array specifying the CIDFont dictionary that is the
# descendant of this Type 0 font.
#
# @descendantfonts is an empty array when the entry is missing, or when the
# entry dereferences to nil.
def extract_descendants(obj)
  descendants = @ohash.deref_array(obj[:DescendantFonts]) if obj[:DescendantFonts]

  # deref_array may hand back nil; treat that the same as a missing entry
  # instead of failing on nil.map.
  @descendantfonts = (descendants || []).map { |desc|
    PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc))
  }
end
extract_descriptor(obj) click to toggle source
# File lib/pdf/reader/font.rb, line 196
# Sets @font_descriptor to a PDF::Reader::FontDescriptor built from
# obj[:FontDescriptor], or to nil when the dictionary has no such entry.
def extract_descriptor(obj)
  descriptor_ref = obj[:FontDescriptor]

  @font_descriptor =
    if descriptor_ref
      descriptor_hash = @ohash.deref_hash(descriptor_ref)
      PDF::Reader::FontDescriptor.new(@ohash, descriptor_hash)
    end
end
extract_type3_info(obj) click to toggle source
# File lib/pdf/reader/font.rb, line 188
# For :Type3 fonts, stores the font matrix from obj[:FontMatrix] in
# @font_matrix, falling back to [0.001, 0, 0, 0.001, 0, 0] when the entry
# yields nothing. Does nothing for any other subtype.
def extract_type3_info(obj)
  return unless @subtype == :Type3

  declared_matrix = @ohash.deref_array_of_numbers(obj[:FontMatrix])
  @font_matrix = declared_matrix || [0.001, 0, 0, 0.001, 0, 0]
end
font_matrix_transform(x, y) click to toggle source

Only valid for Type3 fonts

# File lib/pdf/reader/font.rb, line 96
# Applies the font matrix to the point (x, y) and returns the transformed
# pair as a two-element array. Returns [x, y] unchanged when no font matrix
# has been set.
def font_matrix_transform(x, y)
  return [x, y] if @font_matrix.nil?

  matrix = TransformationMatrix.new(*@font_matrix.values_at(0, 1, 2, 3, 4, 5))

  # The origin maps straight onto the matrix's e and f components.
  return [matrix.e, matrix.f] if x == 0 && y == 0

  transformed_x = (matrix.a * x) + (matrix.c * y) + (matrix.e)
  transformed_y = (matrix.b * x) + (matrix.d * y) + (matrix.f)
  [transformed_x, transformed_y]
end
to_utf8_via_cmap(params) click to toggle source
# File lib/pdf/reader/font.rb, line 221
# Converts params to a UTF-8 string using the ToUnicode CMap.
#
# params - an Integer (a single character code), a String (unpacked into
#          codes with the encoding's unpack directive), or an Array whose
#          elements are converted recursively and concatenated
#
# Codes the CMap cannot decode become PDF::Reader::Encoding::UNKNOWN_CHAR.
# Returns nil for any other kind of argument.
def to_utf8_via_cmap(params)
  decode_one = lambda { |code|
    @tounicode.decode(code) || PDF::Reader::Encoding::UNKNOWN_CHAR
  }

  case params
  when Integer
    [decode_one.call(params)].flatten.pack("U*")
  when String
    codes = params.unpack(encoding.unpack)
    codes.map { |code| decode_one.call(code) }.flatten.pack("U*")
  when Array
    params.map { |param| to_utf8_via_cmap(param) }.join("")
  end
end
to_utf8_via_encoding(params) click to toggle source
# File lib/pdf/reader/font.rb, line 236
# Converts params to a UTF-8 string using the font's encoding object.
#
# params - an Integer (a single character code), a String, or an Array whose
#          elements are converted recursively and concatenated
#
# Returns nil for any other kind of argument.
# Raises UnsupportedFeatureError when the encoding is a plain String rather
# than an encoding object.
def to_utf8_via_encoding(params)
  if encoding.is_a?(String)
    raise UnsupportedFeatureError, "font encoding '#{encoding}' currently unsupported"
  end

  case params
  when Integer then encoding.int_to_utf8_string(params)
  when String  then encoding.to_utf8(params)
  when Array
    converted = params.map { |param| to_utf8_via_encoding(param) }
    converted.join("")
  end
end