module Hashdiff

This module provides methods to diff two hash, patch and unpatch hash

Constants

VERSION

Public Class Methods

best_diff(obj1, obj2, options = {}, &block) click to toggle source

Best diff two objects, which tries to generate the smallest change set using different similarity values.

Hashdiff.best_diff is useful in case of comparing two objects which include similar hashes in arrays.

@param [Array, Hash] obj1 @param [Array, Hash] obj2 @param [Hash] options the options to use when comparing

* :strict (Boolean) [true] whether numeric values will be compared on type as well as value.  Set to false to allow comparing Integer, Float, BigDecimal to each other
* :ignore_keys (Symbol, String or Array) [[]] a list of keys to ignore. No comparison is made for the specified key(s) in either hash
* :indifferent (Boolean) [false] whether to treat hash keys indifferently.  Set to true to ignore differences between symbol keys (ie. {a: 1} ~= {'a' => 1})
* :delimiter (String) ['.'] the delimiter used when returning nested key references
* :numeric_tolerance (Numeric) [0] should be a positive numeric value.  Value by which numeric differences must be greater than.  By default, numeric values are compared exactly; with the :tolerance option, the difference between numeric values must be greater than the given value.
* :strip (Boolean) [false] whether or not to call #strip on strings before comparing
* :array_path (Boolean) [false] whether to return the path references for nested values in an array, can be used for patch compatibility with non string keys.
* :use_lcs (Boolean) [true] whether or not to use an implementation of the Longest common subsequence algorithm for comparing arrays, produces better diffs but is slower.

@yield [path, value1, value2] Optional block is used to compare each value, instead of default ==. If the block returns value other than true of false, then other specified comparison options will be used to do the comparison.

@return [Array] an array of changes.

e.g. [[ '+', 'a.b', '45' ], [ '-', 'a.c', '5' ], [ '~', 'a.x', '45', '63']]

@example

a = {'x' => [{'a' => 1, 'c' => 3, 'e' => 5}, {'y' => 3}]}
b = {'x' => [{'a' => 1, 'b' => 2, 'e' => 5}] }
diff = Hashdiff.best_diff(a, b)
diff.should == [['-', 'x[0].c', 3], ['+', 'x[0].b', 2], ['-', 'x[1].y', 3], ['-', 'x[1]', {}]]

@since 0.0.1

# File lib/hashdiff/diff.rb, line 32
def self.best_diff(obj1, obj2, options = {}, &block)
  options[:comparison] = block if block_given?

  opts = { similarity: 0.3 }.merge!(options)
  diffs1 = diff(obj1, obj2, opts)
  count1 = count_diff diffs1

  opts = { similarity: 0.5 }.merge!(options)
  diffs2 = diff(obj1, obj2, opts)
  count2 = count_diff diffs2

  opts = { similarity: 0.8 }.merge!(options)
  diffs3 = diff(obj1, obj2, opts)
  count3 = count_diff diffs3

  count, diffs = count1 < count2 ? [count1, diffs1] : [count2, diffs2]
  count < count3 ? diffs : diffs3
end
comparable?(obj1, obj2, strict = true) click to toggle source

@private

check if objects are comparable

# File lib/hashdiff/util.rb, line 108
def self.comparable?(obj1, obj2, strict = true)
  return true if (obj1.is_a?(Array) || obj1.is_a?(Hash)) && obj2.is_a?(obj1.class)
  return true if (obj2.is_a?(Array) || obj2.is_a?(Hash)) && obj1.is_a?(obj2.class)
  return true if !strict && obj1.is_a?(Numeric) && obj2.is_a?(Numeric)

  obj1.is_a?(obj2.class) && obj2.is_a?(obj1.class)
end
compare_values(obj1, obj2, options = {}) click to toggle source

@private

check for equality or “closeness” within given tolerance

# File lib/hashdiff/util.rb, line 86
def self.compare_values(obj1, obj2, options = {})
  if options[:numeric_tolerance].is_a?(Numeric) &&
     obj1.is_a?(Numeric) && obj2.is_a?(Numeric)
    return (obj1 - obj2).abs <= options[:numeric_tolerance]
  end

  if options[:strip] == true
    obj1 = obj1.strip if obj1.respond_to?(:strip)
    obj2 = obj2.strip if obj2.respond_to?(:strip)
  end

  if options[:case_insensitive] == true
    obj1 = obj1.downcase if obj1.respond_to?(:downcase)
    obj2 = obj2.downcase if obj2.respond_to?(:downcase)
  end

  obj1 == obj2
end
count_diff(diffs) click to toggle source

@private

count node differences

# File lib/hashdiff/util.rb, line 25
def self.count_diff(diffs)
  diffs.inject(0) do |sum, item|
    old_change_count = count_nodes(item[2])
    new_change_count = count_nodes(item[3])
    sum + (old_change_count + new_change_count)
  end
end
count_nodes(obj) click to toggle source

@private

count total nodes for an object

# File lib/hashdiff/util.rb, line 36
def self.count_nodes(obj)
  return 0 unless obj

  count = 0
  if obj.is_a?(Array)
    obj.each { |e| count += count_nodes(e) }
  elsif obj.is_a?(Hash)
    obj.each_value { |v| count += count_nodes(v) }
  else
    return 1
  end

  count
end
custom_compare(method, key, obj1, obj2) click to toggle source

@private

try custom comparison

# File lib/hashdiff/util.rb, line 119
def self.custom_compare(method, key, obj1, obj2)
  return unless method

  res = method.call(key, obj1, obj2)

  # nil != false here
  return [['~', key, obj1, obj2]] if res == false
  return [] if res == true
end
decode_property_path(path, delimiter = '.') click to toggle source

@private

decode property path into an array @param [String] path Property-string @param [String] delimiter Property-string delimiter

e.g. “a.b.c” => [‘a’, ‘b’, 3, ‘c’]

# File lib/hashdiff/util.rb, line 58
def self.decode_property_path(path, delimiter = '.')
  path.split(delimiter).inject([]) do |memo, part|
    if part =~ /^(.*)\[(\d+)\]$/
      if !Regexp.last_match(1).empty?
        memo + [Regexp.last_match(1), Regexp.last_match(2).to_i]
      else
        memo + [Regexp.last_match(2).to_i]
      end
    else
      memo + [part]
    end
  end
end
diff(obj1, obj2, options = {}, &block) click to toggle source

Compute the diff of two hashes or arrays

@param [Array, Hash] obj1 @param [Array, Hash] obj2 @param [Hash] options the options to use when comparing

* :strict (Boolean) [true] whether numeric values will be compared on type as well as value.  Set to false to allow comparing Integer, Float, BigDecimal to each other
* :ignore_keys (Symbol, String or Array) [[]] a list of keys to ignore. No comparison is made for the specified key(s) in either hash
* :indifferent (Boolean) [false] whether to treat hash keys indifferently.  Set to true to ignore differences between symbol keys (ie. {a: 1} ~= {'a' => 1})
* :similarity (Numeric) [0.8] should be between (0, 1]. Meaningful if there are similar hashes in arrays. See {best_diff}.
* :delimiter (String) ['.'] the delimiter used when returning nested key references
* :numeric_tolerance (Numeric) [0] should be a positive numeric value.  Value by which numeric differences must be greater than.  By default, numeric values are compared exactly; with the :tolerance option, the difference between numeric values must be greater than the given value.
* :strip (Boolean) [false] whether or not to call #strip on strings before comparing
* :array_path (Boolean) [false] whether to return the path references for nested values in an array, can be used for patch compatibility with non string keys.
* :use_lcs (Boolean) [true] whether or not to use an implementation of the Longest common subsequence algorithm for comparing arrays, produces better diffs but is slower.

@yield [path, value1, value2] Optional block is used to compare each value, instead of default ==. If the block returns value other than true of false, then other specified comparison options will be used to do the comparison.

@return [Array] an array of changes.

e.g. [[ '+', 'a.b', '45' ], [ '-', 'a.c', '5' ], [ '~', 'a.x', '45', '63']]

@example

a = {"a" => 1, "b" => {"b1" => 1, "b2" =>2}}
b = {"a" => 1, "b" => {}}

diff = Hashdiff.diff(a, b)
diff.should == [['-', 'b.b1', 1], ['-', 'b.b2', 2]]

@since 0.0.1

# File lib/hashdiff/diff.rb, line 80
def self.diff(obj1, obj2, options = {}, &block)
  opts = {
    prefix: '',
    similarity: 0.8,
    delimiter: '.',
    strict: true,
    ignore_keys: [],
    indifferent: false,
    strip: false,
    numeric_tolerance: 0,
    array_path: false,
    use_lcs: true
  }.merge!(options)

  opts[:prefix] = [] if opts[:array_path] && opts[:prefix] == ''

  opts[:ignore_keys] = [*opts[:ignore_keys]]

  opts[:comparison] = block if block_given?

  # prefer to compare with provided block
  result = custom_compare(opts[:comparison], opts[:prefix], obj1, obj2)
  return result if result

  return [] if obj1.nil? && obj2.nil?

  return [['~', opts[:prefix], obj1, obj2]] if obj1.nil? || obj2.nil?

  return [['~', opts[:prefix], obj1, obj2]] unless comparable?(obj1, obj2, opts[:strict])

  return LcsCompareArrays.call(obj1, obj2, opts) if obj1.is_a?(Array) && opts[:use_lcs]

  return LinearCompareArray.call(obj1, obj2, opts) if obj1.is_a?(Array) && !opts[:use_lcs]

  return CompareHashes.call(obj1, obj2, opts) if obj1.is_a?(Hash)

  return [] if compare_values(obj1, obj2, opts)

  [['~', opts[:prefix], obj1, obj2]]
end
diff_array_lcs(arraya, arrayb, options = {}) { |links| ... } click to toggle source

@private

diff array using LCS algorithm

# File lib/hashdiff/diff.rb, line 124
def self.diff_array_lcs(arraya, arrayb, options = {})
  return [] if arraya.empty? && arrayb.empty?

  change_set = []

  if arraya.empty?
    arrayb.each_index do |index|
      change_set << ['+', index, arrayb[index]]
    end

    return change_set
  end

  if arrayb.empty?
    arraya.each_index do |index|
      i = arraya.size - index - 1
      change_set << ['-', i, arraya[i]]
    end

    return change_set
  end

  opts = {
    prefix: '',
    similarity: 0.8,
    delimiter: '.'
  }.merge!(options)

  links = lcs(arraya, arrayb, opts)

  # yield common
  yield links if block_given?

  # padding the end
  links << [arraya.size, arrayb.size]

  last_x = -1
  last_y = -1
  links.each do |pair|
    x, y = pair

    # remove from a, beginning from the end
    (x > last_x + 1) && (x - last_x - 2).downto(0).each do |i|
      change_set << ['-', last_y + i + 1, arraya[i + last_x + 1]]
    end

    # add from b, beginning from the head
    (y > last_y + 1) && 0.upto(y - last_y - 2).each do |i|
      change_set << ['+', last_y + i + 1, arrayb[i + last_y + 1]]
    end

    # update flags
    last_x = x
    last_y = y
  end

  change_set
end
lcs(arraya, arrayb, options = {}) click to toggle source

@private

caculate array difference using LCS algorithm en.wikipedia.org/wiki/Longest_common_subsequence_problem

# File lib/hashdiff/lcs.rb, line 8
def self.lcs(arraya, arrayb, options = {})
  return [] if arraya.empty? || arrayb.empty?

  opts = { similarity: 0.8 }.merge!(options)

  opts[:prefix] = prefix_append_array_index(opts[:prefix], '*', opts)

  a_start = b_start = 0
  a_finish = arraya.size - 1
  b_finish = arrayb.size - 1
  vector = []

  lcs = []
  (b_start..b_finish).each do |bi|
    lcs[bi] = []
    (a_start..a_finish).each do |ai|
      if similar?(arraya[ai], arrayb[bi], opts)
        topleft = (ai > 0) && (bi > 0) ? lcs[bi - 1][ai - 1][1] : 0
        lcs[bi][ai] = [:topleft, topleft + 1]
      elsif (top = bi > 0 ? lcs[bi - 1][ai][1] : 0)
        left = ai > 0 ? lcs[bi][ai - 1][1] : 0
        count = top > left ? top : left

        direction = if top > left
                      :top
                    elsif top < left
                      :left
                    elsif bi.zero?
                      :top
                    elsif ai.zero?
                      :left
                    else
                      :both
                    end

        lcs[bi][ai] = [direction, count]
      end
    end
  end

  x = a_finish
  y = b_finish
  while (x >= 0) && (y >= 0) && (lcs[y][x][1] > 0)
    if lcs[y][x][0] == :both
      x -= 1
    elsif lcs[y][x][0] == :topleft
      vector.insert(0, [x, y])
      x -= 1
      y -= 1
    elsif lcs[y][x][0] == :top
      y -= 1
    elsif lcs[y][x][0] == :left
      x -= 1
    end
  end

  vector
end
node(hash, parts) click to toggle source

@private

get the node of hash by given path parts

# File lib/hashdiff/util.rb, line 75
def self.node(hash, parts)
  temp = hash
  parts.each do |part|
    temp = temp[part]
  end
  temp
end
patch!(obj, changes, options = {}) click to toggle source

Apply patch to object

@param [Hash, Array] obj the object to be patched, can be an Array or a Hash @param [Array] changes e.g. [[ ‘+’, ‘a.b’, ‘45’ ], [ ‘-’, ‘a.c’, ‘5’ ], [ ‘~’, ‘a.x’, ‘45’, ‘63’]] @param [Hash] options supports following keys:

* :delimiter (String) ['.'] delimiter string for representing nested keys in changes array

@return the object after patch

@since 0.0.1

# File lib/hashdiff/patch.rb, line 17
def self.patch!(obj, changes, options = {})
  delimiter = options[:delimiter] || '.'

  changes.each do |change|
    parts = change[1]
    parts = decode_property_path(parts, delimiter) unless parts.is_a?(Array)

    last_part = parts.last

    parent_node = node(obj, parts[0, parts.size - 1])

    if change[0] == '+'
      if parent_node.is_a?(Array)
        parent_node.insert(last_part, change[2])
      else
        parent_node[last_part] = change[2]
      end
    elsif change[0] == '-'
      if parent_node.is_a?(Array)
        parent_node.delete_at(last_part)
      else
        parent_node.delete(last_part)
      end
    elsif change[0] == '~'
      parent_node[last_part] = change[3]
    end
  end

  obj
end
prefix_append_array_index(prefix, array_index, opts) click to toggle source
# File lib/hashdiff/util.rb, line 137
def self.prefix_append_array_index(prefix, array_index, opts)
  if opts[:array_path]
    prefix + [array_index]
  else
    "#{prefix}[#{array_index}]"
  end
end
prefix_append_key(prefix, key, opts) click to toggle source
# File lib/hashdiff/util.rb, line 129
def self.prefix_append_key(prefix, key, opts)
  if opts[:array_path]
    prefix + [key]
  else
    prefix.empty? ? key.to_s : "#{prefix}#{opts[:delimiter]}#{key}"
  end
end
similar?(obja, objb, options = {}) click to toggle source

@private

judge whether two objects are similar

# File lib/hashdiff/util.rb, line 7
def self.similar?(obja, objb, options = {})
  return compare_values(obja, objb, options) if !options[:comparison] && !any_hash_or_array?(obja, objb)

  count_a = count_nodes(obja)
  count_b = count_nodes(objb)

  return true if (count_a + count_b).zero?

  opts = { similarity: 0.8 }.merge!(options)

  diffs = count_diff diff(obja, objb, opts)

  (1 - diffs / (count_a + count_b).to_f) >= opts[:similarity]
end
unpatch!(obj, changes, options = {}) click to toggle source

Unpatch an object

@param [Hash, Array] obj the object to be unpatched, can be an Array or a Hash @param [Array] changes e.g. [[ ‘+’, ‘a.b’, ‘45’ ], [ ‘-’, ‘a.c’, ‘5’ ], [ ‘~’, ‘a.x’, ‘45’, ‘63’]] @param [Hash] options supports following keys:

* :delimiter (String) ['.'] delimiter string for representing nested keys in changes array

@return the object after unpatch

@since 0.0.1

# File lib/hashdiff/patch.rb, line 58
def self.unpatch!(obj, changes, options = {})
  delimiter = options[:delimiter] || '.'

  changes.reverse_each do |change|
    parts = change[1]
    parts = decode_property_path(parts, delimiter) unless parts.is_a?(Array)

    last_part = parts.last

    parent_node = node(obj, parts[0, parts.size - 1])

    if change[0] == '+'
      if parent_node.is_a?(Array)
        parent_node.delete_at(last_part)
      else
        parent_node.delete(last_part)
      end
    elsif change[0] == '-'
      if parent_node.is_a?(Array)
        parent_node.insert(last_part, change[2])
      else
        parent_node[last_part] = change[2]
      end
    elsif change[0] == '~'
      parent_node[last_part] = change[2]
    end
  end

  obj
end

Private Class Methods

any_hash_or_array?(obja, objb) click to toggle source

@private

checks if both objects are Arrays or Hashes

# File lib/hashdiff/util.rb, line 151
def any_hash_or_array?(obja, objb)
  obja.is_a?(Array) || obja.is_a?(Hash) || objb.is_a?(Array) || objb.is_a?(Hash)
end