googletest/src/gtest-edit-distance.cc - hafnium/third_party/googletest - Git at Google

 // Copyright 2018, Google Inc.
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
 //
 //     * Redistributions of source code must retain the above copyright
 // notice, this list of conditions and the following disclaimer.
 //     * Redistributions in binary form must reproduce the above
 // copyright notice, this list of conditions and the following disclaimer
 // in the documentation and/or other materials provided with the
 // distribution.
 //     * Neither the name of Google Inc. nor the names of its
 // contributors may be used to endorse or promote products derived from
 // this software without specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 //
 // Internal helper functions for finding optimal edit transformations
 // between strings.

 #include "gtest/gtest.h"

 #include <functional>
 #include <list>
 #include <ostream>  // NOLINT
 #include <queue>
 #include <vector>

 namespace testing {
 namespace internal {
 namespace {

 // The following implement data structures and code for a Dijkstra-search
 // based implementation of optimal edit distance.

 // Posible states a node can be in. Either a node is unsettled (it hasn't been
 // drawn from the priority queue yet), or it is settled and a back-link to its
 // parent node is fixed.
 enum EditSearchState {
   kUnsettled,
   kMatchParent,
   kAddParent,
   kRemoveParent,
   kReplaceParent
 };

 // Custom container for search states. This is smaller and faster than a hash
 // map, because the used states are dense along diagonals.
 // Specifically, each state requires only 1 byte, whereas a hash_map would
 // require storing the key, which would come to at least 8 bytes. std::map has
 // an extra 32 bytes per node (3 pointers + 1 byte, padded), so even though
 // there are circumstances where this class can have kBlockSize overhead per
 // state, on average it does better than 40 bytes of overhead per state.
 // In addition, in unopt builds (the usual way tests are run) the fewer
 // allocations + better locality has this method running 10-50x faster than
 // std::map for inputs that are large enough to measure.
 class EditSearchMap {
  public:
   EditSearchMap(size_t left_size, size_t right_size)
       : left_size_(left_size), right_size_(right_size) {
     GTEST_CHECK_(left_size_ == left_size && right_size_ == right_size)
         << "Overflow in size: Arguments too large";
   }

   // Gets a mutable reference to a state - this is actually of type
   // EditSearchState - inserting if it does not exist.
   unsigned char& insert(UInt32 left, UInt32 right) {
     std::vector<UInt32>* vec;
     size_t index1;
     size_t index2;
     if (left > right) {
       vec = &left_nodes_;
       index1 = left - right - 1;
       index2 = right;
     } else {
       vec = &right_nodes_;
       index1 = right - left;
       index2 = left;
     }
     if (vec->size() <= index1) {
       GTEST_CHECK_(vec->size() == index1)
           << "Array diagonals should only grow by one " << vec->size() << " vs "
           << index1;
       vec->push_back(block_indices_.size());
       // Round up
       block_indices_.resize(
           block_indices_.size() +
               (DiagonalLength(left, right) + kBlockSize - 1) / kBlockSize,
           kUnallocatedBlock);
     }
     const size_t bucket = index2 / kBlockSize;
     const size_t pos_in_bucket = index2 % kBlockSize;
     UInt32& level2 = block_indices_[(*vec)[index1] + bucket];
     if (level2 == kUnallocatedBlock) {
       level2 = nodes_.size();
       size_t diagonal_length = DiagonalLength(left, right);
       GTEST_CHECK_(diagonal_length > index2)
           << diagonal_length << " " << index2;
       size_t block_size = kBlockSize;
       if (diagonal_length / kBlockSize == bucket) {
         // We can never get here if diagonal_length is a multiple of
         // kBlockSize, which is what we want, since this would evaluate to 0.
         block_size = diagonal_length % kBlockSize;
       }
       nodes_.resize(nodes_.size() + block_size);
     }
     return nodes_[level2 + pos_in_bucket];
   }

   size_t MemoryUsage() const {
     return nodes_.capacity() +
            sizeof(UInt32) * (left_nodes_.capacity() + right_nodes_.capacity() +
                              block_indices_.capacity());
   }

  private:
   enum { kBlockSize = 1024, kUnallocatedBlock = 0xFFFFFFFFul };

   size_t DiagonalLength(UInt32 left, UInt32 right) const {
     return std::min(left_size_ - left, right_size_ - right) +
            (left < right ? left : right);
   }

   // The state space is conceptually a left_size_ by right_size_ sparse matrix
   // of EditSearchStates. However, due to the access pattern of the search, it
   // is much better to store the nodes per diagonal rather than per row.
   UInt32 left_size_;
   UInt32 right_size_;
   // The nodes are stored by diagonals, split in two: Those to the left of the
   // main diagonal are in left_nodes_, and everything else is in right_nodes_.
   // The values are indices into block_indices_.
   std::vector<UInt32> left_nodes_;
   std::vector<UInt32> right_nodes_;
   // Every entry here is an offset into the beginning of a kBlockSize-sized
   // block in nodes_. An entire diagonal is allocated together here; for a
   // diagonal of length <= kBlockSize, that's just a single entry, but for
   // longer diagonals multiple contiguous index entries will be reserved at
   // once. Unused entries will be assigned kUnallocatedBlock; this
   // double-indirect scheme is used to save memory in the cases when an entire
   // diagonal isn't needed.
   std::vector<UInt32> block_indices_;
   // This stores the actual EditSearchState data, pointed to by block_indices_.
   std::vector<unsigned char> nodes_;
 };

 struct EditHeapEntry {
   EditHeapEntry(UInt32 l, UInt32 r, UInt64 c, EditSearchState s)
       : left(l), right(r), cost(c), state(s) {}

   UInt32 left;
   UInt32 right;
   UInt64 cost : 61;
   // The state that the node will get when this entry is settled. Therefore,
   // this can never be kUnsettled.
   UInt64 state : 3;

   bool operator>(const EditHeapEntry& other) const { return cost > other.cost; }
 };

 // Need a min-queue, so invert the comparator.
 typedef std::priority_queue<EditHeapEntry, std::vector<EditHeapEntry>,
                             std::greater<EditHeapEntry>>
     EditHeap;

 }  // namespace

 std::vector<EditType> CalculateOptimalEdits(const std::vector<size_t>& left,
                                             const std::vector<size_t>& right,
                                             size_t* memory_usage) {
   const UInt64 kBaseCost = 100000;
   // We make replace a little more expensive than add/remove to lower
   // their priority.
   const UInt64 kReplaceCost = 100001;
   // In the common case where the vectors are the same (or almost the same)
   // size, we know that an add will have to be followed by some later remove
   // (or vice versa) in order to get the lengths to balance. We "borrow" some
   // of the cost of the later operation and bring it forward into the earlier
   // operation, to increase the cost of exploring (usually fruitlessly) around
   // the beginning of the graph.
   // However, there is a trade-off: This cheapens the cost of exploring around
   // the beginning of the graph (in one direction) when the vectors are
   // unequal in length. So we don't steal *all* the cost.
   // You can view this as a form of A*, using an admissable heuristic that has
   // been re-cast as a cost function that can be used in Dijkstra.
   const UInt64 kTowardsGoalCost = 50003;
   const UInt64 kAwayFromGoalCost = 2 * kBaseCost - kTowardsGoalCost;

   EditSearchMap node_map(left.size() + 1, right.size() + 1);
   EditHeap heap;
   heap.push(EditHeapEntry(0, 0, 0, kReplaceParent));

   while (!heap.empty()) {
     const EditHeapEntry current_entry = heap.top();
     heap.pop();

     UInt32 left_pos = current_entry.left;
     UInt32 right_pos = current_entry.right;
     unsigned char& current_state = node_map.insert(left_pos, right_pos);
     if (current_state != kUnsettled) {
       // Node was already settled by a previous entry in the priority queue,
       // this is a suboptimal path that should be ignored.
       continue;
     }
     current_state = current_entry.state;

     if (left_pos == left.size() && right_pos == right.size()) {
       // This is the normal exit point; if we terminate due to the heap being
       // empty, we'll fail a check later.
       break;
     }

     // Special case: Since the cost of a match is zero, we can immediately
     // settle the new node without putting it in the queue, since nothing can
     // have a smaller cost than it. Furthermore, we don't need to relax the
     // other two edges, since we know we don't need them: Any path from this
     // node that would use them has an path via the match that is at least as
     // cheap. Together, this means we can loop here until we stop matching.
     while (left_pos < left.size() && right_pos < right.size() &&
            left[left_pos] == right[right_pos]) {
       left_pos++;
       right_pos++;
       unsigned char& fast_forward_state = node_map.insert(left_pos, right_pos);
       if (fast_forward_state != kUnsettled) {
         // The search reached around and settled this node before settling the
         // base node. This means we're completely done with this iteration;
         // abort to the outer loop.
         goto outer_loop_bottom;
         // Otherwise, when can settle this node, even if it was created from
         // another state - we know the cost of settling it now is optimal.
       }
       fast_forward_state = kMatchParent;
     }

     // Relax adjacent nodes. We have no way to find or lower the cost of
     // existing entries in the heap, so we just push new entries and throw
     // them out at the top if the node is already settled. We *could* check to
     // see if they're already settled before pushing, but it turns out to be
     // ~not any faster, and more complicated to do so.
     //
     // If we're at an edge, there's only one node to relax.
     if (left_pos >= left.size()) {
       if (right_pos >= right.size()) {
         break;  // Can happen due to the fast-path loop above.
       }
       heap.push(EditHeapEntry(left_pos, right_pos + 1,
                               current_entry.cost + kTowardsGoalCost,
                               kAddParent));
       continue;
     }
     if (right_pos >= right.size()) {
       heap.push(EditHeapEntry(left_pos + 1, right_pos,
                               current_entry.cost + kTowardsGoalCost,
                               kRemoveParent));
       continue;
     }
     // General case: Relax 3 edges.
     heap.push(EditHeapEntry(
         left_pos, right_pos + 1,
         current_entry.cost + (right.size() + left_pos > right_pos + left.size()
                                   ? kTowardsGoalCost
                                   : kAwayFromGoalCost),
         kAddParent));
     heap.push(EditHeapEntry(
         left_pos + 1, right_pos,
         current_entry.cost + (right.size() + left_pos < right_pos + left.size()
                                   ? kTowardsGoalCost
                                   : kAwayFromGoalCost),
         kRemoveParent));
     heap.push(EditHeapEntry(left_pos + 1, right_pos + 1,
                             current_entry.cost + kReplaceCost, kReplaceParent));
   outer_loop_bottom : {}  // Need the curlies to form a statement.
   }

   // Reconstruct the best path. We do it in reverse order.
   std::vector<EditType> best_path;
   UInt32 left_pos = left.size();
   UInt32 right_pos = right.size();
   while (left_pos != 0 || right_pos != 0) {
     GTEST_CHECK_(left_pos <= left.size() && right_pos <= right.size());
     // The node must already exist, but if it somehow doesn't, it will be
     // added as kUnsettled, which will crash below.
     const unsigned char state = node_map.insert(left_pos, right_pos);
     switch (state) {
       case kAddParent:
         right_pos--;
         break;
       case kRemoveParent:
         left_pos--;
         break;
       case kMatchParent:
       case kReplaceParent:
         left_pos--;
         right_pos--;
         break;
       default:
         GTEST_LOG_(FATAL) << "Unsettled node at " << left_pos << ","
                           << right_pos;
     }
     best_path.push_back(static_cast<EditType>(state - 1));
   }
   std::reverse(best_path.begin(), best_path.end());
   if (memory_usage != NULL) {
     *memory_usage = node_map.MemoryUsage();
   }
   return best_path;
 }

 namespace {

 // Helper class to convert string into ids with deduplication.
 class InternalStrings {
  public:
   size_t GetId(const std::string* str) {
     IdMap::iterator it = ids_.find(str);
     if (it != ids_.end()) return it->second;
     size_t id = ids_.size();
     return ids_[str] = id;
   }

  private:
   struct IdMapCmp {
     bool operator()(const std::string* first, const std::string* second) const {
       return *first < *second;
     }
   };
   typedef std::map<const std::string*, size_t, IdMapCmp> IdMap;
   IdMap ids_;
 };

 }  // namespace

 std::vector<EditType> CalculateOptimalEdits(
     const std::vector<std::string>& left,
     const std::vector<std::string>& right) {
   std::vector<size_t> left_ids, right_ids;
   {
     InternalStrings intern_table;
     for (size_t i = 0; i < left.size(); ++i) {
       left_ids.push_back(intern_table.GetId(&left[i]));
     }
     for (size_t i = 0; i < right.size(); ++i) {
       right_ids.push_back(intern_table.GetId(&right[i]));
     }
   }
   return CalculateOptimalEdits(left_ids, right_ids);
 }

 namespace {

 // Helper class that holds the state for one hunk and prints it out to the
 // stream.
 // It reorders adds/removes when possible to group all removes before all
 // adds. It also adds the hunk header before printing into the stream.
 class Hunk {
  public:
   Hunk(size_t left_start, size_t right_start)
       : left_start_(left_start),
         right_start_(right_start),
         adds_(),
         removes_(),
         common_() {}

   void PushLine(char edit, const char* line) {
     switch (edit) {
       case ' ':
         ++common_;
         FlushEdits();
         hunk_.push_back(std::make_pair(' ', line));
         break;
       case '-':
         ++removes_;
         hunk_removes_.push_back(std::make_pair('-', line));
         break;
       case '+':
         ++adds_;
         hunk_adds_.push_back(std::make_pair('+', line));
         break;
     }
   }

   void PrintTo(std::ostream* os) {
     PrintHeader(os);
     FlushEdits();
     for (std::list<std::pair<char, const char*> >::const_iterator it =
              hunk_.begin();
          it != hunk_.end(); ++it) {
       *os << it->first << it->second << "\n";
     }
   }

   bool has_edits() const { return adds_ || removes_; }

  private:
   void FlushEdits() {
     hunk_.splice(hunk_.end(), hunk_removes_);
     hunk_.splice(hunk_.end(), hunk_adds_);
   }

   // Print a unified diff header for one hunk.
   // The format is
   //   "@@ -<left_start>,<left_length> +<right_start>,<right_length> @@"
   // where the left/right parts are omitted if unnecessary.
   void PrintHeader(std::ostream* ss) const {
     *ss << "@@ ";
     if (removes_) {
       *ss << "-" << left_start_ << "," << (removes_ + common_);
     }
     if (removes_ && adds_) {
       *ss << " ";
     }
     if (adds_) {
       *ss << "+" << right_start_ << "," << (adds_ + common_);
     }
     *ss << " @@\n";
   }

   size_t left_start_, right_start_;
   size_t adds_, removes_, common_;
   std::list<std::pair<char, const char*> > hunk_, hunk_adds_, hunk_removes_;
 };

 }  // namespace

 // Create a list of diff hunks in Unified diff format.
 // Each hunk has a header generated by PrintHeader above plus a body with
 // lines prefixed with ' ' for no change, '-' for deletion and '+' for
 // addition.
 // 'context' represents the desired unchanged prefix/suffix around the diff.
 // If two hunks are close enough that their contexts overlap, then they are
 // joined into one hunk.
 std::string CreateUnifiedDiff(const std::vector<std::string>& left,
                               const std::vector<std::string>& right,
                               size_t context) {
   const std::vector<EditType> edits = CalculateOptimalEdits(left, right);

   size_t l_i = 0, r_i = 0, edit_i = 0;
   std::stringstream ss;
   while (edit_i < edits.size()) {
     // Find first edit.
     while (edit_i < edits.size() && edits[edit_i] == kEditMatch) {
       ++l_i;
       ++r_i;
       ++edit_i;
     }

     // Find the first line to include in the hunk.
     const size_t prefix_context = std::min(l_i, context);
     Hunk hunk(l_i - prefix_context + 1, r_i - prefix_context + 1);
     for (size_t i = prefix_context; i > 0; --i) {
       hunk.PushLine(' ', left[l_i - i].c_str());
     }

     // Iterate the edits until we found enough suffix for the hunk or the input
     // is over.
     size_t n_suffix = 0;
     for (; edit_i < edits.size(); ++edit_i) {
       if (n_suffix >= context) {
         // Continue only if the next hunk is very close.
         std::vector<EditType>::const_iterator it = edits.begin() + edit_i;
         while (it != edits.end() && *it == kEditMatch) ++it;
         if (it == edits.end() || (it - edits.begin()) - edit_i >= context) {
           // There is no next edit or it is too far away.
           break;
         }
       }

       EditType edit = edits[edit_i];
       // Reset count when a non match is found.
       n_suffix = edit == kEditMatch ? n_suffix + 1 : 0;

       if (edit == kEditMatch || edit == kEditRemove || edit == kEditReplace) {
         hunk.PushLine(edit == kEditMatch ? ' ' : '-', left[l_i].c_str());
       }
       if (edit == kEditAdd || edit == kEditReplace) {
         hunk.PushLine('+', right[r_i].c_str());
       }

       // Advance indices, depending on edit type.
       l_i += edit != kEditAdd;
       r_i += edit != kEditRemove;
     }

     if (!hunk.has_edits()) {
       // We are done. We don't want this hunk.
       break;
     }

     hunk.PrintTo(&ss);
   }
   return ss.str();
 }

 }  // namespace internal
 }  // namespace testing
	// Copyright 2018, Google Inc.
	// All rights reserved.
	//
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions are
	// met:
	//
	// * Redistributions of source code must retain the above copyright
	// notice, this list of conditions and the following disclaimer.
	// * Redistributions in binary form must reproduce the above
	// copyright notice, this list of conditions and the following disclaimer
	// in the documentation and/or other materials provided with the
	// distribution.
	// * Neither the name of Google Inc. nor the names of its
	// contributors may be used to endorse or promote products derived from
	// this software without specific prior written permission.
	//
	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	//
	// Internal helper functions for finding optimal edit transformations
	// between strings.

	#include "gtest/gtest.h"

	#include <functional>
	#include <list>
	#include <ostream> // NOLINT
	#include <queue>
	#include <vector>

	namespace testing {
	namespace internal {
	namespace {

	// The following implement data structures and code for a Dijkstra-search
	// based implementation of optimal edit distance.

	// Posible states a node can be in. Either a node is unsettled (it hasn't been
	// drawn from the priority queue yet), or it is settled and a back-link to its
	// parent node is fixed.
	enum EditSearchState {
	kUnsettled,
	kMatchParent,
	kAddParent,
	kRemoveParent,
	kReplaceParent
	};

	// Custom container for search states. This is smaller and faster than a hash
	// map, because the used states are dense along diagonals.
	// Specifically, each state requires only 1 byte, whereas a hash_map would
	// require storing the key, which would come to at least 8 bytes. std::map has
	// an extra 32 bytes per node (3 pointers + 1 byte, padded), so even though
	// there are circumstances where this class can have kBlockSize overhead per
	// state, on average it does better than 40 bytes of overhead per state.
	// In addition, in unopt builds (the usual way tests are run) the fewer
	// allocations + better locality has this method running 10-50x faster than
	// std::map for inputs that are large enough to measure.
	class EditSearchMap {
	public:
	EditSearchMap(size_t left_size, size_t right_size)
	: left_size_(left_size), right_size_(right_size) {
	GTEST_CHECK_(left_size_ == left_size && right_size_ == right_size)
	<< "Overflow in size: Arguments too large";
	}

	// Gets a mutable reference to a state - this is actually of type
	// EditSearchState - inserting if it does not exist.
	unsigned char& insert(UInt32 left, UInt32 right) {
	std::vector<UInt32>* vec;
	size_t index1;
	size_t index2;
	if (left > right) {
	vec = &left_nodes_;
	index1 = left - right - 1;
	index2 = right;
	} else {
	vec = &right_nodes_;
	index1 = right - left;
	index2 = left;
	}
	if (vec->size() <= index1) {
	GTEST_CHECK_(vec->size() == index1)
	<< "Array diagonals should only grow by one " << vec->size() << " vs "
	<< index1;
	vec->push_back(block_indices_.size());
	// Round up
	block_indices_.resize(
	block_indices_.size() +
	(DiagonalLength(left, right) + kBlockSize - 1) / kBlockSize,
	kUnallocatedBlock);
	}
	const size_t bucket = index2 / kBlockSize;
	const size_t pos_in_bucket = index2 % kBlockSize;
	UInt32& level2 = block_indices_[(*vec)[index1] + bucket];
	if (level2 == kUnallocatedBlock) {
	level2 = nodes_.size();
	size_t diagonal_length = DiagonalLength(left, right);
	GTEST_CHECK_(diagonal_length > index2)
	<< diagonal_length << " " << index2;
	size_t block_size = kBlockSize;
	if (diagonal_length / kBlockSize == bucket) {
	// We can never get here if diagonal_length is a multiple of
	// kBlockSize, which is what we want, since this would evaluate to 0.
	block_size = diagonal_length % kBlockSize;
	}
	nodes_.resize(nodes_.size() + block_size);
	}
	return nodes_[level2 + pos_in_bucket];
	}

	size_t MemoryUsage() const {
	return nodes_.capacity() +
	sizeof(UInt32) * (left_nodes_.capacity() + right_nodes_.capacity() +
	block_indices_.capacity());
	}

	private:
	enum { kBlockSize = 1024, kUnallocatedBlock = 0xFFFFFFFFul };

	size_t DiagonalLength(UInt32 left, UInt32 right) const {
	return std::min(left_size_ - left, right_size_ - right) +
	(left < right ? left : right);
	}

	// The state space is conceptually a left_size_ by right_size_ sparse matrix
	// of EditSearchStates. However, due to the access pattern of the search, it
	// is much better to store the nodes per diagonal rather than per row.
	UInt32 left_size_;
	UInt32 right_size_;
	// The nodes are stored by diagonals, split in two: Those to the left of the
	// main diagonal are in left_nodes_, and everything else is in right_nodes_.
	// The values are indices into block_indices_.
	std::vector<UInt32> left_nodes_;
	std::vector<UInt32> right_nodes_;
	// Every entry here is an offset into the beginning of a kBlockSize-sized
	// block in nodes_. An entire diagonal is allocated together here; for a
	// diagonal of length <= kBlockSize, that's just a single entry, but for
	// longer diagonals multiple contiguous index entries will be reserved at
	// once. Unused entries will be assigned kUnallocatedBlock; this
	// double-indirect scheme is used to save memory in the cases when an entire
	// diagonal isn't needed.
	std::vector<UInt32> block_indices_;
	// This stores the actual EditSearchState data, pointed to by block_indices_.
	std::vector<unsigned char> nodes_;
	};

	struct EditHeapEntry {
	EditHeapEntry(UInt32 l, UInt32 r, UInt64 c, EditSearchState s)
	: left(l), right(r), cost(c), state(s) {}

	UInt32 left;
	UInt32 right;
	UInt64 cost : 61;
	// The state that the node will get when this entry is settled. Therefore,
	// this can never be kUnsettled.
	UInt64 state : 3;

	bool operator>(const EditHeapEntry& other) const { return cost > other.cost; }
	};

	// Need a min-queue, so invert the comparator.
	typedef std::priority_queue<EditHeapEntry, std::vector<EditHeapEntry>,
	std::greater<EditHeapEntry>>
	EditHeap;

	} // namespace

	std::vector<EditType> CalculateOptimalEdits(const std::vector<size_t>& left,
	const std::vector<size_t>& right,
	size_t* memory_usage) {
	const UInt64 kBaseCost = 100000;
	// We make replace a little more expensive than add/remove to lower
	// their priority.
	const UInt64 kReplaceCost = 100001;
	// In the common case where the vectors are the same (or almost the same)
	// size, we know that an add will have to be followed by some later remove
	// (or vice versa) in order to get the lengths to balance. We "borrow" some
	// of the cost of the later operation and bring it forward into the earlier
	// operation, to increase the cost of exploring (usually fruitlessly) around
	// the beginning of the graph.
	// However, there is a trade-off: This cheapens the cost of exploring around
	// the beginning of the graph (in one direction) when the vectors are
	// unequal in length. So we don't steal all the cost.
	// You can view this as a form of A*, using an admissable heuristic that has
	// been re-cast as a cost function that can be used in Dijkstra.
	const UInt64 kTowardsGoalCost = 50003;
	const UInt64 kAwayFromGoalCost = 2 * kBaseCost - kTowardsGoalCost;

	EditSearchMap node_map(left.size() + 1, right.size() + 1);
	EditHeap heap;
	heap.push(EditHeapEntry(0, 0, 0, kReplaceParent));

	while (!heap.empty()) {
	const EditHeapEntry current_entry = heap.top();
	heap.pop();

	UInt32 left_pos = current_entry.left;
	UInt32 right_pos = current_entry.right;
	unsigned char& current_state = node_map.insert(left_pos, right_pos);
	if (current_state != kUnsettled) {
	// Node was already settled by a previous entry in the priority queue,
	// this is a suboptimal path that should be ignored.
	continue;
	}
	current_state = current_entry.state;

	if (left_pos == left.size() && right_pos == right.size()) {
	// This is the normal exit point; if we terminate due to the heap being
	// empty, we'll fail a check later.
	break;
	}

	// Special case: Since the cost of a match is zero, we can immediately
	// settle the new node without putting it in the queue, since nothing can
	// have a smaller cost than it. Furthermore, we don't need to relax the
	// other two edges, since we know we don't need them: Any path from this
	// node that would use them has an path via the match that is at least as
	// cheap. Together, this means we can loop here until we stop matching.
	while (left_pos < left.size() && right_pos < right.size() &&
	left[left_pos] == right[right_pos]) {
	left_pos++;
	right_pos++;
	unsigned char& fast_forward_state = node_map.insert(left_pos, right_pos);
	if (fast_forward_state != kUnsettled) {
	// The search reached around and settled this node before settling the
	// base node. This means we're completely done with this iteration;
	// abort to the outer loop.
	goto outer_loop_bottom;
	// Otherwise, when can settle this node, even if it was created from
	// another state - we know the cost of settling it now is optimal.
	}
	fast_forward_state = kMatchParent;
	}

	// Relax adjacent nodes. We have no way to find or lower the cost of
	// existing entries in the heap, so we just push new entries and throw
	// them out at the top if the node is already settled. We could check to
	// see if they're already settled before pushing, but it turns out to be
	// ~not any faster, and more complicated to do so.
	//
	// If we're at an edge, there's only one node to relax.
	if (left_pos >= left.size()) {
	if (right_pos >= right.size()) {
	break; // Can happen due to the fast-path loop above.
	}
	heap.push(EditHeapEntry(left_pos, right_pos + 1,
	current_entry.cost + kTowardsGoalCost,
	kAddParent));
	continue;
	}
	if (right_pos >= right.size()) {
	heap.push(EditHeapEntry(left_pos + 1, right_pos,
	current_entry.cost + kTowardsGoalCost,
	kRemoveParent));
	continue;
	}
	// General case: Relax 3 edges.
	heap.push(EditHeapEntry(
	left_pos, right_pos + 1,
	current_entry.cost + (right.size() + left_pos > right_pos + left.size()
	? kTowardsGoalCost
	: kAwayFromGoalCost),
	kAddParent));
	heap.push(EditHeapEntry(
	left_pos + 1, right_pos,
	current_entry.cost + (right.size() + left_pos < right_pos + left.size()
	? kTowardsGoalCost
	: kAwayFromGoalCost),
	kRemoveParent));
	heap.push(EditHeapEntry(left_pos + 1, right_pos + 1,
	current_entry.cost + kReplaceCost, kReplaceParent));
	outer_loop_bottom : {} // Need the curlies to form a statement.
	}

	// Reconstruct the best path. We do it in reverse order.
	std::vector<EditType> best_path;
	UInt32 left_pos = left.size();
	UInt32 right_pos = right.size();
	while (left_pos != 0 \|\| right_pos != 0) {
	GTEST_CHECK_(left_pos <= left.size() && right_pos <= right.size());
	// The node must already exist, but if it somehow doesn't, it will be
	// added as kUnsettled, which will crash below.
	const unsigned char state = node_map.insert(left_pos, right_pos);
	switch (state) {
	case kAddParent:
	right_pos--;
	break;
	case kRemoveParent:
	left_pos--;
	break;
	case kMatchParent:
	case kReplaceParent:
	left_pos--;
	right_pos--;
	break;
	default:
	GTEST_LOG_(FATAL) << "Unsettled node at " << left_pos << ","
	<< right_pos;
	}
	best_path.push_back(static_cast<EditType>(state - 1));
	}
	std::reverse(best_path.begin(), best_path.end());
	if (memory_usage != NULL) {
	*memory_usage = node_map.MemoryUsage();
	}
	return best_path;
	}

	namespace {

	// Helper class to convert string into ids with deduplication.
	class InternalStrings {
	public:
	size_t GetId(const std::string* str) {
	IdMap::iterator it = ids_.find(str);
	if (it != ids_.end()) return it->second;
	size_t id = ids_.size();
	return ids_[str] = id;
	}

	private:
	struct IdMapCmp {
	bool operator()(const std::string* first, const std::string* second) const {
	return first < second;
	}
	};
	typedef std::map<const std::string*, size_t, IdMapCmp> IdMap;
	IdMap ids_;
	};

	} // namespace

	std::vector<EditType> CalculateOptimalEdits(
	const std::vector<std::string>& left,
	const std::vector<std::string>& right) {
	std::vector<size_t> left_ids, right_ids;
	{
	InternalStrings intern_table;
	for (size_t i = 0; i < left.size(); ++i) {
	left_ids.push_back(intern_table.GetId(&left[i]));
	}
	for (size_t i = 0; i < right.size(); ++i) {
	right_ids.push_back(intern_table.GetId(&right[i]));
	}
	}
	return CalculateOptimalEdits(left_ids, right_ids);
	}

	namespace {

	// Helper class that holds the state for one hunk and prints it out to the
	// stream.
	// It reorders adds/removes when possible to group all removes before all
	// adds. It also adds the hunk header before printing into the stream.
	class Hunk {
	public:
	Hunk(size_t left_start, size_t right_start)
	: left_start_(left_start),
	right_start_(right_start),
	adds_(),
	removes_(),
	common_() {}

	void PushLine(char edit, const char* line) {
	switch (edit) {
	case ' ':
	++common_;
	FlushEdits();
	hunk_.push_back(std::make_pair(' ', line));
	break;
	case '-':
	++removes_;
	hunk_removes_.push_back(std::make_pair('-', line));
	break;
	case '+':
	++adds_;
	hunk_adds_.push_back(std::make_pair('+', line));
	break;
	}
	}

	void PrintTo(std::ostream* os) {
	PrintHeader(os);
	FlushEdits();
	for (std::list<std::pair<char, const char*> >::const_iterator it =
	hunk_.begin();
	it != hunk_.end(); ++it) {
	*os << it->first << it->second << "\n";
	}
	}

	bool has_edits() const { return adds_ \|\| removes_; }

	private:
	void FlushEdits() {
	hunk_.splice(hunk_.end(), hunk_removes_);
	hunk_.splice(hunk_.end(), hunk_adds_);
	}

	// Print a unified diff header for one hunk.
	// The format is
	// "@@ -<left_start>,<left_length> +<right_start>,<right_length> @@"
	// where the left/right parts are omitted if unnecessary.
	void PrintHeader(std::ostream* ss) const {
	*ss << "@@ ";
	if (removes_) {
	*ss << "-" << left_start_ << "," << (removes_ + common_);
	}
	if (removes_ && adds_) {
	*ss << " ";
	}
	if (adds_) {
	*ss << "+" << right_start_ << "," << (adds_ + common_);
	}
	*ss << " @@\n";
	}

	size_t left_start_, right_start_;
	size_t adds_, removes_, common_;
	std::list<std::pair<char, const char*> > hunk_, hunk_adds_, hunk_removes_;
	};

	} // namespace

	// Create a list of diff hunks in Unified diff format.
	// Each hunk has a header generated by PrintHeader above plus a body with
	// lines prefixed with ' ' for no change, '-' for deletion and '+' for
	// addition.
	// 'context' represents the desired unchanged prefix/suffix around the diff.
	// If two hunks are close enough that their contexts overlap, then they are
	// joined into one hunk.
	std::string CreateUnifiedDiff(const std::vector<std::string>& left,
	const std::vector<std::string>& right,
	size_t context) {
	const std::vector<EditType> edits = CalculateOptimalEdits(left, right);

	size_t l_i = 0, r_i = 0, edit_i = 0;
	std::stringstream ss;
	while (edit_i < edits.size()) {
	// Find first edit.
	while (edit_i < edits.size() && edits[edit_i] == kEditMatch) {
	++l_i;
	++r_i;
	++edit_i;
	}

	// Find the first line to include in the hunk.
	const size_t prefix_context = std::min(l_i, context);
	Hunk hunk(l_i - prefix_context + 1, r_i - prefix_context + 1);
	for (size_t i = prefix_context; i > 0; --i) {
	hunk.PushLine(' ', left[l_i - i].c_str());
	}

	// Iterate the edits until we found enough suffix for the hunk or the input
	// is over.
	size_t n_suffix = 0;
	for (; edit_i < edits.size(); ++edit_i) {
	if (n_suffix >= context) {
	// Continue only if the next hunk is very close.
	std::vector<EditType>::const_iterator it = edits.begin() + edit_i;
	while (it != edits.end() && *it == kEditMatch) ++it;
	if (it == edits.end() \|\| (it - edits.begin()) - edit_i >= context) {
	// There is no next edit or it is too far away.
	break;
	}
	}

	EditType edit = edits[edit_i];
	// Reset count when a non match is found.
	n_suffix = edit == kEditMatch ? n_suffix + 1 : 0;

	if (edit == kEditMatch \|\| edit == kEditRemove \|\| edit == kEditReplace) {
	hunk.PushLine(edit == kEditMatch ? ' ' : '-', left[l_i].c_str());
	}
	if (edit == kEditAdd \|\| edit == kEditReplace) {
	hunk.PushLine('+', right[r_i].c_str());
	}

	// Advance indices, depending on edit type.
	l_i += edit != kEditAdd;
	r_i += edit != kEditRemove;
	}

	if (!hunk.has_edits()) {
	// We are done. We don't want this hunk.
	break;
	}

	hunk.PrintTo(&ss);
	}
	return ss.str();
	}

	} // namespace internal
	} // namespace testing