2013-09-28 12:39:57 +00:00
|
|
|
|
// -*- Mode: C++ -*-
|
|
|
|
|
//
|
2018-01-08 16:54:56 +00:00
|
|
|
|
// Copyright (C) 2013-2018 Red Hat, Inc.
|
2013-09-28 12:39:57 +00:00
|
|
|
|
//
|
|
|
|
|
// This file is part of the GNU Application Binary Interface Generic
|
|
|
|
|
// Analysis and Instrumentation Library (libabigail). This library is
|
|
|
|
|
// free software; you can redistribute it and/or modify it under the
|
|
|
|
|
// terms of the GNU Lesser General Public License as published by the
|
|
|
|
|
// Free Software Foundation; either version 3, or (at your option) any
|
|
|
|
|
// later version.
|
|
|
|
|
|
|
|
|
|
// This library is distributed in the hope that it will be useful, but
|
|
|
|
|
// WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
|
// General Lesser Public License for more details.
|
|
|
|
|
|
|
|
|
|
// You should have received a copy of the GNU Lesser General Public
|
|
|
|
|
// License along with this program; see the file COPYING-LGPLV3. If
|
|
|
|
|
// not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
|
|
/// @file
|
|
|
|
|
///
|
|
|
|
|
/// This file declares types and operations implementing the "O(ND)
|
|
|
|
|
/// Difference Algorithm" (aka diff2) from Eugene W. Myers, to compute
|
|
|
|
|
/// the difference between two sequences.
|
|
|
|
|
///
|
|
|
|
|
/// To understand what is going on here, one must read the paper at
|
|
|
|
|
/// http://www.xmailserver.org/diff2.pdf. Throughout this file, that
|
|
|
|
|
/// paper is referred to as "the paper".
|
|
|
|
|
///
|
|
|
|
|
/// The implementation goes as far as calculating the shortest edit
|
|
|
|
|
/// script (the set of insertions and deletions) for transforming a
|
|
|
|
|
/// sequence into another. The main entry point for that is the
|
|
|
|
|
/// compute_diff() function.
|
|
|
|
|
|
2014-03-26 21:08:45 +00:00
|
|
|
|
#ifndef __ABG_DIFF_UTILS_H__
|
|
|
|
|
#define __ABG_DIFF_UTILS_H__
|
|
|
|
|
|
2013-09-28 12:39:57 +00:00
|
|
|
|
#include <stdexcept>
|
|
|
|
|
#include <cassert>
|
|
|
|
|
#include <cstdlib>
|
|
|
|
|
#include <ostream>
|
|
|
|
|
#include <string>
|
|
|
|
|
#include <vector>
|
|
|
|
|
#include <sstream>
|
2014-10-22 09:51:27 +00:00
|
|
|
|
#include <tr1/memory>
|
2013-09-28 12:39:57 +00:00
|
|
|
|
|
|
|
|
|
namespace abigail
|
|
|
|
|
{
|
|
|
|
|
|
2014-02-24 21:47:14 +00:00
|
|
|
|
/// @brief Libabigail's core diffing algorithms
|
|
|
|
|
///
|
|
|
|
|
/// This is the namespace defining the core diffing algorithm used by
|
|
|
|
|
/// libabigail @ref comparison tools. This is based on the diff
|
|
|
|
|
/// algorithm from Eugene Myers.
|
2013-09-28 12:39:57 +00:00
|
|
|
|
namespace diff_utils
|
|
|
|
|
{
|
|
|
|
|
|
2014-10-22 09:51:27 +00:00
|
|
|
|
using std::tr1::shared_ptr;
|
|
|
|
|
|
2013-09-28 12:39:57 +00:00
|
|
|
|
// Inject the names from std:: below into this namespace
|
|
|
|
|
using std::string;
|
|
|
|
|
using std::ostream;
|
|
|
|
|
using std::vector;
|
|
|
|
|
using std::abs;
|
|
|
|
|
using std::ostringstream;
|
|
|
|
|
|
|
|
|
|
/// A class representing a vertex in an edit graph, as explained in
/// the paper.  A vertex is basically a pair of coordinates (abscissa
/// and ordinate).
///
/// A point can also be "empty", meaning that it carries no
/// meaningful coordinates.  A default-constructed or cleared point is
/// empty and has both of its coordinates set to -1.
class point
{
  int x_;
  int y_;
  bool empty_;

public:

  /// Default constructor.  Builds an empty point whose coordinates
  /// are both set to -1.
  point()
    : x_(-1), y_(-1), empty_(true)
  {}

  /// Constructor from a pair of coordinates.  The resulting point is
  /// not empty.
  ///
  /// @param x the abscissa of the point.
  ///
  /// @param y the ordinate of the point.
  point(int x, int y)
    : x_(x), y_(y), empty_(false)
  {}

  /// Copy constructor.  The emptiness of @p p is copied too.
  ///
  /// @param p the point to copy.
  point(const point& p)
    : x_(p.x()), y_(p.y()), empty_(p.is_empty())
  {}

  /// Getter of the abscissa of the point.
  ///
  /// @return the abscissa.
  int
  x() const
  {return x_;}

  /// Setter of the abscissa of the point.  This also marks the point
  /// as being non-empty.
  ///
  /// @param x the new abscissa.
  void
  x(int x)
  {
    x_ = x;
    empty_ = false;
  }

  /// Getter of the ordinate of the point.
  ///
  /// @return the ordinate.
  int
  y() const
  {return y_;}

  /// Setter of the ordinate of the point.  This also marks the point
  /// as being non-empty.
  ///
  /// @param y the new ordinate.
  void
  y(int y)
  {
    y_ = y;
    empty_ = false;
  }

  /// Set both coordinates of the point and mark it as non-empty.
  ///
  /// @param x the new abscissa.
  ///
  /// @param y the new ordinate.
  void
  set(int x, int y)
  {
    x_ = x;
    y_ = y;
    empty_ = false;
  }

  /// Set both coordinates of the point as well as its emptiness.
  ///
  /// @param x the new abscissa.
  ///
  /// @param y the new ordinate.
  ///
  /// @param empty the new value of the emptiness flag.
  void
  set(int x, int y, bool empty)
  {
    x_ = x;
    y_ = y;
    empty_ = empty;
  }

  /// Translate the point.  The point is marked as non-empty
  /// afterwards.
  ///
  /// @param ax the value to add to the abscissa.
  ///
  /// @param ay the value to add to the ordinate.
  void
  add(int ax, int ay)
  {set (x() + ax, y() + ay);}

  /// Inequality operator.
  ///
  /// @param o the other point to compare against.
  ///
  /// @return true iff the two points differ by their abscissa, their
  /// ordinate or their emptiness.
  bool
  operator!=(const point& o) const
  {return (x() != o.x() || y() != o.y() || is_empty() != o.is_empty());}

  /// Equality operator; the exact negation of operator!=.
  ///
  /// @param o the other point to compare against.
  ///
  /// @return true iff the two points have the same abscissa, ordinate
  /// and emptiness.
  bool
  operator==(const point& o) const
  {return !(operator!=(o));}

  /// @param o the other point to compare against.
  ///
  /// @return true iff both coordinates of this point are strictly
  /// less than those of @p o.  Note that this is only a partial
  /// order: for two given points, it's possible that neither a < b
  /// nor b < a holds.
  bool
  operator<(const point& o) const
  {return (x() < o.x() && y() < o.y());}

  /// @param o the other point to compare against.
  ///
  /// @return true iff both coordinates of this point are strictly
  /// greater than those of @p o.
  bool
  operator>(const point& o) const
  {return (x() > o.x() && y() > o.y());}

  /// @param o the other point to compare against.
  ///
  /// @return true iff both coordinates of this point are less than
  /// or equal to those of @p o.
  bool
  operator<=(const point& o) const
  {return (x() <= o.x() && y() <= o.y());}

  /// @param o the other point to compare against.
  ///
  /// @return true iff both coordinates of this point are greater
  /// than or equal to those of @p o.
  bool
  operator>=(const point& o) const
  {return (x() >= o.x() && y() >= o.y());}

  /// @param val the value to add to both coordinates.
  ///
  /// @return a new non-empty point which coordinates are those of
  /// this point, each increased by @p val.
  point
  operator+(int val) const
  {return point(x() + val, y() + val);}

  /// @param val the value to subtract from both coordinates.
  ///
  /// @return a new non-empty point which coordinates are those of
  /// this point, each decreased by @p val.
  point
  operator-(int val) const
  {return point(x() - val, y() - val);}

  /// Add a value to both coordinates of this point, and mark it as
  /// non-empty.
  ///
  /// @param val the value to add.
  ///
  /// @return a reference to this point.
  point&
  operator+= (int val)
  {
    set(x_ + val, y_ + val);
    return *this;
  }

  /// Subtract a value from both coordinates of this point, and mark
  /// it as non-empty.
  ///
  /// @param val the value to subtract.
  ///
  /// @return a reference to this point.
  point&
  operator-= (int val)
  {return (*this) += (-val);}

  /// Prefix decrement: subtract 1 from both coordinates.
  ///
  /// @return a reference to this (decremented) point.
  point&
  operator--()
  {return (*this) -= 1;}

  /// Prefix increment: add 1 to both coordinates.
  ///
  /// @return a reference to this (incremented) point.
  point&
  operator++()
  {return (*this) += 1;}

  /// Postfix decrement: subtract 1 from both coordinates.
  ///
  /// @return a copy of the point as it was before the decrement.
  point
  operator--(int)
  {
    point tmp(*this);
    // Delegate to the *prefix* operator.  Writing (*this)-- here
    // would call this very function again and recurse forever.
    --(*this);
    return tmp;
  }

  /// Postfix increment: add 1 to both coordinates.
  ///
  /// @return a copy of the point as it was before the increment.
  point
  operator++(int)
  {
    point tmp(*this);
    // Delegate to the *prefix* operator.  Writing (*this)++ here
    // would call this very function again and recurse forever.
    ++(*this);
    return tmp;
  }

  /// Set both coordinates to the same value and mark the point as
  /// non-empty.
  ///
  /// @param val the new value of both the abscissa and the ordinate.
  ///
  /// @return a reference to this point.
  point&
  operator=(int val)
  {
    set(val, val);
    return *this;
  }

  /// Copy assignment.  The emptiness of @p p is copied too.
  ///
  /// @param p the point to copy.
  ///
  /// @return a reference to this point.
  point&
  operator=(const point& p)
  {
    set(p.x(), p.y(), p.is_empty());
    return *this;
  }

  /// @return true iff the point is empty.
  bool
  is_empty() const
  {return empty_;}

  /// Conversion to bool.
  ///
  /// @return true iff the point is not empty.
  operator bool () const
  {return !is_empty();}

  /// @return true iff the point is empty.
  bool
  operator!() const
  {return is_empty();}

  /// Reset the point to the empty state: both coordinates are set to
  /// -1 and the point is marked as empty.
  void
  clear()
  {
    x_ = -1;
    y_ = -1;
    empty_ = true;
  }
};// end point
|
|
|
|
|
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
/// The abstraction of the Snake concept, from the paper.
|
|
|
|
|
///
|
|
|
|
|
/// In a given path from the edit graph, a snake is a non-diagonal
|
|
|
|
|
/// edge followed by zero or more diagonal edges.
|
|
|
|
|
///
|
|
|
|
|
/// The starting point of the non-diagonal edge is the beginning of
|
|
|
|
|
/// the snake. This is given by the snake::begin() method. This point
|
|
|
|
|
/// is not explicitly referenced in the paper, but we need it for
|
|
|
|
|
/// some grunt implementation details of the algorithm.
|
|
|
|
|
///
|
|
|
|
|
/// The end point of the non-diagonal edge is the intermediate point
|
|
|
|
|
/// of the snake; it's given by the snake::intermediate() method.
|
|
|
|
|
/// This point is what is referred to as "the beginning of the snake"
|
|
|
|
|
/// in the paper.
|
|
|
|
|
///
|
|
|
|
|
/// The end point of the first diagonal edge is given by the
|
|
|
|
|
/// snake::diagonal_start() method.
|
|
|
|
|
///
|
|
|
|
|
/// The end point of the last diagonal edge is given by the
|
|
|
|
|
/// snake::end() method. Note that when the snake contains no
|
|
|
|
|
/// diagonal edge, snake::intermediate(), and snake::end() return the
|
|
|
|
|
/// same point; snake::diagonal_start() contains an empty point (i.e,
|
|
|
|
|
/// a point for which point::is_empty() returns true).
|
|
|
|
|
class snake
|
|
|
|
|
{
|
|
|
|
|
point begin_, intermediate_, diagonal_start_, end_;
|
|
|
|
|
bool forward_;
|
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
|
|
|
|
|
/// Default constructor for snake.
|
|
|
|
|
snake()
|
|
|
|
|
: forward_(false)
|
|
|
|
|
{}
|
|
|
|
|
|
|
|
|
|
/// Constructor from the beginning, intermediate and end points.
|
|
|
|
|
///
|
|
|
|
|
/// @param b the beginning point of the snake. That is, the
|
|
|
|
|
/// starting point of the non-diagonal edge.
|
|
|
|
|
///
|
|
|
|
|
/// @param i the intermediate point of the snake. That is, the end
|
|
|
|
|
/// point of the non-diagonal edge.
|
|
|
|
|
///
|
|
|
|
|
/// @param e the end point of the snake. That is the end point of
|
|
|
|
|
/// the last diagonal edge.
|
|
|
|
|
snake(const point& b,
|
|
|
|
|
const point& i,
|
|
|
|
|
const point& e)
|
|
|
|
|
: begin_(b), intermediate_(i),
|
|
|
|
|
end_(e), forward_(false)
|
|
|
|
|
{}
|
|
|
|
|
|
|
|
|
|
/// Constructor from the beginning, intermediate and end points.
|
|
|
|
|
///
|
|
|
|
|
/// @param b the beginning point of the snake. That is, the
|
|
|
|
|
/// starting point of the non-diagonal edge.
|
|
|
|
|
///
|
|
|
|
|
/// @param i the intermediate point of the snake. That is, the end
|
|
|
|
|
/// point of the non-diagonal edge.
|
|
|
|
|
///
|
|
|
|
|
/// @param d the beginning of the diagonal edge. That is the end of
|
|
|
|
|
/// the first diagonal edge of the snake.
|
|
|
|
|
///
|
|
|
|
|
/// @param e the end point of the snake. That is the end point of
|
|
|
|
|
/// the last diagonal edge.
|
|
|
|
|
snake(const point& b,
|
|
|
|
|
const point& i,
|
|
|
|
|
const point& d,
|
|
|
|
|
const point& e)
|
|
|
|
|
: begin_(b), intermediate_(i),
|
|
|
|
|
diagonal_start_(d), end_(e),
|
|
|
|
|
forward_(false)
|
|
|
|
|
{}
|
|
|
|
|
|
|
|
|
|
/// Getter for the starting point of the non-diagonal edge of the
|
|
|
|
|
/// snake.
|
|
|
|
|
///
|
|
|
|
|
/// @return the starting point of the non-diagonal edge of the snake
|
|
|
|
|
const point&
|
|
|
|
|
begin() const
|
|
|
|
|
{return begin_;}
|
|
|
|
|
|
|
|
|
|
/// Setter for the starting point of the non-diagonal edge of the
|
|
|
|
|
/// snake, aka begin point.
|
|
|
|
|
///
|
|
|
|
|
///@param p the new begin point.
|
|
|
|
|
void
|
|
|
|
|
begin(const point& p)
|
|
|
|
|
{begin_ = p;}
|
|
|
|
|
|
|
|
|
|
/// Getter for the end point of the non-diagonal edge of the snake.
|
|
|
|
|
///
|
|
|
|
|
/// @return the end point of the non-diagonal edge of the snake
|
|
|
|
|
const point&
|
|
|
|
|
intermediate() const
|
|
|
|
|
{return intermediate_;}
|
|
|
|
|
|
|
|
|
|
/// Setter for the end point of the non-diagonal edge of the snake,
|
|
|
|
|
/// aka intermediate point.
|
|
|
|
|
///
|
|
|
|
|
/// @param p the new intermediate point.
|
|
|
|
|
void
|
|
|
|
|
intermediate(const point& p)
|
|
|
|
|
{intermediate_ = p;}
|
|
|
|
|
|
|
|
|
|
/// Getter for the end point of the first diagonal edge, aka
|
|
|
|
|
/// diagonal start point. Note that if the snake has no diagonal
|
|
|
|
|
/// edge, this point is empty.
|
|
|
|
|
///
|
|
|
|
|
/// @return the end point of the first diagonal edge.
|
|
|
|
|
const point&
|
|
|
|
|
diagonal_start() const
|
|
|
|
|
{return diagonal_start_;}
|
|
|
|
|
|
|
|
|
|
/// Setter for the end point of the first diagonal edge, aka
|
|
|
|
|
/// diagonal start point.
|
|
|
|
|
///
|
|
|
|
|
/// @param p the new diagonal start point.
|
|
|
|
|
void
|
|
|
|
|
diagonal_start(const point& p)
|
|
|
|
|
{diagonal_start_ = p;}
|
|
|
|
|
|
|
|
|
|
/// Getter for the end point of the last diagonal edge, aka snake
|
|
|
|
|
/// end point. Note that if the snake has no diagonal edge, this
|
|
|
|
|
/// point is equal to the intermediate point.
|
|
|
|
|
///
|
|
|
|
|
/// @return the end point of the last diagonal edge
|
|
|
|
|
const point&
end() const
{
  // End point of the last diagonal edge.
  return end_;
}
|
|
|
|
|
|
|
|
|
|
/// Setter for the end point of the last diagonal edge, aka snake
|
|
|
|
|
/// end point. Note that if the snake has no diagonal edge, this
|
|
|
|
|
/// point is equal to the intermediate point.
|
|
|
|
|
void
|
|
|
|
|
end(const point& p)
|
|
|
|
|
{end_ = p;}
|
|
|
|
|
|
|
|
|
|
/// Setter for the begin, intermediate and end points of the snake.
|
|
|
|
|
///
|
|
|
|
|
/// @param b the new snake begin point
|
|
|
|
|
///
|
|
|
|
|
/// @param i the new snake intermediate point
|
|
|
|
|
///
|
|
|
|
|
/// @param e the new snake end point
|
|
|
|
|
void
|
|
|
|
|
set(const point& b, const point&i, const point&e)
|
|
|
|
|
{
|
|
|
|
|
begin(b);
|
|
|
|
|
intermediate(i);
|
|
|
|
|
end(e);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Setter for the begin, intermediate, diagonal start and end points
|
|
|
|
|
/// of the snake.
|
|
|
|
|
///
|
|
|
|
|
/// @param b the new snake begin point
|
|
|
|
|
///
|
|
|
|
|
/// @param i the new snake intermediate point
|
|
|
|
|
///
|
|
|
|
|
/// @param d the new diagonal start point
|
|
|
|
|
///
|
|
|
|
|
/// @param e the new snake end point
|
|
|
|
|
void
|
|
|
|
|
set(const point& b, const point&i, const point& d, const point&e)
|
|
|
|
|
{
|
|
|
|
|
begin(b);
|
|
|
|
|
intermediate(i);
|
|
|
|
|
diagonal_start(d);
|
|
|
|
|
end(e);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// @return true iff the snake is a forward snake. That is, if it
|
|
|
|
|
/// was built while walking the edit graph going forward (from the
|
|
|
|
|
/// top left corner to the bottom right corner).
|
|
|
|
|
bool
|
|
|
|
|
is_forward() const
|
|
|
|
|
{return forward_;}
|
|
|
|
|
|
|
|
|
|
/// Set to true if the snake is a forward snake; that is, if it was
|
|
|
|
|
/// built while walking the edit graph going forward (from the top
|
|
|
|
|
/// left corner to the bottom right corner). Set to false otherwise.
|
|
|
|
|
///
|
|
|
|
|
/// @param f whether the snake is a forward snake or not.
|
|
|
|
|
void
|
|
|
|
|
set_forward(bool f)
|
|
|
|
|
{forward_ = f;}
|
|
|
|
|
|
|
|
|
|
/// Add an offset to the abscissas of the points of the snake, and
|
|
|
|
|
/// add another offset to the ordinates of these same points.
|
|
|
|
|
///
|
|
|
|
|
/// @param x_offset the offset to add to the abscissas of all the
|
|
|
|
|
/// points of the snake.
|
|
|
|
|
///
|
|
|
|
|
/// @param y_offset the offset to add to the ordinates of all the
|
|
|
|
|
/// points of the snake.
|
|
|
|
|
void
|
|
|
|
|
add(int x_offset, int y_offset)
|
|
|
|
|
{
|
|
|
|
|
if (is_empty())
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
begin_.add(x_offset, y_offset);
|
|
|
|
|
intermediate_.add(x_offset, y_offset);
|
|
|
|
|
if (diagonal_start_)
|
|
|
|
|
diagonal_start_.add(x_offset, y_offset);
|
|
|
|
|
end_.add(x_offset, y_offset);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// @return true iff the snake has at least one diagonal edge.
|
|
|
|
|
bool
|
|
|
|
|
has_diagonal_edge() const
|
|
|
|
|
{return !diagonal_start().is_empty();}
|
|
|
|
|
|
|
|
|
|
/// @return true iff the non-diagonal edge is horizontal.
|
|
|
|
|
bool
|
|
|
|
|
has_horizontal_edge() const
|
|
|
|
|
{return (begin().y() == intermediate().y());}
|
|
|
|
|
|
|
|
|
|
/// @return true iff the non-diagonal edge is vertical.
|
|
|
|
|
bool
|
|
|
|
|
has_vertical_edge() const
|
|
|
|
|
{return (begin().x() == intermediate().x());}
|
|
|
|
|
|
|
|
|
|
/// @return true iff the snake is empty, that is, if all the points
|
|
|
|
|
/// it contains, save for the diagonal start point, are empty.
|
|
|
|
|
bool is_empty() const
|
|
|
|
|
{return begin().is_empty() && intermediate().is_empty() && end().is_empty();}
|
|
|
|
|
};// end class snake
|
|
|
|
|
|
2013-09-28 12:39:57 +00:00
|
|
|
|
/// The array containing the furthest D-path end-points, for each value
|
|
|
|
|
/// of K. MAX_D is the maximum value of the D-Path. That is, M+N if
|
|
|
|
|
/// M is the size of the first input string, and N is the size of the
|
|
|
|
|
/// second.
|
|
|
|
|
class d_path_vec : public std::vector<int>
|
|
|
|
|
{
|
|
|
|
|
private:
|
|
|
|
|
|
2013-10-02 21:41:26 +00:00
|
|
|
|
unsigned a_size_;
|
|
|
|
|
unsigned b_size_;
|
2013-09-28 12:39:57 +00:00
|
|
|
|
|
|
|
|
|
/// Forbid vector size modifications
|
|
|
|
|
void
|
2014-11-05 09:08:33 +00:00
|
|
|
|
push_back(const vector<int>::value_type&);
|
2013-09-28 12:39:57 +00:00
|
|
|
|
|
|
|
|
|
/// Forbid default constructor.
|
|
|
|
|
d_path_vec();
|
|
|
|
|
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
bool
|
|
|
|
|
over_bounds(long long index) const
|
|
|
|
|
{return (index + offset()) >= (long long) size();}
|
|
|
|
|
|
2013-09-28 12:39:57 +00:00
|
|
|
|
void
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
check_index_against_bound(int index) const
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
if (over_bounds(index))
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
|
|
|
|
ostringstream o;
|
|
|
|
|
o << "index '" << index
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
<< "' out of range [-" << max_d() << ", " << max_d() << "]";
|
2013-09-28 12:39:57 +00:00
|
|
|
|
throw std::out_of_range(o.str());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
|
|
|
|
|
/// Constructor of the d_path_vec.
|
|
|
|
|
///
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
/// For forward vectors, the underlying vector allocates 2 *
|
|
|
|
|
/// [MAX_D+1]
|
|
|
|
|
/// space, so that one can address elements in the index range
|
|
|
|
|
/// [-MAX_D, MAX_D]. And MAX_D is the sum of the two sequence
|
|
|
|
|
/// sizes. delta is the difference.
|
|
|
|
|
///
|
|
|
|
|
/// For reverse vectors, note that we need to be able to address
|
|
|
|
|
/// [-MAX_D - delta, MAX_D + delta], with delta being the (signed)
|
|
|
|
|
/// difference between the size of the two sequences. We consider
|
|
|
|
|
/// delta being bounded by MAX_D itself; so we say we need to be
|
|
|
|
|
/// able to address [-2MAX_D, 2MAX_D].
|
2013-09-28 12:39:57 +00:00
|
|
|
|
///
|
2014-01-17 14:42:47 +00:00
|
|
|
|
/// @param size1 the size of the first sequence we are interested
|
2013-09-28 12:39:57 +00:00
|
|
|
|
/// in.
|
|
|
|
|
///
|
|
|
|
|
/// @param size2 the size of the second sequence we are interested
|
|
|
|
|
/// in.
|
|
|
|
|
d_path_vec(unsigned size1, unsigned size2)
|
2014-02-10 12:57:58 +00:00
|
|
|
|
: vector<int>(2 * (size1 + size2 + 1 + (size1 + size2)) + 1, 0),
|
2013-10-02 21:41:26 +00:00
|
|
|
|
a_size_(size1), b_size_(size2)
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|
|
2014-11-05 09:08:33 +00:00
|
|
|
|
std::vector<int>::const_reference
|
2013-09-28 12:39:57 +00:00
|
|
|
|
operator[](int index) const
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
{return at(index);}
|
2013-09-28 12:39:57 +00:00
|
|
|
|
|
2014-11-05 09:08:33 +00:00
|
|
|
|
std::vector<int>::reference
|
2013-09-28 12:39:57 +00:00
|
|
|
|
operator[](int index)
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
{return at(index);}
|
2013-09-28 12:39:57 +00:00
|
|
|
|
|
2014-11-05 09:08:33 +00:00
|
|
|
|
std::vector<int>::reference
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
at(long long index)
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
2014-01-20 10:16:15 +00:00
|
|
|
|
//check_index_against_bound(index);
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
long long i = index + offset();
|
2014-01-20 10:16:15 +00:00
|
|
|
|
return vector<int>::operator[](i);
|
2013-09-28 12:39:57 +00:00
|
|
|
|
}
|
|
|
|
|
|
2014-11-05 09:08:33 +00:00
|
|
|
|
std::vector<int>::const_reference
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
at(long long index) const
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
check_index_against_bound(index);
|
|
|
|
|
long long i = offset() + index;
|
2013-09-28 12:39:57 +00:00
|
|
|
|
return static_cast<const vector<int>* >(this)->at(i);
|
|
|
|
|
}
|
|
|
|
|
|
2013-10-02 21:41:26 +00:00
|
|
|
|
unsigned
|
|
|
|
|
a_size() const
|
|
|
|
|
{return a_size_;}
|
|
|
|
|
|
|
|
|
|
unsigned
|
|
|
|
|
b_size() const
|
|
|
|
|
{return b_size_;}
|
|
|
|
|
|
|
|
|
|
unsigned
|
2013-09-28 12:39:57 +00:00
|
|
|
|
max_d() const
|
2014-02-10 13:02:59 +00:00
|
|
|
|
{return a_size_ + b_size_;}
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
|
|
|
|
|
unsigned
|
|
|
|
|
offset() const
|
|
|
|
|
{return max_d() + abs((long long) a_size() - (long long) b_size());}
|
2013-09-28 12:39:57 +00:00
|
|
|
|
}; // end class d_path_vec
|
|
|
|
|
|
|
|
|
|
/// The abstraction of an insertion of elements of a sequence B into a
|
|
|
|
|
/// sequence A. This is used to represent the edit script for
|
|
|
|
|
/// transforming a sequence A into a sequence B.
|
|
|
|
|
///
|
|
|
|
|
/// An insertion mainly encapsulates two components:
|
|
|
|
|
///
|
|
|
|
|
/// - An insertion point: this is the index (starting at 0) of the
|
|
|
|
|
/// element of the sequence A after which the insertion occurs.
|
|
|
|
|
///
|
|
|
|
|
/// - Inserted elements: this is a vector of indexes of elements of
|
|
|
|
|
/// sequence B (starting at 0) that got inserted into sequence A,
|
|
|
|
|
/// after the insertion point.
|
|
|
|
|
class insertion
{
  // Index of the element of sequence A after which the insertion
  // occurs.
  int insertion_point_;

  // Indexes of the elements of sequence B that got inserted.
  std::vector<unsigned> inserted_;

public:

  /// Build an insertion from an insertion point and the indexes of
  /// the inserted elements.
  ///
  /// @param insertion_point the index of the insertion point.
  ///
  /// @param inserted_indexes the indexes of the inserted elements;
  /// they are copied into the new object.
  insertion(int insertion_point,
	    const std::vector<unsigned>& inserted_indexes)
    : insertion_point_(insertion_point),
      inserted_(inserted_indexes)
  {
  }

  /// Build an insertion that has no inserted element yet.
  ///
  /// @param insertion_point the index of the insertion point.  It
  /// defaults to zero.
  insertion(int insertion_point = 0)
    : insertion_point_(insertion_point)
  {
  }

  /// @return the index of the insertion point.
  int
  insertion_point_index() const
  {
    return insertion_point_;
  }

  /// Set the index of the insertion point.
  ///
  /// @param i the new index of the insertion point.
  void
  insertion_point_index(int i)
  {
    insertion_point_ = i;
  }

  /// @return a read-only view of the indexes of the inserted
  /// elements.
  const std::vector<unsigned>&
  inserted_indexes() const
  {
    return inserted_;
  }

  /// @return a modifiable view of the indexes of the inserted
  /// elements.
  std::vector<unsigned>&
  inserted_indexes()
  {
    return inserted_;
  }
};// end class insertion
|
|
|
|
|
|
|
|
|
|
/// The abstraction of the deletion of one element of a sequence A.
|
|
|
|
|
///
|
|
|
|
|
/// This encapsulates the index of the element of A that got deleted.
|
|
|
|
|
class deletion
{
  // Index of the element of sequence A that got deleted.
  int index_;

public:

  /// Build a deletion from the index of the deleted element.
  ///
  /// @param i the index of the element of sequence A that got
  /// deleted.
  deletion(int i)
    : index_(i)
  {}

  /// Getter of the index of the deleted element.
  ///
  /// @return the index of the deleted element.
  int
  index() const
  {return index_;}

  /// Setter of the index of the deleted element.
  ///
  /// @param i the new index of the deleted element.
  void
  index(int i)
  {index_ = i;}
};// end class deletion
|
|
|
|
|
|
|
|
|
|
/// The abstraction of an edit script for transforming a sequence A
|
|
|
|
|
/// into a sequence B.
|
|
|
|
|
///
|
|
|
|
|
/// It encapsulates the insertions and deletions for transforming A
|
|
|
|
|
/// into B.
|
|
|
|
|
class edit_script
|
|
|
|
|
{
|
|
|
|
|
vector<insertion> insertions_;
|
|
|
|
|
vector<deletion> deletions_;
|
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
|
|
|
|
|
edit_script()
|
|
|
|
|
{}
|
|
|
|
|
|
|
|
|
|
const vector<insertion>&
|
|
|
|
|
insertions() const
|
|
|
|
|
{return insertions_;}
|
|
|
|
|
|
|
|
|
|
vector<insertion>&
|
|
|
|
|
insertions()
|
|
|
|
|
{return insertions_;}
|
|
|
|
|
|
|
|
|
|
const vector<deletion>&
|
|
|
|
|
deletions() const
|
|
|
|
|
{return deletions_;}
|
|
|
|
|
|
|
|
|
|
vector<deletion>&
|
|
|
|
|
deletions()
|
|
|
|
|
{return deletions_;}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
append(const edit_script& es)
|
|
|
|
|
{
|
|
|
|
|
insertions().insert(insertions().end(),
|
|
|
|
|
es.insertions().begin(),
|
|
|
|
|
es.insertions().end());
|
|
|
|
|
deletions().insert(deletions().end(),
|
|
|
|
|
es.deletions().begin(),
|
|
|
|
|
es.deletions().end());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
prepend(const edit_script& es)
|
|
|
|
|
{
|
|
|
|
|
insertions().insert(insertions().begin(),
|
|
|
|
|
es.insertions().begin(),
|
|
|
|
|
es.insertions().end());
|
|
|
|
|
deletions().insert(deletions().begin(),
|
|
|
|
|
es.deletions().begin(),
|
|
|
|
|
es.deletions().end());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
clear()
|
|
|
|
|
{
|
|
|
|
|
insertions().clear();
|
|
|
|
|
deletions().clear();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool
|
|
|
|
|
is_empty() const
|
|
|
|
|
{return insertions().empty() && deletions().empty();}
|
|
|
|
|
|
|
|
|
|
operator bool() const
|
|
|
|
|
{return !is_empty();}
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
|
num_insertions() const
|
|
|
|
|
{
|
|
|
|
|
int l = 0;
|
|
|
|
|
for (vector<insertion>::const_iterator i = insertions().begin();
|
|
|
|
|
i != insertions().end();
|
|
|
|
|
++i)
|
|
|
|
|
l += i->inserted_indexes().size();
|
|
|
|
|
return l;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
|
num_deletions() const
|
|
|
|
|
{return deletions().size();}
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
|
length() const
|
|
|
|
|
{return num_insertions() + num_deletions();}
|
|
|
|
|
};//end class edit_script
|
|
|
|
|
|
2013-10-02 21:41:26 +00:00
|
|
|
|
bool
|
|
|
|
|
point_is_valid_in_graph(point& p,
|
|
|
|
|
unsigned a_size,
|
|
|
|
|
unsigned b_size);
|
|
|
|
|
|
2013-09-28 12:39:57 +00:00
|
|
|
|
bool
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
ends_of_furthest_d_paths_overlap(const point& forward_d_path_end,
|
|
|
|
|
const point& reverse_d_path_end);
|
2013-09-28 12:39:57 +00:00
|
|
|
|
|
2013-12-23 12:40:14 +00:00
|
|
|
|
/// The default equality functor used by the core diffing algorithms.
|
|
|
|
|
struct default_eq_functor
{
  /// This equality operator uses the default "==" to compare its
  /// arguments.
  ///
  /// The arguments are taken by reference to const so that comparing
  /// two elements never copies them; this also lets the functor
  /// compare elements of a type that is not copyable.
  ///
  /// @param a the first comparison argument.
  ///
  /// @param b the second comparison argument.
  ///
  /// @return true if the two arguments are equal, false otherwise.
  template<typename T>
  bool
  operator()(const T& a, const T& b) const
  {return a == b;}
};
|
|
|
|
|
|
Initial support for diffing ABI corpus files
* include/abg-comparison.h (string_function_ptr_map)
(changed_function_ptr, string_changed_function_ptr_map)
(corpus_diff_sptr): New convenience typedefs.
(translation_unit_diff): Add comments.
(class corpus_diff): New type.
(compute_diff): New overload for corpus_diff.
* include/abg-corpus.h (corpus::{functions, variables}): New
typedefs.
(corpus::{operator==, get_functions, get_variables}): New members.
* include/abg-diff-utils.h (struct deep_ptr_eq_functor): New
functor.
* include/abg-ir.h (translation_unit::operator==): New member
equality operator.
* src/abg-comparison.cc (struct corpus_diff::priv): New private
struct holding the private members of corpus_diff.
(corpus_diff::priv::{lookup_tables_empty, clear_lookup_tables,
ensure_lookup_tables_populated}): Define new private member functions.
(corpus_diff::{corpus_diff, first_corpus, second_corpus,
function_changes, variable_changes, length, report}): New public members.
(struct noop_deleter): New struct.
(compute_diff): New implementation for corpus_diff.
* src/abg-corpus.cc (struct corpus::priv): Renamed corpus::impl
into this. Add new fns, vars and is_symbol_table_built data
members.
(corpus::priv::build_symbol_table): New member function.
(class symtab_build_visitor_type): New visitor type to build the
symbol table.
(struct func_comp, struct var_comp): New comparison functors.
(corpus::priv::build_symbol_table): Define new member function.
(corpus::{corpus, add, get_translation_units, operator==,
get_functions, get_variables}): Define new members.
* src/abg-ir.cc (translation_unit::operator==): Define new member
equality operator.
(operator==(translation_unit_sptr l, translation_unit_sptr r)):
Define new equality operator.
* tools/abg-tools-utils.h (enum file_type): New enum.
(guess_file_type): Declare new function.
* tools/abg-tools-utils.cc (guess_file_type): define new function.
* tools/bidiff.cc (main): Guess the type of the files given in
input and support elf files reading and diffing.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-12-23 13:05:19 +00:00
|
|
|
|
|
|
|
|
|
/// An equality functor to deeply compare pointers.
|
|
|
|
|
struct deep_ptr_eq_functor
{
  /// This equality operator compares pointers by comparing the
  /// pointed-to objects.
  ///
  /// Two null pointers compare equal; a null pointer never compares
  /// equal to a non-null one.
  ///
  /// @param first the first comparison argument.
  ///
  /// @param second the second comparison argument.
  ///
  /// @return true if the objects pointed to by the pointers are
  /// equal, false otherwise.
  template<typename T>
  bool
  operator()(const T* first,
             const T* second) const
  {
    // Exactly one of the two pointers is null.
    if (!!first != !!second)
      return false;

    // Both pointers are null.
    if (!first)
      return true;

    return *first == *second;
  }

  /// This equality operator compares shared pointers by comparing
  /// the pointed-to objects.
  ///
  /// The shared pointers are taken by reference to const so that a
  /// comparison does not touch their reference counts.
  ///
  /// @param first the first comparison argument.
  ///
  /// @param second the second comparison argument.
  ///
  /// @return true if the objects pointed to by the shared pointers
  /// are equal, false otherwise.
  template<typename T>
  bool
  operator()(const shared_ptr<T>& first,
             const shared_ptr<T>& second) const
  {return operator()(first.get(), second.get());}
};
|
|
|
|
|
|
2013-09-28 12:39:57 +00:00
|
|
|
|
/// Find the end of the furthest reaching d-path on diagonal k, for
|
|
|
|
|
/// two sequences.  In the paper this is referred to as "the basic
|
|
|
|
|
/// algorithm".
|
|
|
|
|
///
|
|
|
|
|
/// Unlike in the paper, the coordinates of the edit graph start at
|
|
|
|
|
/// (-1,-1), rather than (0,0), and they end at (M-1, N-1), rather
|
|
|
|
|
/// than (M,N).
|
|
|
|
|
///
|
2013-12-23 12:40:14 +00:00
|
|
|
|
/// @tparam RandomAccessOutputIterator the type of iterators passed to
|
|
|
|
|
/// this function. It must be a random access output iterator kind.
|
|
|
|
|
///
|
|
|
|
|
/// @tparam EqualityFunctor this must be a class that declares a public
|
|
|
|
|
/// call operator member returning a boolean and taking two arguments
|
|
|
|
|
/// that must be of the same type as the one pointed to by the @ref
|
|
|
|
|
/// RandomAccessOutputIterator template parameter. This functor is
|
|
|
|
|
/// used to compare the elements referred to by the iterators passed in
|
|
|
|
|
/// argument to this function.
|
|
|
|
|
///
|
2013-09-28 12:39:57 +00:00
|
|
|
|
/// @param k the number of the diagonal on which we want to find the
|
|
|
|
|
/// end of the furthest reaching D-path.
|
|
|
|
|
///
|
|
|
|
|
/// @param d the D in D-Path. That's the number of insertions/deletions
|
|
|
|
|
/// (the number of changes, in other words) in the changeset. That is
|
|
|
|
|
/// also the number of non-diagonals in the D-Path.
|
|
|
|
|
///
|
|
|
|
|
/// @param a_begin an iterator to the beginning of the first sequence
|
|
|
|
|
///
|
|
|
|
|
/// @param a_end an iterator that points right after the last element
|
|
|
|
|
/// of the first sequence to consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_start an iterator to the beginning of the second sequence.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_end an iterator that points right after the last element
|
|
|
|
|
/// of the second sequence to consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param v the vector of furthest end points of d_paths, at (d-1).
|
|
|
|
|
/// It contains the abscissas of the furthest end points for different
|
|
|
|
|
/// values of k, at (d-1). That is, for k in [-D + 1, -D + 3, -D + 5,
|
|
|
|
|
/// ..., D - 1], v[k] is the abscissa of the end of the furthest
|
|
|
|
|
/// reaching (D-1)-path on diagonal k.
|
|
|
|
|
///
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
/// @param snak the last snake of the furthest path found. The end
|
|
|
|
|
/// point of the snake is the end point of the furthest path.
|
2013-10-02 21:41:26 +00:00
|
|
|
|
///
|
|
|
|
|
/// @return true if the end of the furthest reaching path that was
|
|
|
|
|
/// found was inside the boundaries of the edit graph, false
|
|
|
|
|
/// otherwise.
|
2013-12-23 12:40:14 +00:00
|
|
|
|
template<typename RandomAccessOutputIterator,
|
|
|
|
|
typename EqualityFunctor>
|
2013-10-02 21:41:26 +00:00
|
|
|
|
bool
|
2013-09-28 12:39:57 +00:00
|
|
|
|
end_of_fr_d_path_in_k(int k, int d,
|
|
|
|
|
RandomAccessOutputIterator a_begin,
|
|
|
|
|
RandomAccessOutputIterator a_end,
|
|
|
|
|
RandomAccessOutputIterator b_start,
|
|
|
|
|
RandomAccessOutputIterator b_end,
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
d_path_vec& v, snake& snak)
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
|
|
|
|
int x = -1, y = -1;
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
point begin, intermediate, diag_start, end;
|
|
|
|
|
snake s;
|
2013-12-23 12:40:14 +00:00
|
|
|
|
EqualityFunctor eq;
|
2013-09-28 12:39:57 +00:00
|
|
|
|
|
|
|
|
|
// Let's pick the end point of the furthest reaching
|
|
|
|
|
// (D-1)-path. It's either v[k-1] or v[k+1]; the word
|
|
|
|
|
// "furthest" means we choose the one which abscissa is the
|
|
|
|
|
// greatest (that is, furthest from abscissa zero).
|
|
|
|
|
if (k == -d || ((k != d) && (v[k-1] < v[k + 1])))
|
|
|
|
|
// So, the abscissa of the end point of the furthest
|
|
|
|
|
// reaching (D-1)-path is v[k+1]. That is a diagonal that
|
|
|
|
|
// is above the current (k) diagonal, and on the right.
|
|
|
|
|
// To move to the current k diagonal, one has to move
|
|
|
|
|
// "down" from the diagonal k+1. So the abscissa won't
|
|
|
|
|
// change. Only the ordinate will. It will be given by y
|
|
|
|
|
// = x - k (a bit below); as k has changed from k + 1 (it
|
|
|
|
|
// has decreased), y is going to be the new y that is
|
|
|
|
|
// 'down' from the previous y in k + 1.
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
{
|
|
|
|
|
x = v[k+1];
|
|
|
|
|
begin.set(x, x - (k + 1));
|
|
|
|
|
}
|
2013-09-28 12:39:57 +00:00
|
|
|
|
else
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
{
|
|
|
|
|
// So the abscissa of the end point of the furthest
|
|
|
|
|
// (D-1)-path is v[k-1]. That is on the left of the
|
|
|
|
|
// current k diagonal. To move to the current k diagonal,
|
|
|
|
|
// one has to move "right" from diagonal k - 1. That is,
|
|
|
|
|
// the y stays constant and x is incremented.
|
|
|
|
|
x = v[k-1];
|
|
|
|
|
begin.set(x, x - (k - 1));
|
|
|
|
|
++x;
|
|
|
|
|
}
|
2013-09-28 12:39:57 +00:00
|
|
|
|
|
|
|
|
|
// Now get the value of y from the equation k = x - y.
|
|
|
|
|
// This is the point where we first touch K, when we move
|
|
|
|
|
// from the end of the furthest reaching (D-1)-path.
|
|
|
|
|
y = x - k;
|
|
|
|
|
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
intermediate.x(x);
|
|
|
|
|
intermediate.y(y);
|
|
|
|
|
|
2013-09-28 12:39:57 +00:00
|
|
|
|
int last_x_index = a_end - a_begin - 1;
|
|
|
|
|
int last_y_index = b_end - b_start - 1;
|
|
|
|
|
// Now, follow the snake (aka, zero or more consecutive
|
|
|
|
|
// diagonals). Note that we stay on the k diagonal when we
|
|
|
|
|
// do this.
|
|
|
|
|
while ((x < last_x_index) && (y < last_y_index))
|
2013-12-23 12:40:14 +00:00
|
|
|
|
if (eq(a_begin[x + 1], b_start[y + 1]))
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
|
|
|
|
x = x + 1;
|
|
|
|
|
y = y + 1;
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
if (!diag_start)
|
|
|
|
|
diag_start.set(x, y);
|
2013-09-28 12:39:57 +00:00
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
break;
|
|
|
|
|
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
end.x(x);
|
|
|
|
|
end.y(y);
|
|
|
|
|
|
2013-10-02 21:41:26 +00:00
|
|
|
|
// Note the point that we store in v here might be outside the
|
|
|
|
|
// bounds of the edit graph. But we store it at this step (for a
|
|
|
|
|
// given D) anyway, because out of bound or not, we need this value
|
|
|
|
|
// at this step to be able to compute the value of the point on the
|
|
|
|
|
// "next" diagonal for the next D.
|
2013-09-28 12:39:57 +00:00
|
|
|
|
v[k] = x;
|
|
|
|
|
|
2013-10-02 21:41:26 +00:00
|
|
|
|
if (x >= (int) v.a_size()
|
|
|
|
|
|| y >= (int) v.b_size()
|
2013-10-04 08:35:25 +00:00
|
|
|
|
|| x < -1 || y < -1)
|
2013-10-02 21:41:26 +00:00
|
|
|
|
return false;
|
|
|
|
|
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
s.set(begin, intermediate, diag_start, end);
|
|
|
|
|
s.set_forward(true);
|
|
|
|
|
snak = s;
|
2013-10-02 21:41:26 +00:00
|
|
|
|
|
|
|
|
|
return true;
|
2013-09-28 12:39:57 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Find the end of the furthest reaching reverse d-path on diagonal k
|
|
|
|
|
/// + delta. Delta is abs(M - N), with M being the size of a and N
|
|
|
|
|
/// being the size of b. This is the "basic algorithm", run backward.
|
|
|
|
|
/// That is, starting from the point (M,N) of the edit graph.
|
|
|
|
|
///
|
|
|
|
|
/// Unlike in the paper, the coordinates of the edit graph start at
|
|
|
|
|
/// (-1,-1), rather than (0,0), and they end at (M-1, N-1), rather
|
|
|
|
|
/// than (M,N).
|
|
|
|
|
///
|
2013-12-23 12:40:14 +00:00
|
|
|
|
/// @tparam RandomAccessOutputIterator the type of iterators passed to
|
|
|
|
|
/// this function. It must be a random access output iterator kind.
|
|
|
|
|
///
|
|
|
|
|
/// @tparam EqualityFunctor this must be a class that declares a public
|
|
|
|
|
/// call operator member returning a boolean and taking two arguments
|
|
|
|
|
/// that must be of the same type as the one pointed to by the @ref
|
|
|
|
|
/// RandomAccessOutputIterator template parameter. This functor is
|
|
|
|
|
/// used to compare the elements referred to by the iterators passed in
|
|
|
|
|
/// argument to this function.
|
|
|
|
|
///
|
2013-09-28 12:39:57 +00:00
|
|
|
|
/// @param k the number of the diagonal on which we want to find the
|
|
|
|
|
/// end of the furthest reaching reverse D-path. Actually, we want to
|
|
|
|
|
/// find the end of the furthest reaching reverse D-path on diagonal (k
|
|
|
|
|
/// - delta).
|
|
|
|
|
///
|
|
|
|
|
/// @param d the D in D-path. That's the number of insertions/deletions
|
|
|
|
|
/// (the number of changes, in other words) in the changeset. That is
|
|
|
|
|
/// also the number of non-diagonals in the D-Path.
|
|
|
|
|
///
|
|
|
|
|
/// @param a_begin an iterator to the beginning of the first sequence
|
|
|
|
|
///
|
|
|
|
|
/// @param a_end an iterator that points right after the last element
|
|
|
|
|
/// of the first sequence to consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_begin an iterator to the beginning of the second sequence.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_end an iterator that points right after the last element
|
|
|
|
|
/// of the second sequence to consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param v the vector of furthest end points of d_paths, at (d-1).
|
|
|
|
|
/// It contains the abscissae of the furthest end points for different
|
|
|
|
|
/// values of k - delta, at (d-1). That is, for k in [-D + 1, -D + 3,
|
|
|
|
|
/// -D + 5, ..., D - 1], v[k - delta] is the abscissa of the end of the
|
|
|
|
|
/// furthest reaching (D-1)-path on diagonal k - delta.
|
|
|
|
|
///
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
/// @param snak the last snake of the furthest path found. The end
|
|
|
|
|
/// point of the snake is the end point of the furthest path.
|
2013-10-02 21:41:26 +00:00
|
|
|
|
///
|
|
|
|
|
/// @return true iff the end of the furthest reaching path that was
|
|
|
|
|
/// found was inside the boundaries of the edit graph, false
|
|
|
|
|
/// otherwise.
|
2013-12-23 12:40:14 +00:00
|
|
|
|
template<typename RandomAccessOutputIterator,
|
|
|
|
|
typename EqualityFunctor>
|
2013-10-02 21:41:26 +00:00
|
|
|
|
bool
|
2013-09-28 12:39:57 +00:00
|
|
|
|
end_of_frr_d_path_in_k_plus_delta (int k, int d,
|
|
|
|
|
RandomAccessOutputIterator a_begin,
|
|
|
|
|
RandomAccessOutputIterator a_end,
|
|
|
|
|
RandomAccessOutputIterator b_begin,
|
|
|
|
|
RandomAccessOutputIterator b_end,
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
d_path_vec& v, snake& snak)
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
|
|
|
|
int a_size = a_end - a_begin;
|
|
|
|
|
int b_size = b_end - b_begin;
|
2013-10-02 21:41:26 +00:00
|
|
|
|
int delta = a_size - b_size;
|
2013-09-28 12:39:57 +00:00
|
|
|
|
int k_plus_delta = k + delta;
|
|
|
|
|
int x = -1, y = -1;
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
point begin, intermediate, diag_start, end;
|
|
|
|
|
snake s;
|
2013-12-23 12:40:14 +00:00
|
|
|
|
EqualityFunctor eq;
|
2013-09-28 12:39:57 +00:00
|
|
|
|
|
|
|
|
|
// Let's pick the end point of the furthest reaching (D-1)-path and
|
|
|
|
|
// move from there to reach the current k_plus_delta-line. That end
|
|
|
|
|
// point of the furthest reaching (D-1)-path is either on
|
|
|
|
|
// v[k_plus_delta-1] or on v[k_plus_delta+1]; the word "furthest"
|
|
|
|
|
// means we choose the one whose abscissa is the lowest (that is,
|
|
|
|
|
// furthest from abscissa M).
|
|
|
|
|
if (k_plus_delta == -d + delta
|
|
|
|
|
|| ((k_plus_delta != d + delta)
|
2014-02-07 09:06:53 +00:00
|
|
|
|
&& (v[k_plus_delta + 1] <= v[k_plus_delta - 1])))
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
|
|
|
|
// We move left, that means ordinate won't change ...
|
|
|
|
|
x = v[k_plus_delta + 1];
|
|
|
|
|
y = x - (k_plus_delta + 1);
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
begin.set(x, y);
|
2013-09-28 12:39:57 +00:00
|
|
|
|
// ... and abscissa decreases.
|
|
|
|
|
x = x - 1;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
// So the furthest end point is on the k_plus_delta - 1
|
|
|
|
|
// diagonal. That is a diagonal that is 'below' the
|
|
|
|
|
// k_plus_delta current diagonal. So to join the current
|
|
|
|
|
// diagonal from the k_plus_delta - 1 one, we need to move up.
|
|
|
|
|
|
|
|
|
|
// So moving up means abscissa won't change ...
|
|
|
|
|
x = v[k_plus_delta - 1];
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
begin.set(x, x - (k_plus_delta - 1));
|
2013-09-28 12:39:57 +00:00
|
|
|
|
// ... and that ordinate decreases.
|
|
|
|
|
y = x - (k_plus_delta - 1) - 1;
|
|
|
|
|
}
|
|
|
|
|
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
intermediate.set(x, y);
|
|
|
|
|
|
2013-09-28 12:39:57 +00:00
|
|
|
|
// Now, follow the snake. Note that we stay on the k_plus_delta
|
2013-10-02 21:41:26 +00:00
|
|
|
|
// diagonal when we do this.
|
2013-10-04 08:35:25 +00:00
|
|
|
|
while (x >= 0 && y >= 0)
|
2013-12-23 12:40:14 +00:00
|
|
|
|
if (eq(a_begin[x], b_begin[y]))
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
if (!diag_start)
|
|
|
|
|
diag_start.set(x, y);
|
2013-09-28 12:39:57 +00:00
|
|
|
|
x = x - 1;
|
|
|
|
|
y = y - 1;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
break;
|
|
|
|
|
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
end.set(x, y);
|
|
|
|
|
|
2013-10-02 21:41:26 +00:00
|
|
|
|
// Note the point that we store in v here might be outside the
|
|
|
|
|
// bounds of the edit graph. But we store it at this step (for a
|
|
|
|
|
// given D) anyway, because out of bound or not, we need this value
|
|
|
|
|
// at this step to be able to compute the value of the point on the
|
|
|
|
|
// "next" diagonal for the next D.
|
2013-09-28 12:39:57 +00:00
|
|
|
|
v[k_plus_delta] = x;
|
|
|
|
|
|
2013-10-02 21:41:26 +00:00
|
|
|
|
if (x == -1 && y == -1)
|
|
|
|
|
;
|
2013-10-04 08:35:25 +00:00
|
|
|
|
else if (x < -1 || y < -1)
|
2013-10-02 21:41:26 +00:00
|
|
|
|
return false;
|
|
|
|
|
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
s.set(begin, intermediate, diag_start, end);
|
|
|
|
|
s.set_forward(false);
|
|
|
|
|
snak = s;
|
2013-10-02 21:41:26 +00:00
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2013-10-04 08:35:25 +00:00
|
|
|
|
/// Tests if a given point is a match point in an edit graph.
|
2013-10-02 21:41:26 +00:00
|
|
|
|
///
|
2013-10-04 08:35:25 +00:00
|
|
|
|
/// @param a_begin the begin iterator of the first input sequence of
|
|
|
|
|
/// the edit graph.
|
2013-10-02 21:41:26 +00:00
|
|
|
|
///
|
2013-10-04 08:35:25 +00:00
|
|
|
|
/// @param a_end the end iterator of the first input sequence of the
|
|
|
|
|
/// edit graph. This points to one element passed the end of the
|
|
|
|
|
/// sequence.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_begin the begin iterator of the second input sequence of
|
|
|
|
|
/// the edit graph.
|
2013-10-02 21:41:26 +00:00
|
|
|
|
///
|
2013-10-04 08:35:25 +00:00
|
|
|
|
/// @param b_end the end iterator of the second input sequence of the
|
|
|
|
|
/// edit graph. This points the one element passed the end of the
|
|
|
|
|
/// sequence.
|
2013-10-02 21:41:26 +00:00
|
|
|
|
///
|
2013-10-04 08:35:25 +00:00
|
|
|
|
/// @param point the point to test for being a match point.
|
2013-10-02 21:41:26 +00:00
|
|
|
|
///
|
2013-10-04 08:35:25 +00:00
|
|
|
|
/// @return true iff \a point is a match point.
|
|
|
|
|
template<typename RandomAccessOutputIterator>
|
|
|
|
|
bool
|
|
|
|
|
is_match_point(RandomAccessOutputIterator a_begin,
|
|
|
|
|
RandomAccessOutputIterator a_end,
|
|
|
|
|
RandomAccessOutputIterator b_begin,
|
|
|
|
|
RandomAccessOutputIterator b_end,
|
|
|
|
|
const point& point)
|
|
|
|
|
{
|
|
|
|
|
int a_size = a_end - a_begin, b_size = b_end - b_begin;
|
|
|
|
|
|
|
|
|
|
if (point.x() < 0
|
|
|
|
|
|| point.x () >= a_size
|
|
|
|
|
|| point.y() < 0
|
|
|
|
|
|| point.y() >= b_size)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
return (a_begin[point.x()] == b_begin[point.y()]);
|
|
|
|
|
}
|
|
|
|
|
|
2013-09-28 12:39:57 +00:00
|
|
|
|
/// Returns the middle snake of two sequences A and B, as well as the
|
|
|
|
|
/// length of their shortest editing script.
|
|
|
|
|
///
|
|
|
|
|
/// This uses the "linear space refinement" algorithm presented in
|
|
|
|
|
/// section 4b in the paper. As the paper says, "The idea for doing
|
|
|
|
|
/// so is to simultaneously run the basic algorithm in both the
|
|
|
|
|
/// forward and reverse directions until furthest reaching forward and
|
|
|
|
|
/// reverse paths starting at opposing corners ‘‘overlap’’."
|
|
|
|
|
///
|
2013-12-23 12:40:14 +00:00
|
|
|
|
/// @tparam RandomAccessOutputIterator the type of iterators passed to
|
|
|
|
|
/// this function. It must be a random access output iterator kind.
|
|
|
|
|
///
|
|
|
|
|
/// @tparam EqualityFunctor this must be a class that declares a public
|
|
|
|
|
/// call operator member returning a boolean and taking two arguments
|
|
|
|
|
/// that must be of the same type as the one pointed to by the @ref
|
|
|
|
|
/// RandomAccessOutputIterator template parameter. This functor is
|
|
|
|
|
/// used to compare the elements referred to by the iterators passed in
|
|
|
|
|
/// argument to this function.
|
|
|
|
|
///
|
2013-09-28 12:39:57 +00:00
|
|
|
|
/// @param a_begin an iterator pointing to the beginning of sequence A.
|
|
|
|
|
///
|
|
|
|
|
/// @param a_end an iterator pointing to the end of sequence A. Note
|
|
|
|
|
/// that this points right /after/ the end of vector A.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_begin an iterator pointing to the beginning of sequence B.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_end an iterator pointing to the end of sequence B. Note
|
|
|
|
|
/// that this points right /after/ the end of vector B.
|
|
|
|
|
///
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
/// @param snak out parameter. This is the snake current when the two
|
|
|
|
|
/// paths overlapped. This is set iff the function returns true;
|
|
|
|
|
/// otherwise, this is not touched.
|
2013-09-28 12:39:57 +00:00
|
|
|
|
///
|
|
|
|
|
/// @return true if the snake was found, false otherwise.
|
2013-12-23 12:40:14 +00:00
|
|
|
|
template<typename RandomAccessOutputIterator,
|
|
|
|
|
typename EqualityFunctor>
|
2013-09-28 12:39:57 +00:00
|
|
|
|
bool
|
|
|
|
|
compute_middle_snake(RandomAccessOutputIterator a_begin,
|
|
|
|
|
RandomAccessOutputIterator a_end,
|
|
|
|
|
RandomAccessOutputIterator b_begin,
|
|
|
|
|
RandomAccessOutputIterator b_end,
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
snake& snak, int& ses_len)
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
|
|
|
|
int a_size = a_end - a_begin;
|
|
|
|
|
int N = a_size;
|
|
|
|
|
int b_size = b_end - b_begin;
|
|
|
|
|
int M = b_size;
|
2013-10-02 21:41:26 +00:00
|
|
|
|
int delta = N - M;
|
|
|
|
|
d_path_vec forward_d_paths(a_size, b_size);
|
|
|
|
|
d_path_vec reverse_d_paths(a_size, b_size);
|
2013-10-04 08:35:25 +00:00
|
|
|
|
// These points below are the top leftmost point and bottom
|
|
|
|
|
// right-most points of the edit graph.
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
point first_point(-1, -1), last_point(a_size -1, b_size -1), point_zero(0, 0);
|
2013-10-02 21:41:26 +00:00
|
|
|
|
|
|
|
|
|
// We want the initial step (D = 0, k = 0 in the paper) to find a
|
|
|
|
|
// furthest reaching point on diagonal k == 0; For that, we need the
|
|
|
|
|
// value of x for k == 1; So let's set that value to -1; that is for
|
|
|
|
|
// k == 1 (diagonal 1), the point in the edit graph is (-1,-2).
|
|
|
|
|
// That way, to get the furthest reaching point on diagonal 0 (k ==
|
|
|
|
|
// 0), we go down from (-1,-2) on diagonal 1 and we hit diagonal 0
|
|
|
|
|
// on (-1,-1); that is the starting value that the algorithm expects
|
|
|
|
|
// for k == 0.
|
2013-09-28 12:39:57 +00:00
|
|
|
|
forward_d_paths[1] = -1;
|
2013-10-02 21:41:26 +00:00
|
|
|
|
|
|
|
|
|
// Similarly for the reverse paths, for diagonal delta + 1 (note
|
|
|
|
|
// that diagonals are centered on delta, unlike for forward paths
|
|
|
|
|
// where they are centered on zero), we set the initial point to
|
|
|
|
|
// (a_size, b_size - 1). That way, at step D == 0 and k == delta,
|
|
|
|
|
// to reach diagonal delta from the point (a_size, b_size - 1) on
|
|
|
|
|
// diagonal delta + 1, we just have to move left, and we hit
|
|
|
|
|
// diagonal delta on (a_size - 1, b_size -1); that is the starting
|
|
|
|
|
// point value the algorithm expects for k == 0 in the reverse case.
|
2013-09-28 12:39:57 +00:00
|
|
|
|
reverse_d_paths[delta + 1] = a_size;
|
|
|
|
|
|
2013-10-06 18:48:08 +00:00
|
|
|
|
int d_max = (M + N) / 2 + 1;
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
for (int d = 0; d <= d_max; ++d)
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
2013-10-04 08:35:25 +00:00
|
|
|
|
// First build forward paths.
|
|
|
|
|
for (int k = -d; k <= d; k += 2)
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
snake s;
|
2013-12-23 12:40:14 +00:00
|
|
|
|
bool found =
|
|
|
|
|
end_of_fr_d_path_in_k<RandomAccessOutputIterator,
|
|
|
|
|
EqualityFunctor>(k, d,
|
|
|
|
|
a_begin, a_end,
|
|
|
|
|
b_begin, b_end,
|
|
|
|
|
forward_d_paths, s);
|
2013-10-02 21:41:26 +00:00
|
|
|
|
if (!found)
|
|
|
|
|
continue;
|
|
|
|
|
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
// As the paper says in 4b while explaining the middle snake
|
|
|
|
|
// algorithm:
|
2013-09-28 12:39:57 +00:00
|
|
|
|
//
|
|
|
|
|
// "Thus when delta is odd, check for overlap only while
|
|
|
|
|
// extending forward paths ..."
|
|
|
|
|
if ((delta % 2)
|
2013-10-02 21:41:26 +00:00
|
|
|
|
&& (k >= (delta - (d - 1))) && (k <= (delta + (d - 1))))
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
point reverse_end;
|
2013-09-28 12:39:57 +00:00
|
|
|
|
reverse_end.x(reverse_d_paths[k]);
|
|
|
|
|
reverse_end.y(reverse_end.x() - k);
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
if (ends_of_furthest_d_paths_overlap(s.end(), reverse_end))
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
ses_len = 2 * d - 1;
|
|
|
|
|
snak = s;
|
|
|
|
|
return true;
|
2013-09-28 12:39:57 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2013-10-04 08:35:25 +00:00
|
|
|
|
// Now build reverse paths.
|
2013-09-28 12:39:57 +00:00
|
|
|
|
for (int k = -d; k <= d; k += 2)
|
|
|
|
|
{
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
snake s;
|
2013-12-23 12:40:14 +00:00
|
|
|
|
bool found =
|
|
|
|
|
end_of_frr_d_path_in_k_plus_delta<RandomAccessOutputIterator,
|
|
|
|
|
EqualityFunctor>(k, d,
|
|
|
|
|
a_begin, a_end,
|
|
|
|
|
b_begin, b_end,
|
|
|
|
|
reverse_d_paths,
|
|
|
|
|
s);
|
2013-10-02 21:41:26 +00:00
|
|
|
|
|
|
|
|
|
if (!found)
|
|
|
|
|
continue;
|
|
|
|
|
|
2013-09-28 12:39:57 +00:00
|
|
|
|
// And the paper continues by saying:
|
|
|
|
|
//
|
|
|
|
|
// "... and when delta is even, check for overlap only while
|
|
|
|
|
// extending reverse paths."
|
|
|
|
|
int k_plus_delta = k + delta;
|
|
|
|
|
if (!(delta % 2)
|
2013-10-02 21:41:26 +00:00
|
|
|
|
&& (k_plus_delta >= -d) && (k_plus_delta <= d))
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
point forward_end;
|
2013-09-28 12:39:57 +00:00
|
|
|
|
forward_end.x(forward_d_paths[k_plus_delta]);
|
|
|
|
|
forward_end.y(forward_end.x() - k_plus_delta);
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
if (ends_of_furthest_d_paths_overlap(forward_end, s.end()))
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
ses_len = 2 * d;
|
|
|
|
|
snak = s;
|
|
|
|
|
return true;
|
2013-09-28 12:39:57 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool
|
|
|
|
|
compute_middle_snake(const char* str1, const char* str2,
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
snake& s, int& ses_len);
|
2013-09-28 12:39:57 +00:00
|
|
|
|
|
|
|
|
|
/// This prints the middle snake of two strings.
|
|
|
|
|
///
|
|
|
|
|
/// @param a_begin the beginning of the first string.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_begin the beginning of the second string.
|
|
|
|
|
///
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
/// @param s the snake to print.
|
2013-09-28 12:39:57 +00:00
|
|
|
|
///
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
/// @param out the output stream to print the snake to.
|
2013-09-28 12:39:57 +00:00
|
|
|
|
template<typename RandomAccessOutputIterator>
|
|
|
|
|
void
|
|
|
|
|
print_snake(RandomAccessOutputIterator a_begin,
|
|
|
|
|
RandomAccessOutputIterator b_begin,
|
2017-04-12 07:53:10 +00:00
|
|
|
|
const snake &s, ostream& out)
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
if (s.is_empty())
|
2013-09-28 12:39:57 +00:00
|
|
|
|
return;
|
|
|
|
|
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
out << "snake start: ";
|
|
|
|
|
out << "(" << s.begin().x() << ", " << s.end().y() << ")\n";
|
|
|
|
|
|
|
|
|
|
out << "snake intermediate: ";
|
|
|
|
|
out << "(" << s.intermediate().x() << ", " << s.intermediate().y() << ")\n";
|
|
|
|
|
|
|
|
|
|
out << "diagonal point(s): ";
|
|
|
|
|
if (s.has_diagonal_edge())
|
|
|
|
|
for (int x = s.intermediate().x(), y = s.intermediate().y();
|
|
|
|
|
x <= s.end().x() && y <= s.end().y();
|
|
|
|
|
++x, ++y)
|
|
|
|
|
{
|
|
|
|
|
assert(a_begin[x] == b_begin[y]);
|
|
|
|
|
out << "(" << x << "," << y << ") ";
|
|
|
|
|
}
|
|
|
|
|
out << "\n";
|
|
|
|
|
|
|
|
|
|
out << "snake end: ";
|
|
|
|
|
out << "(" << s.end().x() << ", " << s.end().y() << ")\n";
|
2013-09-28 12:39:57 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Compute the length of the shortest edit script for two sequences a
|
|
|
|
|
/// and b. This is done using the "Greedy LCS/SES" of figure 2 in the
|
|
|
|
|
/// paper. It can walk the edit graph either forward (when reverse is
|
|
|
|
|
/// false) or backward starting from the end (when reverse is true).
|
|
|
|
|
///
|
|
|
|
|
/// Here, note that the real content of a and b should start at index
|
|
|
|
|
/// 1, for this implementation of the algorithm to match the paper's
|
|
|
|
|
/// algorithm in a straightforward manner. So please make sure that
|
|
|
|
|
/// at index 0, we just get some non-used value.
|
|
|
|
|
///
|
2013-12-23 12:40:14 +00:00
|
|
|
|
/// @tparam RandomAccessOutputIterator the type of iterators passed to
|
|
|
|
|
/// this function. It must be a random access output iterator kind.
|
|
|
|
|
///
|
|
|
|
|
/// @tparam EqualityFunctor this must be a class that declares a public
|
|
|
|
|
/// call operator member returning a boolean and taking two arguments
|
|
|
|
|
/// that must be of the same type as the one pointed to by the @ref
|
|
|
|
|
/// RandomAccessOutputIterator template parameter. This functor is
|
|
|
|
|
/// used to compare the elements referred to by the iterators passed in
|
|
|
|
|
/// argument to this function.
|
|
|
|
|
///
|
2013-09-28 12:39:57 +00:00
|
|
|
|
/// @param a the first sequence we care about.
|
|
|
|
|
///
|
|
|
|
|
/// @param b the second sequence we care about.
|
|
|
|
|
///
|
|
|
|
|
/// @param v the vector that contains the end points of the furthest
|
|
|
|
|
/// reaching d-path and (d-1)-path.
|
2013-12-23 12:40:14 +00:00
|
|
|
|
template<typename RandomAccessOutputIterator,
|
|
|
|
|
typename EqualityFunctor>
|
2013-09-28 12:39:57 +00:00
|
|
|
|
int
|
|
|
|
|
ses_len(RandomAccessOutputIterator a_begin,
|
|
|
|
|
RandomAccessOutputIterator a_end,
|
|
|
|
|
RandomAccessOutputIterator b_begin,
|
|
|
|
|
RandomAccessOutputIterator b_end,
|
|
|
|
|
d_path_vec& v, bool reverse)
|
|
|
|
|
{
|
2013-10-02 21:41:26 +00:00
|
|
|
|
unsigned a_size = a_end - a_begin;
|
|
|
|
|
unsigned b_size = b_end - b_begin;
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
snake snak;
|
2013-09-28 12:39:57 +00:00
|
|
|
|
|
|
|
|
|
assert(v.max_d() == a_size + b_size);
|
|
|
|
|
|
2013-10-02 21:41:26 +00:00
|
|
|
|
int delta = a_size - b_size;
|
2013-09-28 12:39:57 +00:00
|
|
|
|
|
|
|
|
|
if (reverse)
|
|
|
|
|
// Set a fictitious (M, N-1) into v[1], to find the furthest
|
|
|
|
|
// reaching reverse 0-path (i.e, when we are at d == 0 and k == 0).
|
|
|
|
|
v[delta + 1] = a_size - 1;
|
|
|
|
|
else
|
|
|
|
|
// Set a fictitious (-1,-2) point into v[1], to find the furthest
|
|
|
|
|
// reaching forward 0-path (i.e, when we are at d == 0 and k == 0).
|
|
|
|
|
v[1] = -1;
|
|
|
|
|
|
2013-10-02 21:41:26 +00:00
|
|
|
|
for (unsigned d = 0; d <= v.max_d(); ++d)
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
2013-10-02 21:41:26 +00:00
|
|
|
|
for (int k = -d; k <= (int) d; k += 2)
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
|
|
|
|
point end;
|
|
|
|
|
if (reverse)
|
|
|
|
|
{
|
2013-12-23 12:40:14 +00:00
|
|
|
|
bool found =
|
|
|
|
|
end_of_frr_d_path_in_k_plus_delta<RandomAccessOutputIterator,
|
|
|
|
|
EqualityFunctor>(k, d,
|
|
|
|
|
a_begin, a_end,
|
|
|
|
|
b_begin, b_end,
|
|
|
|
|
v, snak);
|
2013-09-28 12:39:57 +00:00
|
|
|
|
// If we reached the upper left corner of the edit graph then
|
|
|
|
|
// we are done.
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
if (found && snak.end().x() == -1 && snak.end().y() == -1)
|
2013-09-28 12:39:57 +00:00
|
|
|
|
return d;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2013-12-23 12:40:14 +00:00
|
|
|
|
end_of_fr_d_path_in_k<RandomAccessOutputIterator,
|
|
|
|
|
EqualityFunctor>(k, d,
|
|
|
|
|
a_begin, a_end,
|
|
|
|
|
b_begin, b_end,
|
|
|
|
|
v, snak);
|
2013-09-28 12:39:57 +00:00
|
|
|
|
// If we reached the lower right corner of the edit
|
|
|
|
|
// graph then we are done.
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
if ((snak.end().x() == (int) a_size - 1)
|
|
|
|
|
&& (snak.end().y() == (int) b_size - 1))
|
2013-09-28 12:39:57 +00:00
|
|
|
|
return d;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2013-12-23 12:40:14 +00:00
|
|
|
|
/// Compute the length of the shortest edit script for two sequences a
|
|
|
|
|
/// and b. This is done using the "Greedy LCS/SES" of figure 2 in the
|
|
|
|
|
/// paper. It can walk the edit graph either forward (when reverse is
|
|
|
|
|
/// false) or backward starting from the end (when reverse is true).
|
|
|
|
|
///
|
|
|
|
|
/// Here, note that the real content of a and b should start at index
|
|
|
|
|
/// 1, for this implementation of the algorithm to match the paper's
|
|
|
|
|
/// algorithm in a straightforward manner. So please make sure that
|
|
|
|
|
/// at index 0, we just get some non-used value.
|
|
|
|
|
///
|
|
|
|
|
/// Note that the equality operator used to compare the elements
|
|
|
|
|
/// passed in argument to this function is the default "==" operator.
|
|
|
|
|
///
|
|
|
|
|
/// @tparam RandomAccessOutputIterator the type of iterators passed to
|
|
|
|
|
/// this function. It must be a random access output iterator kind.
|
|
|
|
|
///
|
|
|
|
|
/// @param a the first sequence we care about.
|
|
|
|
|
///
|
|
|
|
|
/// @param b the second sequence we care about.
|
|
|
|
|
///
|
|
|
|
|
/// @param v the vector that contains the end points of the furthest
|
|
|
|
|
/// reaching d-path and (d-1)-path.
|
|
|
|
|
template<typename RandomAccessOutputIterator>
|
|
|
|
|
int
|
|
|
|
|
ses_len(RandomAccessOutputIterator a_begin,
|
|
|
|
|
RandomAccessOutputIterator a_end,
|
|
|
|
|
RandomAccessOutputIterator b_begin,
|
|
|
|
|
RandomAccessOutputIterator b_end,
|
|
|
|
|
d_path_vec& v, bool reverse)
|
|
|
|
|
{
|
|
|
|
|
// Forward every argument unchanged to the two-template-parameter
// ses_len overload, selecting default_eq_functor as the equality
// functor, and return its result as is.
return ses_len<RandomAccessOutputIterator, default_eq_functor>(a_begin, a_end,
|
|
|
|
|
b_begin, b_end,
|
|
|
|
|
v, reverse);
|
|
|
|
|
}
|
|
|
|
|
|
2013-09-28 12:39:57 +00:00
|
|
|
|
int
|
|
|
|
|
ses_len(const char* str1,
|
|
|
|
|
const char* str2,
|
|
|
|
|
bool reverse = false);
|
|
|
|
|
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
bool
|
|
|
|
|
snake_end_points(const snake& s, point&, point&);
|
|
|
|
|
|
2013-09-28 12:39:57 +00:00
|
|
|
|
/// Compute the longest common subsequence of two (sub-regions of)
|
|
|
|
|
/// sequences as well as the shortest edit script from transforming
|
|
|
|
|
/// the first (sub-region of) sequence into the second (sub-region of)
|
|
|
|
|
/// sequence.
|
|
|
|
|
///
|
|
|
|
|
/// A sequence is determined by a base, a beginning offset and an end
|
|
|
|
|
/// offset. The base always points to the container that contains the
|
|
|
|
|
/// sequence to consider. The beginning offset is an iterator that
|
|
|
|
|
/// points to the beginning of the sub-region of the sequence that we
|
|
|
|
|
/// actually want to consider. The end offset is an iterator that
|
|
|
|
|
/// points to the end of the sub-region of the sequence that we
|
|
|
|
|
/// actually want to consider.
|
|
|
|
|
///
|
|
|
|
|
/// This uses the LCS algorithm of the paper at section 4b.
|
|
|
|
|
///
|
2013-12-23 12:40:14 +00:00
|
|
|
|
/// @tparam RandomAccessOutputIterator the type of iterators passed to
|
|
|
|
|
/// this function. It must be a random access output iterator kind.
|
|
|
|
|
///
|
|
|
|
|
/// @tparam EqualityFunctor this must be a class that declares a public
|
|
|
|
|
/// call operator member returning a boolean and taking two arguments
|
|
|
|
|
/// that must be of the same type as the one pointed to by the @ref
|
|
|
|
|
/// RandomAccessOutputIterator template parameter. This functor is
|
|
|
|
|
/// used to compare the elements referred to by the iterators passed in
|
|
|
|
|
/// argument to this function.
|
|
|
|
|
///
|
2013-09-28 12:39:57 +00:00
|
|
|
|
/// @param a_base the iterator to the base of the first sequence.
|
|
|
|
|
///
|
|
|
|
|
/// @param a_start an iterator to the beginning of the sub-region
|
|
|
|
|
/// of the first sequence to actually consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param a_end an iterator to the end of the sub-region of the first
|
|
|
|
|
/// sequence to consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_base an iterator to the base of the second sequence to
|
|
|
|
|
/// consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_start an iterator to the beginning of the sub-region
|
|
|
|
|
/// of the second sequence to actually consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_end an iterator to the end of the sub-region of the
|
|
|
|
|
/// second sequence to actually consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param lcs the resulting lcs. This is set iff the function
|
|
|
|
|
/// returns true.
|
|
|
|
|
///
|
|
|
|
|
/// @param ses the resulting shortest editing script.
|
|
|
|
|
///
|
|
|
|
|
/// @param ses_len the length of the ses above. Normally this can be
|
2014-03-12 13:52:41 +00:00
|
|
|
|
/// retrieved from ses.length(), but this parameter is here for sanity
|
|
|
|
|
/// check purposes. The function computes the length of the ses in
|
|
|
|
|
/// two redundant ways and ensures that both methods lead to the same
|
|
|
|
|
/// result.
|
2013-09-28 12:39:57 +00:00
|
|
|
|
///
|
|
|
|
|
/// @return true upon successful completion, false otherwise.
|
2013-12-23 12:40:14 +00:00
|
|
|
|
template<typename RandomAccessOutputIterator,
|
|
|
|
|
typename EqualityFunctor>
|
2013-09-28 12:39:57 +00:00
|
|
|
|
void
|
|
|
|
|
compute_diff(RandomAccessOutputIterator a_base,
|
|
|
|
|
RandomAccessOutputIterator a_begin,
|
|
|
|
|
RandomAccessOutputIterator a_end,
|
|
|
|
|
RandomAccessOutputIterator b_base,
|
|
|
|
|
RandomAccessOutputIterator b_begin,
|
|
|
|
|
RandomAccessOutputIterator b_end,
|
|
|
|
|
vector<point>& lcs,
|
|
|
|
|
edit_script& ses,
|
|
|
|
|
int& ses_len)
|
|
|
|
|
{
|
|
|
|
|
int a_size = a_end - a_begin;
|
|
|
|
|
int b_size = b_end - b_begin;
|
2013-10-06 18:48:08 +00:00
|
|
|
|
unsigned a_offset = a_begin - a_base, b_offset = b_begin - b_base;
|
2013-09-28 12:39:57 +00:00
|
|
|
|
|
|
|
|
|
if (a_size == 0 || b_size == 0)
|
|
|
|
|
{
|
|
|
|
|
if (a_size > 0 && b_size == 0)
|
|
|
|
|
// All elements of the first sequences have been deleted. So add
|
|
|
|
|
// the relevant deletions to the edit script.
|
|
|
|
|
for (RandomAccessOutputIterator i = a_begin; i < a_end; ++i)
|
|
|
|
|
ses.deletions().push_back(deletion(i - a_base));
|
|
|
|
|
|
|
|
|
|
if (b_size > 0 && a_size == 0)
|
|
|
|
|
{
|
|
|
|
|
// All elements present in the second sequence are part of
|
|
|
|
|
// an insertion into the first sequence at a_end. So add
|
|
|
|
|
// that insertion to the edit script.
|
|
|
|
|
int a_full_size = a_end - a_base;
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
int insertion_index = a_full_size ? a_full_size - 1 : -1;
|
2013-09-28 12:39:57 +00:00
|
|
|
|
insertion ins(insertion_index);
|
|
|
|
|
for (RandomAccessOutputIterator i = b_begin; i < b_end; ++i)
|
|
|
|
|
ins.inserted_indexes().push_back(i - b_base);
|
|
|
|
|
|
|
|
|
|
ses.insertions().push_back(ins);
|
|
|
|
|
}
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
|
|
|
|
|
ses_len = a_size + b_size;
|
2013-09-28 12:39:57 +00:00
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int d = 0;
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
snake snak;
|
|
|
|
|
vector<point> trace; // the trace of the edit graph. Read the paper
|
|
|
|
|
// to understand what a trace is.
|
2013-12-23 12:40:14 +00:00
|
|
|
|
bool has_snake =
|
|
|
|
|
compute_middle_snake<RandomAccessOutputIterator,
|
|
|
|
|
EqualityFunctor>(a_begin, a_end,
|
|
|
|
|
b_begin, b_end,
|
|
|
|
|
snak, d);
|
2013-09-28 12:39:57 +00:00
|
|
|
|
if (has_snake)
|
|
|
|
|
{
|
|
|
|
|
// So middle_{begin,end} are expressed wrt a_begin and b_begin.
|
|
|
|
|
// Let's express them wrt a_base and b_base.
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
snak.add(a_offset, b_offset);
|
2013-09-28 12:39:57 +00:00
|
|
|
|
ses_len = d;
|
|
|
|
|
}
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
|
|
|
|
|
if (has_snake)
|
|
|
|
|
{
|
|
|
|
|
if ( snak.has_diagonal_edge())
|
|
|
|
|
for (int x = snak.diagonal_start().x(), y = snak.diagonal_start().y();
|
|
|
|
|
x <= snak.end().x() && y <= snak.end().y();
|
|
|
|
|
++x, ++y)
|
|
|
|
|
{
|
|
|
|
|
point p(x, y);
|
|
|
|
|
trace.push_back(p);
|
|
|
|
|
}
|
|
|
|
|
}
|
2013-09-28 12:39:57 +00:00
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
// So there is no middle snake. That means there is no lcs, so
|
|
|
|
|
// the two sequences are different.
|
|
|
|
|
|
|
|
|
|
// In other words, all the elements of the first sequence have
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
// been deleted ...
|
2013-09-28 12:39:57 +00:00
|
|
|
|
for (RandomAccessOutputIterator i = a_begin; i < a_end; ++i)
|
|
|
|
|
ses.deletions().push_back(deletion(i - a_base));
|
|
|
|
|
|
2013-10-04 08:35:25 +00:00
|
|
|
|
// ... and all the elements of the second sequence are insertions
|
2013-09-28 12:39:57 +00:00
|
|
|
|
// that happen at the beginning of the first sequence.
|
|
|
|
|
insertion ins(a_begin - a_base);
|
|
|
|
|
for (RandomAccessOutputIterator i = b_begin; i < b_end; ++i)
|
|
|
|
|
ins.inserted_indexes().push_back(i - b_base);
|
|
|
|
|
ses.insertions().push_back(ins);
|
|
|
|
|
|
|
|
|
|
ses_len = a_size + b_size;
|
|
|
|
|
assert(ses_len == ses.length());
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (d > 1)
|
|
|
|
|
{
|
2014-02-07 09:42:54 +00:00
|
|
|
|
int tmp_ses_len0 = 0;
|
|
|
|
|
edit_script tmp_ses0;
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
point px, pu;
|
|
|
|
|
snake_end_points(snak, px, pu);
|
2013-12-23 12:40:14 +00:00
|
|
|
|
compute_diff<RandomAccessOutputIterator,
|
|
|
|
|
EqualityFunctor>(a_base, a_begin, a_base + px.x() + 1,
|
|
|
|
|
b_base, b_begin, b_base + px.y() + 1,
|
2014-02-07 09:42:54 +00:00
|
|
|
|
lcs, tmp_ses0, tmp_ses_len0);
|
2013-09-28 12:39:57 +00:00
|
|
|
|
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
lcs.insert(lcs.end(), trace.begin(), trace.end());
|
2013-09-28 12:39:57 +00:00
|
|
|
|
|
2014-02-07 09:42:54 +00:00
|
|
|
|
int tmp_ses_len1 = 0;
|
|
|
|
|
edit_script tmp_ses1;
|
2013-12-23 12:40:14 +00:00
|
|
|
|
compute_diff<RandomAccessOutputIterator,
|
|
|
|
|
EqualityFunctor>(a_base, a_base + pu.x() + 1, a_end,
|
|
|
|
|
b_base, b_base + pu.y() + 1, b_end,
|
2014-02-07 09:42:54 +00:00
|
|
|
|
lcs, tmp_ses1, tmp_ses_len1);
|
|
|
|
|
assert(tmp_ses0.length() + tmp_ses1.length() == d);
|
|
|
|
|
assert(tmp_ses_len0 + tmp_ses_len1 == d);
|
|
|
|
|
ses.append(tmp_ses0);
|
|
|
|
|
ses.append(tmp_ses1);
|
2013-09-28 12:39:57 +00:00
|
|
|
|
}
|
|
|
|
|
else if (d == 1)
|
|
|
|
|
{
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, we now know better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
if (snak.has_diagonal_edge())
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, we now know better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
for (int x = snak.diagonal_start().x(), y = snak.diagonal_start().y();
|
|
|
|
|
x <= snak.end().x() && y <= snak.end().y();
|
|
|
|
|
++x, ++y)
|
2013-10-04 08:35:25 +00:00
|
|
|
|
{
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, we now know better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
point p(x, y);
|
|
|
|
|
trace.push_back(p);
|
2013-10-04 08:35:25 +00:00
|
|
|
|
}
|
2013-09-28 12:39:57 +00:00
|
|
|
|
}
|
|
|
|
|
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, we now know better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
if (snak.has_vertical_edge())
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, we now know better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
point p = snak.intermediate();
|
|
|
|
|
insertion ins(p.x());
|
|
|
|
|
ins.inserted_indexes().push_back(p.y());
|
|
|
|
|
ses.insertions().push_back(ins);
|
2013-09-28 12:39:57 +00:00
|
|
|
|
}
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, we now know better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
else if (snak.has_horizontal_edge())
|
2013-09-28 12:39:57 +00:00
|
|
|
|
{
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, we now know better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
if (snak.is_forward())
|
2013-10-04 08:35:25 +00:00
|
|
|
|
{
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload.
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, we now know better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
deletion del(snak.intermediate().x());
|
|
|
|
|
ses.deletions().push_back(del);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
deletion del(snak.begin().x());
|
|
|
|
|
ses.deletions().push_back(del);
|
2013-10-04 08:35:25 +00:00
|
|
|
|
}
|
2013-09-28 12:39:57 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (d == 0)
|
|
|
|
|
{
|
|
|
|
|
// Obviously only the middle snake is part of the solution, as
|
|
|
|
|
// there is no edit script; iow, the two sequences are
|
|
|
|
|
// identical.
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were losing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as they are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
lcs.insert(lcs.end(), trace.begin(), trace.end());
|
2013-09-28 12:39:57 +00:00
|
|
|
|
ses_len = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
assert(ses_len == ses.length());
|
|
|
|
|
}
|
|
|
|
|
|
2013-10-05 21:02:00 +00:00
|
|
|
|
/// Compute the longest common subsequence of two (sub-regions of)
|
|
|
|
|
/// sequences as well as the shortest edit script from transforming
|
|
|
|
|
/// the first (sub-region of) sequence into the second (sub-region of)
|
|
|
|
|
/// sequence.
|
|
|
|
|
///
|
|
|
|
|
/// This uses the LCS algorithm of the paper at section 4b.
|
|
|
|
|
///
|
2013-12-23 12:40:14 +00:00
|
|
|
|
/// @tparm RandomAccessOutputIterator the type of iterators passed to
|
|
|
|
|
/// this function. It must be a random access output iterator kind.
|
|
|
|
|
///
|
|
|
|
|
/// @tparm EqualityFunctor this must be a class that declares a public
|
|
|
|
|
/// call operator member returning a boolean and taking two arguments
|
|
|
|
|
/// that must be of the same type as the one pointed to by the @ref
|
|
|
|
|
/// RandomAccessOutputIterator template parameter. This functor is
|
|
|
|
|
/// used to compare the elements referred to by the iterators pased in
|
|
|
|
|
/// argument to this function.
|
|
|
|
|
///
|
2013-10-05 21:02:00 +00:00
|
|
|
|
/// @param a_start an iterator to the beginning of the first sequence
|
|
|
|
|
/// to consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param a_end an iterator to the end of the first sequence to
|
|
|
|
|
/// consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_start an iterator to the beginning of the second sequence
|
|
|
|
|
/// to consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_end an iterator to the end of the second sequence to
|
|
|
|
|
/// consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param lcs the resulting lcs. This is set iff the function
|
|
|
|
|
/// returns true.
|
|
|
|
|
///
|
|
|
|
|
/// @param ses the resulting shortest editing script.
|
|
|
|
|
///
|
|
|
|
|
/// @param ses_len the length of the ses above. Normally this can be
|
2014-03-26 14:06:32 +00:00
|
|
|
|
/// retrieved from ses.length(), but this parameter is here for sanity
|
|
|
|
|
/// check purposes. The function computes the length of the ses in
|
|
|
|
|
/// two redundant ways and ensures that both methods lead to the same
|
|
|
|
|
/// result.
|
2013-10-05 21:02:00 +00:00
|
|
|
|
///
|
|
|
|
|
/// @return true upon successful completion, false otherwise.
|
2013-12-23 12:40:14 +00:00
|
|
|
|
template<typename RandomAccessOutputIterator,
|
|
|
|
|
typename EqualityFunctor>
|
2013-10-05 21:02:00 +00:00
|
|
|
|
void
|
|
|
|
|
compute_diff(RandomAccessOutputIterator a_begin,
|
|
|
|
|
RandomAccessOutputIterator a_end,
|
|
|
|
|
RandomAccessOutputIterator b_begin,
|
|
|
|
|
RandomAccessOutputIterator b_end,
|
|
|
|
|
vector<point>& lcs,
|
|
|
|
|
edit_script& ses,
|
|
|
|
|
int& ses_len)
|
|
|
|
|
{
|
2013-12-23 12:40:14 +00:00
|
|
|
|
compute_diff<RandomAccessOutputIterator,
|
|
|
|
|
EqualityFunctor>(a_begin, a_begin, a_end,
|
|
|
|
|
b_begin, b_begin, b_end,
|
|
|
|
|
lcs, ses, ses_len);
|
2013-10-05 21:02:00 +00:00
|
|
|
|
}
|
|
|
|
|
|
2013-09-28 12:39:57 +00:00
|
|
|
|
/// Compute the longest common subsequence of two (sub-regions of)
|
|
|
|
|
/// sequences as well as the shortest edit script from transforming
|
|
|
|
|
/// the first (sub-region of) sequence into the second (sub-region of)
|
|
|
|
|
/// sequence.
|
|
|
|
|
///
|
|
|
|
|
/// A sequence is determined by a base, a beginning offset and an end
|
|
|
|
|
/// offset. The base always points to the container that contains the
|
|
|
|
|
/// sequence to consider. The beginning offset is an iterator that
|
|
|
|
|
/// points the beginning of the sub-region of the sequence that we
|
|
|
|
|
/// actually want to consider. The end offset is an iterator that
|
|
|
|
|
/// points to the end of the sub-region of the sequence that we
|
|
|
|
|
/// actually want to consider.
|
|
|
|
|
///
|
|
|
|
|
/// This uses the LCS algorithm of the paper at section 4b.
|
|
|
|
|
///
|
2013-12-23 12:40:14 +00:00
|
|
|
|
/// @tparm RandomAccessOutputIterator the type of iterators passed to
|
|
|
|
|
/// this function. It must be a random access output iterator kind.
|
|
|
|
|
///
|
|
|
|
|
/// @tparm EqualityFunctor this must be a class that declares a public
|
|
|
|
|
/// call operator member returning a boolean and taking two arguments
|
|
|
|
|
/// that must be of the same type as the one pointed to by the @ref
|
|
|
|
|
/// RandomAccessOutputIterator template parameter. This functor is
|
|
|
|
|
/// used to compare the elements referred to by the iterators pased in
|
|
|
|
|
/// argument to this function.
|
|
|
|
|
///
|
2013-09-28 12:39:57 +00:00
|
|
|
|
/// @param a_base the iterator to the base of the first sequence.
|
|
|
|
|
///
|
|
|
|
|
/// @param a_start an iterator to the beginning of the sub-region
|
|
|
|
|
/// of the first sequence to actually consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param a_end an iterator to the end of the sub-region of the first
|
|
|
|
|
/// sequence to consider.
|
|
|
|
|
///
|
|
|
|
|
///@param b_base an iterator to the base of the second sequence to
|
|
|
|
|
///consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_start an iterator to the beginning of the sub-region
|
|
|
|
|
/// of the second sequence to actually consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_end an iterator to the end of the sub-region of the
|
|
|
|
|
/// second sequence to actually consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param lcs the resulting lcs. This is set iff the function
|
|
|
|
|
/// returns true.
|
|
|
|
|
///
|
|
|
|
|
/// @param ses the resulting shortest editing script.
|
|
|
|
|
///
|
|
|
|
|
/// @return true upon successful completion, false otherwise.
|
2013-12-23 12:40:14 +00:00
|
|
|
|
template<typename RandomAccessOutputIterator,
|
|
|
|
|
typename EqualityFunctor>
|
2013-09-28 12:39:57 +00:00
|
|
|
|
void
|
|
|
|
|
compute_diff(RandomAccessOutputIterator a_base,
|
|
|
|
|
RandomAccessOutputIterator a_begin,
|
|
|
|
|
RandomAccessOutputIterator a_end,
|
|
|
|
|
RandomAccessOutputIterator b_base,
|
|
|
|
|
RandomAccessOutputIterator b_begin,
|
|
|
|
|
RandomAccessOutputIterator b_end,
|
|
|
|
|
vector<point>& lcs,
|
|
|
|
|
edit_script& ses)
|
|
|
|
|
{
|
|
|
|
|
int ses_len = 0;
|
|
|
|
|
|
2013-12-23 12:40:14 +00:00
|
|
|
|
compute_diff<RandomAccessOutputIterator,
|
|
|
|
|
EqualityFunctor>(a_base, a_begin, a_end,
|
|
|
|
|
b_base, b_begin, b_end,
|
|
|
|
|
lcs, ses, ses_len);
|
2013-09-28 12:39:57 +00:00
|
|
|
|
}
|
|
|
|
|
|
2013-10-05 21:02:00 +00:00
|
|
|
|
/// Compute the longest common subsequence of two (sub-regions of)
|
|
|
|
|
/// sequences as well as the shortest edit script from transforming
|
|
|
|
|
/// the first (sub-region of) sequence into the second (sub-region of)
|
|
|
|
|
/// sequence.
|
|
|
|
|
///
|
|
|
|
|
/// This uses the LCS algorithm of the paper at section 4b.
|
|
|
|
|
///
|
2013-12-23 12:40:14 +00:00
|
|
|
|
/// @tparm RandomAccessOutputIterator the type of iterators passed to
|
|
|
|
|
/// this function. It must be a random access output iterator kind.
|
|
|
|
|
///
|
|
|
|
|
/// @tparm EqualityFunctor this must be a class that declares a public
|
|
|
|
|
/// call operator member returning a boolean and taking two arguments
|
|
|
|
|
/// that must be of the same type as the one pointed to by the @ref
|
|
|
|
|
/// RandomAccessOutputIterator template parameter. This functor is
|
|
|
|
|
/// used to compare the elements referred to by the iterators pased in
|
|
|
|
|
/// argument to this function.
|
|
|
|
|
///
|
2013-10-05 21:02:00 +00:00
|
|
|
|
/// @param a_start an iterator to the beginning of the first sequence
|
|
|
|
|
/// to consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param a_end an iterator to the end of the first sequence to
|
|
|
|
|
/// consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_start an iterator to the beginning of the sequence to
|
|
|
|
|
/// actually consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_end an iterator to the end of second sequence to
|
|
|
|
|
/// consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param lcs the resulting lcs. This is set iff the function
|
|
|
|
|
/// returns true.
|
|
|
|
|
///
|
|
|
|
|
/// @param ses the resulting shortest editing script.
|
|
|
|
|
///
|
|
|
|
|
/// @return true upon successful completion, false otherwise.
|
2013-12-23 12:40:14 +00:00
|
|
|
|
template<typename RandomAccessOutputIterator,
|
|
|
|
|
typename EqualityFunctor>
|
|
|
|
|
void
|
|
|
|
|
compute_diff(RandomAccessOutputIterator a_begin,
|
|
|
|
|
RandomAccessOutputIterator a_end,
|
|
|
|
|
RandomAccessOutputIterator b_begin,
|
|
|
|
|
RandomAccessOutputIterator b_end,
|
|
|
|
|
vector<point>& lcs,
|
|
|
|
|
edit_script& ses)
|
|
|
|
|
{
|
|
|
|
|
compute_diff<RandomAccessOutputIterator,
|
|
|
|
|
EqualityFunctor>(a_begin, a_begin, a_end,
|
|
|
|
|
b_begin, b_begin, b_end,
|
|
|
|
|
lcs, ses);
|
|
|
|
|
}
|
|
|
|
|
|
2014-02-24 21:47:14 +00:00
|
|
|
|
/// Compute the longest common subsequence of two (sub-regions of)
|
|
|
|
|
/// sequences as well as the shortest edit script from transforming
|
|
|
|
|
/// the first (sub-region of) sequence into the second (sub-region of)
|
|
|
|
|
/// sequence.
|
|
|
|
|
///
|
|
|
|
|
/// This uses the LCS algorithm of the paper at section 4b.
|
|
|
|
|
///
|
|
|
|
|
/// @tparm RandomAccessOutputIterator the type of iterators passed to
|
|
|
|
|
/// this function. It must be a random access output iterator kind.
|
|
|
|
|
///
|
|
|
|
|
/// @param a_start an iterator to the beginning of the first sequence
|
|
|
|
|
/// to consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param a_end an iterator to the end of the first sequence to
|
|
|
|
|
/// consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_start an iterator to the beginning of the sequence to
|
|
|
|
|
/// actually consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_end an iterator to the end of second sequence to
|
|
|
|
|
/// consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param lcs the resulting lcs. This is set iff the function
|
|
|
|
|
/// returns true.
|
|
|
|
|
///
|
|
|
|
|
/// @param ses the resulting shortest editing script.
|
|
|
|
|
///
|
|
|
|
|
/// @return true upon successful completion, false otherwise.
|
2013-10-05 21:02:00 +00:00
|
|
|
|
template<typename RandomAccessOutputIterator>
|
|
|
|
|
void
|
|
|
|
|
compute_diff(RandomAccessOutputIterator a_begin,
|
|
|
|
|
RandomAccessOutputIterator a_end,
|
|
|
|
|
RandomAccessOutputIterator b_begin,
|
|
|
|
|
RandomAccessOutputIterator b_end,
|
|
|
|
|
vector<point>& lcs,
|
|
|
|
|
edit_script& ses)
|
|
|
|
|
{
|
2013-12-23 12:40:14 +00:00
|
|
|
|
compute_diff<RandomAccessOutputIterator,
|
|
|
|
|
default_eq_functor>(a_begin, a_end, b_begin, b_end, lcs, ses);
|
2013-10-05 21:02:00 +00:00
|
|
|
|
}
|
|
|
|
|
|
2013-10-02 21:41:26 +00:00
|
|
|
|
/// Compute the longest common subsequence of two (sub-regions of)
|
|
|
|
|
/// sequences as well as the shortest edit script from transforming
|
|
|
|
|
/// the first (sub-region of) sequence into the second (sub-region of)
|
|
|
|
|
/// sequence.
|
|
|
|
|
///
|
|
|
|
|
/// A sequence is determined by a base, a beginning offset and an end
|
|
|
|
|
/// offset. The base always points to the container that contains the
|
|
|
|
|
/// sequence to consider. The beginning offset is an iterator that
|
|
|
|
|
/// points the beginning of the sub-region of the sequence that we
|
|
|
|
|
/// actually want to consider. The end offset is an iterator that
|
|
|
|
|
/// points to the end of the sub-region of the sequence that we
|
|
|
|
|
/// actually want to consider.
|
|
|
|
|
///
|
|
|
|
|
/// This uses the LCS algorithm of the paper at section 4b.
|
|
|
|
|
///
|
2013-12-23 12:40:14 +00:00
|
|
|
|
/// @tparm RandomAccessOutputIterator the type of iterators passed to
|
|
|
|
|
/// this function. It must be a random access output iterator kind.
|
|
|
|
|
///
|
|
|
|
|
/// @tparm EqualityFunctor this must be a class that declares a public
|
|
|
|
|
/// call operator member returning a boolean and taking two arguments
|
|
|
|
|
/// that must be of the same type as the one pointed to by the @ref
|
|
|
|
|
/// RandomAccessOutputIterator template parameter. This functor is
|
|
|
|
|
/// used to compare the elements referred to by the iterators pased in
|
|
|
|
|
/// argument to this function.
|
|
|
|
|
///
|
2013-10-02 21:41:26 +00:00
|
|
|
|
/// @param a_base the iterator to the base of the first sequence.
|
|
|
|
|
///
|
|
|
|
|
/// @param a_start an iterator to the beginning of the sub-region
|
|
|
|
|
/// of the first sequence to actually consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param a_end an iterator to the end of the sub-region of the first
|
|
|
|
|
/// sequence to consider.
|
|
|
|
|
///
|
2013-12-23 12:40:14 +00:00
|
|
|
|
/// @param b_base an iterator to the base of the second sequence to
|
|
|
|
|
/// consider.
|
2013-10-02 21:41:26 +00:00
|
|
|
|
///
|
|
|
|
|
/// @param b_start an iterator to the beginning of the sub-region
|
|
|
|
|
/// of the second sequence to actually consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_end an iterator to the end of the sub-region of the
|
|
|
|
|
/// second sequence to actually consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param ses the resulting shortest editing script.
|
|
|
|
|
///
|
|
|
|
|
/// @return true upon successful completion, false otherwise.
|
2013-12-23 12:40:14 +00:00
|
|
|
|
template<typename RandomAccessOutputIterator,
|
|
|
|
|
typename EqualityFunctor>
|
2013-10-02 21:41:26 +00:00
|
|
|
|
void
|
|
|
|
|
compute_diff(RandomAccessOutputIterator a_base,
|
|
|
|
|
RandomAccessOutputIterator a_begin,
|
|
|
|
|
RandomAccessOutputIterator a_end,
|
|
|
|
|
RandomAccessOutputIterator b_base,
|
|
|
|
|
RandomAccessOutputIterator b_begin,
|
|
|
|
|
RandomAccessOutputIterator b_end,
|
|
|
|
|
edit_script& ses)
|
|
|
|
|
{
|
|
|
|
|
vector<point> lcs;
|
|
|
|
|
|
2013-12-23 12:40:14 +00:00
|
|
|
|
compute_diff<RandomAccessOutputIterator,
|
|
|
|
|
EqualityFunctor>(a_base, a_begin, a_end,
|
|
|
|
|
b_base, b_begin, b_end,
|
|
|
|
|
lcs, ses);
|
2013-10-02 21:41:26 +00:00
|
|
|
|
}
|
|
|
|
|
|
2013-10-05 21:02:00 +00:00
|
|
|
|
/// Compute the longest common subsequence of two (sub-regions of)
|
|
|
|
|
/// sequences as well as the shortest edit script from transforming
|
|
|
|
|
/// the first (sub-region of) sequence into the second (sub-region of)
|
|
|
|
|
/// sequence.
|
|
|
|
|
///
|
|
|
|
|
/// This uses the LCS algorithm of the paper at section 4b.
|
|
|
|
|
///
|
2013-12-23 12:40:14 +00:00
|
|
|
|
/// @tparm RandomAccessOutputIterator the type of iterators passed to
|
|
|
|
|
/// this function. It must be a random access output iterator kind.
|
|
|
|
|
///
|
|
|
|
|
/// @tparm EqualityFunctor this must be a class that declares a public
|
|
|
|
|
/// call operator member returning a boolean and taking two arguments
|
|
|
|
|
/// that must be of the same type as the one pointed to by the @ref
|
|
|
|
|
/// RandomAccessOutputIterator template parameter. This functor is
|
|
|
|
|
/// used to compare the elements referred to by the iterators pased in
|
|
|
|
|
/// argument to this function.
|
|
|
|
|
///
|
2013-10-05 21:02:00 +00:00
|
|
|
|
/// @param a_start an iterator to the beginning of the first sequence
|
|
|
|
|
/// to consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param a_end an iterator to the end of the first sequence to
|
|
|
|
|
/// consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_start an iterator to the beginning of the second sequence
|
|
|
|
|
/// to consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_end an iterator to the end of the second sequence to
|
|
|
|
|
/// consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param ses the resulting shortest editing script.
|
|
|
|
|
///
|
|
|
|
|
/// @return true upon successful completion, false otherwise.
|
2013-12-23 12:40:14 +00:00
|
|
|
|
template<typename RandomAccessOutputIterator,
|
|
|
|
|
typename EqualityFunctor>
|
|
|
|
|
void
|
|
|
|
|
compute_diff(RandomAccessOutputIterator a_begin,
|
|
|
|
|
RandomAccessOutputIterator a_end,
|
|
|
|
|
RandomAccessOutputIterator b_begin,
|
|
|
|
|
RandomAccessOutputIterator b_end,
|
|
|
|
|
edit_script& ses)
|
|
|
|
|
{
|
|
|
|
|
compute_diff<RandomAccessOutputIterator,
|
|
|
|
|
EqualityFunctor>(a_begin, a_begin, a_end,
|
|
|
|
|
b_begin, b_begin, b_end,
|
|
|
|
|
ses);
|
|
|
|
|
}
|
|
|
|
|
|
2014-02-24 21:47:14 +00:00
|
|
|
|
/// Compute the longest common subsequence of two (sub-regions of)
|
|
|
|
|
/// sequences as well as the shortest edit script from transforming
|
|
|
|
|
/// the first (sub-region of) sequence into the second (sub-region of)
|
|
|
|
|
/// sequence.
|
|
|
|
|
///
|
|
|
|
|
/// This uses the LCS algorithm of the paper at section 4b.
|
|
|
|
|
///
|
|
|
|
|
/// @tparm RandomAccessOutputIterator the type of iterators passed to
|
|
|
|
|
/// this function. It must be a random access output iterator kind.
|
|
|
|
|
///
|
|
|
|
|
/// @param a_start an iterator to the beginning of the first sequence
|
|
|
|
|
/// to consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param a_end an iterator to the end of the first sequence to
|
|
|
|
|
/// consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_start an iterator to the beginning of the second sequence
|
|
|
|
|
/// to consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param b_end an iterator to the end of the second sequence to
|
|
|
|
|
/// consider.
|
|
|
|
|
///
|
|
|
|
|
/// @param ses the resulting shortest editing script.
|
|
|
|
|
///
|
|
|
|
|
/// @return true upon successful completion, false otherwise.
|
2013-10-05 21:02:00 +00:00
|
|
|
|
template<typename RandomAccessOutputIterator>
|
|
|
|
|
void
|
|
|
|
|
compute_diff(RandomAccessOutputIterator a_begin,
|
|
|
|
|
RandomAccessOutputIterator a_end,
|
|
|
|
|
RandomAccessOutputIterator b_begin,
|
|
|
|
|
RandomAccessOutputIterator b_end,
|
|
|
|
|
edit_script& ses)
|
|
|
|
|
{
|
2013-12-23 12:40:14 +00:00
|
|
|
|
compute_diff<RandomAccessOutputIterator, default_eq_functor>(a_begin, a_end,
|
|
|
|
|
b_begin, b_end,
|
|
|
|
|
ses);
|
2013-10-05 21:02:00 +00:00
|
|
|
|
}
|
|
|
|
|
|
2013-09-28 12:39:57 +00:00
|
|
|
|
/// Declaration only; the definition is not visible from this header.
///
/// NOTE(review): judging by the names alone, this presumably computes
/// the longest common subsequence of the C strings @p str1 and @p
/// str2, storing it in @p lcs and the length of the matching edit
/// script in @p ses_len -- confirm against the definition.
void
|
|
|
|
|
compute_lcs(const char* str1, const char* str2, int &ses_len, string& lcs);
|
|
|
|
|
|
|
|
|
|
/// Declaration only; the definition is not visible from this header.
///
/// NOTE(review): judging by the names alone, this presumably computes
/// the shortest edit script transforming the C string @p str1 into
/// @p str2 and stores it in @p ses -- confirm against the definition.
void
|
|
|
|
|
compute_ses(const char* str1, const char* str2, edit_script& ses);
|
|
|
|
|
|
|
|
|
|
/// Display an edit script on standard output.
|
|
|
|
|
///
|
|
|
|
|
/// @param es the edit script to display
|
|
|
|
|
///
|
|
|
|
|
/// @param str1_base the first string the edit script is about.
|
|
|
|
|
///
|
|
|
|
|
/// @pram str2_base the second string the edit script is about.
|
|
|
|
|
template<typename RandomAccessOutputIterator>
|
|
|
|
|
void
|
|
|
|
|
display_edit_script(const edit_script& es,
|
|
|
|
|
const RandomAccessOutputIterator str1_base,
|
|
|
|
|
const RandomAccessOutputIterator str2_base,
|
|
|
|
|
ostream& out)
|
|
|
|
|
{
|
|
|
|
|
if (es.num_deletions() == 0)
|
|
|
|
|
out << "no deletion:\n";
|
2013-10-04 08:35:25 +00:00
|
|
|
|
else if (es.num_deletions() == 1)
|
|
|
|
|
{
|
|
|
|
|
out << "1 deletion:\n"
|
|
|
|
|
<< "\t happened at index: ";;
|
|
|
|
|
}
|
2013-09-28 12:39:57 +00:00
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
out << es.num_deletions() << " deletions:\n"
|
2013-10-02 21:41:26 +00:00
|
|
|
|
<< "\t happened at indexes: ";
|
2013-09-28 12:39:57 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (vector<deletion>::const_iterator i = es.deletions().begin();
|
|
|
|
|
i != es.deletions().end();
|
|
|
|
|
++i)
|
|
|
|
|
{
|
|
|
|
|
if (i != es.deletions().begin())
|
|
|
|
|
out << ", ";
|
|
|
|
|
out << i->index() << " (" << str1_base[i->index()] << ")";
|
|
|
|
|
}
|
|
|
|
|
out << "\n\n";
|
|
|
|
|
|
|
|
|
|
if (es.num_insertions() == 0)
|
|
|
|
|
out << "no insertion\n";
|
|
|
|
|
else if (es.num_insertions() == 1)
|
|
|
|
|
out << "1 insertion\n";
|
|
|
|
|
else
|
|
|
|
|
out << es.num_insertions() << " insertions:\n";
|
|
|
|
|
for (vector<insertion>::const_iterator i = es.insertions().begin();
|
|
|
|
|
i != es.insertions().end();
|
|
|
|
|
++i)
|
|
|
|
|
{
|
Re-write middle snakes management in core diff algorithms
* include/abg-diff-utils.h (point::set): New overload..
(point::{add, operator<, operator>, operator<=, operator>=}): New
methods.
(point::operator!=): Constify.
(point::operator==): Constify. Cleanup.
(point::operator=): Keep emptiness.
(class snake): New class definition
(d_path_vec::{over_bounds, offset}): New methods.
(d_path_vec::check_index_against_bound): Don't take a bound
parameter anymore. Use the new over_bound method above. Fix up
error reporting.
(d_path_vec::d_path_vec): Fix d_path_vec size allocation.
(d_path_vec::operator[]): Use the d_path_vec::at method to check
all accesses against the bounds. This is slower, but at least we
can expect to have something that is more robust. We can remove
the bound checking later when we are sure the code has been tested
enough. Also use the new offset() method.
(d_path_vec::at): Take long long.
(ends_of_furthest_d_paths_overlap): Constify input parameters.
(end_of_fr_d_path_in_k, end_of_frr_d_path_in_k_plus_delta): Take
an instance of the new snake in parameter, rather than a bare end
point that wasn't carrying enough information about the snake.
Record the snake which consists of up to four points: a begin
point, an intermediate point, a diagonal start point and an end
point. Return that snake upon successful completion.
(compute_middle_snake): Take an instance of snake, rather than the
two points that were supposed to represent a snake and with which
we were loosing information before. Revisit/simplify the logic of
this function; this literally goes forward or in reverse, gets the
resulting snake returned by the end_of_fr_d_path_in_k and
end_of_frr_d_path_in_k_plus_delta functions, detect if these snakes
overlap and just return the current snake. Much simpler. The
caller now gets a snake, which has much more information than the
previous snake approximation made of just two points. Bonus
point, this follows almost to the word, what the paper says.
(maybe_record_match_point, find_snake_start_point): Remove these
as there are not used by compute_middle_snake anymore.
(print_snake, ses_len): Update these to take/handle a snake.
(snake_end_points): New declaration.
(compute_diff): When we are getting an empty first sequence, this
means that we are inserting the second sequence *before* the
beginning of the first sequence; keep this information by setting
the insertion point index to -1, rather than zero. Update this to
get/handle snakes, rather than free points vaguely representing
snakes. Now that compute_middle_snake returns real snakes, handle
the information we are getting. Basically for edit scripts of
length equal to 1, as the snake carries all the necessary
information about the non-diagonal edge (as well as the diagonal
edges), we (can) now precisely update the current edit script (as
well as the longest common sub-sequence). For edit scripts of
length greater than 1, better at which points to divide the
problem and consequently, at which points to conquer it back --
better following The Paper to the letter.
(display_edit_script): Update this for the use of instances of
snake.
* src/abg-diff-utils.cc (ends_of_furthest_d_paths_overlap): Update
for constification of inputs.
(snake_end_points): Define new function.
(compute_middle_snake): Adapt for the taking an instance of snake.
* tests/test-diff2.cc (main): Update for using instances of snake.
* tests/test-core-diff.cc: Add new tests.
* tests/data/test-core-diff/report0.txt: Update for output
adaptation.
* tests/data/test-core-diff/report6.txt: Likewise.
* tests/data/test-core-diff/report7.txt: Likewise.
* tests/data/test-core-diff/report8.txt: New test data.
* tests/data/test-core-diff/report9.txt: Likewise.
* tests/data/test-core-diff/report10.txt: Likewise.
* tests/data/test-core-diff/report11.txt: Likewise.
* tests/data/test-core-diff/report12.txt: Likewise.
* tests/data/test-core-diff/report3.txt: Likewise.
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
2013-10-23 22:39:04 +00:00
|
|
|
|
int idx = i->insertion_point_index();
|
|
|
|
|
if (idx < 0)
|
|
|
|
|
out << "\t before index of first sequence: " << idx + 1
|
|
|
|
|
<< " (" << str1_base[idx + 1] << ")\n";
|
|
|
|
|
else
|
|
|
|
|
out << "\t after index of first sequence: " << idx
|
|
|
|
|
<< " (" << str1_base[idx] << ")\n";
|
2013-09-28 12:39:57 +00:00
|
|
|
|
|
|
|
|
|
if (!i->inserted_indexes().empty())
|
|
|
|
|
out << "\t\t inserted indexes from second sequence: ";
|
|
|
|
|
|
2013-10-05 21:02:00 +00:00
|
|
|
|
for (vector<unsigned>::const_iterator j = i->inserted_indexes().begin();
|
2013-09-28 12:39:57 +00:00
|
|
|
|
j != i->inserted_indexes().end();
|
|
|
|
|
++j)
|
|
|
|
|
{
|
|
|
|
|
if (j != i->inserted_indexes().begin())
|
|
|
|
|
out << ", ";
|
|
|
|
|
out << *j << " (" << str2_base[*j] << ")";
|
|
|
|
|
}
|
|
|
|
|
out << "\n";
|
|
|
|
|
}
|
|
|
|
|
out << "\n\n";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}//end namespace diff_utils
|
|
|
|
|
|
|
|
|
|
}//end namespace abigail
|
2014-03-26 21:08:45 +00:00
|
|
|
|
#endif // __ABG_DIFF_UTILS_H__
|