#!/usr/bin/env bash
#
# Compares multiple branches against a reference and shows which ones contain
# each commit, and the level of backports since the origin or its own ancestors.
#
# Copyright (c) 2016 Willy Tarreau <w@1wt.eu>
#
# The purpose is to make it easy to visualize what backports might be missing
# in a maintenance branch, and to easily spot the ones that are needed and the
# ones that are not. It solely relies on the "cherry-picked from" tags in the
# commit messages to find what commit is available where, and can even find a
# reference commit's ancestor in another branch's commit ancestors as well to
# detect that the patch is present. When done with the proper references and
# a correct ordering of the branches, it can be used to quickly apply a set of
# fixes to a branch since it dumps suggested commands at the end. When doing
# so it is a good idea to use "HEAD" as the last branch to avoid making mistakes.
#
# Examples :
# - find what's in master and not in current branch :
#   show-backports -q -m -r master HEAD
# - find what's in 1.6/master and in hapee-maint-1.5r2 but not in current branch :
#   show-backports -q -m -r 1.6/master hapee-maint-1.5r2 HEAD | grep ' [a-f0-9]\{8\}[-+][0-9] '
# - check that no recent fix from master is missing in any maintenance branch :
#   show-backports -r master hapee-maint-1.5r2 aloha-7.5 hapee-maint-1.5r1 aloha-7.0
# - see what was recently merged into 1.6 and has no equivalent in local master :
#   show-backports -q -m -r 1.6/master -b "1.6/master@{1 week ago}" master
# - check what extra backports are present in hapee-r2 compared to hapee-r1 :
#   show-backports -q -m -r hapee-r2 hapee-r1


# Usage line, printed by -h/--help and whenever the command line is invalid.
USAGE="Usage: ${0##*/} [-q] [-H] [-m] [-u] [-r reference] [-l logexpr] [-s subject] [-b base] {branch|range} [...]"

# Parallel arrays filled from the non-option arguments: BRANCHES[i] is the
# branch to inspect and BASES[i] its optional per-branch starting point.
declare -a BASES=()
declare -a BRANCHES=()

# Option defaults. Only the reference has a non-empty default.
REF="master"   # -r : reference branch
BASE=""        # -b : common starting point for all branches
QUIET=""       # -q : non-empty when set
LOGEXPR=""     # -l : expression handed to "git log --grep"
SUBJECT=""     # -s : pattern the "hash subject" lines are grepped for
MISSING=""     # -m : non-empty when set
UPSTREAM=""    # -u : non-empty when set
BODYHASH=""    # -H : non-empty when set

# Terminates the script with exit status 1. When arguments are given they
# are joined with spaces and written to stderr first.
die() {
	if [ "$#" -gt 0 ]; then
		echo "$*" >&2
	fi
	exit 1
}

# Writes its arguments, joined with spaces, as one line on stderr.
err() {
	>&2 echo "$*"
}

# Terminates the script with exit status 0. When arguments are given they
# are joined with spaces and written to stdout first.
quit() {
	if [ "$#" -gt 0 ]; then
		echo "$*"
	fi
	exit 0
}

# Prints the first 8 characters of $1 (the whole string when it is shorter).
# This is a plain string truncation; git is not consulted.
short() {
	local full="$1"
	echo "${full:0:8}"
}

# Prints the backport matrix on stdout: a title line, then one row per commit
# listed in the reference file.
#
# Globals read: REF, BRANCHES, WORK, MISSING, QUIET, UPSTREAM, BODYHASH.
# Files read  : "$WORK/<ref>"    lines of "<hash> <subject>"
#               "$WORK/<branch>" lines of "<hash> [<picked-from>...]",
#                                optionally followed by " H<md5sum output>"
#               (slashes in ref/branch names are replaced with underscores).
# Row format  : "<hash>  <cell> <cell> ... |" with one cell per branch:
#               "<hash>-<n>"  the reference commit was found in the branch
#               "<hash>+<n>"  a commit the reference was itself picked from
#                             was found in the branch
#               "<hash>+?"    only the body hash matched (BODYHASH set)
#               "-"           nothing found
# With MISSING set, "[RELEASE]" commits and rows without any "-" are skipped.
# Unless QUIET is set, the title is printed again every 20 rows.
dump_commit_matrix() {
	local title branch ref subject line found newhash h
	local upstream refbhash
	local missing count=0

	title=":$REF:"
	for branch in "${BRANCHES[@]}"; do
		title="$title :${branch}:"
	done
	title="$title |"

	# now look up commits. "-r" keeps a subject ending with a backslash
	# from swallowing the next line of the list.
	while read -r ref subject; do
		if [[ -n "$MISSING" && "${subject:0:9}" == "[RELEASE]" ]]; then
			continue
		fi

		upstream=( "none" )
		missing=0
		refbhash=""
		line=""
		for branch in "${BRANCHES[@]}"; do
			found=$(grep -m 1 -e "$ref" "$WORK/${branch//\//_}")
			# drop the optional trailing " H<body hash>  -" so that it is
			# not counted as a cherry-pick below.
			set -- ${found%% H*}
			newhash=$1 ; shift
			# count the number of cherry-picks after this one. Since we shift,
			# the result is in "$#"
			while [[ -n "$1" && "$1" != "$ref" ]]; do
				shift
			done
			if [ -n "$newhash" ]; then
				line="${line} $(short "$newhash")-$#"
			else
				# before giving up we can check if our current commit was
				# itself cherry-picked and check this again. In order not
				# to have to do it all the time, we can cache the result
				# for the current line. If a match is found we report it
				# with the '+' delimiter instead of '-'.
				if [ "$upstream" = "none" ]; then
					upstream=( $(git log -1 --pretty --format=%B "$ref" | \
						sed -n 's/^commit \([^)]*\) upstream\.$/\1/p;s/^(cherry picked from commit \([^)]*\))/\1/p') )
				fi
				newhash=""
				for h in "${upstream[@]}"; do
					found=$(grep -m 1 -e "$h" "$WORK/${branch//\//_}")
					set -- ${found%% H*}
					newhash=$1 ; shift
					while [[ -n "$1" && "$1" != "$h" ]]; do
						shift
					done
					if [ -n "$newhash" ]; then
						line="${line} $(short "$newhash")+$#"
						break
					fi
				done
				if [ -z "$newhash" -a -n "$BODYHASH" ]; then
					if [ -z "$refbhash" ]; then
						refbhash=$(git log -1 --pretty="%an|%ae|%at|%B" "$ref" | sed -n '/^\(Signed-off-by\|(cherry picked\)/q;p' | md5sum)
					fi

					set -- $(grep -m 1 -e "H$refbhash\$" "$WORK/${branch//\//_}")
					newhash=$1
					if [ -n "$newhash" ]; then
						# no "break" here: we are directly inside the
						# per-branch loop and the remaining branches still
						# need their own column.
						line="${line} $(short "$newhash")+?"
					fi
				fi
				if [ -z "$newhash" ]; then
					line="${line} -"
					missing=1
				fi
			fi
		done
		line="${line} |"
		if [[ -z "$MISSING" ]] || (( missing > 0 )); then
			[ $((count++)) -gt 0 ] || echo "$title"
			[ "$QUIET" != "" -o $count -lt 20 ] || count=0
			if [ -z "$UPSTREAM" -o "$upstream" = "none" -o -z "$upstream" ]; then
				echo "$(short "$ref") $line"
			else
				echo "$(short "$upstream") $line"
			fi
		fi
	done < "$WORK/${REF//\//_}"
}

# Parse the leading options; parsing stops at the first argument that does
# not start with "-". What remains is the list of branches/ranges.
while [ -n "$1" -a -z "${1##-*}" ]; do
	case "$1" in
		-b|-r|-l|-s)
			# These options take a value. Without this check "shift 2"
			# fails when the value is missing and leaves the arguments
			# untouched, which made this loop spin forever.
			if [ "$#" -lt 2 ]; then
				err "Option $1 requires an argument."
				die "$USAGE"
			fi
			case "$1" in
				-b) BASE="$2"    ;;
				-r) REF="$2"     ;;
				-l) LOGEXPR="$2" ;;
				-s) SUBJECT="$2" ;;
			esac
			shift 2
			;;
		-q)        QUIET=1        ; shift   ;;
		-m)        MISSING=1      ; shift   ;;
		-u)        UPSTREAM=1     ; shift   ;;
		-H)        BODYHASH=1     ; shift   ;;
		-h|--help) quit "$USAGE" ;;
		*)         die  "$USAGE" ;;
	esac
done

# Every remaining argument is either a plain branch or an "id1..id2" range.
# A range restricts the history of that branch to what follows id1 instead
# of the common base; for a plain branch the BASES entry is left empty.
# BASES and BRANCHES are filled in lock-step, so they share their indexes.
BRANCHES=( )
BASES=( )
while (( $# > 0 )); do
	case "$1" in
		*..*)
			base="${1%%..*}"
			branch="${1##*..}"
			;;
		*)
			base=""
			branch="$1"
			;;
	esac
	BASES+=( "$base" )
	BRANCHES+=( "$branch" )
	shift
done

# At least one branch is required.
(( ${#BRANCHES[@]} > 0 )) || die "$USAGE"

# The reference and every branch must resolve to a valid git object.
for branch in "$REF" "${BRANCHES[@]}"; do
	git rev-parse --verify -q "$branch" >/dev/null \
		|| die "Failed to check git branch $branch."
done

# Without an explicit base (-b), fall back to what git reports as merge base
# of the reference and the branches.
if [ -z "$BASE" ]; then
	err "Warning! No base specified, looking for common ancestor."
	BASE=$(git merge-base --all "$REF" "${BRANCHES[@]}")
	[ -n "$BASE" ] || die "Couldn't find a common ancestor between these branches"
fi

# we want to go to the git root dir. The lookup and the "cd" are checked
# separately: an unquoted "cd $(...)" breaks on paths containing spaces and,
# when git prints nothing (e.g. in a bare repository), turns into a bare "cd"
# that silently moves to $HOME.
DIR="$PWD"
toplevel=$(git rev-parse --show-toplevel)
if [ -z "$toplevel" ]; then
	die "Can't find the top-level directory of the git working tree."
fi
cd "$toplevel" || die "Can't change directory to $toplevel"

# Scratch directory holding one index file per ref/branch. This relies on
# ".git" being a directory at the top level of the working tree.
mkdir -p .git/.show-backports #|| die "Can't create .git/.show-backports"
WORK=.git/.show-backports

# List the reference's commits since the base, oldest first, one
# "<hash> <subject>" per line, keeping only the lines matching $SUBJECT.
# The log expression is passed as a single quoted word so that an expression
# containing spaces or glob characters reaches "git log --grep" intact.
rm -f "$WORK/${REF//\//_}"
git log --reverse ${LOGEXPR:+"--grep=$LOGEXPR"} --pretty="%H %s" "$BASE".."$REF" | grep "${SUBJECT}" > "$WORK/${REF//\//_}"

# Build one index file per branch under "$WORK" (slashes in the branch name
# become underscores). Commits are listed oldest first from the branch's own
# base when it was given as a range, otherwise from $BASE, and filtered on
# $SUBJECT. Each output line holds the commit hash followed by the hashes
# found in its "commit <id> upstream." / "(cherry picked from commit <id>)"
# message lines and, when BODYHASH is set, " H" plus the md5sum output of the
# author/date/body up to the first Signed-off-by or cherry-pick line.
for (( branch_num = 0; branch_num < ${#BRANCHES[@]}; branch_num++ )); do
	branch="${BRANCHES[branch_num]}"
	base="${BASES[branch_num]:-$BASE}"
	rm -f "$WORK/${branch//\//_}"
	git log --reverse --pretty="%H %s" "$base".."$branch" | grep "${SUBJECT}" | while read h subject; do
		# left unquoted on purpose: the extracted hashes are joined to the
		# commit hash with single spaces
		picks=$(git log -1 --pretty --format=%B "$h" | \
			sed -n 's/^commit \([^)]*\) upstream\.$/\1/p;s/^(cherry picked from commit \([^)]*\))/\1/p')
		echo -n "$h" $picks
		if [ -z "$BODYHASH" ]; then
			echo
		else
			echo " H$(git log -1 --pretty="%an|%ae|%at|%B" "$h" | sed -n '/^\(Signed-off-by\|(cherry picked\)/q;p' | md5sum)"
		fi
	done > "$WORK/${branch//\//_}"
done

# Render the matrix: align the columns, append each commit's subject to its
# row, and with MISSING set print the suggested "git show" / "git cherry-pick"
# commands. The consumer runs in a subshell, so its arrays are only visible
# inside the parentheses.
count=0
dump_commit_matrix | column -t | \
(
	left_commits=( )
	right_commits=( )
	while read line; do
		# append the subject at the end of the line
		set -- $line
		echo -n "$line "
		if [ "${line::1}" = ":" ]; then
			echo "---- Subject ----"
		else
			# doing it this way prevents git from abusing the terminal
			echo "$(git log -1 --pretty="%s" "$1")"
			left_commits+=( "$1" )
			# walk the cells up to the first missing one ("-") or the end
			# of the row ("|") and remember the last commit seen. The
			# "-<n>", "+<n>" and "+?" suffixes are all stripped so that
			# what is kept is a plain commit id usable by git.
			comm=""
			while [[ -n "$1" && "$1" != "-" && "$1" != "|" ]]; do
				comm="${1%[-+]*}"
				shift
			done
			right_commits+=( "$comm" )
		fi
	done
	if [ -n "$MISSING" -a ${#left_commits[@]} -eq 0 ]; then
		echo "No missing commit to apply."
	elif [ -n "$MISSING" ]; then
		echo
		echo
		echo "In order to show and/or apply all leftmost commits to current branch :"
		echo "   git show --pretty=format:'%C(yellow)commit  %H%C(normal)%nAuthor: %an <%ae>%nDate:   %aD%n%n%C(green)%C(bold)git cherry-pick -sx %h%n%n%w(72,4,4)%B%N' ${left_commits[*]}"
		echo
		echo "   git cherry-pick -sx ${left_commits[*]}"
		echo
		if [ "${left_commits[*]}" != "${right_commits[*]}" ]; then
			echo "In order to show and/or apply all rightmost commits to current branch :"
			echo "   git show --pretty=format:'%C(yellow)commit  %H%C(normal)%nAuthor: %an <%ae>%nDate:   %aD%n%n%C(green)%C(bold)git cherry-pick -sx %h%n%n%w(72,4,4)%B%N' ${right_commits[*]}"
			echo
			echo "   git cherry-pick -sx ${right_commits[*]}"
			echo
		fi
	fi
)