Discussion:
[PATCH] grep: fix match highlighting for combined patterns with context lines
Zoltan Klinger
2014-10-21 05:56:03 UTC
Permalink
When git grep is run with combined patterns such as '-e p1 --and -e p2'
and surrounding context lines are requested, the output contains
incorrectly highlighted matches.

Consider the following output (highlighted matches are surrounded by '*'
characters):
$ cat testfile
foo a
foo b
foo bar
baz bar foo
bar x
bar y
$ git grep -n -C2 -e foo --and -e bar testfile
testfile-1-*foo* a
testfile-2-*foo* b
testfile:3:*foo* *bar*
testfile:4:baz *bar* *foo*
testfile-5-*bar* x
testfile-6-*bar* y

Lines 1, 2, 5 and 6 do not match the combined patterns; they only
contain incorrectly highlighted 'false positives'.

Modify the show_line() function in grep.c to highlight matches only on
lines that match the combined pattern. Do not highlight matches on lines
that provide only context or contain only the function name of the match.

The output of the same command after the change:
$ git grep -n -C2 -e foo --and -e bar testfile
testfile-1-foo a
testfile-2-foo b
testfile:3:*foo* *bar*
testfile:4:baz *bar* *foo*
testfile-5-bar x
testfile-6-bar y

Signed-off-by: Zoltan Klinger <***@gmail.com>
---
grep.c | 7 +++--
t/t7810-grep.sh | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 95 insertions(+), 2 deletions(-)

diff --git a/grep.c b/grep.c
index 4dc31ea..3c4d68e 100644
--- a/grep.c
+++ b/grep.c
@@ -1121,9 +1121,12 @@ static void show_line(struct grep_opt *opt, char *bol, char *eol,
enum grep_context ctx = GREP_CONTEXT_BODY;
int ch = *eol;
int eflags = 0;
+ char *match_color = NULL;

- if (sign == ':')
+ if (sign == ':') {
line_color = opt->color_selected;
+ match_color = opt->color_match;
+ }
else if (sign == '-')
line_color = opt->color_context;
else if (sign == '=')
@@ -1136,7 +1139,7 @@ static void show_line(struct grep_opt *opt, char *bol, char *eol,
output_color(opt, bol, match.rm_so, line_color);
output_color(opt, bol + match.rm_so,
match.rm_eo - match.rm_so,
- opt->color_match);
+ match_color);
bol += match.rm_eo;
rest -= match.rm_eo;
eflags = REG_NOTBOL;
diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh
index 40615de..b0d6b6f 100755
--- a/t/t7810-grep.sh
+++ b/t/t7810-grep.sh
@@ -1202,4 +1202,94 @@ test_expect_success LIBPCRE 'grep -P "^ "' '
test_cmp expected actual
'

+cat >expected <<EOF
+space-line without leading space1
+space: line <RED>with <RESET>leading space1
+space: line <RED>with <RESET>leading <RED>space2<RESET>
+space: line <RED>with <RESET>leading space3
+space:line without leading <RED>space2<RESET>
+EOF
+
+test_expect_success 'grep --color -e A -e B with context' '
+ test_config color.grep.context normal &&
+ test_config color.grep.filename normal &&
+ test_config color.grep.function normal &&
+ test_config color.grep.linenumber normal &&
+ test_config color.grep.match red &&
+ test_config color.grep.selected normal &&
+ test_config color.grep.separator normal &&
+
+ git grep --color=always -C2 -e "with " -e space2 space |
+ test_decode_color >actual &&
+ test_cmp expected actual
+'
+
+cat >expected <<EOF
+space-line without leading space1
+space- line with leading space1
+space: line <RED>with <RESET>leading <RED>space2<RESET>
+space- line with leading space3
+space-line without leading space2
+EOF
+
+test_expect_success 'grep --color -e A --and -e B with context' '
+ test_config color.grep.context normal &&
+ test_config color.grep.filename normal &&
+ test_config color.grep.function normal &&
+ test_config color.grep.linenumber normal &&
+ test_config color.grep.match red &&
+ test_config color.grep.selected normal &&
+ test_config color.grep.separator normal &&
+
+ git grep --color=always -C2 -e "with " --and -e space2 space |
+ test_decode_color >actual &&
+ test_cmp expected actual
+'
+
+cat >expected <<EOF
+space-line without leading space1
+space: line <RED>with <RESET>leading space1
+space- line with leading space2
+space: line <RED>with <RESET>leading space3
+space-line without leading space2
+EOF
+
+test_expect_success 'grep --color -e A --and --not -e B with context' '
+ test_config color.grep.context normal &&
+ test_config color.grep.filename normal &&
+ test_config color.grep.function normal &&
+ test_config color.grep.linenumber normal &&
+ test_config color.grep.match red &&
+ test_config color.grep.selected normal &&
+ test_config color.grep.separator normal &&
+
+ git grep --color=always -C2 -e "with " --and --not -e space2 space |
+ test_decode_color >actual &&
+ test_cmp expected actual
+'
+
+cat >expected <<EOF
+hello.c-#include <stdio.h>
+hello.c=int main(int argc, const char **argv)
+hello.c-{
+hello.c: pr<RED>int<RESET>f("<RED>Hello<RESET> world.\n");
+hello.c- return 0;
+hello.c- /* char ?? */
+hello.c-}
+EOF
+
+test_expect_success 'grep --color -e A --and -e B -p with context' '
+ test_config color.grep.context normal &&
+ test_config color.grep.filename normal &&
+ test_config color.grep.function normal &&
+ test_config color.grep.linenumber normal &&
+ test_config color.grep.match red &&
+ test_config color.grep.selected normal &&
+ test_config color.grep.separator normal &&
+
+ git grep --color=always -p -C3 -e int --and -e Hello --no-index hello.c |
+ test_decode_color >actual &&
+ test_cmp expected actual
+'
+
test_done
--
2.1.1
Junio C Hamano
2014-10-21 19:23:27 UTC
Permalink
Post by Zoltan Klinger
When git grep is run with combined patterns such as '-e p1 --and -e p2'
and surrounding context lines are requested, the output contains
incorrectly highlighted matches.
Consider the following output (highlighted matches are surrounded by '*'
characters):
$ cat testfile
foo a
foo b
foo bar
baz bar foo
bar x
bar y
$ git grep -n -C2 -e foo --and -e bar testfile
testfile-1-*foo* a
testfile-2-*foo* b
testfile:3:*foo* *bar*
testfile:4:baz *bar* *foo*
testfile-5-*bar* x
testfile-6-*bar* y
Lines 1, 2, 5 and 6 do not match the combined patterns, they only
contain incorrectly highlighted 'false positives'.
Modify the show_line() function in grep.c to highlight matches only on
lines that match the combined pattern. Do not highlight matches on lines
that provide only context or contain only the function name of the match.
The output of the same command after the change:
$ git grep -n -C2 -e foo --and -e bar testfile
testfile-1-foo a
testfile-2-foo b
testfile:3:*foo* *bar*
testfile:4:baz *bar* *foo*
testfile-5-bar x
testfile-6-bar y
If your goal is to stop colouring words on context and other kinds
of lines, do you still need the "while (next_match(...))" loop for
them? Can't you make the resulting code clearer by restructuring
the inside of the whole "if (opt->color)" block further, something
along the lines of...

if (sign == ':') {
regmatch_t match; ...
enum grep_context ctx = GREP_CONTEXT_BODY;
...
while (next_match(...)) {
... the "word-by-word" loop ...
}
} else {
switch (sign) {
case '-':
line_color = opt->color_context;
break;
case '=':
line_color = opt->color_function;
break;
}
output_color(opt, bol, ..., line_color);
}

Hmm?
Junio C Hamano
2014-10-21 22:40:33 UTC
Permalink
Post by Junio C Hamano
If your goal is to stop colouring words on context and other kinds
of lines, do you still need the "while (next_match(...))" loop for
them? Can't you make the resulting code clearer by restructuring
the inside of the whole "if (opt->color)" block further, something
along the lines of...
Hmm?
It turns out that the result of such a change becomes more readable
than the original, in that it makes it clear that reinspection of
the lines is done only for matched ones and not context lines.

The diff looks unnecessarily noisy because it indents the while ()
loop that is only needed for sign == ':', though.

grep.c | 42 ++++++++++++++++++++++--------------------
1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/grep.c b/grep.c
index c668034..b363a94 100644
--- a/grep.c
+++ b/grep.c
@@ -1112,31 +1112,33 @@ static void show_line(struct grep_opt *opt, char *bol, char *eol,
output_sep(opt, sign);
}
if (opt->color) {
- regmatch_t match;
- enum grep_context ctx = GREP_CONTEXT_BODY;
- int ch = *eol;
- int eflags = 0;
+ if (sign == ':') {
+ /* paint the hits on matched lines */
+ regmatch_t match;
+ enum grep_context ctx = GREP_CONTEXT_BODY;
+ int ch = *eol;
+ int eflags = 0;

- if (sign == ':')
line_color = opt->color_selected;
- else if (sign == '-')
+ *eol = '\0';
+ while (next_match(opt, bol, eol, ctx, &match, eflags)) {
+ if (match.rm_so == match.rm_eo)
+ break;
+
+ output_color(opt, bol, match.rm_so, line_color);
+ output_color(opt, bol + match.rm_so,
+ match.rm_eo - match.rm_so,
+ opt->color_match);
+ bol += match.rm_eo;
+ rest -= match.rm_eo;
+ eflags = REG_NOTBOL;
+ }
+ *eol = ch;
+ } else if (sign == '-') {
line_color = opt->color_context;
- else if (sign == '=')
+ } else if (sign == '=') {
line_color = opt->color_function;
- *eol = '\0';
- while (next_match(opt, bol, eol, ctx, &match, eflags)) {
- if (match.rm_so == match.rm_eo)
- break;
-
- output_color(opt, bol, match.rm_so, line_color);
- output_color(opt, bol + match.rm_so,
- match.rm_eo - match.rm_so,
- opt->color_match);
- bol += match.rm_eo;
- rest -= match.rm_eo;
- eflags = REG_NOTBOL;
}
- *eol = ch;
}
output_color(opt, bol, rest, line_color);
opt->output(opt, "\n", 1);
Zoltan Klinger
2014-10-22 00:45:19 UTC
Permalink
Post by Junio C Hamano
It turns out that the result of such a change becomes more readable
than the original, in that it makes it clear that reinspection of
the lines is done only for matched ones and not context lines.
Agreed, it looks much clearer now. I would be happy for you to squash
your change (commit da736e6) into the zk/grep-color-words branch.
Junio C Hamano
2014-10-22 19:14:13 UTC
Permalink
Post by Zoltan Klinger
Post by Junio C Hamano
It turns out that the result of such a change becomes more readable
than the original, in that it makes it clear that reinspection of
the lines is done only for matched ones and not context lines.
Agreed, it looks much clearer now. I would be happy for you to squash
your change (commit da736e6) into the zk/grep-color-words branch.
OK, will do. Thanks.

Loading...