From c988ef4cf435ffa50dc9d10d9b0e55d5685ac7b1 Mon Sep 17 00:00:00 2001 From: Frantisek Sumsal Date: Mon, 25 Dec 2023 11:28:26 +0100 Subject: coccinelle: rework how we run the Coccinelle transformations Turns out that the original way we did things was quite broken, as it skipped a _lot_ of code. This was because we just threw everything into one pile and tried to spatch it, but this made Coccinelle sad, like when man page examples redefined some of our macros, causing typedef conflicts. For example, with a minimal reproducer that defines a cleanup macro in two source files, Coccinelle has no issues when spatch-ing each one separately: $ spatch --verbose-parsing --sp-file zz-drop-braces.cocci main.c init_defs_builtins: /usr/lib64/coccinelle/standard.h HANDLING: main.c SPECIAL NAMES: adding _cleanup_ as a attribute with arguments SPECIAL NAMES: adding _cleanup_free_ as a attribute $ spatch --verbose-parsing --sp-file zz-drop-braces.cocci logcontrol-example.c init_defs_builtins: /usr/lib64/coccinelle/standard.h HANDLING: logcontrol-example.c SPECIAL NAMES: adding _cleanup_ as a attribute with arguments But when you try to spatch both of them at once, Coccinelle starts complaining and skipping the "bad" code: $ spatch --verbose-parsing --sp-file zz-drop-braces.cocci main.c logcontrol-example.c init_defs_builtins: /usr/lib64/coccinelle/standard.h HANDLING: main.c logcontrol-example.c SPECIAL NAMES: adding _cleanup_ as a attribute with arguments SPECIAL NAMES: adding _cleanup_free_ as a attribute remapping: _cleanup_ to an ident in macro name ERROR-RECOV: found sync end of #define, line 44 parsing pass2: try again ERROR-RECOV: found sync end of #define, line 44 parse error = File "logcontrol-example.c", line 44, column 21, charpos = 1719 around = '__attribute__', whole content = #define _cleanup_(f) __attribute__((cleanup(f))) badcount: 2 bad: #include bad: BAD:!!!!! #define _cleanup_(f) __attribute__((cleanup(f))) This was, unfortunately, hidden as it is visible only with --verbose-parsing (or --parse-error-msg). Another issue was how we handled includes. The original way of throwing them into the pile of source files doesn't really work, leading up to similar issues as above. The better way is to let Coccinelle properly resolve all includes by telling it where to find our own include files (basically the same thing we already do during compilation). After fixing all this, Coccinelle now has a chance to process much more of our code (there are still some issues in more complex macros, but that requires further investigation). However, there's a huge downside from all of this - doing a _proper_ code analysis is surprisingly time and resource heavy; meaning that processing just one Coccinelle rule now takes 15 - 30 minutes. To make this slightly less painful, Coccinelle supports caching the generated ASTs, which actually helps a lot - it gets the runtime of one rule from 15 - 30 minutes down to ~1 minute. It, of course, has its own downside - the cache is _really_ big (ATTOW the cache takes ~15 GiB). However, even with the aggressive AST caching you're still looking at ~1 hour for one full Coccinelle run, which is a bit annoying, but I guess that's the price of doing things _properly_ (but I'll definitely look into ways of further optimizing this). --- coccinelle/run-coccinelle.sh | 51 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 4 deletions(-) (limited to 'coccinelle') diff --git a/coccinelle/run-coccinelle.sh b/coccinelle/run-coccinelle.sh index cd951790b9..bb72a493f0 100755 --- a/coccinelle/run-coccinelle.sh +++ b/coccinelle/run-coccinelle.sh @@ -2,6 +2,14 @@ # SPDX-License-Identifier: LGPL-2.1-or-later set -e +# FIXME: +# - Coccinelle doesn't like our TEST() macros, which then causes name conflicts; i.e. Cocci can't process +# that TEST(xsetxattr) yields test_xsetxattr() and uses just xsetxattr() in this case, which then conflicts +# with the tested xsetxattr() function, leading up to the whole test case getting skipped due to +# conflicting typedefs +# - something keeps pulling in src/boot/efi/*.h stuff, even though it's excluded +# - Coccinelle has issues with some of our more complex macros + # Exclude following paths from the Coccinelle transformations EXCLUDED_PATHS=( "src/boot/efi/*" @@ -10,13 +18,17 @@ EXCLUDED_PATHS=( # Symlinked to test-bus-vtable-cc.cc, which causes issues with the IN_SET macro "src/libsystemd/sd-bus/test-bus-vtable.c" "src/libsystemd/sd-journal/lookup3.c" + # Ignore man examples, as they redefine some macros we use internally, which makes Coccinelle complain + # and ignore code that tries to use the redefined stuff + "man/*" ) TOP_DIR="$(git rev-parse --show-toplevel)" +CACHE_DIR="$(dirname "$0")/.coccinelle-cache" ARGS=() # Create an array from files tracked by git... -mapfile -t FILES < <(git ls-files ':/*.[ch]') +mapfile -t FILES < <(git ls-files ':/*.c') # ...and filter everything that matches patterns from EXCLUDED_PATHS for excl in "${EXCLUDED_PATHS[@]}"; do # shellcheck disable=SC2206 @@ -37,12 +49,43 @@ fi [[ ${#@} -ne 0 ]] && SCRIPTS=("$@") || SCRIPTS=("$TOP_DIR"/coccinelle/*.cocci) +mkdir -p "$CACHE_DIR" +echo "--x-- Using Coccinelle cache directory: $CACHE_DIR" +echo "--x--" +echo "--x-- Note: running spatch for the first time without populated cache takes" +echo "--x-- a _long_ time (15-30 minutes). Also, the cache is quite large" +echo "--x-- (~15 GiB), so make sure you have enough free space." +echo + for script in "${SCRIPTS[@]}"; do echo "--x-- Processing $script --x--" TMPFILE="$(mktemp)" echo "+ spatch --sp-file $script ${ARGS[*]} ..." - parallel --halt now,fail=1 --keep-order --noswap --max-args=20 \ - spatch --macro-file="$TOP_DIR/coccinelle/macros.h" --smpl-spacing --sp-file "$script" "${ARGS[@]}" ::: "${FILES[@]}" \ - 2>"$TMPFILE" || cat "$TMPFILE" + # A couple of notes: + # + # 1) Limit this to 10 files at once, as processing the ASTs is _very_ memory hungry - e.g. with 20 files + # at once one spatch process can take around 2.5 GiB of RAM, which can easily eat up all available RAM + # when paired together with parallel + # + # 2) Make sure spatch can find our includes via -I , similarly as we do when compiling stuff + # + # 3) Make sure to include includes from includes (--recursive-includes), but use them only to get type + # definitions (--include-headers-for-types) - otherwise we'd start formating them as well, which might be + # unwanted, especially for includes we fetch verbatim from third-parties + # + # 4) Use cache, since generating the full AST is _very_ expensive, i.e. the uncached run takes 15 - 30 + # minutes (for one rule(!)), vs 30 - 90 seconds when the cache is populated. One major downside of the + # cache is that it's quite big - ATTOW the cache takes around 15 GiB, but the performance boost is + # definitely worth it + parallel --halt now,fail=1 --keep-order --noswap --max-args=10 \ + spatch --cache-prefix "$CACHE_DIR" \ + -I src \ + --recursive-includes \ + --include-headers-for-types \ + --smpl-spacing \ + --sp-file "$script" \ + "${ARGS[@]}" ::: "${FILES[@]}" \ + 2>"$TMPFILE" || cat "$TMPFILE" + rm -f "$TMPFILE" echo -e "--x-- Processed $script --x--\n" done -- cgit v1.2.3 From 6688db4194a83954f03ab952891aca5c483e63a3 Mon Sep 17 00:00:00 2001 From: Frantisek Sumsal Date: Mon, 25 Dec 2023 11:43:02 +0100 Subject: coccinelle: fix the log-json rule As it generated very questionable results. --- coccinelle/log-json.cocci | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'coccinelle') diff --git a/coccinelle/log-json.cocci b/coccinelle/log-json.cocci index d184e56584..c941706c64 100644 --- a/coccinelle/log-json.cocci +++ b/coccinelle/log-json.cocci @@ -3,7 +3,6 @@ expression e, v, flags; expression list args; @@ -+ return - json_log(v, flags, 0, args); -+ json_log(v, flags, SYNTHETIC_ERRNO(e), args); - return -e; ++ return json_log(v, flags, SYNTHETIC_ERRNO(e), args); -- cgit v1.2.3 From fcd2db31c03e717d9f3b66f52af06ce60de46add Mon Sep 17 00:00:00 2001 From: Frantisek Sumsal Date: Mon, 25 Dec 2023 11:43:27 +0100 Subject: coccinelle: properly drop braces around single-statement if()s --- coccinelle/zz-drop-braces.cocci | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) (limited to 'coccinelle') diff --git a/coccinelle/zz-drop-braces.cocci b/coccinelle/zz-drop-braces.cocci index 8c3be01c1f..7a3382c9a7 100644 --- a/coccinelle/zz-drop-braces.cocci +++ b/coccinelle/zz-drop-braces.cocci @@ -1,28 +1,13 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ @@ position p : script:python() { p[0].file != "src/journal/lookup3.c" }; -identifier id; -expression e; +expression e,e1; @@ -if (...) -- { +- if (e) { ++ if (e) ( - id@p(...); + e1@p; | - e@p; -) -- } - -@@ -position p : script:python() { p[0].file != "src/journal/lookup3.c" }; -identifier id; -expression e; -@@ -if (...) -- { -( - return id@p(...); -| - return e@p; + return e1@p; ) - } -- cgit v1.2.3