diff options
author | Andrei Pavel <andrei@isc.org> | 2022-01-21 13:21:23 +0100 |
---|---|---|
committer | Andrei Pavel <andrei@isc.org> | 2022-01-21 13:21:23 +0100 |
commit | e1de26409d7cb0052c09aed760384ed485c5652e (patch) | |
tree | 5bf37df5d570686baa7bc89d778dd0ed745df88a | |
parent | [#2222] Removed the skipped when equal code (diff) | |
download | kea-e1de26409d7cb0052c09aed760384ed485c5652e.tar.xz kea-e1de26409d7cb0052c09aed760384ed485c5652e.zip |
[#2205] are-scripts-in-sync.py
This script is used to maintain upgrade scripts in Kea and to ensure that the
incremental and the full schema scripts are in sync.
-rw-r--r-- | .gitlab-ci.yml | 6 | ||||
-rwxr-xr-x | src/share/database/scripts/utils/are-scripts-in-sync.py | 233 |
2 files changed, 239 insertions, 0 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 816076191f..5bb6e4120e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -129,6 +129,12 @@ danger: - gem install danger-commit_lint - danger --fail-on-errors=true --new-comment +dhcpdb_create-upgrade-consistency: + stage: test + image: "${CI_REGISTRY_IMAGE}:latest" + script: + - ./src/share/database/scripts/utils/are-scripts-in-sync.py + duplicate-includes: stage: test image: "${CI_REGISTRY_IMAGE}:latest" diff --git a/src/share/database/scripts/utils/are-scripts-in-sync.py b/src/share/database/scripts/utils/are-scripts-in-sync.py new file mode 100755 index 0000000000..848e6c9eef --- /dev/null +++ b/src/share/database/scripts/utils/are-scripts-in-sync.py @@ -0,0 +1,233 @@ +#!/usr/bin/env python3 + +# This script is used to maintain upgrade scripts in Kea and to ensure that the +# incremental and the full schema scripts are in sync. See usage() function +# below for help in how to use it. + +import difflib +import glob +import os +import re +import subprocess +import sys + + +def usage(): + print('''\ +Usage: are-scripts-in-sync.py [-h|--help] [$file|$gitref [$gitref2]] + +$ are-scripts-in-sync.py + No parameters are given. + All the SQL scripts changed in the current branch are checked. + +$ are-scripts-in-sync.py $file + One file is given. It must be a dhcpdb_create script or an upgrade script. + If it is a dhcpdb_create script, it is compared with the latest upgrade script. + If it is an upgrade script, it is compared to the dhcpdb_create script. + +$ are-scripts-in-sync.py $gitref + Two git refs are given. They can be branches, commits, tags, etc. + All the SQL scripts changed between HEAD and $gitref are checked. + +$ are-scripts-in-sync.py $gitref1 $gitref2 + Two git refs are given. They can be branches, commits, tags, etc. + All the SQL scripts changed between $gitref1 and $gitref2 are checked.''') + + +def filter_the_noise(text, is_upgrade_script): + ''' Removes portions of the script which are always different. + + :param text: the script content to be analyzed + :type text: str + + :param is_upgrade_script: determines if it is a an upgrade script or a + dhcpdb_create script. Different delimiters are used for each. + :type is_upgrade_script: bool + + :return: the trimmed down portion of text + :type: str + ''' + append=False + result=[] + for i in text: + if re.search('<<EOF$' if is_upgrade_script else 'CREATE TABLE.*lease4', i): + append=True + elif re.search('^EOF$' if is_upgrade_script else 'Notes:', i): + append=False + elif re.search('^START TRANSACTION;$', i) or \ + re.search('^COMMIT;$', i) or \ + re.search('^$', i): + # Exclude start transactions, commits, and empty lines. + pass + elif append: + result.append(i) + return result + +def diff(dhcpdb_create_script, upgrade_script): + ''' Compares the common parts of two files. Prints the difference. + + :param dhcpdb_create_script: the path to the dhcpdb_create script + :type dhcpdb_create_script: str + :param upgrade_script: the path to the upgrade script + :type upgrade_script: str + + :return: True if there is a difference, False otherwise + :type: bool + ''' + with open(dhcpdb_create_script) as create_file: + create_text = create_file.readlines() + + with open(upgrade_script) as upgrade_file: + upgrade_text = upgrade_file.readlines() + + # PostgreSQL upgrade scripts need the $ delimiters escaped as opposed to + # create scripts. So escape them in the create script for the duration of + # this diff so that they don't come up in the diff (or so that they do + # come up if they are not correctly escaped in the upgrade script). + if dhcpdb_create_script.endswith('.pgsql'): + create_text = [i.replace('$', '\$') for i in create_text] + + # Removes portions of the script which are always different: the beginning + # and the end. + create_text = filter_the_noise(create_text, False) + upgrade_text = filter_the_noise(upgrade_text, True) + + # Use difflib to create the diff. + diff = ''.join(difflib.context_diff(create_text, upgrade_text, n=0)).splitlines() + + # Determine groups of a heuristical number of consecutive lines that differ. + # These are considered to be outside the upgrade script's scope and are + # subject to exclusion from the diff. + consecutive_lines_to_exclude = 16 + first_exclamation_mark = None + to_be_removed = [] + for i in range(len(diff)): + if diff[i].startswith('!'): + if first_exclamation_mark is None: + first_exclamation_mark = i + else: + if first_exclamation_mark is not None and first_exclamation_mark + consecutive_lines_to_exclude < i: + to_be_removed.append([first_exclamation_mark, i - 1]) + first_exclamation_mark = None + + # Exclude the groups determined above. + sanitized_diff = [] + for i in range(len(diff)): + if len(to_be_removed) > 0 and to_be_removed[0][0] <= i and i <= to_be_removed[0][1]: + pass + elif len(to_be_removed) > 0 and to_be_removed[0][1] < i: + while len(to_be_removed) > 0 and to_be_removed[0][1] < i: + to_be_removed.pop(0) + else: + sanitized_diff.append(diff[i]) + + # Print only the lines that start with an exclamation mark. This is how + # difflib's context diff is provided. + output = '' + for i in sanitized_diff: + if i.startswith('!'): + if len(output) == 0: + print (f'=== {dhcpdb_create_script} vs {upgrade_script} ===') + output += i + '\n' + + # Only print if we have something to print to avoid a newline. + if len(output) > 0: + print(output) + + return len(output) > 0 + +def execute(command): + ''' Executes a shell command and returns its output. + + :param command: the command to be executed + :type command: str + + :return: the standard output from the command + :type: str + ''' + if 'DEBUG' in os.environ: + print(f'> {command}') + p = subprocess.Popen(command, encoding='utf-8', shell=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output, error = p.communicate() + if error: + print('ERROR:', error, file=sys.stderr) + exit(1) + return output.strip() + +def find_files_in_same_directory_starting_with(file, startswith): + ''' Returns the files that start with given criteria. + + :param file: the path to the file that lives in the same directory as the + desired file + :type file: str + + :param startswith: the substring that the file name should start with + :type startswith: str + ''' + dirname = os.path.dirname(file) + files = [] + for i in glob.glob(dirname + '/*'): + matches = re.search('^' + dirname + '/' + startswith + '.*$', i) + if matches is not None: + files.append(matches.group()) + return sorted(files) + +def get_files_changed_in_gitref_range(gitref_range): + # Change to toplevel for easier management of file names. + toplevel = execute('git rev-parse --show-toplevel') + os.chdir(toplevel) + + # Get the list of changed files since {gitref}. + return execute(f'git diff --name-only "{gitref_range}" ./src/share/database/scripts').splitlines() + + +def main(parameters): + # Print help if requested. + if '-h'in parameters or '--help' in parameters: + usage() + exit(0) + + # Parse parameters. + p1 = None + p2 = None + for i in parameters: + if p1 is None: + p1 = i + elif p2 is None: + p2 = i + else: + print('ERROR: Too many arguments.', file=sys.stderr) + usage() + exit(1) + + # Determine the files that we need to check. + if p1 is None and p2 is None: + files = get_files_changed_in_gitref_range(f'$(git merge-base origin/master HEAD)') + elif p1 is not None: + if os.path.isfile(p1): + files = [p1] + else: + files = get_files_changed_in_gitref_range(f'{p1}') + else: + files = get_files_changed_in_gitref_range(f'{p1}..{p2}') + + diff_found = False + for i in files: + basename = os.path.basename(i) + if basename.startswith('dhcpdb_create'): + # Get the latest upgrade script. + latest_upgrade_script = find_files_in_same_directory_starting_with(i, 'upgrade_')[-1] + # Do the diff. + diff_found |= diff(i, latest_upgrade_script) + elif basename.startswith('upgrade_'): + # Get the dhcpdb_create script. + dhcpdb_create = find_files_in_same_directory_starting_with(i, 'dhcpdb_create')[-1] + # Do the diff. + diff_found |= diff(dhcpdb_create, i) + + # For any diff, return 1 so that CI complains. # For no diff, return 0 to appease CI. + return int(diff_found) + +if __name__ == '__main__': + sys.exit(main(sys.argv[1:])) |