summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrei Pavel <andrei@isc.org>2022-01-21 13:21:23 +0100
committerAndrei Pavel <andrei@isc.org>2022-01-21 13:21:23 +0100
commite1de26409d7cb0052c09aed760384ed485c5652e (patch)
tree5bf37df5d570686baa7bc89d778dd0ed745df88a
parent[#2222] Removed the skipped when equal code (diff)
downloadkea-e1de26409d7cb0052c09aed760384ed485c5652e.tar.xz
kea-e1de26409d7cb0052c09aed760384ed485c5652e.zip
[#2205] are-scripts-in-sync.py
This script is used to maintain upgrade scripts in Kea and to ensure that the incremental and the full schema scripts are in sync.
-rw-r--r--.gitlab-ci.yml6
-rwxr-xr-xsrc/share/database/scripts/utils/are-scripts-in-sync.py233
2 files changed, 239 insertions, 0 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 816076191f..5bb6e4120e 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -129,6 +129,12 @@ danger:
- gem install danger-commit_lint
- danger --fail-on-errors=true --new-comment
+dhcpdb_create-upgrade-consistency:
+ stage: test
+ image: "${CI_REGISTRY_IMAGE}:latest"
+ script:
+ - ./src/share/database/scripts/utils/are-scripts-in-sync.py
+
duplicate-includes:
stage: test
image: "${CI_REGISTRY_IMAGE}:latest"
diff --git a/src/share/database/scripts/utils/are-scripts-in-sync.py b/src/share/database/scripts/utils/are-scripts-in-sync.py
new file mode 100755
index 0000000000..848e6c9eef
--- /dev/null
+++ b/src/share/database/scripts/utils/are-scripts-in-sync.py
@@ -0,0 +1,233 @@
+#!/usr/bin/env python3
+
+# This script is used to maintain upgrade scripts in Kea and to ensure that the
+# incremental and the full schema scripts are in sync. See usage() function
+# below for help in how to use it.
+
+import difflib
+import glob
+import os
+import re
+import subprocess
+import sys
+
+
def usage():
    ''' Prints the command line help to standard output.

    The text describes the four supported invocations: no parameters, a single
    file, a single git ref, and two git refs.
    '''
    print('''\
Usage: are-scripts-in-sync.py [-h|--help] [$file|$gitref [$gitref2]]

$ are-scripts-in-sync.py
  No parameters are given.
  All the SQL scripts changed in the current branch are checked.

$ are-scripts-in-sync.py $file
  One file is given. It must be a dhcpdb_create script or an upgrade script.
  If it is a dhcpdb_create script, it is compared with the latest upgrade script.
  If it is an upgrade script, it is compared to the dhcpdb_create script.

$ are-scripts-in-sync.py $gitref
  One git ref is given. It can be a branch, commit, tag, etc.
  All the SQL scripts changed between HEAD and $gitref are checked.

$ are-scripts-in-sync.py $gitref1 $gitref2
  Two git refs are given. They can be branches, commits, tags, etc.
  All the SQL scripts changed between $gitref1 and $gitref2 are checked.''')
+
+
def filter_the_noise(text, is_upgrade_script):
    ''' Keeps only the lines of a script that are worth comparing.

    Lines are collected between a start marker and a stop marker. The marker
    lines themselves are never part of the result, and neither are transaction
    statements or empty lines.

    :param text: the lines of the script to be analyzed
    :type text: list of str

    :param is_upgrade_script: True for an upgrade script (collected between a
        line ending in "<<EOF" and a line that is exactly "EOF"), False for a
        dhcpdb_create script (collected between the lease4 "CREATE TABLE" line
        and a line containing "Notes:")
    :type is_upgrade_script: bool

    :return: the lines that were retained, in their original order
    :type: list of str
    '''
    if is_upgrade_script:
        start_marker, stop_marker = '<<EOF$', '^EOF$'
    else:
        start_marker, stop_marker = 'CREATE TABLE.*lease4', 'Notes:'

    # Start transactions, commits, and empty lines never take part in the
    # comparison.
    ignored_markers = ('^START TRANSACTION;$', '^COMMIT;$', '^$')

    retained = []
    collecting = False
    for line in text:
        if re.search(start_marker, line):
            collecting = True
            continue
        if re.search(stop_marker, line):
            collecting = False
            continue
        if any(re.search(marker, line) for marker in ignored_markers):
            continue
        if collecting:
            retained.append(line)
    return retained
+
def diff(dhcpdb_create_script, upgrade_script):
    ''' Compares the common parts of two files. Prints the difference.

    Only changed lines (the ones difflib's context diff marks with '!') are
    reported. Pure additions and deletions are ignored, and so are runs of
    more than 16 consecutive changed lines, which are considered to be
    outside the upgrade script's scope.

    :param dhcpdb_create_script: the path to the dhcpdb_create script
    :type dhcpdb_create_script: str
    :param upgrade_script: the path to the upgrade script
    :type upgrade_script: str

    :return: True if there is a difference, False otherwise
    :type: bool
    '''
    with open(dhcpdb_create_script, encoding='utf-8') as create_file:
        create_text = create_file.readlines()

    with open(upgrade_script, encoding='utf-8') as upgrade_file:
        upgrade_text = upgrade_file.readlines()

    # PostgreSQL upgrade scripts need the $ delimiters escaped as opposed to
    # create scripts. So escape them in the create script for the duration of
    # this diff so that they don't come up in the diff (or so that they do
    # come up if they are not correctly escaped in the upgrade script).
    if dhcpdb_create_script.endswith('.pgsql'):
        create_text = [line.replace('$', r'\$') for line in create_text]

    # Removes portions of the script which are always different: the beginning
    # and the end.
    create_text = filter_the_noise(create_text, False)
    upgrade_text = filter_the_noise(upgrade_text, True)

    # Use difflib to create the diff. Strip the line terminators one line at
    # a time so that a final line lacking a newline cannot fuse with the
    # control line that follows it.
    diff_lines = [line.rstrip('\n')
                  for line in difflib.context_diff(create_text, upgrade_text, n=0)]

    # Walk the diff and gather runs of consecutive '!' lines. Runs longer than
    # the heuristic limit are dropped. The '' sentinel closes a run that
    # reaches the very end of the diff, which is where the upgrade-side half
    # of the last hunk always sits.
    consecutive_lines_to_exclude = 16
    reported = []
    current_run = []
    for line in diff_lines + ['']:
        if line.startswith('!'):
            current_run.append(line)
            continue
        if len(current_run) <= consecutive_lines_to_exclude:
            reported.extend(current_run)
        current_run = []

    if not reported:
        return False

    # Header first, then the changed lines followed by one blank line.
    print(f'=== {dhcpdb_create_script} vs {upgrade_script} ===')
    print('\n'.join(reported) + '\n')
    return True
+
def execute(command):
    ''' Executes a shell command and returns its output.

    The command is echoed first when the DEBUG environment variable is set.
    If the command exits with a non-zero status or writes anything to standard
    error, the error is printed and the script exits with status 1.

    :param command: the command to be executed
    :type command: str

    :return: the standard output from the command, stripped of surrounding
        whitespace
    :type: str
    '''
    if 'DEBUG' in os.environ:
        print(f'> {command}')

    # NOTE: the command goes through the shell on purpose; callers rely on
    # shell features such as $(...) command substitution. Do not feed it
    # untrusted input.
    completed = subprocess.run(command, shell=True, encoding='utf-8',
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                               check=False)

    # A command can fail without writing to stderr, so the exit status is
    # checked in addition to the error stream.
    if completed.returncode != 0 or completed.stderr:
        reason = completed.stderr or \
            f'command exited with status {completed.returncode}'
        print('ERROR:', reason, file=sys.stderr)
        sys.exit(1)
    return completed.stdout.strip()
+
def find_files_in_same_directory_starting_with(file, startswith):
    ''' Returns the files that start with given criteria.

    The prefix is compared literally against file names, so special characters
    in the directory path or in the prefix have no special meaning.

    :param file: the path to the file that lives in the same directory as the
        desired file
    :type file: str

    :param startswith: the substring that the file name should start with
    :type startswith: str

    :return: the sorted paths of the matching files, each one prefixed with
        the directory part of the given file (no prefix if it has none)
    :type: list of str
    '''
    dirname = os.path.dirname(file)

    # A bare file name has an empty directory part. Look in the current
    # directory in that case instead of globbing '/*'.
    pattern = glob.escape(dirname) + '/*' if dirname else '*'

    return sorted(path for path in glob.glob(pattern)
                  if os.path.basename(path).startswith(startswith))
+
def get_files_changed_in_gitref_range(gitref_range):
    ''' Lists the database script files reported by git diff for a ref range.

    As a side effect, the current working directory is changed to the top
    level of the git repository.

    :param gitref_range: the git ref or ref range that is passed to git diff
    :type gitref_range: str

    :return: the changed paths under ./src/share/database/scripts, one per
        entry
    :type: list of str
    '''
    # Work from the top level so that the names printed by git can be used
    # as paths directly.
    repository_root = execute('git rev-parse --show-toplevel')
    os.chdir(repository_root)

    changed = execute(f'git diff --name-only "{gitref_range}" ./src/share/database/scripts')
    return changed.splitlines()
+
+
def main(parameters):
    ''' Checks that the dhcpdb_create and upgrade scripts are in sync.

    :param parameters: the command line arguments without the program name:
        nothing, one file, one git ref, or two git refs
    :type parameters: list of str

    :return: 1 if any difference was found or a counterpart script is missing,
        0 otherwise
    :type: int
    '''
    # Print help if requested.
    if '-h' in parameters or '--help' in parameters:
        usage()
        sys.exit(0)

    # Parse parameters.
    if len(parameters) > 2:
        print('ERROR: Too many arguments.', file=sys.stderr)
        usage()
        sys.exit(1)

    # Determine the files that we need to check.
    if not parameters:
        # The command substitution is expanded by the shell when git runs.
        files = get_files_changed_in_gitref_range('$(git merge-base origin/master HEAD)')
    elif len(parameters) == 1:
        target = parameters[0]
        if os.path.isfile(target):
            files = [target]
        else:
            files = get_files_changed_in_gitref_range(target)
    else:
        first_ref, second_ref = parameters
        files = get_files_changed_in_gitref_range(f'{first_ref}..{second_ref}')

    failed = False
    for path in files:
        basename = os.path.basename(path)
        if basename.startswith('dhcpdb_create'):
            counterpart_prefix = 'upgrade_'
        elif basename.startswith('upgrade_'):
            counterpart_prefix = 'dhcpdb_create'
        else:
            # Not a script that this tool knows how to compare.
            continue

        # The last entry in sorted order is the latest upgrade script, or the
        # dhcpdb_create script respectively.
        candidates = find_files_in_same_directory_starting_with(path, counterpart_prefix)
        if not candidates:
            print(f'ERROR: no {counterpart_prefix}* script found next to {path}.',
                  file=sys.stderr)
            failed = True
            continue
        counterpart = candidates[-1]

        # Do the diff. The dhcpdb_create script always goes first.
        if counterpart_prefix == 'upgrade_':
            failed |= diff(path, counterpart)
        else:
            failed |= diff(counterpart, path)

    # For any diff, return 1 so that CI complains.
    # For no diff, return 0 to appease CI.
    return int(failed)


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))