#!/usr/bin/env python
# Originally modified from A. Israel's script seen at
# https://gist.github.com/aisrael/b2b78d9dfdd176a232b9
"""To run this script first install the dependencies

  python3 -m venv v
  source v/bin/activate
  pip install githubpy GitPython requests

Generate a github access token; this is needed as the anonymous access
to Github's API will easily hit the limit even with a single invocation.
For details see:
https://help.github.com/articles/creating-an-access-token-for-command-line-use/

Next either set the github token as an env variable
`GITHUB_ACCESS_TOKEN` or alternatively invoke the script with
`--token` switch.

Example:

  ceph-release-notes -r tags/v0.87..origin/giant \
      $(git rev-parse --show-toplevel)

"""

from __future__ import print_function
import argparse
import github
import os
import re
import sys
import requests
import time

from git import Repo


fixes_re = re.compile(r"Fixes\:? #(\d+)")
reviewed_by_re = re.compile(r"Rev(.*)By", re.IGNORECASE)
# labels is the list of relevant labels defined for github.com/ceph/ceph
labels = {'bluestore', 'build/ops', 'cephfs', 'common', 'core', 'mgr',
          'mon', 'performance', 'pybind', 'rdma', 'rgw', 'rbd', 'tests',
          'tools'}
merge_re = re.compile(r"Merge (pull request|PR) #(\d+).*")
# prefixes is the list of commit description prefixes we recognize
prefixes = ['bluestore', 'build/ops', 'cephfs', 'cephx', 'cli', 'cmake',
            'common', 'core', 'crush', 'doc', 'fs', 'librados', 'librbd',
            'log', 'mds', 'mgr', 'mon', 'msg', 'objecter', 'osd', 'pybind',
            'rbd', 'rbd-mirror', 'rbd-nbd', 'rgw', 'tests', 'tools']
signed_off_re = re.compile("Signed-off-by: (.+) <")
# accepts both http:// and https:// tracker links
tracker_re = re.compile(r"https?://tracker.ceph.com/issues/(\d+)")
rst_link_re = re.compile(r"([a-zA-Z0-9])_(\W)")
release_re = re.compile(
    r"^(nautilus|octopus|pacific|Pacific|quincy|Quincy|reef|Reef):\s*")

tracker_uri = "http://tracker.ceph.com/issues/{0}.json"

# extracts the name part of every "Signed-off-by: Name <mail>" trailer
_signed_off_author_re = re.compile(r"Signed-off-by:\s*(.*?)\s*<")
# title shape required in --strict mode: "<release>: <prefix>: ..."
_strict_title_re = (
    r'^(?:nautilus|octopus|pacific|quincy|reef):\s+(' +
    '|'.join(prefixes) +
    ')(:.*)'
)


def get_original_issue(issue, verbose):
    """Map a Backport-tracker issue to the issue it was copied from.

    Fetches ``tracker_uri`` for *issue* (with its relations) and, when the
    issue lives in the "Backport" tracker and has exactly one relation of
    type ``copied_to``, returns that relation's ``issue_id`` as a string.
    In every other case *issue* is returned unchanged.

    :param issue: tracker issue number, as a string
    :param verbose: when true, print the reason for each decision
    :returns: issue number as a string
    """
    r = requests.get(tracker_uri.format(issue),
                     params={"include": "relations"}).json()

    issue_url = "http://tracker.ceph.com/issues/" + issue
    tracker_name = r["issue"]["tracker"]["name"]

    # looking up for the original issue only makes sense
    # when dealing with an issue in the Backport tracker
    if tracker_name != "Backport":
        if verbose:
            print(issue_url + " is from the tracker " + tracker_name +
                  ", do not look for the original issue")
        return issue

    # if a Backport issue does not have a relation, keep it
    if "relations" not in r["issue"]:
        if verbose:
            print(issue_url +
                  " has no relations, do not look for the original issue")
        return issue

    copied_to = [
        str(relation['issue_id']) for relation in r["issue"]["relations"]
        if relation["relation_type"] == "copied_to"
    ]
    if not copied_to:
        if verbose:
            print(issue_url +
                  " has no copied_to relations; do not look for the" +
                  " original issue")
        return issue

    if len(copied_to) > 1:
        # ambiguous: keep the backport issue rather than guess
        if verbose:
            print("ERROR: " + issue_url +
                  " has more than one Copied To relation")
        return issue

    if verbose:
        print(issue_url +
              " is the backport of http://tracker.ceph.com/issues/" +
              copied_to[0])
    return copied_to[0]


def split_component(title, gh, number):
    """Return *title* prefixed with a component name.

    If *title* already starts with one of ``prefixes`` followed by a
    colon, the matched text is returned. Otherwise the labels of github
    issue *number* in ceph/ceph are looked up: a ``documentation`` label
    yields a ``doc: `` prefix, labels that are also in ``prefixes`` yield
    a sorted, comma-joined prefix, and anything else yields ``UNKNOWN: ``.

    :param title: PR title
    :param gh: github client used to fetch the issue labels
    :param number: PR/issue number
    :returns: the prefixed title
    """
    title_re = '(' + '|'.join(prefixes) + ')(:.*)'
    match = re.match(title_re, title)
    if match:
        return match.group(1) + match.group(2)

    issue = gh.repos("ceph")("ceph").issues(number).get()
    issue_labels = {it['name'] for it in issue['labels']}
    if 'documentation' in issue_labels:
        return 'doc: ' + title
    item = set(prefixes).intersection(issue_labels)
    if item:
        return ",".join(sorted(item)) + ': ' + title
    return 'UNKNOWN: ' + title


def _title_message(commit, pr, strict):
    """Pick the title and optional message for a PR from its merge commit.

    The first line of ``commit.message`` is ignored; of the remaining
    lines, those matching ``reviewed_by_re`` and blank ones are dropped.

    :param commit: object with a ``message`` string attribute
    :param pr: mapping with a ``title`` entry
    :param strict: when true, always use the PR title and no message
    :returns: ``(title, message)``; ``message`` is ``None`` unless the
              commit body carries more than a replacement title
    """
    title = pr['title']
    if strict:
        return (title, None)

    lines = []
    for line in commit.message.split('\n')[1:]:
        if reviewed_by_re.match(line):
            continue
        line = line.strip()
        if line:
            lines.append(line)

    if not lines:
        return (title, None)
    if lines[0] == title.strip():
        # the body merely repeats the PR title
        return (title, None)
    if len(lines) == 1:
        # assume that a single line means the intention is to
        # re-write the PR title
        return (lines[0], None)
    if len(lines) < 3 and 'refs/pull' in lines[0]:
        # assume the intent was rewriting the title and something like
        # ptl-tool was used to generate the merge message
        return (lines[1], None)
    message = " " + "\n ".join(lines)
    return (title, message)


def _fetch_pull_request(gh, number, retries=30):
    """Fetch PR *number* of ceph/ceph, retrying with a growing delay.

    Sleeps ``2 * attempt`` seconds between attempts and re-raises the
    last exception once *retries* attempts have failed.
    """
    for attempt in range(1, retries + 1):
        try:
            return gh.repos("ceph")("ceph").pulls(number).get()
        except Exception:
            if attempt == retries:
                print(f"Could not fetch PR {number} in {retries} tries.")
                raise
            sleep_time = 2 * attempt
            print(f"Failed to fetch PR {number}, sleeping for {sleep_time} seconds")
            time.sleep(sleep_time)


def _format_issue_refs(issue_ids, plaintext, html, markdown):
    """Render *issue_ids* as a comma-separated list ending in ``", "``.

    The format follows the first true flag among *plaintext*, *html* and
    *markdown*; reStructuredText is used when none is set.
    """
    if plaintext:
        template = '#{issue}'
    elif html:
        template = ('<a href="http://tracker.ceph.com/issues/{issue}">'
                    'issue#{issue}</a>')
    elif markdown:
        template = '[issue#{issue}](http://tracker.ceph.com/issues/{issue})'
    else:
        template = '`issue#{issue} <http://tracker.ceph.com/issues/{issue}>`_'
    # ids are digit strings; sort numerically so the output is stable
    ordered = sorted(issue_ids, key=int)
    return ", ".join(template.format(issue=i) for i in ordered) + ", "


def make_release_notes(gh, repo, ref, cherry_picks, plaintext, html, markdown,
                       verbose, strict, use_tags, include_pr_messages):
    """Print one release-note entry per PR merged in *ref*.

    PR numbers are taken from merge commits in *ref* whose summary matches
    ``merge_re`` plus any listed in *cherry_picks*; each PR is fetched
    from github, and the entries are printed sorted by lower-cased title.

    :param gh: github client (``gh.repos("ceph")("ceph")...`` is used)
    :param repo: git repository object (``iter_commits``/``commit``)
    :param ref: revision range handed to ``repo.iter_commits``
    :param cherry_picks: comma-separated PR numbers, or a false value
    :param plaintext: emit plain text entries
    :param html: emit html entries
    :param markdown: emit markdown entries
    :param verbose: passed on to ``get_original_issue``
    :param strict: require an associated issue and a
                   ``<release>: <prefix>: ...`` title; PRs without an
                   issue are reported and skipped
    :param use_tags: derive the component via ``split_component``
    :param include_pr_messages: also print the message of each entry
    """
    issue2prs = {}
    pr2issues = {}
    pr2info = {}

    # one pass over the merge commits records both the PR number and the
    # commit that merged it
    merges = {}
    merge_commits = {}
    for commit in repo.iter_commits(ref, merges=True):
        merge = merge_re.match(commit.summary)
        if not merge:
            continue
        number = merge.group(2)
        merges[number] = "merge_commit"
        merge_commits[number] = commit

    if cherry_picks:
        for number in cherry_picks.split(','):
            number = number.strip()
            if number:
                merges[number] = "cherry_pick"

    for number in merges:
        print("Considering PR#" + number)
        # do not pick up ceph/ceph-qa-suite.git PRs
        if int(number) < 1311:
            print("Ignoring low-numbered PR, probably picked up from"
                  " ceph/ceph-qa-suite.git")
            continue

        pr = _fetch_pull_request(gh, number)

        commit = None
        if merges[number] == "cherry_pick":
            try:
                commit = repo.commit(pr['merge_commit_sha'])
            except Exception:
                # best effort: without the commit the PR title and body
                # are used below
                commit = None
        elif merges[number] == "merge_commit":
            commit = merge_commits[number]

        if commit:
            (title, message) = _title_message(commit, pr, strict)
        else:
            (title, message) = (pr['title'], pr['body'])

        issues = []
        if pr['body']:
            issues = fixes_re.findall(pr['body']) + tracker_re.findall(
                pr['body']
            )

        # dict used as an insertion-ordered set of author names
        authors = {}
        if commit:
            for c in repo.iter_commits(
                    "{sha1}^1..{sha1}^2".format(sha1=commit.hexsha)
            ):
                for author in _signed_off_author_re.findall(c.message):
                    authors[author] = 1
                issues.extend(fixes_re.findall(c.message) +
                              tracker_re.findall(c.message))
        else:
            # message is the PR body here and may be None
            for author in _signed_off_author_re.findall(message or ""):
                if author == "[Your Name]":
                    continue
                authors[author] = 1

        if authors:
            author = ", ".join(authors)
        elif commit:
            author = commit.parents[-1].author.name
        else:
            author = pr['user']['login']

        if strict and not issues:
            print("ERROR: https://github.com/ceph/ceph/pull/" +
                  str(number) + " has no associated issue")
            continue

        if strict:
            match = re.match(_strict_title_re, title)
            if not match:
                print("ERROR: https://github.com/ceph/ceph/pull/" +
                      str(number) + " title " + title +
                      " does not match " + _strict_title_re)
            else:
                title = match.group(1) + match.group(2)
        if use_tags:
            title = split_component(title, gh, number)

        title = title.strip(' \t\n\r\f\v\\.,;:-=')
        # escape asterisks, which is used by reStructuredTextrst for inline
        # emphasis
        title = title.replace('*', '\\*')
        # and escape the underscores for noting a link
        title = rst_link_re.sub(r'\1\_\2', title)
        # remove release prefix for backports
        title = release_re.sub('', title)
        pr2info[number] = (author, title, message)

        for issue in set(issues):
            if strict:
                issue = get_original_issue(issue, verbose)
            issue2prs.setdefault(issue, set()).add(number)
            pr2issues.setdefault(number, set()).add(issue)
        sys.stdout.write('.')

    print(" done collecting merges.")

    if strict:
        for (issue, prs) in issue2prs.items():
            if len(prs) > 1:
                print(">>>>>>> " + str(len(prs)) + " pr for issue " +
                      issue + " " + str(prs))

    for (pr, (author, title, message)) in sorted(
        pr2info.items(), key=lambda item: item[1][1].lower()
    ):
        if pr in pr2issues:
            issues = _format_issue_refs(pr2issues[pr],
                                        plaintext, html, markdown)
        else:
            issues = ''

        if plaintext:
            print("* {title} ({issues}{author})".format(
                title=title, issues=issues, author=author))
        elif html:
            print(
                (
                    "<li><p>{title} ({issues}<a href=\""
                    "https://github.com/ceph/ceph/pull/{pr}\""
                    ">pr#{pr}</a>, {author})</p></li>"
                ).format(
                    title=title, issues=issues, author=author, pr=pr
                )
            )
        elif markdown:
            # NOTE(review): a former `.replace('.', '.')` here was a
            # no-op -- presumably lost markup; confirm how '.' should be
            # escaped for markdown.
            markdown_title = title.replace('_', '\\_')
            print("- {title} ({issues}[pr#{pr}](https://github.com/ceph/ceph/pull/{pr}), {author})\n".format(
                title=markdown_title, issues=issues, author=author, pr=pr))
        else:
            print(
                (
                    "* {title} ({issues}`pr#{pr} <"
                    "https://github.com/ceph/ceph/pull/{pr}"
                    ">`_, {author})"
                ).format(
                    title=title, issues=issues, author=author, pr=pr
                )
            )
        if include_pr_messages and message:
            print(message)


def main():
    """Parse the command line and print the release notes."""
    desc = '''
Make ceph release notes for a given revision. Eg usage:

$ ceph-release-notes -r tags/v0.87..origin/giant \
    $(git rev-parse --show-toplevel)

It is recommended to set the github env. token in order to avoid
hitting the api rate limits.
'''

    parser = argparse.ArgumentParser(
        description=desc,
        formatter_class=argparse.RawTextHelpFormatter
    )

    parser.add_argument("--rev", "-r",
                        help="git revision range for creating release notes")
    parser.add_argument("--cherry_picks", "-c",
                        help="PRs associated with any last-minute cherry-picks. Provide PR numbers as a string separated by commas, i.e. '50575,50625'")
    parser.add_argument("--text", "-t",
                        action='store_true', default=None,
                        help="output plain text only, no links")
    parser.add_argument("--html",
                        action='store_true', default=None,
                        help="output html format for (old wordpress) website blog")
    parser.add_argument("--markdown",
                        action='store_true', default=None,
                        help="output markdown format for new ceph.io blog")
    parser.add_argument("--verbose", "-v",
                        action='store_true', default=None,
                        help="verbose")
    parser.add_argument("--strict",
                        action='store_true', default=None,
                        help="strict, recommended only for backport releases")
    parser.add_argument("repo", metavar="repo",
                        help="path to ceph git repo")
    parser.add_argument(
        "--token",
        default=os.getenv("GITHUB_ACCESS_TOKEN"),
        help="Github Access Token ($GITHUB_ACCESS_TOKEN otherwise)",
    )
    # NOTE(review): without action='store_true' this option requires a
    # value, and any non-empty value (even "false") enables it. Left
    # as-is so existing `--use-tags <value>` invocations keep working.
    parser.add_argument("--use-tags", default=False,
                        help="Use github tags to guess the component")
    parser.add_argument("--include-pr-messages", default=False,
                        action='store_true',
                        help="Include full PR message in addition to PR title, if available")

    args = parser.parse_args()
    gh = github.GitHub(
        access_token=args.token)

    make_release_notes(
        gh,
        Repo(args.repo),
        args.rev,
        args.cherry_picks,
        args.text,
        args.html,
        args.markdown,
        args.verbose,
        args.strict,
        args.use_tags,
        args.include_pr_messages
    )


if __name__ == "__main__":
    main()