Skip to content

Commit 01e4ad0

Browse files
authored
ARROW-16602: [Dev] Use GitHub API to merge pull request (#13184)
We use local "git merge" to merge a pull request in dev/merge_arrow_pr.py. If we use "git merge" to merge a pull request, GitHub's Web UI shows "Closed" mark not "Merged" mark in a pull request page. This sometimes confuses new contributors. "Why was my pull request closed without merging?" See #12004 (comment) for example. If we use GitHub API https://docs.github.com/en/rest/pulls/pulls#merge-a-pull-request to merge a pull request, GitHub's Web UI shows "Merged" mark not "Closed" mark. See #13180 for example. I used GitHub API to merge the pull request. And we don't need to create a local branch on local repository to merge a pull request. But we must specify ARROW_GITHUB_API_TOKEN to run dev/merge_arrow_pr.py. Authored-by: Sutou Kouhei <kou@clear-code.com> Signed-off-by: Sutou Kouhei <kou@clear-code.com>
1 parent 50fbfa1 commit 01e4ad0

5 files changed

Lines changed: 104 additions & 128 deletions

File tree

dev/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,9 @@ After installed, it runs the merge script.
5353
have to install Python dependencies yourself and then run `dev/merge_arrow_pr.py`
5454
directly)
5555

56-
The merge script uses the GitHub REST API; if you encounter rate limit issues,
57-
you may set a `ARROW_GITHUB_API_TOKEN` environment variable to use a Personal
58-
Access Token.
56+
The merge script uses the GitHub REST API. You must set a
57+
`ARROW_GITHUB_API_TOKEN` environment variable to use a Personal Access
58+
Token. You need to add `workflow` scope to the Personal Access Token.
5959

6060
You can specify the username and the password of your JIRA account in
6161
`APACHE_JIRA_USERNAME` and `APACHE_JIRA_PASSWORD` environment variables.

dev/merge.conf.sample

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,7 @@
2323
# token credentials. Ensure that the file is properly protected.
2424
username=johnsmith
2525
password=123456
26+
27+
[github]
28+
# GitHub's personal access token. "workflow" scope is needed.
29+
api_token=ghp_ABC

dev/merge_arrow_pr.py

Lines changed: 87 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,7 @@
3333
# Configuration environment variables:
3434
# - APACHE_JIRA_USERNAME: your Apache JIRA ID
3535
# - APACHE_JIRA_PASSWORD: your Apache JIRA password
36-
# - ARROW_GITHUB_API_TOKEN: a GitHub API token to use for API requests (to
37-
# avoid rate limiting)
36+
# - ARROW_GITHUB_API_TOKEN: a GitHub API token to use for API requests
3837
# - PR_REMOTE_NAME: the name of the remote to the Apache git repo (set to
3938
# 'apache' by default)
4039
# - DEBUG: use for testing to avoid pushing to apache (0 by default)
@@ -71,14 +70,12 @@
7170
print("**************** DEBUGGING ****************")
7271

7372

74-
# Prefix added to temporary branches
75-
BRANCH_PREFIX = "PR_TOOL"
7673
JIRA_API_BASE = "https://issues.apache.org/jira"
7774

7875

7976
def get_json(url, headers=None):
80-
req = requests.get(url, headers=headers)
81-
return req.json()
77+
response = requests.get(url, headers=headers)
78+
return response.json()
8279

8380

8481
def run_cmd(cmd):
@@ -101,21 +98,6 @@ def run_cmd(cmd):
10198
return output
10299

103100

104-
original_head = run_cmd("git rev-parse HEAD")[:8]
105-
106-
107-
def clean_up():
108-
print("Restoring head pointer to %s" % original_head)
109-
run_cmd("git checkout %s" % original_head)
110-
111-
branches = run_cmd("git branch").replace(" ", "").split("\n")
112-
113-
for branch in [x for x in branches
114-
if x.startswith(BRANCH_PREFIX)]:
115-
print("Deleting local branch %s" % branch)
116-
run_cmd("git branch -D %s" % branch)
117-
118-
119101
_REGEX_CI_DIRECTIVE = re.compile(r'\[[^\]]*\]')
120102

121103

@@ -255,28 +237,55 @@ def format_jira_output(jira_id, status, summary, assignee, components):
255237

256238
class GitHubAPI(object):
257239

258-
def __init__(self, project_name):
240+
def __init__(self, project_name, cmd):
259241
self.github_api = ("https://api.github.com/repos/apache/{0}"
260242
.format(project_name))
261243

262-
token = os.environ.get('ARROW_GITHUB_API_TOKEN', None)
263-
if token:
264-
self.headers = {'Authorization': 'token {0}'.format(token)}
265-
else:
266-
self.headers = None
244+
token = None
245+
config = load_configuration()
246+
if "github" in config.sections():
247+
token = config["github"]["api_token"]
248+
if not token:
249+
token = os.environ.get('ARROW_GITHUB_API_TOKEN')
250+
if not token:
251+
token = cmd.prompt('Env ARROW_GITHUB_API_TOKEN not set, '
252+
'please enter your GitHub API token '
253+
'(GitHub personal access token):')
254+
headers = {
255+
'Accept': 'application/vnd.github.v3+json',
256+
'Authorization': 'token {0}'.format(token),
257+
}
258+
self.headers = headers
267259

268260
def get_pr_data(self, number):
269261
return get_json("%s/pulls/%s" % (self.github_api, number),
270262
headers=self.headers)
271263

264+
def get_pr_commits(self, number):
265+
return get_json("%s/pulls/%s/commits" % (self.github_api, number),
266+
headers=self.headers)
267+
268+
def merge_pr(self, number, commit_title, commit_message):
269+
url = f'{self.github_api}/pulls/{number}/merge'
270+
payload = {
271+
'commit_title': commit_title,
272+
'commit_message': commit_message,
273+
'merge_method': 'squash',
274+
}
275+
response = requests.put(url, headers=self.headers, json=payload)
276+
result = response.json()
277+
if response.status_code != 200 and 'merged' not in result:
278+
result['merged'] = False
279+
result['message'] += f': {url}'
280+
return result
281+
272282

273283
class CommandInput(object):
274284
"""
275285
Interface to input(...) to enable unit test mocks to be created
276286
"""
277287

278288
def fail(self, msg):
279-
clean_up()
280289
raise Exception(msg)
281290

282291
def prompt(self, prompt):
@@ -300,6 +309,7 @@ class PullRequest(object):
300309

301310
def __init__(self, cmd, github_api, git_remote, jira_con, number):
302311
self.cmd = cmd
312+
self._github_api = github_api
303313
self.git_remote = git_remote
304314
self.con = jira_con
305315
self.number = number
@@ -358,35 +368,23 @@ def merge(self):
358368
"""
359369
merge the requested PR and return the merge hash
360370
"""
361-
pr_branch_name = "%s_MERGE_PR_%s" % (BRANCH_PREFIX, self.number)
362-
target_branch_name = "%s_MERGE_PR_%s_%s" % (BRANCH_PREFIX,
363-
self.number,
364-
self.target_ref.upper())
365-
run_cmd("git fetch %s pull/%s/head:%s" % (self.git_remote,
366-
self.number,
367-
pr_branch_name))
368-
run_cmd("git fetch %s %s:%s" % (self.git_remote, self.target_ref,
369-
target_branch_name))
370-
run_cmd("git checkout %s" % target_branch_name)
371-
372-
had_conflicts = False
373-
try:
374-
run_cmd(['git', 'merge', pr_branch_name, '--ff', '--squash'])
375-
except Exception as e:
376-
msg = ("Error merging: %s\nWould you like to "
377-
"manually fix-up this merge?" % e)
378-
self.cmd.continue_maybe(msg)
379-
msg = ("Okay, please fix any conflicts and 'git add' "
380-
"conflicting files... Finished?")
381-
self.cmd.continue_maybe(msg)
382-
had_conflicts = True
383-
384-
commit_authors = run_cmd(['git', 'log', 'HEAD..%s' % pr_branch_name,
385-
'--pretty=format:%an <%ae>']).split("\n")
386-
commit_co_authors = run_cmd(['git', 'log', 'HEAD..%s' % pr_branch_name,
387-
'--pretty=%(trailers:key=Co-authored-by,'
388-
'valueonly)']).split("\n")
389-
commit_co_authors = list(filter(None, commit_co_authors))
371+
commits = self._github_api.get_pr_commits(self.number)
372+
373+
def format_commit_author(commit):
374+
author = commit['commit']['author']
375+
name = author['name']
376+
email = author['email']
377+
return f'{name} <{email}>'
378+
commit_authors = [format_commit_author(commit) for commit in commits]
379+
co_authored_by_re = re.compile(r'^Co-authored-by:\s*(.*)')
380+
381+
def extract_co_authors(commit):
382+
message = commit['commit']['message']
383+
return co_authored_by_re.findall(message)
384+
commit_co_authors = []
385+
for commit in commits:
386+
commit_co_authors.extend(extract_co_authors(commit))
387+
390388
all_commit_authors = commit_authors + commit_co_authors
391389
distinct_authors = sorted(set(all_commit_authors),
392390
key=lambda x: commit_authors.count(x),
@@ -396,74 +394,51 @@ def merge(self):
396394
print("Author {}: {}".format(i + 1, author))
397395

398396
if len(distinct_authors) > 1:
399-
primary_author, distinct_authors = get_primary_author(
397+
primary_author, distinct_other_authors = get_primary_author(
400398
self.cmd, distinct_authors)
401399
else:
402400
# If there is only one author, do not prompt for a lead author
403-
primary_author = distinct_authors[0]
404-
405-
merge_message_flags = []
401+
primary_author = distinct_authors.pop()
402+
distinct_other_authors = []
406403

407-
merge_message_flags += ["-m", self.title]
404+
commit_title = f'{self.title} (#{self.number})'
405+
commit_message_chunks = []
408406
if self.body is not None:
409-
merge_message_flags += ["-m", self.body]
407+
commit_message_chunks.append(self.body)
410408

411409
committer_name = run_cmd("git config --get user.name").strip()
412410
committer_email = run_cmd("git config --get user.email").strip()
413411

414-
authors = ("Authored-by:" if len(distinct_authors) == 1
412+
authors = ("Authored-by:" if len(distinct_other_authors) == 0
415413
else "Lead-authored-by:")
416-
authors += " %s" % (distinct_authors.pop(0))
414+
authors += " %s" % primary_author
417415
if len(distinct_authors) > 0:
418416
authors += "\n" + "\n".join(["Co-authored-by: %s" % a
419-
for a in distinct_authors])
417+
for a in distinct_other_authors])
420418
authors += "\n" + "Signed-off-by: %s <%s>" % (committer_name,
421419
committer_email)
420+
commit_message_chunks.append(authors)
422421

423-
if had_conflicts:
424-
committer_name = run_cmd("git config --get user.name").strip()
425-
committer_email = run_cmd("git config --get user.email").strip()
426-
message = ("This patch had conflicts when merged, "
427-
"resolved by\nCommitter: %s <%s>" %
428-
(committer_name, committer_email))
429-
merge_message_flags += ["-m", message]
430-
431-
# The string "Closes #%s" string is required for GitHub to correctly
432-
# close the PR
433-
merge_message_flags += [
434-
"-m",
435-
"Closes #%s from %s"
436-
% (self.number, self.description)]
437-
merge_message_flags += ["-m", authors]
422+
commit_message = "\n\n".join(commit_message_chunks)
438423

439424
if DEBUG:
440-
print("\n".join(merge_message_flags))
441-
442-
run_cmd(['git', 'commit',
443-
'--no-verify', # do not run commit hooks
444-
'--author="%s"' % primary_author] +
445-
merge_message_flags)
446-
447-
self.cmd.continue_maybe("Merge complete (local ref %s). Push to %s?"
448-
% (target_branch_name, self.git_remote))
425+
print(commit_title)
426+
print()
427+
print(commit_message)
449428

450-
try:
451-
push_cmd = ('git push %s %s:%s' % (self.git_remote,
452-
target_branch_name,
453-
self.target_ref))
454-
if DEBUG:
455-
print(push_cmd)
456-
else:
457-
run_cmd(push_cmd)
458-
except Exception as e:
459-
clean_up()
460-
self.cmd.fail("Exception while pushing: %s" % e)
429+
if DEBUG:
430+
merge_hash = None
431+
else:
432+
result = self._github_api.merge_pr(self.number,
433+
commit_title,
434+
commit_message)
435+
if not result['merged']:
436+
message = result['message']
437+
self.cmd.fail(f'Failed to merge pull request: {message}')
438+
merge_hash = result['sha']
461439

462-
merge_hash = run_cmd("git rev-parse %s" % target_branch_name)[:8]
463-
clean_up()
464440
print("Pull request #%s merged!" % self.number)
465441
print("Merge hash: %s" % merge_hash)
466-
return merge_hash
467442

468443

469444
def get_primary_author(cmd, distinct_authors):
@@ -475,18 +450,17 @@ def get_primary_author(cmd, distinct_authors):
475450
"\"name <email>\" [%s]: " % distinct_authors[0])
476451

477452
if primary_author == "":
478-
return distinct_authors[0], distinct_authors
453+
return distinct_authors[0], distinct_authors[1:]
479454

480455
if author_pat.match(primary_author):
481456
break
482457
print('Bad author "{}", please try again'.format(primary_author))
483458

484459
# When primary author is specified manually, de-dup it from
485460
# author list and put it at the head of author list.
486-
distinct_authors = [x for x in distinct_authors
487-
if x != primary_author]
488-
distinct_authors = [primary_author] + distinct_authors
489-
return primary_author, distinct_authors
461+
distinct_other_authors = [x for x in distinct_authors
462+
if x != primary_author]
463+
return primary_author, distinct_other_authors
490464

491465

492466
def prompt_for_fix_version(cmd, jira_issue):
@@ -581,25 +555,23 @@ def cli():
581555

582556
os.chdir(ARROW_HOME)
583557

584-
github_api = GitHubAPI(PROJECT_NAME)
558+
github_api = GitHubAPI(PROJECT_NAME, cmd)
585559

586560
jira_con = connect_jira(cmd)
587561
pr = PullRequest(cmd, github_api, PR_REMOTE_NAME, jira_con, pr_num)
588562

589563
if pr.is_merged:
590-
print("Pull request %s has already been merged")
564+
print("Pull request %s has already been merged" % pr_num)
591565
sys.exit(0)
592566

593567
if not pr.is_mergeable:
594-
msg = ("Pull request %s is not mergeable in its current form.\n"
595-
% pr_num + "Continue? (experts only!)")
596-
cmd.continue_maybe(msg)
568+
print("Pull request %s is not mergeable in its current form" % pr_num)
569+
sys.exit(1)
597570

598571
pr.show()
599572

600573
cmd.continue_maybe("Proceed with merging pull request #%s?" % pr_num)
601574

602-
# merged hash not used
603575
pr.merge()
604576

605577
if pr.jira_issue is None:

dev/release/post-04-website.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ rough_n_development_months=$((
6767
git_tag=apache-arrow-${version}
6868
git_range=apache-arrow-${previous_version}..${git_tag}
6969

70-
committers_command_line="git shortlog -csn ${git_range}"
70+
committers_command_line="git shortlog -sn --group=trailer:signed-off-by ${git_range}"
7171
contributors_command_line="git shortlog -sn ${git_range}"
7272

7373
committers=$(${committers_command_line})

dev/test_merge_arrow_pr.py

100644100755
Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -211,22 +211,22 @@ def test_multiple_authors_bad_input():
211211
distinct_authors = [a0, a1]
212212

213213
cmd = FakeCLI(responses=[''])
214-
primary_author, new_distinct_authors = merge_arrow_pr.get_primary_author(
215-
cmd, distinct_authors)
214+
primary_author, distinct_other_authors = \
215+
merge_arrow_pr.get_primary_author(cmd, distinct_authors)
216216
assert primary_author == a0
217-
assert new_distinct_authors == [a0, a1]
217+
assert distinct_other_authors == [a1]
218218

219219
cmd = FakeCLI(responses=['oops', a1])
220-
primary_author, new_distinct_authors = merge_arrow_pr.get_primary_author(
221-
cmd, distinct_authors)
220+
primary_author, distinct_other_authors = \
221+
merge_arrow_pr.get_primary_author(cmd, distinct_authors)
222222
assert primary_author == a1
223-
assert new_distinct_authors == [a1, a0]
223+
assert distinct_other_authors == [a0]
224224

225225
cmd = FakeCLI(responses=[a2])
226-
primary_author, new_distinct_authors = merge_arrow_pr.get_primary_author(
227-
cmd, distinct_authors)
226+
primary_author, distinct_other_authors = \
227+
merge_arrow_pr.get_primary_author(cmd, distinct_authors)
228228
assert primary_author == a2
229-
assert new_distinct_authors == [a2, a0, a1]
229+
assert distinct_other_authors == [a0, a1]
230230

231231

232232
def test_jira_already_resolved():

0 commit comments

Comments
 (0)