Rewrite getChangelog.py in GraphQL

This commit is contained in:
Keunes 2024-06-04 22:54:30 +02:00
parent d455323ca8
commit 56e8a3b899
1 changed files with 560 additions and 80 deletions

View File

@ -1,90 +1,570 @@
#!/usr/bin/env python3
try:
from graphqlclient import GraphQLClient
import json
import time
from datetime import datetime
import csv
from collections import OrderedDict
import sys
import os
import getpass
import threading
import urllib.error
import textwrap
except ModuleNotFoundError as e:
print(f"The '{e.name}' module is not installed. Please install it using 'pip install {e.name}' and try again.")
exit()
import requests
import time
import re
# Define variables
owner = os.getenv('OWNER', "AntennaPod") # The owner (organisation or user) of the repository
repo = os.getenv('REPO', "AntennaPod") # The repository name
token = os.getenv('GITHUB_API_TOKEN') # The GitHub API token [environment variable, otherwise user input]
base_ref = os.getenv('BASE') # The base reference (release code or branch); point of reference [environment variable, otherwise user input]
head_ref = os.getenv('HEAD') # The head reference (release code or branch); environment containing the changes [environment variable, otherwise user input]
max_associatedPRs = 5 # The maximum number of pull requests that the script will fetch per commit
client = GraphQLClient('https://api.github.com/graphql') # Client used for every GraphQL request in this script
filename = None # Name of the output CSV; stays None until set_filename() is called
def set_filename(base_ref, head_ref):
    """Derive the CSV name from both references and store it in the global ``filename``."""
    global filename
    filename = "{} - {} changelog.csv".format(base_ref, head_ref)
# Repository slug in "owner/name" form; concatenated into the api.github.com REST URLs below.
REPO = "AntennaPod/AntennaPod"
# Function: Handle exceptions
def handle_exception(error_message: str, data: dict, error_introduction="Error"):
    """Report an error on stdout.

    Prints ``<error_introduction>: <error_message>`` preceded by a blank line.
    When ``data`` is truthy it is additionally dumped as indented JSON.
    """
    headline = "\n{}: {}".format(error_introduction, str(error_message))
    print(headline)
    if not data:
        return
    print("JSON data:")
    print(json.dumps(data, indent=2))
# Greet the user and point to the page where a GitHub API token can be created.
print("Hello, welcome to the AntennaPod PR list generator!")
print("First, please enter your GitHub API token.")
print("If you don't have one yet, create it at https://github.com/settings/tokens")
# Function: Display processing animation
animation_state = {"text": "Loading", "stop": False}
def display_processing_animation(state, interval=0.5):
    """Show a spinner with a status text until ``state['stop']`` becomes true.

    Intended to run in a separate thread; the caller changes ``state['text']``
    to update the message and sets ``state['stop']`` to end the animation.

    Args:
        state: dict with the keys ``'text'`` (message to show) and ``'stop'``
            (truthy once the animation must end).
        interval: seconds to wait between two frames.
    """
    # NOTE(review): the frames were eight empty strings, which makes every
    # frame identical; assumed the glyphs were lost in transit - confirm the
    # intended characters.
    frames = ["⣾", "⣽", "⣻", "⢿", "⡿", "⣟", "⣯", "⣷"]
    first_line = f"{state['text']}..."
    print(first_line, end="")
    widest = len(first_line)
    while not state['stop']:
        for frame in frames:
            # Check the flag before every frame so stopping does not have to
            # wait for the whole cycle to finish.
            if state['stop']:
                break
            line = f"\r{frame} {state['text']}... "
            widest = max(widest, len(line))
            # Pad to the widest line so far, so a shorter text overwrites a longer one.
            print(line.ljust(widest), end="", flush=True)
            time.sleep(interval)
    # Blank out everything that was drawn and move on to a fresh line.
    print("\r" + " " * widest, flush=True)
def get_token():
    """Keep prompting with 'Token: ' until a non-blank answer is given; return it stripped."""
    while True:
        candidate = input('Token: ').strip()
        if candidate:
            return candidate
# Function: Get list of PRs for a given commit
# Note: this might be unnecessary in case commits can only have one PR
def get_associated_prs(commitid):
    """
    Return the numbers of all pull requests associated with one commit.

    Called in case the number of PRs linked to a commit exceeds the maximum number of PRs that the script will fetch per commit.

    Args:
        commitid: object id (SHA) of the commit, passed as the ``GitObjectID`` query variable.

    Returns:
        List of pull request numbers, collected page by page (100 per request).
    """
    # Update the shared animation state; a separate global would never be read by the spinner.
    animation_state['text'] = "Get PRs linked to commit"
    query = '''
query ($cursor: String, $owner: String!, $repo: String!, $commitid: GitObjectID!) {
repository(name: $repo, owner: $owner) {
object(oid: $commitid) {
... on Commit {
associatedPullRequests(first: 100, after: $cursor) {
pageInfo {
endCursor
hasNextPage
}
nodes {
number
}
}
}
}
}
}
'''
    # Variable definition for GraphQL Explorer:
    # {"commitid": "863d4c3b611df83389d82958114bfd2d1204e457", "cursor": null, "owner": "AntennaPod", "repo": "AntennaPod"}
    has_next_pr_page = True
    cursor = None
    pr_numbers = []  # Create PR list
    while has_next_pr_page:
        variables = {"cursor": cursor, "owner": owner, "repo": repo, "commitid": commitid}
        result = client.execute(query, variables)
        data = json.loads(result)
        # The query selects the commit through the `object` field, so that is the key in the response.
        linked_prs = data['data']['repository']['object']['associatedPullRequests']
        pr_numbers.extend(pr['number'] for pr in linked_prs['nodes'])
        page_info = linked_prs['pageInfo']
        has_next_pr_page = page_info['hasNextPage']
        cursor = page_info['endCursor']
        time.sleep(1)  # To prevent hitting rate limits
    return pr_numbers
# Ask which reference to compare from; an empty answer selects the latest GitHub release.
print()
print("Now, what do you want to compare?")
print("Please enter a release code or branch")
print("[default: latest GitHub release]")
BASE = input('Base: ')
if BASE == "":
# Look up the latest release through the REST API, sending TOKEN in the Authorization header.
response = requests.get("https://api.github.com/repos/" + REPO + "/releases/latest", headers={'Authorization': 'token ' + TOKEN})
# A 401 status is reported as an invalid token: ask for a new one and repeat the request.
while response.status_code == 401:
print("Error: Invalid GitHub API token.")
TOKEN = get_token()
response = requests.get("https://api.github.com/repos/" + REPO + "/releases/latest", headers={'Authorization': 'token ' + TOKEN})
# The release's tag name becomes the base reference.
release = response.json()
BASE = release["tag_name"]
print("Okido, latest release (" + BASE + ") it is!")
else:
print("Noted")
try: # Catch KeyboardInterrupt
# Initialise animation_thread so the KeyboardInterrupt handler can test it even if the thread was never started
animation_thread = None
# Ask for the head reference; an empty answer falls back to 'master'.
print()
print("Then, what should be our endpoint?")
print("[default: 'master']")
HEAD = input('Head: ')
if HEAD == "":
print("Righty, master it is!")
HEAD="master"
else:
print("Roger that.")
# Define token
# Greeting (named after `owner`) and instructions for obtaining a token.
print(f"Hello, welcome to the {owner} PR list generator!")
time.sleep(0.5)
print("First, please enter your GitHub API token.")
print("If you don't have one yet, create it at https://github.com/settings/tokens")
# Function: obtain and validate the GitHub API token.
# Uses the global `token` when it is already set, otherwise prompts for it with
# getpass. The token is injected into the GraphQL client and checked with a
# `viewer { login }` query. On any exception the error is reported through
# handle_exception(), `token` is cleared and the loop asks again.
# Returns the token.
def get_token():
global token
# Last parsed response; handed to handle_exception() so it can be printed with the error.
data = None
while True:
if not token:
token = getpass.getpass(prompt='Token: ')
try:
client.inject_token('Bearer ' + token)
query = '''
query {
viewer {
login
}
}
'''
response = client.execute(query) # This prints the response in case of error, despite any try/except blocks
data = json.loads(response)
if 'login' in data['data']['viewer']:
print(f"Grand, thank you @{data['data']['viewer']['login']}!")
# NOTE(review): the prompt below reads "Do you want learn" (missing "to").
print("Do you want learn how to save this token for future use? [y/N]")
save = input()
if save.lower() == "y":
# NOTE(review): the help text interpolates {token}, so the secret typed through getpass is
# shown on screen - confirm this is intended.
# NOTE(review): everything up to the closing triple quote is part of the f-string literal,
# including the lines that look like Python statements.
print(textwrap.dedent(f"""
To save this token for future use, you need to set it as an environment variable on your system. Here's how you can do it:
def print_seen():
print(" [already seen] " + pr_details["title"] + " (#" + str(pr_details["number"]) + ")")
If you're using bash or zsh, you can add the following line to your shell profile file (.bashrc, .bash_profile or .zshrc):
export GITHUB_API_TOKEN='{token}'
print()
prsSeen = set()
filename = BASE + " - " + HEAD + " changelog.csv"
outputFile = open(filename, 'w')
outputFile.write("Type,Merge date,URL,Title,Author,Type,Functionality group\n")
commits = requests.get("https://api.github.com/repos/" + REPO + "/compare/" + BASE + "..." + HEAD, headers={'Authorization': 'token ' + TOKEN}).json()
numCommits = len(commits["commits"])
for i in range(numCommits):
sha = commits["commits"][i]["sha"]
commit = commits["commits"][i]
print("Commit "+ str(i+1) + " of " + str(numCommits))
if "Merge pull request #" in commit["commit"]["message"] or "Merge branch" in commit["commit"]["message"]:
print(" [is merge commit]")
continue
pr_match = re.search(r'\(#(\d{4})\)', commit["commit"]["message"])
if pr_match:
pr_number = pr_match.group(1)
if pr_number in prsSeen:
print_seen()
continue
pr_details = requests.get("https://api.github.com/repos/" + REPO + "/pulls/" + pr_number, headers={'Authorization': 'token ' + TOKEN}).json()
outputFile.write("PR," + pr_details["merged_at"] + "," + pr_details["html_url"] + ",\"" + pr_details["title"] + "\"," + pr_details["user"]["login"] + "\n")
print(" " + pr_details["title"] + " (#" + str(pr_details["number"]) + ")")
prsSeen.add(pr_number)
continue
time.sleep(1.5) # Avoid rate limit
prs = requests.get("https://api.github.com/search/issues?q=repo:" + REPO + "+type:pr+is:merged+" + sha, headers={'Authorization': 'token ' + TOKEN}).json()
if len(prs["items"]) == 0:
outputFile.write("Commit," + commit["commit"]["committer"]["date"] + "," + commit["html_url"] + ",\"" + commit["commit"]["message"].splitlines()[0] + "\"," + commit["committer"]["login"] + "\n")
print(" [orphan] " + commit["commit"]["message"].splitlines()[0])
continue
pr_details = prs["items"][0]
if pr_details["number"] in prsSeen:
print_seen()
continue
outputFile.write("PR," + pr_details["pull_request"]["merged_at"] + "," + pr_details["html_url"] + ",\"" + pr_details["title"] + "\"," + pr_details["user"]["login"] + "\n")
print(" " + pr_details["title"] + " (#" + str(pr_details["number"]) + ")")
prsSeen.add(pr_details["number"])
outputFile.close()
If you're using Fish shell, you can add the following line to your config.fish file:
set -x GITHUB_API_TOKEN '{token}'
After adding this line, you'll need to restart your terminal or run 'source ~/.bashrc' (or the appropriate file for your shell) for the changes to take effect.
On Windows, you can set environment variables through the System Properties. Here's how:
1. Right-click on Computer on the desktop or in the Start menu.
2. Choose Properties.
3. Click on Advanced system settings.
4. Click on Environment Variables.
5. Click on New under the User or System variables sections.
6. Enter 'GITHUB_API_TOKEN' as the variable name and '{token}' as the variable value.
7. Click OK in all windows.
Please note that setting environment variables this way will make them available in all future terminal sessions and scripts. Use with caution.
"""))
else:
print("Ok, moving on.")
# Token accepted: leave the retry loop.
break
except urllib.error.HTTPError as error_message:
# HTTP 401 gets a dedicated "invalid token" message; every other HTTP error a generic one.
if error_message.code == 401:
handle_exception(Exception("Invalid GitHub API token, please try again."), data)
token = ""
else:
handle_exception(Exception("Issue executing GraphQL query"), data)
token = ""
except Exception as error_message:
# Any other failure: report it and clear the token so the next iteration prompts again.
handle_exception(error_message, data)
token = ""
return token
# get_token() is the only visible place that calls client.inject_token(), so a
# token taken from GITHUB_API_TOKEN must be handed to the client here;
# otherwise the client would never receive it.
if token:
    client.inject_token('Bearer ' + token)
else:
    token = get_token()
# Assuming authentication is successful, we will no longer check for/catch authentication errors.
# Define base_ref
# Resolve the base reference: prompt when it was not supplied, then either take
# the tag of the latest release ("latest" or empty input) or verify that the
# entered reference exists in the repository.
if not base_ref:
    print()
    print("Now, what should be our point of reference?")
    print("Please enter a release code or branch")
    print("[default: latest (in other words: previous) GitHub release]")
    base_ref = input('Base: ')
if base_ref == "latest" or not base_ref:
    query = '''
query ($owner: String!, $repo: String!) {
repository(owner: $owner, name: $repo) {
latestRelease {
tagName
}
}
}
'''
    while True:
        data = None  # reset so the handler never prints a response from an earlier request
        try:
            variables = {"owner": owner, "repo": repo}
            response = client.execute(query, variables)
            data = json.loads(response)
            base_ref = data['data']['repository']['latestRelease']['tagName']
            print("\nOkido, let's get the latest release (" + base_ref + ")!")
            break
        except Exception as e:
            handle_exception(e, data)
            print("Does your token have enough permissions?")
            sys.exit(1)
else:
    # The variable must be passed unquoted: "$ref" in quotes is the literal
    # string "$ref" and leaves the declared variable unused.
    query = '''
query ($owner: String!, $repo: String!, $ref: String!) {
repository(owner: $owner, name: $repo) {
ref(qualifiedName: $ref) {
name
}
}
}
'''
    while True:
        data = None  # reset so the handler never prints a response from an earlier request
        try:
            variables = {"owner": owner, "repo": repo, "ref": base_ref}
            response = client.execute(query, variables)
            data = json.loads(response)
            if data['data']['repository']['ref'] is None:
                # Unknown reference: ask again and retry the lookup.
                print("\nError: Invalid release code or branch.")
                print("Please try again.")
                base_ref = input('Base: ')
                continue
            base_ref = data['data']['repository']['ref']['name']
            print(f"\nNoted, {base_ref} it is.")
            break
        except Exception as e:
            handle_exception(e, data)
            print("Does your token have enough permissions?")
            sys.exit(1)
# Define head_ref
# Resolve the head reference: prompt when it was not supplied, default to
# 'master' on empty input, and otherwise verify that the reference exists.
if not head_ref:
    print("\nThen, from which environment would you like to see the changes (the head)?")
    print("Please enter a release code or branch")
    print("[default: 'master']")
    head_ref = input('Head: ')
if head_ref == "master" or not head_ref:
    print("\nRighty, master it is!")
    if not head_ref:
        head_ref = "master"
else:
    query = '''
query ($owner: String!, $repo: String!, $ref: String!) {
repository(owner: $owner, name: $repo) {
ref(qualifiedName: $ref) {
id
name
}
}
}
'''
    while True:
        data = None  # reset so the handler never sees an unbound or outdated response
        try:
            variables = {"owner": owner, "repo": repo, "ref": head_ref}
            response = client.execute(query, variables)
            data = json.loads(response)
            if data['data']['repository']['ref'] is None:
                # Unknown reference: ask again and retry the lookup.
                print("\nError: Invalid release code or branch.")
                print("Please try again.")
                head_ref = input('Head: ')
                continue
            head_ref = data['data']['repository']['ref']['name']
            print(f"\nNoted, {head_ref} it is.")
            break
        except Exception as e:
            handle_exception(e, data)
            print("Does your token have enough permissions?")
            sys.exit(1)
# Start the animation in a separate thread
animation_thread = threading.Thread(target=display_processing_animation, args=(animation_state,))
animation_thread.start()
# Set filename
set_filename(base_ref, head_ref)
# Get list of commits & associated PRs, comparing base & head
animation_state['text'] = "Get list of commits & PRs"
# The query compares headRef against baseRef and returns the commits 100 at a
# time, each with its oid and up to $maxPRs associated pull request numbers.
query = '''
query($cursor: String, $owner: String!, $repo: String!, $baseRef: String!, $headRef: String!, $maxPRs: Int!) {
repository(name: $repo, owner: $owner) {
ref(qualifiedName: $baseRef) {
compare(headRef: $headRef) {
commits(first: 100, after: $cursor) {
pageInfo {
endCursor
hasNextPage
}
nodes {
oid
associatedPullRequests(first: $maxPRs) {
totalCount
nodes {
number
}
}
}
}
}
}
}
}
'''
# Variable definition for GraphQL Explorer:
# {"cursor": null, "owner": "AntennaPod", "repo": "AntennaPod", "baseRef": "master", "headRef": "develop", "maxPRs": 5}
# Cursor-based pagination: repeat the query until pageInfo reports no further page.
has_next_page = True
cursor = None
commits = [] # Create commit list
while has_next_page:
variables = {"cursor": cursor, "owner": owner, "repo": repo, "baseRef": base_ref, "headRef": head_ref, "maxPRs": max_associatedPRs}
result = client.execute(query, variables)
data = json.loads(result)
# NOTE(review): 'ref' is indexed without a None check here - presumably base_ref was validated earlier; verify for values taken from the environment.
commit_data = data['data']['repository']['ref']['compare']['commits']['nodes']
for commit in commit_data:
if commit['associatedPullRequests']['totalCount'] > max_associatedPRs: # Request a list of PRs for the commit, if there are many.
commitid = commit['oid']
pr_numbers = get_associated_prs(commitid)
else:
pr_numbers = [pr['number'] for pr in commit['associatedPullRequests']['nodes']]
commits.append({ # Store commit information in list
'sha': commit['oid'],
'pr_count': commit['associatedPullRequests']['totalCount'],
'pr_numbers': pr_numbers,
})
page_info = data['data']['repository']['ref']['compare']['commits']['pageInfo']
has_next_page = page_info['hasNextPage']
cursor = page_info['endCursor']
time.sleep(1) # To prevent hitting rate limits
# Create set of unique PRs from the list of commit dictionaries
animation_state['text'] = "Identify unique PRs"
# Flatten every commit's PR numbers into one de-duplicated set.
unique_pr_numbers = {number for entry in commits for number in entry['pr_numbers']}
# Create a list of dictionaries with PR metadata for in CSV file
animation_state['text'] = "Get PR metadata"
## Combined GraphQL call: get relevant PRs and the issues they close
# The query is assembled as text: one aliased `pr<n>:pullRequest` selection is
# appended per unique PR number, so all PRs are fetched in a single request.
# NOTE(review): when unique_pr_numbers is empty the repository selection ends up empty - confirm the API accepts that.
query = f'''
query ($owner: String!, $repo: String!) {{
repository (name: $repo, owner: $owner) {{
'''
for n, pr_number in enumerate(unique_pr_numbers):
# Doubled braces are literal braces in the f-string; only {n} and {pr_number} are substituted.
query += f'''
pr{n}:pullRequest (number: {pr_number}) {{
url
title
author {{
login
}}
mergedAt
labels (first: 10) {{
totalCount
nodes {{
id
name
}}
}}
closingIssuesReferences(first: 10) {{
totalCount
nodes {{
number
title
labels(first: 10) {{
totalCount
nodes {{
id
name
}}
}}
}}
}}
}}
'''
# Close the repository and query selections.
query += f'''
}}
}}
'''
# Variable definition for GraphQL Explorer:
# {"prnumber": 7053, "owner": "AntennaPod", "repo": "AntennaPod"}
## Submit call
variables = {"owner": owner, "repo": repo}
result = client.execute(query, variables)
data = json.loads(result)
## Parse response and save PR data
animation_state['text'] = "Parse PR metadata"
# Node ID of the label checked for the "needs review replies" column.
# NOTE(review): which label this ID belongs to is not visible here - confirm.
review_reply_label_id = 'LA_kwDOAFAGHc8AAAABoGK6aw'
prs_for_csv = []
# Enumerating the unchanged set again yields the same order that was used to
# build the aliases (pr0, pr1, ...) in the query.
for n, pr_number in enumerate(unique_pr_numbers):
    prdata = data['data']['repository'][f'pr{n}']
    pr_labels = prdata['labels']
    closing_issues = prdata['closingIssuesReferences']

    # Create string with related issues
    related_issues_string = ', '.join(str(issue['number']) for issue in closing_issues['nodes'])
    if closing_issues['totalCount'] > 10:
        related_issues_string += " and more"

    # Create string with labels of the PR and its associated issues
    # Only the first 10 labels/issues are fetched, so a higher totalCount means the list is incomplete.
    unique_labels = {label['name'] for label in pr_labels['nodes']}
    maximum_hit = pr_labels['totalCount'] > 10
    for relatedIssue in closing_issues['nodes']:
        if relatedIssue['labels']['totalCount'] > 10:
            maximum_hit = True
        unique_labels.update(label['name'] for label in relatedIssue['labels']['nodes'])
    # Sorted so the column content does not depend on set iteration order.
    unique_labels_string = ', '.join(sorted(unique_labels))
    if maximum_hit:
        unique_labels_string += " and more (probably)"

    # Create string with issue & PR number(s) that need review replies
    numbers = []
    if any(label['id'] == review_reply_label_id for label in pr_labels['nodes']):
        numbers.append(pr_number)
    maximum_hit = closing_issues['totalCount'] > 10 or pr_labels['totalCount'] > 10
    for relatedIssue in closing_issues['nodes']:
        if any(label['id'] == review_reply_label_id for label in relatedIssue['labels']['nodes']):
            numbers.append(relatedIssue['number'])
        if relatedIssue['labels']['totalCount'] > 10:
            maximum_hit = True
    numbers_str = ', '.join(map(str, numbers))
    if maximum_hit:
        numbers_str += " and more, possibly"
    if numbers_str:
        needs_review_reply_string = f"Yes ({numbers_str})"
    else:
        needs_review_reply_string = "No"

    # The API returns a null author when the account no longer exists;
    # fall back to "ghost", the placeholder GitHub shows for such users.
    pr_author = prdata['author']
    author_login = pr_author['login'] if pr_author else "ghost"

    # Store pr information in list
    prs_for_csv.append({
        'mergedAt': prdata['mergedAt'],
        'URL': prdata['url'],
        'title': prdata['title'],
        'author': author_login,
        'relatedIssues': related_issues_string,
        'labels': unique_labels_string,
        'needsReviewReplies': needs_review_reply_string,
    })
# Create a list of dictionaries with commits for in CSV file
# /// NOTE maybe it's better to move this up to before the PR list generation (but after the list of unique PRs has been created) as it clears some of the memory used
animation_state['text'] = "Clean up commit list"
## Filter list with commit dictionaries so only ones without any associated PRs are left
commits = [commit for commit in commits if commit['pr_count'] == 0]
## Expand list with commit dictionaries to contain commit metadata
animation_state['text'] = "Get commit metadata"
# An empty selection set is not valid GraphQL, so only send the request when
# at least one commit without a PR is left.
if commits:
    ### Loop through commits to construct GraphQL query
    query = f'''
query ($owner: String!, $repo: String!) {{
repository (name: $repo, owner: $owner) {{
'''
    for n, commit in enumerate(commits):
        # One aliased selection per commit; `name` is requested as a fallback
        # for authors whose e-mail is not linked to a GitHub account.
        query += f'''
commit{n}:object(oid: "{commit['sha']}") {{
... on Commit {{
message
committedDate
url
authors(first:3) {{
totalCount
nodes {{
name
user {{
login
}}
}}
}}
}}
}}
'''
    query += f'''
}}
}}
'''
    # Variable definition for GraphQL Explorer:
    # {"sha": "863d4c3b611df83389d82958114bfd2d1204e457", "owner": "AntennaPod", "repo": "AntennaPod"}
    ## Submit call
    variables = {"owner": owner, "repo": repo}
    result = client.execute(query, variables)
    data = json.loads(result)
    ## Parse response and expand commit data
    animation_state['text'] = "Parse commit metadata"
    for n, commit in enumerate(commits):
        commit_data = data['data']['repository'][f'commit{n}']
        # Create string with authors
        authors = []
        for author in commit_data['authors']['nodes']:
            linked_user = author['user']  # null when no GitHub account matches the commit e-mail
            display_name = linked_user['login'] if linked_user else author['name']
            if display_name:
                authors.append(display_name)
        authors_string = ', '.join(authors)
        commit['committedDate'] = commit_data['committedDate']
        commit['URL'] = commit_data['url']
        commit['title'] = commit_data['message']
        commit['author'] = authors_string
# Combine commit & PR lists & sort by mergedAt (for PRs)/ committedDate (for commits)
animation_state['text'] = "Combine and sort PR and commit lists"
timestamp_format = '%Y-%m-%dT%H:%M:%SZ'
# Give every entry a parsed 'datetime' and an 'entitytype' marker.
# NOTE(review): strptime raises TypeError when 'mergedAt' is None - confirm that only merged PRs can reach this point.
commits = [
    {**entry, 'datetime': datetime.strptime(entry['committedDate'], timestamp_format), 'entitytype': 'commit'}
    for entry in commits
]
prs_for_csv = [
    {**entry, 'datetime': datetime.strptime(entry['mergedAt'], timestamp_format), 'entitytype': 'pr'}
    for entry in prs_for_csv
]
# The raw timestamp strings are no longer needed once 'datetime' exists.
for entry in commits:
    entry.pop('committedDate')
for entry in prs_for_csv:
    entry.pop('mergedAt')
combined = sorted(commits + prs_for_csv, key=lambda entry: entry['datetime'])
for row in combined:
    # Drop the bookkeeping keys that have no CSV column ...
    for unwanted in ('sha', 'pr_count', 'pr_numbers'):
        row.pop(unwanted, None)
    # ... and add the two columns that are left blank for manual input.
    row['empty1'] = ''
    row['empty2'] = ''
# Define your fieldnames and their human-readable counterparts
animation_state['text'] = "Save changelog as CSV"
fields = [
    ('entitytype', 'Entity'),
    ('datetime', 'Merge/Committed date'),
    ('URL', 'URL'),
    ('title', 'Title'),
    ('author', 'Author(s)'),
    ('empty1', 'Type'),
    ('empty2', 'Functionality group'),
    ('relatedIssues', 'Related issue(s)'),
    ('labels', 'Related label(s)'),
    ('needsReviewReplies', 'Needs review replies?'),
]
# Create an OrderedDict from the fields
fieldnames = OrderedDict(fields)
# The header row maps every row key to its human-readable column title.
header = dict(fieldnames)
# Explicit UTF-8 so titles with characters outside the platform's default
# encoding cannot abort the write after all data has been collected.
with open(filename, 'w', newline='', encoding='utf-8') as outputFile:
    # The keys of the OrderedDict define the column order
    writer = csv.DictWriter(outputFile, fieldnames=list(fieldnames))
    writer.writerow(header)
    writer.writerows(combined)  # Writes all the dictionaries in the list to the CSV
# Stop the animation
animation_state['stop'] = True
animation_thread.join()
print("✅ The changelog has been saved as a CSV file.")
print(f"📂 The file is named '{filename}'.")
except KeyboardInterrupt:
animation_state['text'] = "Ending"
if animation_thread:
animation_state['stop'] = True
try:
animation_thread.join()
except KeyboardInterrupt:
animation_state['text'] = "Still wrapping up"
if filename and os.path.exists(filename):
os.remove(filename)
print(" The requested changelog file was deleted.")
print("\n⚠️ The script was interrupted by the user.")