Python and the GitHub API
GitHub API
GitHub data
- Users / Organizations
- Repositories
- Commits
- Issues
- Pull-Requests
- ...
GitHub API: REST vs GraphQL
-
REST API
-
Get the data in the structure the API provider thought you would need.
-
Usually all the data from one table in the database.
-
GraphQL API
-
Have a mapping (edges) between pieces of data that are connected
-
Getting the data you need, nothing more
-
Nested fields
-
Strong typing of the data
-
Rate limits
Where is it used
- Open Source Development Courses
- Open Source by Organizations
- Weekly report
GitHub get organization members
import json
from github_rest_api import get_from_github
# Download the member list of one fixed organization through the REST API,
# keep a pretty-printed copy in out.json and echo the raw structure.
orgid = 'github'
members_url = f"https://api.github.com/orgs/{orgid}/members"
data = get_from_github(members_url)

with open("out.json", 'w') as fh:
    fh.write(json.dumps(data, indent=4))

print(data)
python examples/github-rest/rest_get_org_members.py
{% embed include file="src/examples/github-graphql/get_org_members.gql" %}
python examples/github-graphql/run_query_requests.py examples/github-graphql/get_org_members.gql out.json
Details about an organization REST
import json
import sys
from github_rest_api import get_from_github
# Query the /orgs/{organization} REST endpoint for one organization and
# store the decoded JSON answer in out.json.
# Exactly one command-line argument is expected: the organization login.
if len(sys.argv) != 2:
    # The argument is an organization, so the usage text must not say USERNAME.
    sys.exit(f"Usage: {sys.argv[0]} ORGANIZATION")
organization = sys.argv[1]

data = get_from_github(f"https://api.github.com/orgs/{organization}")
with open("out.json", 'w') as fh:
    json.dump(data, fh, indent=4)
python examples/github-rest/details-about-org.py github
python examples/github-rest/details-about-org.py kantoniko
python examples/github-rest/details-about-org.py osdc-code-maven
python examples/github-rest/details-about-org.py szabgab   # error, this is a user
Details about a user REST
import json
import sys
from github_rest_api import get_from_github
# Look up a single account through the /users/{username} REST endpoint and
# write the decoded JSON answer to out.json. The login is the only argument.
if len(sys.argv) == 2:
    username = sys.argv[1]
else:
    exit(f"Usage: {sys.argv[0]} USERNAME")

user_url = f"https://api.github.com/users/{username}"
data = get_from_github(user_url)

with open("out.json", 'w') as fh:
    fh.write(json.dumps(data, indent=4))
python examples/github-rest/details-about-user.py szabgab
but these also work:
python examples/github-rest/details-about-user.py github
python examples/github-rest/details-about-user.py kantoniko
python examples/github-rest/details-about-user.py osdc-code-maven
REST - List of repositories by organization (pagination!)
import json
import sys
from github_rest_api import get_from_github
# List every repository of one organization via /orgs/{organization}/repos
# (paginated) and store the combined result in out.json.
# Exactly one command-line argument is expected: the organization login.
if len(sys.argv) != 2:
    # The argument is an organization, so the usage text must not say USERNAME.
    sys.exit(f"Usage: {sys.argv[0]} ORGANIZATION")
organization = sys.argv[1]

# pages=True asks the helper to walk through all result pages.
data = get_from_github(f"https://api.github.com/orgs/{organization}/repos", pages=True)
with open("out.json", 'w') as fh:
    json.dump(data, fh, indent=4)
python examples/github-rest/repos-of-org.py github
python examples/github-rest/repos-of-org.py kantoniko
python examples/github-rest/repos-of-org.py szabgab   # error, this is a user
REST - List of repositories by user (pagination!)
import json
import sys
from github_rest_api import get_from_github
# List the repositories of one account via /users/{login}/repos, following
# the pagination, and save everything that was collected to out.json.
if len(sys.argv) == 2:
    username = sys.argv[1]
else:
    exit(f"Usage: {sys.argv[0]} USERNAME")

repos_url = f"https://api.github.com/users/{username}/repos"
data = get_from_github(repos_url, pages=True)

with open("out.json", 'w') as fh:
    fh.write(json.dumps(data, indent=4))
python examples/github-rest/repos-of-user.py szabgab
but these also work:
python examples/github-rest/repos-of-user.py kantoniko
python examples/github-rest/repos-of-user.py osdc-code-maven
GraphQL - List repositories by organization
import datetime
import argparse
import json
import os
import datetime
import sys
import requests
# GraphQL query text. It takes one required variable, $organization, and asks
# for that organization's avatar URL plus the first 2 repositories (selected
# fields, watcher and stargazer counts), the total repository count and the
# pageInfo cursor data.
query = '''
query ($organization: String!) {
organization(login: $organization) {
avatarUrl
repositories(first: 2, after: null) {
nodes {
createdAt
url
pushedAt
name
watchers {
totalCount
}
visibility
updatedAt
stargazers {
totalCount
}
}
totalCount
pageInfo {
endCursor
hasNextPage
}
}
}
}
'''
def run_query(query, **variables):
    """Send a GraphQL query to the GitHub API and return the decoded JSON.

    Args:
        query: The GraphQL query text.
        **variables: Values for the variables declared in the query; they
            are sent as the "variables" member of the request body.

    Returns:
        The parsed JSON body when the server answers with HTTP 200,
        otherwise ``None`` after printing the status code and the body.
    """
    token = os.environ.get('MY_GITHUB_TOKEN')
    headers = {
        'Authorization': f'Bearer {token}',
    }
    url = "https://api.github.com/graphql"
    res = requests.post(url, json={"query": query, "variables": variables}, headers=headers)
    if res.status_code == 200:
        return res.json()

    print(f"Request failed with status_code: {res.status_code}")
    # A requests Response has no ``data`` attribute; ``text`` is the body.
    print(res.text)
    return None
def main():
    """Run the module-level query for one organization and save the result.

    The organization login is taken from the single command-line argument;
    the script exits with a usage message when the argument count is wrong.
    The decoded response is written to out.json.
    """
    if len(sys.argv) != 2:
        sys.exit(f"Usage: {sys.argv[0]} ORGANIZATION")
    organization = sys.argv[1]

    results = run_query(query, organization=organization)
    with open("out.json", "w") as fh:
        json.dump(results, fh, indent=4)


# Run only when executed as a script so that importing has no side effects.
if __name__ == "__main__":
    main()
GitHub API KEY (PERSONAL TOKEN)
GitHub REST API
pip install requests
GitHub REST API execute query
import requests
import os
def get_from_github(url, expected=0, pages=False):
    """Fetch JSON from a GitHub REST API URL, optionally following pages.

    Args:
        url: The REST endpoint to request.
        expected: Optional number of items the caller expects; only used
            in the progress messages of the paginated mode.
        pages: When true, request page after page (100 items each) and
            return the concatenation of all pages.

    Returns:
        ``None`` when the MY_GITHUB_TOKEN environment variable is not set.
        Without ``pages``: the decoded JSON of the single response.
        With ``pages``: a list with the items of all pages; if a page is
        not a JSON list (for example an error object), that payload is
        returned as-is, just like in the non-paginated mode.
    """
    token = os.environ.get('MY_GITHUB_TOKEN')
    if not token:
        print('Missing MY_GITHUB_TOKEN. Not collecting data from Github')
        return None

    headers = {
        'Accept': 'application/vnd.github+json',
        'Authorization': f'Bearer {token}',
        'X-GitHub-Api-Version': '2022-11-28',
    }

    if not pages:
        print(f"Fetching from {url}")
        return requests.get(url, headers=headers).json()

    per_page = 100  # default is 30 max is 100
    # Append with '&' when the caller's URL already carries a query string.
    separator = '&' if '?' in url else '?'
    all_data = []
    page = 1
    while True:
        real_url = f"{url}{separator}per_page={per_page}&page={page}"
        print(f"Fetching from {real_url}")
        data = requests.get(real_url, headers=headers).json()
        if not isinstance(data, list):
            # Extending the list with a dict would silently add only its
            # keys, hiding the actual error message from the caller.
            print(f"Unexpected response from {real_url}: {data}")
            return data
        all_data.extend(data)
        if expected:
            print(f"Received {len(data)} Total {len(all_data)} out of an expected {expected}")
        else:
            print(f"Received {len(data)} Total {len(all_data)}")
        page += 1
        # A short page means there is nothing left to fetch.
        if len(data) < per_page:
            break
    return all_data
GitHub API GraphQL
-
Scalars (types)
-
String! means the field is a string that cannot be null.
pip install requests
GitHub GraphQL explorer
GitHub GraphQL execute query
import sys
import json
import os
import requests
def run_query(query):
    """Send a GraphQL query to the GitHub API and return the decoded JSON.

    Args:
        query: The GraphQL query text.

    Returns:
        The parsed JSON body when the server answers with HTTP 200,
        otherwise ``None`` after printing the status code and the body.
    """
    token = os.environ.get('MY_GITHUB_TOKEN')
    headers = {
        'Authorization': f'Bearer {token}',
    }
    url = "https://api.github.com/graphql"
    res = requests.post(url, json={"query": query}, headers=headers)
    if res.status_code == 200:
        return res.json()

    print(f"Request failed with status_code: {res.status_code}")
    # A requests Response has no ``data`` attribute; ``text`` is the body.
    print(res.text)
    return None
if __name__ == "__main__":
    # QUERY_FILE is mandatory, OUTPUT_FILE is optional; anything else is a
    # usage error.
    argc = len(sys.argv)
    if argc < 2 or argc > 3:
        exit(f"Usage: {sys.argv[0]} QUERY_FILE [OUTPUT_FILE]")
    query_filename = sys.argv[1]
    output_file = sys.argv[2] if argc == 3 else None

    # The query is read from the file and sent unchanged.
    with open(query_filename) as fh:
        query = fh.read()
    result = run_query(query)

    # Either save a pretty-printed copy or show the raw structure.
    if not output_file:
        print(result)
    else:
        with open(output_file, 'w') as fh:
            json.dump(result, fh, indent=4)
GitHub GraphQL execute query async
pip install gql[all]
import sys
import json
import os
from gql import gql, Client
from gql.transport.aiohttp import AIOHTTPTransport
# QUERY_FILE is mandatory, OUTPUT_FILE is optional; anything else is a
# usage error.
argc = len(sys.argv)
if argc < 2 or argc > 3:
    exit(f"Usage: {sys.argv[0]} QUERY_FILE [OUTPUT_FILE]")
query_filename = sys.argv[1]
output_file = sys.argv[2] if argc == 3 else None

with open(query_filename) as fh:
    query = fh.read()

# Authenticate with the personal token taken from the environment.
token = os.environ.get('MY_GITHUB_TOKEN')
headers = {'Authorization': f'Bearer {token}'}
url = "https://api.github.com/graphql"

# Build the gql client on top of the aiohttp transport and run the query.
transport = AIOHTTPTransport(url=url, headers=headers)
client = Client(transport=transport, fetch_schema_from_transport=True)
result = client.execute(gql(query))

# Either save a pretty-printed copy or show the raw structure.
if not output_file:
    print(result)
else:
    with open(output_file, 'w') as fh:
        json.dump(result, fh, indent=4)
GitHub GraphQL who am i
- Get the username of who provided the token
query {
viewer {
login
}
}
python examples/github-graphql/run_query_requests.py examples/github-graphql/login.gql out.json
{
"viewer": {
"login": "szabgab"
}
}
GitHub GraphQL list my repositories
query {
viewer {
repositories(first: 30) {
totalCount
pageInfo {
hasNextPage
endCursor
}
edges {
node {
name
}
}
}
}
}
python examples/github-graphql/run_query_requests.py examples/github-graphql/list_my_repositories.gql out.json
{
"viewer": {
"repositories": {
"totalCount": 470,
"pageInfo": {
"hasNextPage": true,
"endCursor": "Y3Vyc29yOnYyOpHOACAlgw=="
},
"edges": [
{
"node": {
"name": "whitecamel.org"
}
},
{
"node": {
"name": "perl6-in-perl5"
}
},
{
"node": {
"name": "test-snapshots"
}
},
{
"node": {
"name": "padre-plugin-debugger"
}
},
{
"node": {
"name": "Math-RPN"
}
},
{
"node": {
"name": "perl6-conf"
}
},
{
"node": {
"name": "the-driver"
}
},
{
"node": {
"name": "Rehovot.pm"
}
},
{
"node": {
"name": "CPAN-Forum"
}
},
{
"node": {
"name": "test-runner"
}
},
{
"node": {
"name": "test-class"
}
},
{
"node": {
"name": "perl-android-scripts"
}
},
{
"node": {
"name": "perl-promotion"
}
},
{
"node": {
"name": "prestool"
}
},
{
"node": {
"name": "pdf-create"
}
},
{
"node": {
"name": "pdf6"
}
},
{
"node": {
"name": "try.rakudo.org"
}
},
{
"node": {
"name": "CPAN-Digger-old"
}
},
{
"node": {
"name": "peg"
}
},
{
"node": {
"name": "Hypolit"
}
},
{
"node": {
"name": "topposters"
}
},
{
"node": {
"name": "Bailador"
}
},
{
"node": {
"name": "git_experiments"
}
},
{
"node": {
"name": "Code-Explain"
}
},
{
"node": {
"name": "Code-Explain-Web"
}
},
{
"node": {
"name": "CGI--Simple"
}
},
{
"node": {
"name": "Prima"
}
},
{
"node": {
"name": "Test-Version"
}
},
{
"node": {
"name": "dwimmer"
}
},
{
"node": {
"name": "Text-Trac"
}
}
]
}
}
}
GitHub GraphQL list of repositories by username
query {
repositoryOwner(login: "cm-demo") {
repositories(first: 5, privacy: PUBLIC) {
totalCount
edges {
node {
id,
name,
isPrivate,
description
}
}
}
}
}
python examples/github-graphql/run_query_requests.py examples/github-graphql/list_repositories_by_username.gql out.json
{
"repositoryOwner": {
"repositories": {
"totalCount": 5,
"edges": [
{
"node": {
"id": "R_kgDOGSKE7A",
"name": "cm-demo",
"isPrivate": false,
"description": "Config files for my GitHub profile."
}
},
{
"node": {
"id": "R_kgDOIx8BIw",
"name": "cm-demo.github.io-osdc-2023-01-public",
"isPrivate": false,
"description": null
}
},
{
"node": {
"id": "R_kgDOI4Gftw",
"name": "cm-demo.github.io-osdc-2023-01-perl",
"isPrivate": false,
"description": null
}
},
{
"node": {
"id": "R_kgDOJNSvyA",
"name": "cm-demo.github.io-osdc-2023-03-azrieli-",
"isPrivate": false,
"description": null
}
},
{
"node": {
"id": "R_kgDOJWTJHw",
"name": "osdc-2023-03-azrieli",
"isPrivate": false,
"description": "OSDC at Azriel College starting in 2023.03"
}
}
]
}
}
}
GitHub GraphQL list issues by username
query {
user(login: "szabgab") {
issues(first: 10, filterBy: {since: "2023-03-20T00:00:00Z"}) {
totalCount
edges {
node {
number,
title,
state,
createdAt,
url,
repository {
owner {
login
}
}
}
}
}
}
}
python examples/github-graphql/run_query_requests.py examples/github-graphql/list_issues_by_username.gql out.json
{
"user": {
"issues": {
"totalCount": 50,
"edges": [
{
"node": {
"number": 8,
"title": "Check if package has link to Issues?",
"state": "CLOSED",
"createdAt": "2020-11-02T19:06:04Z",
"url": "https://github.com/szabgab/CPAN-Digger/issues/8",
"repository": {
"owner": {
"login": "szabgab"
}
}
}
},
{
"node": {
"number": 9,
"title": "Check if meta data contains the license field?",
"state": "CLOSED",
"createdAt": "2020-11-02T19:06:28Z",
"url": "https://github.com/szabgab/CPAN-Digger/issues/9",
"repository": {
"owner": {
"login": "szabgab"
}
}
}
},
{
"node": {
"number": 6051,
"title": "Hint how to unlock exercises",
"state": "OPEN",
"createdAt": "2021-10-21T11:43:51Z",
"url": "https://github.com/exercism/exercism/issues/6051",
"repository": {
"owner": {
"login": "exercism"
}
}
}
},
{
"node": {
"number": 20,
"title": "Add CPANcover data",
"state": "CLOSED",
"createdAt": "2022-12-06T04:46:24Z",
"url": "https://github.com/szabgab/CPAN-Digger/issues/20",
"repository": {
"owner": {
"login": "szabgab"
}
}
}
},
{
"node": {
"number": 1,
"title": "Misunderstood .gitignore?",
"state": "CLOSED",
"createdAt": "2022-12-30T05:02:13Z",
"url": "https://github.com/x-lamprocapnos-x/Movie-Selector/issues/1",
"repository": {
"owner": {
"login": "x-lamprocapnos-x"
}
}
}
},
{
"node": {
"number": 3,
"title": "Verify project URLs in the individual json files",
"state": "CLOSED",
"createdAt": "2023-02-08T12:59:29Z",
"url": "https://github.com/OSDC-Code-Maven/osdc-site-generator/issues/3",
"repository": {
"owner": {
"login": "OSDC-Code-Maven"
}
}
}
},
{
"node": {
"number": 1,
"title": "The __pycache__ folder should not be in git",
"state": "CLOSED",
"createdAt": "2023-02-12T14:38:30Z",
"url": "https://github.com/zguillez/python-toolz/issues/1",
"repository": {
"owner": {
"login": "zguillez"
}
}
}
},
{
"node": {
"number": 1,
"title": "Move all the data from the other 3 repositories",
"state": "OPEN",
"createdAt": "2023-03-05T07:44:06Z",
"url": "https://github.com/OSDC-Code-Maven/open-source-by-organizations/issues/1",
"repository": {
"owner": {
"login": "OSDC-Code-Maven"
}
}
}
},
{
"node": {
"number": 1823,
"title": "Flake error B031 caused by new release flake8-bugbear",
"state": "CLOSED",
"createdAt": "2023-03-10T12:04:24Z",
"url": "https://github.com/pallets/jinja/issues/1823",
"repository": {
"owner": {
"login": "pallets"
}
}
}
},
{
"node": {
"number": 6378,
"title": "How to setup local dev environment and run the tests?",
"state": "OPEN",
"createdAt": "2023-03-11T17:22:01Z",
"url": "https://github.com/psf/requests/issues/6378",
"repository": {
"owner": {
"login": "psf"
}
}
}
}
]
}
}
}
GitHub GraphQL list issues using parameter
import json
import os
from gql import gql, Client
from gql.transport.aiohttp import AIOHTTPTransport
import datetime
# Authenticate with the personal token taken from the environment.
token = os.environ.get('MY_GITHUB_TOKEN')
headers = {
    'Authorization': f'Bearer {token}',
}
url = "https://api.github.com/graphql"

# The query declares one variable, $since, and uses it as the filter for the
# issues of the user "szabgab" (only the first matching issue is requested).
query = '''
query($since:DateTime) {
user(login: "szabgab") {
issues(first: 1, filterBy: {since: $since}) {
totalCount
edges {
node {
number, title, state, createdAt, url, repository {
owner {
login
}
}
}
}
}
}
}
'''

# A fixed value such as "2023-04-10T00:00:00Z" could be passed instead.
# The trailing "Z" declares the timestamp to be UTC, so the clock has to be
# read in UTC as well; a naive local time would be off by the UTC offset.
ts = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=10)
variables = {
    "since": ts.strftime("%Y-%m-%dT%H:%M:%SZ")
}

transport = AIOHTTPTransport(url=url, headers=headers)
client = Client(transport=transport, fetch_schema_from_transport=True)
result = client.execute(gql(query), variable_values=variables)
print(result)
GitHub GraphQL list issues using several parameters
import json
import os
from gql import gql, Client
from gql.transport.aiohttp import AIOHTTPTransport
import datetime
import sys
# An optional single argument names the file the result is saved to.
if len(sys.argv) == 2:
    output_file = sys.argv[1]
else:
    output_file = None

# Authenticate with the personal token taken from the environment.
token = os.environ.get('MY_GITHUB_TOKEN')
headers = {
    'Authorization': f'Bearer {token}',
}
url = "https://api.github.com/graphql"

# The query declares three variables: the login ($user, required), the number
# of issues to return ($first) and the filter timestamp ($since).
query = '''
query($since:DateTime, $first:Int, $user:String!) {
user(login: $user) {
issues(first: $first, filterBy: {since: $since}) {
totalCount
edges {
node {
number, title, state, createdAt, url, repository {
owner {
login
}
}
}
}
}
}
}
'''

# The trailing "Z" declares the timestamp to be UTC, so the clock has to be
# read in UTC as well; a naive local time would be off by the UTC offset.
ts = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=20)
variables = {
    "user": "szabgab",
    "since": ts.strftime("%Y-%m-%dT%H:%M:%SZ"),
    "first": 30,
}

transport = AIOHTTPTransport(url=url, headers=headers)
client = Client(transport=transport, fetch_schema_from_transport=True)
result = client.execute(gql(query), variable_values=variables)

# Either save a pretty-printed copy or show the raw structure.
if output_file:
    with open(output_file, 'w') as fh:
        json.dump(result, fh, indent=4)
else:
    print(result)
GitHub GraphQL contribution counts
query($username:String!) {
user(login: $username) {
contributionsCollection {
contributionCalendar {
totalContributions
weeks {
contributionDays {
contributionCount
weekday
date
}
}
}
}
}
}
{
"username": "szabgab"
}
- Defaults to the last 1 year
query($username:String!, $from:DateTime, $to:DateTime) {
user(login: $username) {
contributionsCollection(from: $from, to: $to) {
contributionCalendar {
totalContributions
weeks {
contributionDays {
contributionCount
weekday
date
}
}
}
}
}
}
{
"username": "szabgab",
"from": "2013-03-20T00:00:00Z",
"to": "2013-04-20T00:00:00Z"
}
- Can set the start-date (defaults to now - 1 year)
- Can set the end-date (defaults to start-date + 1 year)
GitHub GraphQL list Pull-Requests
- List all the PRs created by a user in a time-range
import json
import os
from gql import gql, Client
from gql.transport.aiohttp import AIOHTTPTransport
import datetime
import sys
# An optional single argument names the file the result is saved to.
if len(sys.argv) == 2:
    output_file = sys.argv[1]
else:
    output_file = None

# Authenticate with the personal token taken from the environment.
token = os.environ.get('MY_GITHUB_TOKEN')
headers = {
    'Authorization': f'Bearer {token}',
}
url = "https://api.github.com/graphql"

# The query declares two variables: the login ($username, required) and how
# many pull requests to take from the end of the list ($last).
query = '''
query($username:String!, $last:Int) {
user(login: $username) {
pullRequests(last: $last) {
totalCount
edges {
node {
number, title, state, createdAt, author { login }, url
}
}
}
}
}
'''

# Only the variables declared by the query are supplied; the previously
# computed timestamp was never passed to the query and has been dropped.
variables = {
    "username": "szabgab",
    "last": 30,
}

transport = AIOHTTPTransport(url=url, headers=headers)
client = Client(transport=transport, fetch_schema_from_transport=True)
result = client.execute(gql(query), variable_values=variables)

# Either save a pretty-printed copy or show the raw structure.
if output_file:
    with open(output_file, 'w') as fh:
        json.dump(result, fh, indent=4)
else:
    print(result)
import json
import os
from gql import gql, Client
from gql.transport.aiohttp import AIOHTTPTransport
import datetime
import sys
# An optional single argument names the file the result is saved to.
if len(sys.argv) == 2:
    output_file = sys.argv[1]
else:
    output_file = None

# Authenticate with the personal token taken from the environment.
token = os.environ.get('MY_GITHUB_TOKEN')
headers = {
    'Authorization': f'Bearer {token}',
}
url = "https://api.github.com/graphql"

# The query declares four variables: the login ($username, required), the
# time range of the contributions collection ($from, $to) and the number of
# pull-request contributions to return ($first).
query = '''
query($username:String!, $from:DateTime, $to:DateTime, $first:Int) {
user(login: $username) {
contributionsCollection(from: $from, to: $to) {
pullRequestContributions(first: $first) {
nodes {
pullRequest {
title, url, createdAt, state, repository { name }
}
}
}
}
}
}
'''

# The time range is fixed here; the previously computed "20 days ago"
# timestamp was never passed to the query and has been dropped.
variables = {
    "username": "szabgab",
    "first": 30,
    "from": "2013-04-20T00:00:00Z",
    "to": "2014-04-20T00:00:00Z"
}

transport = AIOHTTPTransport(url=url, headers=headers)
client = Client(transport=transport, fetch_schema_from_transport=True)
result = client.execute(gql(query), variable_values=variables)

# Either save a pretty-printed copy or show the raw structure.
if output_file:
    with open(output_file, 'w') as fh:
        json.dump(result, fh, indent=4)
else:
    print(result)
GitHub GraphQL paging using cursor
- cursor
import datetime
import argparse
import json
import os
import datetime
import sys
import requests
# GraphQL query text. It takes one optional variable, $after (the paging
# cursor), and asks for up to 100 PUBLIC repositories of the viewer per
# request: each repository's name and its last release, together with the
# pageInfo fields needed to request the following page.
query = '''
query($after:String) {
viewer {
repositories(first: 100, after: $after, privacy: PUBLIC) {
pageInfo {
hasNextPage
endCursor
}
nodes {
name
releases(last:1) {
totalCount
nodes {
name
publishedAt
url
}
}
}
}
}
}
'''
def run_query(query, **variables):
    """Send a GraphQL query to the GitHub API and return the decoded JSON.

    Args:
        query: The GraphQL query text.
        **variables: Values for the variables declared in the query; they
            are sent as the "variables" member of the request body.

    Returns:
        The parsed JSON body when the server answers with HTTP 200,
        otherwise ``None`` after printing the status code and the body.
    """
    token = os.environ.get('MY_GITHUB_TOKEN')
    headers = {
        'Authorization': f'Bearer {token}',
    }
    url = "https://api.github.com/graphql"
    res = requests.post(url, json={"query": query, "variables": variables}, headers=headers)
    if res.status_code == 200:
        return res.json()

    print(f"Request failed with status_code: {res.status_code}")
    # A requests Response has no ``data`` attribute; ``text`` is the body.
    print(res.text)
    return None
def run_query_all(query):
    """Run a cursor-paginated viewer/repositories query and collect all nodes.

    The query is executed repeatedly through ``run_query``; the ``endCursor``
    of each page is passed as the ``after`` variable of the next request
    until ``hasNextPage`` is false.

    Args:
        query: GraphQL query text that accepts an ``after`` variable and
            returns ``viewer.repositories`` with ``nodes`` and ``pageInfo``.

    Returns:
        A list with the repository nodes of all pages.

    Raises:
        RuntimeError: If a response carries no usable ``data`` member.
    """
    cursor = None
    nodes = []
    while True:
        results = run_query(query, after=cursor)
        # A GraphQL-level failure is reported in an "errors" member and may
        # leave "data" missing or null; an empty result is treated the same
        # way. Fail with the whole response instead of an opaque KeyError.
        data = results.get('data') if results else None
        if not data:
            raise RuntimeError(f"GraphQL query failed: {results}")

        repositories = data['viewer']['repositories']
        nodes.extend(repositories['nodes'])

        page_info = repositories['pageInfo']
        if not page_info['hasNextPage']:
            break
        cursor = page_info['endCursor']
    return nodes
def main():
    """Run the module-level query over all pages and save the result.

    The nodes collected by ``run_query_all`` are written to out.json as
    pretty-printed JSON.
    """
    results = run_query_all(query)
    with open("out.json", "w") as fh:
        json.dump(results, fh, indent=4)


# Run only when executed as a script so that importing has no side effects.
if __name__ == "__main__":
    main()
GitHub GraphQL activities
-
List all the activities of a user in a time-range
-
All the issues opened / commented on / closed
-
All the commits
-
All the activities of a list of users in a time-range
-
Get a list of projects written in Python that have between 2 and 5 stars and were updated in the last 5 weeks.
-
Given a repository, list all the changes that were made in all of its forks.