Keyboard shortcuts

Press ← or → to navigate between chapters

Press S or / to search in the book

Press ? to show this help

Press Esc to hide this help

Python and the GitHub API

GitHub API

GitHub data

  • Users / Organizations
  • Repositories
  • Commits
  • Issues
  • Pull-Requests
  • ...

GitHub API: REST vs GraphQL

  • REST API

  • Get the data in the structure the API provider thought you would need.

  • Usually all the data from one table in the database.

  • GraphQL API

  • Have a mapping (edges) between pieces of data that are connected

  • Getting the data you need, nothing more

  • Nested fields

  • Strong typing of the data

  • Rate limits

Where is it used

GitHub get organization members

import json

from github_rest_api import get_from_github

# Ask the REST API for the members of one organization, store the reply
# in out.json and echo it on the terminal as well.
ORGANIZATION = 'github'
members_url = f"https://api.github.com/orgs/{ORGANIZATION}/members"
members = get_from_github(members_url)

with open("out.json", 'w') as out_file:
    json.dump(members, out_file, indent=4)

print(members)

python examples/github-rest/rest_get_org_members.py

{% embed include file="src/examples/github-graphql/get_org_members.gql" %}

python examples/github-graphql/run_query_requests.py examples/github-graphql/get_org_members.gql out.json

Details about an organization REST

import json
import sys

from github_rest_api import get_from_github

# Fetch the details of one GitHub organization through the REST API and
# store the JSON reply in out.json.
if len(sys.argv) != 2:
    # The argument is an organization name, so the usage text says so.
    # sys.exit is used because the bare exit() helper is installed by the
    # site module for interactive sessions and is not meant for programs.
    sys.exit(f"Usage: {sys.argv[0]} ORGANIZATION")

organization = sys.argv[1]

data = get_from_github(f"https://api.github.com/orgs/{organization}")
with open("out.json", 'w') as fh:
    json.dump(data, fh, indent=4)
python examples/github-rest/details-about-org.py github
python examples/github-rest/details-about-org.py kantoniko
python examples/github-rest/details-about-org.py osdc-code-maven

python examples/github-rest/details-about-org.py szabgab          error, this is a user

Details about a user REST

import json
import sys

from github_rest_api import get_from_github

# Fetch the details of one GitHub account through the /users endpoint and
# store the JSON reply in out.json.
if len(sys.argv) != 2:
    # sys.exit instead of the site-provided exit(), which is intended for
    # the interactive interpreter only.
    sys.exit(f"Usage: {sys.argv[0]} USERNAME")

username = sys.argv[1]

data = get_from_github(f"https://api.github.com/users/{username}")
with open("out.json", 'w') as fh:
    json.dump(data, fh, indent=4)
python examples/github-rest/details-about-org.py szabgab

             but these also work:

python examples/github-rest/details-about-org.py github
python examples/github-rest/details-about-org.py kantoniko
python examples/github-rest/details-about-org.py osdc-code-maven

REST - List of repositories by organization (pagination!)

import json
import sys

from github_rest_api import get_from_github

# Download every page of an organization's repository list and store the
# combined result in out.json.
if len(sys.argv) != 2:
    # The argument is an organization name, so the usage text says so.
    # sys.exit replaces the interactive-only exit() helper.
    sys.exit(f"Usage: {sys.argv[0]} ORGANIZATION")

organization = sys.argv[1]

# pages=True makes get_from_github walk through all the result pages.
data = get_from_github(f"https://api.github.com/orgs/{organization}/repos", pages=True)
with open("out.json", 'w') as fh:
    json.dump(data, fh, indent=4)
python examples/github-rest/repos-of-org.py github
python examples/github-rest/repos-of-org.py kantoniko

python examples/github-rest/repos-of-org.py szabgab        error, this is a user

REST - List of repositories by user (pagination!)

import json
import sys

from github_rest_api import get_from_github

# Download every page of the repository list of one account through the
# /users endpoint and store the combined result in out.json.
if len(sys.argv) != 2:
    # sys.exit replaces the interactive-only exit() helper.
    sys.exit(f"Usage: {sys.argv[0]} USERNAME")

# The value is interpolated into the /users/ URL, so it is named accordingly.
username = sys.argv[1]

# pages=True makes get_from_github walk through all the result pages.
data = get_from_github(f"https://api.github.com/users/{username}/repos", pages=True)
with open("out.json", 'w') as fh:
    json.dump(data, fh, indent=4)
python examples/github-rest/repos-of-user.py szabgab

        but these also work:
python examples/github-rest/repos-of-user.py kantoniko
python examples/github-rest/repos-of-user.py osdc-code-maven

GraphQL - List repositories by organization

import datetime
import argparse
import json
import os
import datetime
import sys
import requests

# GraphQL query text sent by run_query().
# It declares one required variable, $organization (the organization login),
# and asks for the avatar URL plus the first 2 repositories of that
# organization. For each repository it selects name, url, visibility, the
# created/pushed/updated timestamps and the watcher and stargazer counts.
# totalCount and pageInfo (endCursor, hasNextPage) are selected as well;
# "after" is hard-coded to null, so only the first page is requested.
query = '''
query ($organization: String!) {
  organization(login: $organization) {
    avatarUrl
    repositories(first: 2, after: null) {
      nodes {
        createdAt
        url
        pushedAt
        name
        watchers {
          totalCount
        }
        visibility
        updatedAt
        stargazers {
          totalCount
        }
      }
      totalCount
      pageInfo {
        endCursor
        hasNextPage
      }
    }
  }
}
'''

def run_query(query, **variables):
    """Send a GraphQL query to the GitHub API and return the decoded reply.

    The token is read from the MY_GITHUB_TOKEN environment variable and sent
    as a Bearer token in the Authorization header.

    Args:
        query: the GraphQL query text.
        **variables: values for the variables declared by the query; they are
            sent in the "variables" field of the request body.

    Returns:
        The parsed JSON body when the server answers with status 200,
        otherwise None (after printing the status code and the body).
    """
    token = os.environ.get('MY_GITHUB_TOKEN')
    headers = {
        'Authorization': f'Bearer {token}',
    }

    url = "https://api.github.com/graphql"
    res = requests.post(url, json={"query": query, "variables": variables}, headers=headers)
    if res.status_code == 200:
        return res.json()

    print(f"Request failed with status_code: {res.status_code}")
    # A requests Response exposes the body as .text; it has no .data
    # attribute, so the old error path raised AttributeError instead of
    # showing what the server said.
    print(res.text)
    return None

def main():
    """Query the repositories of the organization named on the command line.

    Exactly one argument (the organization login) is expected; with any
    other argument count the program exits with a usage message. Whatever
    run_query returns is written to out.json.
    """
    if len(sys.argv) != 2:
        # sys.exit instead of the site-provided exit(), which is intended
        # for the interactive interpreter only.
        sys.exit(f"Usage: {sys.argv[0]} ORGANIZATION")

    organization = sys.argv[1]
    results = run_query(query, organization=organization)
    with open("out.json", "w") as fh:
        json.dump(results, fh, indent=4)

main()

GitHub API KEY (PERSONAL TOKEN)

GitHub REST API

pip install requests

GitHub REST API execute query

import requests
import os


def get_from_github(url, expected=0, pages=False):
    """Fetch JSON from a GitHub REST API URL, optionally following pagination.

    The token is read from the MY_GITHUB_TOKEN environment variable.

    Args:
        url: the REST API URL to fetch.
        expected: optional number of items the caller expects in total; it is
            only used in the progress messages.
        pages: when true, request page after page (100 items each) until a
            page comes back with fewer than 100 items.

    Returns:
        None when MY_GITHUB_TOKEN is not set.
        With pages=False, the decoded JSON reply.
        With pages=True, a list with the items of all the pages. If a reply
        is not a JSON array (for example an error object), that reply is
        returned as-is, the same way the non-paged branch would return it.
    """
    token = os.environ.get('MY_GITHUB_TOKEN')
    if not token:
        print('Missing MY_GITHUB_TOKEN. Not collecting data from Github')
        return None

    headers = {
        'Accept': 'application/vnd.github+json',
        'Authorization': f'Bearer {token}',
        'X-GitHub-Api-Version': '2022-11-28',
    }

    if not pages:
        print(f"Fetching from {url}")
        return requests.get(url, headers=headers).json()

    per_page = 100  # default is 30 max is 100
    # Do not clobber a query string the caller already put on the URL.
    separator = '&' if '?' in url else '?'
    all_data = []
    page = 1
    while True:
        real_url = f"{url}{separator}per_page={per_page}&page={page}"
        print(f"Fetching from {real_url}")
        data = requests.get(real_url, headers=headers).json()
        if not isinstance(data, list):
            # list.extend() on a dict would silently add its keys to the
            # result, hiding the fact that the request did not return items.
            print(f"Unexpected response from {real_url}: {data}")
            return data
        all_data.extend(data)
        if expected:
            print(f"Received {len(data)} Total {len(all_data)} out of an expected {expected}")
        else:
            print(f"Received {len(data)} Total {len(all_data)}")
        if len(data) < per_page:
            # A short page means there is nothing left to fetch.
            break
        page += 1

    return all_data


GitHub API GraphQL

pip install requests

GitHub GraphQL explorer

GraphQL explorer

GitHub GraphQL execute query

import sys
import json
import os
import requests

def run_query(query):
    """Send a GraphQL query to the GitHub API and return the decoded reply.

    The token is read from the MY_GITHUB_TOKEN environment variable and sent
    as a Bearer token in the Authorization header.

    Args:
        query: the GraphQL query text.

    Returns:
        The parsed JSON body when the server answers with status 200,
        otherwise None (after printing the status code and the body).
    """
    token = os.environ.get('MY_GITHUB_TOKEN')
    headers = {
        'Authorization': f'Bearer {token}',
    }

    url = "https://api.github.com/graphql"
    res = requests.post(url, json={"query": query}, headers=headers)
    if res.status_code == 200:
        return res.json()

    print(f"Request failed with status_code: {res.status_code}")
    # A requests Response exposes the body as .text; it has no .data
    # attribute, so the old error path raised AttributeError.
    print(res.text)
    return None

if __name__ == "__main__":
    # Command line: QUERY_FILE [OUTPUT_FILE]
    if not 2 <= len(sys.argv) <= 3:
        # sys.exit instead of the site-provided exit(), which is intended
        # for the interactive interpreter only.
        sys.exit(f"Usage: {sys.argv[0]} QUERY_FILE [OUTPUT_FILE]")

    query_filename = sys.argv[1]
    output_file = sys.argv[2] if len(sys.argv) == 3 else None

    with open(query_filename) as fh:
        query = fh.read()
    result = run_query(query)

    # Save to the output file when one was given, otherwise print the reply.
    if output_file:
        with open(output_file, 'w') as fh:
            json.dump(result, fh, indent=4)
    else:
        print(result)

GitHub GraphQL execute query async

pip install gql[all]
import sys
import json
import os
from gql import gql, Client
from gql.transport.aiohttp import AIOHTTPTransport

# Command line: QUERY_FILE [OUTPUT_FILE]
if not 2 <= len(sys.argv) <= 3:
    # sys.exit instead of the site-provided exit(), which is intended for
    # the interactive interpreter only.
    sys.exit(f"Usage: {sys.argv[0]} QUERY_FILE [OUTPUT_FILE]")

query_filename = sys.argv[1]
output_file = sys.argv[2] if len(sys.argv) == 3 else None

with open(query_filename) as fh:
    query = fh.read()

# The token is sent as a Bearer token with every request.
token = os.environ.get('MY_GITHUB_TOKEN')
headers = {
    'Authorization': f'Bearer {token}',
}

url = "https://api.github.com/graphql"

transport = AIOHTTPTransport(url=url, headers=headers)
client = Client(transport=transport, fetch_schema_from_transport=True)
result = client.execute(gql(query))

# Save to the output file when one was given, otherwise print the reply.
if output_file:
    with open(output_file, 'w') as fh:
        json.dump(result, fh, indent=4)
else:
    print(result)

GitHub GraphQL who am i

  • Get the username of who provided the token
query {
  viewer {
    login
  }
}
python examples/github-graphql/run_query_requests.py examples/github-graphql/login.gql out.json
{
    "viewer": {
        "login": "szabgab"
    }
}

GitHub GraphQL list my repositories

query {
  viewer {
    repositories(first: 30) {
      totalCount
      pageInfo {
        hasNextPage
        endCursor
      }
      edges {
        node {
          name
        }
      }
    }
  }
}
python examples/github-graphql/run_query_requests.py examples/github-graphql/list_my_repositories.gql out.json
{
    "viewer": {
        "repositories": {
            "totalCount": 470,
            "pageInfo": {
                "hasNextPage": true,
                "endCursor": "Y3Vyc29yOnYyOpHOACAlgw=="
            },
            "edges": [
                {
                    "node": {
                        "name": "whitecamel.org"
                    }
                },
                {
                    "node": {
                        "name": "perl6-in-perl5"
                    }
                },
                {
                    "node": {
                        "name": "test-snapshots"
                    }
                },
                {
                    "node": {
                        "name": "padre-plugin-debugger"
                    }
                },
                {
                    "node": {
                        "name": "Math-RPN"
                    }
                },
                {
                    "node": {
                        "name": "perl6-conf"
                    }
                },
                {
                    "node": {
                        "name": "the-driver"
                    }
                },
                {
                    "node": {
                        "name": "Rehovot.pm"
                    }
                },
                {
                    "node": {
                        "name": "CPAN-Forum"
                    }
                },
                {
                    "node": {
                        "name": "test-runner"
                    }
                },
                {
                    "node": {
                        "name": "test-class"
                    }
                },
                {
                    "node": {
                        "name": "perl-android-scripts"
                    }
                },
                {
                    "node": {
                        "name": "perl-promotion"
                    }
                },
                {
                    "node": {
                        "name": "prestool"
                    }
                },
                {
                    "node": {
                        "name": "pdf-create"
                    }
                },
                {
                    "node": {
                        "name": "pdf6"
                    }
                },
                {
                    "node": {
                        "name": "try.rakudo.org"
                    }
                },
                {
                    "node": {
                        "name": "CPAN-Digger-old"
                    }
                },
                {
                    "node": {
                        "name": "peg"
                    }
                },
                {
                    "node": {
                        "name": "Hypolit"
                    }
                },
                {
                    "node": {
                        "name": "topposters"
                    }
                },
                {
                    "node": {
                        "name": "Bailador"
                    }
                },
                {
                    "node": {
                        "name": "git_experiments"
                    }
                },
                {
                    "node": {
                        "name": "Code-Explain"
                    }
                },
                {
                    "node": {
                        "name": "Code-Explain-Web"
                    }
                },
                {
                    "node": {
                        "name": "CGI--Simple"
                    }
                },
                {
                    "node": {
                        "name": "Prima"
                    }
                },
                {
                    "node": {
                        "name": "Test-Version"
                    }
                },
                {
                    "node": {
                        "name": "dwimmer"
                    }
                },
                {
                    "node": {
                        "name": "Text-Trac"
                    }
                }
            ]
        }
    }
}

GitHub GraphQL list of repositories by username

query {
  repositoryOwner(login: "cm-demo") {
    repositories(first: 5, privacy: PUBLIC) {
      totalCount
      edges {
        node {
          id,
          name,
          isPrivate,
          description
        }
      }
    }
  }
}
python examples/github-graphql/run_query_requests.py examples/github-graphql/list_repositories_by_username.gql out.json
{
    "repositoryOwner": {
        "repositories": {
            "totalCount": 5,
            "edges": [
                {
                    "node": {
                        "id": "R_kgDOGSKE7A",
                        "name": "cm-demo",
                        "isPrivate": false,
                        "description": "Config files for my GitHub profile."
                    }
                },
                {
                    "node": {
                        "id": "R_kgDOIx8BIw",
                        "name": "cm-demo.github.io-osdc-2023-01-public",
                        "isPrivate": false,
                        "description": null
                    }
                },
                {
                    "node": {
                        "id": "R_kgDOI4Gftw",
                        "name": "cm-demo.github.io-osdc-2023-01-perl",
                        "isPrivate": false,
                        "description": null
                    }
                },
                {
                    "node": {
                        "id": "R_kgDOJNSvyA",
                        "name": "cm-demo.github.io-osdc-2023-03-azrieli-",
                        "isPrivate": false,
                        "description": null
                    }
                },
                {
                    "node": {
                        "id": "R_kgDOJWTJHw",
                        "name": "osdc-2023-03-azrieli",
                        "isPrivate": false,
                        "description": "OSDC at Azriel College starting in 2023.03"
                    }
                }
            ]
        }
    }
}

GitHub GraphQL list issues by username

query {
  user(login: "szabgab") {
    issues(first: 10, filterBy: {since: "2023-03-20T00:00:00Z"}) {
      totalCount
      edges {
        node {
          number,
          title,
          state,
          createdAt,
          url,
          repository {
            owner {
              login
            }
          }
        }
      }
    }
  }
}

python examples/github-graphql/run_query_requests.py examples/github-graphql/list_issues_by_username.gql out.json
{
    "user": {
        "issues": {
            "totalCount": 50,
            "edges": [
                {
                    "node": {
                        "number": 8,
                        "title": "Check if package has link to Issues?",
                        "state": "CLOSED",
                        "createdAt": "2020-11-02T19:06:04Z",
                        "url": "https://github.com/szabgab/CPAN-Digger/issues/8",
                        "repository": {
                            "owner": {
                                "login": "szabgab"
                            }
                        }
                    }
                },
                {
                    "node": {
                        "number": 9,
                        "title": "Check if meta data contains the license field?",
                        "state": "CLOSED",
                        "createdAt": "2020-11-02T19:06:28Z",
                        "url": "https://github.com/szabgab/CPAN-Digger/issues/9",
                        "repository": {
                            "owner": {
                                "login": "szabgab"
                            }
                        }
                    }
                },
                {
                    "node": {
                        "number": 6051,
                        "title": "Hint how to unlock exercises",
                        "state": "OPEN",
                        "createdAt": "2021-10-21T11:43:51Z",
                        "url": "https://github.com/exercism/exercism/issues/6051",
                        "repository": {
                            "owner": {
                                "login": "exercism"
                            }
                        }
                    }
                },
                {
                    "node": {
                        "number": 20,
                        "title": "Add CPANcover data",
                        "state": "CLOSED",
                        "createdAt": "2022-12-06T04:46:24Z",
                        "url": "https://github.com/szabgab/CPAN-Digger/issues/20",
                        "repository": {
                            "owner": {
                                "login": "szabgab"
                            }
                        }
                    }
                },
                {
                    "node": {
                        "number": 1,
                        "title": "Misunderstood .gitignore?",
                        "state": "CLOSED",
                        "createdAt": "2022-12-30T05:02:13Z",
                        "url": "https://github.com/x-lamprocapnos-x/Movie-Selector/issues/1",
                        "repository": {
                            "owner": {
                                "login": "x-lamprocapnos-x"
                            }
                        }
                    }
                },
                {
                    "node": {
                        "number": 3,
                        "title": "Verify project URLs in the individual json files",
                        "state": "CLOSED",
                        "createdAt": "2023-02-08T12:59:29Z",
                        "url": "https://github.com/OSDC-Code-Maven/osdc-site-generator/issues/3",
                        "repository": {
                            "owner": {
                                "login": "OSDC-Code-Maven"
                            }
                        }
                    }
                },
                {
                    "node": {
                        "number": 1,
                        "title": "The __pycache__ folder should not be in git",
                        "state": "CLOSED",
                        "createdAt": "2023-02-12T14:38:30Z",
                        "url": "https://github.com/zguillez/python-toolz/issues/1",
                        "repository": {
                            "owner": {
                                "login": "zguillez"
                            }
                        }
                    }
                },
                {
                    "node": {
                        "number": 1,
                        "title": "Move all the data from the other 3 repositories",
                        "state": "OPEN",
                        "createdAt": "2023-03-05T07:44:06Z",
                        "url": "https://github.com/OSDC-Code-Maven/open-source-by-organizations/issues/1",
                        "repository": {
                            "owner": {
                                "login": "OSDC-Code-Maven"
                            }
                        }
                    }
                },
                {
                    "node": {
                        "number": 1823,
                        "title": "Flake error B031 caused by new release flake8-bugbear",
                        "state": "CLOSED",
                        "createdAt": "2023-03-10T12:04:24Z",
                        "url": "https://github.com/pallets/jinja/issues/1823",
                        "repository": {
                            "owner": {
                                "login": "pallets"
                            }
                        }
                    }
                },
                {
                    "node": {
                        "number": 6378,
                        "title": "How to setup local dev environment and run the tests?",
                        "state": "OPEN",
                        "createdAt": "2023-03-11T17:22:01Z",
                        "url": "https://github.com/psf/requests/issues/6378",
                        "repository": {
                            "owner": {
                                "login": "psf"
                            }
                        }
                    }
                }
            ]
        }
    }
}

GitHub GraphQL list issues using parameter

import json
import os
from gql import gql, Client
from gql.transport.aiohttp import AIOHTTPTransport
import datetime

# List issues of a fixed user, passing the "since" filter as a GraphQL
# variable instead of embedding it in the query text.
token = os.environ.get('MY_GITHUB_TOKEN')
headers = {
    'Authorization': f'Bearer {token}',
}

url = "https://api.github.com/graphql"


query = '''
query($since:DateTime) {
  user(login: "szabgab") {
    issues(first: 1, filterBy: {since: $since}) {
      totalCount
      edges {
        node {
          number, title, state, createdAt, url, repository {
            owner {
              login
            }
          }
        }
      }
    }
  }
}
'''

# A fixed value could be used instead:
#variables = {
#    "since": "2023-04-10T00:00:00Z"
#}

# The format string ends in a literal "Z" (UTC), so the timestamp has to be
# computed in UTC; a naive local now() would be off by the local UTC offset.
ts = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=10)
variables = {
    "since": ts.strftime("%Y-%m-%dT%H:%M:%SZ")
}

transport = AIOHTTPTransport(url=url, headers=headers)
client = Client(transport=transport, fetch_schema_from_transport=True)
result = client.execute(gql(query), variable_values=variables)
print(result)



GitHub GraphQL list issues using several parameters

import json
import os
from gql import gql, Client
from gql.transport.aiohttp import AIOHTTPTransport
import datetime
import sys

# List issues of a user; the user name, the number of issues and the
# "since" filter are all passed as GraphQL variables.
# Optional command line argument: the file to save the reply in.
output_file = sys.argv[1] if len(sys.argv) == 2 else None


token = os.environ.get('MY_GITHUB_TOKEN')
headers = {
    'Authorization': f'Bearer {token}',
}

url = "https://api.github.com/graphql"


query = '''
query($since:DateTime, $first:Int, $user:String!) {
  user(login: $user) {
    issues(first: $first, filterBy: {since: $since}) {
      totalCount
      edges {
        node {
          number, title, state, createdAt, url, repository {
            owner {
              login
            }
          }
        }
      }
    }
  }
}
'''

# The format string ends in a literal "Z" (UTC), so the timestamp has to be
# computed in UTC; a naive local now() would be off by the local UTC offset.
ts = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=20)
variables = {
    "user": "szabgab",
    "since": ts.strftime("%Y-%m-%dT%H:%M:%SZ"),
    "first": 30,
}

transport = AIOHTTPTransport(url=url, headers=headers)
client = Client(transport=transport, fetch_schema_from_transport=True)
result = client.execute(gql(query), variable_values=variables)

# Save to the output file when one was given, otherwise print the reply.
if output_file:
    with open(output_file, 'w') as fh:
        json.dump(result, fh, indent=4)
else:
    print(result)



GitHub GraphQL contribution counts

query($username:String!) {
  user(login: $username) {
    contributionsCollection {
      contributionCalendar {
        totalContributions
        weeks {
          contributionDays {
            contributionCount
            weekday
            date
          }
        }
      }
    }
  }
}

{
  "username": "szabgab"
}
  • Defaults to the last 1 year
query($username:String!, $from:DateTime, $to:DateTime) {
  user(login: $username) {
    contributionsCollection(from: $from, to: $to) {
      contributionCalendar {
        totalContributions
        weeks {
          contributionDays {
            contributionCount
            weekday
            date
          }
        }
      }
    }
  }
}

{
  "username": "szabgab",
  "from": "2013-03-20T00:00:00Z",
  "to": "2013-04-20T00:00:00Z"
}
  • Can set the start-date (defaults to now - 1 year)
  • Can set the end-date (defaults to start-date + 1 year)

GitHub GraphQL list Pull-Requests

  • List all the PRs created by a user in a time-range
import json
import os
from gql import gql, Client
from gql.transport.aiohttp import AIOHTTPTransport
import datetime
import sys

# List the last pull requests of a user.
# Optional command line argument: the file to save the reply in.
output_file = sys.argv[1] if len(sys.argv) == 2 else None


token = os.environ.get('MY_GITHUB_TOKEN')
headers = {
    'Authorization': f'Bearer {token}',
}

url = "https://api.github.com/graphql"


query = '''
query($username:String!, $last:Int) {
  user(login: $username) {
    pullRequests(last: $last) {
      totalCount
      edges {
        node {
          number, title, state, createdAt, author { login }, url
        }
      }
    }
  }
}
'''

# The query takes no date filter, so the timestamp that used to be computed
# here and never used has been dropped.
variables = {
    "username": "szabgab",
    "last": 30,
}

transport = AIOHTTPTransport(url=url, headers=headers)
client = Client(transport=transport, fetch_schema_from_transport=True)
result = client.execute(gql(query), variable_values=variables)

# Save to the output file when one was given, otherwise print the reply.
if output_file:
    with open(output_file, 'w') as fh:
        json.dump(result, fh, indent=4)
else:
    print(result)



import json
import os
from gql import gql, Client
from gql.transport.aiohttp import AIOHTTPTransport
import datetime
import sys

# List the pull-request contributions of a user between two dates.
# Optional command line argument: the file to save the reply in.
output_file = sys.argv[1] if len(sys.argv) == 2 else None


token = os.environ.get('MY_GITHUB_TOKEN')
headers = {
    'Authorization': f'Bearer {token}',
}

url = "https://api.github.com/graphql"


query = '''
query($username:String!, $from:DateTime, $to:DateTime, $first:Int) {
  user(login: $username) {
    contributionsCollection(from: $from, to: $to) {
      pullRequestContributions(first: $first) {
        nodes {
          pullRequest {
            title, url, createdAt, state, repository { name }
          }
        }
      }
    }
  }
}
'''

# The date range is given as fixed strings; the timestamp that used to be
# computed here and never used has been dropped.
variables = {
    "username": "szabgab",
    "first": 30,
    "from": "2013-04-20T00:00:00Z",
    "to": "2014-04-20T00:00:00Z"
}

transport = AIOHTTPTransport(url=url, headers=headers)
client = Client(transport=transport, fetch_schema_from_transport=True)
result = client.execute(gql(query), variable_values=variables)

# Save to the output file when one was given, otherwise print the reply.
if output_file:
    with open(output_file, 'w') as fh:
        json.dump(result, fh, indent=4)
else:
    print(result)



GitHub GraphQL paging using cursor

  • cursor
import datetime
import argparse
import json
import os
import datetime
import sys
import requests

# GraphQL query text used for cursor-based paging.
# It declares one optional variable, $after, which is passed to the "after"
# argument of repositories. Each request asks for up to 100 PUBLIC
# repositories of the viewer; for every repository it selects the name and
# its last release (name, publishedAt, url) together with the release count.
# pageInfo (hasNextPage, endCursor) is selected so the caller can ask for
# the next page.
query = '''
query($after:String) {
  viewer {
    repositories(first: 100, after: $after, privacy: PUBLIC) {
      pageInfo {
        hasNextPage
        endCursor
      }
      nodes {
        name
        releases(last:1) {
          totalCount
          nodes {
            name
            publishedAt
            url
          }
        }
      }
    }
  }
}
'''

def run_query(query, **variables):
    """Send a GraphQL query to the GitHub API and return the decoded reply.

    The token is read from the MY_GITHUB_TOKEN environment variable and sent
    as a Bearer token in the Authorization header.

    Args:
        query: the GraphQL query text.
        **variables: values for the variables declared by the query; they are
            sent in the "variables" field of the request body.

    Returns:
        The parsed JSON body when the server answers with status 200,
        otherwise None (after printing the status code and the body).
    """
    token = os.environ.get('MY_GITHUB_TOKEN')
    headers = {
        'Authorization': f'Bearer {token}',
    }

    url = "https://api.github.com/graphql"
    res = requests.post(url, json={"query": query, "variables": variables}, headers=headers)
    if res.status_code == 200:
        return res.json()

    print(f"Request failed with status_code: {res.status_code}")
    # A requests Response exposes the body as .text; it has no .data
    # attribute, so the old error path raised AttributeError.
    print(res.text)
    return None

def run_query_all(query):
    """Run a paged viewer-repositories query until the last page is reached.

    The query is executed repeatedly through run_query; the endCursor of
    each reply is passed as the "after" variable of the next request until
    hasNextPage is false.

    Args:
        query: GraphQL query text that accepts an $after variable and selects
            viewer.repositories with nodes and pageInfo.

    Returns:
        A list with the repository nodes of all the pages.

    Raises:
        RuntimeError: when a request fails or the reply carries no data.
    """
    cursor = None  # None asks for the first page
    nodes = []
    while True:
        results = run_query(query, after=cursor)
        if not results or not results.get('data'):
            # run_query returns None on a failed request; without this check
            # the subscripting below dies with an unhelpful TypeError.
            raise RuntimeError(f"GitHub GraphQL query failed: {results}")

        repositories = results['data']['viewer']['repositories']
        nodes.extend(repositories['nodes'])

        page_info = repositories['pageInfo']
        if not page_info['hasNextPage']:
            break
        cursor = page_info['endCursor']
    return nodes

def main():
    """Fetch all the pages of the module-level query and save them to out.json.

    The date-range arithmetic that used to live here computed values that
    were never passed to the query, so it has been removed together with
    the commented-out experiments.
    """
    results = run_query_all(query)
    with open("out.json", "w") as fh:
        json.dump(results, fh, indent=4)

main()

GitHub GraphQL activities

  • List all the activities of a user in a time-range

  • All the issues opened / commented on / closed

  • All the commits

  • All the activities of a list of users in a time-range

  • Get a list of projects written in python, that have between 2-5 stars and were updated in the last 5 weeks.

  • Given a repository, list all the changes that were made in all of its forks.