Python Other
PyPi - Python Package Index
What is PyPi?
pip
- pip
$ pip install package_name
Configure pip on Windows to avoid SSL issues
On the command line:
pip install --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host files.pythonhosted.org PACKAGE_NAME
Run the following command to get the list of configuration files:
pip config -v list
You will see something like this: (your username instead of FooBar)
For variant 'global', will try loading 'C:\ProgramData\pip\pip.ini'
For variant 'user', will try loading 'C:\Users\FooBar\pip\pip.ini'
For variant 'user', will try loading 'D:\Data\Users\FooBar\AppData\Roaming\pip\pip.ini'
For variant 'site', will try loading 'C:\Users\FooBar\AppData\Local\Programs\Python\Python310\pip.ini'
Create the first pip.ini
file with the following content:
[global]
trusted-host = pypi.org files.pythonhosted.org pypi.python.org
If you run the pip config -v list
again, you'll see an additional line on the output:
global.trusted-host='pypi.org, files.pythonhosted.org ,pypi.python.org'
pip
will now disregard the SSL issues.
Upgrade pip
pip install --upgrade pip
Will probably not work on Windows because file is in use...
Upgrade PIP on Windows
py -m pip install --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host files.pythonhosted.org --upgrade pip
PYTHONPATH
export PYTHONPATH=~/python
Requirements
numpy
pandas
requests
flask>=1.00
pip install -r requirements.txt
Virtualenv
- virtualenv
On Linux/macOS:
$ cd project_dir
$ virtualenv -p python3 venv
$ source venv/bin/activate
$ ...
$ deactivate
On Windows:
venv\Scripts\activate.bat
...
deactivate
The virtualenv
command will create a copy of python in the given directory inside the current directory.
In the above example it will create the copy in the 'venv' directory inside the 'project_dir'.
After source-ing the 'activate' file the PATH will include the local python with a local version of pip.
This requires bash or zsh.
See also the Python guide.
Python Web server
Hello world web
- WSGI
- CGI
from wsgiref.util import setup_testing_defaults
from wsgiref.simple_server import make_server
import time
def hello_world(environ, start_response):
    """WSGI application answering every request with a timestamped greeting.

    Fills in missing WSGI keys of *environ* with testing defaults, reports a
    200 status with a UTF-8 plain-text content type through *start_response*
    and returns the body as a one-element list of bytes.
    """
    setup_testing_defaults(environ)
    start_response('200 OK', [('Content-type', 'text/plain; charset=utf-8')])
    greeting = "Hello World {}".format(time.time())
    return [greeting.encode('utf-8')]
port = 8080
with make_server('0.0.0.0', port, hello_world) as httpd:
print("Serving on port {}...".format(port))
httpd.serve_forever()
Dump web environment info
from wsgiref.util import setup_testing_defaults
from wsgiref.simple_server import make_server
# A relatively simple WSGI application. It's going to print out the
# environment dictionary after being updated by setup_testing_defaults
def simple_app(environ, start_response):
    """WSGI application that dumps the request environment as plain text.

    *environ* is first completed with the wsgiref testing defaults, then every
    key/value pair is rendered as one ``key: value`` line.

    Returns a list of UTF-8 encoded lines, one per environment entry.
    """
    setup_testing_defaults(environ)

    status = '200 OK'
    headers = [('Content-type', 'text/plain; charset=utf-8')]

    start_response(status, headers)

    # dict.iteritems() only exists in Python 2, and a WSGI body has to be an
    # iterable of bytes, so iterate with items() and encode every line.
    ret = ["{}: {}\n".format(key, value).encode('utf-8')
           for key, value in environ.items()]
    return ret
httpd = make_server('', 8000, simple_app)
print("Serving on port 8000...")
httpd.serve_forever()
# taken from the standard documentation of Python
Web echo
from wsgiref.util import setup_testing_defaults
from wsgiref.simple_server import make_server
import time
import cgi
def _read_form(environ):
    """Return the submitted form fields as a dict of name -> list of values.

    Fields come from the query string and, for POST requests, from a
    url-encoded request body. Blank values are dropped.
    """
    # parse_qs stands in for cgi.FieldStorage; the cgi module was removed from
    # the standard library in Python 3.13.
    from urllib.parse import parse_qs

    fields = parse_qs(environ.get('QUERY_STRING', ''))
    if environ.get('REQUEST_METHOD') == 'POST':
        try:
            size = int(environ.get('CONTENT_LENGTH') or 0)
        except ValueError:
            size = 0
        posted = environ['wsgi.input'].read(size).decode('utf-8')
        for name, values in parse_qs(posted).items():
            fields.setdefault(name, []).extend(values)
    return fields


def hello_world(environ, start_response):
    """WSGI application that echoes the ``txt`` form field.

    When a non-empty ``txt`` value was submitted the response is
    ``Echo: <value>``; otherwise the input form is shown.

    Returns the page as a one-element list of UTF-8 bytes.
    """
    from html import escape

    setup_testing_defaults(environ)

    status = '200 OK'
    headers = [('Content-type', 'text/html; charset=utf-8')]

    start_response(status, headers)

    form = _read_form(environ)
    if 'txt' in form:
        # The value comes straight from the client and is placed in an HTML
        # page, so it has to be escaped.
        page = 'Echo: ' + escape(form['txt'][0])
    else:
        page = """
<form>
<input name="txt" />
<input type="submit" value="Echo" />
</form>
"""
    # A WSGI application must return an iterable of bytes, not a str.
    return [page.encode('utf-8')]
httpd = make_server('', 8000, hello_world)
print("Serving on port 8000...")
httpd.serve_forever()
Web form
from wsgiref.util import setup_testing_defaults
from wsgiref.simple_server import make_server
import time
import cgi
def _read_form(environ):
    """Return the submitted form fields as a dict of name -> list of values.

    Fields come from the query string and, for POST requests, from a
    url-encoded request body. Blank values are dropped.
    """
    # parse_qs stands in for cgi.FieldStorage; the cgi module was removed from
    # the standard library in Python 3.13.
    from urllib.parse import parse_qs

    fields = parse_qs(environ.get('QUERY_STRING', ''))
    if environ.get('REQUEST_METHOD') == 'POST':
        try:
            size = int(environ.get('CONTENT_LENGTH') or 0)
        except ValueError:
            size = 0
        posted = environ['wsgi.input'].read(size).decode('utf-8')
        for name, values in parse_qs(posted).items():
            fields.setdefault(name, []).extend(values)
    return fields


def hello_world(environ, start_response):
    """WSGI application that lists the submitted form fields.

    Every submitted field is rendered as ``name==value<br>``. When nothing
    was submitted a sample link and form are shown instead.

    Returns the page as a one-element list of UTF-8 bytes.
    """
    from html import escape

    setup_testing_defaults(environ)

    status = '200 OK'
    headers = [('Content-type', 'text/html; charset=utf-8')]

    start_response(status, headers)

    form = _read_form(environ)
    # Names and values come from the client, so both are HTML-escaped.
    page = ''.join(
        '{}=={}<br>'.format(escape(name), escape(value))
        for name, values in form.items()
        for value in values
    )

    if not page:
        page = """
<a href="?fname=Foo&lname=Bar">click</a>
<form>
Username: <input name="username" /><br>
Password: <input type="password" name="pw" /><br>
Age group: Under 18 <input type="radio" name="age" value="kid" >
18-30 <input type="radio" name="age" value="young" >
30- <input type="radio" name="age" value="old" >
<input type="submit" value="Send" />
</form>
"""
    # A WSGI application must return an iterable of bytes, not a str.
    return [page.encode('utf-8')]
httpd = make_server('', 8000, hello_world)
print("Serving on port 8000...")
httpd.serve_forever()
Resources
Networking
Secure shell
ssh
- On Windows install putty
import subprocess
import sys

# Run a single command on a remote host through the ssh client and show
# what it printed.
if len(sys.argv) != 2:
    sys.exit("Usage: " + sys.argv[0] + " hostname")

host = sys.argv[1]
command = "uname -a"

# subprocess.run() reads stdout and stderr together and waits for the child.
# Reading one pipe to the end before touching the other can block forever
# once the unread pipe's buffer fills up.
ssh = subprocess.run(["ssh", host, command],
                     shell=False,
                     stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE,
                     universal_newlines=True)  # decode the output to str
result = ssh.stdout.splitlines()
error = ssh.stderr.splitlines()

for err in error:
    sys.stderr.write("ERROR: {}\n".format(err))
if result:
    print(result)
ssh from Windows
$ ssh foobar@hostname-or-ip
-o "StrictHostKeyChecking no"
$ plink.exe -ssh foobar@hostname-or-ip -pw "password" -C "uname -a"
$ plink.exe", "-ssh", "foobar@username-or-ip", "-pw", "no secret", "-C", "uname -a"
import subprocess
import sys

# Run a single command on a remote host through PuTTY's plink.exe.
# NOTE(review): a password given with -pw is part of the command line and is
# presumably visible to other local users - prefer key-based login.
#
# subprocess.run() reads stdout and stderr together and waits for the child.
# Reading one pipe to the end before touching the other can block forever
# once the unread pipe's buffer fills up.
ssh = subprocess.run([r"c:\Users\foobar\download\plink.exe", "-ssh",
                      "foobar@username-or-ip",
                      "-pw", "password",
                      "-C", "uname -a"],
                     shell=False,
                     stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE,
                     universal_newlines=True)  # decode the output to str
result = ssh.stdout.splitlines()
error = ssh.stderr.splitlines()

for err in error:
    sys.stderr.write("ERROR: {}\n".format(err))
if result:
    print(result)
Parallel ssh
- parallel-ssh
- pip install parallel-ssh
from pssh import ParallelSSHClient
hosts = ['myhost1', 'myhost2']
client = ParallelSSHClient(hosts)
output = client.run_command('ls -ltrh /tmp/', sudo=True)
telnet
# NOTE: telnetlib is deprecated since Python 3.11 and was removed from the
# standard library in Python 3.13 (PEP 594).
import telnetlib

hostname = '104.131.87.33'
user = 'gabor'
password = 'robag'

tn = telnetlib.Telnet(hostname)

# In Python 3 telnetlib exchanges bytes, not str: the prompts given to
# read_until() and the commands given to write() must be bytes, and whatever
# is read back is decoded before it is printed.
tn.read_until(b"login: ")
tn.write(user.encode('ascii') + b"\n")

tn.read_until(b"Password: ")
tn.write(password.encode('ascii') + b"\n")
tn.read_until(b"~$")

tn.write(b"hostname\n")
print(tn.read_until(b"~$").decode('utf-8', 'replace'))
print("-------")

tn.write(b"uptime\n")
print(tn.read_until(b"~$").decode('utf-8', 'replace'))
print("-------")

print("going to exit")
tn.write(b"exit\n")

print("--------")
print(tn.read_all().decode('utf-8', 'replace'))
prompt for password
import getpass
password = getpass.getpass("Password:")
print(password)
ftp
$ sudo aptitude install proftpd
$ sudo /etc/init.d/proftpd start
$ sudo adduser (user: foo pw: bar)
from ftplib import FTP

# List the remote directory, upload a file, list again, delete the file and
# list once more. The with-statement closes the connection even when one of
# the commands below raises.
with FTP('localhost') as ftp:
    ftp.login("foo", "bar")

    print(ftp.retrlines('LIST'))

    print('-------')
    for f in ftp.nlst():
        print("file: " + f)

    filename = 'ssh.py'

    # storlines() sends the lines as bytes, so the file has to be opened in
    # binary mode; the with-block also closes it after the upload.
    with open(filename, 'rb') as fh:
        ftp.storlines("STOR " + filename, fh)

    print('-------')
    for f in ftp.nlst():
        print("file: " + f)

    ftp.delete(filename)

    print('-------')
    for f in ftp.nlst():
        print("file: " + f)
-rw-rw-r-- 1 foo foo 6 Feb 18 19:18 a.txt
-rw-rw-r-- 1 foo foo 6 Feb 18 19:18 b.txt
226 Transfer complete
-------
file: b.txt
file: a.txt
-------
file: b.txt
file: a.txt
file: ssh.py
-------
file: b.txt
file: a.txt
Interactive shell
The Python interactive shell
- len
Type python
without any arguments on the command line and
you'll get into the Interactive shell of Python.
In the interactive shell you can type:
>>> print("hello")
hello
>>> "hello"
'hello'
>>> 6
6
>>> len("abc")
3
>>> "abc" + 6
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: can only concatenate str (not "int") to str
>>> "abc" + str(6)
'abc6'
REPL - Read Evaluate Print Loop
- int
- float
- REPL
A variable comes to existence the first time we assign a value to it. It points to an object and that object knows about its type.
>>> a = "abc"
>>> len(a)
3
>>> a = '3'
>>> a + 3
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: can only concatenate str (not "int") to str
>>> int(a) + 3
6
>>> a = '2.3'
>>> float(a) + 1
3.3
Using Modules
- import
- sys
- version
- executable
Python has lots of standard (and not standard) modules. You can load one of them using the
import
keyword. Once loaded, you can use functions from the module
or access its objects. For example the sys
module has a sys.version
and a sys.executable
variable.
>>> import sys
>>> sys.version
'2.7.3 (default, Apr 10 2012, 23:24:47) [MSC v.1500 64 bit (AMD64)]'
>>> sys.executable
'c:\\Python27\\python.exe'
You can also load specific object directly into your code.
>>> from sys import executable
>>> executable
'c:\\Python27\\python.exe'
To quit the interpreter call the exit()
function.
>>> exit
Use exit() or Ctrl-Z plus Return to exit
The import
binds the word sys to whatever it loaded from the file.
Getting help
- help()
- dir()
- import
>>> help
Type help() for interactive help, or help(object) for help about object.
>>> help() - entering an internal shell:
...
help> dir - explains about the dir command. Navigate using SPACE/ENTER/q
help> Ctrl-D - to quit, (Ctrl-Z ENTER on Windows)
>>> help(dir) - the same explanation as before
>>> dir()
['__builtins__', '__doc__', '__name__', '__package__']
>>> dir("") - list of string related methods
['__add__', '__class__', ... 'upper', 'zfill']
>>> dir(1) - list of integer related methods
['__abs__', '__add__', ... 'numerator', 'real']
>>> dir(__builtins__)
... - functions available in python
>>> help(abs) - explain how abs() works
>>> help(sum)
>>> help(zip)
>>> help(int)
>>> help(str)
>>> help("".upper) - explain how the upper method of strings work
>>> import sys
>>> dir(sys)
>>> help(sys)
>>> help(sys)
>>> help(sys.path)
>>> help(sys.path.pop)
Exercise: Interactive shell
- Start the REPL and check the examples.
- Check the documentation in the REPL.
Logging
Simple logging
- logging
- basicConfig
import logging
logging.debug("debug")
logging.info("info")
logging.warning("warning")
logging.error("error")
logging.critical("critical")
logging.log(logging.WARNING, "another warning")
logging.log(40, "another error")
WARNING:root:warning
ERROR:root:error
CRITICAL:root:critical
WARNING:root:another warning
ERROR:root:another error
- Written on STDERR
Simple logging - set level
import logging
logging.basicConfig(level = logging.INFO)
logging.debug("debug")
logging.info("info")
logging.warning("warning")
logging.error("error")
logging.critical("critical")
INFO:root:info
WARNING:root:warning
ERROR:root:error
CRITICAL:root:critical
Simple logging to a file
import logging
import time
logging.basicConfig(level = logging.INFO, filename = time.strftime("my-%Y-%m-%d.log"))
logging.debug("debug")
logging.info("info")
logging.warning("warning")
logging.error("error")
logging.critical("critical")
Simple logging format
import logging
logging.basicConfig( format = '%(asctime)s %(levelname)-10s %(processName)s %(name)s %(message)s')
logging.debug("debug")
logging.info("info")
logging.warning("warning")
logging.error("error")
logging.critical("critical")
Simple logging change date format
import logging
logging.basicConfig( format = '%(asctime)s %(levelname)-10s %(processName)s %(name)s %(message)s', datefmt = "%Y-%m-%d-%H-%M-%S")
logging.debug("debug")
logging.info("info")
logging.warning("warning")
logging.error("error")
logging.critical("critical")
2020-04-22-18-59-16 WARNING MainProcess root warning
2020-04-22-18-59-16 ERROR MainProcess root error
2020-04-22-18-59-16 CRITICAL MainProcess root critical
getLogger
- getLogger
- FileHandler
- StreamHandler
import logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
fh = logging.FileHandler('my.log')
fh.setLevel(logging.INFO)
fh.setFormatter( logging.Formatter('%(asctime)s - %(name)s - %(levelname)-10s - %(message)s') )
logger.addHandler(fh)
sh = logging.StreamHandler()
sh.setLevel(logging.DEBUG)
sh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)-10s - %(message)s'))
logger.addHandler(sh)
log = logging.getLogger(__name__)
log.debug("debug")
log.info("info")
log.warning("warning")
log.error("error")
log.critical("critical")
Time-based logrotation
- TimedRotatingFileHandler
import logging
import logging.handlers
log_file = "my.log"
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
ch = logging.handlers.TimedRotatingFileHandler(log_file, when='M', backupCount=2)
ch.setLevel(logging.INFO)
ch.setFormatter( logging.Formatter('%(asctime)s - %(name)s - %(levelname)-10s - %(message)s') )
logger.addHandler(ch)
log = logging.getLogger(__name__)
log.debug("debug")
log.info("info")
log.warning("warning")
log.error("error")
log.critical("critical")
- S - seconds
- M - minutes
- H - hours
- D - days
- docs
Size-based logrotation
import logging
import logging.handlers
log_file = "my.log"
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
ch = logging.handlers.RotatingFileHandler(log_file, maxBytes=100, backupCount=2)
ch.setLevel(logging.INFO)
ch.setFormatter( logging.Formatter('%(asctime)s - %(name)s - %(levelname)-10s - %(message)s') )
logger.addHandler(ch)
log = logging.getLogger(__name__)
log.debug("debug")
log.info("info")
log.warning("warning")
log.error("error")
log.critical("critical")
Closures
Counter local - not working
def counter():
count = 0
count += 1
return count
print(counter())
print(counter())
print(counter())
1
1
1
Counter with global
- global
count = 0
def counter():
global count
count += 1
return count
print(counter())
print(counter())
print(counter())
count = -42
print(counter())
1
2
3
-41
Create incrementors
In order to use in various map-expressions, we need a couple of functions that - for simplicity - need to increment a number:
def f3(x):
return x + 3
def f7(x):
return x + 7
def f23(x):
return x + 23
print(f3(2))
print(f7(3))
print(f3(4))
print(f7(10))
print(f23(19))
5
10
7
17
42
Create internal function
def create_func():
def internal():
print("Hello world")
internal()
func = create_func()
internal()
Hello world
Traceback (most recent call last):
File "create_internal_func.py", line 8, in <module>
internal()
NameError: name 'internal' is not defined
Create function by a function
def create_func():
def internal():
print("Hello world")
#internal()
return internal
func = create_func()
#internal()
func()
Hello world
Create function with parameters
def create_func(name):
def internal():
print(f"Hello {name}")
return internal
foo = create_func("Foo")
foo()
bar = create_func("Bar")
bar()
Hello Foo
Hello Bar
Counter closure
- nonlocal
def create_counter():
    """Create an independent counter function.

    Every call of the returned function increments a private ``count`` that
    starts at 0 and returns the new value. Counters created by separate calls
    do not share state.
    """
    count = 0

    def internal():
        # count lives in the enclosing scope; nonlocal lets us rebind it.
        nonlocal count
        count = count + 1
        return count

    return internal
counter = create_counter()
print(counter())
print(counter())
print(counter())
print()
other = create_counter()
print(counter())
print(other())
print(counter())
print(other())
print()
print(count)
1
2
3
4
1
5
2
Traceback (most recent call last):
File "counter.py", line 23, in <module>
print(count)
NameError: name 'count' is not defined
Make incrementor with def (closure)
- closure
def make_incrementor(n):
def inc(x):
return x + n
return inc
f3 = make_incrementor(3)
f7 = make_incrementor(7)
print(f3(2))
print(f7(3))
print(f3(4))
print(f7(10))
5
10
7
17
Make incrementor with lambda
def make_incrementor(n):
return lambda x: x + n
f3 = make_incrementor(3)
f7 = make_incrementor(7)
print(f3(2))
print(f7(3))
print(f3(4))
print(f7(10))
5
10
7
17
Exercise: closure bank
- Create a closure that returns a function that holds a number (like a bank account) that can be incremented or decremented as follows:
- Allow for an extra parameter called prev that defaults to False. If True is passed then instead of returning the new balance, return the old balance.
bank = create_bank(20)
print(bank()) # 20
print(bank(7)) # 27
print(bank()) # 27
print(bank(-3)) # 24
print(bank()) # 24
print(bank(10, prev=True)) # 24
print(bank()) # 34
Exercise: counter with parameter
Change the counter example to accept a parameter and start counting from that number.
Solution: closure bank
def create_bank(n=0):
    """Create a bank-account function holding a private balance.

    *n* is the opening balance. The returned function adds *change* to the
    balance and returns the new balance, or the balance from before the
    change when *prev* is true.
    """
    balance = n

    def bnk(change=0, prev=False):
        nonlocal balance
        prev_balance = balance
        balance += change
        return prev_balance if prev else balance

    return bnk
bank = create_bank(20)
print(bank()) # 20
print(bank(7)) # 27
print(bank()) # 27
print(bank(-3)) # 24
print(bank()) # 24
print(bank(10, prev=True)) # 24
print(bank()) # 34
20
27
27
24
24
24
34
Solution: counter with parameter
def create_counter(count=0):
def internal():
nonlocal count
count += 1
return count
return internal
counter = create_counter()
print(counter())
print(counter())
print(counter())
print()
other = create_counter(42)
print(counter())
print(other())
print(counter())
print(other())
1
2
3
4
43
5
44
Decorators
Decorators: simple example
- A decorator is that @something line just before the declaration of the function.
- Decorators can modify the behavior of functions or can set some meta information about them.
@some_decorator
def some_function():
pass
Decorators - Flask
- In Flask we use decorators to designate function as "routes".
from flask import Flask
app = Flask(__name__)
@app.route("/")
def main():
return "Hello World!"
@app.route("/login")
def login():
return "Showing the login page ..."
FLASK_APP=flask_app flask run
Decorators - Pytest
- In Pytest we can use decorators to add special marks to test functions
- ... or to mark them as fixtures.
import sys
import pytest
@pytest.mark.skipif(sys.platform != 'linux', reason="Linux tests")
def test_linux():
assert True
@pytest.mark.skip(reason="To show we can skip tests without any condition.")
def test_any():
assert True
@pytest.fixture(autouse = True, scope="module")
def module_demo():
print(f"Fixture")
pytest -v
Decorators caching - no cache
- Each call will execute the function and do the (expensive) computation.
def compute(x, y):
print(f"Called with {x} and {y}")
# some long computation here
return x+y
print(compute(2, 3))
print(compute(3, 4))
print(compute(2, 3))
Called with 2 and 3
5
Called with 3 and 4
7
Called with 2 and 3
5
Decorators caching - with cache
-
cache
-
lru_cache
-
By adding the lru_cache decorator we can tell Python to cache the result and save on computation time.
import functools
@functools.lru_cache()
def compute(x, y):
print(f"Called with {x} and {y}")
# some long computation here
return x+y
print(compute(2, 3))
print(compute(3, 4))
print(compute(2, 3))
Called with 2 and 3
5
Called with 3 and 4
7
5
LRU - Least recently used cache
- LRU - Cache replacement policy
- When we call the function with (1, 5) it removes the least recently used results of (1, 2)
- So next time it has to be computed again.
import functools
@functools.lru_cache(maxsize=3)
def compute(x, y):
print(f"Called with {x} and {y}")
# some long computation here
return x+y
compute(1, 2) # Called with 1 and 2
compute(1, 2)
compute(1, 2)
compute(1, 3) # Called with 1 and 3
compute(1, 3)
compute(1, 4) # Called with 1 and 4
compute(1, 4)
compute(1, 5) # Called with 1 and 5
compute(1, 2) # Called with 1 and 2
compute(1, 2)
LRU - Least recently used cache
- Here we called (1, 2) after (1, 4) when it was still in the cache
- When we called (1, 5) it removed the LRU pair, but it was NOT the (1, 2) pair
- So it was in the cache even after the (1, 5) call.
import functools
@functools.lru_cache(maxsize=3)
def compute(x, y):
print(f"Called with {x} and {y}")
# some long computation here
return x+y
compute(1, 2) # Called with 1 and 2
compute(1, 2)
compute(1, 2)
compute(1, 3) # Called with 1 and 3
compute(1, 3)
compute(1, 4) # Called with 1 and 4
compute(1, 4)
compute(1, 2)
compute(1, 5) # Called with 1 and 5
compute(1, 2)
OOP - classmethod - staticmethod
class Person:
    """Shows what an instance method, a class method and a static method receive."""

    def __init__(self, name):
        """Print the new instance and its class name, then store *name*."""
        print(f"init: '{self}' '{self.__class__.__name__}'")
        self.name = name

    def show_name(self):
        """Instance method: receives the instance as ``self``."""
        print(f"instance method: '{self}' '{self.__class__.__name__}'")

    @classmethod
    def from_occupation(cls, occupation):
        """Class method: receives the class itself as ``cls``."""
        print(f"class method '{cls}' '{cls.__class__.__name__}'")

    @staticmethod
    def is_valid_occupation(param):
        """Static method: receives only the arguments passed by the caller."""
        print(f"static method '{param}' '{param.__class__.__name__}'")
fb = Person('Foo Bar')
fb.show_name()
fb.from_occupation('Tailor')
Person.from_occupation('Tailor') # This is how we should call it.
fb.is_valid_occupation('Tailor')
Person.is_valid_occupation('Tailor')
init: '<__main__.Person object at 0x7fb008f3a640>' 'Person'
instance method: '<__main__.Person object at 0x7fb008f3a640>' 'Person'
class method '<class '__main__.Person'>' 'type'
class method '<class '__main__.Person'>' 'type'
static method 'Tailor' 'str'
static method 'Tailor' 'str'
Use cases for decorators in Python
-
classmethod
-
staticmethod
-
pytest
-
Common decorators are @classmethod and @staticmethod.
-
Flask uses them to mark and configure the routes.
-
Pytest uses them to add marks to the tests.
-
Logging calls with parameters.
-
Logging elapsed time of calls.
-
Access control in Django or other web frameworks. (e.g. login required)
-
Memoization (caching)
-
Retry
-
Function timeout
-
Locking for thread safety
Function assignment
Before we learn about decorators let's remember that we can assign function names to other names and then use the new name:
def hello(name):
print(f"Hello {name}")
hello("Python")
print(hello)
greet = hello
greet("Python")
print(greet)
Hello Python
<function hello at 0x7f8aee3401f0>
Hello Python
<function hello at 0x7f8aee3401f0>
Function assignment - alias print to say
say = print
say("Hello World")
Function assignment - don't do this
numbers = [2, 4, 3, 1, 1, 1]
print(sum(numbers)) # 12
print(max(numbers)) # 4
sum = max
print(sum(numbers)) # 4
print(max(numbers)) # 4
sum = lambda values: len(values)
print(sum(numbers)) # 6
Passing functions as parameters
def call(func):
return func(42)
def double(val):
print(2*val)
call(double) # 84
call(lambda x: print(x // 2)) # 21
Traversing directory tree
import sys
import os
def walker(path, todo):
    """Call *todo* for everything below *path* that is not a directory.

    Directories are descended into recursively; any other path (including
    *path* itself when it is not a directory) is passed to *todo*.
    """
    if not os.path.isdir(path):
        todo(path)
        return

    for entry in os.listdir(path):
        walker(os.path.join(path, entry), todo)
def print_size(name):
print(f"{os.stat(name).st_size:6} {name} ")
if __name__ == '__main__':
if len(sys.argv) < 2:
exit(f"Usage: {sys.argv[0]} PATH")
walker(sys.argv[1], print)
#walker(sys.argv[1], print_size)
#walker(sys.argv[1], lambda name: print(f"{os.stat(name).st_size:6} {name[::-1]} "))
Declaring Functions inside other function
Let's also remember that we can define a function inside another function and then the internally defined function only exists in the scope of the function where it was defined. Not outside.
def f():
def g():
print("in g")
print("start f")
g()
print("end f")
f()
g()
start f
in g
end f
Traceback (most recent call last):
File "examples/decorators/function_in_function.py", line 9, in <module>
g()
NameError: name 'g' is not defined
Returning a new function from a function
def create_function():
print("creating a function")
def internal():
print("This is the generated function")
print("creation done")
return internal
func = create_function()
func()
creating a function
creation done
This is the generated function
Returning a closure
def create_incrementer(num):
def inc(val):
return num + val
return inc
inc_5 = create_incrementer(5)
print(inc_5(10)) # 15
print(inc_5(0)) # 5
inc_7 = create_incrementer(7)
print(inc_7(10)) # 17
print(inc_7(0)) # 7
Decorator
-
@
-
A function that changes the behaviour of other functions.
-
The input of a decorator is a function.
-
The returned value of a decorator is a modified version of the same function.
from some_module import some_decorator
@some_decorator
def f(...):
...
def f(...):
...
f = some_decorator(f)
Decorator Demo
- Just a simple example created step-by-step
import time
def replace(func):
def new_func():
print("start new")
start = time.time()
func()
end = time.time()
print(f"end new {end-start}")
return new_func
@replace
def f():
time.sleep(1)
print("in f")
f()
Decorator to register function
- Pytest, Flask probably do this
functions = []
def register(func):
global functions
functions.append(func.__name__)
return func
@register
def f():
print("in f")
print(functions)
A recursive Fibonacci
def fibo(n):
if n in (1,2):
return 1
return fibo(n-1) + fibo(n-2)
print(fibo(5)) # 5
trace fibo
import decor
@decor.tron
def fibo(n):
if n in (1,2):
return 1
return fibo(n-1) + fibo(n-2)
print(fibo(5))
Calling fibo(5)
Calling fibo(4)
Calling fibo(3)
Calling fibo(2)
Calling fibo(1)
Calling fibo(2)
Calling fibo(3)
Calling fibo(2)
Calling fibo(1)
5
tron decorator
def tron(func):
    """Return a tracing wrapper around the one-argument function *func*.

    The wrapper prints ``Calling <name>(<argument>)`` and then returns
    whatever *func* returns for that argument.
    """
    def new_func(v):
        message = "Calling {}({})".format(func.__name__, v)
        print(message)
        return func(v)

    return new_func
Decorate with direct call
import decor
def fibo(n):
if n in (1,2):
return 1
return fibo(n-1) + fibo(n-2)
fibo = decor.tron(fibo)
print(fibo(5))
Decorate with parameter
import decor_param
@decor_param.tron('foo')
def fibo(n):
if n in (1,2):
return 1
return fibo(n-1) + fibo(n-2)
print(fibo(5))
foo Calling fibo(5)
foo Calling fibo(4)
foo Calling fibo(3)
foo Calling fibo(2)
foo Calling fibo(1)
foo Calling fibo(2)
foo Calling fibo(3)
foo Calling fibo(2)
foo Calling fibo(1)
5
Decorator accepting parameter
def tron(prefix):
    """Build a tracing decorator whose messages start with *prefix*.

    ``tron(prefix)`` returns the actual decorator. The decorated one-argument
    function prints ``<prefix> Calling <name>(<argument>)`` on every call and
    returns the original result.
    """
    def real_tron(func):
        def new_func(v):
            print(f"{prefix} Calling {func.__name__}({v})")
            return func(v)

        return new_func

    return real_tron
Decorate function with any signature
- How can we decorate a function that is flexible on the number of arguments?
- Accept *args and **kwargs and pass them on.
from decor_any import tron
@tron
def one(param):
print(f"one({param})")
@tron
def two(first, second = 42):
print(f"two({first}, {second})")
one("hello")
one(param = "world")
two("hi")
two(first = "Foo", second = "Bar")
Decorate function with any signature - implementation
def tron(func):
    """Return a tracing wrapper for *func* that accepts any signature.

    The wrapper prints ``Calling <name>(<arguments>)``, with positional
    arguments shown as-is and keyword arguments as ``key=value``, then calls
    *func* with the same arguments and returns its result.
    """
    import functools

    # functools.wraps keeps __name__, __doc__ etc. of the decorated function
    # on the wrapper instead of reporting "new_func".
    @functools.wraps(func)
    def new_func(*args, **kw):
        params = [str(p) for p in args]
        params.extend(f"{k}={v}" for k, v in kw.items())
        print("Calling {}({})".format(func.__name__, ', '.join(params)))
        return func(*args, **kw)

    return new_func
Calling one(hello)
one(hello)
Calling one(param=world)
one(world)
Calling two(hi)
two(hi, 42)
Calling two(first=Foo, second=Bar)
two(Foo, Bar)
Decorate function with any signature - skeleton
def decorator(func):
def wrapper(*args, **kw):
return func(*args, **kw)
return wrapper
@decorator
def zero():
print("zero")
@decorator
def one(x):
print(f"one({x})")
@decorator
def two(x, y):
print(f"two({x, y})")
zero()
one('hello')
two( y = 7, x = 8 )
print(zero)
print(one)
print(two)
print(zero.__name__)
print(one.__name__)
print(two.__name__)
zero
one(hello)
two((8, 7))
<function decorator.<locals>.wrapper at 0x7f1165258a60>
<function decorator.<locals>.wrapper at 0x7f1165258b80>
<function decorator.<locals>.wrapper at 0x7f1165258ca0>
Decorate function with any signature - skeleton with name
import functools
def decorator(func):
@functools.wraps(func)
def wrapper(*args, **kw):
return func(*args, **kw)
return wrapper
@decorator
def zero():
print("zero")
@decorator
def one(x):
print(f"one({x})")
@decorator
def two(x, y):
print(f"two({x, y})")
zero()
one('hello')
two( y = 7, x = 8 )
print(zero)
print(one)
print(two)
print(zero.__name__)
print(one.__name__)
print(two.__name__)
zero
one(hello)
two((8, 7))
<function zero at 0x7f9079bdca60>
<function one at 0x7f9079bdcb80>
<function two at 0x7f9079bdcca0>
Functool - partial
- partial
from functools import partial
val = '101010'
print(int(val, base=2))
basetwo = partial(int, base=2)
basetwo.__doc__ = 'Convert base 2 string to an int.'
print(basetwo(val))
# Based on example from https://docs.python.org/3/library/functools.html
Exercise: Logger decorator
- In the previous pages we created a decorator that can decorate arbitrary function logging the call and its parameters.
- Add time measurement to each call to see how long each function took.
Exercise: memoize decorator
Write a function that gets a function as a parameter and returns a new function while memoizing (caching) the input/output pairs. Then write a unit test that checks it. You will probably need to create a subroutine to be memoized.
- Write tests for the fibonacci functions.
- Implement the memoize decorator for a function with a single parameter.
- Apply the decorator.
- Run the tests again.
- Check the speed differences.
- or decorate with tron to see the calls...
Solution: Logger decorator
import time
def tron(func):
    """Return a wrapper that logs each call of *func* and how long it took.

    The wrapper prints the call with its positional and keyword arguments,
    runs *func*, prints the returned value and the elapsed time in seconds,
    and finally returns the value.
    """
    import functools

    # Keep the name and docstring of the decorated function on the wrapper.
    @functools.wraps(func)
    def new_func(*args, **kwargs):
        # perf_counter() is monotonic and high-resolution; time.time() can
        # jump when the system clock is adjusted.
        start = time.perf_counter()
        print("Calling {}({}, {})".format(func.__name__, args, kwargs))
        out = func(*args, **kwargs)
        end = time.perf_counter()
        print("Finished {}({})".format(func.__name__, out))
        print("Elapsed time: {}".format(end - start))
        return out

    return new_func
Solution: Logger decorator (testing)
from logger_decor import tron
@tron
def f(a, b=1, *args, **kwargs):
print('a: ', a)
print('b: ', b)
print('args: ', args)
print('kwargs:', kwargs)
return a + b
f(2, 3, 4, 5, c=6, d=7)
print()
f(2, c=5, d=6)
print()
f(10)
Calling f((2, 3, 4, 5), {'c': 6, 'd': 7})
a: 2
b: 3
args: (4, 5)
kwargs: {'c': 6, 'd': 7}
Finished f(5)
Elapsed time: 1.3589859008789062e-05
Calling f((2,), {'c': 5, 'd': 6})
a: 2
b: 1
args: ()
kwargs: {'c': 5, 'd': 6}
Finished f(3)
Elapsed time: 5.245208740234375e-06
Calling f((10,), {})
a: 10
b: 1
args: ()
kwargs: {}
Finished f(11)
Elapsed time: 4.291534423828125e-06
Solution: memoize decorator
import sys
import memoize_attribute
import memoize_nonlocal
import decor_any
#@memoize_attribute.memoize
#@memoize_nonlocal.memoize
#@decor_any.tron
def fibonacci(n):
if n == 1:
return 1
if n == 2:
return 1
return fibonacci(n-1) + fibonacci(n-2)
if __name__ == '__main__':
if len(sys.argv) != 2:
sys.stderr.write("Usage: {} N\n".format(sys.argv[0]))
exit(1)
print(fibonacci(int(sys.argv[1])))
def memoize(f):
    """Cache the results of the one-argument function *f*.

    Results live in a dictionary owned by the closure, keyed by the argument;
    *f* is only called for arguments that have not been seen before.
    """
    data = {}

    def caching(n):
        nonlocal data  # data is only mutated, never rebound
        try:
            return data[n]
        except KeyError:
            data[n] = f(n)
            return data[n]

    return caching
def memoize(f):
    """Cache the results of the one-argument function *f*.

    Results are kept in the ``data`` dictionary attached as an attribute to
    the returned function, keyed by the argument.
    """
    def caching(n):
        if n in caching.data:
            return caching.data[n]
        value = f(n)
        caching.data[n] = value
        return caching.data[n]

    # The cache is created once, when the wrapper is built.
    caching.data = {}
    return caching
Before
$ time python fibonacci.py 35
9227465
real 0m3.850s
user 0m3.832s
sys 0m0.015s
After
$ time python fibonacci.py 35
9227465
real 0m0.034s
user 0m0.019s
sys 0m0.014s
A list of functions
def hello(name):
print(f"Hello {name}")
def morning(name):
print(f"Good morning {name}")
hello("Jane")
morning("Jane")
print()
funcs = [hello, morning]
funcs[0]("Peter")
print()
for func in funcs:
func("Mary")
Hello Jane
Good morning Jane
Hello Peter
Hello Mary
Good morning Mary
Insert element in sorted list using insort
- insort
import bisect
solar_system = ['Earth', 'Jupiter', 'Mercury', 'Saturn', 'Venus']
name = 'Mars'
# Find the location where to insert the element to keep the list sorted and insert the element
bisect.insort(solar_system, name)
print(solar_system)
print(sorted(solar_system))
import sys
import os
def traverse(path):
if os.path.isfile(path):
print(path)
return
if os.path.isdir(path):
for item in os.listdir(path):
traverse(os.path.join(path, item))
return
# other unhandled things
if len(sys.argv) < 2:
exit(f"Usage: {sys.argv[0]} DIR|FILE")
traverse(sys.argv[1])
import sys
import os
def traverse(path, func):
response = {}
if os.path.isfile(path):
func(path)
return response
if os.path.isdir(path):
for item in os.listdir(path):
traverse(os.path.join(path, item), func)
return response
# other unhandled things
if len(sys.argv) < 2:
exit(f"Usage: {sys.argv[0]} DIR|FILE")
#traverse(sys.argv[1], print)
#traverse(sys.argv[1], lambda path: print(f"{os.path.getsize(path):>6} {path}"))
import sys
import os
def traverse(path, func):
if os.path.isfile(path):
func(path)
return
if os.path.isdir(path):
for item in os.listdir(path):
traverse(os.path.join(path, item), func)
return
# other unhandled things
if len(sys.argv) < 2:
exit(f"Usage: {sys.argv[0]} DIR|FILE")
#traverse(sys.argv[1], print)
#traverse(sys.argv[1], lambda path: print(f"{os.path.getsize(path):>6} {path}"))
#from inspect import getmembers, isfunction
import inspect
def change(sub):
    """Return a wrapper that prints "before" and "after" around each call to *sub*.

    :param sub: the callable to wrap
    :return: a new callable that forwards all arguments to *sub* and returns its result
    """
    def new(*args, **kw):
        print("before")
        res = sub(*args, **kw)
        print("after")
        return res
    return new


def add(x, y):
    """Return the sum of *x* and *y*."""
    return x+y

#print(add(2, 3))

fixed = change(add)
#print(fixed(3, 4))


def replace(subname):
    """Rebind the module-level function named *subname* to a wrapped version of itself.

    :param subname: name of a function defined at module level
    :raises KeyError: if no module-level object has that name
    """
    # locals() inside a function only describes that function's own namespace,
    # so it can neither look up nor rebind a module-level function.
    # The module namespace is what globals() returns.
    namespace = globals()
    namespace[subname] = change(namespace[subname])


replace('add')
add(1, 7)
def say():
print("hello")
#print(dir())
#getattr('say')
Context managers (with statement)
Why use context managers?
In certain operations you might want to ensure that when the operation is done there will be an opportunity to clean up after it. Even if we decide to end the operation early or if there is an exception in the middle of the operation.
In the following pseudo-code example you can see that cleanup
must be called both at the end and before the early-end
, but
that still leaves the bad-code that raises exception avoiding the cleanup. That forces us to wrap the whole section in a try-block.
def sample():
start
do
do
do
do
cleanup
What if we have some conditions for early termination?
def sample():
start
do
do
if we are done early:
cleanup
return # early-end
do
do
cleanup
What if we might have an exception in the code?
def sample():
start
try:
do
do
if we are done early:
cleanup
return early-end
do
bad-code (raises exception)
do
cleanup
finally:
cleanup
It is a lot of unnecessary code duplication and we can easily forget to add it in every location where we early-end our code.
Using Context Manager
with cm_for_sample():
start
do
do
if we are done early:
return early-end
do
bad-code (raises exception)
do
cleanup
happens automatically, it is defined inside the cm_for_sample
Context Manager examples
A few examples where context managers can be useful:
-
Opening a file - close it once we are done with it so we don't leak file descriptors.
-
Changing directory - change back when we are done.
-
Create temporary directory - remove when we are done.
-
Open connection to database - close connection.
-
Open SSH connection - close connection.
-
More information about context managers
cd in a function
- getcwd
- chdir
In this example we have a function in which we change to a directory and then when we are done we change back to the original directory.
For this to work first we save the current working directory using the os.getcwd
call. Unfortunately in the middle of the code there
is a conditional call to return
. If that condition is True
we won't change back to the original directory. We could fix this by
calling os.chdir(start_dir)
just before calling return
. However this would still not solve the problem if there is an exception
in the function.
import sys
import os
def do_something(path):
start_dir = os.getcwd()
os.chdir(path)
content = os.listdir()
number = len(content)
print(number)
if number < 15:
return
os.chdir(start_dir)
def main():
if len(sys.argv) != 2:
exit(f"Usage: {sys.argv[0]} PATH")
path = sys.argv[1]
print(os.getcwd())
do_something(path)
print(os.getcwd())
main()
$ python no_context_cd.py /tmp/
/home/gabor/work/slides/python-programming/examples/advanced
19
/home/gabor/work/slides/python-programming/examples/advanced
$ python no_context_cd.py /opt/
/home/gabor/work/slides/python-programming/examples/advanced
9
/opt
- In the second example
return
was called and thus we stayed in the /opt directory.
open in function
This is not the recommended way to open a file, but this is how it was done before the introduction of the with
context manager.
Here we have the same issue. We have a conditional call to return
where we forgot to close the file.
import sys
import re
def do_something(filename):
    """Print the lines of *filename* until the first empty or whitespace-only line.

    This is the pre-``with`` way of handling a file. It is kept here to show
    its weakness: the early ``return`` leaves the function without closing
    the file handle.

    :param filename: path of the text file to read
    """
    fh = open(filename)
    while True:
        line = fh.readline()
        # readline() returns an empty string at end of file, never None
        if not line:
            break
        line = line.rstrip("\n")
        if re.search(r'\A\s*\Z', line):
            # Deliberate flaw of this example: we return without fh.close()
            return
        print(line)
    fh.close()
def main():
if len(sys.argv) != 2:
exit(f"Usage: {sys.argv[0]} FILENAME")
filename = sys.argv[1]
do_something(filename)
main()
open in for loop
- stat
- os.stat
Calling write
does not immediately write to disk. The Operating System provides buffering as an optimization
to avoid frequent access to the disk. In this case it means the file has not been saved before we already check its size.
import os
for ix in range(10):
filename = f'data{ix}.txt'
fh = open(filename, 'w')
fh.write('hello')
if ix == 0:
break
fh.close()
stat = os.stat(filename)
print(stat.st_size) # 0, the file has not been saved yet
open in function using with
If we open the file in the recommended way using the with
statement then we can be sure that the close
method
of the fh
object will be called when we leave the context of the with
statement.
import sys
import re
def do_something(filename):
    """Print the lines of *filename* until the first empty or whitespace-only line.

    The file is opened with a ``with`` statement, so it is closed however the
    block is left: at end of file, on the early ``return`` or on an exception.

    :param filename: path of the text file to read
    """
    with open(filename) as fh:
        while True:
            line = fh.readline()
            # readline() returns an empty string at end of file, never None
            if not line:
                break
            line = line.rstrip("\n")
            if re.search(r'\A\s*\Z', line):
                return
            print(line)
def main():
if len(sys.argv) != 2:
exit(f"Usage: {sys.argv[0]} FILENAME")
filename = sys.argv[1]
do_something(filename)
main()
Plain context manager
from contextlib import contextmanager
import sys
param = ''
if len(sys.argv) == 2:
#exit(f"Usage: {sys.argv[0]} []")
param = sys.argv[1]
def code_with_context_manager():
with my_plain_context():
print(" In plain context")
if param == "return":
return
if param == "die":
raise Exception("we have a problem")
print(" More work")
@contextmanager
def my_plain_context():
print("setup context")
try:
yield
except Exception as err:
print(f" We got an exception: {err}")
print("cleanup context")
print("START")
code_with_context_manager()
print("END")
START
setup context
In plain context
More work
cleanup context
END
Param context manager
from contextlib import contextmanager
@contextmanager
def my_param_context(name):
print(f"start {name}")
yield
print(f"end {name}")
with my_param_context("foo"):
print("In param context")
start foo
In param context
end foo
Context manager that returns a value
from contextlib import contextmanager
import time
import random
import os
import shutil
@contextmanager
def my_tempdir():
print("start return")
tmpdir = '/tmp/' + str(time.time()) + str(random.random())
os.mkdir(tmpdir)
try:
yield tmpdir
finally:
shutil.rmtree(tmpdir)
print("end return")
import os
from my_tempdir import my_tempdir
with my_tempdir() as tmp_dir:
print(f"In return context with {tmp_dir}")
with open(tmp_dir + '/data.txt', 'w') as fh:
fh.write("hello")
print(os.listdir(tmp_dir))
print('')
print(tmp_dir)
print(os.path.exists(tmp_dir))
start return
In return context with /tmp/1578211890.49409370.6063140788762365
['data.txt']
end return
/tmp/1578211890.49409370.6063140788762365
False
Use my tempdir - return
import os
from my_tempdir import my_tempdir
def some_code():
with my_tempdir() as tmp_dir:
print(f"In return context with {tmp_dir}")
with open(tmp_dir + '/data.txt', 'w') as fh:
fh.write("hello")
print(os.listdir(tmp_dir))
return
print('')
print(tmp_dir)
print(os.path.exists(tmp_dir))
some_code()
start return
In return context with /tmp/1578211902.3545020.7667694368935928
['data.txt']
end return
Use my tempdir - exception
import os
from my_tempdir import my_tempdir
with my_tempdir() as tmp_dir:
print(f"In return context with {tmp_dir}")
with open(tmp_dir + '/data.txt', 'w') as fh:
fh.write("hello")
print(os.listdir(tmp_dir))
raise Exception('trouble')
print('')
print(tmp_dir)
print(os.path.exists(tmp_dir))
start return
In return context with /tmp/1578211921.12552210.9000097350821897
['data.txt']
end return
Traceback (most recent call last):
File "use_my_tempdir_exception.py", line 9, in <module>
raise Exception('trouble')
Exception: trouble
cwd context manager
import os
from contextlib import contextmanager
@contextmanager
def cwd(path):
    """Make *path* the current working directory for the duration of a ``with`` block.

    :param path: directory to change into
    """
    previous_dir = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        # Runs on normal exit, on an early return and on an exception alike
        os.chdir(previous_dir)
import sys
import os
from mycwd import cwd
def do_something(path):
with cwd(path):
content = os.listdir()
if len(content) < 10:
return
def main():
if len(sys.argv) != 2:
exit(f"Usage: {sys.argv[0]} PATH")
path = sys.argv[1]
print(os.getcwd())
do_something(path)
print(os.getcwd())
main()
$ python context_cd.py /tmp
/home/gabor/work/slides/python/examples/context
/home/gabor/work/slides/python/examples/context
$ python context_cd.py /opt
/home/gabor/work/slides/python/examples/context
/home/gabor/work/slides/python/examples/context
tempdir context manager
- contextlib
- contextmanager
- tempfile
- mkdtemp
import os
from contextlib import contextmanager
import tempfile
import shutil
@contextmanager
def tmpdir():
    """Create a temporary directory, hand its path to the ``with`` block, then delete it.

    :return: (as the ``as`` target) the path of the freshly created directory
    """
    scratch_dir = tempfile.mkdtemp()
    try:
        yield scratch_dir
    finally:
        # Remove the directory and everything in it, even if the block raised
        shutil.rmtree(scratch_dir)
from mytmpdir import tmpdir
import os
with tmpdir() as temp_dir:
print(temp_dir)
with open( os.path.join(temp_dir, 'some.txt'), 'w') as fh:
fh.write("hello")
print(os.path.exists(temp_dir))
print(os.listdir(temp_dir))
print(os.path.exists(temp_dir))
/tmp/tmprpuywa3_
True
['some.txt']
False
Context manager with class
- enter
- exit
class MyCM:
def __init__(self, name):
self.name = name
def __enter__(self):
print(f'__enter__ {self.name}')
return self
def __exit__(self, exception_type, exception, traceback):
print(f'__exit__ {self.name}')
def something(self):
print(f'something {self.name}')
def main():
with MyCM('Foo') as cm:
print(cm.name)
cm.something()
#raise Exception('nono')
print('in main - after')
main()
print('after main')
Context managers with class
- enter
- exit
Even if there was an exception in the middle of the process, the exit methods of each object will be called.
class MyCM:
def __init__(self, n):
self.name = n
def __enter__(self):
print('__enter__', self.name)
def __exit__(self, exception_type, exception, traceback):
print('__exit__ ', self.name)
def something(self):
print('something', self.name)
def main():
a = MyCM('a')
b = MyCM('b')
with a, b:
a.partner = b
b.partner = a
a.something()
raise Exception('nono')
b.something()
print('in main - after')
main()
print('after main')
__enter__ a
__enter__ b
something a
__exit__ b
__exit__ a
Traceback (most recent call last):
File "context-managers.py", line 27, in <module>
main()
File "context-managers.py", line 23, in main
raise Exception('nono')
Exception: nono
Context manager: with for file
- with
import sys
if len(sys.argv) != 2:
sys.stderr.write('Usage: {} FILENAME\n'.format(sys.argv[0]))
exit()
file = sys.argv[1]
print(file)
with open(file) as f:
for line in f:
val = 30/int(line)
print('done')
With - context managers
- with
class WithClass:
def __init__(self, name='default'):
self.name = name
def __enter__(self):
print('entering the system')
return self.name
def __exit__(self, exc_type, exc_value, traceback):
print('exiting the system')
def __str__(self):
return 'WithObject:'+self.name
x = WithClass()
with x as y:
print(x,y)
Exercise: Context manager
Create a few CSV files like these:
a11,a12
a21,a22
b13,b14
b23,b24
c15,c16
c25,c26
Merge them horizontally to get this:
a11,a12,b13,b14,c15,c16
a21,a22,b23,b24,c25,c26
- Do it without your own context manager
- Create a context manager called myopen that accepts N filenames. It opens the first one to write and the other N-1 to read
with myopen(outfile, infile1, infile2, infile3) as out, ins:
...
Exercise: Tempdir on Windows
Make the tempdir context manager example work on windows as well. Probably need to cd out of the directory.
Solution: Context manager
import sys
from contextlib import contextmanager
if len(sys.argv) < 3:
exit(f"Usage: {sys.argv[0]} OUTFILE INFILEs")
outfile = sys.argv[1]
infiles = sys.argv[2:]
#print(outfile)
#print(infiles)
@contextmanager
def myopen(outfile, *infiles):
    """Open one file for writing and any number of files for reading.

    Usage: ``with myopen(out, in1, in2) as (out_fh, input_fhs):``

    Every file that was opened is closed when the block is left. That also
    covers the case where opening one of the later input files fails.
    An exception raised inside the ``with`` block is printed and suppressed.

    :param outfile: path of the file to open for writing
    :param infiles: paths of the files to open for reading
    :return: (as the ``as`` target) a tuple of the output handle and the list of input handles
    :raises OSError: if any of the files cannot be opened
    """
    # Local import so the example does not depend on a change to the imports at the top
    from contextlib import ExitStack

    with ExitStack() as stack:
        # Each handle is registered as soon as it is opened, so a failure on
        # a later file still closes the earlier ones.
        out = stack.enter_context(open(outfile, 'w'))
        ins = [stack.enter_context(open(filename, 'r')) for filename in infiles]
        try:
            yield out, ins
        except Exception as ex:
            # Best effort: report the problem of the with-block and carry on
            print(ex)
with myopen(outfile, *infiles) as (out_fh, input_fhs):
#print(out_fh.__class__.__name__)
#print(len(input_fhs))
while True:
row = ''
done = False
for infh in (input_fhs):
line = infh.readline()
#print(f"'{line}'")
if not line:
done = True
break
if row:
row += ','
row += line.rstrip("\n")
if done:
break
out_fh.write(row)
out_fh.write("\n")
Advanced lists
Change list while looping: endless list
numbers = [1, 1]
for n in numbers:
print(n)
numbers.append(numbers[-1] + numbers[-2])
if n > 100:
break
print(numbers)
Creating a Fibonacci series in a crazy way.
Change list while looping
Probably not a good idea...
numbers = [1, 2, 3, 4]
for n in numbers:
print(n)
if n == 2:
numbers.remove(2)
print(numbers)
1
2
4
[1, 3, 4]
Note, the loop only iterated 3 times, and it skipped value 3
Copy list before iteration
It is better to copy the list using list slices before the iteration starts.
numbers = [1, 2, 3, 4]
for n in numbers[:]:
print(n)
if n == 2:
numbers.remove(2)
print(numbers)
1
2
3
4
[1, 3, 4]
for with flag
names = ['Foo', 'Bar', 'Baz']
ok = False
for i in range(3):
name = input('Your name please: ')
if name in names:
ok = True
break
if not ok:
print("Not OK")
exit()
print("OK....")
for else
The else statement of the for loop is executed when the iteration ends normally. (without calling break)
names = ['Foo', 'Bar', 'Baz']
for i in range(3):
name = input('Your name please: ')
if name in names:
break
else:
print("Not OK")
exit()
print("OK....")
enumerate
- enumerate
names = ['Foo', 'Bar', 'Baz']
for i in range(len(names)):
print(i, names[i])
print('')
for i, n in enumerate(names):
print(i, n)
0 Foo
1 Bar
2 Baz
0 Foo
1 Bar
2 Baz
do while
- do while
There is no do-while in Python, but you can emulate it:
while True:
do_stuff()
if not loop_condition():
break
x = 0
while True:
x += 1
print(x)
if x > 0:
break
list slice is copy
x = [1, 1, 2, 3, 5, 8, 13, 21, 34]
y = x[2:5]
print(y) # [2, 3, 5]
x[2] = 20
print(x) # [1, 1, 20, 3, 5, 8, 13, 21, 34]
print(y) # [2, 3, 5]
Warnings
Warnings
- warn
from warnings import warn
def foo():
    """Deprecated example function: emit a DeprecationWarning, then do its work."""
    # stacklevel=2 attributes the warning to the code that called foo(), which
    # is the line the user has to change, instead of to this line.
    warn("foo will be deprecated soon. Use bar() instead", DeprecationWarning, stacklevel=2)
    print("foo still works")
def main():
foo()
print("afterfoo")
main()
CSV
What is a CSV file?
-
CSV stands for Comma Separated Values
-
A CSV file is similar to the values you might put in an Excel file. Though in Excel each cell has both a
value
and a format
(and maybe more) attributes. A CSV file only contains values. -
A CSV file has rows and in each row there are values separated by a comma.
-
In some cases the separator is some other character, e.g. a semicolon (
;
), a pipeline (|
) or a TAB character. (The last one is also referred to as a TSV file, where TSV stands for TAB Separated Values.) -
There are a number of other variations, so the csv-reading and writing libraries usually provide options to handle these variations.
-
Sometimes all the lines hold values. Sometimes the first line acts as the list of column-names.
CSV file without title row
- Some of the figures in Snow White in Hungarian.
{% embed include file="src/examples/csv/snowwhite.csv" %}
CSV file with header
- This CSV file contains information about the members of the Monty Python show.
- The first row contains the titles of the columns.
{% embed include file="src/examples/csv/monty_python.csv" %}
Read CSV file into lists
import sys
import csv
if len(sys.argv) != 2:
sys.stderr.write("Usage: {} FILENAME\n".format(sys.argv[0]))
exit()
filename = sys.argv[1]
with open(filename) as fh:
rd = csv.reader(fh)
for row in rd:
print(row)
python examples/csv/read_csv.py examples/csv/snowwhite.csv
CSV with newlines missing closing quote
{% embed include file="src/examples/csv/with_newlines_error.csv" %}
CSV to dictionary
- DictReader
import sys
import csv
if len(sys.argv) != 2:
sys.stderr.write("Usage: {} FILENAME\n".format(sys.argv[0]))
exit()
filename = sys.argv[1]
with open(filename) as fh:
rd = csv.DictReader(fh, delimiter=',')
for row in rd:
print(row)
{'lname': 'Chapman', 'born': '8 January 1941', 'fname': 'Graham'}
{'lname': 'Idle', 'born': '29 March 1943', 'fname': 'Eric'}
{'lname': 'Gilliam', 'born': '22 November 1940', 'fname': 'Terry'}
{'lname': 'Jones', 'born': '1 February 1942', 'fname': 'Terry'}
{'lname': 'Cleese', 'born': '27 October 1939', 'fname': 'John'}
{'lname': 'Palin', 'born': '5 May 1943', 'fname': 'Michael'}
CSV Attributes
- delimiter
- doublequote
- escapechar
- lineterminator
- quotechar
- quoting
- skipinitialspace
- strict
CSV dialects
- list_dialects
The csv module defines a number of "dialects", sets of attributes.
import csv
for dialect_name in csv.list_dialects():
print(dialect_name)
dialect = csv.get_dialect(dialect_name)
for attribute_name in [
'delimiter',
'doublequote',
'escapechar',
'lineterminator',
'quotechar',
'quoting',
'skipinitialspace',
'strict',
]:
attr = getattr(dialect, attribute_name)
if attr == '\t':
attr = '\\t'
if attr == '\r\n':
attr = '\\r\\n'
print(" {:16} '{}'".format(attribute_name, attr))
excel
delimiter ','
doublequote 'True'
escapechar 'None'
lineterminator '\r\n'
quotechar '"'
quoting '0'
skipinitialspace 'False'
strict 'False'
excel-tab
delimiter '\t'
doublequote 'True'
escapechar 'None'
lineterminator '\r\n'
quotechar '"'
quoting '0'
skipinitialspace 'False'
strict 'False'
unix
delimiter ','
doublequote 'True'
escapechar 'None'
lineterminator '
'
quotechar '"'
quoting '1'
skipinitialspace 'False'
strict 'False'
Dialects of CSV files. See also: csv
Reading CSV the naive way
-
split
-
This is not recommended as it will fail in some cases. See next page!
Tudor;Vidor;10;Hapci
Szundi;Morgo;7;Szende
Kuka;Hofeherke;100;Kiralyno
Boszorkany;Herceg;9;Meselo
import sys, csv
if len(sys.argv) != 2:
sys.stderr.write("Usage: {} FILENAME\n".format(sys.argv[0]))
exit()
filename = sys.argv[1]
count = 0
with open(filename) as fh:
for line in fh:
line = line.rstrip("\n")
row = line.split(';')
#print(row)
count += int(row[2])
print("Total: {}".format(count))
python examples/csv/read_csv_split.py examples/csv/plain.csv
CSV with quotes and newlines
Tudor;Vidor;10;Hapci
Szundi;Morgo;7;Szende
Kuka;"Hofeherke; alma";100;Kiralyno
Boszorkany;Herceg;9;Meselo
Tudor;Vidor;10;Hapci
Szundi;Morgo;7;Szende
Kuka;"Hofeherke;
alma";100;Kiralyno
Boszorkany;Herceg;9;Meselo
Reading a CSV file
- csv
- reader
import sys
import csv
if len(sys.argv) != 2:
sys.stderr.write("Usage: {} FILENAME\n".format(sys.argv[0]))
exit()
filename = sys.argv[1]
count = 0
with open(filename) as fh:
rd = csv.reader(fh,
delimiter=';',
#strict=True,
)
for row in rd:
print(row)
count += int(row[2])
print("Total: {}".format(count))
python examples/csv/read_csv.py examples/csv/plain.csv
Exercise: CSV as dictionary of dictionaries
Create a script called monty_python_dictionary_of_dictionaries.py that given a file like the CSV file of Monty Python troupe (examples/csv/monty_python.csv), will create a dictionary where we can look up information about them based on the first name. For example:
filename = 'examples/csv/monty_python.csv'
people = read_csv_file(filename)
print(people["Graham"]["lname"]) # Chapman
print(people["John"]["born"]) # 27 October 1939
print(people["Michael"])
# {'lname': 'Palin', 'born': '5 May 1943', 'fname': 'Michael'}
print(people["Terry"]["lname"]) # Gilliam or Jones - NOTE(review): two members share the first name Terry, so only one row can be stored under this key; confirm which one
Exercise: CSV as dictionary of tuples of dictionaries
Create a script called monty_python_dictionary_of_tuples.py that given a file like the CSV file of Monty Python troupe (examples/csv/monty_python.csv), will create a dictionary where we can look up information about them based on the first name and last name. For example:
filename = 'examples/csv/monty_python.csv'
people = read_csv_file(filename)
#print(people)
print(people[("Graham", "Chapman")])
# {'fname': 'Graham', 'lname': 'Chapman', 'born': '8 January 1941'}
print(people[("Michael", "Palin")])
# {'fname': 'Michael', 'lname': 'Palin', 'born': '5 May 1943'}
Exercise: count row length in csv files
- Write a script called csv_column_count.py that given a CSV file will tell if all the rows have the same length or if some of them are different.
- Show which ones are different.
- Try it on
examples/csv/plain.csv
and on examples/csv/uneven.csv
Solution: CSV as dictionary of dictionaries
import csv
import sys
def read_csv_file(filename):
    """Read a CSV file with a header row into a dictionary keyed by first name.

    Each row becomes a dictionary (column name -> value) stored under the
    value of its ``fname`` column. If several rows share the same first name,
    the last one wins.

    :param filename: path of the comma-separated file; it must have an ``fname`` column
    :return: dictionary mapping first name to the row dictionary
    """
    name_of = {}
    # newline='' lets the csv module handle line endings inside quoted fields itself
    with open(filename, newline='') as fh:
        rd = csv.DictReader(fh, delimiter=',')
        for row in rd:
            name_of[ row['fname'] ] = row
    return name_of
filename = 'examples/csv/monty_python.csv'
if len(sys.argv) == 2:
filename = sys.argv[1]
people = read_csv_file(filename)
print(people["Graham"]["lname"]) # Chapman
print(people["John"]["born"]) # 27 October 1939
print(people["Michael"])
# {'lname': 'Palin', 'born': '5 May 1943', 'fname': 'Michael'}
print(people["Terry"]["lname"]) # NOTE(review): two rows share the first name Terry and the later row overwrites the earlier one, so this presumably prints Jones rather than Gilliam - confirm against the CSV
Solution: CSV as dictionary of tuples of dictionaries
Create a script called monty_python_dictionary_of_tuples.py that given a file like the CSV file of Monty Python troupe (examples/csv/monty_python.csv), will create a dictionary where we can look up information about them based on the first name and last name. For example:
import csv
import sys
def read_csv_file(filename):
    """Read a CSV file with a header row into a dictionary keyed by (fname, lname).

    :param filename: path of the comma-separated file; it must have ``fname`` and ``lname`` columns
    :return: dictionary mapping ``(first name, last name)`` tuples to the row dictionary
    """
    with open(filename) as csv_fh:
        return {
            (record['fname'], record['lname']): record
            for record in csv.DictReader(csv_fh, delimiter=',')
        }
filename = 'examples/csv/monty_python.csv'
if len(sys.argv) == 2:
filename = sys.argv[1]
people = read_csv_file(filename)
#print(people)
print(people[("Graham", "Chapman")])
# {'fname': 'Graham', 'lname': 'Chapman', 'born': '8 January 1941'}
print(people[("Michael", "Palin")])
# {'fname': 'Michael', 'lname': 'Palin', 'born': '5 May 1943'}
Solution: count row length in csv files
import csv
import sys
from collections import defaultdict
def check_rows(filename):
    """Report whether all rows of a ';'-separated CSV file have the same number of cells.

    The verdict is printed to standard output. When the rows differ, the most
    common cell count is reported first, followed by every other cell count
    together with the 0-based indexes of the rows that have it.

    :param filename: path of the CSV file to inspect
    """
    rows = []                    # cell count of every row, in file order
    widths = defaultdict(int)    # cell count -> number of rows having that count
    # newline='' lets the csv module handle line endings inside quoted fields itself
    with open(filename, newline='') as fh:
        for row in csv.reader(fh, delimiter=';'):
            width = len(row)
            rows.append(width)
            widths[width] += 1

    if not rows:
        # An empty file has no cell count to report
        print("The file has no rows")
        return

    if len(widths) > 1:
        print("Not all the rows have the same number of cells")
        # Most frequent cell count first
        cell_counts = sorted(widths, key=lambda x: widths[x], reverse=True)
        print(f"Most common number of cells is {cell_counts[0]} with {widths[ cell_counts[0] ]} rows")
        for count in cell_counts[1:]:
            print(f" Cells: {count}")
            print(f" Rows:")
            for row, cells in enumerate(rows):
                if cells == count:
                    print(f" {row}")
    else:
        # Report the cell count itself, not the number of rows that share it
        print(f"All rows have the same number of cells: {rows[0]}")
if len(sys.argv) != 2:
exit(f"Usage: {sys.argv[0]} FILENAME")
filename = sys.argv[1]
check_rows(filename)
Excel
Spreadsheets
- CSV files - use the standard csv library
- Microsoft Excel files (various versions and formats)
- Open Office / Libre Office Calc
Python Excel
- Python Excel
- openpyxl
- xlsxwriter
- xlrd
- xlwt
- xlutils using xlrd and xlwt. Mostly obsolete.
Create an Excel file from scratch
- Workbook
- active
- save
- column_dimensions
import openpyxl
import datetime
wb = openpyxl.Workbook()
ws = wb.active
ws['A1'] = 42
ws['A2'] = datetime.datetime.now()
#ws.column_dimensions['A'].width = 20.0
wb.save("first.xlsx")
Worksheets in Excel
- create_sheet
import openpyxl
import datetime
wb = openpyxl.Workbook()
ws = wb.active
ws['A1'] = 42
ws.title = "First"
ws2 = wb.create_sheet()
ws2.title = "Second sheet"
ws2['A1'] = datetime.datetime.now()
ws2.sheet_properties.tabColor = "1072BA"
wb.save("two_worksheets.xlsx")
Add expressions to Excel
Nothing special needed.
import openpyxl
import datetime
wb = openpyxl.Workbook()
ws = wb.active
ws['A1'] = 19
ws['A2'] = 23
ws['A3'] = "=A1+A2"
wb.save("expression.xlsx")
Format field
import openpyxl
import datetime
wb = openpyxl.Workbook()
ws = wb.active
ws['A1'] = 123456.78
ws['A2'] = 123456.78
ws['A3'] = 123456.78
ws['A4'] = -123456.78
ws['A5'] = datetime.datetime.now()
ws.column_dimensions['A'].width = 20.0
ws['A2'].number_format = '0.00E+00'
ws['A3'].number_format = '#,##0_);[RED](#,##0)'
ws['A4'].number_format = '#,##0_);[RED](#,##0)'
wb.save("format.xlsx")
Number series and chart
import openpyxl
wb = openpyxl.Workbook()
ws = wb.active
ws.title = "Chart"
a = ["First", 20, 28, 30, 37, 18, 47]
b = ["Second", 35, 30, 40, 40, 38, 35]
# write them as columns
# Worksheet rows are numbered from 1, so start counting there instead of
# indexing both lists by position.
for row, (first, second) in enumerate(zip(a, b), start=1):
    ws.cell(row=row, column=1).value = first
    ws.cell(row=row, column=2).value = second
lc = openpyxl.chart.LineChart()
lc.title = "Two Lines Chart"
#lc.style=13
data = openpyxl.chart.Reference(ws,
min_col=1,
min_row=1,
max_col=2,
max_row=len(a))
lc.add_data(data, titles_from_data=True)
ws.add_chart(lc, "D1")
wb.save("chart.xlsx")
Read Excel file
import openpyxl
wb = openpyxl.load_workbook(filename = 'chart.xlsx')
for ws in wb.worksheets:
print(ws.title)
ws = wb.worksheets[0]
print(ws['A1'].value)
Update Excel file
import openpyxl
wb = openpyxl.load_workbook(filename = 'chart.xlsx')
for ws in wb.worksheets:
print(ws.title)
ws = wb.worksheets[0]
c = ["Third", 40, 20, 35, 25, 20, 35]
# Worksheet rows are numbered from 1; the new series goes into column 3.
for row, value in enumerate(c, start=1):
    ws.cell(row=row, column=3).value = value
lc = openpyxl.chart.LineChart()
lc.title = "Three Lines Chart"
data = openpyxl.chart .Reference(ws,
min_col=1,
min_row=1,
max_col=3,
max_row=len(c))
lc.add_data(data, titles_from_data=True)
ws.add_chart(lc, "H15")
wb.save("chart.xlsx")
Barchart
import openpyxl
import random
from openpyxl.chart import BarChart, Series, Reference
wb = openpyxl.Workbook()
ws = wb.active
randomList1 = []
randomList2 = []
randomList3 = []
randomList4 = []
for i in range(0,12):
randomList1.append(random.randint(0,100))
randomList2.append(random.randint(0, 100))
randomList3.append(random.randint(0, 100))
randomList4.append(random.randint(0, 100))
randomList1.insert(0,"Bananas")
randomList2.insert(0,"Pears")
randomList3.insert(0,"Apples")
randomList4.insert(0,"Kiwis")
print(f"""Random number list1: {randomList1}
Random number list2: {randomList2}
Random number list3: {randomList3}
Random number list4: {randomList4}""")
months = ['Fruit','Jan', 'Feb', 'March', 'April', 'May', 'June', 'July', 'Aug', 'Sept', 'Oct', 'Nov', 'Dec']
rows = [
months,
randomList1,
randomList2,
randomList3,
randomList4,
]
for row in rows:
ws.append(row)
chart1 = BarChart()
chart1.type = "col"
chart1.style = 12
chart1.title = "Fruit Count per Month"
chart1.y_axis.title = 'Fruit Number'
chart1.x_axis.title = 'Fruit Type'
data = Reference(ws, min_col=2, min_row=1, max_row=5, max_col=13)
cats = Reference(ws, min_col=1, min_row=2, max_row=5)
chart1.add_data(data, titles_from_data=True)
chart1.set_categories(cats)
chart1.shape = 4
ws.add_chart(chart1, "A11")
wb.save("row_10.xlsx")
Exercise: Excel
- Create a series of 10 random numbers between 1 and 100 and save them in an Excel file in a column.
- Create a graph showing the values.
- Add a second series of 10 random numbers, add them to the Excel file as a second column next to the first one.
- Add a 3rd column containing the average of the first two columns.
- Update the graph to include all 3 number series
XML
XML Data
- xml
{% embed include file="src/examples/xml/data.xml" %}
Expat - Callbacks
- xml.parsers.expat
import xml.parsers.expat
import sys
if len(sys.argv) != 2:
exit(f"Usage: {sys.argv[0]} FILENAME")
file = sys.argv[1]
def start_element(name, attrs):
print('Start element: {} {}'.format(name, attrs))
def end_element(name):
print('End element: {}'.format(name))
def char_data(data):
print('Character data: {}'.format(repr(data)))
# Create the parser and register the three callbacks defined above.
p = xml.parsers.expat.ParserCreate()
p.StartElementHandler = start_element
p.EndElementHandler = end_element
p.CharacterDataHandler = char_data
# expat wants a binary file object; the with statement closes it even if
# parsing raises an error.
with open(file, 'rb') as fh:
    p.ParseFile(fh)
print('done')
XML DOM - Document Object Model
import xml.dom.minidom
import sys
if len(sys.argv) != 2:
exit(f"Usage: {sys.argv[0]} FILENAME")
file = sys.argv[1]
dom = xml.dom.minidom.parse(file)
root = dom.firstChild
print(root.tagName)
print('')
for node in root.childNodes:
if node.nodeType != node.TEXT_NODE:
print('name: ', node.tagName)
print('id: ', node.getAttribute('id'))
print('')
emails = dom.getElementsByTagName("email")
for e in emails:
print('email', e.getAttribute('id'), e.firstChild.data)
main
name: person
id: 1
name: person
id: 3
email home moo@zorghome.com
email work moo@work.com
XML SAX - Simple API for XML
import xml.sax
import sys
if len(sys.argv) != 2:
exit(f"Usage: {sys.argv[0]} FILENAME")
file = sys.argv[1]
class EventHandler(xml.sax.ContentHandler):
def startElement(self, name, attrs):
print('start', (name, attrs._attrs))
def characters(self, text):
if not text.isspace():
print('text', text)
def endElement(self, name):
print('end', name)
xml.sax.parse(file, EventHandler())
start (u'main', {})
start (u'person', {u'id': u'1'})
start (u'fname', {})
text Foo
end fname
start (u'lname', {})
text Bar
end lname
end person
start (u'person', {u'id': u'3'})
start (u'fname', {})
text Moo
end fname
start (u'lname', {})
text Zorg
end lname
start (u'email', {u'id': u'home'})
text moo@zorghome.com
end email
start (u'email', {u'id': u'work'})
text moo@work.com
end email
end person
end main
SAX collect
import xml.sax
import sys
if len(sys.argv) != 2:
exit(f"Usage: {sys.argv[0]} FILENAME")
file = sys.argv[1]
class EventHandler(xml.sax.ContentHandler):
    """SAX handler that appends every ``email`` element to a caller-supplied list.

    Each collected element is a dictionary with the keys ``name``, ``attr``
    (a plain dict of the attributes) and, if the element had any text, ``text``.
    The name of every element is printed when it ends.
    """

    def __init__(self, c):
        """
        :param c: list that receives the collected ``email`` elements
        """
        super().__init__()
        self.path = []          # stack of the elements that are currently open
        self.collector = c

    def startElement(self, name, attrs):
        # dict(attrs) copies the attributes through the public mapping interface
        self.path.append({ 'name' : name, 'attr' : dict(attrs) })

    def characters(self, text):
        # The parser may deliver the text of one element in several chunks,
        # so append instead of overwriting.
        current = self.path[-1]
        current['text'] = current.get('text', '') + text

    def endElement(self, name):
        element = self.path.pop()
        print('End name: ', name)
        if element['name'] == 'email':
            # Use the list given to this instance, not a global of the same name
            self.collector.append(element)
collector = []
xml.sax.parse(file, EventHandler(collector))
print(collector)
End name: fname
End name: lname
End name: person
End name: fname
End name: lname
End name: email
End name: email
End name: person
End name: main
[{'text': u'moo@zorghome.com', 'name': u'email', 'attr': {u'id': u'home'}},
{'text': u'moo@work.com', 'name': u'email', 'attr': {u'id': u'work'}}]
XML elementtree
import xml.etree.ElementTree as ET
import sys
if len(sys.argv) != 2:
exit(f"Usage: {sys.argv[0]} FILENAME")
file = sys.argv[1]
tree = ET.parse(file)
root = tree.getroot()
print(root.tag)
for p in root.iter('person'):
print(p.attrib)
print('')
for p in root.iter('email'):
print(p.attrib, p.text)
print('')
elements = tree.findall(".//*[@id='home']")
for e in elements:
print(e.tag, e.attrib)
main
{'id': '1'}
{'id': '3'}
{'id': 'home'} moo@zorghome.com
{'id': 'work'} moo@work.com
email {'id': 'home'}
SAX with coroutine
import xml.sax
file = 'examples/xml/data.xml'
class EventHandler(xml.sax.ContentHandler):
def __init__(self,target):
self.target = target
def startElement(self,name,attrs):
self.target.send(('start',(name,attrs._attrs)))
def characters(self,text):
self.target.send(('text',text))
def endElement(self,name):
self.target.send(('end',name))
def coroutine(func):
    """Decorator that starts a generator-based coroutine so it is ready for ``send()``.

    The script applies this as ``@coroutine`` and the body wraps ``func``, so
    the decorator has to be named ``coroutine`` and take the function as its
    parameter.

    :param func: generator function to wrap
    :return: function that creates the generator and advances it to its first ``yield``
    """
    from functools import wraps  # local import keeps the example self-contained

    @wraps(func)
    def start(*args,**kwargs):
        cr = func(*args,**kwargs)
        # Python 3: generators have no .next() method; use the next() built-in
        next(cr)
        return cr
    return start
# example use
if __name__ == '__main__':
@coroutine
def printer():
while True:
event = (yield)
print(event)
xml.sax.parse(file, EventHandler(printer()))
copied from Stack Overflow based on coroutines
import xml.sax
file = 'examples/xml/data.xml'
class EventHandler(xml.sax.ContentHandler):
def __init__(self,target):
self.target = target
def startElement(self,name,attrs):
self.target.send(('start',(name,attrs._attrs)))
def characters(self,text):
self.target.send(('text',text))
def endElement(self,name):
self.target.send(('end',name))
def coroutine(func):
    """Decorator that starts a generator-based coroutine so it is ready for ``send()``.

    :param func: generator function to wrap
    :return: function that creates the generator and advances it to its first ``yield``
    """
    from functools import wraps  # local import keeps the example self-contained

    @wraps(func)
    def start(*args,**kwargs):
        cr = func(*args,**kwargs)
        # Python 3: generators have no .next() method; use the next() built-in
        next(cr)
        return cr
    return start
# example use
if __name__ == '__main__':
@coroutine
def printer():
while True:
event = (yield)
print(event)
xml.sax.parse(file, EventHandler(printer()))
Matplotlib
About Matplotlib
Matplotlib Line
import matplotlib.pyplot as plt
plt.plot([ 1, 2, 3, 4 ],[ 23, 42, 10, 19 ])
plt.show()
#plt.savefig('line.png')
Matplotlib Line with dates
import datetime
import matplotlib.pyplot as plt
fig, subplots = plt.subplots()
subplots.plot(
[datetime.date(2017, 1, 5), datetime.date(2017, 3, 5), datetime.date(2017, 5, 5)],
[ 23, 17, 19 ],
label='An example',
)
subplots.legend(loc='upper center', shadow=True)
fig.autofmt_xdate()
plt.show()
#plt.savefig('line_with_dates.png')
Matplotlib Simple Pie
import matplotlib.pyplot as plt
plt.pie([ 23, 42, 10, 19 ])
plt.show()
#plt.savefig('simple_pie.png')
Matplotlib Simple Pie with params
import matplotlib.pyplot as plt
plt.pie(
x = [ 23, 42, 10, 19 ],
# explode = [0, 0, 0.1, 0.3],
# labels = ["failure", "success", "maybe", "what"],
# colors = ["red", "green", "blue", "#A395C1"],
# shadow = True,
# radius = 1.3,
)
plt.show()
#plt.savefig('simple_pie_params.png')
Matplotlib Pie
import matplotlib.pyplot as plt
# Make a square figure and axes
plt.figure(1, figsize=(6, 6))
#ax = plt.axes([0.1, 0.1, 0.8, 0.8])
labels = 'Frogs', 'Hogs', 'Dogs', 'Logs'
fracs = [15, 30, 45, 10]
explode = (0, 0.05, 0, 0)
plt.pie(fracs,
explode=explode,
labels=labels,
autopct='%1.1f%%',
shadow=True)
plt.title('Raining Hogs and Dogs',
bbox={'facecolor': '0.8', 'pad': 5})
plt.show()
#plt.savefig('pie.png')
#plt.savefig('pie.pdf')
Matplotlib Pie (test cases)
import matplotlib.pyplot as plt
cases = {
'success': 38,
'failure': 7,
'skipped': 3,
'xfailed': 8,
'xpassed': 4,
}
explode = (0, 0.1, 0.1, 0.1, 0.1)
labels = cases.keys()
sizes = cases.values()
fig1, ax1 = plt.subplots()
ax1.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%', shadow=True, startangle=90)
ax1.axis('equal')
plt.tight_layout()
plt.show()
#plt.savefig('pie_for_tests.png')
Plot, scatter, histogram
- plot - line
- scatter - just the values
- histogram (to group the values into bins)
- plt.hist(data, bins=10)
Seaborn
Seaborn use examples
In Jupyter notebook type %matplotlib
before writing the seaborn code.
In plain Python import matplotlib
, then assign the result of the plotting function
to a variable, and call matplotlib.pyplot.show(r)
.
Seaborn tip
"""
Source : https://seaborn.pydata.org/introduction.html
"""
import seaborn as sns
sns.set() # Apply the default default seaborn theme, scaling, and color palette. Optional.
tips = sns.load_dataset("tips") # Load example dataset into Pandas DataFrame
#print(type(tips))
# print(tips)
plot = sns.relplot(
x = "total_bill",
y = "tip",
col = "time",
hue = "smoker",
style = "smoker",
size = "size",
data = tips)
# print(type(plot)) # seaborn.axisgrid.FacetGrid
plot.savefig("tips.png")
Seaborn Anscombes Quartet
"""
Anscombe's quartet
==================
_thumb: .4, .4
Source: https://seaborn.pydata.org/examples/anscombes_quartet.html
"""
import seaborn as sns
import matplotlib
sns.set(style="ticks")
# Load the example dataset for Anscombe's quartet
df = sns.load_dataset("anscombe")
# Show the results of a linear regression within each dataset
r = sns.lmplot(
x="x",
y="y",
col="dataset",
hue="dataset",
data=df,
col_wrap=2,
ci=None,
palette="muted",
height=4,
scatter_kws={"s": 50, "alpha": 1})
matplotlib.pyplot.show(r)
Tox
Tox Examples
def add(x, y):
return x+y
from setuptools import setup
setup(name='mymath',
version='0.2',
description='The best math library',
url='http://github.com/szabgab/mymath',
author='Foo Bar',
author_email='foo@bar.com',
license='MIT',
packages=['mymath'],
zip_safe=False,
requires=[
],
long_description='Long description',
scripts=[],
)
import mymath
def test_add():
assert mymath.add(2, 3) == 5
{% embed include file="src/examples/tox/tox.ini)
Selenium
Selenium installation
Get started with Selenium
# Open the URL given on the command line in Chrome, type "selenium" into
# the element with id "search_box" and press Enter.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import chromedriver_autoinstaller
import sys
import re
import time

if len(sys.argv) != 2:
    exit(f"Usage: {sys.argv[0]} URL")
url = sys.argv[1]

chromedriver_autoinstaller.install()

options = webdriver.ChromeOptions()
#options.add_argument('headless')
driver = webdriver.Chrome(options=options)

driver.get(url)
driver.fullscreen_window()
print(driver.title)
time.sleep(5)

# The find_element_by_* helpers were removed in Selenium 4;
# find_element(By.<strategy>, value) is the supported form.
box = driver.find_element(By.ID, 'search_box')
box.send_keys("selenium")
time.sleep(5)
box.send_keys(Keys.ENTER)  # press enter on the box (same key code as u'\ue007')
time.sleep(5)

# element = driver.find_element(By.CLASS_NAME, '')
# element.is_displayed()
# print(element.get_attribute('href'))
# print(element.text)

# match = re.search(r'Code', driver.page_source)
# print(match)

# button = driver.find_element(By.CLASS_NAME, '')
# button.click()

#import code
#code.interact(local=locals())

#from ptpython.repl import embed
#embed(globals(), locals())

# quit() closes every window and ends the WebDriver session,
# while close() only closes the current window.
driver.quit()
Selenium Headless Screenshot
from selenium import webdriver
import chromedriver_autoinstaller
import sys
if len(sys.argv) != 2:
exit(f"Usage: {sys.argv[0]} URL")
url = sys.argv[1]
chromedriver_autoinstaller.install()
options = webdriver.ChromeOptions()
options.add_argument('headless')
driver = webdriver.Chrome(options=options)
driver.get(url)
print(driver.title)
driver.get_screenshot_as_file('screenshot.png')
driver.close()
Playwright
Playwright installation
pip install playwright
playwright install
Playwright demo
from playwright.sync_api import sync_playwright
import sys
if len(sys.argv) != 2:
exit(f"Usage: {sys.argv[0]} URL")
url = sys.argv[1]
with sync_playwright() as play:
for browser_type in [play.chromium]: #, play.firefox, play.webkit]:
browser = browser_type.launch(headless=False)
page = browser.new_page()
page.goto(url)
search_box = page.query_selector("#search_box");
#from ptpython.repl import embed
#embed(globals(), locals())
#page.screenshot(path=f'example-{browser_type.name}.png')
browser.close()
Playwright screenshot
from playwright.sync_api import sync_playwright
with sync_playwright() as play:
for browser_type in [play.chromium]: #, play.firefox, play.webkit]:
browser = browser_type.launch()
page = browser.new_page()
page.goto('http://whatsmyuseragent.org/')
page.screenshot(path=f'example-{browser_type.name}.png')
browser.close()
Advanced functions
Variable scopes
- Local (inside a def)
- Enclosing (in the enclosing def, aka. nonlocal)
- Global (outside of all defs)
Name resolution order (LEGB)
- Local
- Enclosing
- Global
- Built-in
Scoping: global seen from function
a = 42
def f():
print(a)
f()
42
Assignment creates local scope
a = 42
def f():
a = 23
print(a)
print('ok')
print(a)
f()
print(a)
ok
42
23
42
Local scope gone wrong
a = 42
def f():
print(a)
a = 23
print('ok')
print(a)
f()
print(a)
ok
42
Traceback (most recent call last):
File "scoping_external_variable.py", line 8, in <module>
f()
File "scoping_external_variable.py", line 3, in f
print(a)
UnboundLocalError: local variable 'a' referenced before assignment
Accessing a global variable inside a function works, but if I change it (make it refer to another piece of data), then it is disallowed. If I only change the data inside (for mutable variables), that works, but is a bad practice.
Changing global variable from a function
a = 42
def f():
global a
print(a)
a = 23
print(a) # 42
f() # 42
print(a) # 23
Does not need to be created outside
def f():
global a
a = 23
f()
print(a) # 23
Global variables mutable in functions
a = [2]
def f():
print(a) # [2]
a.append(3)
print(a) # [2, 3]
a[0] = 4
f()
print(a) # [4, 3]
Scoping issues
text = ['aaaa', 'bb', 'ccc ccc']
length_1 = [ len(s) for s in text ]
print(length_1) # [4, 2, 7]
length_2 = [ len(s) for x in text ]
print(length_2) # [7, 7, 7]
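In Python 2 list comprehensions did not create their own scope, so the loop variable s leaked out of the first comprehension and the second one printed [7, 7, 7]. In Python 3 comprehensions do have their own scope, and the second comprehension raises NameError: name 's' is not defined.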
sub in sub
Functions can be defined inside functions.
def f():
print("in f")
def g():
print("in g")
g()
f()
#g() # does not exist here
They are scoped locally
Scoping sub in sub (enclosing scope)
def external_func():
the_answer = 42
def func(args):
print(args, "the_answer:", the_answer)
# the_answer = 'what was the question?'
# enabling this would give:
# UnboundLocalError: local variable 'the_answer'
# referenced before assignment
func("first")
func("second")
external_func()
{% embed include file="src/examples/advanced-functions/scoping_internal_sub.out)
Function objects
The difference between
x = foo
y = foo()
c = 0
def foo():
global c
c += 1
return c
print(foo()) # 1
print(foo()) # 2
x = foo # assigning the function object
y = foo() # assigning the return value of the function
print(foo()) # 4
print(x()) # 5
print(y) # 3
Functions are created at run time
def and class are run-time. Everything is runtime. Even compilation is runtime.
foo() will return a random value every time, but when bar is defined it freezes the specific value that foo returned when bar was created.
import random
def foo():
return random.random()
print(foo())
print(foo())
def bar(a, b = foo()):
return [a, b]
print(bar(1))
print(bar(2))
{% embed include file="src/examples/advanced-functions/runtime-def.out)
Mutable default
The default list assigned to b is created when the f function is defined. After that, each call to f() (that does not get a "b" parameter) uses this common list.
def f(a, b = []):
b.append(a)
return b
print(f(1))
print(f(2))
print(f(3))
{% embed include file="src/examples/advanced-functions/mutable_default_parameter.out)
Use None instead:
Use None as default parameter
def f(a, b=None):
    """Append *a* to the list *b* and return that list.

    When *b* is not supplied, a fresh list is created on every call, so
    calls do not share state the way a mutable default would.
    """
    # Identity test: ``b == None`` would invoke b.__eq__, which a custom
    # object may answer with True even though it is not None.
    if b is None:
        b = []
    b.append(a)
    return b
print(f(1))
print(f(2))
print(f(3))
{% embed include file="src/examples/advanced-functions/none_as_default_parameter.out)
Inner function created every time the outer function runs
Also defined during run-time, but in every call of bar() the inner_func is redefined again and again.
import random
def foo():
return random.random()
print(foo())
print(foo())
def bar(a, b = foo()):
def inner_func(x, y = foo()):
return [x, y]
print('inner', inner_func(a))
return [a, b]
print(bar(1))
print(bar(2))
{% embed include file="src/examples/advanced-functions/runtime-inner-def.out)
Static variable
- static
There are no function-level static variables in Python, but you can fake it quite easily
def counter():
    """Return how many times this function has been called so far.

    The count is kept as the ``cnt`` attribute of the function object
    itself, which is created on the first call.
    """
    counter.cnt = vars(counter).get('cnt', 0) + 1
    return counter.cnt
print(counter()) # 1
print(counter()) # 2
print(counter()) # 3
print(counter.cnt) # 3
counter.cnt = 6
print(counter()) # 7
Static variable in generated function
def create():
    """Build and return a new counting function.

    Every returned function carries its own ``cnt`` attribute, starting
    at 0, which it increments and returns on each call.
    """
    def func():
        current = func.cnt + 1
        func.cnt = current
        return current

    setattr(func, 'cnt', 0)
    return func
a = create()
b = create()
print(a()) # 1
print(a()) # 2
print(b()) # 1
print(a()) # 3
b.cnt = 7
print(a.cnt) # 3
print(b.cnt) # 7
Inspect
The inspect module provides introspection to Python runtime.
inspect.stack
returns the stack-trace. Element 0 is the deepest (where we called inspect.stack).
Each level has several values: a representation of the frame, filename, line number, subroutine name.
import inspect
import sys
level = int(sys.argv[1])
def f():
print("in f before g")
g()
print("in f after g")
def g():
print("in g")
PrintFrame()
def PrintFrame():
st = inspect.stack()
frame = st[level][0]
info = inspect.getframeinfo(frame)
print('__file__: ', info.filename)
print('__line__: ', info.lineno)
print('__function__: ', info.function)
print('* file', st[level][1])
print('* line', st[level][2])
print('* sub', st[level][3])
f()
python caller.py 1
in f before g
in g
__file__: caller.py
__line__: 15
__function__: g
* file caller.py
* line 15
* sub g
in f after g
Variable number of function arguments
Python function arguments - a reminder
- Order of parameter
- Arguments with default values are optional (and come at the end of the definition)
- Number of arguments is known at the time of function definition. The only flexibility is provided by the optional arguments.
def f(a, b = 42):
print(a)
print(b)
f(23)
# 23
# 42
f(19, 11)
# 19
# 11
f(b=7, a=8)
# 8
# 7
# f() # (runtime) TypeError: f() takes at least 1 argument (0 given)
# f(1, 2, 3) # (runtime) TypeError: f() takes at most 2 arguments (3 given)
# f(b=10, 23) # SyntaxError: non-keyword arg after keyword arg
# def g(a=23, b):
# pass
# SyntaxError: non-default argument follows default argument
Functions with unknown number of arguments
- sum(a, b, c, ...)
- reduce(function, a, b, c, ...)
- report (function, foo = 23, bar = 19, moo = 70, ...)
- report (function, a, b, c, ..., foo = 23, bar = 19, moo = 70, ...)
Variable length argument list with *
and **
*
**
*args
**kwargs
def f(a, b=1, *args, **kwargs):
print('a: ', a)
print('b: ', b)
print('args: ', args)
print('kwargs:', kwargs)
return a + b
f(2, 3, 4, 5, c=6, d=7)
print()
f(2, c=5, d=6)
print()
f(10)
a: 2
b: 3
args: (4, 5)
kwargs: {'c': 6, 'd': 7}
a: 2
b: 1
args: ()
kwargs: {'c': 5, 'd': 6}
a: 10
b: 1
args: ()
kwargs: {}
Passing arguments as they were received (but incorrectly)
What if we need to pass the list of individual arguments (or pairs) to another function?
def f(*args, **kwargs):
print('f args: ', args)
print('f kwargs: ', kwargs)
g(args, kwargs)
def g(*args, **kwargs):
print('g args: ', args)
print('g kwargs: ', kwargs)
f(1, 2, a=3, b=4)
f args: (1, 2)
f kwargs: {'a': 3, 'b': 4}
g args: ((1, 2), {'a': 3, 'b': 4})
g kwargs: {}
g() received 2 individual parameters, the first was a tuple, the second a dictionary
Unpacking args before passing them on
def f(*args, **kwargs):
print('f: ', args)
print('f: ', kwargs)
g(*args, **kwargs)
def g(*args, **kwargs):
print('g: ', args)
print('g: ', kwargs)
f(1, 2, a=3, b=4)
f: (1, 2)
f: {'a': 3, 'b': 4}
g: (1, 2)
g: {'a': 3, 'b': 4}
Exercise: implement the my_sum function
- my_sum should be able to accept any number of values and return their sum.
- my_sum() should return 0 or None. Decide yourself!
- my_sum(2, 3) should return 5. etc.
Solution: implement the my_sum function
def my_sum(*numbers):
    """Return the sum of all the given values; 0 when called without any."""
    return sum(numbers, 0)
print(my_sum()) # 0
print(my_sum(2, 3)) # 5
print(my_sum(-1, 2, -1,)) # 0
Exercise: implement the reduce function
my_reduce(function, a, b, c, ...)
- 'function' is expected to be a function that receives two arguments and returns a result.
- If only the function is given, return None.
- If only one value is given, return that value.
- Take the first two values, run the function on them. Then take the result and the next value and run the function on them. etc. When no more values are left, return the last result.
# print(my_reduce()) # TypeError: my_reduce() takes at least 1 argument (0 given)
print(my_reduce(lambda x,y: x+y)) # None
print(my_reduce(lambda x,y: x+y, 3)) # 3
print(my_reduce(lambda x,y: x+y, -1, 4, -2)) # 1
print(my_reduce(lambda x,y: x*y, -1, 4, -2)) # 8
Solution: implement the reduce function
def my_reduce(f, *args):
    """Fold the given values from left to right using the function *f*.

    *f* receives two arguments: the result so far and the next value.
    Returns None when no values are given and the value itself when only
    one is given.
    """
    if not args:
        return None
    # Iterate over the values directly instead of indexing by position.
    values = iter(args)
    result = next(values)
    for value in values:
        result = f(result, value)
    return result
# print(my_reduce()) # TypeError: my_reduce() takes at least 1 argument (0 given)
print(my_reduce(lambda x,y: x+y)) # None
print(my_reduce(lambda x,y: x+y, 3)) # 3
print(my_reduce(lambda x,y: x+y, -1, 4, -2)) # 1
print(my_reduce(lambda x,y: x*y, -1, 4, -2)) # 8
Exercise: sort pairs
Create a function called sort_pairs, that would receive a sorting method, e.g. the word 'keys' or the word 'values' and will receive an arbitrary number of key-value pairs and will return a list of tuples.
sort_pairs( 'keys', foo = 23, bar = 47)
[('bar', 47), ('foo', 23)]
sort_pairs( 'values', foo = 23, bar = 47)
[('foo', 23), ('bar', 47)]
Solution: sort pairs
def sort_pairs(how, **kwargs):
    """Return the keyword arguments as a sorted list of (key, value) tuples.

    how -- 'keys' to sort by the names, 'values' to sort by the values.

    Raises ValueError (a subclass of Exception) for any other *how*.
    """
    # Position inside each (key, value) tuple that is used as the sort key.
    positions = {'keys': 0, 'values': 1}
    try:
        index = positions[how]
    except (KeyError, TypeError):
        raise ValueError("Invalid sort function") from None
    return sorted(kwargs.items(), key=lambda pair: pair[index])
k = sort_pairs( 'keys', foo = 23, bar = 47)
print(k)
v = sort_pairs( 'values', foo = 23, bar = 47)
print(v)
Python Packages
Why Create package
As a module gets larger and larger it will be more and more difficult to maintain.
It might be easier if we split it up into multiple files and put those files inside a directory. A 'package' is just that. A bunch of Python modules that belong together and are placed in a directory hierarchy. In order to tell Python that you really mean these files to be a package one must add a file called __init__.py in each directory of the project. In the most simple case the file can be empty.
- Code reuse
- Separation of concerns
- Easier distribution
Create package
- __init__.py
mymath/
__init__.py
calc.py
...
internal_use.py
def add(x, y):
return x+y
# empty
Internal usage
import calc
print(calc.add(7, 8)) # 15
from calc import add
print(add(3, 5)) # 8
cd examples/package
python 1/mymath/internal_use.py
use module in package - relative path
import sys
import os
path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), '1')
# print(path) # /home/gabor/work/slides/python-programming/examples/package/1
sys.path.insert(0, path)
import mymath.calc
print(mymath.calc.add(2, 5))
from mymath.calc import add
print(add(2, 3))
7
5
use package (does not work)
import sys
import os
sys.path.insert(0, os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
'1' ) )
import mymath
print(mymath.calc.add(4, 7))
Traceback (most recent call last):
File "use_project/proj1_2.py", line 9, in <module>
print(mymath.calc.add(4, 7))
AttributeError: module 'mymath' has no attribute 'calc'
If we import the main package name, it does not have access to the module inside.
package importing (and exporting) module
- __init__.py
Put import (and thus re-export) in __init__.py
def add(x, y):
return x+y
import mymath.calc
use package (module) with import
Still works...
import sys
import os
path = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), '2' )
# print(path)
sys.path.insert(0, path)
import mymath.calc
print(mymath.calc.add(2, 5)) # 7
from mymath.calc import add
print(add(2, 3)) # 5
use package with import
Now we can import the module from the package and use that.
import sys
import os
sys.path.insert(0, os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
'2' ) )
import mymath
print(mymath.calc.add(4, 7)) # 11
from mymath import calc
print(calc.add(5, 9)) # 14
Creating an installable Python package
The directory layout of a package:
├── mymath
│ ├── calc.py
│ └── __init__.py
└── setup.py
from setuptools import setup
setup(name='mymath',
version='0.1',
description='The best math library',
url='http://github.com/szabgab/mymath',
author='Foo Bar',
author_email='foo@bar.com',
license='MIT',
packages=['mymath'],
zip_safe=False,
)
Create tar.gz file
$ python setup.py sdist
- mymath.egg-info/
- dist/mymath-0.1.tar.gz
running sdist
running egg_info
creating mymath.egg-info
writing mymath.egg-info/PKG-INFO
writing top-level names to mymath.egg-info/top_level.txt
writing dependency_links to mymath.egg-info/dependency_links.txt
writing manifest file 'mymath.egg-info/SOURCES.txt'
reading manifest file 'mymath.egg-info/SOURCES.txt'
writing manifest file 'mymath.egg-info/SOURCES.txt'
warning: sdist: standard file not found: should have one of README, README.txt
creating mymath-0.1
creating mymath-0.1/mymath
creating mymath-0.1/mymath.egg-info
making hard links in mymath-0.1...
hard linking setup.py -> mymath-0.1
hard linking mymath/__init__.py -> mymath-0.1/mymath
hard linking mymath.egg-info/PKG-INFO -> mymath-0.1/mymath.egg-info
hard linking mymath.egg-info/SOURCES.txt -> mymath-0.1/mymath.egg-info
hard linking mymath.egg-info/dependency_links.txt -> mymath-0.1/mymath.egg-info
hard linking mymath.egg-info/not-zip-safe -> mymath-0.1/mymath.egg-info
hard linking mymath.egg-info/top_level.txt -> mymath-0.1/mymath.egg-info
Writing mymath-0.1/setup.cfg
creating dist
Creating tar archive
removing 'mymath-0.1' (and everything under it)
Install Package
- pip
- easy_install
$ pip install dist/mymath-0.1.tar.gz
$ easy_install --prefix ~/python/ dist/mymath-0.1.tar.gz
$ python setup.py install --prefix ~/python/
Upload to PyPi or distribute to your users.
Dependencies
requires=[
'lawyerup',
],
To list them
$ python setup.py --requires
In the setup.py file we only need to change the version number and we can release a new version of the package.
Add README file
.
├── bin
│ ├── runmymath.bat
│ └── runmymath.py
├── MANIFEST.in
├── mymath
│ └── test
│ ├── __init__.py
│ ├── test_all.py
│ └── test_calc.py
├── README.rst
└── setup.py
mymath
------
Super awesome Python module to compute the sum of numbers.
To use:
import mymath
mymath.sum(1, 2, 3)
include README.rst
Add README file (setup.py)
In the setup.py add the following function:
def readme():
    """Return the content of README.rst found in the current directory."""
    # Explicit encoding: the platform default is not UTF-8 everywhere, which
    # would break on a README that contains non-ASCII characters.
    with open('README.rst', encoding='utf-8') as f:
        return f.read()
and in the setup() call include the following parameter:
long_description=readme(),
This will display the README file when called at
$ python setup.py --long-description
Include executables
root/
setup.py
README.rst
MANIFEST.in
bin/
runmymath.py
runmymath.bat
mymath/
__init__.py
calc.py
import mymath
def main():
print("running")
main()
{% embed include file="src/examples/package/3/bin/runmymath.bat)
setup.py will need to get
scripts=['bin/runmymath.py', 'bin/runmymath.bat'],
Add tests
- unittest
- discover
root/
setup.py
README.rst
MANIFEST.in
bin/
runmymath.py
runmymath.bat
mymath/
__init__.py
calc.py
test/
__init__.py
test_all.py
test_calc.py
#empty (needed for unittest discover)
python mymath/test/test_calc.py
python mymath/test/test_all.py
python -m unittest discover
Add tests calc
from os.path import dirname,abspath
import sys
sys.path.insert(0, dirname(dirname(dirname(abspath(__file__)))))
from mymath.calc import add
import unittest
class AddTest(unittest.TestCase):
def test_add(self):
self.assertEqual(add(2, 3), 5)
self.assertEqual(add(2, -2), 0)
#self.assertEqual(add(1, 1), 1)
if __name__ == '__main__':
unittest.main()
Add tests all
from os.path import dirname,abspath
import sys
sys.path.insert(0, dirname(dirname(dirname(abspath(__file__)))))
from mymath.calc import *
import unittest
class AllTest(unittest.TestCase):
def test_sum(self):
self.assertEqual(add(2, 3), 5)
#self.assertEqual(sum(1, 1), 2)
#self.assertEqual(div(6, 2), 3)
if __name__ == '__main__':
unittest.main()
setup.py
from setuptools import setup
def readme():
    """Return the content of README.rst found in the current directory."""
    # Explicit encoding: the platform default is not UTF-8 everywhere, which
    # would break on a README that contains non-ASCII characters.
    with open('README.rst', encoding='utf-8') as f:
        return f.read()
setup(name='mymath',
version='0.2',
description='The best math library',
url='http://github.com/szabgab/mymath',
author='Foo Bar',
author_email='foo@bar.com',
license='MIT',
packages=['mymath'],
zip_safe=False,
requires=[
'lawyerup',
],
long_description=readme(),
scripts=['bin/runmymath.py', 'bin/runmymath.bat'],
)
Run tests and create package
python setup.py test
python setup.py sdist
Exercise: package
-
Go to Pypi, find some interesting module and install it in a non-standard location (or in a virtualenv)
-
Check if it was installed (try to import it in a python script).
-
Take one of the previously created modules, and create a package for it.
-
Install this new package in a non-standard location.
-
Check if it works from some other place in your file-system.
-
Take the mymath package, add another method, add tests and create the distributable zip file.
Exercise: create executable
- Go over some of the examples in the course and package that.
- Package a script using some of your favorite modules.
Distribution of Python code
Distribution demo 1
def whoami():
print(__file__)
if __name__ == "__main__":
whoami()
# distutils was removed from the standard library in Python 3.12;
# setuptools provides the same setup() entry point.
from setuptools import setup

setup(
    name='demo1',
    version='1.0',
)
- Install from the current folder
pip install .
- Use it on the command line: (try it in a different folder!)
python -m demo1
/home/gabor/venv3/lib/python3.10/site-packages/demo1.py
- Use it in the interactive shell
python
>>> import demo1
>>> demo1.whoami()
- Uninstall (without asking questions)
pip uninstall demo1 --yes
build/
demo1.egg-info/
Distribution demo 2
- The name of the package (demo2a in setup.py) and the name of the module (the filename demo2b.py) don't need to be the same.
- The name of the folder (demo2)
def whoami():
print(__file__)
if __name__ == "__main__":
whoami()
# distutils was removed from the standard library in Python 3.12;
# setuptools provides the same setup() entry point.
from setuptools import setup

setup(
    name='demo2a',
    version='1.0',
)
- Install:
pip install .
- Use the name of the module
python -m demo2b
- Uninstall using the package name
pip uninstall demo2a --yes
Distribution demo 3
One package with multiple python files
Distribute Python application as an exe
Packaging applications (creating executable binaries)
-
py2exe
-
Freeze
-
py2app
-
cx_Freeze
-
PyInstaller
-
py2exe on Windows (discontinued)
-
Freeze on Linux
-
py2app on Mac
-
cx_Freeze cross-platform
-
PyInstaller cross-platform
Using PyInstaller
print("hello world")
pip install pyinstaller
pyinstaller myscript.py
pyinstaller --onefile hello_world.py
- See the results in dist/
Other PyInstaller examples
Use this to see where does the packaged version of our code look for modules:
import sys
print(sys.path)
Use this to see how to pass command line parameters to the packaged exe:
import sys
print(sys.argv)
Other
pyinstaller --onefile --windowed myscript.py
Py2app for Mac
pip install py2app
py2applet examples/other/hello.py
Ctypes
ctypes - hello
- ctypes
#include <stdio.h>
char * echo(char * what)
{
return what;
}
/* Return the sum of a and b.
 * (The function used to be defined twice, which is a redefinition
 * error at compile time; only one definition is kept.) */
int add_int(int a, int b)
{
    int sum = a + b;
    return sum;
}
int main(void)
{
printf("hello\n");
printf("%d\n", add_int(2, 3));
printf("%s\n", echo("Foo"));
return 0;
}
gcc -o hello hello.c
gcc -o hello.so -shared -fPIC hello.c
# Call the C functions of hello.so (built with: gcc -o hello.so -shared -fPIC hello.c)
from ctypes import cdll
from ctypes import c_char_p

# A name without a directory part is searched only on the system library
# path, so point explicitly at the file in the current directory.
hello_lib = cdll.LoadLibrary("./hello.so")

print(hello_lib.add_int(4, 5))             # 9

# echo() takes char*: pass bytes. A Python 3 str would be handed over as wchar_t*.
# Without a declared return type ctypes treats the result as a C int,
# so the returned pointer is printed as a number (the value varies).
print(hello_lib.echo(b'Hello World'))      # 153977204

hello_lib.echo.restype = c_char_p
print(hello_lib.echo(b'Hello World').decode())  # Hello World
concat
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int len(char * s)
{
return strlen(s);
}
/*
 * Return a newly allocated string that holds a followed by b.
 * The caller owns the result and has to free() it.
 * Returns NULL if the memory could not be allocated.
 */
char * concat(char * a, char * b)
{
    /* +1 for the terminating '\0' that strcpy/strcat write;
     * without it the copy runs one byte past the allocation. */
    size_t leng = strlen(a) + strlen(b) + 1;
    char * res = (char *)malloc(leng);
    if (res == NULL) {
        return NULL;
    }
    strcpy(res, a);
    strcat(res, b);
    return res;
}
int main(void)
{
printf("concat\n");
printf("%d\n", len("abc"));
printf("%d\n", len(""));
printf("%d\n", len("xxxxxxxxxx"));
printf("%s\n", concat("Foo1", "Bar"));
return 0;
}
# Call the C functions len() and concat() of the more.so shared library.
from ctypes import cdll
from ctypes import c_char_p

# A name without a directory part is searched only on the system library
# path, so point explicitly at the file in the current directory.
more_lib = cdll.LoadLibrary("./more.so")

# The C functions take char*: pass bytes. A Python 3 str would be handed
# over as wchar_t* and strlen() would stop at its first zero byte.
print(more_lib.len(b"abcd"))      # 4
print(more_lib.len(b""))          # 0
print(more_lib.len(b"x" * 123))   # 123

more_lib.concat.restype = c_char_p
# NOTE(review): concat() malloc()s its result and this script never frees it - confirm that is acceptable for the demo.
print(more_lib.concat(b"abc", b"def").decode())  # abcdef
links
2to3
Converting from Python 2 to Python 3
- 2to3
from __future__ import ...
division
- division
print 3/2 # 1
from __future__ import division
print 3/2 # 1.5
print in Python 2
fname = 'Foo'
lname = 'Bar'
print("Name: %s %s" % (fname, lname))
print("Name: {} {}".format(fname, lname))
print(fname, lname)
print fname, lname
Name: Foo Bar
Name: Foo Bar
('Foo', 'Bar')
Foo Bar
print in Python 3
print now requires print()
from __future__ import print_function
fname = 'Foo'
lname = 'Bar'
print("Name: %s %s" % (fname, lname))
print("Name: {} {}".format(fname, lname))
print(fname, lname)
Name: Foo Bar
Name: Foo Bar
Foo Bar
input and raw_input
- raw_input
- input
raw_input()
was renamed to input()
In Python 2 raw_input()
returned the raw string. input(), on the other hand ran eval(raw_input())
which meant it tried to execute the input string as a piece of Python code. This was dangerous and was not really used.
In Python 3 raw_input() is gone. input() behaves as the old raw_input() returning the raw string. If you would like to get the old, and dangerous, behavior of input() you can call eval(input()).
Code that works on both 2 and 3
import platform
def my_input(text):
    """Show the prompt *text* and return the line the user typed as a string.

    Works on both Python 2 (raw_input) and Python 3 (input).
    """
    # python_version_tuple() returns strings such as ('3', '10', '4'), so
    # the major version has to be converted before a numeric comparison.
    if int(platform.python_version_tuple()[0]) >= 3:
        return input(text)
    return raw_input(text)
Compare different types
x = 3
y = '3'
# Python 2 Python 3
print( x > y ) # False TypeError: unorderable types: int() > str()
print( x < y ) # True TypeError: unorderable types: int() < str()
print( x == y ) # False False
Octal numbers
Octal numbers in 2.x was 011
in 3.x is: 0o11
2to3 Resources
- python3porting book
- wiki
- Dive into Python 3
- The future module
- The third-party future module
- The six module
- docs of 2to3
Design Patterns
What are Design Patterns?
Not all the Design Patterns discussed for Java or C++ are interesting, relevant or even needed in Python. Design Patterns are formal descriptions of how people do things, and not how you should do things. The formal description makes it easy to talk about them.
Some of the DPs exist to overcome problems in that specific language. Other DPs are more general, solving classes of problems that are generic.
Don't replace built-in objects
import sys
print = 'hello'
sys.stdout.write(print)
sys.stdout.write('\n')
pip install flake8-builtins
flake8 --ignore= replace_print.py
replace_print.py:3:1: A001 "print" is a python builtin and is being shadowed, consider renaming the variable
Facade - simple interface to complex system
Facade, a structural design pattern. - Provide a simple interface (maybe a single class with few methods) to some complex system behind it. This gives flexibility for the implementation of the complex system while users gain simplicity in using it in certain subsets of operations.
os.path.basename, os.path.dirname are facades for os.path.split + indexing in the list
os.path.basename = os.path.split()[-1]
os.path.split = split with os.sep
os.path.join(names) = os.sep.join(names)
os.path.isdir(path) = stat.S_ISDIR(os.stat(path))
Monkey Patching
import real_class
class faker(object): pass
fake = faker
real_class.time = fake
fake.sleep =
fake.time =
- handy in emergencies
- easily abused for NON-emergencies - gives dynamic languages a bad name
- subtle hidden "communication" via secret obscure pathways (explicit is better)
class Monkey:
    """A monkey that owns a number of bananas."""

    def __init__(self, count):
        self.bananas = count

    def eat(self):
        """Eat one banana."""
        self.bananas -= 1

    def is_hungry(self):
        """Feed the monkey when it is hungry (it always is); returns None."""
        hungry = True
        if not hungry:
            return
        # Looked up through the instance, so a replaced eat() is honoured.
        self.eat()
m = Monkey(10)
print(m.bananas) # 10
print(m.is_hungry()) # None
print(m.bananas) # 9
Monkey.eat = lambda self: True
om = Monkey(10)
print(om.bananas) # 10
print(om.is_hungry()) # None
print(om.bananas) # 10
Creation DPs "Just One"
we want just one instance to exist
- Singleton - subclassing can never be really smooth
- Use a module instead of a class (no inheritance, no special methods)
- make just one instance (self discipline, no enforcement), need to decide to "when" (in which part if the code) to make it
- monostate (borg)
Singleton
class Singleton(object):
    """Base class whose instantiation always returns one shared instance."""

    def __new__(cls, *a, **kw):
        # hasattr() also sees an instance cached by a parent class.
        if not hasattr(cls, '_inst'):
            # object.__new__ needs the class as its first argument and does
            # not accept the constructor arguments; those go to __init__.
            cls._inst = super(Singleton, cls).__new__(cls)
        return cls._inst
the problem
class Foo(Singleton): pass
class Bar(Foo): pass
f = Foo()
b = Bar()
# what class is b now? is that a Bar or a Foo instance?
Monostate (Borg)
class Monostate(object):
    """Borg pattern: separate instances that all share one attribute dictionary."""

    # The single dictionary used as __dict__ by every instance,
    # including instances of subclasses.
    _shared_state = {}

    def __new__(cls, *a, **kw):
        # object.__new__ needs the class as its first argument and does
        # not accept the constructor arguments; those go to __init__.
        obj = super(Monostate, cls).__new__(cls)
        # Reach the class attribute through cls: a bare _shared_state is
        # not visible inside a method and would raise NameError.
        obj.__dict__ = cls._shared_state
        return obj


class Foo(Monostate):
    pass


class Bar(Foo):
    pass


f = Foo()
b = Bar()
Better than singleton, data overriding to the rescue: But what if two calls to the constructor provide different initial data?
Dispatch table
calls = []
calls.append( lambda x: x+1 )
calls.append( lambda x: x*2 )
others = [
lambda x: x-1,
lambda x: 0
]
def do_something( call_list ):
for c in call_list:
print(c(3))
do_something( calls )
do_something( others )
Python Pitfalls
Reuse of existing module name
import random
print(random.random())
$ python examples/pitfalls/random.py
Traceback (most recent call last):
File "examples/pitfalls/random.py", line 1, in <module>
import random
File ".../examples/pitfalls/random.py", line 3, in <module>
print(random.random())
TypeError: 'module' object is not callable
- Write an example to use random number and call your example number.py
- Same with any other module name.
- Lack of multi-level namespaces
- Solution: use longer names. Maybe with project specific names.
Use the same name more than once
class Corp(object):
people = []
def add(self, name, salary):
Corp.people.append({ 'name': name, 'salary' : salary})
def total(self):
self.total = 0
for n in Corp.people:
self.total += n['salary']
return self.total
c = Corp()
c.add("Foo", 19)
print(c.total())
c.add("Bar", 23)
print(c.total())
$ python examples/pitfalls/corp.py
19
Traceback (most recent call last):
File "examples/pitfalls/corp.py", line 19, in <module>
print(c.total())
TypeError: 'int' object is not callable
Compare string and number
x = 2
y = "2"
print(x > y)
print(x < y)
Python 2 - compares them based on the type of values (wat?)
$ python examples/pitfalls/compare.py
False
True
Python 3 - throws exception as expected.
$ python3 examples/pitfalls/compare.py
Traceback (most recent call last):
File "examples/pitfalls/compare.py", line 4, in <module>
print(x > y)
TypeError: unorderable types: int() > str()
Compare different types
x = 2
y = "2"
print(x == y)
with open(__file__) as fh:
print(fh == x)
In both Python 2 and Python 3 these return False
import sys
hidden = 42 # would be random
if sys.version_info.major < 3:
guess = raw_input('Your guess: ')
else:
guess = input('Your guess: ')
if hidden == guess:
print("Match!")
Will never match. Even if user types in 42. - Hard to debug and understand as there is no error.
Sort mixed data
from __future__ import print_function
mixed = [10, '1 foo', 42, '4 bar']
print(mixed) # [10, '1 foo', 42, '4 bar']
mixed.sort()
print(mixed) # Python 2: [10, 42, '1 foo', '4 bar'] - Python 3 raises TypeError in sort()
In Python 2 it "works" in some strange way.
$ python examples/pitfalls/sort.py
[10, '1 foo', 42, '4 bar']
[10, 42, '1 foo', '4 bar']
In Python 3 it correctly throws an exception.
air:python gabor$ python3 examples/pitfalls/sort.py
[10, '1 foo', 42, '4 bar']
Traceback (most recent call last):
File "examples/pitfalls/sort.py", line 5, in <module>
mixed.sort()
TypeError: unorderable types: str() < int()
Linters
Static Code Analysis - Linters
-
lint
-
PEP8
-
Flake8
-
Pylint
PEP8
- pep8
pip install pep8
F811 - redefinition of unused
- flake8
import subprocess
import datetime
import sys
from datetime import datetime
$ flake8 importer.py
importer.py:4:1: F811 redefinition of unused 'datetime' from line 2
Warn when Redefining functions
- pylint
sum = 42
def len(thing):
print(f"Use {thing}.__len__() instead!")
len("abc")
pylint redef.py
************* Module redef
redef.py:1:0: C0111: Missing module docstring (missing-docstring)
redef.py:2:0: W0622: Redefining built-in 'sum' (redefined-builtin)
redef.py:4:0: W0622: Redefining built-in 'len' (redefined-builtin)
redef.py:2:0: C0103: Constant name "sum" doesn't conform to UPPER_CASE naming style (invalid-name)
redef.py:4:0: C0111: Missing function docstring (missing-docstring)
--------------------------------------------------------------------
Your code has been rated at -2.50/10 (previous run: -2.50/10, +0.00)
Signals
Signals and Python
-
kill
-
man 7 signal (on Linux)
-
Unix: kill PID, kill -9 PID, Ctrl-C, Ctrl-Z
-
os.kill
Sending Signal
- kill
import signal
import os
print("before")
os.kill(os.getpid(), signal.SIGUSR1)
print("after")
before
User defined signal 1: 30
Catching Signal
import signal
import os
def handler(signum, frame):
print('Signal handler called with signal', signum)
signal.signal(signal.SIGUSR1, handler)
print("before")
os.kill(os.getpid(), signal.SIGUSR1)
print("after")
before
('Signal handler called with signal', 30)
after
Catching Ctrl-C on Unix
username = input('Username:')
print(username)
$ python ctrl_c.py
{% embed include file="src/examples/signals/ctrl_c.out" %}
import signal
def handler(signum, frame):
print('Signal handler called with signal', signum)
signal.signal(signal.SIGINT, handler)
username = input('Username:')
print(username)
- Cannot stop using Ctrl-C !
- Ctrl-Z and then kill %1
- kill PID
Catching Ctrl-C on Unix confirm
import signal
import time
def handler(signum, frame):
answer = input('We are almost done. Do you really want to exit? [yes]:')
if answer == 'yes':
print('bye')
exit()
print("Then let's keep running")
signal.signal(signal.SIGINT, handler)
for _ in range(10):
time.sleep(5)
Alarm signal and timeouts
import signal
class MyTimeout(Exception):
pass
def handler(signum, frame):
print('Signal handler called with signal', signum)
raise MyTimeout
try:
signal.signal(signal.SIGALRM, handler)
signal.alarm(5)
number = input("Divide by (5 sec):")
signal.alarm(0)
print(42/int(number))
except MyTimeout:
print('timeout')
except Exception as e:
print(e)
#raise
print("Still working")
Exercise: Catching Ctrl-C on Unix 2nd time
- When Ctrl-C is pressed display: "In order to really kill the application press Ctrl-C again" and keep running. If the user presses Ctrl-C again, then let it die.
- Improve the previous solution so that if there is no 2nd Ctrl-C within 5 seconds of the first Ctrl-C, then any further Ctrl-C will trigger the above message again.
Exercise: Signals
- What signal is sent when you run kill PID?
- Write a script that will disable the kill PID for your process. How can you kill it then?
- What signal is sent when we press Ctrl-Z ?
Ctrl-z
import signal
import os
print(os.getpid())
username = input('Username:')
print(username)
kill PID
import signal
import os
print(os.getpid())
def handler(signum, frame):
print('Signal handler called with signal', signum)
signal.signal(signal.SIGTERM, handler)
username = input('Username:')
print(username)
Data Science
Data Science Resources
-
Machine Learning with Andrew Ng.
-
Data Scientist with Python on DataCamp.
-
[Stanford cs231n](http://cs231n.stanford.edu/)
-
Pandas profiling
FAQ
How not to name example scripts?
Don't - by mistake - call one of your files the same as a module you will be loading.
For example random.py
is a bad idea if you will import random
.
Your code will try to locate random.py to load, but will find itself and not the one that comes with Python.
Python will also create a random.pyc file - a compiled file - and it will take time till you recall this and delete that too. Till then the whole thing will seem to be broken.
Platform independent code
In general Python is platform independent, but still needs some care to make sure you don't step on some aspects of Operating System or the file system that works differently on other OS-es.
- Filenames are case sensitive on some OS-es (e.g. Linux) but not on others (e.g. Windows). They used to be restricted to 8.3. Make sure you are within the restriction of every OS you might want to use.
- Directory path: (slash or backslash or something else?) use the os.path methods.
- os.path.expanduser('~') works on both Linux and Windows, but the root of a Linux/Unix file system starts with a slash (/) and on Windows it is c:\ and d:\ etc.
- On Linux/Unix you have user 'root' and on Windows 'Administrator'
- File permissions are different on Linux and Windows.
- Stay away from OS specific calls, but as a last resort use os.name or sys.platform to figure out which os is this. os.name is 'posix' on Linux and 'nt' on Windows.
- For GUI use wxWindows that has a native look on Windows and Gnome look on Linux.
- Pay attention to any 32/64 bit issues. Big/Little Endian issues.
- Some modules might be OS specific. Check the documentation.
- Pay attention to the use of os.system and the subprocess module.
How to profile a python code to find causes of slowness?
Use one of these modules:
- cProfile is in C. It is faster and preferable.
- profile
pdb = Python Debugger
- pdb
Include the following code in your script at any point, and run the script as you'd do normally. It will stop at the given point and enter the debugger.
import pdb; pdb.set_trace()
Avoid Redefining functions
Can I tell python to stop compilation when someone is redefining a function? Or at least give me a warning?
Use pylint
for that
Algorithm
Exercise: Find the odd value
-
Given a list of values, we know that every value comes in pairs except one. Find where it is:
-
f(["a", "a", "b", "b", "c", "d", "d"]) would return 4
Solution: Find the odd value
def find_odd(values):
    '''
    Return the index of the single unpaired value in ``values``.

    Every value is expected to appear as two adjacent equal elements, except
    for one value that appears only once. Pairs located before the unpaired
    value start at an even index, so a binary search over the even indexes
    finds it: if the elements at an even index and the following index are
    equal, the unpaired value must be further to the right.

    Raises Exception if the number of elements is even (which includes the
    empty list), because then there cannot be exactly one unpaired value.

    >>> find_odd(['c'])
    0
    >>> find_odd(['c', 'x', 'x'])
    0
    >>> find_odd(['x', 'x', 'c'])
    2
    >>> find_odd(['x', 'x', 'c', 'y', 'y'])
    2
    >>> find_odd(['a', 'a', 'b', 'b', 'd', 'd', 'x', 'x', 'c', 'y', 'y'])
    8
    >>> find_odd(['a', 'a', 'c', 'b', 'b', 'd', 'd', 'x', 'x', 'y', 'y'])
    2
    '''
    if len(values) % 2 == 0:
        raise Exception("Number of elements must not be divisible by 2")

    # Invariant: start and end are both even and the unpaired value is
    # located somewhere in values[start:end+1].
    start = 0
    end = len(values) - 1
    while end - start >= 2:
        middle = start + (end - start) // 2
        middle -= middle % 2  # align to the first element of a would-be pair
        if values[middle] == values[middle + 1]:
            # A complete pair: everything up to it is paired as well.
            start = middle + 2
        else:
            # The pairing is already broken here: the answer is at or before middle.
            end = middle
    return start
# To verify run
# pytest --doctest-modules find_the_odd_value.py
Exercise: Generalized find the odd value
-
Given a list of values, we know that every value comes in groups of N except one group that has less than N element. Given the list and the number N find where it starts:
-
f(["a", "a", "a", "b", "b", "b", "x", "d", "d", "d"], 3) would return 6
-
f(["a", "a", "a", "b", "b", "b", "x", "y", "d", "d", "d"], 3) would return 6
Solution: Generalized Find the odd value
def find_odd(values, size=2):
    '''
    Return the index where the single incomplete group of ``values`` starts.

    The values are expected to come in runs of ``size`` adjacent equal
    elements, except for one group that has fewer than ``size`` elements.
    Complete groups located before the incomplete one start at indexes that
    are multiples of ``size``, so a binary search over those indexes finds
    the place where this alignment breaks.

    Raises Exception if the number of elements is divisible by ``size``
    (which includes the empty list).

    >>> find_odd(['c'])
    0
    >>> find_odd(['c', 'x', 'x'])
    0
    >>> find_odd(['x', 'x', 'c'])
    2
    >>> find_odd(['x', 'x', 'c', 'y', 'y'])
    2
    >>> find_odd(['a', 'a', 'b', 'b', 'd', 'd', 'x', 'x', 'c', 'y', 'y'])
    8
    >>> find_odd(['a', 'a', 'c', 'b', 'b', 'd', 'd', 'x', 'x', 'y', 'y'])
    2

    >>> find_odd(['c'], 3)
    0
    >>> find_odd(['c', 'd'], 3)
    0
    >>> find_odd(['c', 'x', 'x', 'x'], 3)
    0
    >>> find_odd(['c', 'd', 'x', 'x', 'x'], 3)
    0
    >>> find_odd(['x', 'x', 'x', 'c', 'd'], 3)
    3
    >>> find_odd(['x', 'x', 'x', 'c', 'd', 'y', 'y', 'y'], 3)
    3
    >>> find_odd(['a', 'a', 'a', 'b', 'b', 'b', 'd', 'd', 'd', 'x', 'x', 'x', 'c', 'y', 'y', 'y'], 3)
    12
    >>> find_odd(['a', 'a', 'a', 'b', 'b', 'b', 'd', 'd', 'd', 'x', 'x', 'x', 'c', 'q', 'y', 'y', 'y'], 3)
    12
    >>> find_odd(['a', 'a', 'a', 'c', 'b', 'b', 'b', 'd', 'd', 'd', 'x', 'x', 'x', 'y', 'y', 'y'], 3)
    3
    >>> find_odd(['a', 'a', 'a', 'b', 'b', 'b', 'c', 'z', 'd', 'd', 'd', 'x', 'x', 'x', 'y', 'y', 'y'], 3)
    6
    '''
    if len(values) % size == 0:
        raise Exception(f"Number of elements must not be divisible by {size}")

    # Invariant: start is a multiple of size and the incomplete group begins
    # somewhere in values[start:end+1].
    start = 0
    end = len(values) - 1
    while end - start >= size:
        # Halve the remaining range (independently of the group size) and
        # align the probe to the beginning of a would-be group.
        middle = start + (end - start) // 2
        middle -= middle % size
        if all(values[middle] == val for val in values[middle + 1:middle + size]):
            # A complete, aligned group: everything up to it is complete too.
            start = middle + size
        else:
            # The alignment is already broken here: the answer is at or before middle.
            end = middle
    return start
# To verify run
# pytest --doctest-modules generalized_find_the_odd_value.py
Exercise: Shortest sublist with sum over limit
-
Given a list of integers [10, 12, 35, 7] and a number e.g. 25 return the length of the shortest sublist where the sum of the numbers is greater than or equal to the given number. If no such sublist can be found return -1.
-
A few examples:
>>> shortest([], 7)
-1
>>> shortest([2, 3], 7)
-1
>>> shortest([2, 3], 0)
0
>>> shortest([], 0)
0
>>> shortest([7, 3], 7)
1
>>> shortest([4, 7, 3], 7)
1
>>> shortest([1, 23, 1, 1, 10, 11, 12], 30)
3
>>> shortest([1, 23, 1, 1, 10, 11, 12], 24)
2
>>> shortest([1, 10, 11, 40], 30)
1
Solution: Shortest sublist with sum over limit
def shortest(numbers, limit):
    '''
    Return the length of the shortest contiguous sublist of ``numbers`` whose
    sum is greater than or equal to ``limit``, or -1 if there is none.

    A limit of zero or less is already satisfied by the empty sublist, so the
    result is 0 in that case. The implementation is a sliding window: the
    window is extended to the right until the sum reaches the limit and then
    shrunk from the left as long as the sum stays at or above it.
    NOTE: the sliding window is only guaranteed to find the shortest sublist
    when the numbers are non-negative.

    >>> shortest([], 7)
    -1
    >>> shortest([2, 3], 7)
    -1
    >>> shortest([2, 3], 0)
    0
    >>> shortest([], 0)
    0
    >>> shortest([], -1)
    0
    >>> shortest([7, 3], 7)
    1
    >>> shortest([4, 7, 3], 7)
    1
    >>> shortest([1, 23, 1, 1, 10, 11, 12], 30)
    3
    >>> shortest([1, 23, 1, 1, 10, 11, 12], 24)
    2
    >>> shortest([1, 10, 11, 40], 30)
    1
    '''
    if limit <= 0:
        return 0

    length = None
    start = 0
    total = 0
    for end, number in enumerate(numbers):
        total += number
        # Shrink from the left while the window still satisfies the limit;
        # the start <= end guard keeps the window from becoming negative.
        while total >= limit and start <= end:
            window = end - start + 1
            length = window if length is None else min(length, window)
            total -= numbers[start]
            start += 1
    return -1 if length is None else length
# To verify run
# pytest --doctest-modules shortest_sublist.py
Refactor
Refactoring example - change variable name
data = ['Mercury', 'Venus', 'Earth', 'Mars', 'Jupiter', 'Saturn']
for i in data:
print(i)
celestical_objects = ['Mercury', 'Venus', 'Earth', 'Mars', 'Jupiter', 'Saturn']
for planet in celestical_objects:
print(planet)
How to Refactor
- Write tests that will verify the behaviour (or at least compare the new behavior to the old behavior)
- Make a small change.
- Run the tests.
Exercise: Fix deep indentation
import re
import sys
print("Welcome to D3L3.1415 TELEphone InDX. Please wait while we fetch all phones in the document")
if len(sys.argv) != 2:
print("Invalid argument number. D3L3.1415 rules are for your own good, please try again")
exit()
phone = []
print('the TELEphone numbers are: ')
path = sys.argv[1]
with open(path, 'r') as fh:
for line in fh:
match = re.search(r' .+-.+', line)
if match:
splinter = match.group(0).split()
for check in splinter:
b = list(check)
a = b[len(b) - 1]
if (ord(a) >= 48) and (ord(a) <= 57):
phone.append(check)
for tele in phone:
print(tele)
import sys
import os
def getinput():
    '''
    Return the text to be analyzed, based on the first command line argument.

    If sys.argv[1] is the path of an existing file, the content of the file
    is returned as a single string with the newline characters removed.
    Otherwise the argument itself is returned.

    Exits with a usage message if no argument was given.
    '''
    if len(sys.argv) < 2:
        # str.center() accepts only a single fill character, the original
        # two-character ' -' fill raised TypeError instead of showing the usage.
        sys.exit(f' USE: {sys.argv[0]} FILE '.center(40, '-'))

    source = sys.argv[1]
    if not os.path.isfile(source):
        return source

    with open(source) as fh:
        return ''.join(line.strip('\n') for line in fh)
#def funique(items):
# unique = []
#
# filt = [' ', '.', ',', ':']
# items = list(filter(lambda x : x not in filt ,items))
#
# for item in items:
# if item.split()[0] not in unique:
# unique.append(item.split()[0])
# return(unique)
# def count(unique,items):
#
# count = [0]*len(unique)
# print(count)
# diction = {unique[i]:count[i] for i in range(len(unique))}
# print(type(diction))
# for u in unique:
# print('from count unique: ',u)
# for item in items:
# print('from count item: ',item)
# if u == item:
# print(True)
# print(type(u))
# print(diction['A'],diction['T'])
# diction[u][0] += 1
# print(id(diction['A']),id(diction['T']))
# print(diction)
# return
# return(diction)
# In[162]:
def count(items):
    '''
    Return a dictionary mapping each element of ``items`` to the number of
    times it occurs. Spaces, dots, commas and colons are not counted.
    '''
    ignored = [' ', '.', ',', ':']
    tally = {}
    for element in items:
        if element in ignored:
            continue
        tally[element] = tally.get(element, 0) + 1
    return tally
def out(diction):
    '''
    Print one line for each key of ``diction`` in sorted key order, showing
    the key, its count and the percentage of the count in the total.

    Uses only the dictionary received as parameter (the original code read
    the module level variable ``d`` instead of its argument).
    '''
    summ = sum(diction.values())
    for key in sorted(diction):
        percent = (diction[key] / summ) * 100
        print(f'{key:<2}{diction[key]:<3}-{percent:>6.2f} %')
items = getinput()
d = count(items)
out(d)
Overview of Python syntax
Scalars
- Numbers (int, float)
- Strings (str)
- Boolean
Numbers
a = 23
b = 2.3
c = a + b
d = a - b
e = a * b
f = a / b
g = a // b # int(a/b)
m = a % 7 # modulo
x = a ** 2 # exponent
Strings
a = "double quote"
b = 'single quote'
c = """
multi
line
can use either single or double quotes
"""
d = f"f-string with {a} or with {b}"
e = r"\raw\string\to\keep\backslashes"
x = a + b
var[3]
var[3:7]
len(var)
ord(char)
chr(number)
var.title()
var.upper()
var.lower()
var.index(sub)
var.rindex(sub)
var.find(sub)
short in long
if short in long:
print('in')
':'.join(list_of_strings)
some_string.split(':')
int - float - string conversion
int()
float()
str()
Booleans
True
False
Lists
fruits = ['apple', 'banana', 'peach', 'pear']
fruits[2]
fruits[1:3]
fruits[::2]
fruits[:]
len(fruits)
import copy
copy.copy(fruits) # shallow copy
copy.deepcopy(fruits)
element in some_list
if element in some_list:
print('in')
fruits.index(sub) # return location or raises Exception
fruits.insert(location, anothe_fruit)
fruits.append(another_fruit)
fruits.remove(some_fruit) # remove by value
fruits.pop(location) # remove by location
list()
fruits.sort()
sorted(fruits)
Queue and Stack
Stack:
append
pop
Queue:
append
pop(0)
from collections import deque
Stack
fruits.append(...)
fruits.pop()
Queue
fruits = deque()
fruits.append(...)
fruits.popleft()
Dictionaries
Tuples
"inmutable list"
tuple()
fruits = ('apple', 'banana', 'peach')
Sets
set()
set(some_list)
fruits = {'apple', 'banana', 'peach'}
I/O
print(var)
print(var, end=" ", sep="")
STDIN - Standard input
input("Some question: ")
CLI
sys.argv
argparse
Control flow
- Loops
- Conditionals
- Boolean operators
- Conditional (ternary) operator
- Exceptions
While - Loops
while cond:
pass
break
continue
For - Loops
for var in some_string:
print(var)
for var in range(3, 15, 2):
print(var)
for var in some_list:
print(var)
for var in some_iterable:
print(var)
for var in some_iterable:
print(var)
else:
print("finished iterating")
Conditionals
if cond1:
pass
elif cond1:
pass
else:
pass
Comparison operators
==
!=
<
<=
>=
>
Boolean operators
and
or
not
The conditional (ternary) operator
result = this if condition else that
Random Values
import random
random.seed(42)
random.random()
random.randrange(1, 7)
random.choice(values)
random.sample(values)
Math
import math
math.pi
math.sin()
Exceptions
raise Exception("Some text")
raise ValueError("Some text")
try:
# Risky code
except Exception as err:
# Handle exception
Files
(Plain text, CSV, Excel, JSON, YAML)
Functions
Modules
bytes
Exception handling
Flake8 Pylint assert
Serialization (Marshalling)
Why Serialization is needed?
- Data transfer between processes on the same computer
- Network traffic
- Storing data for later reuse in another process
Questions to ask
- Which programming languages support it besides Python?
- Can the files be accessed on other operating systems, other architectures, different versions of Python?
- How long does it take to store additional entry?
- How long does it take to access an entry?
- How much memory is needed? Do we need to read the whole file or can we read records?
- How much disk-space is being used for the serialized data?
Various tools for serialization
- Plain text
- CSV
- JSON
- YAML
- XML
- Matlab format savemat loadmat
- pickle (python only)
- marshal (internal usage)
- Protobuf
- HDF5 in python: h5py
- parquet in python: parquet
Serialization with h5py
- HDF5 - Hierarchical Data Format - supports n-dimensional datasets and each element in the dataset may itself be a complex object.
- docs
TODO: fix these
import h5py
import os
import sys
import numpy as np
filename = 'counter.h5'
if len(sys.argv) == 1:
if not os.path.exists(filename):
print("counter does not exist yet")
exit(1)
with h5py.File(filename, 'r') as hdf:
for name in hdf.keys():
print(f"{name}: {hdf[name][0]}")
exit()
if not os.path.exists(filename):
with h5py.File(filename, 'w') as hdf:
pass
with h5py.File(filename, 'r+') as hdf:
for name in sys.argv[1:]:
if name not in hdf:
hdf[name] = np.zeros(1, dtype=int)
hdf[name][0] += 1
print(f"{name}: {hdf[name][0]}")
import h5py
#import numpy as np
#
#original_data = []
#
#count = 10
#size = (2, 5)
filename = 'data.h5'
#
#for _ in range(count):
# row = np.random.random(size)
# print(row)
# original_data.append(row)
with h5py.File(filename, 'w') as hdf:
hdf["a"] = 23
hdf["b"] = 19
with h5py.File(filename, 'r') as hdf:
print(hdf) # <HDF5 file "data.h5" (mode r)>
print(hdf.keys()) # <KeysViewHDF5 ['a', 'b']>
for key in hdf.keys():
print(key, hdf[key])
import sys
import h5py
filename = sys.argv[1]
with h5py.File(filename, 'r') as hdf:
loaded = hdf['data'][:]
print(len(loaded))
print(type(loaded))
print(loaded.size)
print(loaded.shape)
print(type(loaded[0]))
print(loaded[0].size)
print(loaded[0].shape)
Serialization of single Numpy array
pip install numpy
pip install scipy
pip install h5py
pip install protobuf
import os
import sys
import json
import numpy as np
import h5py
import scipy.io
import pickle
def main():
size = (2, 4)
if len(sys.argv) == 3:
size = (int(sys.argv[1]), int(sys.argv[2]))
print(f"size: {size}\n")
original = np.random.random(size)
#print(original)
try_json(original)
try_pickle(original)
try_matlab(original)
try_hdf5(original)
def try_json(original):
with open('demo.json', 'w') as fh:
json.dump(original, fh, default=lambda obj: obj.tolist())
with open('demo.json') as fh:
loaded = np.array(json.load(fh)) #, default=lambda obj: obj.tolist())
#print(loaded)
assert np.array_equal(original, loaded)
print(f"json: {os.path.getsize('demo.json'):7}")
def try_pickle(original):
with open('demo.pickle', 'wb') as fh:
pickle.dump(original, fh, pickle.HIGHEST_PROTOCOL)
with open('demo.pickle', 'rb') as fh:
loaded = pickle.load(fh)
assert np.array_equal(original, loaded)
print(f"pickle: {os.path.getsize('demo.pickle'):7}")
def try_matlab(original):
scipy.io.savemat('demo.mat', {'data': original})
mat = scipy.io.loadmat('demo.mat')
loaded = mat['data']
assert np.array_equal(original, loaded)
print(f"matlab: {os.path.getsize('demo.mat'):7}")
def try_hdf5(original):
with h5py.File('demo.h5', 'w') as hdf:
hdf['data'] = original
with h5py.File('demo.h5', 'r') as hdf:
loaded = hdf['data'][:] # [:] is needed to copy the content
assert np.array_equal(original, loaded)
print(f"hdf5: {os.path.getsize('demo.h5'):7}")
main()
- try to
gzip
the JSON file and maybe also the others and see the sizes.
Serialization of multiple Numpy arrays
- hdf5 allows you to access specific array without loading the whole data structure into memory.
- Same with SQLite, but it is much bigger.
import os
import sys
import glob
import json
import sqlite3
import numpy as np
import h5py
import scipy.io
import pickle
def main():
for path in glob.glob("demo*"):
os.unlink(path)
if len(sys.argv) != 4:
exit(f"Usage: {sys.argv[0]} ROWS, COLS, COUNT")
size = (int(sys.argv[1]), int(sys.argv[2]))
count = int(sys.argv[3])
print(f"size: {size} count {count}\n")
originals = [np.random.random(size) for _ in range(count)]
#print(originals)
try_json(originals)
try_pickle(originals)
try_matlab(originals)
try_hdf5(originals)
try_hdf5_separate(originals)
try_sqlite(originals)
def try_json(originals):
with open('demo.json', 'w') as fh:
json.dump(originals, fh, default=lambda obj: obj.tolist())
with open('demo.json') as fh:
loaded = np.array(json.load(fh)) #, default=lambda obj: obj.tolist())
#print(loaded)
assert np.array_equal(originals, loaded)
print(f"json: {os.path.getsize('demo.json'):7}")
def try_pickle(originals):
with open('demo.pickle', 'wb') as fh:
pickle.dump(originals, fh, pickle.HIGHEST_PROTOCOL)
with open('demo.pickle', 'rb') as fh:
loaded = pickle.load(fh)
assert np.array_equal(originals, loaded)
print(f"pickle: {os.path.getsize('demo.pickle'):7}")
def try_matlab(originals):
scipy.io.savemat('demo.mat', {'data': originals})
mat = scipy.io.loadmat('demo.mat')
loaded = mat['data']
assert np.array_equal(originals, loaded)
print(f"matlab: {os.path.getsize('demo.mat'):7}")
def try_hdf5(originals):
with h5py.File('demo.h5', 'w') as hdf:
hdf['data'] = originals
with h5py.File('demo.h5', 'r') as hdf:
loaded = hdf['data'][:] # [:] is needed to copy the content
assert np.array_equal(originals, loaded)
#print(loaded)
print(f"hdf5: {os.path.getsize('demo.h5'):7}")
# Don't load all the data in memory when reading
def try_hdf5_separate(originals):
with h5py.File('demo.hdf5', 'w') as hdf:
hdf['data'] = originals
for ix in range(len(originals)):
with h5py.File('demo.hdf5', 'r') as hdf:
loaded = hdf['data'][ix][:] # [:] is needed to copy the content
#print(loaded)
assert np.array_equal(originals[ix], loaded)
print(f"hdf5: {os.path.getsize('demo.hdf5'):7}")
# Don't load all the data in memory when reading
def try_sqlite(originals):
    """
    Store each array of ``originals`` as a pickled BLOB in its own row of the
    ``arrays`` table of demo.db, then read the rows back one by one and
    verify that each one equals the corresponding original array.

    Every connection is closed as soon as it is not needed any more (the
    original code opened a new connection for each row and never closed it).
    Prints the size of the database file at the end.
    """
    conn = sqlite3.connect("demo.db")
    try:
        curs = conn.cursor()
        curs.execute('''CREATE TABLE arrays (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            array BlOB NOT NULL
        )''')
        sql = '''INSERT INTO arrays (array) VALUES (?)'''
        pickled = [pickle.dumps(arr, pickle.HIGHEST_PROTOCOL) for arr in originals]
        # executemany needs a list of tuples for the placeholder
        curs.executemany(sql, [(arr,) for arr in pickled])
        conn.commit()
    except sqlite3.OperationalError as err:
        print(f'sqlite error: {err.args[0]}')
    finally:
        conn.close()

    # Fetch the arrays one at a time, without loading all the data in memory.
    for ix, original in enumerate(originals, start=1):
        conn = None
        try:
            conn = sqlite3.connect("demo.db")
            curs = conn.cursor()
            sql = '''SELECT array FROM arrays WHERE id == ?'''
            curs.execute(sql, (ix,))
            # Unpickling is acceptable here only because the data was written
            # by this very function; never unpickle data from untrusted sources.
            loaded = pickle.loads(curs.fetchone()[0])
        except sqlite3.OperationalError as err:
            print(f'sqlite error: {err.args[0]}')
            exit()
        finally:
            if conn is not None:
                conn.close()
        assert np.array_equal(original, loaded)
    print(f"sqlite: {os.path.getsize('demo.db'):7}")
main()
Other slides
Other slides
Some slides that used to be part of the material and they might return to be there, but for now they were parked here.
Atom for Python
Some details about the Atom editor. You can freely skip this part. Personally I don't use it now.
Autocomplete
- apm install autocomplete-python
Autocomplete
- easy_install jedi
- apm install autocomplete-plus-python-jedi
Linter
- easy_install flake8
- easy_install flake8-docstrings
- apm install linter
- apm install linter-flake8
IDLE - Integrated DeveLopment Environment
-
IDLE
-
Python shell
-
Better editing
-
Limited debugger
-
c:\Python27\Lib\idlelib\idle.bat
-
C:\Users\Gabor\AppData\Local\Programs\Python\Python35\Lib\idlelib\idle.bat
sh-bang - executable on Linux/Apple
#!/usr/bin/env python
print("Hello World")
- The first line starting with # is needed if you want to have a file that can be executed without explicitly typing in python as well.
- Make your file executable: chmod u+x hello_ex.py
- Run like: ./hello_ex.py
- In order to run it as hello_ex.py it needs to be located in one of the directories listed in the PATH environment variable.
pydoc
If you really want it, you can also read some of the documentation on the command line, but unless you are locked up some place without Internet connection, I don't recommend this.
Type pydoc
. On Windows, you might need to create the following file and put it in a directory in your PATH. (see echo %PATH%
)
@python c:\Python27\Lib\pydoc.py %*
Spyder Intro
- iPython console (bottom right)
- Spyder-Py2 / Preferences / Console / Advanced Settings
- Save the file (Ctrl-S / Command-S)
- Run/Run (F5)
- F9 - execute selected text (e.g. we can execute a function definition after we've changed it)
- TAB to autocomplete names of already existing variables.
print("abc")
"abc". shows the available methods.
"abc".center Command-I will explain what is "center"
Interactive Debugging
def f(a, b):
c = a + b
d = a * b
return c+d
def run():
print(f(2, 3))
import code
code.interact(local=locals())
print(f(19, 23))
run()
Parameter passing
def hello(name):
msg = name + '!!!!'
print('Hello ' + msg)
hello('Foo')
hello('Bar')
Hello Foo!!!!
Command line arguments and main
import sys
def hello(name):
msg = name + '!!!!'
print('Hello ' + msg)
def main():
hello(sys.argv[1])
main()
Run as python argv.py Foo
Later we'll see the argparse
module that can handle command line arguments in a better way.
Name of the current function in Python
- inspect
- currentframe
- stack
import inspect
def first():
print(inspect.currentframe().f_code.co_name)
print(inspect.stack()[0][3])
second()
def second():
print(inspect.currentframe().f_code.co_name)
print(inspect.stack()[0][3])
def main():
first()
main()
Name of the caller function in Python
- inspect
- stack
import inspect
def first():
print("in first")
print("Called by", inspect.stack()[1][3])
second()
def second():
print("in second")
print("Called by", inspect.stack()[1][3])
def main():
first()
main()
Stack trace in Python using inspect
- inspect
- stack
import inspect
def first():
second()
def second():
for info in inspect.stack():
#print(info)
#FrameInfo(
# frame=<frame at 0x1c18b18, file 'stack_trace.py', line 9, code second>,
# filename='stack_trace.py',
# lineno=8,
# function='second',
# code_context=[' for level in inspect.stack():\n'],
# index=0)
#print(info.frame)
print(info.filename)
print(info.lineno)
print(info.function)
print(info.code_context)
print('')
def main():
first()
if __name__ == '__main__':
main()
stack_trace.py
8
second
[' for info in inspect.stack():\n']
stack_trace.py
4
first
[' second()\n']
stack_trace.py
26
main
[' first()\n']
stack_trace.py
30
<module>
[' main()\n']
Getting the class name of an object
- class
- name
- type
How to find out which class an object (instance) belongs to?
import re
a = 2
b = "3"
c = 2.3
m = re.search(r'\d', str(c))
print(a.__class__) # <type 'int'>
print(b.__class__) # <type 'str'>
print(c.__class__) # <type 'float'>
print(type(a)) # <type 'int'>
print(type(b)) # <type 'str'>
print(type(c)) # <type 'float'>
print(a.__class__.__name__) # int
print(b.__class__.__name__) # str
print(c.__class__.__name__) # float
print(re.__class__.__name__) # module
print(m.__class__.__name__) # SRE_Match or Match
Circular references
Circular references are cleaned up by the garbage collector, but maybe not all the memory is given back to the OS, and it can take some time to clean them up.
import time
def create_pair():
a = {'name' : 'Foo'}
b = {'name' : 'Bar'}
a['pair'] = b
b['pair'] = a
#print(a)
for i in range(1, 30000000):
create_pair()
print("let's sleep now a bit")
time.sleep(20)
but weakref might expedite the cleanup. See also the gc module and if I can show it http://stackoverflow.com/questions/2428301/should-i-worry-about-circular-references-in-python
Context managers: with (file) experiments
with open('out.txt', 'w') as h:
h.write("hello\n")
h = open('out.txt')
print(h.read())
f = open('out.txt', 'w')
f.write("hello\n")
f.close()
# for line in open("myfile.txt"):
# print line,
# the file is closed only when script ends
range vs xrange in Python
- range
- xrange
from __future__ import print_function
import sys
r = range(1000)
x = xrange(1000)
for v in r: # 0..999
pass
for v in x: # 0..999
pass
print(sys.getsizeof(r)) # 8072
print(sys.getsizeof(x)) # 40
In Python 2 range
creates a list of values range(from, to, step)
and xrange
creates an iterator.
In Python 3 range
creates the iterator and if really necessary then list(range())
can create the list.
profile (with hotshot) slow code
It was experimental and dropped from Python 3
import slow
import os
import hotshot, hotshot.stats
prof = hotshot.Profile("slow.prof")
prof.runcall(slow.main, 1000)
prof.close()
stats = hotshot.stats.load("slow.prof")
stats.strip_dirs()
stats.sort_stats('time', 'calls')
stats.print_stats(20)
os.remove("slow.prof")
501501 function calls in 0.337 seconds
Ordered by: internal time, call count
ncalls tottime percall cumtime percall filename:lineno(function)
498501 0.192 0.000 0.192 0.000 slow.py:37(swap)
1 0.136 0.136 0.335 0.335 slow.py:21(sort)
999 0.006 0.000 0.006 0.000 slow.py:4(f)
999 0.002 0.000 0.002 0.000 random.py:173(randrange)
1 0.001 0.001 0.003 0.003 slow.py:31(get_str)
999 0.000 0.000 0.000 0.000 slow.py:10(g)
1 0.000 0.000 0.337 0.337 slow.py:14(main)
0 0.000 0.000 profile:0(profiler)
Python Descriptors
- init
- get
- set
- delete
A more manual way to implement the property() functionality we have just seen. Use cases:
-
Implement type-checking and/or value checking for attribute setters ()
Python from .NET
TODO and add to dotnet
TODO: example with async call in .NET getting back to python
Matplotlib subplot
- Generates a separate graph, but when saving to disk, the image is blank
fig, ax = plt.subplots()
ax.plot(
[ 1, 2, 3, 4 ],
[ 10, 3, 45, 5 ],
)
Jupyter StackOverflow - histogram
# Histogram of the top 20 countries
first20.hist(bins = 20)
# Plot using Seaborn
plot = sns.relplot(data = first20)
plot.set_xticklabels(rotation=90)
Jupyter StackOverflow - OpenSourcer
df['OpenSourcer'].value_counts()
df['OpenSourcer'].unique()
Jupyter StackOverflow - cross tabulation
# Crosstabulation
first10 = country_count.head(10)
subset = df[ df['Country'].isin( first10.keys() ) ]
# subset.count()
# subset['OpenSourcer'].value_counts()
grouped = subset.groupby('Country')['OpenSourcer'].value_counts()
# grouped.plot.bar(figsize=(15,15))
pd.crosstab(subset['Country'], df['OpenSourcer'])
ct = pd.crosstab(subset['Country'], df['OpenSourcer']).apply(lambda r: 100 * r/r.sum(), axis=1)
ct
ct.transpose().hist(figsize=(15, 15))
Jupyter StackOverflow - salaries
# Try to show the average salary by country
grp = df.groupby('Country').mean().round({'CompTotal' : 0})
#grp['CompTotal']
pd.set_option('display.float_format', lambda x: '{:,}'.format(x))
grp.sort_values('CompTotal', ascending=False)
Jupyter StackOverflow - replace values
nd = df.replace({'OpenSourcer' : {
'Never' : 0,
'Less than once per year' : 1,
'Less than once a month but more than once per year' : 2,
'Once a month or more often' : 3,
} })
nd
nd.describe()
nd.groupby('Country').mean().sort_values('OpenSourcer', ascending=False)
NameError
python common_error.py 42
import sys
if len(sys.argv) != 2:
exit(f"Usage: {sys.argv[0]} Number")
if 42 < int(sys.argv[1]):
res = "bigger"
elif int(sys.argv[1]) < 42:
res = "smaller"
print(res)
# NameError: name 'res' is not defined
UnboundLocalError
python common_error_in_function.py 42
import sys
def check():
if len(sys.argv) != 2:
exit(f"Usage: {sys.argv[0]} Number")
if 42 < int(sys.argv[1]):
res = "bigger"
elif int(sys.argv[1]) < 42:
res = "smaller"
print(res)
check()
# UnboundLocalError: local variable 'res' referenced before assignment
Insert element in sorted list using bisect
- bisect
import bisect
solar_system = ['Earth', 'Jupiter', 'Mercury', 'Saturn', 'Venus']
name = 'Mars'
# Find the location where to insert the element to keep the list sorted
loc = bisect.bisect(solar_system, name)
print(loc)
solar_system.insert(loc, name)
print(solar_system)
print(sorted(solar_system))
Gravatar in Python
import hashlib
import sys
def gravatar(email):
    """Return the hex MD5 digest of the trimmed, lower-cased email address."""
    normalized = email.strip().lower()
    digest = hashlib.md5(normalized.encode('utf8'))
    return digest.hexdigest()
if len(sys.argv) != 2:
exit(f"Usage: {sys.argv[0]} EMAIL")
email = sys.argv[1]
code = gravatar(email)
print(f"https://www.gravatar.com/avatar/{code}?s=100&d=blank")
Debug with ptpython
pip install ptpython
- Then either use it as a REPL to explore code or make your application fall back into this REPL to debug your code.
import requests
from ptpython.repl import embed
res = requests.get("https://code-maven.com/")
embed(globals(), locals())
print("done")
REPL - Interactive debugging with ptpython
from ptpython.repl import embed
x = 32
embed(globals(), locals())
y = 42
print('OK')
Print in color on the CLI
- colorama
from colorama import Fore, Back, Style
print('default color text')
print(Fore.RED + 'red text' + Style.RESET_ALL)
print(Back.GREEN + 'black with green background' + Style.RESET_ALL)
print(Fore.YELLOW + Back.BLACK + 'yellow text with black background' + Style.RESET_ALL)
print('default color text')
print(Fore.RED)
print('red text')
print(Back.BLACK)
print('red text black background')
print(Style.RESET_ALL)
print('back to default color')
Easy Install
$ easy_install module_name
- Installing pip on Windows as well:
easy_install pip
Will work on Windows as well.
easy_install -d ~/python Genshi
sorting with sorted using a key
To sort the list according to length using sorted
animals = ['snail', 'cow', 'elephant', 'chicken']
animals_in_abc = sorted(animals)
print(animals)
print(animals_in_abc)
animals_by_length = sorted(animals, key=len)
print(animals_by_length)
['snail', 'cow', 'elephant', 'chicken']
['chicken', 'cow', 'elephant', 'snail']
['cow', 'snail', 'chicken', 'elephant']
get and set locale
- locale
- LC_CTYPE
- getlocale
- setlocale
import locale
print(locale.getlocale(locale.LC_CTYPE))
locale.setlocale(locale.LC_CTYPE, 'en_US.UTF-8')
print(locale.getlocale(locale.LC_CTYPE))
locale.setlocale(locale.LC_CTYPE, 'en_IL.UTF-8')
print(locale.getlocale(locale.LC_CTYPE))
##locale.setlocale(locale.LC_CTYPE, 'ZH.UTF-8')
#print(locale.getlocale(locale.LC_CTYPE))
Modify time anomaly
Without calling flush the modify-time of the two files will be the same, even if we sleep 0.001 seconds, despite the fact that the filesystem provides more accurate values.
If we wait a bit between the calls, or if we flush the buffer of the file, then the timestamps will be different.
import os
import time
with open("first.txt", "w") as fh:
fh.flush()
pass
print(f"time: {time.time()}")
#time.sleep(0.01)
with open("second.txt", "w") as fh:
pass
first = os.path.getmtime("first.txt")
second = os.path.getmtime("second.txt")
print(first)
print(second)
print("same" if first == second else "diff")
Some strange code
folder = "animals/"
image = "cat"
fname = f'{folder}images/{image}.jpg',
config = {
'file_name': fname[0],
'title': "Animals",
}
print(config)
is vs ==
a = 1
b = a
c = 1
print(a == b) # True
print(a == c) # True
print(a is b) # True
print(a is c) # True
a = {"name": "Foo"}
b = a
c = {"name": "Foo"}
print(a == b) # True
print(a == c) # True
print(a is b) # True
print(a is c) # False
print_function
from __future__ import print_function
print(23)
Dividers (no break or continue)
We will see how break and continue work, but first let's see a loop to find all the dividers of a number n.
# Print every i (2 <= i < n) that divides n, using a plain while loop.
i = 2
n = 3*5*7
while i < n:
    # Check the remainder. In Python 3 "/" is float division, so the
    # expression (n / i) * i == n would be true for practically every i.
    if n % i == 0:
        print('{:2} divides {}'.format(i, n))
    i = i + 1
3 divides 105
5 divides 105
7 divides 105
15 divides 105
21 divides 105
35 divides 105
Remove file
- os.remove
- os.unlink
Modules: more
-
sys.modules
-
imp.reload
-
reload
-
sys.modules to list loaded modules
-
imp.reload to reload a module in Python 3 (plain reload in Python 2; importlib.reload since Python 3.4, and the imp module was removed in Python 3.12)
import __builtin__
def xx(name):
print("hello")
__builtin__.__import__ = xx;
print('body')
def f():
print("in f")
import sys
print('mod' in sys.modules) # False
import mod
print('mod' in sys.modules) # True
print(sys.modules['mod'])
# <module 'mod' from '/stuff/python/examples/modules/mod.py'>
print(sys.modules["sys"]) # <module 'sys' (built-in)>
import hooks
- import
Python resources
- Central Python site
- Python documentation
- Learning Python the Hard way
- Python Weekly
- PyCoder's Weekly
Progress bar
# http://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console
import time, sys
for i in range(10):
sys.stdout.write('\r' + '=' * i)
sys.stdout.flush()
time.sleep(1)
from __future__
from __future__ import print_function
from __future__ import division
or
from __future__ import print_function, division
See also future
We cannot import everything that is in future, because we don't know what will be in future in the future.... and we don't want to blindly change the behaviour of Python.
Variable scope
-
scope
-
There are two scopes: outside of all functions and inside of a function.
-
The first assignment to a variable defines it.
-
Variables that were declared outside all functions can be seen inside, but cannot be changed.
-
One can connect the outside name to an inside name using the 'global' keyword.
-
if and for blocks don't provide scoping.
a = 23
def main():
global b
b = 17
c = 42
print('a:', a) # a: 23
print('b:', b) # b: 17
print('c:', c) # c: 42
if True:
print('a:', a) # a: 23
print('b:', b) # b: 17
b = 99
print('b:', b) # b: 99
print('c:', c) # c: 42
print('a:', a) # a: 23
print('b:', b) # b: 99
print('c:', c) # c: 42
main()
print('a:', a) # a: 23
print('b:', b) # b: 99
print('c:', c) # c:
# Traceback (most recent call last):
# File "examples\basics\scope.py", line 27, in <module>
# print 'c:', c # c:
# NameError: name 'c' is not defined
global scope
scope
# x is global
x = 1
print(x, "- before sub")
def f():
#print(x, "- inside before declaration") # UnboundLocalError
x = 2
print(x, "- inside sub")
print(x, "- after sub declaration")
f()
print(x, "- after calling sub")
# 1 - before sub
# 1 - after sub declaration
# 2 - inside sub
# 1 - after calling sub
# x is global
def f():
#print(x, "- inside before declaration") # UnboundLocalError
x = 2
print(x, "- inside sub")
x = 1
print(x, "- before calling sub")
print(x, "- after sub declaration")
f()
print(x, "- after calling sub")
# 1 - before calling sub
# 1 - after sub declaration
# 2 - inside sub
# 1 - after calling sub
If we declare a variable outside of all the subroutines, it does not matter if we do it before the sub declaration, or after it. In neither case has the global variable any presence inside the sub.
def f():
x = 2
print(x, "- inside sub")
# print(x, " - after sub declaration") # NameError
f()
# print(x, " - after calling sub") # NameError
# 2 - inside sub
A name declared inside a subroutine is not visible outside.
def f():
global x
# print(x) # NameError
x = 2
print(x, "- inside sub")
# print(x, " - after sub declaration") # NameError
f()
print(x, "- after calling sub")
# 2 - inside sub
# 2 - after calling sub
Unless it was marked using the global keyword.
type
- type
- name
# The outputs shown in the comments are those of Python 3.
# (Python 2 printed <type '...'> instead of <class '...'> and reported the
# old-style class Cold as 'classobj' and its instance as 'instance'.)
x = 2
y = '2'
z = [2, '2']
d = {}
def f():
pass
l = lambda q: q
class Cold():
pass
cold = Cold()
class Cnew(object):
pass
cnew = Cnew()
# r = xrange(10) # Python 3 does not have xrange
print(type(x)) # <class 'int'>
print(type(y)) # <class 'str'>
print(type(z)) # <class 'list'>
print(type(d)) # <class 'dict'>
print(type(f)) # <class 'function'>
print(type(l)) # <class 'function'>
print(type(Cold)) # <class 'type'>
print(type(cold)) # <class '__main__.Cold'>
print(type(Cnew)) # <class 'type'>
print(type(cnew)) # <class '__main__.Cnew'>
#print(type(r)) # <type 'xrange'>
print(type(x).__name__) # int
print(type(y).__name__) # str
print(type(z).__name__) # list
Look deeper in a list
x = ['abcd', 'efgh']
print(x) # ['abcd', 'efgh']
print(x[0:1]) # ['abcd']
print(x[0]) # 'abcd'
print(x[0][0]) # a
print(x[0][1]) # b
print(x[0][0:2]) # ab
More examples
import random
class Game:
def __init__(self):
self.lower_limit = 0
self.upper_limit = 200
self.number = random.randrange(self.lower_limit, self.upper_limit)
self.is_debug = False
self.running = True
def debug(self):
self.is_debug = not self.is_debug
def guess(self, num):
if num == 'd':
self.debug()
return
if self.is_debug:
print("Hidden number {}. Your guess is {}".format(self.number, num))
if num < self.number:
print("Too small")
elif num > self.number:
print("Too big")
else:
print("Bingo")
self.running = False
g = Game()
g.guess('d')
try:
g.guess('z')
except Exception as e:
print(e)
try:
g.guess('201')
except Exception as e:
print(e)
try:
g.guess('-1')
except Exception as e:
print(e)
Hidden number 137. Your guess is z
Not a Number z
Hidden number 137. Your guess is 201
Number 201 is too big
Hidden number 137. Your guess is -1
Number -1 is too small
import random


class SpaceShipError(Exception):
    """Base class of the errors raised by Game.guess; remembers the bad input."""

    def __init__(self, inp):
        # Hand the value to Exception as well so that e.args is populated.
        super().__init__(inp)
        self.inp = inp


class NumberTooBigError(SpaceShipError):
    """The guess is above the upper limit of the game."""

    def __str__(self):
        return "Number {} is too big".format(self.inp)


class NumberTooSmallError(SpaceShipError):
    """The guess is below the lower limit of the game."""

    def __str__(self):
        return "Number {} is too small".format(self.inp)


class NotANumberError(SpaceShipError):
    """The guess could not be converted to an integer."""

    def __str__(self):
        return "Not a Number {}".format(self.inp)


class Game:
    """Number guessing game with a hidden number in [lower_limit, upper_limit)."""

    def __init__(self):
        self.lower_limit = 0
        self.upper_limit = 200
        self.number = random.randrange(self.lower_limit, self.upper_limit)
        self.is_debug = False
        self.running = True

    def debug(self):
        """Toggle debug mode (debug mode reveals the hidden number on each guess)."""
        self.is_debug = not self.is_debug

    def guess(self, num):
        """Evaluate one guess.

        num is either the string 'd' (toggle debug mode) or something that
        int() can convert to a whole number.

        Raises:
            NotANumberError: num cannot be converted to an integer.
            NumberTooBigError: num is above upper_limit.
            NumberTooSmallError: num is below lower_limit.
        """
        if num == 'd':
            self.debug()
            return

        if self.is_debug:
            print("Hidden number {}. Your guess is {}".format(self.number, num))

        try:
            num = int(num)
        except (TypeError, ValueError, OverflowError) as err:
            raise NotANumberError(num) from err

        if num > self.upper_limit:
            raise NumberTooBigError(num)
        # Compare with the LOWER limit; comparing with upper_limit here
        # rejected every valid guess as "too small".
        if num < self.lower_limit:
            raise NumberTooSmallError(num)

        if num < self.number:
            print("Too small")
        elif num > self.number:
            print("Too big")
        else:
            print("Bingo")
            self.running = False


g = Game()
g.guess('d')

# Each of these inputs is invalid and triggers one of the custom exceptions.
for attempt in ('z', '201', '-1'):
    try:
        g.guess(attempt)
    except SpaceShipError as e:
        print(e)
#while g.running:
# guess = input("Please type in your guess: ")
# g.guess(int(guess))
This will run if there was no exception at all
Always executes. 6/2 ended.
Always executes. 6/0 ended.
Always executes. 6/a ended.
Traceback (most recent call last):
File "try.py", line 22, in <module>
main()
File "try.py", line 9, in main
divide(cnt, num)
File "try.py", line 3, in divide
return x/y
TypeError: unsupported operand type(s) for /: 'int' and 'str'
def divide(x, y):
return x/y
def main():
cnt = 6
for num in [2, 0, 'a']:
try:
divide(cnt, num)
except ZeroDivisionError:
pass
except (IOError, MemoryError) as err:
print(err)
else:
print("This will run if there was no exception at all")
finally:
print("Always executes. {}/{} ended.".format(cnt, num))
print("done")
main()
1
2
Fizz
4
Buzz
Fizz
7
8
Fizz
Buzz
11
Fizz
13
14
FizzBuzz
16
17
Fizz
19
Buzz
Fizz
22
23
Fizz
Buzz
26
Fizz
28
29
FizzBuzz
31
32
Fizz
34
Buzz
Fizz
37
38
Fizz
Buzz
41
Fizz
43
44
FizzBuzz
46
47
Fizz
49
Buzz
Fizz
52
53
Fizz
Buzz
56
Fizz
58
59
FizzBuzz
61
62
Fizz
64
Buzz
Fizz
67
68
Fizz
Buzz
71
Fizz
73
74
FizzBuzz
76
77
Fizz
79
Buzz
Fizz
82
83
Fizz
Buzz
86
Fizz
88
89
FizzBuzz
91
92
Fizz
94
Buzz
Fizz
97
98
Fizz
Buzz
def fizzbuzz():
    """Print 1..100, one item per line.

    Multiples of 15 are replaced by FizzBuzz, other multiples of 3 by Fizz
    and other multiples of 5 by Buzz.
    """
    for number in range(1, 101):
        if number % 15 == 0:
            line = "FizzBuzz"
        elif number % 3 == 0:
            line = "Fizz"
        elif number % 5 == 0:
            line = "Buzz"
        else:
            line = number
        print(line)


if __name__ == "__main__":
    fizzbuzz()
import fb
def test_fb(capsys):
fb.fizzbuzz()
out, err = capsys.readouterr()
assert err == ''
with open('expected.txt') as fh:
expected = fh.read()
assert out == expected
import sys
import os
import time
if len(sys.argv) != 3:
exit(f"Usage: {sys.argv[0]} FILENAME count")
filename, count = sys.argv[1:]
print(f"start {os.getpid()}")
time.sleep(1)
for _ in range(int(count)):
try:
if not os.path.exists(filename):
with open(filename, 'w') as fh:
fh.write("0\n")
with open(filename, 'r') as fh:
number = int(fh.readline())
number += 1
with open(filename, 'w') as fh:
#fh.seek(0,0)
fh.write(f"{number}\n")
except Exception:
pass
print(f"done {os.getpid()}")
import sys
import os
import time
if len(sys.argv) != 3:
exit(f"Usage: {sys.argv[0]} FILENAME count")
filename, count = sys.argv[1:]
print(f"start {os.getpid()}")
time.sleep(1)
for _ in range(int(count)):
#try:
if not os.path.exists(filename):
with open(filename, 'w') as fh:
fh.write("0\n")
with open(filename, 'r+') as fh:
number = int(fh.readline())
number += 1
fh.seek(0,0)
fh.write(f"{number}\n")
# with open(filename, 'w') as fh:
# fh.write(f"{number}\n")
#except Exception:
# pass
print(f"done {os.getpid()}")
import subprocess
import sys
if len(sys.argv) != 4:
exit(f"Usage: {sys.argv[0]} FILENAME count processes")
filename, count, process_count = sys.argv[1:]
command = [sys.executable, 'count.py', filename, count]
processes = []
for _ in range(int(process_count)):
processes.append(subprocess.Popen(command))
print('Started')
for proc in processes:
proc.communicate()
print('Done')
name,birthdate,weight,height
Alice Archer,1997-01-10,57.9,1.56
Ben Brown,1985-02-15,72.5,1.77
Chloe Cooper,1983-03-22,53.6,1.65
Daniel Donovan,1981-04-30,83.1,1.75
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "bc5909c3-0e34-46b7-af44-d8b59bbd1817",
"metadata": {},
"outputs": [],
"source": [
"!pip install polars"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0422d14b-b3d5-407b-8594-31633a056594",
"metadata": {},
"outputs": [],
"source": [
"import polars as pl\n",
"import datetime as dt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ef584243-839b-47a3-a34a-52b5d8d5d4c2",
"metadata": {},
"outputs": [],
"source": [
"df = pl.DataFrame(\n",
" {\n",
" \"name\": [\"Alice Archer\", \"Ben Brown\", \"Chloe Cooper\", \"Daniel Donovan\"],\n",
" \"birthdate\": [\n",
" dt.date(1997, 1, 10),\n",
" dt.date(1985, 2, 15),\n",
" dt.date(1983, 3, 22),\n",
" dt.date(1981, 4, 30),\n",
" ],\n",
" \"weight\": [57.9, 72.5, 53.6, 83.1], # (kg)\n",
" \"height\": [1.56, 1.77, 1.65, 1.75], # (m)\n",
" }\n",
")\n",
"\n",
"print(df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2943f6c6-2989-4b66-ac3f-37f218d578bb",
"metadata": {},
"outputs": [],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7904a22a-aced-4618-951c-80afaeaf7ba5",
"metadata": {},
"outputs": [],
"source": [
"dir(df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6cb1363a-0706-4fb8-9ce1-284c2ce14720",
"metadata": {},
"outputs": [],
"source": [
"df.head(2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "832f56a2-a543-4db5-ab41-be2c8cfa989a",
"metadata": {},
"outputs": [],
"source": [
"df.tail(2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a0ffa702-8f7e-488d-9175-82a2ae9c8738",
"metadata": {},
"outputs": [],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "362dc935-407a-44be-bbc9-1a7111b852ba",
"metadata": {},
"outputs": [],
"source": [
"df.write_csv(\"getting_started.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7bfde105-95dc-4b3f-9763-1f2e132b9b0e",
"metadata": {},
"outputs": [],
"source": [
"df_csv = pl.read_csv(\"getting_started.csv\", try_parse_dates=True)\n",
"print(df_csv)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70a7e566-33e1-471d-a71d-f7e645787355",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "58e1ecba-46f7-47f6-ad33-f708f07ee28d",
"metadata": {},
"outputs": [],
"source": [
"result = df.select(\n",
" pl.col(\"name\"),\n",
" (pl.col(\"weight\", \"height\") * 0.95).round(2).name.suffix(\"-5%\"),\n",
")\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "39d7eecf-dea3-409d-b904-5b260de8ca7d",
"metadata": {},
"outputs": [],
"source": [
"result = df.with_columns(\n",
" birth_year=pl.col(\"birthdate\").dt.year(),\n",
" bmi=pl.col(\"weight\") / (pl.col(\"height\") ** 2),\n",
")\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dbfce594-67a9-47d4-bdc1-5488f532163d",
"metadata": {},
"outputs": [],
"source": [
"result = df.filter(pl.col(\"birthdate\").dt.year() < 1990)\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1870d98c-d320-4fd3-8f03-4f43c8958a7c",
"metadata": {},
"outputs": [],
"source": [
"result = df.filter(\n",
" pl.col(\"birthdate\").is_between(dt.date(1982, 12, 31), dt.date(1996, 1, 1)),\n",
" pl.col(\"height\") < 1.7,\n",
")\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ac8411e7-5f8a-46ad-aca4-83892af2b80b",
"metadata": {},
"outputs": [],
"source": [
"result = df.group_by(\n",
" (pl.col(\"birthdate\").dt.year() // 10 * 10).alias(\"decade\"),\n",
" maintain_order=True,\n",
").len()\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "be3c0d55-c16c-4cda-8d04-c0fb56007833",
"metadata": {},
"outputs": [],
"source": [
"result = df.group_by(\n",
" (pl.col(\"birthdate\").dt.year() // 10 * 10).alias(\"decade\"),\n",
" maintain_order=True,\n",
").agg(\n",
" pl.len().alias(\"sample_size\"),\n",
" pl.col(\"weight\").mean().round(2).alias(\"avg_weight\"),\n",
" pl.col(\"height\").max().alias(\"tallest\"),\n",
")\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29c60262-5dbf-49d6-b27f-fd81e11068fb",
"metadata": {},
"outputs": [],
"source": [
"result = (\n",
" df.with_columns(\n",
" (pl.col(\"birthdate\").dt.year() // 10 * 10).alias(\"decade\"),\n",
" pl.col(\"name\").str.split(by=\" \").list.first(),\n",
" )\n",
" .select(\n",
" pl.all().exclude(\"birthdate\"),\n",
" )\n",
" .group_by(\n",
" pl.col(\"decade\"),\n",
" maintain_order=True,\n",
" )\n",
" .agg(\n",
" pl.col(\"name\"),\n",
" pl.col(\"weight\", \"height\").mean().round(2).name.prefix(\"avg_\"),\n",
" )\n",
")\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fe9d8375-97a1-40f1-8c17-7b4e2994059e",
"metadata": {},
"outputs": [],
"source": [
"df2 = pl.DataFrame(\n",
" {\n",
" \"name\": [\"Ben Brown\", \"Daniel Donovan\", \"Alice Archer\", \"Chloe Cooper\"],\n",
" \"parent\": [True, False, False, False],\n",
" \"siblings\": [1, 2, 3, 4],\n",
" }\n",
")\n",
"print(df2)\n",
"print(df.join(df2, on=\"name\", how=\"left\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "477e99a9-a13a-457c-bcb2-2f45154a86f7",
"metadata": {},
"outputs": [],
"source": [
"df3 = pl.DataFrame(\n",
" {\n",
" \"name\": [\"Ethan Edwards\", \"Fiona Foster\", \"Grace Gibson\", \"Henry Harris\"],\n",
" \"birthdate\": [\n",
" dt.date(1977, 5, 10),\n",
" dt.date(1975, 6, 23),\n",
" dt.date(1973, 7, 22),\n",
" dt.date(1971, 8, 3),\n",
" ],\n",
" \"weight\": [67.9, 72.5, 57.6, 93.1], # (kg)\n",
" \"height\": [1.76, 1.6, 1.66, 1.8], # (m)\n",
" }\n",
")\n",
"print(df3)\n",
"print(pl.concat([df, df3], how=\"vertical\"))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
1,5.1,3.5,1.4,0.2,Iris-setosa
2,4.9,3.0,1.4,0.2,Iris-setosa
3,4.7,3.2,1.3,0.2,Iris-setosa
4,4.6,3.1,1.5,0.2,Iris-setosa
5,5.0,3.6,1.4,0.2,Iris-setosa
6,5.4,3.9,1.7,0.4,Iris-setosa
7,4.6,3.4,1.4,0.3,Iris-setosa
8,5.0,3.4,1.5,0.2,Iris-setosa
9,4.4,2.9,1.4,0.2,Iris-setosa
10,4.9,3.1,1.5,0.1,Iris-setosa
11,5.4,3.7,1.5,0.2,Iris-setosa
12,4.8,3.4,1.6,0.2,Iris-setosa
13,4.8,3.0,1.4,0.1,Iris-setosa
14,4.3,3.0,1.1,0.1,Iris-setosa
15,5.8,4.0,1.2,0.2,Iris-setosa
16,5.7,4.4,1.5,0.4,Iris-setosa
17,5.4,3.9,1.3,0.4,Iris-setosa
18,5.1,3.5,1.4,0.3,Iris-setosa
19,5.7,3.8,1.7,0.3,Iris-setosa
20,5.1,3.8,1.5,0.3,Iris-setosa
21,5.4,3.4,1.7,0.2,Iris-setosa
22,5.1,3.7,1.5,0.4,Iris-setosa
23,4.6,3.6,1.0,0.2,Iris-setosa
24,5.1,3.3,1.7,0.5,Iris-setosa
25,4.8,3.4,1.9,0.2,Iris-setosa
26,5.0,3.0,1.6,0.2,Iris-setosa
27,5.0,3.4,1.6,0.4,Iris-setosa
28,5.2,3.5,1.5,0.2,Iris-setosa
29,5.2,3.4,1.4,0.2,Iris-setosa
30,4.7,3.2,1.6,0.2,Iris-setosa
31,4.8,3.1,1.6,0.2,Iris-setosa
32,5.4,3.4,1.5,0.4,Iris-setosa
33,5.2,4.1,1.5,0.1,Iris-setosa
34,5.5,4.2,1.4,0.2,Iris-setosa
35,4.9,3.1,1.5,0.1,Iris-setosa
36,5.0,3.2,1.2,0.2,Iris-setosa
37,5.5,3.5,1.3,0.2,Iris-setosa
38,4.9,3.1,1.5,0.1,Iris-setosa
39,4.4,3.0,1.3,0.2,Iris-setosa
40,5.1,3.4,1.5,0.2,Iris-setosa
41,5.0,3.5,1.3,0.3,Iris-setosa
42,4.5,2.3,1.3,0.3,Iris-setosa
43,4.4,3.2,1.3,0.2,Iris-setosa
44,5.0,3.5,1.6,0.6,Iris-setosa
45,5.1,3.8,1.9,0.4,Iris-setosa
46,4.8,3.0,1.4,0.3,Iris-setosa
47,5.1,3.8,1.6,0.2,Iris-setosa
48,4.6,3.2,1.4,0.2,Iris-setosa
49,5.3,3.7,1.5,0.2,Iris-setosa
50,5.0,3.3,1.4,0.2,Iris-setosa
51,7.0,3.2,4.7,1.4,Iris-versicolor
52,6.4,3.2,4.5,1.5,Iris-versicolor
53,6.9,3.1,4.9,1.5,Iris-versicolor
54,5.5,2.3,4.0,1.3,Iris-versicolor
55,6.5,2.8,4.6,1.5,Iris-versicolor
56,5.7,2.8,4.5,1.3,Iris-versicolor
57,6.3,3.3,4.7,1.6,Iris-versicolor
58,4.9,2.4,3.3,1.0,Iris-versicolor
59,6.6,2.9,4.6,1.3,Iris-versicolor
60,5.2,2.7,3.9,1.4,Iris-versicolor
61,5.0,2.0,3.5,1.0,Iris-versicolor
62,5.9,3.0,4.2,1.5,Iris-versicolor
63,6.0,2.2,4.0,1.0,Iris-versicolor
64,6.1,2.9,4.7,1.4,Iris-versicolor
65,5.6,2.9,3.6,1.3,Iris-versicolor
66,6.7,3.1,4.4,1.4,Iris-versicolor
67,5.6,3.0,4.5,1.5,Iris-versicolor
68,5.8,2.7,4.1,1.0,Iris-versicolor
69,6.2,2.2,4.5,1.5,Iris-versicolor
70,5.6,2.5,3.9,1.1,Iris-versicolor
71,5.9,3.2,4.8,1.8,Iris-versicolor
72,6.1,2.8,4.0,1.3,Iris-versicolor
73,6.3,2.5,4.9,1.5,Iris-versicolor
74,6.1,2.8,4.7,1.2,Iris-versicolor
75,6.4,2.9,4.3,1.3,Iris-versicolor
76,6.6,3.0,4.4,1.4,Iris-versicolor
77,6.8,2.8,4.8,1.4,Iris-versicolor
78,6.7,3.0,5.0,1.7,Iris-versicolor
79,6.0,2.9,4.5,1.5,Iris-versicolor
80,5.7,2.6,3.5,1.0,Iris-versicolor
81,5.5,2.4,3.8,1.1,Iris-versicolor
82,5.5,2.4,3.7,1.0,Iris-versicolor
83,5.8,2.7,3.9,1.2,Iris-versicolor
84,6.0,2.7,5.1,1.6,Iris-versicolor
85,5.4,3.0,4.5,1.5,Iris-versicolor
86,6.0,3.4,4.5,1.6,Iris-versicolor
87,6.7,3.1,4.7,1.5,Iris-versicolor
88,6.3,2.3,4.4,1.3,Iris-versicolor
89,5.6,3.0,4.1,1.3,Iris-versicolor
90,5.5,2.5,4.0,1.3,Iris-versicolor
91,5.5,2.6,4.4,1.2,Iris-versicolor
92,6.1,3.0,4.6,1.4,Iris-versicolor
93,5.8,2.6,4.0,1.2,Iris-versicolor
94,5.0,2.3,3.3,1.0,Iris-versicolor
95,5.6,2.7,4.2,1.3,Iris-versicolor
96,5.7,3.0,4.2,1.2,Iris-versicolor
97,5.7,2.9,4.2,1.3,Iris-versicolor
98,6.2,2.9,4.3,1.3,Iris-versicolor
99,5.1,2.5,3.0,1.1,Iris-versicolor
100,5.7,2.8,4.1,1.3,Iris-versicolor
101,6.3,3.3,6.0,2.5,Iris-virginica
102,5.8,2.7,5.1,1.9,Iris-virginica
103,7.1,3.0,5.9,2.1,Iris-virginica
104,6.3,2.9,5.6,1.8,Iris-virginica
105,6.5,3.0,5.8,2.2,Iris-virginica
106,7.6,3.0,6.6,2.1,Iris-virginica
107,4.9,2.5,4.5,1.7,Iris-virginica
108,7.3,2.9,6.3,1.8,Iris-virginica
109,6.7,2.5,5.8,1.8,Iris-virginica
110,7.2,3.6,6.1,2.5,Iris-virginica
111,6.5,3.2,5.1,2.0,Iris-virginica
112,6.4,2.7,5.3,1.9,Iris-virginica
113,6.8,3.0,5.5,2.1,Iris-virginica
114,5.7,2.5,5.0,2.0,Iris-virginica
115,5.8,2.8,5.1,2.4,Iris-virginica
116,6.4,3.2,5.3,2.3,Iris-virginica
117,6.5,3.0,5.5,1.8,Iris-virginica
118,7.7,3.8,6.7,2.2,Iris-virginica
119,7.7,2.6,6.9,2.3,Iris-virginica
120,6.0,2.2,5.0,1.5,Iris-virginica
121,6.9,3.2,5.7,2.3,Iris-virginica
122,5.6,2.8,4.9,2.0,Iris-virginica
123,7.7,2.8,6.7,2.0,Iris-virginica
124,6.3,2.7,4.9,1.8,Iris-virginica
125,6.7,3.3,5.7,2.1,Iris-virginica
126,7.2,3.2,6.0,1.8,Iris-virginica
127,6.2,2.8,4.8,1.8,Iris-virginica
128,6.1,3.0,4.9,1.8,Iris-virginica
129,6.4,2.8,5.6,2.1,Iris-virginica
130,7.2,3.0,5.8,1.6,Iris-virginica
131,7.4,2.8,6.1,1.9,Iris-virginica
132,7.9,3.8,6.4,2.0,Iris-virginica
133,6.4,2.8,5.6,2.2,Iris-virginica
134,6.3,2.8,5.1,1.5,Iris-virginica
135,6.1,2.6,5.6,1.4,Iris-virginica
136,7.7,3.0,6.1,2.3,Iris-virginica
137,6.3,3.4,5.6,2.4,Iris-virginica
138,6.4,3.1,5.5,1.8,Iris-virginica
139,6.0,3.0,4.8,1.8,Iris-virginica
140,6.9,3.1,5.4,2.1,Iris-virginica
141,6.7,3.1,5.6,2.4,Iris-virginica
142,6.9,3.1,5.1,2.3,Iris-virginica
143,5.8,2.7,5.1,1.9,Iris-virginica
144,6.8,3.2,5.9,2.3,Iris-virginica
145,6.7,3.3,5.7,2.5,Iris-virginica
146,6.7,3.0,5.2,2.3,Iris-virginica
147,6.3,2.5,5.0,1.9,Iris-virginica
148,6.5,3.0,5.2,2.0,Iris-virginica
149,6.2,3.4,5.4,2.3,Iris-virginica
150,5.9,3.0,5.1,1.8,Iris-virginica
{
"cells": [
{
"cell_type": "markdown",
"id": "f90cdfdc-bc05-4cdc-b900-022517b33b41",
"metadata": {},
"source": [
"[Iris flower data set](https://en.wikipedia.org/wiki/Iris_flower_data_set) with [Polars](https://pola.rs/)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "fd4ec3fc-c2d8-4a50-bc9c-99cafefb9647",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting polars\n",
" Downloading polars-1.20.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (14 kB)\n",
"Downloading polars-1.20.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (32.9 MB)\n",
"\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m32.9/32.9 MB\u001b[0m \u001b[31m37.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n",
"\u001b[?25hInstalling collected packages: polars\n",
"Successfully installed polars-1.20.0\n"
]
}
],
"source": [
"!pip install polars"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2f2387a7-a2aa-4c66-812d-aff6814fc4e9",
"metadata": {},
"outputs": [],
"source": [
"import polars as pl"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "dfdd0b3a-ae63-49cb-80a0-a1a4fab76366",
"metadata": {},
"outputs": [],
"source": [
"df = pl.scan_csv(\"iris.csv\").collect()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "070e59b6-6d06-43d9-b2b6-1edff05059d6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (150, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>Id</th><th>SepalLengthCm</th><th>SepalWidthCm</th><th>PetalLengthCm</th><th>PetalWidthCm</th><th>Species</th></tr><tr><td>i64</td><td>f64</td><td>f64</td><td>f64</td><td>f64</td><td>str</td></tr></thead><tbody><tr><td>1</td><td>5.1</td><td>3.5</td><td>1.4</td><td>0.2</td><td>"Iris-setosa"</td></tr><tr><td>2</td><td>4.9</td><td>3.0</td><td>1.4</td><td>0.2</td><td>"Iris-setosa"</td></tr><tr><td>3</td><td>4.7</td><td>3.2</td><td>1.3</td><td>0.2</td><td>"Iris-setosa"</td></tr><tr><td>4</td><td>4.6</td><td>3.1</td><td>1.5</td><td>0.2</td><td>"Iris-setosa"</td></tr><tr><td>5</td><td>5.0</td><td>3.6</td><td>1.4</td><td>0.2</td><td>"Iris-setosa"</td></tr><tr><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td></tr><tr><td>146</td><td>6.7</td><td>3.0</td><td>5.2</td><td>2.3</td><td>"Iris-virginica"</td></tr><tr><td>147</td><td>6.3</td><td>2.5</td><td>5.0</td><td>1.9</td><td>"Iris-virginica"</td></tr><tr><td>148</td><td>6.5</td><td>3.0</td><td>5.2</td><td>2.0</td><td>"Iris-virginica"</td></tr><tr><td>149</td><td>6.2</td><td>3.4</td><td>5.4</td><td>2.3</td><td>"Iris-virginica"</td></tr><tr><td>150</td><td>5.9</td><td>3.0</td><td>5.1</td><td>1.8</td><td>"Iris-virginica"</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (150, 6)\n",
"┌─────┬───────────────┬──────────────┬───────────────┬──────────────┬────────────────┐\n",
"│ Id ┆ SepalLengthCm ┆ SepalWidthCm ┆ PetalLengthCm ┆ PetalWidthCm ┆ Species │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ i64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ str │\n",
"╞═════╪═══════════════╪══════════════╪═══════════════╪══════════════╪════════════════╡\n",
"│ 1 ┆ 5.1 ┆ 3.5 ┆ 1.4 ┆ 0.2 ┆ Iris-setosa │\n",
"│ 2 ┆ 4.9 ┆ 3.0 ┆ 1.4 ┆ 0.2 ┆ Iris-setosa │\n",
"│ 3 ┆ 4.7 ┆ 3.2 ┆ 1.3 ┆ 0.2 ┆ Iris-setosa │\n",
"│ 4 ┆ 4.6 ┆ 3.1 ┆ 1.5 ┆ 0.2 ┆ Iris-setosa │\n",
"│ 5 ┆ 5.0 ┆ 3.6 ┆ 1.4 ┆ 0.2 ┆ Iris-setosa │\n",
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
"│ 146 ┆ 6.7 ┆ 3.0 ┆ 5.2 ┆ 2.3 ┆ Iris-virginica │\n",
"│ 147 ┆ 6.3 ┆ 2.5 ┆ 5.0 ┆ 1.9 ┆ Iris-virginica │\n",
"│ 148 ┆ 6.5 ┆ 3.0 ┆ 5.2 ┆ 2.0 ┆ Iris-virginica │\n",
"│ 149 ┆ 6.2 ┆ 3.4 ┆ 5.4 ┆ 2.3 ┆ Iris-virginica │\n",
"│ 150 ┆ 5.9 ┆ 3.0 ┆ 5.1 ┆ 1.8 ┆ Iris-virginica │\n",
"└─────┴───────────────┴──────────────┴───────────────┴──────────────┴────────────────┘"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "d72c9b12-0a65-4aa3-9490-646018d49940",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"polars.dataframe.frame.DataFrame"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(df)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "13aad9e3-76fb-4b26-9c29-e30b0dbc6410",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['__add__',\n",
" '__annotations__',\n",
" '__array__',\n",
" '__arrow_c_stream__',\n",
" '__bool__',\n",
" '__class__',\n",
" '__contains__',\n",
" '__copy__',\n",
" '__dataframe__',\n",
" '__deepcopy__',\n",
" '__delattr__',\n",
" '__dict__',\n",
" '__dir__',\n",
" '__doc__',\n",
" '__eq__',\n",
" '__floordiv__',\n",
" '__format__',\n",
" '__ge__',\n",
" '__getattribute__',\n",
" '__getitem__',\n",
" '__getstate__',\n",
" '__gt__',\n",
" '__hash__',\n",
" '__init__',\n",
" '__init_subclass__',\n",
" '__iter__',\n",
" '__le__',\n",
" '__len__',\n",
" '__lt__',\n",
" '__mod__',\n",
" '__module__',\n",
" '__mul__',\n",
" '__ne__',\n",
" '__new__',\n",
" '__radd__',\n",
" '__reduce__',\n",
" '__reduce_ex__',\n",
" '__repr__',\n",
" '__reversed__',\n",
" '__rmul__',\n",
" '__setattr__',\n",
" '__setitem__',\n",
" '__setstate__',\n",
" '__sizeof__',\n",
" '__str__',\n",
" '__sub__',\n",
" '__subclasshook__',\n",
" '__truediv__',\n",
" '__weakref__',\n",
" '_accessors',\n",
" '_cast_all_from_to',\n",
" '_comp',\n",
" '_compare_to_non_df',\n",
" '_compare_to_other_df',\n",
" '_df',\n",
" '_div',\n",
" '_from_arrow',\n",
" '_from_pandas',\n",
" '_from_pydf',\n",
" '_ipython_key_completions_',\n",
" '_replace',\n",
" '_repr_html_',\n",
" '_row_encode',\n",
" '_to_metadata',\n",
" '_to_pandas_with_object_columns',\n",
" '_to_pandas_without_object_columns',\n",
" 'approx_n_unique',\n",
" 'bottom_k',\n",
" 'cast',\n",
" 'clear',\n",
" 'clone',\n",
" 'collect_schema',\n",
" 'columns',\n",
" 'corr',\n",
" 'count',\n",
" 'describe',\n",
" 'deserialize',\n",
" 'drop',\n",
" 'drop_in_place',\n",
" 'drop_nans',\n",
" 'drop_nulls',\n",
" 'dtypes',\n",
" 'equals',\n",
" 'estimated_size',\n",
" 'explode',\n",
" 'extend',\n",
" 'fill_nan',\n",
" 'fill_null',\n",
" 'filter',\n",
" 'flags',\n",
" 'fold',\n",
" 'gather_every',\n",
" 'get_column',\n",
" 'get_column_index',\n",
" 'get_columns',\n",
" 'glimpse',\n",
" 'group_by',\n",
" 'group_by_dynamic',\n",
" 'hash_rows',\n",
" 'head',\n",
" 'height',\n",
" 'hstack',\n",
" 'insert_column',\n",
" 'interpolate',\n",
" 'is_duplicated',\n",
" 'is_empty',\n",
" 'is_unique',\n",
" 'item',\n",
" 'iter_columns',\n",
" 'iter_rows',\n",
" 'iter_slices',\n",
" 'join',\n",
" 'join_asof',\n",
" 'join_where',\n",
" 'lazy',\n",
" 'limit',\n",
" 'map_rows',\n",
" 'max',\n",
" 'max_horizontal',\n",
" 'mean',\n",
" 'mean_horizontal',\n",
" 'median',\n",
" 'melt',\n",
" 'merge_sorted',\n",
" 'min',\n",
" 'min_horizontal',\n",
" 'n_chunks',\n",
" 'n_unique',\n",
" 'null_count',\n",
" 'partition_by',\n",
" 'pipe',\n",
" 'pivot',\n",
" 'plot',\n",
" 'product',\n",
" 'quantile',\n",
" 'rechunk',\n",
" 'rename',\n",
" 'replace_column',\n",
" 'reverse',\n",
" 'rolling',\n",
" 'row',\n",
" 'rows',\n",
" 'rows_by_key',\n",
" 'sample',\n",
" 'schema',\n",
" 'select',\n",
" 'select_seq',\n",
" 'serialize',\n",
" 'set_sorted',\n",
" 'shape',\n",
" 'shift',\n",
" 'shrink_to_fit',\n",
" 'slice',\n",
" 'sort',\n",
" 'sql',\n",
" 'std',\n",
" 'style',\n",
" 'sum',\n",
" 'sum_horizontal',\n",
" 'tail',\n",
" 'to_arrow',\n",
" 'to_dict',\n",
" 'to_dicts',\n",
" 'to_dummies',\n",
" 'to_init_repr',\n",
" 'to_jax',\n",
" 'to_numpy',\n",
" 'to_pandas',\n",
" 'to_series',\n",
" 'to_struct',\n",
" 'to_torch',\n",
" 'top_k',\n",
" 'transpose',\n",
" 'unique',\n",
" 'unnest',\n",
" 'unpivot',\n",
" 'unstack',\n",
" 'update',\n",
" 'upsample',\n",
" 'var',\n",
" 'vstack',\n",
" 'width',\n",
" 'with_columns',\n",
" 'with_columns_seq',\n",
" 'with_row_count',\n",
" 'with_row_index',\n",
" 'write_avro',\n",
" 'write_clipboard',\n",
" 'write_csv',\n",
" 'write_database',\n",
" 'write_delta',\n",
" 'write_excel',\n",
" 'write_ipc',\n",
" 'write_ipc_stream',\n",
" 'write_json',\n",
" 'write_ndjson',\n",
" 'write_parquet']"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dir(df)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "8c126b37-4a9b-4582-b8e9-5c7fe45afedc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Id',\n",
" 'SepalLengthCm',\n",
" 'SepalWidthCm',\n",
" 'PetalLengthCm',\n",
" 'PetalWidthCm',\n",
" 'Species']"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "f0e0d26b-5b26-4127-809e-3d132a7c5cdd",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (3, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>Id</th><th>SepalLengthCm</th><th>SepalWidthCm</th><th>PetalLengthCm</th><th>PetalWidthCm</th><th>Species</th></tr><tr><td>i64</td><td>f64</td><td>f64</td><td>f64</td><td>f64</td><td>str</td></tr></thead><tbody><tr><td>1</td><td>5.1</td><td>3.5</td><td>1.4</td><td>0.2</td><td>"Iris-setosa"</td></tr><tr><td>2</td><td>4.9</td><td>3.0</td><td>1.4</td><td>0.2</td><td>"Iris-setosa"</td></tr><tr><td>3</td><td>4.7</td><td>3.2</td><td>1.3</td><td>0.2</td><td>"Iris-setosa"</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (3, 6)\n",
"┌─────┬───────────────┬──────────────┬───────────────┬──────────────┬─────────────┐\n",
"│ Id ┆ SepalLengthCm ┆ SepalWidthCm ┆ PetalLengthCm ┆ PetalWidthCm ┆ Species │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ i64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ str │\n",
"╞═════╪═══════════════╪══════════════╪═══════════════╪══════════════╪═════════════╡\n",
"│ 1 ┆ 5.1 ┆ 3.5 ┆ 1.4 ┆ 0.2 ┆ Iris-setosa │\n",
"│ 2 ┆ 4.9 ┆ 3.0 ┆ 1.4 ┆ 0.2 ┆ Iris-setosa │\n",
"│ 3 ┆ 4.7 ┆ 3.2 ┆ 1.3 ┆ 0.2 ┆ Iris-setosa │\n",
"└─────┴───────────────┴──────────────┴───────────────┴──────────────┴─────────────┘"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "2991a8d6-52cd-4ae0-b66c-6ae5f5f0476c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (2, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>Id</th><th>SepalLengthCm</th><th>SepalWidthCm</th><th>PetalLengthCm</th><th>PetalWidthCm</th><th>Species</th></tr><tr><td>i64</td><td>f64</td><td>f64</td><td>f64</td><td>f64</td><td>str</td></tr></thead><tbody><tr><td>149</td><td>6.2</td><td>3.4</td><td>5.4</td><td>2.3</td><td>"Iris-virginica"</td></tr><tr><td>150</td><td>5.9</td><td>3.0</td><td>5.1</td><td>1.8</td><td>"Iris-virginica"</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (2, 6)\n",
"┌─────┬───────────────┬──────────────┬───────────────┬──────────────┬────────────────┐\n",
"│ Id ┆ SepalLengthCm ┆ SepalWidthCm ┆ PetalLengthCm ┆ PetalWidthCm ┆ Species │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ i64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ str │\n",
"╞═════╪═══════════════╪══════════════╪═══════════════╪══════════════╪════════════════╡\n",
"│ 149 ┆ 6.2 ┆ 3.4 ┆ 5.4 ┆ 2.3 ┆ Iris-virginica │\n",
"│ 150 ┆ 5.9 ┆ 3.0 ┆ 5.1 ┆ 1.8 ┆ Iris-virginica │\n",
"└─────┴───────────────┴──────────────┴───────────────┴──────────────┴────────────────┘"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.tail(2)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "91776e1e-1355-47de-b08a-fc76204ba086",
"metadata": {},
"outputs": [],
"source": [
"df = pl.scan_csv(\"iris.csv\").filter(pl.col(\"SepalLengthCm\") > 5).group_by(\"Species\").agg(pl.all().sum()).collect()\n"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "75a2a13e-91bc-4005-8347-4244a8669911",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (3, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>Species</th><th>Id</th><th>SepalLengthCm</th><th>SepalWidthCm</th><th>PetalLengthCm</th><th>PetalWidthCm</th></tr><tr><td>str</td><td>i64</td><td>f64</td><td>f64</td><td>f64</td><td>f64</td></tr></thead><tbody><tr><td>"Iris-setosa"</td><td>564</td><td>116.9</td><td>81.7</td><td>33.2</td><td>6.1</td></tr><tr><td>"Iris-versicolor"</td><td>3562</td><td>281.9</td><td>131.8</td><td>202.9</td><td>63.3</td></tr><tr><td>"Iris-virginica"</td><td>6168</td><td>324.5</td><td>146.2</td><td>273.1</td><td>99.6</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (3, 6)\n",
"┌─────────────────┬──────┬───────────────┬──────────────┬───────────────┬──────────────┐\n",
"│ Species ┆ Id ┆ SepalLengthCm ┆ SepalWidthCm ┆ PetalLengthCm ┆ PetalWidthCm │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ str ┆ i64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
"╞═════════════════╪══════╪═══════════════╪══════════════╪═══════════════╪══════════════╡\n",
"│ Iris-setosa ┆ 564 ┆ 116.9 ┆ 81.7 ┆ 33.2 ┆ 6.1 │\n",
"│ Iris-versicolor ┆ 3562 ┆ 281.9 ┆ 131.8 ┆ 202.9 ┆ 63.3 │\n",
"│ Iris-virginica ┆ 6168 ┆ 324.5 ┆ 146.2 ┆ 273.1 ┆ 99.6 │\n",
"└─────────────────┴──────┴───────────────┴──────────────┴───────────────┴──────────────┘"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}