import sys
import requests
import time
def download_all(urls):
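    # Download every URL one after the other and print the size of each response body.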
    for url in urls:
        res = requests.get(url)
        print(f"{url} - {len(res.text)}")
if __name__ == '__main__':
    if len(sys.argv) != 2:
        exit(f"Usage: {sys.argv[0]} FILENAME (examples/parallel/urls.txt)")
    filename = sys.argv[1]
    with open(filename) as fh:
        urls = list(map(lambda s: s.rstrip("\n"), fh.readlines()))
    start = time.time()
    download_all(urls)
    end = time.time()
    elapsed = end - start
    print(f"Elapsed time : {elapsed:.2f}")
import concurrent.futures
import requests
import threading
import time
import sys
from bs4 import BeautifulSoup
thread_local = threading.local()
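# Assumption: thread_local is meant to cache one requests.Session per worker
# thread (the usual pattern when requests is used from a thread pool), so a
# minimal helper along those lines; the name get_session is not in the original.
def get_session():
    if not hasattr(thread_local, "session"):
        thread_local.session = requests.Session()
    return thread_local.session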
def get_title(url):
    # Fetch the page with this thread's Session and return the text of its
    # <title> tag; return None if the request failed or the page has no title.
    res = get_session().get(url)
    if res.ok:
        soup = BeautifulSoup(res.text, 'html.parser')
        return None if soup.title is None else soup.title.string
    return None
def main():
    if len(sys.argv) < 4:
        exit(f"{sys.argv[0]} LIMIT PARALLEL FILENAME")
    limit = int(sys.argv[1])
    parallel = int(sys.argv[2])
    filename = sys.argv[3]
    with open(filename) as fh:
        urls = list(map(lambda line: line.rstrip("\n"), fh))
    if len(urls) > limit:
        urls = urls[0:limit]
    start = time.monotonic()
    # titles = list(map(get_title, urls))    # sequential version, kept for comparison
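    # Fetch the titles on a pool of `parallel` worker threads; exe.map returns
    # results in the same order as the input URLs.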
    with concurrent.futures.ThreadPoolExecutor(max_workers=parallel) as exe:
        titles = list(exe.map(get_title, urls))
    for url, title in zip(urls, titles):
        print(f"{url} {title}")
    end = time.monotonic()
    print(f"Elapsed time: {end-start:.2f}")
if __name__ == '__main__':
    main()