talk: Small fixes, add source list
This commit is contained in:
parent
c2a26653af
commit
1606044a40
4 changed files with 36 additions and 0 deletions
BIN
talk/credits.ods
Normal file
BIN
talk/credits.ods
Normal file
Binary file not shown.
36
talk/pics/original/scrape.py
Normal file
36
talk/pics/original/scrape.py
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import IPython
|
||||
from os import path
|
||||
|
||||
# Executing this file directly is the only supported use; importing it is refused.
if not __name__ == '__main__':
    raise ImportError('This is a command-line script and not supposed to be imported.')

# Scan the current working directory for file names ending in
# "-<11 chars of [0-9a-zA-Z-]>-unsplash.jpg" and keep that 11-character part
# of each one; it is interpolated into the photo page URL further down.
# NOTE(review): the character class has no '_' -- confirm photo ids never
# contain one, otherwise such files are silently skipped here.
pic_ids = [
    hit.group(1)
    for hit in (
        re.match(r'.*-([0-9a-zA-Z-]{11})-unsplash\.jpg$', entry)
        for entry in os.listdir()
    )
    if hit
]
|
||||
|
||||
# The two page-title shapes this script understands:
#   "<title> photo – Free <category>Image on Unsplash"
#   "Free <category>Image on Unsplash"
TITLED_RE = re.compile(r'(.*) photo – Free (.*)Image on Unsplash')
UNTITLED_RE = re.compile(r'Free (.*)Image on Unsplash')
# Alt text of the avatar image inside a link to a "/@user" profile page.
PROFILE_ALT_RE = re.compile("Go to (.*)'s profile")

# For every collected photo id, fetch its Unsplash page and print one credit
# line "<photographer>: <title or category>" to stdout.  Ids whose page could
# not be fetched or parsed are printed to stderr instead, one per line.
for pic_id in pic_ids:
    try:
        # A timeout keeps one stalled connection from hanging the whole run;
        # raise_for_status() turns HTTP error pages into a reported failure
        # instead of letting them be parsed as if they were a photo page.
        res = requests.get(f'https://unsplash.com/photos/{pic_id}', timeout=30)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, features='lxml')

        page_title = soup.find('title').text
        match = TITLED_RE.match(page_title)
        if match:
            title, category = match.groups()
        else:
            match = UNTITLED_RE.match(page_title)
            if match is None:
                raise ValueError(f'unrecognised page title: {page_title!r}')
            # No per-photo title in this shape: clear it so the category
            # fallback in the print below is actually used (previously the
            # raw page title was still in `title` and got printed instead).
            title = None
            category = match.group(1)

        # The photographer's name comes from the first "/@user" link whose
        # image alt text has the expected "Go to X's profile" form.  Tags
        # without href/alt are skipped rather than failing the whole photo.
        name = None
        for a in soup.find_all('a'):
            if not a.get('href', '').startswith('/@'):
                continue
            img = a.find('img')
            if not img:
                continue
            profile = PROFILE_ALT_RE.match(img.get('alt', ''))
            if profile:
                name = profile.group(1)
                break
        if name is None:
            raise ValueError('no photographer profile link found')

        print(f'{name}: {title if title else category.strip()}')
    except Exception:
        # Best effort: report the id and carry on with the next photo.
        # `Exception` (not a bare except) so Ctrl-C and sys.exit() still stop
        # the script.  The stderr line stays the bare id, as before.
        print(pic_id, file=sys.stderr)
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue