talk: Small fixes, add source list
This commit is contained in:
parent
c2a26653af
commit
1606044a40
4 changed files with 36 additions and 0 deletions
BIN
talk/credits.ods
Normal file
BIN
talk/credits.ods
Normal file
Binary file not shown.
36
talk/pics/original/scrape.py
Normal file
36
talk/pics/original/scrape.py
Normal file
|
|
@ -0,0 +1,36 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import IPython
|
||||||
|
from os import path
|
||||||
|
|
||||||
|
if __name__ != '__main__':
    raise ImportError('This is a command-line script and not supposed to be imported.')

# Credit-line generator for downloaded Unsplash pictures.
#
# Looks in the current working directory for files named like
# "<anything>-<11-char photo id>-unsplash.jpg", fetches the Unsplash page of
# each photo and prints "<photographer>: <photo title or category>" to stdout.
# The ID of every photo that could not be processed is printed to stderr.

# Extracts the 11-character photo ID from a downloaded file name.
_FILENAME_RE = re.compile(r'.*-([0-9a-zA-Z-]{11})-unsplash\.jpg$')
# Page title of a photo that has its own title: "<title> photo – Free <category> Image on Unsplash".
_TITLED_RE = re.compile(r'(.*) photo – Free (.*)Image on Unsplash')
# Page title of a photo without its own title: "Free <category> Image on Unsplash".
_UNTITLED_RE = re.compile(r'Free (.*)Image on Unsplash')
# Alt text of the avatar image inside a link to the photographer's profile.
_PROFILE_ALT_RE = re.compile("Go to (.*)'s profile")
# Seconds to wait for unsplash.com before treating the photo as failed.
_REQUEST_TIMEOUT = 30


def _split_page_title(page_title):
    """Split an Unsplash page title into ``(title, category)``.

    ``title`` is ``None`` when the page title carries only a category, so the
    caller can fall back to the category.

    Raises:
        ValueError: if the page title matches neither known layout.
    """
    match = _TITLED_RE.match(page_title)
    if match:
        return match.groups()
    match = _UNTITLED_RE.match(page_title)
    if match is None:
        raise ValueError(f'unrecognised page title: {page_title!r}')
    # No photo title present: report None instead of leaving the raw page
    # title in place, otherwise the category fallback could never trigger.
    return None, match.group(1)


def _photographer_name(soup):
    """Return the photographer name taken from the first profile-link avatar.

    Raises:
        IndexError: if no profile link with an image is present.
        AttributeError: if the alt text does not have the expected wording.
    """
    profile_imgs = [
        a.findChild('img')
        for a in soup.find_all('a')
        # .get() because anchors without an href attribute do occur and must
        # not abort processing of the whole page.
        if a.get('href', '').startswith('/@')
    ]
    alts = [img['alt'] for img in profile_imgs if img]
    return _PROFILE_ALT_RE.match(alts[0]).group(1)


pic_ids = [
    match.group(1)
    for match in map(_FILENAME_RE.match, os.listdir())
    if match
]

for pic_id in pic_ids:
    try:
        res = requests.get(
            f'https://unsplash.com/photos/{pic_id}',
            timeout=_REQUEST_TIMEOUT,
        )
        # Treat HTTP errors (e.g. a removed photo) as failures up front
        # instead of trying to parse an error page.
        res.raise_for_status()
        soup = BeautifulSoup(res.text, features='lxml')

        title, category = _split_page_title(soup.find('title').text)
        name = _photographer_name(soup)

        print(f'{name}: {title if title else category.strip()}')
    except Exception:
        # Best effort: report the failed ID and continue with the next photo.
        # Deliberately not a bare "except:", so Ctrl-C still stops the script.
        print(pic_id, file=sys.stderr)
|
||||||
|
|
||||||
Binary file not shown.
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue