MLP Trixie Lulamoon bot for Mastodon
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

47 lines
1.4 KiB

import sys
import os.path as op
from urllib.parse import urlparse
import requests
from bs4 import BeautifulSoup
# Wiki gallery pages that are scraped for screenshots, in season order.
URLs = [
    "https://mlp.fandom.com/wiki/Trixie/Gallery/Seasons_1-5",
    "https://mlp.fandom.com/wiki/Trixie/Gallery/Season_6",
    # Season 7 and higher are on the main gallery page
    "https://mlp.fandom.com/wiki/Trixie/Gallery",
]

# Module-level accumulator for the scrape results: a flat list in which each
# image URL is immediately followed by the text taken from that image's
# ``alt`` attribute.
screenshots_urls = []
def _screenshot_path(path):
    """Trim trailing components of *path* until it ends with an image file name.

    Returns the trimmed path, or ``None`` when no component of *path* ends
    with ``.png`` or ``.jpg``.
    """
    while not path.endswith(('.png', '.jpg')):
        parent = op.dirname(path)
        if parent == path:
            # dirname() has reached its fixed point ('/' or ''): there is no
            # image file name anywhere in this path, so stop instead of
            # looping forever.
            return None
        path = parent
    return path


def dispense(s, nbeg, nend):
    """Collect screenshot URLs and captions from one gallery page.

    Downloads the page at *s*, looks up the elements whose ids are
    ``gallery-<i>`` for every ``i`` in ``range(nbeg, nend)`` and, for each
    ``wikia-gallery-item`` inside them, appends two entries to the
    module-level ``screenshots_urls`` list: the image URL cut back to the
    ``.png``/``.jpg`` file name (query string dropped), followed by the
    image's ``alt`` text.

    Galleries that are not present on the page, items without an ``<img>``
    or ``src``, and images whose URL contains no ``.png``/``.jpg`` file name
    are skipped.

    Args:
        s: URL of the gallery page to download.
        nbeg: First gallery index to read (inclusive).
        nend: Gallery index to stop at (exclusive).

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout a stalled connection would hang the script forever.
    resp = requests.get(s, timeout=30)
    # Fail loudly on an HTTP error instead of parsing an error page.
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, 'lxml')

    # Season screenshots are stored in gallery-i tags
    for i in range(nbeg, nend):
        gallery_tag = soup.find(id=f'gallery-{i}')
        if gallery_tag is None:
            # This page has no gallery with that index; nothing to collect.
            continue

        # Extracting the image url from a needed child
        for child in gallery_tag.find_all(class_='wikia-gallery-item'):
            img = child.img
            if img is None:
                continue
            src = img.get('src')
            if not src:
                continue

            fullpath = urlparse(src)
            path = _screenshot_path(fullpath.path)
            if path is None:
                # Not a .png/.jpg URL (e.g. an inline data: URI).
                continue

            screenshots_urls.append(
                f'{fullpath.scheme}://{fullpath.netloc}{path}'
            )
            # The alt text is a caption, not a URL: keep it verbatim. Running
            # it through urlparse() would cut it at '?', '#' or ';' and strip
            # a leading "word:" as if it were a scheme.
            screenshots_urls.append(img.get('alt') or '')
def main():
    """Scrape all configured gallery pages and dump the results to screens.dat.

    Every entry gathered in ``screenshots_urls`` is written on its own line
    of ``screens.dat`` (UTF-8), in the order it was collected.
    """
    # (page URL, first gallery index, end gallery index - exclusive)
    jobs = (
        (URLs[0], 0, 5),    # Seasons 1-5
        (URLs[1], 0, 3),    # Season 6
        (URLs[2], 5, 15),   # Seasons 7+
    )
    for page_url, first, stop in jobs:
        dispense(page_url, first, stop)

    with open('screens.dat', 'w', encoding='utf-8') as out:
        for entry in screenshots_urls:
            out.write(f'{entry}\n')
# Run the scraper only when executed as a script, and hand main()'s return
# value to the interpreter as the exit status.
if __name__ == '__main__':
    raise SystemExit(main())