Init
commit 6aa29fd017

@@ -0,0 +1,2 @@
token.dat
venv

@@ -0,0 +1 @@
392

@@ -0,0 +1,279 @@
#!/home/naiji/mastodon/vocaloiddb-bot/venv/bin/python

import re
import sys
import random
import requests

import os.path as op

from bs4 import BeautifulSoup
from mastodon import Mastodon

MIKUDB_HEAD = 'http://mikudb.moe/page/'
MIKUDB_TAIL = '/?s'

VOCADB_HEAD = 'https://vocadb.net/'


def findRandomAlbumUrl(last_page_id) -> str:
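    # Sample random mikudb search pages until one contains at least one
    # album entry; give up after five misses so the bot can't loop forever.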
    alarm_counter = 0
    while alarm_counter < 5:
        selection_page_id = random.randint(0, int(last_page_id))
        resp = requests.get(MIKUDB_HEAD + str(selection_page_id) + MIKUDB_TAIL)
        soup = BeautifulSoup(resp.text, 'lxml')
        album_entries = soup.findAll('div', {'class': 'searchres album-box grid_19'})
        if len(album_entries) != 0:
            album_entry = random.choice(album_entries)
            return str(album_entry.findAll('a', href=True)[0]["href"])
        else:
            alarm_counter += 1

    return ""


def findAlbumImageUrl(soup) -> str:
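    # The album cover is the first 'lightbox' link on the album page.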
    image_soup = soup.findAll('a', {'rel': 'lightbox'})
    if len(image_soup) == 0:
        return ""
    else:
        return str(image_soup[0]["href"])


def findAlbumTitle(soup) -> str:
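    # mikudb titles album posts with an <h1 class="album-title"> heading.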
    title_soup = soup.findAll('h1', {'class': 'album-title'}, text=True)
    if len(title_soup) == 0:
        return "UNKNOWN TITLE!! somehow the parser failed... idk, please ping @NaiJi on this post"
    else:
        return str(title_soup[0].get_text())


def main():
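    # last_page.dat holds the highest known mikudb search page id
    # (392 in this commit), so random picks stay within real pages.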

    with open('last_page.dat', 'r', encoding='utf-8') as file:
        last_page_id = file.readlines()[0]

    album_url = findRandomAlbumUrl(last_page_id)
    print(album_url)
    if album_url == "":
        return

    # PARSING ACTUAL ALBUM PAGE

    resp = requests.get(album_url)
    soup = BeautifulSoup(resp.text, 'lxml')

    image_url = findAlbumImageUrl(soup)
    album_title = findAlbumTitle(soup)

    # PARSING ALBUM INFO BOX
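    # The info box is not parsed with CSS selectors; its raw HTML is split
    # into lines and each value is sliced out between string tokens, e.g.
    #   <li><span>Type:</span> <a href="..." rel="tag">album</a></li>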

    info_raw = str(soup.find('div', {'class': 'album-box album-infopost panel panel-default'}))
    info_splits = info_raw.split('\n')

    # Defaults, so the code below still works when the info box is missing
    # (soup.find returns None and info_raw contains no newlines).
    alternative_name = ''
    type_names = []
    release_year = ''
    vocal_names = []
    producers_names = []
    genres_names = []
    links = []

    if len(info_splits) != 1:

        span_token = '</span>'
        li_token = '</li>'
        tag_token = 'rel="tag">'
        a_token = '</a>'
        href_token = '<a href="'
        href_end_token = '">'

        # # # ALTERNATIVE NAME

        alternative_name = ''
        for split in info_splits:
            if ' names:' in split:
                begin = split.find(span_token, 0) + len(span_token)
                end = split.find(li_token, begin)
                alternative_name = split[begin : end]
                break

        # # # TYPE

        type_names = []
        for split in info_splits:
            if 'Type:' in split:
                amount = split.count(tag_token)
                begin = 0
                end = 0
                for i in range(amount):
                    begin = split.find(tag_token, end) + len(tag_token)
                    end = split.find(a_token, begin)
                    type_names.append(split[begin : end])
                break

        # # # RELEASE YEAR

        release_year = ''
        for split in info_splits:
            if 'Release Date:' in split:
                begin = split.find(tag_token, 0) + len(tag_token)
                end = split.find(a_token, begin)
                release_year = split[begin : end]
                break

        # # # VOCALS

        vocal_names = []
        for split in info_splits:
            if 'Vocals:' in split:
                amount = split.count(tag_token)
                begin = 0
                end = 0
                for i in range(amount):
                    begin = split.find(tag_token, end) + len(tag_token)
                    end = split.find(a_token, begin)
                    vocal_names.append(split[begin : end])
                break

        # # # PRODUCERS

        producers_names = []
        for split in info_splits:
            if 'Producer:' in split:
                amount = split.count(tag_token)
                begin = 0
                end = 0
                for i in range(amount):
                    begin = split.find(tag_token, end) + len(tag_token)
                    end = split.find(a_token, begin)
                    producers_names.append(split[begin : end])
                break

        # # # GENRES

        genres_names = []
        for split in info_splits:
            if 'Genre:' in split:
                amount = split.count(tag_token)
                begin = 0
                end = 0
                for i in range(amount):
                    begin = split.find(tag_token, end) + len(tag_token)
                    end = split.find(a_token, begin)
                    genres_names.append(split[begin : end])
                break

        # # # LINKS

        links = []
        for split in info_splits:
            if 'Official site' in split:
                amount = split.count(href_token)
                begin = 0
                end = 0
                for i in range(amount):
                    begin = split.find(href_token, end) + len(href_token)
                    end = split.find(href_end_token, begin)
                    links.append(split[begin : end])
                break

    print(album_title)
    print('--------')
    print(alternative_name)
    print(type_names)
    print(vocal_names)
    print(producers_names)
    print(genres_names)
    print(release_year)
    print(links)
    print(image_url)

    # SEARCHING FOR YOUTUBE URL
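    # Album posts often embed a YouTube player; take the embed's src
    # attribute as the video link (the last match wins, since the loop
    # never breaks).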

    youtube_url = ''

    video_page_splits = str(soup).split('\n')
    for split in video_page_splits:
        if 'youtube' in split:
            begin = split.find('src="', 0) + len('src="')
            end = split.find('"', begin)
            youtube_url = split[begin : end]

    # SEARCHING FOR VOCADB URL
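    # The post body usually links the album's matching VocaDB entry, which
    # carries cleaner metadata and external links than mikudb itself.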

    vocadb_url = ""
    entry_content_soup = soup.findAll('div', {'class': 'entry-content'})
    entry_content_splits = str(entry_content_soup).split('\n')
    for split in entry_content_splits:
        if 'vocadb.net' in split:
            begin = split.find('a href="', 0) + len('a href="')
            end = split.find('">Vo', begin)
            vocadb_url = split[begin : end]

    # PARSING VOCADB PAGE
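    # From VocaDB (only if the page has a cover image, i.e. it really is an
    # album entry), collect non-Amazon external links and, when no video was
    # found above, fall back to the first NicoNico link.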

    external_links = []
    # The scraped href comes HTML-escaped ('&amp;'), so strip the 'amp;' part.
    vocadb_url = vocadb_url.replace('amp;', '')
    if len(vocadb_url) > 0:
        resp = requests.get(vocadb_url)
        soup = BeautifulSoup(resp.text, 'lxml')
        if len(soup.findAll('img', {'class': 'coverPic'})) > 0:
            vocadb_splits = str(soup).split('\n')
            for split in vocadb_splits:
                if 'www.nicovideo.jp/watch' in split and len(youtube_url) == 0:
                    begin = split.find('href="', 0) + len('href="')
                    end = split.find('">', begin)
                    youtube_url = split[begin : end]
                if 'class="extLink"' in split and 'amazon' not in split:
                    begin = split.find('href="', 0) + len('href="')
                    end = split.find('" onclick', begin)
                    external_links.append(split[begin : end])

    print(external_links)
    print(youtube_url)

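    # BUILDING THE STATUS TEXT
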
    text = "ALBUM:\n" + album_title

    if len(alternative_name) > 0:
        text += '\n\nALTERNATIVE TITLES:\n' + alternative_name

    if len(type_names) > 0:
        text += '\n\nTYPE:\n'
        for type_name in type_names:
            text += (type_name + ' ')

    if len(vocal_names) > 0:
        text += '\n\nVOCAL:\n'
        for vocal_name in vocal_names:
            text += (vocal_name + ' ')

    if len(producers_names) > 0:
        text += '\n\nPRODUCING:\n'
        for producer_name in producers_names:
            text += (producer_name + ' ')

    if len(genres_names) > 0:
        text += '\n\nGENRE:\n'
        for genre_name in genres_names:
            text += (genre_name + ' ')

    if len(release_year) > 0:
        text += '\n\nRELEASED:\n' + release_year

    if len(youtube_url) > 0:
        text += '\n\nVIDEO: \n' + youtube_url

    # Prefer VocaDB's external links; fall back to mikudb's own.
    if len(external_links) == 0:
        external_links = links

    if len(external_links) > 0:
        text += '\n\nLINKS: \n'
        for external_link in external_links:
            text += (external_link + '\n\n')

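    # POSTING TO MASTODON

    # Mastodon.py also accepts a path to a file holding the token for
    # access_token, so 'token.dat' (kept out of git above) works here.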
    mastodon = Mastodon(
        access_token='token.dat',
        api_base_url='https://udongein.xyz/'
    )

    # Derive the MIME subtype from the image extension; 'jpg' files must
    # be uploaded as 'image/jpeg'.
    fformat = op.splitext(image_url)[1][1:]
    if fformat == 'jpg':
        fformat = 'jpeg'

    image_media = mastodon.media_post(requests.get(image_url).content, f'image/{fformat}')

    mastodon.status_post(text, media_ids=[image_media], visibility='unlisted', sensitive=False)


if __name__ == '__main__':
    sys.exit(main())