Initial commit

This commit is contained in:
syrell 2023-07-09 16:12:36 +02:00
commit 23c73f1bc9
Signed by: syrell
GPG Key ID: BC9570E849334AF9
5 changed files with 363 additions and 0 deletions

161
.gitignore vendored Normal file
View File

@ -0,0 +1,161 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.vscode/

11
LICENSE Normal file
View File

@ -0,0 +1,11 @@
Copyright (c) <year> <owner>. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

54
README.adoc Normal file
View File

@ -0,0 +1,54 @@
= Anki Pokemon flashcards generator
== Context
I wanted to learn the Pokedex using flashcards on Anki, but I couldn't find a deck that was convenient for my usage. I therefore created a handy script that scrapes Pokemon pictures and then formats a CSV file which is easily importable into Anki.
== Procedure and usage
This script downloads every Pokedex entry for a given generation. It scrapes the https://www.pokemontrash.com/[Pokemon Trash] website, which only contains entries from the 1st to the 7th generation but provides good-quality Pokemon PNG pictures. The entries are then formatted and written to a CSV file that you can import into Anki.
WARNING: By its nature, scraping is not guaranteed to keep working in the future. Also, this script retrieves French Pokemon names.
[source,shell]
-----
usage: pokemons_to_anki.py [-h] -g GENERATION [-p IMAGES_PATH] [-f FILENAME]
optional arguments:
-h, --help show this help message and exit
-g GENERATION, --generation GENERATION
<int> Generation to retrieve <1-7>
-p IMAGES_PATH, --images-path IMAGES_PATH
Where to store Pokemons pictures, default to ./Pictures
-f FILENAME, --filename FILENAME
Name of CSV produced in output, default to output.csv
-----
== CSV Output and flashcard example
[.right]
image:flashcard_example.png[Camérupt flashcard]
[source, csv]
<img src="252.png"><br>N°252 ;Arcko
<img src="253.png"><br>N°253 ;Massko
<img src="254.png"><br>N°254 ;Jungko
<img src="255.png"><br>N°255 ;Poussifeu
<img src="256.png"><br>N°256 ;Galifeu
<img src="257.png"><br>N°257 ;Braségali
<img src="258.png"><br>N°258 ;Gobou
<img src="259.png"><br>N°259 ;Flobio
<img src="260.png"><br>N°260 ;Laggron
<img src="261.png"><br>N°261 ;Medhyèna
<img src="262.png"><br>N°262 ;Grahyèna
<img src="263.png"><br>N°263 ;Zigzaton
<img src="264.png"><br>N°264 ;Linéon
<img src="265.png"><br>N°265 ;Chenipotte
<img src="266.png"><br>N°266 ;Armulys
<img src="267.png"><br>N°267 ;Charmillon
<img src="268.png"><br>N°268 ;Blindalys
<img src="269.png"><br>N°269 ;Papinox
<img src="270.png"><br>N°270 ;Nénupiot
<img src="271.png"><br>N°271 ;Lombre
<img src="272.png"><br>N°272 ;Ludicolo
...

BIN
flashcard_example.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

137
pokemons_to_anki.py Normal file
View File

@ -0,0 +1,137 @@
# -*- coding: utf-8 -*-
import argparse
import os
import re
from typing import List
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
def download_img(url, img_path) -> None:
    """Download the image at *url* into the directory *img_path*.

    The local file name is the last path segment of *url* (for example
    ``252.png``). If a file with that name already exists in *img_path*,
    nothing is downloaded and a message is printed instead.

    Args:
        url: Absolute URL of the image to fetch.
        img_path: Existing directory in which the image is stored.

    Raises:
        SystemExit: If the HTTP request fails, times out, returns an error
            status, or the transfer is interrupted.
    """
    # Format path as <img_path>/<pokedex_entry_number>.png
    filename = os.path.join(img_path, url.split("/")[-1])

    # Check first: there is no point in opening a connection for a file we
    # are not going to write.
    if os.path.exists(filename):
        print(f"File {filename} already exists!")
        return

    try:
        with requests.get(url, stream=True, timeout=30) as response:
            # Without this, an HTML error page would be saved as a ".png".
            response.raise_for_status()
            file_size = int(response.headers.get("Content-Length", 0))
            with open(filename, "wb") as f, tqdm(
                desc=f"Downloading {filename}",
                # Unknown length: let tqdm show a plain counter.
                total=file_size or None,
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
            ) as progress:
                for data in response.iter_content(1024):
                    f.write(data)
                    progress.update(len(data))
    except requests.exceptions.RequestException as e:
        # The file did not exist before this call, so anything present now is
        # a partial download; remove it so a later run retries it.
        if os.path.exists(filename):
            os.remove(filename)
        raise SystemExit(e)
def write_csv_entries(pokemons_list, csv_file) -> None:
    """Write one Anki entry per line to *csv_file*, never overwriting a file.

    A ``.csv`` extension is appended when *csv_file* does not already end
    with one. If the resulting file already exists it is left untouched and
    a message is printed.

    Args:
        pokemons_list: Iterable of already formatted row strings.
        csv_file: Destination path, with or without the ``.csv`` extension.
    """
    # Normalise the name BEFORE the existence check; otherwise passing
    # "output" would silently overwrite an existing "output.csv".
    if not csv_file.lower().endswith(".csv"):
        csv_file = csv_file + ".csv"

    if os.path.exists(csv_file):
        print(f"File {csv_file} already exists!")
        return

    print(f"Writing entries to {csv_file}")
    with open(csv_file, "w", encoding="utf-8") as f:
        f.writelines(pokemon_row + "\n" for pokemon_row in pokemons_list)
def find_pokemon_image(main_url) -> List:
    """Fetch *main_url* and return every ``<table class="pokedex">`` in it.

    Args:
        main_url: URL of the page listing the Pokedex tables.

    Returns:
        The BeautifulSoup result set of matching ``<table>`` elements, in
        document order (empty if the page contains none).

    Raises:
        SystemExit: If the request fails, times out, or returns an HTTP
            error status.
    """
    # Keep the try body limited to the network call it guards.
    try:
        res = requests.get(main_url, timeout=30)
        # An error page would otherwise be parsed and yield no tables.
        res.raise_for_status()
    except requests.exceptions.RequestException as e:
        raise SystemExit(e)

    soup = BeautifulSoup(res.content, "html.parser")
    return soup.find_all("table", {"class": "pokedex"})
def format_csv_entries(pokemons_table, db_url, pokedex_url, img_path) -> List:
    """Build the Anki CSV rows for one Pokedex table and download its images.

    For every ``<tr>`` of *pokemons_table*, the third ``<td>`` cell supplies
    the Pokemon name (its text) and the Pokedex number (the first run of one
    to three digits found in the cell's HTML). The Pokemon's own page is then
    fetched and the first ``<img>`` on that page is downloaded into
    *img_path*.

    Args:
        pokemons_table: BeautifulSoup ``<table>`` element to walk.
        db_url: Site root, used as base for relative image URLs.
        pokedex_url: Prefix prepended to each Pokemon page link.
        img_path: Directory in which images are stored.

    Returns:
        A list of strings shaped like
        ``<img src="252.png"><br>N°252 ;Arcko``, one per Pokemon row.

    Raises:
        SystemExit: If fetching a Pokemon page (or its image) fails.
    """
    # Function-scope import so the block does not depend on a file-level change.
    from urllib.parse import urljoin

    # Compiled once instead of being re-looked-up on every row.
    pokedex_number = re.compile(r"[0-9]{1,3}")
    pokemon_pattern = re.compile(r"[0-9]{1,3}.*html")
    pokemons_list = []

    for row in pokemons_table.find_all("tr"):
        cells = row.find_all("td")
        # Rows without a third <td> (e.g. header rows) carry no entry.
        if len(cells) < 3:
            continue
        name_cell = cells[2]

        entry_match = pokedex_number.search(str(name_cell))
        if entry_match is None:
            continue
        current_entry = entry_match.group()
        current_pokemon = name_cell.get_text()
        pokemons_list.append(
            f'<img src="{current_entry}.png"><br>N°{current_entry} ;'
            + current_pokemon
        )

        link_match = pokemon_pattern.search(str(name_cell.find_all("a")))
        if link_match is None:
            print(f"No page link found for entry {current_entry}, image skipped")
            continue
        new_url = pokedex_url + link_match.group()

        try:
            entry_pokemon = requests.get(new_url, timeout=30)
            entry_pokemon.raise_for_status()
        except requests.exceptions.RequestException as e:
            raise SystemExit(e)

        new_soup = BeautifulSoup(entry_pokemon.content, "html.parser")
        first_img = new_soup.find("img")
        pokemon_img_url = first_img.attrs.get("src") if first_img else None
        if not pokemon_img_url:
            print(f"No image found for entry {current_entry}, image skipped")
            continue

        # urljoin leaves absolute URLs untouched and resolves relative ones
        # against the site root without producing a double slash.
        download_img(urljoin(db_url, pokemon_img_url), img_path)

    return pokemons_list
def main(argv=None) -> None:
    """Command-line entry point: scrape one generation and write the CSV.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). Defaults to ``sys.argv[1:]`` when ``None``.

    Raises:
        SystemExit: On invalid arguments (argparse, exit status 2), when the
            listing page does not contain the requested generation table, or
            when a network request fails.
    """
    # Newest generation available on the site; its table comes first.
    latest_generation = 7

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-g",
        "--generation",
        type=int,
        # Reject out-of-range values up front: a generation above 7 would
        # otherwise produce a negative index and silently pick a wrong table.
        choices=range(1, latest_generation + 1),
        metavar="GENERATION",
        help="<int> Generation to retrieve <1-7>",
        required=True,
    )
    parser.add_argument(
        "-p",
        "--images-path",
        help="Where to store Pokemons pictures, default to ./Pictures",
        default="Pictures",
    )
    parser.add_argument(
        "-f",
        "--filename",
        help="Name of CSV produced in output, default to output.csv",
        default="output.csv",
    )
    args = parser.parse_args(argv)

    os.makedirs(args.images_path, exist_ok=True)

    db_url = "https://www.pokemontrash.com/"
    pokedex_url = db_url + "pokedex/"
    main_url = pokedex_url + "liste-pokemon.php"

    pokemons_soap = find_pokemon_image(main_url)

    # Table index 0 is 7th generation
    table_index = latest_generation - args.generation
    if table_index >= len(pokemons_soap):
        raise SystemExit(
            f"Generation {args.generation} table not found at {main_url}"
        )
    chosen_generation_table = pokemons_soap[table_index]

    pokemons_list = format_csv_entries(
        chosen_generation_table, db_url, pokedex_url, args.images_path
    )
    write_csv_entries(pokemons_list, args.filename)


if __name__ == "__main__":
    main()