# -*- coding: utf-8 -*-
"""Scrape a Pokédex listing, download each entry's picture and write a CSV.

The script fetches the Pokémon list page of pokemontrash.com, picks the table
of the requested generation, downloads the first image found on every entry
page into a local directory and writes one text row per entry to a CSV file.
"""
import argparse
import os
import re
from typing import List
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

# Seconds to wait for the server before giving up; without a timeout a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 30

# The list page is indexed from the newest generation (table 0) downwards.
LATEST_GENERATION = 7

# 1-3 digit number found in the markup of a table cell.
_ENTRY_NUMBER_RE = re.compile(r"[0-9]{1,3}")
# Relative link of an entry page: 1-3 digits followed by anything up to "html".
_ENTRY_PAGE_RE = re.compile(r"[0-9]{1,3}.*html")


def download_img(url, img_path) -> None:
    """Download ``url`` into the directory ``img_path``.

    The local file name is the last ``/``-separated segment of ``url``. If a
    file with that name already exists nothing is requested and a notice is
    printed instead.

    Args:
        url: Address of the picture to download.
        img_path: Directory in which the picture is stored.

    Raises:
        SystemExit: If the file name cannot be derived from ``url`` or if the
            HTTP request fails (connection error, timeout, error status).
    """
    try:
        # Format path as img_path/<last URL segment>
        filename = os.path.join(img_path, url.split("/")[-1])
    except (AttributeError, TypeError) as e:
        raise SystemExit(e) from e

    # Check before touching the network: an existing file needs no request.
    if os.path.exists(filename):
        print(f"File {filename} already exists!")
        return

    # Stream into a temporary name so an interrupted download never leaves a
    # truncated file that a later run would mistake for a complete picture.
    partial = filename + ".part"
    try:
        with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as response:
            # Refuse to store an HTTP error page as if it were an image.
            response.raise_for_status()
            file_size = int(response.headers.get("Content-Length", 0))
            with open(partial, "wb") as f, tqdm(
                desc=f"Downloading {filename}",
                total=file_size or None,  # unknown size -> plain counter
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
            ) as progress:
                for data in response.iter_content(1024):
                    f.write(data)
                    progress.update(len(data))
    except requests.exceptions.RequestException as e:
        if os.path.exists(partial):
            os.remove(partial)
        raise SystemExit(e) from e

    os.replace(partial, filename)


def write_csv_entries(pokemons_list, csv_file) -> None:
    """Write every row of ``pokemons_list`` to ``csv_file``, one per line.

    ``.csv`` is appended to ``csv_file`` when it is missing. An existing file
    is never overwritten; a notice is printed instead.

    Args:
        pokemons_list: Iterable of already formatted text rows.
        csv_file: Path of the file to create.
    """
    # Normalise the name first so the existence check below looks at the file
    # that would actually be written.
    if not csv_file.lower().endswith(".csv"):
        csv_file = csv_file + ".csv"

    if os.path.exists(csv_file):
        print(f"File {csv_file} already exists!")
        return

    print(f"Writing entries to {csv_file}")
    with open(csv_file, "w", encoding="utf-8") as f:
        f.writelines(pokemon_row + "\n" for pokemon_row in pokemons_list)


def find_pokemon_image(main_url) -> List:
    """Return every ``<table class="pokedex">`` element found at ``main_url``.

    Args:
        main_url: Address of the page listing the Pokémon.

    Returns:
        The matching tables, in document order.

    Raises:
        SystemExit: If the page cannot be fetched.
    """
    try:
        res = requests.get(main_url, timeout=REQUEST_TIMEOUT)
        res.raise_for_status()
    except requests.exceptions.RequestException as e:
        raise SystemExit(e) from e

    soup = BeautifulSoup(res.content, "html.parser")
    return soup.find_all("table", {"class": "pokedex"})


def format_csv_entries(pokemons_table, db_url, pokedex_url, img_path) -> List:
    """Build the CSV rows of one table and download the matching pictures.

    For every table row the third ``<td>`` cell is read: the first 1-3 digit
    number in its markup is used as the entry number and its text as the
    entry label. The entry page linked from that cell is then fetched and the
    first ``<img>`` found on it is downloaded with :func:`download_img`.

    Rows with fewer than three cells, or without a number, are skipped. An
    entry without a usable link or picture still gets its CSV row; only the
    download is skipped.

    Args:
        pokemons_table: Parsed ``<table>`` element to read.
        db_url: Site root, used to resolve relative picture addresses.
        pokedex_url: Address prefix of the entry pages.
        img_path: Directory in which pictures are stored.

    Returns:
        The formatted rows, in table order.

    Raises:
        SystemExit: If an entry page or a picture cannot be fetched.
    """
    pokemons_list = []
    for row in pokemons_table.find_all("tr"):
        cells = row.find_all("td")
        if len(cells) < 3:
            # e.g. a header row made of <th> cells
            continue
        columns = cells[2]

        entry_match = _ENTRY_NUMBER_RE.search(str(columns))
        if entry_match is None:
            continue
        current_entry = entry_match.group()
        current_pokemon = columns.get_text()

        # NOTE(review): this literal starts with a line break in the reviewed
        # source; it looks like markup was lost before "N°" - confirm against
        # the upstream file.
        anki_balise = "\nN°" + current_entry + " ;"
        pokemons_list.append(anki_balise + current_pokemon)

        page_match = _ENTRY_PAGE_RE.search(str(columns.find_all("a")))
        if page_match is None:
            print(f"No entry page found for N°{current_entry}, picture skipped")
            continue
        new_url = pokedex_url + page_match.group()

        try:
            entry_pokemon = requests.get(new_url, timeout=REQUEST_TIMEOUT)
            entry_pokemon.raise_for_status()
        except requests.exceptions.RequestException as e:
            raise SystemExit(e) from e

        new_soup = BeautifulSoup(entry_pokemon.content, "html.parser")
        first_img = new_soup.find("img")
        pokemon_img_url = first_img.attrs.get("src") if first_img else None
        if not pokemon_img_url:
            print(f"No picture found for N°{current_entry}, download skipped")
            continue

        # urljoin keeps absolute addresses untouched and resolves relative
        # ones against the site root.
        download_img(urljoin(db_url, pokemon_img_url), img_path)
    return pokemons_list


def main() -> None:
    """Parse the command line, scrape the chosen generation, write the CSV."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-g",
        "--generation",
        type=int,
        help=" Generation to retrieve <1-7>",
        required=True,
    )
    parser.add_argument(
        "-p",
        "--images-path",
        help="Where to store Pokemons pictures, default to ./Pictures",
        default="Pictures",
    )
    parser.add_argument(
        "-f",
        "--filename",
        help="Name of CSV produced in output, default to output.csv",
        default="output.csv",
    )
    args = parser.parse_args()

    # An out-of-range value would otherwise become a negative list index and
    # silently select the wrong table.
    if not 1 <= args.generation <= LATEST_GENERATION:
        parser.error(f"generation must be between 1 and {LATEST_GENERATION}")

    os.makedirs(args.images_path, exist_ok=True)

    db_url = "https://www.pokemontrash.com/"
    pokedex_url = db_url + "pokedex/"
    main_url = pokedex_url + "liste-pokemon.php"

    pokemons_soap = find_pokemon_image(main_url)

    # Table index 0 is 7th generation
    table_index = LATEST_GENERATION - args.generation
    if table_index >= len(pokemons_soap):
        raise SystemExit(
            f"No table found for generation {args.generation} at {main_url}"
        )
    chosen_generation_table = pokemons_soap[table_index]

    pokemons_list = format_csv_entries(
        chosen_generation_table, db_url, pokedex_url, args.images_path
    )
    write_csv_entries(pokemons_list, args.filename)


if __name__ == "__main__":
    main()