import logging
import os
from urllib.parse import urljoin

import cv2
import numpy as np
import requests
from bs4 import BeautifulSoup
from PIL import Image

# Output folders for the two image classes the downloader produces.
LINE_ART_DIR = "train_images/line_arts"
COLORED_DIR = "train_images/colored"

# Create both folders up front; exist_ok=True makes re-running the script a
# no-op when they are already present.
os.makedirs(LINE_ART_DIR, exist_ok=True)
os.makedirs(COLORED_DIR, exist_ok=True)

def _decode_image(data):
    """Decode raw image bytes into a BGR array, or return None if not an image."""
    if not data:
        # cv2.imdecode raises on an empty buffer instead of returning None.
        return None
    buffer = np.frombuffer(data, np.uint8)
    return cv2.imdecode(buffer, cv2.IMREAD_COLOR)


def _is_grayscale(image):
    """Return True when a decoded image carries no colour information."""
    if image.ndim == 2:
        return True
    blue, green, red = image[..., 0], image[..., 1], image[..., 2]
    # IMREAD_COLOR always yields three channels, so a channel-count test can
    # never detect line art; identical B, G and R planes can.
    # NOTE: the comparison is exact, so a black-and-white page stored with
    # slight chroma noise is classified as colored.
    return bool(np.array_equal(blue, green) and np.array_equal(green, red))


def download_and_process_images(manga_title, url, timeout=30):
    """Download every image referenced by a page and sort it by colour content.

    The page at ``url`` is fetched and parsed, each ``<img>`` tag's image is
    downloaded and decoded, and the result is written as a PNG into
    ``LINE_ART_DIR`` (no colour information) or ``COLORED_DIR`` (anything
    else). Images that cannot be downloaded or decoded are logged and skipped.

    Args:
        manga_title: Prefix used for every output file name.
        url: Address of the HTML page to scan for ``<img>`` tags.
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        requests.RequestException: If the page itself cannot be fetched.
    """
    logger = logging.getLogger(__name__)

    # Make the function usable on its own, independent of module-level setup.
    os.makedirs(LINE_ART_DIR, exist_ok=True)
    os.makedirs(COLORED_DIR, exist_ok=True)

    # One session reuses the connection for the page and all of its images.
    with requests.Session() as session:
        response = session.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        for index, tag in enumerate(soup.find_all('img')):
            # Some pages keep the real address in data-src (lazy loading);
            # fall back to src, and skip tags that have neither.
            src = tag.get('data-src') or tag.get('src')
            if not src:
                continue
            # Resolve relative and protocol-relative addresses against the page.
            img_url = urljoin(url, src)

            try:
                img_response = session.get(img_url, timeout=timeout)
                img_response.raise_for_status()
            except requests.RequestException as exc:
                logger.warning("Skipping %s: %s", img_url, exc)
                continue

            image = _decode_image(img_response.content)
            if image is None:
                logger.warning("Skipping %s: not a decodable image", img_url)
                continue

            # The index keeps each image of the page in its own file instead
            # of overwriting a single "<title>_colored.png".
            if _is_grayscale(image):
                out_path = os.path.join(
                    LINE_ART_DIR, f"{manga_title}_{index:04d}_line_art.png"
                )
            else:
                out_path = os.path.join(
                    COLORED_DIR, f"{manga_title}_{index:04d}_colored.png"
                )

            if not cv2.imwrite(out_path, image):
                logger.warning("Could not write %s", out_path)

# Placeholder inputs: replace the "your_manga_id" part of each URL with a real
# gallery id before running.
manga_title = "example_manga"
nhentai_url = "https://nhentai.net/g/your_manga_id/"
hitomi_url = "https://hitomi.la/galleries/your_manga_id.html"

# Only start downloading when executed as a script, not when imported.
if __name__ == "__main__":
    # Output file names are derived from the title, so each source gets its
    # own suffix; sharing one title would let the second run overwrite the
    # files written by the first.
    download_and_process_images(f"{manga_title}_nhentai", nhentai_url)
    download_and_process_images(f"{manga_title}_hitomi", hitomi_url)