From 0c8c6399aa1944920d13281d62ecfe1f4788bf27 Mon Sep 17 00:00:00 2001 From: Jonas Raneryd Imaizumi Date: Sun, 22 Feb 2026 23:22:53 +0100 Subject: [PATCH] Time to vacuum it up --- .gitignore | 13 + README.md | 87 ++++- svtplay-dl-category.py | 774 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 873 insertions(+), 1 deletion(-) create mode 100644 .gitignore create mode 100755 svtplay-dl-category.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1b33641 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +# Download state files +seen_urls.txt +seen_episodes.txt +series_state.json +errors.json + +# Downloaded content +Downloads/ + +# Python +__pycache__/ +*.pyc +*.pyo diff --git a/README.md b/README.md index 09da8d9..b0b77c7 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,87 @@ # svtplay-auto-dl -A script that downloads all SVT Play videos in a given category + +Script for batch-downloading videos from SVT Play using [svtplay-dl](https://svtplay-dl.se/). + +## Prerequisites + +- Python 3.10+ +- [svtplay-dl](https://svtplay-dl.se/) installed and available in `PATH` + +## svtplay-dl-category.py + +Downloads all videos from a given SVT Play category page. Handles both movies (singles) and series with multiple episodes. 
+ +### Features + +- Parses SVT Play category pages to discover all available content +- Downloads movies and individual series episodes via `svtplay-dl -S` +- Downloads cover images as `poster.jpg` (Jellyfin-compatible naming) +- Organizes files into `Downloads/<Category>/<Title> (<Year>)/` +- Tracks downloads across runs to avoid re-downloading moved files +- Retry logic with permanent error tracking for failing downloads +- Detects stale series and suggests marking them as complete +- Graceful stop on Ctrl+C (finishes the current download before exiting) + +### Usage + +```bash +# Download all movies from the default category (Filmer) +python3 svtplay-dl-category.py + +# Download from a different category +python3 svtplay-dl-category.py --url https://www.svtplay.se/kategori/drama?tab=all + +# Preview what would be downloaded without actually downloading +python3 svtplay-dl-category.py --dry-run + +# Mark a finished series so it's never re-checked +python3 svtplay-dl-category.py --mark-complete https://www.svtplay.se/show-name + +# Undo if the series gets new seasons later +python3 svtplay-dl-category.py --unmark-complete https://www.svtplay.se/show-name +``` + +### Options + +| Flag | Default | Description | |------|---------|-------------| | `--url` | `.../kategori/filmer?tab=all` | Category page URL | | `--output-dir` | `Downloads` | Base output directory | | `--seen-file` | `seen_urls.txt` | Tracks completed movies and manually-completed series | | `--seen-episodes-file` | `seen_episodes.txt` | Tracks downloaded episode URLs | | `--series-state-file` | `series_state.json` | Tracks series check history for staleness detection | | `--errors-file` | `errors.json` | Tracks download errors and permanent failures | | `--max-dl N` | `0` (no limit) | Stop after N successful downloads | | `--sleep` | `1.0` | Delay between downloads (seconds) | | `--stale-days` | `365` | Days without new episodes before suggesting completion | | `--dry-run` | | Print commands without
downloading | +| `--mark-complete URL` | | Add a series URL to the seen file and exit | +| `--unmark-complete URL` | | Remove a series URL from the seen file and exit | + +### Tracking files + +The script uses four files to maintain state across runs: + +- **`seen_urls.txt`** -- Movies are added automatically after download. Series are added manually via `--mark-complete`. Any URL in this file is skipped entirely. +- **`seen_episodes.txt`** -- Individual episode URLs, added after each successful download. Since files are moved out of the download folder, this is used instead of relying on svtplay-dl's file-on-disk detection. +- **`series_state.json`** -- Per-series metadata: how many times it has been checked with no new episodes, and when the last new episode was found. Used for staleness suggestions. +- **`errors.json`** -- Per-URL error tracking. A download is retried once immediately on failure, then retried on the next run. After 3 total failed runs, the URL is marked as a permanent error and skipped with a warning. + +### Output structure + +``` +Downloads/ + Filmer/ + Alltid nära dig (2020)/ + poster.jpg + Alltid nära dig (2020).mkv + Bron (2011)/ + poster.jpg + Bron.S01E01.avsnitt-1.mkv + Bron.S01E02.avsnitt-2.mkv + ... +``` + +### Graceful stop + +Press Ctrl+C once to stop after the current download finishes. Press Ctrl+C again to force quit immediately. diff --git a/svtplay-dl-category.py b/svtplay-dl-category.py new file mode 100755 index 0000000..c5e08a9 --- /dev/null +++ b/svtplay-dl-category.py @@ -0,0 +1,774 @@ +#!/usr/bin/env python3 +""" +Download all videos from a SVT Play category page. 
+ +Features: +- Extracts program listings from SVT Play category pages (embedded JSON) +- Downloads movies and series episodes via svtplay-dl +- Tracks movies, episodes, series state, and errors across runs +- Downloads cover images (poster.jpg) for Jellyfin +- Graceful stop on Ctrl+C (finishes current download) +- Suggests marking stale series as complete + +Usage examples: + python3 svtplay-dl-category.py + python3 svtplay-dl-category.py --url https://www.svtplay.se/kategori/serier?tab=all + python3 svtplay-dl-category.py --dry-run + python3 svtplay-dl-category.py --mark-complete https://www.svtplay.se/show-name + python3 svtplay-dl-category.py --unmark-complete https://www.svtplay.se/show-name +""" + +import argparse +import json +import os +import re +import signal +import subprocess +import sys +import time +from datetime import datetime +from html.parser import HTMLParser +from urllib.parse import urljoin, urlparse +from urllib.request import Request, urlopen + +DEFAULT_CATEGORY_URL = "https://www.svtplay.se/kategori/filmer?tab=all" +INFO_SEARCH_EXPR = r'<script\s+id="__NEXT_DATA__"[^>]*>({.+})</script>' + +# --------------------------------------------------------------------------- +# Graceful stop +# --------------------------------------------------------------------------- + +stop_requested = False +current_child: subprocess.Popen | None = None + + +def _signal_handler(signum, frame): + global stop_requested + if stop_requested: + print("\nForce quit!", file=sys.stderr) + if current_child is not None: + current_child.terminate() + sys.exit(1) + print("\nGraceful stop requested. 
Finishing current download...", + file=sys.stderr) + stop_requested = True + + +signal.signal(signal.SIGINT, _signal_handler) +signal.signal(signal.SIGTERM, _signal_handler) + +# --------------------------------------------------------------------------- +# HTTP helpers +# --------------------------------------------------------------------------- + +_HTTP_HEADERS = { + "User-Agent": ( + "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) " + "Gecko/20100101 Firefox/128.0" + ), + "Accept": ( + "text/html,application/xhtml+xml,application/xml;" + "q=0.9,*/*;q=0.8" + ), + "Accept-Language": "sv-SE,sv;q=0.9,en-US;q=0.8,en;q=0.7", +} + + +def fetch_html(url: str, timeout: int = 30) -> str: + req = Request(url, headers=_HTTP_HEADERS) + with urlopen(req, timeout=timeout) as resp: + charset = resp.headers.get_content_charset() or "utf-8" + return resp.read().decode(charset, errors="replace") + + +def download_file(url: str, dest_path: str, timeout: int = 60) -> bool: + req = Request(url, headers={ + "User-Agent": _HTTP_HEADERS["User-Agent"], + }) + try: + with urlopen(req, timeout=timeout) as resp: + with open(dest_path, "wb") as f: + while True: + chunk = resp.read(8192) + if not chunk: + break + f.write(chunk) + return True + except Exception as e: + print(f" WARNING: Image download failed: {e}", file=sys.stderr) + if os.path.exists(dest_path): + os.remove(dest_path) + return False + +# --------------------------------------------------------------------------- +# SVT Play JSON extraction +# --------------------------------------------------------------------------- + + +def extract_page_json(html: str) -> dict | None: + match = re.search(INFO_SEARCH_EXPR, html) + if not match: + return None + try: + return json.loads(match.group(1)) + except json.JSONDecodeError: + return None + + +def _iter_urql_entries(page_json: dict): + for entry in page_json.get("props", {}).get("urqlState", {}).values(): + if "data" in entry: + try: + yield json.loads(entry["data"]) + except 
(json.JSONDecodeError, TypeError): + continue + +# --------------------------------------------------------------------------- +# Category page parsing +# --------------------------------------------------------------------------- + + +def get_category_name(page_json: dict, url: str) -> str: + for entry in _iter_urql_entries(page_json): + for key, data in entry.items(): + if key == "categoryPage" and isinstance(data, dict): + for field in ("heading", "name"): + if data.get(field): + return data[field] + + path = urlparse(url).path + parts = [p for p in path.split("/") if p] + if len(parts) >= 2 and parts[0] == "kategori": + return parts[1].replace("-", " ").title() + return "Unknown" + + +def get_category_items(page_json: dict) -> list[dict]: + items: list[dict] = [] + for entry in _iter_urql_entries(page_json): + for key, data in entry.items(): + if key != "categoryPage" or not isinstance(data, dict): + continue + for tab in data.get("lazyLoadedTabs", []): + if tab.get("slug") != "all": + continue + for module in tab.get("modules", []): + sel = module.get("selection") + if sel: + items.extend(sel.get("items", [])) + return items + +# --------------------------------------------------------------------------- +# Detail page parsing — metadata +# --------------------------------------------------------------------------- + + +def _find_details(page_json: dict) -> dict | None: + # Prefer entries that have smartStart (like svtplay-dl does) + for entry in _iter_urql_entries(page_json): + for key, data in entry.items(): + if (key == "detailsPageByPath" + and isinstance(data, dict) + and "smartStart" in data): + return data + for entry in _iter_urql_entries(page_json): + for key, data in entry.items(): + if (key == "detailsPageByPath" + and isinstance(data, dict) + and "item" in data): + return data + return None + + +def get_video_metadata(html: str) -> tuple[str | None, str | None, + str | None]: + """Return (name, year, image_url) from a detail page.""" + page_json = 
extract_page_json(html) + if not page_json: + return None, None, None + + details = _find_details(page_json) + if not details: + return None, None, None + + name = _safe_get(details, "item", "parent", "name") + if not name: + name = _safe_get(details, "item", "name") + + year = _safe_get(details, "moreDetails", "productionYear") + if year is not None: + year = str(year) + + image_url = _image_from_json(details) or _image_from_html(html) + return name, year, image_url + + +def _safe_get(d, *keys): + for k in keys: + if not isinstance(d, dict): + return None + d = d.get(k) + return d + + +def _image_url_from_dict(img: dict) -> str | None: + if "id" in img and "changed" in img: + return ( + f"https://www.svtstatic.se/image/original/default/" + f"{img['id']}/{img['changed']}?format=auto&quality=100" + ) + return None + + +def _image_from_json(details: dict) -> str | None: + img = _safe_get(details, "item", "parent", "image", "wide") + if isinstance(img, dict): + return _image_url_from_dict(img) + if isinstance(img, str) and img: + return img + + img = _safe_get(details, "images", "wide") + if isinstance(img, dict): + return _image_url_from_dict(img) + if isinstance(img, str) and img: + return img + + return None + +# --------------------------------------------------------------------------- +# Detail page parsing — cover image HTML fallback +# --------------------------------------------------------------------------- + + +class _ImageSrcsetExtractor(HTMLParser): + def __init__(self): + super().__init__() + self._in_container = False + self.image_url: str | None = None + + def handle_starttag(self, tag, attrs): + attr = dict(attrs) + if tag == "div" and attr.get("data-css-selector") == "imageContainer": + self._in_container = True + if tag == "img" and self._in_container and self.image_url is None: + srcset = attr.get("srcset", "") + if srcset: + best_url, best_w = None, 0 + for part in srcset.split(","): + part = part.strip() + pieces = part.rsplit(" ", 1) + if 
len(pieces) == 2: + try: + w = int(pieces[1].rstrip("w")) + except ValueError: + continue + if w > best_w: + best_w = w + best_url = pieces[0] + self.image_url = best_url or attr.get("src") + else: + self.image_url = attr.get("src") + + def handle_endtag(self, tag): + if tag == "div": + self._in_container = False + + +def _image_from_html(html: str) -> str | None: + p = _ImageSrcsetExtractor() + p.feed(html) + return p.image_url + +# --------------------------------------------------------------------------- +# Episode discovery (replaces svtplay-dl -A) +# --------------------------------------------------------------------------- + + +def discover_episode_urls(html: str) -> list[str]: + page_json = extract_page_json(html) + if not page_json: + return [] + + details = _find_details(page_json) + if not details: + return [] + + # If this is a Single, return its own URL + parent_type = _safe_get(details, "item", "parent", "__typename") + if parent_type == "Single": + path = _safe_get(details, "item", "urls", "svtplay") + if path: + return [urljoin("https://www.svtplay.se", path)] + return [] + + videos: list[str] = [] + for module in details.get("modules", []): + mod_id = module.get("id", "") + if mod_id in ("upcoming", "related") or mod_id.startswith("details"): + continue + if "clips" in mod_id: + continue + sel = module.get("selection") + if not sel: + continue + for item in sel.get("items", []): + path = _safe_get(item, "item", "urls", "svtplay") + if path: + full = urljoin("https://www.svtplay.se", path) + if full not in videos: + videos.append(full) + return videos + +# --------------------------------------------------------------------------- +# Tracking files (seen_urls.txt / seen_episodes.txt) +# --------------------------------------------------------------------------- + + +def load_seen(path: str) -> set[str]: + if not path or not os.path.exists(path): + return set() + with open(path, "r", encoding="utf-8") as f: + return {line.strip() for line in f if 
line.strip()} + + +def append_seen(path: str, url: str) -> None: + if not path: + return + with open(path, "a", encoding="utf-8") as f: + f.write(url + "\n") + + +def remove_from_seen(path: str, url: str) -> bool: + if not path or not os.path.exists(path): + return False + with open(path, "r", encoding="utf-8") as f: + lines = f.readlines() + filtered = [l for l in lines if l.strip() != url] + if len(filtered) == len(lines): + return False + with open(path, "w", encoding="utf-8") as f: + f.writelines(filtered) + return True + +# --------------------------------------------------------------------------- +# JSON state files (series_state.json / errors.json) +# --------------------------------------------------------------------------- + + +def load_json_state(path: str) -> dict: + if not path or not os.path.exists(path): + return {} + try: + with open(path, "r", encoding="utf-8") as f: + return json.load(f) + except (json.JSONDecodeError, OSError): + return {} + + +def save_json_state(path: str, data: dict) -> None: + if not path: + return + tmp = path + ".tmp" + with open(tmp, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2, ensure_ascii=False) + os.replace(tmp, path) + +# --------------------------------------------------------------------------- +# Error tracking +# --------------------------------------------------------------------------- + + +def is_permanent_error(errors: dict, url: str) -> bool: + return errors.get(url, {}).get("permanent", False) + + +def record_error(errors: dict, url: str, return_code: int, + errors_file: str) -> None: + entry = errors.get(url, {"fail_count": 0, "permanent": False}) + entry["fail_count"] = entry.get("fail_count", 0) + 1 + entry["last_error"] = f"svtplay-dl exited with code {return_code}" + entry["last_failure"] = datetime.now().isoformat() + if entry["fail_count"] > 2: + entry["permanent"] = True + errors[url] = entry + save_json_state(errors_file, errors) + +# 
--------------------------------------------------------------------------- +# Series state +# --------------------------------------------------------------------------- + + +def update_series_state(state: dict, show_url: str, found_new: bool, + show_name: str, state_file: str) -> None: + entry = state.get(show_url, { + "name": show_name, + "check_count": 0, + "last_new_episode_date": None, + }) + entry["name"] = show_name or entry.get("name", show_url) + + if found_new: + entry["check_count"] = 0 + entry["last_new_episode_date"] = datetime.now().isoformat() + else: + entry["check_count"] = entry.get("check_count", 0) + 1 + + state[show_url] = entry + save_json_state(state_file, state) + + +def find_stale_series(state: dict, stale_days: int): + now = datetime.now() + for url, entry in state.items(): + checks = entry.get("check_count", 0) + if checks < 2: + continue + raw = entry.get("last_new_episode_date") + if raw: + try: + days = (now - datetime.fromisoformat(raw)).days + except ValueError: + days = 9999 + else: + days = 9999 + if days >= stale_days: + yield url, entry.get("name", url), days, checks + +# --------------------------------------------------------------------------- +# svtplay-dl invocation +# --------------------------------------------------------------------------- + + +def run_svtplay_dl(url: str, output_dir: str, dry_run: bool) -> int: + global current_child + cmd = ["svtplay-dl", "-S", "-o", output_dir, url] + print(f" >> {' '.join(cmd)}") + if dry_run: + return 0 + try: + current_child = subprocess.Popen(cmd, start_new_session=True) + rc = current_child.wait() + current_child = None + return rc + except FileNotFoundError: + current_child = None + print("ERROR: svtplay-dl not found in PATH.", file=sys.stderr) + return 127 + + +def download_with_retry(url: str, output_dir: str, dry_run: bool, + errors: dict, errors_file: str) -> bool: + """Attempt download with one immediate retry. 
Returns True on success.""" + if is_permanent_error(errors, url): + print(f" SKIP (permanent error): {url} — see errors.json") + return False + + rc = run_svtplay_dl(url, output_dir, dry_run) + if rc == 0: + if url in errors: + del errors[url] + save_json_state(errors_file, errors) + return True + + print(f" Retrying {url} ...") + rc = run_svtplay_dl(url, output_dir, dry_run) + if rc == 0: + if url in errors: + del errors[url] + save_json_state(errors_file, errors) + return True + + record_error(errors, url, rc, errors_file) + entry = errors.get(url, {}) + if entry.get("permanent"): + print(f" PERMANENT ERROR: {url} " + f"(failed {entry['fail_count']} times total)", + file=sys.stderr) + else: + print(f" ERROR: {url} " + f"(will retry next run, " + f"{entry.get('fail_count', 0)} failures total)", + file=sys.stderr) + return False + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def sanitize_filename(name: str) -> str: + name = re.sub(r'[<>:"/\\|?*]', "", name) + return name.strip(". 
") + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + + +def main(): + ap = argparse.ArgumentParser( + description="Download all videos from a SVT Play category page.", + ) + ap.add_argument( + "--url", default=DEFAULT_CATEGORY_URL, + help="Category page URL (default: %(default)s)", + ) + ap.add_argument( + "--output-dir", default="Downloads", + help="Base output directory (default: %(default)s)", + ) + ap.add_argument( + "--seen-file", default="seen_urls.txt", + help="Tracks completed movie / series URLs (default: %(default)s)", + ) + ap.add_argument( + "--seen-episodes-file", default="seen_episodes.txt", + help="Tracks downloaded episode URLs (default: %(default)s)", + ) + ap.add_argument( + "--series-state-file", default="series_state.json", + help="Tracks series check history (default: %(default)s)", + ) + ap.add_argument( + "--errors-file", default="errors.json", + help="Tracks download errors (default: %(default)s)", + ) + ap.add_argument( + "--sleep", type=float, default=1.0, + help="Delay between downloads in seconds (default: %(default)s)", + ) + ap.add_argument( + "--stale-days", type=int, default=365, + help="Days w/o new episodes before suggesting completion " + "(default: %(default)s)", + ) + ap.add_argument( + "--max-dl", type=int, default=0, metavar="N", + help="Stop after N successful downloads (0 = no limit)", + ) + ap.add_argument( + "--dry-run", action="store_true", + help="Print commands without actually downloading", + ) + ap.add_argument( + "--mark-complete", metavar="URL", + help="Mark a series URL as complete and exit", + ) + ap.add_argument( + "--unmark-complete", metavar="URL", + help="Unmark a series URL as complete and exit", + ) + args = ap.parse_args() + + # ---- mark / unmark ---- + if args.mark_complete: + seen = load_seen(args.seen_file) + if args.mark_complete in seen: + print(f"Already marked as complete: 
{args.mark_complete}") + else: + append_seen(args.seen_file, args.mark_complete) + print(f"Marked as complete: {args.mark_complete}") + return + + if args.unmark_complete: + if remove_from_seen(args.seen_file, args.unmark_complete): + print(f"Unmarked (will be re-checked): {args.unmark_complete}") + else: + print(f"Not found in seen file: {args.unmark_complete}") + return + + # ---- load state ---- + seen = load_seen(args.seen_file) + seen_episodes = load_seen(args.seen_episodes_file) + series_state = load_json_state(args.series_state_file) + errors = load_json_state(args.errors_file) + + # ---- fetch category page ---- + print(f"Fetching category page: {args.url}") + try: + cat_html = fetch_html(args.url) + except Exception as e: + print(f"ERROR: Failed to fetch category page: {e}", file=sys.stderr) + sys.exit(1) + + page_json = extract_page_json(cat_html) + if not page_json: + print("ERROR: Could not extract JSON data from category page.", + file=sys.stderr) + sys.exit(1) + + category_name = get_category_name(page_json, args.url) + items = get_category_items(page_json) + print(f"Category: {category_name}") + print(f"Found {len(items)} items in category listing.") + + if not items: + print("No items found. The page structure may have changed.", + file=sys.stderr) + sys.exit(1) + + # ---- process items ---- + stats = dict( + movies_downloaded=0, + episodes_downloaded=0, + series_checked=0, + skipped_seen=0, + skipped_permanent=0, + errors_this_run=0, + ) + + def dl_limit_reached(): + if args.max_dl <= 0: + return False + total = stats["movies_downloaded"] + stats["episodes_downloaded"] + return total >= args.max_dl + + for idx, item_data in enumerate(items): + if stop_requested or dl_limit_reached(): + if dl_limit_reached(): + print(f"\nReached --max-dl={args.max_dl}. 
Stopping.") + else: + print("\nStopping as requested.") + break + + try: + item = item_data["item"] + url_path = item["urls"]["svtplay"] + item_url = urljoin("https://www.svtplay.se", url_path) + is_single = item.get("__typename") == "Single" + name_hint = url_path.rstrip("/").rsplit("/", 1)[-1] + except (KeyError, TypeError) as e: + print(f"\n WARNING: Skipping malformed item #{idx}: {e}") + continue + + kind = "Movie" if is_single else "Series" + print(f"\n[{idx + 1}/{len(items)}] {kind}: {name_hint}") + + if item_url in seen: + print(" Skipped (in seen file)") + stats["skipped_seen"] += 1 + continue + + # ---- fetch detail page ---- + print(f" Fetching: {item_url}") + try: + detail_html = fetch_html(item_url) + except Exception as e: + print(f" ERROR fetching detail page: {e}", file=sys.stderr) + stats["errors_this_run"] += 1 + continue + + name, year, image_url = get_video_metadata(detail_html) + if not name: + name = name_hint + + folder_name = (f"{sanitize_filename(name)} ({year})" + if year else sanitize_filename(name)) + folder_path = os.path.join( + args.output_dir, sanitize_filename(category_name), folder_name, + ) + if not args.dry_run: + os.makedirs(folder_path, exist_ok=True) + print(f" -> {folder_path}") + + # ---- poster image ---- + poster_path = os.path.join(folder_path, "poster.jpg") + if image_url and not os.path.exists(poster_path): + print(" Downloading poster...") + if args.dry_run: + print(f" >> (dry-run) download poster -> {poster_path}") + else: + download_file(image_url, poster_path) + + # ---- download ---- + if is_single: + if is_permanent_error(errors, item_url): + print(f" SKIP (permanent error) — see errors.json") + stats["skipped_permanent"] += 1 + continue + + if download_with_retry(item_url, folder_path, args.dry_run, + errors, args.errors_file): + if not args.dry_run: + append_seen(args.seen_file, item_url) + seen.add(item_url) + stats["movies_downloaded"] += 1 + else: + stats["errors_this_run"] += 1 + + else: + 
stats["series_checked"] += 1 + episode_urls = discover_episode_urls(detail_html) + total_eps = len(episode_urls) + + new_eps = [ + ep for ep in episode_urls + if ep not in seen_episodes + and not is_permanent_error(errors, ep) + ] + perm_skipped = sum( + 1 for ep in episode_urls + if is_permanent_error(errors, ep) + ) + if perm_skipped: + stats["skipped_permanent"] += perm_skipped + + print(f" Episodes: {total_eps} total, {len(new_eps)} new" + + (f", {perm_skipped} permanently failed" + if perm_skipped else "")) + + found_new = len(new_eps) > 0 + + for ep_i, ep_url in enumerate(new_eps): + if stop_requested or dl_limit_reached(): + break + print(f" Episode [{ep_i + 1}/{len(new_eps)}]: {ep_url}") + if download_with_retry(ep_url, folder_path, args.dry_run, + errors, args.errors_file): + if not args.dry_run: + append_seen(args.seen_episodes_file, ep_url) + seen_episodes.add(ep_url) + stats["episodes_downloaded"] += 1 + else: + stats["errors_this_run"] += 1 + + if (args.sleep > 0 + and ep_i < len(new_eps) - 1 + and not stop_requested): + time.sleep(args.sleep) + + if not args.dry_run: + update_series_state(series_state, item_url, found_new, + name, args.series_state_file) + + if args.sleep > 0 and idx < len(items) - 1 and not stop_requested: + time.sleep(args.sleep) + + # ---- stale series suggestions ---- + stale = list(find_stale_series(series_state, args.stale_days)) + stale = [(u, n, d, c) for u, n, d, c in stale if u not in seen] + if stale: + print(f"\n{'=' * 60}") + print("STALE SERIES — consider marking as complete:") + print(f"{'=' * 60}") + for url, name, days, checks in stale: + print(f'\n "{name}" — no new episodes for {days} days ' + f"(checked {checks} times)") + print(f" python3 {sys.argv[0]} --mark-complete {url}") + + # ---- summary ---- + print(f"\n{'=' * 60}") + print("SUMMARY") + print(f"{'=' * 60}") + print(f" Movies downloaded: {stats['movies_downloaded']}") + print(f" Episodes downloaded: {stats['episodes_downloaded']}") + print(f" Series 
checked: {stats['series_checked']}") + print(f" Skipped (already seen): {stats['skipped_seen']}") + print(f" Skipped (perm. error): {stats['skipped_permanent']}") + print(f" Errors this run: {stats['errors_this_run']}") + if stop_requested: + print(" (Run was interrupted by user)") + + +if __name__ == "__main__": + main()