Files
dotfiles/scripts/archive-page.py
2026-06-10 21:16:20 +03:00

106 lines
4.0 KiB
Python
Executable File

#!/usr/bin/env python3
import os
import sys
import shutil
import subprocess
import hashlib
import urllib.parse
from pathlib import Path
# ==============================================================================
# CONFIGURATION
# ==============================================================================
# Directory that receives the archived single-file HTML pages. main() creates it
# (including any missing parents) before archiving. Change this path to wherever
# you want your categorized archives to sit.
OUTPUT_DIR = Path("/home/kyren/Archive/html")
# ==============================================================================
def verify_dependencies():
    """Abort the script unless the ``monolith`` executable is on PATH.

    Prints an error to stderr and exits with status 1 when it is missing;
    returns ``None`` otherwise.
    """
    monolith_path = shutil.which("monolith")
    if monolith_path:
        return

    message = "Error: 'monolith' CLI not found. Please ensure it is installed and in your PATH."
    print(message, file=sys.stderr)
    sys.exit(1)
def clean_filename(title: str) -> str:
    """Return *title* with filesystem-hostile characters replaced by ``_``.

    Path separators, the characters ``< > : " | ? *`` and NUL are each mapped
    to an underscore; surrounding whitespace is then stripped.
    """
    unsafe_chars = '/\\<>:"|?*\x00'
    # One translation pass replaces every unsafe character at once.
    replacements = str.maketrans({ch: "_" for ch in unsafe_chars})
    return title.translate(replacements).strip()
def get_fallback_filename(url: str) -> str:
    """Build a filesystem-safe base filename (no extension) from *url*.

    The name is made of the URL's network location, its path (slashes turned
    into underscores) and its query string, joined with ``_``. Empty
    components are skipped, so a URL without a host does not produce a leading
    underscore. The query string is included because pages that differ only by
    query (e.g. ``item?id=1`` vs ``item?id=2``) would otherwise map to the
    same file and overwrite each other.

    Args:
        url: The address of the page being archived.

    Returns:
        A sanitized name of at most 60 characters. Falls back to
        ``"untitled"`` when nothing usable can be derived from *url*, so the
        caller never ends up with an empty file name. Because of the 60
        character cap, very long URLs sharing a prefix can still collide.
    """
    parsed = urllib.parse.urlparse(url)
    components = (
        parsed.netloc,
        parsed.path.strip("/").replace("/", "_"),
        parsed.query,
    )
    name = "_".join(part for part in components if part)
    # clean_filename() also neutralises characters such as ':' (port numbers).
    return clean_filename(name)[:60] or "untitled"
def register_dolphin_thumbnail(html_path: Path):
    """Screenshot the saved page and store it in the XDG thumbnail cache.

    This step is best-effort: the archive itself already exists when this is
    called, so any failure here (Playwright not installed, browser not
    available, page failing to load) is reported as a warning on stderr
    instead of raising.

    Args:
        html_path: Path to the locally saved HTML file to preview.
    """
    try:
        from playwright.sync_api import sync_playwright
    except ImportError as e:
        print(f" Warning: Could not generate preview thumbnail: {e}", file=sys.stderr)
        return

    print(" Generating visual thumbnail for Dolphin...")

    # 1. "large" thumbnail directory inside the XDG cache; honour
    #    $XDG_CACHE_HOME and fall back to ~/.cache when it is unset or empty.
    cache_home = os.environ.get("XDG_CACHE_HOME") or os.path.expanduser("~/.cache")
    thumb_dir = Path(cache_home) / "thumbnails" / "large"
    thumb_dir.mkdir(parents=True, exist_ok=True)

    # 2. Convert the absolute file path to a deterministic file URI.
    file_uri = html_path.resolve().as_uri()

    # 3. The cache entry is named after the MD5 hex digest of that exact URI.
    md5_hash = hashlib.md5(file_uri.encode("utf-8")).hexdigest()
    target_thumb_path = thumb_dir / f"{md5_hash}.png"

    # 4. Render the *local* file in headless Chromium and capture the viewport.
    # NOTE(review): the screenshot is written at the 1024x768 viewport size and
    # is not scaled down or tagged with Thumb::URI / Thumb::MTime metadata;
    # confirm that Dolphin accepts such an entry in the "large" cache.
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page(viewport={"width": 1024, "height": 768})
                page.goto(file_uri, wait_until="networkidle")
                page.screenshot(path=str(target_thumb_path), type="png")
                print(f" Thumbnail registered successfully at: {target_thumb_path}")
            finally:
                browser.close()
    except Exception as e:
        # Covers launch failures as well as navigation/screenshot errors.
        print(f" Warning: Could not generate preview thumbnail: {e}", file=sys.stderr)
def main():
    """Archive the URL given on the command line into OUTPUT_DIR.

    Expects the target URL as the first command-line argument. The page is
    saved as a single HTML file by the ``monolith`` CLI, then a preview
    thumbnail is generated from the saved file.

    Exits with status 1 when the URL argument is missing, when ``monolith``
    is unavailable or fails, or when the output file does not exist after
    ``monolith`` has run.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <URL>", file=sys.stderr)
        sys.exit(1)

    target_url = sys.argv[1]
    verify_dependencies()
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # Derive the output file name from the target URL.
    filename_base = get_fallback_filename(target_url)
    final_html_path = OUTPUT_DIR / f"{filename_base}.html"
    print(f" Archiving: {target_url}")
    print(f" Saving to: {final_html_path}")

    # Run Monolith to compile the remote page into a single local file;
    # -o names the output file.
    try:
        subprocess.run(
            ["monolith", target_url, "-o", str(final_html_path)],
            check=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"Error executing Monolith: {e}", file=sys.stderr)
        sys.exit(1)

    if not final_html_path.exists():
        print("Error: Target HTML file was not created successfully.", file=sys.stderr)
        # Report the failure through the exit status like every other error path.
        sys.exit(1)

    # Generate the thumbnail from the locally saved file.
    register_dolphin_thumbnail(final_html_path)
    print("\n Done! File is ready to browse natively in Dolphin.")


if __name__ == "__main__":
    main()