#!/usr/bin/env python3
|
|
import os
|
|
import sys
|
|
import shutil
|
|
import subprocess
|
|
import hashlib
|
|
import urllib.parse
|
|
from pathlib import Path
|
|
|
|
# ==============================================================================
# CONFIGURATION
# ==============================================================================
# Change this path to wherever you want your categorized archives to sit.
# main() creates this directory (and any missing parents) before archiving,
# and every saved "<name>.html" file is written directly inside it.
OUTPUT_DIR = Path("/home/kyren/Archive/html")
# ==============================================================================
|
|
|
|
def verify_dependencies():
    """Abort the program unless the ``monolith`` executable can be found.

    The command is looked up on PATH with ``shutil.which``. When it is
    missing, an error message is written to stderr and the process exits
    with status 1; otherwise the function returns ``None``.
    """
    monolith_path = shutil.which("monolith")
    if monolith_path:
        return

    print("Error: 'monolith' CLI not found. Please ensure it is installed and in your PATH.", file=sys.stderr)
    sys.exit(1)
|
|
|
|
def clean_filename(title: str) -> str:
    """Sanitize a webpage title so it is safe to use as a file name.

    Surrounding whitespace is removed, then every character that is
    problematic in file names is replaced with an underscore:

    * the path/reserved characters ``/ \\ < > : " | ? *``
    * all ASCII control characters (``\\x00``-``\\x1f``), which covers NUL as
      well as embedded newlines and tabs that titles frequently contain

    Args:
        title: Raw text to turn into a file-name component.

    Returns:
        The sanitized text. It may be empty when *title* is empty or
        consists only of whitespace; callers must handle that case.
    """
    unsafe_chars = '/\\<>:"|?*' + "".join(map(chr, range(0x20)))
    # One translate() pass instead of one str.replace() call per character.
    table = str.maketrans(dict.fromkeys(unsafe_chars, "_"))
    # Strip first: leading/trailing whitespace (including newlines) is dropped
    # rather than being turned into underscores. For the printable reserved
    # characters the result is the same as replacing first and stripping after.
    return title.strip().translate(table)
|
|
|
|
def get_fallback_filename(url: str) -> str:
    """Build a file-name stem (no extension) from the structure of *url*.

    The stem is made of the host (netloc), the path with ``/`` turned into
    ``_``, and the query string, joined by underscores. Empty components are
    skipped so a URL without a host does not produce a leading underscore.
    The query is included so that pages differing only in their query
    (``item?id=1`` vs ``item?id=2``) do not map to the same file and
    overwrite each other.

    Args:
        url: The URL being archived.

    Returns:
        A sanitized stem of at most 60 characters; ``"untitled"`` when the
        URL yields no usable host, path, or query.
    """
    parsed = urllib.parse.urlparse(url)
    clean_path = parsed.path.strip("/").replace("/", "_")

    components = [part for part in (parsed.netloc, clean_path, parsed.query) if part]

    # NOTE: truncating to 60 characters can still make two long URLs collide.
    stem = clean_filename("_".join(components))[:60]

    # An empty stem would turn into the hidden file ".html".
    return stem or "untitled"
|
|
|
|
def register_dolphin_thumbnail(html_path: Path):
    """Screenshot a saved HTML file into the XDG thumbnail cache (best effort).

    The PNG is written to ``<cache>/thumbnails/large/<md5>.png``, where
    ``<md5>`` is the MD5 hex digest of the file's ``file://`` URI and
    ``<cache>`` is ``$XDG_CACHE_HOME`` when that is set to an absolute path,
    otherwise ``~/.cache``.

    Thumbnail generation is optional: if Playwright is not installed, the
    browser cannot be launched, or the page cannot be rendered, a warning is
    printed to stderr and the function returns normally.

    NOTE(review): the screenshot is saved at the 1024x768 viewport size and
    nothing here resizes it or adds PNG metadata. The thumbnail spec appears
    to expect "large" entries of at most 256x256 pixels carrying
    ``Thumb::URI`` / ``Thumb::MTime`` text chunks -- confirm that Dolphin
    accepts the file as written.

    Args:
        html_path: Path of the locally saved HTML file to preview.
    """
    print(" Generating visual thumbnail for Dolphin...")

    # 1. Thumbnail cache directory for "large" thumbnails. Honor
    #    $XDG_CACHE_HOME when it is an absolute path; fall back to ~/.cache.
    cache_home = os.environ.get("XDG_CACHE_HOME", "")
    if os.path.isabs(cache_home):
        cache_root = Path(cache_home)
    else:
        cache_root = Path(os.path.expanduser("~/.cache"))
    thumb_dir = cache_root / "thumbnails" / "large"
    thumb_dir.mkdir(parents=True, exist_ok=True)

    # 2. Convert the absolute file path to a deterministic file:// URI.
    file_uri = html_path.resolve().as_uri()

    # 3. The cache entry is named after the MD5 hex digest of that exact URI.
    md5_hash = hashlib.md5(file_uri.encode("utf-8")).hexdigest()
    target_thumb_path = thumb_dir / f"{md5_hash}.png"

    # Playwright is an optional third-party dependency; without it the
    # archive itself is still valid, so only warn.
    try:
        from playwright.sync_api import sync_playwright
    except ImportError as e:
        print(f" Warning: Could not generate preview thumbnail: {e}", file=sys.stderr)
        return

    # 4. Spin up headless Chromium to capture a view of the *local* file.
    #    Starting Playwright and launching the browser are inside the try so
    #    that a missing browser binary is reported as a warning, not a crash.
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page(viewport={"width": 1024, "height": 768})
                page.goto(file_uri, wait_until="networkidle")
                page.screenshot(path=str(target_thumb_path), type="png")
            finally:
                browser.close()
    except Exception as e:
        print(f" Warning: Could not generate preview thumbnail: {e}", file=sys.stderr)
        return

    print(f" Thumbnail registered successfully at: {target_thumb_path}")
|
|
|
|
def main():
    """Archive the URL given on the command line into a single HTML file.

    Steps:
      1. Read the target URL from ``sys.argv[1]``.
      2. Check that ``monolith`` is installed and create ``OUTPUT_DIR``.
      3. Run ``monolith <URL> -o <OUTPUT_DIR>/<name>.html``.
      4. Register a preview thumbnail for the saved file.

    Exits with status 1 when no URL is given, when monolith fails, or when
    monolith reports success but the output file does not exist.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <URL>", file=sys.stderr)
        sys.exit(1)

    target_url = sys.argv[1]
    verify_dependencies()
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # Generate a reliable filename based on the target URL
    filename_base = get_fallback_filename(target_url)
    final_html_path = OUTPUT_DIR / f"{filename_base}.html"

    print(f" Archiving: {target_url}")
    print(f" Saving to: {final_html_path}")

    # Run Monolith to compile the remote web page down into a single local
    # file; -o specifies the output file destination.
    try:
        subprocess.run(
            ["monolith", target_url, "-o", str(final_html_path)],
            check=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"Error executing Monolith: {e}", file=sys.stderr)
        sys.exit(1)

    # A zero exit status without an output file is still a failure; report it
    # through the exit code so calling scripts can detect it.
    if not final_html_path.exists():
        print("Error: Target HTML file was not created successfully.", file=sys.stderr)
        sys.exit(1)

    # Generate the thumbnail mapping from the locally saved file
    register_dolphin_thumbnail(final_html_path)
    print("\n Done! File is ready to browse natively in Dolphin.")
|
|
|
|
# Run the archiver only when this file is executed as a script, so importing
# the module has no side effects.
if __name__ == "__main__":
    main()
|