import os
import re
from charset_normalizer import from_path

def detect_encoding(filepath):
    """Guess the text encoding of the file at *filepath*.

    Asks ``charset_normalizer.from_path`` for its best match and returns
    that match's ``encoding``. Falls back to ``"utf-8"`` when no usable
    match is reported.
    """
    best_match = from_path(filepath).best()
    if not best_match:
        return "utf-8"
    return best_match.encoding

# Document title; tolerates attributes on the opening tag (<title lang="en">).
_TITLE_RE = re.compile(r"<title\b[^>]*>(.*?)</title\s*>", re.IGNORECASE | re.DOTALL)

# <img> tags whose src points at a .jpg inside the "pics/" folder.
_PICS_IMG_RE = re.compile(
    r'<img[^>]*src=["\']pics/[^"\']+\.jpg["\'][^>]*>',
    re.IGNORECASE,
)

# A quoted attribute value; blanked out before searching for attribute names
# so that text *inside* a value (e.g. src="pics/alt=1.jpg") is never mistaken
# for an attribute.
_QUOTED_VALUE_RE = re.compile(r'"[^"]*"|\'[^\']*\'')


def _attribute_value(text):
    """Return *text* made safe for use inside a double-quoted HTML attribute."""
    # Collapse runs of whitespace/newlines: <title> content may span lines.
    collapsed = " ".join(text.split())
    # "&" is deliberately left alone: the title is already HTML text, so any
    # entities it contains (&amp;, &eacute;, ...) must not be double-escaped.
    return (
        collapsed.replace('"', "&quot;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
    )


def _has_attribute(tag, name):
    """Return True if *tag* assigns a value to an attribute called *name*."""
    names_only = _QUOTED_VALUE_RE.sub('""', tag)
    # The look-behind rejects longer names that merely end in *name*,
    # such as data-alt= or xlink:title=.
    pattern = rf"(?<![\w:.-]){re.escape(name)}\s*="
    return re.search(pattern, names_only, re.IGNORECASE) is not None


def _add_missing_attributes(tag, value):
    """Return *tag* with ``alt``/``title`` set to *value* wherever absent."""
    missing = [name for name in ("alt", "title") if not _has_attribute(tag, name)]
    if not missing:
        return tag
    additions = " ".join(f'{name}="{value}"' for name in missing)
    # Keep self-closing tags self-closing instead of leaving a stray "/".
    if tag.endswith("/>"):
        return f"{tag[:-2].rstrip()} {additions} />"
    return f"{tag[:-1].rstrip()} {additions}>"


def process_html_file(filepath):
    """Add missing ``alt``/``title`` attributes to ``pics/*.jpg`` images.

    The attribute value is the page's ``<title>`` text. Files with no title,
    an empty title, or a title starting with "Lost Pubs" are left untouched.
    Only ``<img>`` tags whose ``src`` is a ``.jpg`` under ``pics/`` are
    considered, and attributes that already exist are never overwritten.

    Args:
        filepath: Path of the HTML file to update in place.

    Returns:
        True if the file was rewritten, False if nothing needed changing.
    """
    encoding = detect_encoding(filepath)

    # newline="" disables newline translation so the file keeps its original
    # line endings; otherwise one changed tag rewrites every line.
    # NOTE: errors="ignore" is kept as a best-effort policy; undecodable
    # bytes are silently dropped if the detected encoding is wrong.
    with open(filepath, "r", encoding=encoding, errors="ignore", newline="") as f:
        content = f.read()

    title_match = _TITLE_RE.search(content)
    if not title_match:
        return False
    page_title = title_match.group(1).strip()
    # An empty title would only produce meaningless alt=""/title="" pairs.
    if not page_title or page_title.startswith("Lost Pubs"):
        return False

    value = _attribute_value(page_title)
    new_content = _PICS_IMG_RE.sub(
        lambda match: _add_missing_attributes(match.group(0), value),
        content,
    )
    if new_content == content:
        return False

    with open(filepath, "w", encoding=encoding, errors="ignore", newline="") as f:
        f.write(new_content)
    return True

def main():
    """Process every ``.html`` file in the current working directory.

    Each matching file is passed to ``process_html_file`` and a one-line
    status message is printed saying whether the file was updated.
    """
    cwd = os.getcwd()
    for entry in os.listdir(cwd):
        # Extension check is case-insensitive (.html, .HTML, ...).
        if not entry.lower().endswith(".html"):
            continue
        if process_html_file(os.path.join(cwd, entry)):
            print(f"Updated ALT and TITLE attributes in: {entry}")
        else:
            print(f"No changes needed: {entry}")

# Run the batch update only when executed as a script, not when imported.
if __name__ == "__main__":
    main()