from requests_html import HTMLSession
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from urllib.parse import urlparse
from PIL import Image
from io import BytesIO
from django.core.files import File
import uuid

def gen_uuid():
    """Return a random UUID4 as a 32-character lowercase hexadecimal string."""
    # ``UUID.hex`` is a string attribute, not a method; calling it raises
    # ``TypeError: 'str' object is not callable``.
    return uuid.uuid4().hex

class WebPage:
    """Fetch a web page and extract its title, meta tags and favicon URL."""

    def __init__(self, url):
        """Store ``url`` and the network location (host[:port]) parsed from it."""
        self.url = url
        self.url_domain = urlparse(url).netloc

    def _resolve_icon_url(self, href):
        """Return the favicon ``href`` as an absolute URL, or ``None`` if absent.

        The href is resolved against the page URL, so absolute, root-relative
        (``/favicon.ico``), path-relative (``favicon.ico``) and
        protocol-relative (``//cdn.example/x.ico``) forms are all handled.
        """
        from urllib.parse import urljoin

        if not href:
            return None
        return urljoin(self.url, href)

    @staticmethod
    def _merge_meta_tags(meta_data, tag_attrs):
        """Add ``<meta>`` entries to ``meta_data`` in place, first occurrence wins.

        ``tag_attrs`` is an iterable of attribute mappings, one per ``<meta>``
        tag. A tag is stored under its ``name`` when that key is still free,
        otherwise under its ``property`` when that key is still free. Tags
        without a non-empty ``content`` are ignored.
        """
        for attrs in tag_attrs:
            content = attrs.get('content')
            if not content:
                continue
            name = attrs.get('name')
            prop = attrs.get('property')
            if name and name not in meta_data:
                meta_data[name] = content
            # Check the *property* key here; checking ``name`` (usually None)
            # would let later tags silently overwrite earlier ones.
            elif prop and prop not in meta_data:
                meta_data[prop] = content

    def fetch_meta_data(self):
        """Download the page and return its metadata as a dict.

        The result always contains the keys ``title``, ``description``,
        ``keywords`` and ``ico`` (each ``None`` when the page lacks the
        corresponding tag), plus one entry per additional ``<meta>`` tag that
        has a ``name`` or ``property`` and a non-empty ``content``.
        """
        session = HTMLSession()
        try:
            response = session.get(self.url)
            html = response.html

            # Query each selector once and reuse the result.
            icon_link = html.find('link[rel="icon"]', first=True)
            title_tag = html.find('title', first=True)
            description_tag = html.find('meta[name="description"]', first=True)
            keywords_tag = html.find('meta[name="keywords"]', first=True)

            favicon = icon_link.attrs.get('href') if icon_link is not None else None

            meta_data = {
                'title': title_tag.text if title_tag is not None else None,
                'description': (
                    description_tag.attrs.get('content', 'No description')
                    if description_tag is not None else None
                ),
                'keywords': (
                    keywords_tag.attrs.get('content', 'No keywords')
                    if keywords_tag is not None else None
                ),
                'ico': self._resolve_icon_url(favicon),
            }

            self._merge_meta_tags(meta_data, (tag.attrs for tag in html.find('meta')))
            return meta_data
        finally:
            # Release the session's pooled connections even if the request
            # or the parsing fails.
            session.close()

class ScreenshotTaker:
    """Capture a PNG screenshot of a URL with headless Chrome."""

    def __init__(self, url, image_uid):
        """Store the page ``url`` and ``image_uid``, the output file path."""
        self.url = url
        self.image_uid = image_uid

    def capture_screenshot(self):
        """Load ``self.url`` in headless Chrome and write a PNG to ``self.image_uid``.

        The browser is always shut down, even when loading the page or taking
        the screenshot raises.
        """
        chrome_options = Options()
        chrome_options.add_argument("--headless")  # Ensure GUI is off
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")

        # Set up Chrome driver
        driver = webdriver.Chrome(
            service=Service(ChromeDriverManager().install()),
            options=chrome_options,
        )
        try:
            driver.get(self.url)

            # Take a screenshot
            screenshot = driver.get_screenshot_as_png()
        finally:
            # Without this, a failed page load leaves the Chrome process running.
            driver.quit()

        # Save the screenshot
        with open(self.image_uid, 'wb') as file:
            file.write(screenshot)


class WebPageAnalyzer:
    """Combine metadata extraction and screenshot capture for a single URL."""

    def __init__(self, url, image_uid):
        """Create the metadata fetcher and screenshot taker for ``url``.

        ``image_uid`` is the path the screenshot will be written to.
        """
        self.web_page = WebPage(url)
        self.screenshot_taker = ScreenshotTaker(url, image_uid)
        self.image_uid = image_uid

    def analyze(self):
        """Print the page metadata, save a screenshot, and return the metadata."""
        page_info = self.web_page.fetch_meta_data()

        # Report every metadata entry on its own line.
        print("Meta Data:")
        for field in page_info:
            entry = page_info[field]
            print(f"{field}: {entry}")

        # The screenshot is taken only after the metadata has been reported.
        self.screenshot_taker.capture_screenshot()
        print(f"Screenshot saved as {self.image_uid}")

        return page_info


def main(url, image_name):
    """Analyze ``url`` and return ``[screenshot_path, meta_data]``.

    The screenshot path is ``image_name`` placed under
    ``media/company_image_uploads/``.
    """
    upload_prefix = "media/company_image_uploads/"
    screenshot_path = upload_prefix + image_name
    page_meta = WebPageAnalyzer(url, screenshot_path).analyze()
    return [screenshot_path, page_meta]

if __name__ == "__main__":
    import sys

    # main() requires a URL and an image file name; calling it with no
    # arguments always raised TypeError, so read both from the command line.
    if len(sys.argv) != 3:
        sys.exit(f"usage: {sys.argv[0]} <url> <image_name>")
    main(sys.argv[1], sys.argv[2])