<?php
/**
 * @author 1902 Software
 * @copyright Copyright © 2023 1902 Software (https://1902software.com/magento/)
 * @package WriteTextAI_WriteTextAI
 */

namespace WriteTextAI\WriteTextAI\Helper;

use Magento\Framework\App\Helper\AbstractHelper;
use WriteTextAI\WriteTextAI\Model\Config\Source\DisallowedHtml;
use Magento\Framework\Filter\FilterManager;
use Magento\Cms\Model\Template\Filter as CmsFilter;
use Magento\Store\Model\StoreManagerInterface;
use Laminas\Uri\UriFactory;
use Magento\Framework\Escaper;

class Html extends AbstractHelper
{
    /**
     * Filter manager; kept for the tag-stripping stage that is currently commented out
     * in removeDisallowedHtml().
     *
     * @var FilterManager
     */
    protected $filterManager;

    /**
     * CMS template filter, used to resolve {{media url="..."}} directives.
     *
     * @var CmsFilter
     */
    protected $cmsFilter;

    /**
     * Store manager, used to obtain the store base URL.
     *
     * @var StoreManagerInterface
     */
    protected $storeManager;

    /**
     * Escaper, used when writing alt text into an <img> tag.
     *
     * @var Escaper
     */
    protected $escaper;

    /**
     * Store the injected collaborators on the helper.
     *
     * NOTE(review): parent::__construct() is not invoked here; confirm that
     * AbstractHelper does not rely on state initialised by its own constructor.
     *
     * @param FilterManager $filterManager
     * @param CmsFilter $cmsFilter
     * @param StoreManagerInterface $storeManager
     * @param Escaper $escaper
     */
    public function __construct(
        FilterManager $filterManager,
        CmsFilter $cmsFilter,
        StoreManagerInterface $storeManager,
        Escaper $escaper
    ) {
        $this->escaper = $escaper;
        $this->storeManager = $storeManager;
        $this->cmsFilter = $cmsFilter;
        $this->filterManager = $filterManager;
    }

    /**
     * Sanitise an HTML fragment.
     *
     * Only script removal is active. The attribute, inline-style and tag
     * whitelisting stages (removeDisallowedAttributes(), removeDisallowedStyles()
     * and FilterManager::stripTags() driven by the DisallowedHtml::ALLOWED_*
     * lists) are intentionally not invoked at the moment.
     *
     * @param string $html
     * @return string The input itself when it is empty, otherwise the cleaned fragment
     */
    public function removeDisallowedHtml($html = '')
    {
        // empty() also treats null and '0' as "nothing to clean"; those are returned as given.
        return empty($html)
            ? $html
            : $this->removeScriptAndStyleContent($html);
    }

    /**
     * Encode every non-ASCII character of a UTF-8 string as a numeric HTML entity.
     *
     * The result is plain ASCII, so it can be handed to DOMDocument::loadHTML()
     * without depending on the parser's charset detection. ASCII characters,
     * including markup characters such as <, > and &, are left untouched.
     *
     * This replaces mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'), whose
     * 'HTML-ENTITIES' target is deprecated as of PHP 8.2. Named entities are no
     * longer produced (e.g. "é" becomes "&#233;" rather than "&eacute;"), which
     * decodes to the same character once parsed.
     *
     * @param string $html
     *
     * @return string
     */
    private function convertEncoding($html)
    {
        // Conversion map: [first code point, last code point, offset, mask].
        $nonAsciiMap = [0x80, 0x10FFFF, 0, 0x1FFFFF];

        return mb_encode_numericentity((string) $html, $nonAsciiMap, 'UTF-8');
    }

    /**
     * Remove <script> elements from an HTML fragment.
     *
     * The fragment is wrapped in a <div>, parsed with DOMDocument, every
     * <script> element is emptied, and the wrapper's children are serialised
     * back. The emptied <script> tags and any entity-escaped script block
     * (&lt;script ...&gt;...&lt;/script&gt;) are then stripped from the
     * serialised string.
     *
     * Despite the method name, <style> elements are currently left in place.
     *
     * The libxml "internal errors" flag is restored to its previous value before
     * returning, so calling this method does not alter global libxml state.
     *
     * @param string $html
     * @return string Cleaned fragment; the untouched input if an exception is raised
     */
    private function removeScriptAndStyleContent($html)
    {
        $previousErrorMode = libxml_use_internal_errors(true);

        try {
            $dom = new \DOMDocument();
            $dom->loadHTML(
                $this->convertEncoding("<div>{$html}</div>"),
                LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD | LIBXML_NOWARNING | LIBXML_NOERROR
            );

            $xpath = new \DOMXPath($dom);

            foreach ($xpath->query('//script') as $node) {
                $node->nodeValue = '';
            }

            // Serialise the children of the wrapper <div>, i.e. the fragment itself.
            $result = '';
            foreach ($dom->documentElement->childNodes as $child) {
                $result .= $dom->saveHTML($child);
            }

            // Patterns are applied in order: emptied <script> tags first, then escaped script blocks.
            $cleanHtml = preg_replace(
                [
                    '/<script[^>]*><\/script>/i',
                    '#&lt;script\b[^&]*?&gt;.*?&lt;/script&gt;#is',
                ],
                '',
                $result
            );

            // preg_replace() returns null on a PCRE error; keep the DOM-cleaned markup in that case.
            return $cleanHtml ?? $result;
        } catch (\Exception $e) {
            return $html;
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousErrorMode);
        }
    }

    /**
     * Strip every attribute that is not whitelisted for its element.
     *
     * The fragment is wrapped in a <div>, parsed with DOMDocument, and each
     * attribute is removed unless its name is listed under its element's tag
     * name in $allowedAttributes. Elements whose tag name has no entry lose all
     * their attributes.
     *
     * The libxml "internal errors" flag is restored to its previous value before
     * returning.
     *
     * @param string $html
     * @param array $allowedAttributes Map of tag name => list of permitted attribute names
     * @return string Filtered fragment; the untouched input if an exception is raised
     */
    private function removeDisallowedAttributes($html, $allowedAttributes)
    {
        $previousErrorMode = libxml_use_internal_errors(true);

        try {
            $dom = new \DOMDocument();
            $dom->loadHTML(
                $this->convertEncoding("<div>{$html}</div>"),
                LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD | LIBXML_NOWARNING | LIBXML_NOERROR
            );

            $xpath = new \DOMXPath($dom);

            foreach ($xpath->query('//@*') as $attribute) {
                $element = $attribute->parentNode;
                $allowedForTag = $allowedAttributes[$element->nodeName] ?? [];

                // Attribute names are strings, so compare strictly.
                if (!in_array($attribute->nodeName, $allowedForTag, true)) {
                    $element->removeAttribute($attribute->nodeName);
                }
            }

            // Serialise the children of the wrapper <div>, i.e. the fragment itself.
            $result = '';
            foreach ($dom->documentElement->childNodes as $child) {
                $result .= $dom->saveHTML($child);
            }

            return $result;
        } catch (\Exception $e) {
            return $html;
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousErrorMode);
        }
    }

    /**
     * Reduce every inline style attribute to its whitelisted declarations.
     *
     * The fragment is wrapped in a <div> and parsed with DOMDocument. Each
     * style attribute is split on ';' into declarations and each declaration on
     * its first ':' into property and value. A declaration is kept when the
     * property, or the wildcard '*', is listed for the element's tag name in
     * $allowedStyles. The attribute is rewritten as "property: value" pairs
     * joined by '; ' (an empty string when nothing is kept).
     *
     * Splitting on the first ':' only keeps values that themselves contain a
     * colon (e.g. url(http://...)) intact. Declarations without a ':' or with
     * an empty property name are dropped.
     *
     * The libxml "internal errors" flag is restored to its previous value before
     * returning.
     *
     * @param string $html
     * @param array $allowedStyles Map of tag name => list of permitted CSS properties ('*' permits all)
     * @return string Filtered fragment; the untouched input if an exception is raised
     */
    private function removeDisallowedStyles($html, $allowedStyles)
    {
        $previousErrorMode = libxml_use_internal_errors(true);

        try {
            $dom = new \DOMDocument();
            $dom->loadHTML(
                $this->convertEncoding("<div>{$html}</div>"),
                LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD | LIBXML_NOWARNING | LIBXML_NOERROR
            );

            $xpath = new \DOMXPath($dom);

            foreach ($xpath->query('//@style') as $styleAttribute) {
                $allowedForTag = $allowedStyles[$styleAttribute->parentNode->nodeName] ?? [];
                $allowsEverything = in_array('*', $allowedForTag, true);
                $keptDeclarations = [];

                foreach (explode(';', (string) $styleAttribute->nodeValue) as $declaration) {
                    // Limit of 2: everything after the first ':' belongs to the value.
                    $parts = explode(':', $declaration, 2);
                    if (count($parts) !== 2) {
                        continue;
                    }

                    $property = trim($parts[0]);
                    $value = trim($parts[1]);
                    if ($property === '') {
                        continue;
                    }

                    if ($allowsEverything || in_array($property, $allowedForTag, true)) {
                        $keptDeclarations[] = $property . ': ' . $value;
                    }
                }

                $styleAttribute->nodeValue = implode('; ', $keptDeclarations);
            }

            // Serialise the children of the wrapper <div>, i.e. the fragment itself.
            $result = '';
            foreach ($dom->documentElement->childNodes as $child) {
                $result .= $dom->saveHTML($child);
            }

            return $result;
        } catch (\Exception $e) {
            return $html;
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousErrorMode);
        }
    }

    /**
     * Collect the URLs of all <img> elements found in a set of HTML fields.
     *
     * Each field value is parsed with DOMDocument. For every <img> with a
     * non-empty src:
     *  - a plain src is reported as-is;
     *  - a src holding a {{media url="..."}} directive is resolved through the
     *    CMS filter's mediaDirective() and prefixed with the store's direct-link
     *    base URL; it is skipped when the directive cannot be matched or
     *    resolves to an empty path.
     *
     * Field values that are null or arrays are skipped. The content is passed
     * through convertEncoding() before parsing so that non-ASCII characters in
     * a src survive the round trip. The "<img" pre-check is case-insensitive.
     *
     * This is best effort: if an exception is raised, the URLs gathered so far
     * are returned. The libxml "internal errors" flag is restored to its
     * previous value before returning.
     *
     * NOTE(review): $storeId is accepted but not used; the base URL comes from
     * getStore() without an argument. Confirm whether getStore($storeId) was intended.
     *
     * @param array $fields Map of field name => HTML content
     * @param int $storeId
     * @return array List of ['url' => string] entries
     */
    public function getImageUrlFromContent($fields, $storeId)
    {
        $imageUrls = [];
        if (!is_iterable($fields)) {
            return $imageUrls;
        }

        $previousErrorMode = libxml_use_internal_errors(true);

        try {
            foreach ($fields as $content) {
                if ($content === null || is_array($content)) {
                    continue;
                }

                $content = (string) $content;
                if (stripos($content, '<img') === false) {
                    continue;
                }

                $dom = new \DOMDocument();
                $dom->loadHTML(
                    $this->convertEncoding($content),
                    LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
                );
                libxml_clear_errors();

                foreach ($dom->getElementsByTagName('img') as $image) {
                    $src = $image->getAttribute('src');
                    if (!$src) {
                        continue;
                    }

                    if (strpos($src, '{{media') === false) {
                        $imageUrls[] = [
                            "url" => $src
                        ];
                        continue;
                    }

                    // Extract the path given to the media directive.
                    if (!preg_match('/{{media url="([^"]+)"}}/', $src, $matches)) {
                        continue;
                    }

                    $imagePath = $this->cmsFilter->mediaDirective(['', '', ' url="' . $matches[1] . '"']);
                    if ($imagePath) {
                        $baseUrl = $this->storeManager->getStore()->getBaseUrl(
                            \Magento\Framework\UrlInterface::URL_TYPE_DIRECT_LINK
                        );
                        $imageUrls[] = [
                            "url" => $baseUrl . $imagePath
                        ];
                    }
                }
            }
        } catch (\Exception $e) {
            // Best effort: fall through and return whatever was collected before the failure.
            return $imageUrls;
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousErrorMode);
        }

        return $imageUrls;
    }

    /**
     * Write alt text into the <img> tags of an HTML string using regular expressions.
     *
     * Every <img ... src="..."> tag is compared against $imageAltTexts. An entry
     * matches when its 'noncached_url' equals the tag's src or its resolved
     * media URL, or when both URLs have the same non-empty path. The first
     * matching entry wins; its 'alt' is used, or its 'writetext_alt' when
     * $isAltIncluded is true and that value is not empty. An existing alt
     * attribute is replaced, otherwise one is inserted right after "<img".
     *
     * Details worth knowing:
     *  - Empty paths never count as a match, so two unparsable URLs (for
     *    example data: URIs) are not treated as the same image.
     *  - The alt text is inserted literally; "$1" or "\1" inside it is not
     *    interpreted as a regex back-reference.
     *  - Only a real alt attribute is replaced; data-alt and similar are not.
     *  - The src capture cannot contain a literal quote, so a
     *    {{media url="..."}} directive can only arrive with entity-encoded
     *    quotes. It is decoded before being resolved; if resolution throws,
     *    the raw src is used.
     *
     * @param string $content
     * @param array $imageAltTexts List of arrays with 'noncached_url', 'alt' and optionally 'writetext_alt'
     * @param bool $isAltIncluded
     * @return string Updated content; the untouched input if an exception or a regex error occurs
     */
    public function addAltTextToContentViaPreg($content, $imageAltTexts, $isAltIncluded = false)
    {
        // Best-effort path extraction: '' when the URL cannot be parsed or has no path.
        $pathOf = static function ($url) {
            try {
                return UriFactory::factory((string) $url)->getPath() ?: '';
            } catch (\Exception $e) {
                return '';
            }
        };

        try {
            $result = preg_replace_callback(
                '/<img\s+[^>]*src=["\']([^"\']+)["\'][^>]*>/i',
                function ($matches) use ($imageAltTexts, $isAltIncluded, $pathOf) {
                    $imgTag = $matches[0];
                    $src = $matches[1];
                    $imageUrl = $src;

                    // Handle Magento media directive {{media url="..."}}
                    if (strpos($src, '{{media') !== false) {
                        $directive = html_entity_decode($src, ENT_QUOTES);
                        if (preg_match('/{{media url="([^"]+)"}}/', $directive, $m)) {
                            try {
                                $imagePath = $this->cmsFilter->mediaDirective(['', '', ' url="' . $m[1] . '"']);
                                if ($imagePath) {
                                    $baseUrl = $this->storeManager->getStore()->getBaseUrl(
                                        \Magento\Framework\UrlInterface::URL_TYPE_DIRECT_LINK
                                    );
                                    $imageUrl = $baseUrl . $imagePath;
                                }
                            } catch (\Exception $e) {
                                // An unresolvable directive must not abort the whole replacement.
                                $imageUrl = $src;
                            }
                        }
                    }

                    $imagePathOnly = $pathOf($imageUrl);

                    foreach ($imageAltTexts as $imageAltText) {
                        $candidateUrl = $imageAltText['noncached_url'] ?? '';
                        if ($candidateUrl === '' || $candidateUrl === null) {
                            continue;
                        }

                        $candidatePath = $pathOf($candidateUrl);
                        $sameUrl = $candidateUrl === $src || $candidateUrl === $imageUrl;
                        $samePath = $candidatePath !== '' && $candidatePath === $imagePathOnly;
                        if (!$sameUrl && !$samePath) {
                            continue;
                        }

                        $altToTransfer = $imageAltText['alt'] ?? '';
                        if ($isAltIncluded && !empty($imageAltText['writetext_alt'])) {
                            $altToTransfer = $imageAltText['writetext_alt'];
                        }
                        $altAttribute = 'alt="' . $this->escaper->escapeHtml($altToTransfer) . '"';

                        // Replace the existing alt attribute. A callback keeps the new value literal,
                        // and the look-behind stops "data-alt" and similar from matching.
                        $replacedCount = 0;
                        $updatedTag = preg_replace_callback(
                            '/(?<![\w\-])alt=("[^"]*"|\'[^\']*\')/i',
                            static function () use ($altAttribute) {
                                return $altAttribute;
                            },
                            $imgTag,
                            1,
                            $replacedCount
                        );

                        if ($updatedTag !== null && $replacedCount > 0) {
                            $imgTag = $updatedTag;
                        } else {
                            // No alt attribute yet: insert one after the 4-character "<img" prefix,
                            // whatever its letter case.
                            $imgTag = substr($imgTag, 0, 4) . ' ' . $altAttribute . substr($imgTag, 4);
                        }
                        break;
                    }

                    return $imgTag;
                },
                $content
            );

            // preg_replace_callback() returns null on a PCRE error; keep the original content then.
            return $result ?? $content;
        } catch (\Exception $e) {
            return $content;
        }
    }

    /**
     * Fill in missing alt text on the <img> elements of an HTML string using DOMDocument.
     *
     * The content is wrapped in a <div>, entity-encoded, has stray '<'
     * characters escaped, and is parsed. For every <img> with a non-empty src
     * and no (or an empty) alt attribute, the first entry of $imageAltTexts
     * whose 'noncached_url', or the path of that URL, equals the image URL
     * supplies the alt text. Its 'alt' is used, or its 'writetext_alt' when
     * $isAltIncluded is true and that value is not empty. A
     * {{media url="..."}} src is resolved to an absolute URL before comparing.
     *
     * Images that already carry a non-empty alt are never modified.
     *
     * When the content holds no <img> at all the original string is returned
     * unchanged; otherwise the re-serialised markup is returned without the
     * wrapper <div>. The libxml "internal errors" flag is restored to its
     * previous value before returning.
     *
     * @param string $content
     * @param array $imageAltTexts List of arrays with 'noncached_url', 'alt' and optionally 'writetext_alt'
     * @param bool $isAltIncluded
     * @return string Updated content; the untouched input if an exception is raised
     */
    public function addAltTextToContent($content, $imageAltTexts, $isAltIncluded = false)
    {
        $previousErrorMode = libxml_use_internal_errors(true);

        try {
            $dom = new \DOMDocument();
            $encodedContent = $this->escapeLooseAngles(
                $this->convertEncoding("<div>{$content}</div>")
            );
            $dom->loadHTML(
                $encodedContent,
                LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD | LIBXML_NOWARNING | LIBXML_NOERROR
            );

            $xpath = new \DOMXPath($dom);
            $images = $xpath->query('//img');

            if ($images->length === 0) {
                return $content;
            }

            foreach ($images as $image) {
                $src = $image->getAttribute('src');
                if ($src === '') {
                    continue;
                }

                // Handle media directive URLs
                $imageUrl = $src;
                if (strpos($src, '{{media') !== false
                    && preg_match('/{{media url="([^"]+)"}}/', $src, $matches)
                ) {
                    $imagePath = $this->cmsFilter->mediaDirective(['', '', ' url="' . $matches[1] . '"']);
                    if ($imagePath) {
                        $baseUrl = $this->storeManager->getStore()->getBaseUrl(
                            \Magento\Framework\UrlInterface::URL_TYPE_DIRECT_LINK
                        );
                        $imageUrl = $baseUrl . $imagePath;
                    }
                }

                // An existing non-empty alt is kept, so there is nothing to look up for this image.
                if ($image->getAttribute('alt') !== '') {
                    continue;
                }

                // Find matching alt text
                foreach ($imageAltTexts as $imageAltText) {
                    $candidateUrl = $imageAltText['noncached_url'] ?? '';
                    try {
                        $relativeUrl = UriFactory::factory((string) $candidateUrl)->getPath() ?: '';
                    } catch (\Exception $e) {
                        $relativeUrl = '';
                    }

                    if ($candidateUrl !== $imageUrl && $relativeUrl !== $imageUrl) {
                        continue;
                    }

                    $altToTransfer = $imageAltText['alt'] ?? '';
                    if ($isAltIncluded && !empty($imageAltText['writetext_alt'])) {
                        $altToTransfer = $imageAltText['writetext_alt'];
                    }

                    $image->setAttribute('alt', (string) $altToTransfer);
                    break;
                }
            }

            $html = $dom->saveHTML($dom->documentElement);
            // Remove wrapper div
            $unwrapped = preg_replace('/^<div>|<\/div>$/', '', (string) $html);

            // preg_replace() returns null on a PCRE error; fall back to the original content.
            return $unwrapped ?? $content;
        } catch (\Exception $e) {
            return $content;
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousErrorMode);
        }
    }

    /**
     * Remove emoji and related code points from a string.
     *
     * Every character in the Unicode ranges listed below is deleted. The
     * variation selectors and the zero-width joiner are included so that no
     * invisible remnants of a composed emoji sequence are left behind.
     *
     * If the regular expression fails (preg_replace() returns null, which
     * happens for input that is not valid UTF-8 under the /u modifier), the
     * content is returned unchanged instead of being lost.
     *
     * @param string $content
     * @return string
     */
    public function stripEmojis($content)
    {
        if ($content === null || $content === '') {
            return (string) $content;
        }

        $emojiCharacters = '[' .
            '\x{1F1E6}-\x{1F1FF}' .         // flags
            '\x{1F300}-\x{1F5FF}' .         // symbols & pictographs
            '\x{1F600}-\x{1F64F}' .         // emoticons
            '\x{1F680}-\x{1F6FF}' .         // transport & map symbols
            '\x{1F700}-\x{1F77F}' .         // alchemical symbols
            '\x{1F780}-\x{1F7FF}' .         // Geometric Shapes Extended
            '\x{1F800}-\x{1F8FF}' .         // Supplemental Arrows-C
            '\x{1F900}-\x{1F9FF}' .         // Supplemental Symbols and Pictographs
            '\x{1FA00}-\x{1FA6F}' .         // Chess symbols, etc.
            '\x{1FA70}-\x{1FAFF}' .         // Symbols and Pictographs Extended-A
            '\x{2600}-\x{26FF}' .           // Misc symbols
            '\x{2700}-\x{27BF}' .           // Dingbats
            '\x{FE00}-\x{FE0F}' .           // Variation Selectors
            '\x{200D}' .                    // Zero-width joiner
            '\x{23CF}\x{23E9}-\x{23FA}' .   // Misc Technical
            ']';

        $stripped = preg_replace('/' . $emojiCharacters . '/u', '', $content);

        return $stripped ?? $content;
    }
    
    /**
     * Escape '<' characters that do not open markup.
     *
     * A '<' is kept only when the next character is an ASCII letter, '/' or '!'
     * (opening tag, closing tag, or comment/doctype). Every other '<' is
     * rewritten to the entity &lt;.
     *
     * @param string $html
     * @return string
     */
    private function escapeLooseAngles($html)
    {
        $looseAnglePattern = '/<(?![a-zA-Z\/!])/';
        $escaped = preg_replace($looseAnglePattern, '&lt;', $html);

        return $escaped;
    }
}
