import type { QueryType } from "../server/types";
import type { SanitizeAttributes, SanitizerOptions } from "../types/SanitizerTypes";

// Patterns that cause a string query parameter to be dropped by sanitizeQueryParams.
// They are RegExp literals on purpose: written as plain strings, "\b" is a backspace character and
// the surrounding "/…/gi" delimiters are matched literally, so the script pattern could never match.
// No "g" flag is used so the shared instances carry no lastIndex state between calls.
const blackListedQueriesRegex: RegExp[] = [
    /[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&\/=]*)/, // RegEx to filter out URL
    /<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script\s*>/i, // RegEx to filter out script tags (any letter case)
];

/**
 * Drops every query parameter whose string value matches one of the blacklisted patterns.
 * Parameters with non-string values are always kept.
 * @param queries The query parameters to filter
 * @returns A new object containing only the parameters that passed the check
 */
export const sanitizeQueryParams = (queries: QueryType): QueryType => {
    const isBlackListed = (value: string): boolean =>
        blackListedQueriesRegex.some((pattern) => value.match(pattern) !== null);

    const acceptedEntries = Object.entries(queries).filter(
        ([, value]) => typeof value !== "string" || !isBlackListed(value),
    );

    return Object.fromEntries(acceptedEntries);
};

/**
 * Tag names used as `allowedTags` in the default sanitizer options.
 * The list is alphabetical; each row below groups the tags sharing a first letter.
 */
export const defaultAllowedTags = [
    ...["a", "abbr"],
    ...["b", "bdi", "bdo", "blockquote", "br"],
    ...["caption", "cite", "code"],
    ...["dd", "dfn", "div", "dl", "dt"],
    ...["em"],
    ...["figcaption", "figure"],
    ...["h1", "h2", "h3", "h4", "h5", "h6", "hgroup", "hr"],
    ...["i"],
    ...["kbd"],
    ...["li"],
    ...["mark"],
    ...["ol"],
    ...["p", "pre"],
    ...["q"],
    ...["rb", "rp", "rt", "rtc"],
    ...["s", "samp", "small", "span", "strong", "sub", "sup"],
    ...["time"],
    ...["u", "ul"],
    ...["wbr"],
];

// URL schemes accepted by the default sanitizer options.
const defaultAllowedSchemes = ["https", "http", "mailto", "tel"];
// Matches values that start with one of the allowed schemes followed by ":".
// URI scheme names are case-insensitive, so "HTTPS:" must be accepted just like "https:".
const defaultAllowedSchemeRegex = new RegExp(`^(${defaultAllowedSchemes.join("|")}):`, "i");

// Options applied by `sanitize` unless the caller overrides them.
const defaultSanitizeOptions: SanitizerOptions = {
    allowedSchemes: defaultAllowedSchemes,
    allowedTags: defaultAllowedTags,
    // We currently only support "discard".
    // See https://www.npmjs.com/package/sanitize-html#what-if-i-want-disallowed-tags-to-be-escaped-rather-than-discarded for more info.
    disallowedTagsMode: "discard",
    allowedAttributes: {
        a: ["href", "name", "target", "style"],
    },
    transformTags: {
        // Rebuilds the attribute set of an anchor. Rules for specific attribute names live here:
        // an `href` is only copied over when it starts with an allowed scheme, everything else is copied as is.
        a: (tagName: string, attribs: SanitizeAttributes) => {
            const attributes: typeof attribs = {};
            for (const [attrName, attrValue] of Object.entries(attribs)) {
                const isRejectedHref = attrName === "href" && !defaultAllowedSchemeRegex.test(attrValue);
                if (isRejectedHref) continue;
                attributes[attrName] = attrValue;
            }
            return { tagName, attribs: attributes };
        },
    },
};

/**
 * Sanitizes a piece of HTML.
 * The default sanitization options are used as a base; any option passed in replaces the default of the same name.
 * This operation is somewhat costly, so call it as close to the formatter/fetch as possible.
 * @param unsanitizedHtml The html that needs to be sanitized
 * @param options Optional overrides for the default sanitization options
 * @returns Sanitized html as a string
 */
export const sanitize = (unsanitizedHtml: string, options?: Partial<SanitizerOptions>): string => {
    const effectiveOptions = { ...defaultSanitizeOptions, ...options };
    return sanitizeHtml(unsanitizedHtml, effectiveOptions);
};

/**
 * Sanitizes HTML and turns every link element into an attribute-less `span`, as external links are not
 * supported on retailer screens.
 * The labels cache is indifferent to where labels are used, so this cannot be done during parsing.
 * Any `transformTags` passed in `options` is replaced by the link transform.
 * @param unsanitizedHtml The html that needs to be sanitized
 * @param options Optional overrides for the default sanitization options
 * @returns Sanitized html as a string
 */
export const sanitizeRetailerLinks = (unsanitizedHtml: string, options?: Partial<SanitizerOptions>): string => {
    const replaceAnchorWithSpan = () => ({ tagName: "span", attribs: {} });
    const linklessOptions = { ...options, transformTags: { a: replaceAnchorWithSpan } };

    return sanitize(unsanitizedHtml, linklessOptions);
};

/**
 * Helper function which will handle all the sanitization logic. It runs three steps in order:
 * 1. rewrites tags according to `options.transformTags`,
 * 2. strips every tag that is not listed in `options.allowedTags` (skipped when `allowedTags` is not provided),
 * 3. removes every attribute that is not listed in `options.allowedAttributes`.
 * Step 2 removes the tags themselves only; the text between them is kept.
 * @param unsanitizedHtml The html that needs to be sanitized
 * @param options The sanitization options to apply
 * @returns Sanitized html as a string
 */
const sanitizeHtml = (unsanitizedHtml: string, options?: Partial<SanitizerOptions>): string => {
    const allowedTags = options?.allowedTags;

    // Transform all the tags according to the transformTags function
    let html = applyTransformTags(unsanitizedHtml, options);

    // If allowedTags is provided, remove all other tags
    if (allowedTags) {
        // An empty list means "no tag is allowed". Building the alternation from an empty list would yield
        // `(?:)\b`, which matches at the start of every tag name and would therefore keep all tags instead.
        const disallowedTagsRegex =
            allowedTags.length > 0
                ? new RegExp(`<(?!/?(?:${allowedTags.join("|")})\\b)[^>]*>`, "gi")
                : /<[^>]*>/g;
        html = html.replace(disallowedTagsRegex, "");
    }

    // Remove all attributes other than the ones that have been allowed (closing tags are skipped)
    html = html.replace(/<[^/][^>]*>/g, (tag) => removeDisallowedAttributes(tag, options));

    return html;
};

/**
 * Rewrites tags according to `options.transformTags`.
 * Each key is a tag name; its value is either the replacement tag name or a function that receives the tag name
 * and the attributes of one opening tag and returns the replacement (a tag name, or `{ tagName, attribs }`).
 *
 * - The transform is evaluated for every opening tag separately, so each occurrence is judged on its own attributes.
 * - Tag names are matched case-insensitively, so `<A HREF=…>` is transformed as well.
 * - Closing tags take the name chosen for the most recent still-open occurrence.
 *
 * @param unsanitizedHtml The html whose tags should be transformed
 * @param options The sanitization options; only `transformTags` is read
 * @returns The html with all configured tags replaced
 */
const applyTransformTags = (unsanitizedHtml: string, options?: Partial<SanitizerOptions>): string => {
    const transformTags = options?.transformTags || {};

    return Object.entries(transformTags).reduce((acc, [tag, replacement]) => {
        // Closing tag first, then an opening tag with optional attributes.
        const tagRegex = new RegExp(`</${tag}>|<${tag}(?:\\s+[^>]*)?>`, "gi");
        // Replacement names of opening tags that have not been closed yet.
        const openReplacementNames: string[] = [];

        // Attributes are only extracted when the replacement is a function that needs them.
        const resolve = (openingTag?: string) =>
            typeof replacement === "string"
                ? replacement
                : replacement(tag, openingTag ? extractAttributes(tag, openingTag) : {});
        const nameOf = (parsed: ReturnType<typeof resolve>): string =>
            typeof parsed === "string" ? parsed : parsed.tagName;

        return acc.replace(tagRegex, (match) => {
            if (match.startsWith("</")) {
                // A closing tag without a recorded opening tag falls back to the transform of an attribute-less tag.
                const closingName = openReplacementNames.pop() ?? nameOf(resolve());
                return `</${closingName}>`;
            }

            const parsedReplacement = resolve(match);
            const replacementName = nameOf(parsedReplacement);
            openReplacementNames.push(replacementName);

            // Add attributes to the replacement tag. Values are wrapped in single quotes, so a single quote
            // inside a value is escaped to keep it from terminating the attribute early.
            const serializedAttributes =
                typeof parsedReplacement !== "string"
                    ? Object.entries(parsedReplacement.attribs).map(
                          ([attrName, attrValue]) => `${attrName}='${String(attrValue).replace(/'/g, "&#39;")}'`,
                      )
                    : [];

            return serializedAttributes.length > 0
                ? `<${replacementName} ${serializedAttributes.join(" ")}>`
                : `<${replacementName}>`;
        });
    }, unsanitizedHtml);
};

/**
 * Extracts the attributes of the first `<tag …>` opening tag found in a piece of html.
 * Values may be double-quoted, single-quoted or unquoted; the surrounding quotes are not part of the result.
 * Attributes without a value, or with an empty value, are skipped.
 * @param tag The tag name to look for (matched case-insensitively)
 * @param html The html to search in
 * @returns A map of attribute name to raw attribute value, empty when the tag has no attributes
 */
const extractAttributes = (tag: string, html: string): Record<string, string> => {
    const tagRegex = new RegExp(`<${tag}\\s+([^>]*)>`, "i");
    const match = html.match(tagRegex);

    if (!match || !match[1]) return {};

    // The name stops at "=", whitespace or a quote, so a "=" inside a quoted value (e.g. a query string)
    // and attributes written without whitespace between them cannot leak into the attribute name.
    const attributeRegex = /([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+))/g;
    const attributes: Record<string, string> = {};

    for (const attributeMatch of match[1].matchAll(attributeRegex)) {
        const attributeKey = attributeMatch[1];
        // Exactly one of the three value groups participates in a match.
        const attributeValue = attributeMatch[2] ?? attributeMatch[3] ?? attributeMatch[4];
        if (attributeKey && attributeValue) {
            attributes[attributeKey] = attributeValue;
        }
    }

    return attributes;
};

/**
 * Will remove all attributes that are not part of options.allowedAttributes.
 * E.g. if options.allowedAttributes = { a: ["href", "name", "target", "style"] }, then all attributes other than those provided will be removed from "a" tags.
 * All other attributes on other elements will be removed as well if not present in options.allowedAttributes.
 *
 * Tag names are looked up in lower case; attribute names are compared exactly as written in the allow list.
 * Attributes are tokenized as `name`, `name=value`, `name="value"` or `name='value'`, so quoted values that
 * contain whitespace stay intact and attributes written without whitespace between them are judged separately.
 * @param tag The html containing the opening tag(s) to clean
 * @param options The sanitization options; only `allowedAttributes` is read
 * @returns The html with every disallowed attribute removed
 */
const removeDisallowedAttributes = (tag: string, options?: Partial<SanitizerOptions>): string => {
    const allowedAttributes = options?.allowedAttributes || {};
    const attributePattern = /([^\s"'<>\/=]+)(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+))?/g;

    return tag.replace(/<(\w+)([^>]*)>/g, (_, tagName: string, attributes: string) => {
        const lookupKey = tagName.toLowerCase();
        // Own-property check: tag names such as "constructor" must not resolve to inherited object members.
        const allowed =
            (Object.prototype.hasOwnProperty.call(allowedAttributes, lookupKey) && allowedAttributes[lookupKey]) || [];

        const cleanedAttributes = Array.from(attributes.matchAll(attributePattern))
            .filter((attributeMatch) => allowed.includes(attributeMatch[1] ?? ""))
            .map((attributeMatch) => attributeMatch[0])
            .join(" ");

        return `<${tagName}${cleanedAttributes ? ` ${cleanedAttributes}` : ""}>`;
    });
};
