import { handleImages } from '@/lib/components/Shared/editor/ckeditor/document-images-importer';

/**
 * Drops everything that precedes the opening `<body` tag (e.g. doctype, head and meta tags).
 * The input is returned untouched when it contains no `<body` tag or already starts with it.
 *
 * @param {string} html - HTML of the imported document.
 * @returns {string} The HTML starting at the `<body` tag, or the unchanged input.
 */
const removeHtmlBeforeBody = (html) => {
	const bodyStart = html.indexOf('<body');

	// -1 means "not found" and 0 means "nothing to cut", so only positive offsets need trimming.
	return bodyStart > 0 ? html.slice(bodyStart) : html;
};

/**
 * Unwraps every `<b style="font-weight:normal;" ...>...</b>` element (the wrapper this function is
 * named after), keeping only its inner HTML.
 * Matching is lazy, so each wrapper is considered closed at the first `</b>` that follows it.
 *
 * @param {string} html - HTML of the imported document.
 * @returns {string} The HTML without the wrapper tags, or the unchanged input when none is present.
 */
const removeGoogleDocsBoldWrapper = (html) => {
	const hasWrapper = html.includes('<b style="font-weight:normal;"');

	if (hasWrapper) {
		// https://regex101.com/r/PytWe9/1
		const wrapperPattern = /(<b style="font-weight:normal;"[\s\S]*?>)([\s\S]*?)(<\/b>)/gi;

		// Keep only the second capture group: the content between the opening and closing tag.
		return html.replace(wrapperPattern, (_match, _openingTag, innerHtml) => innerHtml);
	}

	return html;
};

/**
 * Normalises list items: each `<li ...>...</li>` is rewritten as a bare `<li>...</li>`, dropping the
 * attributes of the `<li>` tag together with a `<p ...>` that directly follows it and a `</p>` that
 * directly precedes the closing `</li>`.
 * Paragraphs embedded in list items of imported Google Docs otherwise result in broken (skipped) lists.
 *
 * The rewrite only runs when at least one `<li ` (a list item followed by a space) is present.
 *
 * @param {string} html - HTML of the imported document.
 * @returns {string} The HTML with simplified list items, or the unchanged input.
 */
const stripListAttributes = (html) => {
	const hasListItemWithAttributes = html.includes('<li ');

	if (hasListItemWithAttributes) {
		// https://regex101.com/r/nAIqmv/1
		const listItemPattern = /(?:<li[\s\S]*?>)(?:<p[\s\S]*?>)?([\s\S]*?)(?:<\/p>)?(?:<\/li>)/gi;

		// $1 is the list item content without the surrounding paragraph tags.
		return html.replace(listItemPattern, '<li>$1</li>');
	}

	return html;
};

/**
 * Replaces empty span tags with a single space, because they produce empty lines.
 * Two shapes are matched: self-closing spans (`<span ... />`) and spans with attributes whose
 * content is only whitespace (`<span ...>   </span>`).
 *
 * The replacement is repeated until the HTML stops shrinking, so spans that become empty once
 * their empty children are gone get removed as well.
 *
 * @param {string} html - HTML of the imported document.
 * @returns {string} The HTML with empty spans replaced by spaces, or the unchanged input.
 */
const removeEmptySpans = (html) => {
	if (!html.includes('<span')) {
		return html;
	}

	// https://regex101.com/r/wQCGwq/4
	const emptySpanPattern = /(<span[^>]*?\/>)|(<span [^>]*?>[\s]*<\/span>)/gi;

	let cleaned = html;
	let lengthBeforePass;
	do {
		lengthBeforePass = cleaned.length;
		// Every match is longer than the one-character replacement, so an unchanged length means no match.
		cleaned = cleaned.replace(emptySpanPattern, ' ');
	} while (cleaned.length !== lengthBeforePass);

	return cleaned;
};

/**
 * Removes paragraphs that contain nothing but whitespace, e.g. `<p></p>` or `<p class="x"> </p>`.
 *
 * @param {string} html - HTML of the imported document.
 * @returns {string} The HTML without empty paragraphs.
 */
const removeEmptyParagraphs = (html) => {
	// No pre-check here: a guard on an unrelated tag (such as `<br`) would skip documents that
	// contain empty paragraphs but no line breaks. Without a match the input comes back unchanged.
	// https://regex101.com/r/qY3TkX/1
	const emptyParagraphPattern = /(<p\s?[^>]*?>[\s]*<\/p>)/gi;

	return html.replace(emptyParagraphPattern, '');
};

/**
 * After removal of empty spans we might be left with runs of consecutive spaces.
 * We don't want them, so every run of two or more spaces is collapsed into a single space.
 *
 * A single literal `'  '` -> `' '` pass is not enough: it halves each run, so three spaces
 * would still leave a double space behind.
 *
 * @param {string} html - HTML of the imported document.
 * @returns {string} The HTML without consecutive space characters.
 */
const removeDoubleSpaces = (html) => html.replace(/ {2,}/g, ' ');

/**
 * Removes every `<br ...>` tag from the HTML, whatever attributes or closing style it has.
 *
 * @param {string} html - HTML of the imported document.
 * @returns {string} The HTML without `<br>` tags, or the unchanged input when it contains no `<br`.
 */
const removeBlankLines = (html) => {
	const hasLineBreak = html.includes('<br');

	if (hasLineBreak) {
		// Lazy match, so each tag ends at the first `>` that follows `<br`.
		const lineBreakPattern = /(<br[\s\S]*?>)/gi;
		return html.replace(lineBreakPattern, '');
	}

	return html;
};

/**
 * Importing a document from Google Docs or Microsoft Word produces a lot of garbage markup.
 * This runs the HTML through a fixed sequence of clean-up steps and returns the result.
 *
 * @param {*} wordHtml - HTML of the imported document; falsy values yield an empty string,
 *   anything else is coerced to a string first.
 * @returns {string} The cleaned HTML.
 */
const processDocumentHtml = (wordHtml) => {
	if (!wordHtml) {
		return '';
	}

	// Each step receives the output of the previous one. The order is significant, e.g. double
	// spaces are collapsed only after empty spans have been replaced by spaces.
	const cleanupSteps = [
		removeHtmlBeforeBody,
		removeGoogleDocsBoldWrapper,
		// Imported from document-images-importer; assumed to return the HTML synchronously.
		handleImages,
		stripListAttributes,
		removeEmptySpans,
		removeEmptyParagraphs,
		removeDoubleSpaces,
		removeBlankLines,
	];

	// `wordHtml + ''` coerces non-string input to a string before the first step runs.
	return cleanupSteps.reduce((html, applyStep) => applyStep(html), wordHtml + '');
};

export { processDocumentHtml };
