import { textImporters, UrlInfo, urlInfoToTree } from '../../../../base/src/data/import'
import { U } from '../../common'

/**
 * Default lookup patterns, keyed by the UrlInfo field they populate.
 *
 * Each pattern is a CSS selector, optionally followed by `/@attribute`:
 * with the suffix the named attribute of the matched element is read,
 * without it the element's text content is used (see firstMatch).
 * Patterns of a field are tried in the listed order.
 *
 * Twitter Card tags are matched both as `property="twitter:…"` and as
 * `name="twitter:…"`, since pages use either form.
 */
const genericQueries = {
	title: [
		'meta[property="og:title"]/@content',
		'meta[property="twitter:title"]/@content',
		'meta[name="twitter:title"]/@content',
		'meta[name="title"]/@content',
	],
	url: [
		'link[rel="canonical"]/@href',
		'meta[property="og:url"]/@content',
		'meta[property="twitter:url"]/@content',
	],
	description: [
		'meta[property="og:description"]/@content',
		'meta[property="twitter:description"]/@content',
		'meta[name="twitter:description"]/@content',
		'meta[name="description"]/@content',
		'p[class*="description"]',
		'div[class*="description"]', // wikipedia.org ('shortdescription')
	],
	icon: [
		'meta[property="og:logo"]/@content',
		'meta[itemprop="logo"]/@content',
		'link[rel="icon"][type]/@href',
		'link[rel~="icon"]/@href',
		'img[itemprop="logo"]/@src',
	],
	image: [
		'meta[property="og:image:secure_url"]/@content',
		'meta[property="og:image:url"]/@content',
		'meta[property="og:image"]/@content',
		'meta[property="twitter:image"]/@content',
		'meta[name="twitter:image:src"]/@content',
		'meta[name="twitter:image"]/@content',
		'#main-image-container img/@src', // amazon.com
	],
	type: [
		'meta[property="og:type"]/@content',
	],
}

/** Lookup patterns that replace the generic ones for URLs starting with `prefix`. */
type SpecialQuerySet = {
	prefix: string
	queries: {
		title?: string[]
		description?: string[]
		icon?: string[]
		image?: string[]
		type?: string[]
	}
}

/**
 * Site-specific pattern sets. An entry applies to a document whose URL
 * begins with its `prefix`; only the fields listed in `queries` are
 * looked up for such a document.
 */
const specialQueries: SpecialQuerySet[] = [
	{
		prefix: 'https://gitlab.com/',
		queries: {
			title: ['meta[property="og:title"]/@content'],
			description: ['div[class~="description"]'],
			icon: ['link[rel~="icon"]/@href'],
		},
	},
]

// Register htmlToUrlInfo as the parser for both the HTML and the XHTML
// content type.
UrlInfo.parser['text/html'] = htmlToUrlInfo
UrlInfo.parser['application/xhtml+xml'] = htmlToUrlInfo

/**
 * Extracts preview metadata from an HTML or XHTML document.
 *
 * The fields are filled from the first site-specific pattern set whose
 * prefix matches `url`, or from the generic patterns otherwise.
 *
 * @param txt  Markup to parse.
 * @param ct   Content type handed to DOMParser.
 * @param url  Address of the document. When falsy, the `content` of a
 *             `<meta name="source.url">` tag is used instead, if present;
 *             it may remain null when the document carries no such tag.
 * @returns    The collected UrlInfo. `url` falls back to the given address,
 *             `title` falls back to the document title, and `icon` / `image`
 *             are resolved against the document's base URL.
 */
function htmlToUrlInfo(txt: string, ct: string, url: string) {
	const doc = new DOMParser().parseFromString(txt, ct as DOMParserSupportedType)
	if (!url) {
		const urlTag = doc.querySelector('meta[name="source.url"]')
		if (urlTag)
			url = urlTag.getAttribute('content')
	}
	const info: UrlInfo = {}
	// url can still be null here (caller passed none and the document has no
	// usable source.url tag); calling startsWith on it would throw, and no
	// site-specific rule can apply without an address anyway
	const special = url
		? specialQueries.find(r => url.startsWith(r.prefix))
		: undefined
	const queries = special?.queries ?? genericQueries
	for (const k of Object.keys(queries))
		info[k] = firstMatch(doc, queries[k])
	// NOTE(review): when url is null the calls to U.url.absolute below may
	// receive a null base — confirm that helper tolerates it
	if (!info.url)
		info.url = url
	else if (info.url !== url)
		// some canonical URLs seem to be incomplete (eg. https://heise.de)
		info.url = U.url.absolute(info.url, url)
	// a <base href> tag overrides the document URL as base for relative links
	let baseUrl = info.url
	const baseTag = doc.querySelector('base[href]')
	if (baseTag)
		baseUrl = U.url.absolute(baseTag.getAttribute('href'), baseUrl)
	if (!info.title)
		info.title = doc.title
	if (info.icon)
		info.icon = U.url.absolute(info.icon as string, baseUrl)
	if (info.image)
		info.image = U.url.absolute(info.image as string, baseUrl)
	return info
}

/**
 * Returns the first non-empty value produced by a list of query patterns.
 *
 * A pattern is a CSS selector, optionally followed by `/@attribute`. With
 * the suffix the named attribute of the first matching element is read;
 * without it the element's text content is used.
 *
 * @param doc       Document to query.
 * @param patterns  Patterns to try, in order of preference.
 * @returns         The first non-empty value, or undefined when no pattern
 *                  yields one.
 */
function firstMatch(doc: Document, patterns: string[]): string | undefined {
	for (const p of patterns) {
		const i = p.indexOf('/@')
		const selector = i > 0 ? p.substring(0, i) : p
		const attr = i > 0 ? p.substring(i + 2) : null
		const e = doc.querySelector(selector)
		if (!e)
			continue
		const value = attr ? e.getAttribute(attr) : e.textContent
		// an element that lacks the attribute (or has no text) must not hide
		// a usable value from a later pattern
		if (value)
			return value
	}
	return undefined
}

// Append newItemsForHtmlContent to the imported list of text importers.
textImporters.push(newItemsForHtmlContent)

/**
 * Text importer for HTML that is tagged as "HtmlContent".
 *
 * The text is accepted only if it begins with `<html>` and contains the
 * `source.type` marker tag. No URL is passed to htmlToUrlInfo.
 *
 * @param txt  Text to inspect.
 * @returns    A one-element array holding the tree built from the extracted
 *             UrlInfo, or null when the text is not recognised.
 */
function newItemsForHtmlContent(txt: string) {
	const marker = /<meta name="source.type" content="HtmlContent">/
	const recognised = txt.startsWith('<html>') && marker.test(txt)
	if (!recognised)
		return null
	const tree = urlInfoToTree(htmlToUrlInfo(txt, 'text/html', null))
	return [tree]
}
