import TurndownService from 'turndown'

import config from '../config'

// Single Turndown instance used by this module; the custom rules below are
// registered on it once at import time.
const turndownService = new TurndownService()

turndownService
  // <br> elements that are direct children of an <em>. The emitted string
  // depends on the neighbouring siblings so the `_` emphasis markers are
  // closed before the break and/or reopened after it instead of spanning it.
  .addRule('quotes and ems', {
    filter(node) {
      return node.tagName === 'BR' && node.parentNode && node.parentNode.tagName === 'EM'
    },
    replacement(content, node) {
      const before = node.previousSibling
      const after = node.nextSibling

      if (before && before.tagName !== 'BR') {
        // Preceded by something that is not a <br>: close the emphasis here.
        if (after && after.tagName === 'BR') {
          // Another <br> follows, so do not reopen yet.
          return '_ \n\n'
        }
        // Close, break the line, then reopen the emphasis.
        return '_  \n _'
      }

      if (before && before.tagName === 'BR') {
        // Second of two consecutive <br>s: reopen the emphasis after the gap.
        return '\n\n _'
      }

      // No previous sibling at all: emit a plain line break.
      return '  \n'
    },
  })
  // <span> carrying one of the classes listed in config.tagTypes becomes
  // [content](tag://<first class>/<data-tag-id>).
  // NOTE(review): the URL uses classList[0], which presumably is the tag-type
  // class; confirm the markup always puts it first.
  .addRule('tag', {
    filter(node) {
      return node.tagName === 'SPAN' && config.tagTypes.some(t => node.classList.contains(t))
    },
    replacement(content, node) {
      const { tagId } = node.dataset
      return `[${content}](tag://${node.classList[0]}/${tagId})`
    },
  })
  // <span class="itemlink"> becomes
  // [content](itemlink://<data-templateid>/<data-parentid>/<data-id>).
  .addRule('itemlink', {
    filter(node) {
      return node.tagName === 'SPAN' && node.classList.contains('itemlink')
    },
    replacement(content, node) {
      const { id, templateid, parentid } = node.dataset
      return `[${content}](itemlink://${templateid}/${parentid}/${id})`
    },
  })
  // <span class="info-block"> becomes [?](<all classes joined by "_">://<data-info-id>).
  // The span's own content is discarded.
  .addRule('infoblock', {
    filter(node) {
      return node.tagName === 'SPAN' && node.classList.contains('info-block')
    },
    replacement(content, node) {
      const { infoId } = node.dataset
      return `[?](${Array.from(node.classList).join('_')}://${infoId})`
    },
  })
  // <span class="ref-block"> becomes
  // [?](<all classes joined by "_">://<data-ref-id>/<data-block-id>).
  // The span's own content is discarded.
  .addRule('referenceblock', {
    filter(node) {
      return node.tagName === 'SPAN' && node.classList.contains('ref-block')
    },
    replacement(content, node) {
      const { refId, blockId } = node.dataset
      return `[?](${Array.from(node.classList).join('_')}://${refId}/${blockId})`
    },
  })
  // <p> / <blockquote> carrying a "<prefix>-align-<direction>" class: the
  // element's first class is emitted in front of the content, separated by
  // " ~~ ". Blockquotes additionally get the "> " quote marker.
  .addRule('align', {
    filter(node) {
      return (
        (node.tagName === 'P' || node.tagName === 'BLOCKQUOTE') &&
        [
          `${config.alignClassPrefix}-align-center`,
          `${config.alignClassPrefix}-align-left`,
          `${config.alignClassPrefix}-align-right`,
          `${config.alignClassPrefix}-align-justify`,
        ].some(t => node.classList.contains(t))
      )
    },
    replacement(content, node) {
      const isQuote = node.tagName === 'BLOCKQUOTE'
      if (!isQuote) {
        return `${node.classList[0]} ~~ ${content}\n\n`
      }
      // Prefix every continuation line with the quote marker. The regex must
      // be global: without /g only the first newline was prefixed and later
      // lines lost their "> ".
      return `> ~~ ${node.classList[0]} ~~ ${content.replace(/\n/g, '\n> ')}\n\n`
    },
  })

/**
 * Clean up legacy Quill HTML before it is converted to markdown.
 *
 * When we used Quill, it saved data to the DB by just using editor.innerHTML
 * instead of editor.getContent(), which left a lot of invalid data/markup.
 * Check the html-to-md tests for exactly what we're fixing.
 *
 * The replacements run in the order written and several of them rely on the
 * output of earlier ones, so do not reorder them casually.
 *
 * @param {string} content - Raw HTML string.
 * @returns {string} The HTML with the clean-ups below applied.
 */
export function sanitizeContent(content) {
  // These ones tend to be nested, so keep stripping until none are left.
  // The regex is intentionally NOT global: a /g regex would make .test()
  // stateful through lastIndex.
  const pointlessSpans = /<span contenteditable="false">\uFEFF{0,}<\/span>/
  let noSpans = content
  while (pointlessSpans.test(noSpans)) {
    noSpans = noSpans.replace(pointlessSpans, '')
  }

  return (
    noSpans
      // They're non breaking spaces, so we replace them with real spaces. Initially we replaced
      // them with an empty string, but that ends up running words and tags together so they don't
      // convert well. With no obvious way to insert these using Quill, the assumption is they're
      // there by mistake or happenstance.
      // There are a number of situations with nbsp, so I'm making guesses as to what to do.
      // An nbsp that is the only thing between two tags is dropped entirely.
      .replace(/>&nbsp;</gm, '><')
      .replace(/&nbsp;/gm, ' ')
      // Some of the content has newlines instead of <br>'s.
      .replace(/\n/gm, '<br>')
      // These two cause problems with markdown running over a linebreak so we swap the tags
      .replace(/<br>\s*<\/strong>/gm, '</strong><br />')
      .replace(/<em><br>/gm, '<br /><em>')
      // What even is this? It appears to be some nonsense directly lifted from
      // quill and inserted into the database. Global, so that every cursor
      // span is removed rather than just the first one (a leftover one would
      // otherwise be turned into a visible "ref" span two steps below).
      .replace(/<span class="ql-cursor">([\ufeff]|.)?<\/span>/g, '')
      // The loop above sometimes leaves behind a span with multiple FEFFs and
      // they're a pain to read in the tests so I changed them.
      .replace(/>\uFEFF{1,}<\/span>/gm, '>ref</span>')
      // Also, we sometimes have groups of FEFFs just kicking around, waiting to cause problems
      .replace(/\uFEFF{1,}/gm, '')
      // Strip the literal two-character sequence "^*" (a caret immediately
      // followed by an asterisk), for some reason
      .replace(/\^\*/gm, '')
      // The em should probably precede the word starting: pull a capital
      // letter that sits right before <em> inside the emphasis
      .replace(/([A-Z]{1})<em>/gm, '<em>$1')
      // It's likely there's meant to be a space after the emphasised text
      .replace(/<\/em>([A-Z(]{1})/gm, '</em> $1')
      // A single word character between </em> and a full stop/comma is moved
      // back inside the emphasis
      .replace(/<\/em>(\w{1})([.,])/gm, '$1</em>$2')
      // There is some strong punctuation followed by multiple spaces
      .replace(/<strong>([,.]\s*)<\/strong>/gm, '$1 ')
      // There is some emphatic punctuation followed by multiple spaces
      .replace(/<em>([,.]\s*)<\/em>/gm, '$1 ')
      // There are some emphatic full stops on their own without spaces.
      // Note the unescaped `.`: this unwraps an <em> around ANY single character.
      .replace(/<em>(.)<\/em>/gm, '$1')
      // Sometimes we have a word that runs straight into an emphasised tag, so add a space
      .replace(/(\w)<em><span/gm, '$1 <em><span')
      .replace(/<\/span><\/em>(\w)/gm, '</span></em> $1')
      .replace(/(\w)<em> (\w)/gm, '$1 <em>$2')
      // I'm pretty sure this one is nullified by a regex earlier
      // .replace(/<\/em>([\w]+)/gm, '$1</em> ')
      // Unlikely that we want a space before some punctuation
      .replace(/ ([,.?!])/gm, '$1')
      // Collapse multiple spaces
      .replace(/[ ]+/gm, ' ')
      // Crazy specific issue with emphasised tags: a <br> at the very start of
      // a tag span is hoisted out in front of the span (and its <em>, if any)
      .replace(/ <em>(<span class="\w*?" data-tag-id="\d*?">)<br>(.*?)<\/span>/g, '<br><em>$1$2</span>')
      .replace(/ (<span class="\w*?" data-tag-id="\d*?">)<br>(.*?)<\/span>/g, ' <br>$1$2</span>')
  )
}

/**
 * Convert an HTML string to markdown.
 *
 * The input is first run through sanitizeContent, then through the shared
 * Turndown instance, and finally any single space directly in front of a
 * "[?" marker is removed.
 *
 * @param {string} value - HTML to convert.
 * @returns {string} The resulting markdown.
 */
export const htmlToMarkdown = value => {
  const cleanedHtml = sanitizeContent(value)
  // https://github.com/domchristie/turndown#escaping-markdown-characters
  const markdown = turndownService.turndown(cleanedHtml)
  // Pull "[?" markers tight against the preceding text.
  return markdown.replace(/ \[\?/gm, '[?')
}
