import { toFloatTime } from '@kpv-lab/time-utils'

/**
 * Parses free-form timeline text into a list of dated items, sorted by start date.
 *
 * The text is split into rows with `splitIntoLines`; every row is run through
 * `processBlock`. A row that yields a start date opens a new item, a row without
 * one is appended to the text of the current item. Rows that appear before the
 * first dated row are dropped.
 *
 * @param data raw text to parse
 * @returns the items; each has `text`, optionally `texts`, `start` as a
 *   `"year_month_day"` string and `end` as a `"year_month_day"` string or `0`
 */
export function processData(data: string) {
  const rows = splitIntoLines(data)
  const items: Array<any> = []
  let idx = 0
  // Year inherited by items whose own start date carries no year. Initialised to 0
  // so that a year-less first item gets year 0 instead of `undefined` (which used
  // to produce an "undefined_M_D" start key).
  let prevYear = 0
  // A row that contained only a date (no text); it is prepended to the next row
  // so that date and text spread over two lines are parsed together.
  let startRow = ''

  for (const row of rows) {
    const block = processBlock(startRow + ' ' + row)

    if (block.start) {
      // begin new item
      if (!block.start.year) {
        block.start.year = prevYear
      }

      if (startRow) {
        // the previous row was a bare date that is now part of this block: replace it
        items[idx] = block
      } else {
        items.push(block)
      }

      idx = items.length - 1
      prevYear = block.start.year
      startRow = !block.text ? row : ''
    } else if (items[idx]) {
      // append to current item
      items[idx].text += `\n${block.text}`
      startRow = ''
    }
  }

  // final clean of item text before returning
  items.forEach(item => {
    // NOTE(review): the whitespace regex has no `g` flag, so only the first run of
    // 2+ whitespace characters is collapsed — confirm whether that is intended.
    item.texts = item.texts?.map((t: string) =>
      t
        .replace(/@@/g, '\n')
        .replace(/[\s]{2,}/, ' ')
        .trim()
    )

    item.text = item.text
      .replace(/@@/g, '\n')
      .replace(/[\s]{2,}/, ' ')
      .trim()
    // dates are flattened from { year, month, day } objects to "Y_M_D" strings
    item.start = `${item.start.year}_${item.start.month}_${item.start.day}`
    if (item.end && item.end.year) {
      item.end = `${item.end.year}_${item.end.month}_${item.end.day}`
    } else {
      item.end = 0
    }
  })

  // Sort the items into ascending order by start date; for equal starts the item
  // with the larger end value comes first.
  items.sort((a, b) =>
    toFloatTime(a.start) - toFloatTime(b.start) || toFloatTime(b.end) - toFloatTime(a.end)
  )
  return items
}

/**
 * Splits raw text into rows.
 *
 * A single line break separates two rows; a run of two or more line breaks
 * (a blank line) additionally produces one empty row between them. Tabs become
 * spaces and any remaining ASCII control characters are removed.
 *
 * @param str raw text using `\n`, `\r\n`, `\r` or U+2028 line breaks
 * @returns the rows, in order
 */
export function splitIntoLines(str: string) {
  return str
    .trim()
    // Normalise every line-break flavour to "\n" first. Matching `\r\n?` inside a
    // `{2,}` quantifier lets the regex engine backtrack and count a single CRLF as
    // two breaks ("\r" + "\n"), turning every Windows line ending into a blank row.
    .replace(/\r\n?|\u2028/g, '\n')
    .replace(/\n{2,}/g, '@@@@') // blank line(s): keeps one empty row after split
    .replace(/\n+/g, '@@')
    .replace(/\x09+/g, ' ') // tab to space
    .replace(/[\x00-\x1F]+/g, '') // drop remaining control characters
    .split('@@')
}

/**
 * Date patterns, in the order they are tried. `regex` is a pattern source string
 * and `order` names the date field carried by each successive capture group
 * (the optional month-name ending is an extra group that carries no field).
 *
 * The pattern strings are assembled from shared fragments; the resulting strings
 * are what matters to callers.
 */
export const regexps = (() => {
  // month-name stem followed by an optional long-form ending (two capture groups)
  const month =
    '(jan|feb|mar|märz?|mrz|apr|may|mai|june?|juni?|july?|juli?|aug|sept?|oct|okt|nov|dec|dez)' +
    '(uar|uary|ruar|ruary|ch|il|ust|ember|ober)?'
  // one- or two-digit number (day or numeric month)
  const num = '([0-9]{1,2})'
  // characters allowed directly after a day number (ordinal letters, space, pipe)
  const ordinal = '[ |st|nd|rd|th]*'
  // delimiter: anything but ASCII letters and digits
  const sep = '[^a-z0-9]+'
  // delimiter that additionally excludes U+00C0–U+017F letters
  const sepStrict = '[^\u00C0-\u017Fa-z0-9]+'
  // year with three or more digits / with three or four digits
  const year = '([0-9]{3,})'
  const year34 = '([0-9]{3,4})'

  return [
    // day, month name, year
    { regex: `^${num}${ordinal}${sep}${month}${sep}${year}`, order: ['day', 'month', 'year'] },
    // month name, day, year
    { regex: `^${month}${sep}${num}${ordinal}${sep}${year}`, order: ['month', 'day', 'year'] },
    // numeric day, month, year
    { regex: `^${num}${sep}${num}${sep}${year}`, order: ['day', 'month', 'year'] },
    // year, month name, day
    { regex: `^${year}${sep}${month}${sep}${num}[:-]?`, order: ['year', 'month', 'day'] },
    // year, day (with ordinal), month name
    {
      regex: `^${year}${sep}${num}${ordinal}${sep}${month}[^a-z0-9]?\\s`,
      order: ['year', 'day', 'month'],
    },
    // numeric year, month, day
    { regex: `^${year}${sep}${num}${sep}${num}`, order: ['year', 'month', 'day'] },
    // month name, year
    { regex: `^${month}${sep}${year}`, order: ['month', 'year'] },
    // numeric month, year
    { regex: `^${num}${sep}${year}`, order: ['month', 'year'] },
    // year, month name
    { regex: `^${year34}${sep}${month}${sep}`, order: ['year', 'month'] },
    // numeric year, month
    { regex: `^${year34}${sep}${num}${sep}`, order: ['year', 'month'] },
    // month name, day
    { regex: `^${month}${sep}${num}`, order: ['month', 'day'] },
    // day, month name
    { regex: `^${num}${sep}${month}${sepStrict}${sepStrict}`, order: ['day', 'month'] },
    // month name only
    { regex: `^${month}${sepStrict}`, order: ['month'] },
    // four-digit year in parentheses, anywhere: (1990)
    { regex: '\\(([0-9]{4,4})\\)', order: ['year'] },
    // four-digit year followed by a word boundary, anywhere: 1990
    { regex: '([0-9]{4,4})\\b', order: ['year'] },
    // disabled: basic year at end — /([0-9]{4,4})[^a-z0-9\-]*$/i, order ['year']
    // leading year of two or more digits: 25, 145, 1990
    { regex: '^([0-9]{2,})', order: ['year'] },
  ]
})()

/**
 * Numeric date components as produced by `processDate`.
 * A component that was absent or rejected is stored as 0.
 */
type _date = {
  /** year number; 0 when no year was found */
  year: number
  /** month 1–12; 0 when no month was found */
  month: number
  /** day of month; 0 when no day was found or no month is set */
  day: number
}

/** Splits `str` on the `||` separator and trims surrounding whitespace from every piece. */
const splitTexts = (str: string) => str.split('||').map(piece => piece.trim())

/**
 * Parses one row of text into an optional start date, an optional end date and
 * the remaining text.
 *
 * The patterns in `regexps` (extended by `addCaToRegex` with optional "ca." / "~" /
 * "≈" markers) are tried in order, first for the start date and then, on what is
 * left of the string, for the end date.
 *
 * @param data the row to parse; defaults to an empty string
 * @returns a block whose `start` / `end` are `{ year, month, day }` objects or
 *   `false`; the remaining text is in `text`, or — when it contains `||` — split
 *   into `texts` while `text` stays empty
 */
export function processBlock(data = '') {
  // Drop leading characters until the first one that is a letter in U+00C0–U+017F,
  // an ASCII letter, a digit, one of « » „ or a comma; then trim.
  let str = data.replace(/^[^\u00C0-\u017Fa-z0-9«»„,]+/i, '').trim()
  const block: {
    start: false | _date,
    end: false | _date,
    text: string,
    texts?: Array<string>,
    startIsApproximate?: boolean,
    endIsApproximate?: boolean,
  } = { start: false, end: false, text: '' }
  // start date
  for (const r of regexps) {
    const regex = addCaToRegex(r.regex)
    const re = new RegExp(regex, 'i')
    const res = re.exec(str)
    if (res && res.length > 1) {
      // res.slice(1) = capture groups only; r.order tells processDate what they mean
      const date = processDate(res.slice(1), r.order)
      if (date) {
        block.start = date
        // approximate when the matched text contains "ca.", "~" or "≈"
        block.startIsApproximate = /ca\.|~|≈/i.test(res[0])
        // console.log('date', date, res);
        // Remove the matched date from the text only when it sits at the very
        // start or the very end of the string; a date in the middle is left in place.
        if ((res.index === 0) || (res.index + res[0].length === str.length)) {
          str = str.substr(0, res.index) + str.substr(res.index + res[0].length)
        }
      }
      // The first pattern that matches ends the search, even when processDate
      // rejected the match (block.start then stays false).
      break
    }
  }

  // No start date: the row is plain text, returned as-is (or split on '||').
  if (!block.start) {
    if (str.includes('||')) {
      block.texts = splitTexts(str)
      return block
    }

    block.text = str
    return block
  }

  // Leading run of characters that are neither alphabetic, decimal digits nor one
  // of the listed quote / punctuation characters.
  const nonAlphanumericStart = new RegExp('^[^\\p{Alphabetic}\\p{Decimal_Number}«»„‚"\'?!*()]+', 'ui')
  // Trailing run of characters outside the listed letters, digits and punctuation.
  const nonAlphanumericEnd = new RegExp(
    "[^a-z0-9\\.\\?\"'!«»‘„”*“;%€ß$üäö()]+$",
    'ui'
  )

  // strip the delimiter left between the start date and whatever follows
  str = str.replace(nonAlphanumericStart, '')

  // end date
  for (const r of regexps) {
    const regex = addCaToRegex(r.regex)
    const re = new RegExp(regex, 'i')
    const res = re.exec(str)
    if (res && res.length > 1) {
      const date = processDate(res.slice(1), r.order)
      if (date) {
        block.end = date
        block.endIsApproximate = /ca\.|~|≈/i.test(res[0])
        // unlike the start date, the end date is only removed when it leads the string
        if (res.index === 0) {
          str = str.substr(0, res.index) + str.substr(res.index + res[0].length)
        }

        // make sure the end dates is after the start
        const start = block.start
        const end = block.end as _date

        // toFloatTime is presumably a "Y_M_D" → comparable number conversion
        // (imported from @kpv-lab/time-utils) — TODO confirm
        const d1 = toFloatTime(`${start.year}_${start.month}_${start.day}`)
        const d2 = toFloatTime(`${end.year}_${end.month}_${end.day}`)
        if (d1 >= d2) {
          // end not after start: discard it (text removed above is not restored
          // and endIsApproximate keeps its value)
          block.end = false
        }
      }
      // as above: only the first matching pattern is considered
      break
    }
  }

  // trim leftover delimiters from both ends of the remaining text
  str = str.replace(nonAlphanumericStart, '').replace(nonAlphanumericEnd, '')

  if (str.includes('||')) {
    block.texts = splitTexts(str)
    return block
  }

  block.text = str
  return block
}

/**
 * Maps regex capture groups onto date fields.
 *
 * `order` lists the field names in the sequence their values appear in `parts`.
 * Groups that are empty, that hold a month-name ending ("uary", "ember", …) or
 * an approximation marker ("ca", "~", "≈") carry no field and are skipped.
 *
 * @param parts captured groups of a date match
 * @param order field name (`year`, `month`, `day`) for each meaningful group
 * @returns `{ year, month, day }` with 0 for missing components, or `false`
 *   when neither a year nor a month was found
 */
export function processDate(parts: Array<string>, order: Array<string>) {
  const monthEnding = /^(uar|uary|ruar|ruary|ch|il|ust|ember|ober)/i
  const approxMarker = /^(ca|~|≈)/i
  const result: any = { year: 0, month: 0, day: 0 }

  let cursor = 0
  for (const key of order) {
    // advance past groups that do not hold a field value
    let candidate = parts[cursor]
    while (
      cursor < parts.length &&
      (!candidate || monthEnding.test(candidate) || approxMarker.test(candidate))
    ) {
      cursor += 1
      candidate = parts[cursor]
    }
    result[key] = processField(candidate, key)
    cursor += 1
  }

  // a day is meaningless without a month
  if (result.month === 0) result.day = 0

  // a date needs at least a year or a month (a zero year alone is accepted)
  const hasYearOrMonth = Boolean(result.year) || Boolean(result.month)
  return hasYearOrMonth ? result : false
}

// Month-name prefixes, tested in order (case-insensitive). The 'mar' / 'mär' / 'mrz'
// entry must precede the broader 'ma' entry that catches May / Mai.
const monthPrefixes: ReadonlyArray<readonly [RegExp, number]> = [
  [/^ja/i, 1],
  [/^fe/i, 2],
  [/^(mar|mär|mrz)/i, 3],
  [/^ap/i, 4],
  [/^ma/i, 5],
  [/^jun/i, 6],
  [/^jul/i, 7],
  [/^au/i, 8],
  [/^se/i, 9],
  [/^(oc|ok)/i, 10],
  [/^no/i, 11],
  [/^de/i, 12],
]

/**
 * Converts one captured date component to a number.
 *
 * Text containing any non-digit character is treated as a month name (English or
 * German) and mapped to 1–12, or 0 when it is not recognised. Purely numeric text
 * is parsed as a decimal integer; a month above 12 or a day above 31 becomes 0.
 *
 * @param str the captured text
 * @param type `'month'` or `'day'` to enable range checking; other values are not checked
 * @returns the numeric value, or 0 when the text is empty, unrecognised or out of range
 */
export function processField(str: string, type?: string) {
  if (/[^\d]+/.test(str)) {
    // non numeric
    const hit = monthPrefixes.find(([prefix]) => prefix.test(str))
    return hit ? hit[1] : 0
  }

  // numeric
  const val = parseInt(str, 10)
  if (Number.isNaN(val)) {
    // empty string: parseInt yields NaN, which must not leak into date fields
    return 0
  }
  if (type === 'month' && val > 12) {
    return 0
  }
  if (type === 'day' && val > 31) {
    return 0
  }
  return val
}

// Optional approximation marker ("ca.", "~" or "≈") plus trailing delimiters, placed before a date.
const caRegexAtStart = '((ca\\.|~|≈)[^a-z0-9]*)?'
// Optional "ca." preceded by at least one delimiter, placed after a date.
const caRegexAtEnd = '([^a-z0-9]+(ca\\.))?'

/**
 * Wraps a date pattern so that it also accepts an approximation marker before
 * and/or after the date. A leading `^` anchor is kept at the very front.
 *
 * @param regex pattern source string
 * @returns the extended pattern source string
 */
const addCaToRegex = (regex: string): string => {
  const anchored = regex.startsWith('^')
  const core = anchored ? regex.slice(1) : regex
  const anchor = anchored ? '^' : ''
  return `${anchor}${caRegexAtStart}${core}${caRegexAtEnd}`
}