import { enumerate } from 'utils/iterable'
import { removeAccents } from 'utils/string'

import { Inference, InferenceGenerator } from './Inference'

type BookPage = import('EntityModel').BookPage
type PDFEngine = import('pdf-engine').PDFEngine

/**
 * An inference that a given page carries a section title.
 */
export class SectionInference extends Inference {
  /** Identifier of the page the title text was read from. */
  public pageID: string
  /** The text inferred to be the section title. */
  public sectionTitleText: string
  /** Page number associated with the inference. */
  public pageNumber: number

  constructor (pageID: string, sectionTitleText: string, pageNumber: number) {
    super()
    this.pageID = pageID
    this.sectionTitleText = sectionTitleText
    this.pageNumber = pageNumber
  }
}

// Maximum number of valid words a text may contain and still be treated as
// a section title; text with more valid words is rejected.
const MAX_WORDS = 5
// Minimum length, measured after normalization, for a token to count as a
// valid word.
const MIN_WORD_LEN = 4
// Text longer than this many characters is never treated as a section title.
const MAX_TEXT_LEN = 500
// Matches strings consisting solely of ASCII letters and digits
// (case-insensitive, at least one character).
const ALNUM = /^[A-Z0-9]+$/i

/**
 * Matches every character that is neither a word character (`\w`) nor
 * whitespace (`\s`); used to strip "special" characters from a token.
 * Note that `\w` is ASCII-only and includes the underscore, so `_` is not
 * stripped by this pattern.
 * FIXME: Better support for i18n characters.
 * See:
 *  - https://stackoverflow.com/questions/4328500
 *  - https://mathiasbynens.be/notes/es6-unicode-regex
 */
const PUNCTUATION = /[^\w\s]/g

/**
 * Infers, for each page of a book, whether the page's text is likely to be
 * a section title.
 *
 * A text qualifies when it is at most MAX_TEXT_LEN characters long and
 * contains between one and MAX_WORDS valid words (see `isValidWord`).
 */
export class SectionInferenceGenerator extends InferenceGenerator<SectionInference> {
  constructor (private pages: BookPage[], private engine: PDFEngine) { super() }

  /**
   * Decide whether a whitespace-delimited token counts as a word.
   *
   * The token is passed through `removeAccents` and stripped of every
   * PUNCTUATION match; the result must be at least MIN_WORD_LEN characters
   * long and consist only of ASCII letters and digits.
   *
   * @param possibleWord - A single token taken from the page text.
   * @returns `true` when the normalized token is a valid word.
   */
  isValidWord (possibleWord: string): boolean {
    const normalized = removeAccents(possibleWord).replace(PUNCTUATION, '')
    return normalized.length >= MIN_WORD_LEN && ALNUM.test(normalized)
  }

  /**
   * Extract a candidate section title from a block of text.
   *
   * @param text - The text to inspect.
   * @returns The valid words of `text` (in their original, un-normalized
   *   form) joined by single spaces, or `null` when the text is longer than
   *   MAX_TEXT_LEN, contains more than MAX_WORDS valid words, or contains no
   *   valid word at all.
   */
  sectionInference (text: string): string | null {
    if (text.length > MAX_TEXT_LEN) { return null }
    const validWords: string[] = []
    for (const possibleWord of text.split(/\s+/)) {
      if (!this.isValidWord(possibleWord)) { continue }
      // One valid word too many: the text is too wordy to be a title.
      if (validWords.length >= MAX_WORDS) { return null }
      validWords.push(possibleWord)
    }
    // Valid words are non-empty and contain no whitespace, so the joined
    // string needs no trimming.
    return validWords.length > 0 ? validWords.join(' ') : null
  }

  /**
   * Run section inference over every page.
   *
   * The text of all pages is requested concurrently. The tasks are awaited
   * together through `Promise.all` so that every task has a rejection
   * handler attached; awaiting them one by one would leave a page that
   * fails early (or after another page has already failed) as an unhandled
   * promise rejection.
   *
   * @returns One inference per page whose text yields a section title, in
   *   page order; `pageNumber` is the 1-based index of the page in `pages`.
   * @throws Rejects with the first error raised while reading a page's text.
   */
  async generate (): Promise<SectionInference[]> {
    this.process.incrementTaskCount(this.pages.length)

    const titles = await Promise.all(this.pages.map(async page => {
      const title = this.sectionInference(await this.engine.getText(page))
      // Report progress as soon as this page is done, regardless of order.
      this.process.tick()
      return title
    }))

    const inferences: SectionInference[] = []
    for (const [i, page] of enumerate(this.pages)) {
      const sectionTitleText = titles[i]
      if (sectionTitleText) {
        inferences.push(
          new SectionInference(
            page.pageID, sectionTitleText, 1 + i,
          ),
        )
      }
    }

    this.process.isComplete(true)
    return inferences
  }
}
