import type { SupportedLanguages } from '../../i18n/language-options';

/**
 * Normalizes raw caption text before it is split into chunks.
 *
 * Steps, in order:
 *  1. drop every literal `...` ("drama dots"),
 *  2. drop `<break ...>` tags such as `<break time="500ms">`,
 *  3. turn each `\n` / `\r` into a space,
 *  4. collapse every run of spaces into a single space.
 *
 * @param text Raw caption text.
 * @returns The cleaned text. Leading/trailing spaces are not trimmed.
 */
function cleanUpText(text: string): string {
  return (
    text
      // remove drama dots
      .replace(/\.{3}/g, '')
      // remove <break time="500ms"> tags
      .replace(/<break [^>]+>/g, '')
      // replace newlines with spaces
      .replace(/\n|\r/g, ' ')
      // condense whitespace; the `g` flag is required, otherwise only the
      // first run of spaces in the text is collapsed
      .replace(/ +/g, ' ')
  );
}

/**
 * One caption chunk together with the measurements computed for it.
 */
export class CaptionChunkMetric {
  /** Text of the chunk. */
  public word: string;
  /** Width assigned to the chunk by the creator that produced it. */
  public widthChars: number;
  /** Time assigned to the chunk by the creator that produced it. */
  public timeMs: number;
  /** Text appended after `word` when the chunk is rendered. */
  public chunkSeparator: string;

  /**
   * @param word Text of the chunk.
   * @param widthChars Width of the chunk.
   * @param timeMs Time allotted to the chunk.
   * @param chunkSeparator Trailing separator; defaults to one non-breaking
   *   space.
   */
  constructor(
    word: string,
    widthChars: number,
    timeMs: number,
    chunkSeparator = '\u00A0' // non-breaking space
  ) {
    this.word = word;
    this.widthChars = widthChars;
    this.timeMs = timeMs;
    this.chunkSeparator = chunkSeparator;
  }

  /**
   * Creates a `<span>` whose text content is the word followed by the
   * separator.
   *
   * @returns The newly created, unattached span element.
   */
  toDOM() {
    const span = document.createElement('span');
    const label = this.word + this.chunkSeparator;
    span.textContent = label;
    return span;
  }
}

/**
 * Contract shared by the per-language caption parsers in this file.
 */
interface MetricsCreator {
  /**
   * Splits `text` into an ordered list of caption chunks.
   *
   * @param text Caption text to split.
   * @param lang Language code; implementations may ignore it.
   * @returns One `CaptionChunkMetric` per chunk.
   */
  parse(text: string, lang: string): CaptionChunkMetric[];
}

/**
 * Metrics creator for text whose words are separated by whitespace. Every
 * whitespace-delimited word becomes one chunk.
 */
class ASCIIMetricsCreator implements MetricsCreator {
  /**
   * @param charTimeMs Time budget per character of a word.
   */
  constructor(private charTimeMs: number) {}

  /**
   * Time for a single word: `charTimeMs` per character, plus a pause chosen
   * from the word's final character.
   */
  private timeText(word: string, _lang: string) {
    const baseMs = word.length * this.charTimeMs;

    // NOTE: these are reasonable ms values for Finn v1.
    switch (word.charAt(word.length - 1)) {
      case '.':
      case '?':
      case '!':
      case ':':
        // sentence-like ending: long pause
        return baseMs + 1000;
      case ',':
        // comma: short pause
        return baseMs + 300;
      default:
        return baseMs;
    }
  }

  /**
   * Width of a piece of text, counted as its string length. Measuring real
   * pixels is a possible later refinement but is considerably more costly.
   */
  private measureText(word: string, _lang: string) {
    return word.length;
  }

  /**
   * Cleans `text`, splits it on whitespace and builds one metric per
   * non-empty word. The width of each word is measured with one trailing
   * space included.
   *
   * @param text Caption text to split.
   * @param lang Language code; `'ko'` selects a two-character separator.
   * @returns Metrics in the order the words appear.
   */
  parse(text: string, lang: string) {
    // When translations were returned from gcloud, korean always used two
    // spaces between the words :shrug:.
    const separator = lang === 'ko' ? '\u00A0\u00A0' : '\u00A0';

    const metrics: CaptionChunkMetric[] = [];
    for (const word of cleanUpText(text).split(/\s+/)) {
      // Splitting can yield empty strings at the edges; skip them.
      if (word.length === 0) continue;

      const width = this.measureText(word + ' ', lang);
      const duration = this.timeText(word, lang);
      metrics.push(new CaptionChunkMetric(word, width, duration, separator));
    }
    return metrics;
  }
}

/**
 * Metrics creator for text without whitespace word boundaries. The text is
 * walked one code point at a time and cut into chunks at punctuation:
 *
 *  - closing punctuation (ideographic, full-width or Latin) is appended to
 *    the current chunk and then ends it;
 *  - an opening bracket ends the current chunk and starts the next one;
 *  - an ideographic space ends the current chunk (and is dropped when the
 *    chunk is still empty);
 *  - any other whitespace is kept inside a chunk but dropped at its start.
 *
 * Width and time are derived from `String.prototype.length`, i.e. UTF-16 code
 * units, so a code point outside the BMP counts as two.
 */
class CJKMetricsCreator implements MetricsCreator {
  // https://en.wikipedia.org/wiki/CJK_Symbols_and_Punctuation
  // IDEOGRAPHIC SPACE. It also matches `\s`, so it has to be tested before the
  // generic whitespace check or it can never act as a break.
  private static readonly ideographicSpace = '\u3000';
  // IDEOGRAPHIC COMMA, IDEOGRAPHIC FULL STOP, full-width punctuation and
  // Latin punctuation: each one closes the chunk it is appended to.
  private static readonly closingPunc = /[\u3001\u3002，．：；！？,.:;!?]/u;
  // Opening brackets/quotes: each one starts a new chunk.
  private static readonly openingPunc = /[「『（【〔〈《〖｛〘〚]/u;
  private static readonly whitespace = /\s/u;

  /**
   * @param characterTimeMs Time budget per character of a chunk.
   */
  constructor(private characterTimeMs: number) {}

  /**
   * Cleans `text` and splits it into punctuation-delimited chunks.
   *
   * @param text Caption text to split.
   * @param _lang Unused; present to satisfy `MetricsCreator`.
   * @returns Metrics in text order. Chunks whose width or time is not
   *   positive are omitted.
   */
  parse(text: string, _lang: string): CaptionChunkMetric[] {
    const metrics: CaptionChunkMetric[] = [];
    let chunk = '';

    // Emits the pending chunk (unless it is empty or has no positive time)
    // and resets it.
    const flush = () => {
      const widthChars = chunk.length;
      const timeMs = widthChars * this.characterTimeMs;
      if (timeMs > 0 && widthChars > 0) {
        metrics.push(new CaptionChunkMetric(chunk, widthChars, timeMs));
      }
      chunk = '';
    };

    // Iterating a string yields whole code points, not UTF-16 code units.
    for (const point of cleanUpText(text)) {
      if (point === CJKMetricsCreator.ideographicSpace) {
        // squelch a leading ideographic space; otherwise it closes the chunk
        if (chunk.length > 0) {
          chunk += point;
          flush();
        }
      } else if (CJKMetricsCreator.whitespace.test(point)) {
        // squelch initial/leading whitespace, keep interior whitespace
        if (chunk.length > 0) {
          chunk += point;
        }
      } else if (CJKMetricsCreator.closingPunc.test(point)) {
        chunk += point;
        flush();
      } else if (CJKMetricsCreator.openingPunc.test(point)) {
        flush();
        chunk = point;
      } else {
        chunk += point;
      }
    }

    // Close anything pending!
    flush();

    return metrics;
  }
}

/**
 * Picks the metrics creator, and its per-character time, for a language.
 *
 * @param lang Language to create metrics for.
 * @returns A CJK creator for `zh-CN`, `zh-TW` and `ja`; an ASCII creator for
 *   every other language.
 */
export function createMetricsCreator(lang: SupportedLanguages) {
  if (lang === 'zh-CN' || lang === 'zh-TW') {
    return new CJKMetricsCreator(200);
  }

  if (lang === 'ja') {
    return new CJKMetricsCreator(125);
  }

  if (lang === 'ko') {
    // NOTE: not a bug that we use the ASCII metrics creator for Korean. It is
    // more like a Latin language than CJK in terms of punctuation and word
    // breaks.
    return new ASCIIMetricsCreator(100);
  }

  return new ASCIIMetricsCreator(50);
}

/**
 * Line length setting for a language.
 *
 * @param lang Language being displayed.
 * @param isV2 When true, a single fixed value is used for every language.
 * @returns 36 when `isV2` is set; otherwise 25 for `zh-CN`, `zh-TW` and `ja`,
 *   and 50 for everything else (including `ko`).
 */
export function configureLineLength(lang: SupportedLanguages, isV2 = false) {
  // TODO(drew): revisit this for other languages
  if (isV2) return 36;

  const usesShortLines = lang === 'zh-CN' || lang === 'zh-TW' || lang === 'ja';
  return usesShortLines ? 25 : 50;
}

/**
 * Expire duration setting for a language.
 *
 * @param lang Language being displayed.
 * @returns 8000 for `zh-CN`, `zh-TW` and `ja`; 5000 for everything else
 *   (including `ko`).
 */
export function configureExpireDurationMs(lang: SupportedLanguages) {
  const expiresSlowly = lang === 'zh-CN' || lang === 'zh-TW' || lang === 'ja';
  return expiresSlowly ? 8000 : 5000;
}

/**
 * Line count setting for a language.
 *
 * @param lang Language being displayed.
 * @returns 3 for `zh-CN`, `zh-TW` and `ja`; 2 for everything else (including
 *   `ko`).
 */
export function configureLineCount(lang: SupportedLanguages) {
  const needsExtraLine = lang === 'zh-CN' || lang === 'zh-TW' || lang === 'ja';
  return needsExtraLine ? 3 : 2;
}
