/**
 * Lists all the loadable scripts (non-Emoji or non-picture-based).
 *
 * Add more scripts when supported (e.g. when added to FALLBACK_FONT_DEF in "glyphs.ts").
 * Example of a script that is not listed yet: "symbols".
 *
 * Declared `as const` so that the `LoadableScript` union can be derived from the values.
 */
export const LOADABLE_SCRIPTS = ["en", "ext", "he", "arab", "cyr", "grk", "han", "music"] as const;

/**
 * Script values that are part of `Script` but not of `LOADABLE_SCRIPTS`.
 *
 * `scriptsFromText` skips these values when called with `onlyLoadableScript = true`.
 */
export const NON_LOADABLE_SCRIPTS = ["space", "emoji", "unknown"] as const;
// Tuple types of the two constant lists; only used to derive the unions below.
type LoadableScriptsType = typeof LOADABLE_SCRIPTS;
type NonLoadableScriptsType = typeof NON_LOADABLE_SCRIPTS;

/** Union of the values in `LOADABLE_SCRIPTS`. */
export type LoadableScript = LoadableScriptsType[number];
/** Union of the values in `NON_LOADABLE_SCRIPTS`. */
export type NonLoadableScript = NonLoadableScriptsType[number];
/** Any script value that the detection functions in this module can return. */
export type Script = LoadableScript | NonLoadableScript;
// One detection rule: [pattern or list of patterns, resulting script].
// When a list of patterns is given, `scriptFromChar` requires ALL of them to match.
type ScriptTest = [RegExp | RegExp[], Script];

// Ordered detection rules used by `scriptFromChar`: the entries are tried from top to bottom
// and the first entry whose pattern(s) all match decides the script, so ORDER MATTERS.
const SCRIPTS: ScriptTest[] = [
  // See list of supported "Script"s in https://www.unicode.org/reports/tr24/#Script_Values_Table and https://en.wikipedia.org/wiki/Script_(Unicode)
  //
  // See list of unicode block ranges (for unicode characters not supported by a "Script"):
  //  https://unicode-table.com/en/blocks/
  //
  // Emojis:
  // Do NOT use "\p{Emoji}" as it also captures numbers (and maybe other non-emoji characters).
  // Use "\p{Emoji_Presentation}" followed by "\p{Extended_Pictographic}" instead.
  //
  // NOTE: We need to check symbols (music, emoji) before anything else, because some of the
  //       symbols also match the `\p{Script=Common}` RegEx used by the "en" entry.
  //
  // Examples for testing:
  // - Random: 😂📚🦿🙌👎🦹🧚‍♂️💙🇻🇦🛡️⚔️🛏️🧈🍣!
  // - Flags: 🇹🇨🇧🇱🇧🇬🇨🇺🇬🇧🇺🇬!            <-- flags are multi-code-point emojis (not currently supported, and therefore filtered out in the EmojiPicker)
  //   TODO: Add support for multi-code-point emojis
  // - Emoji_Modifier_Base:✊✋🖕🖖
  // - Emoji_Presentation:😠😥🥐🥞
  // - Emoji:😰😳🕐🕧
  // - Music: 𝅝♩♪♫♬♭𝄢
  // - Chinese: 华人⼈人⺅亼
  //
  // [/[\u{1F300}-\u{1F5FF}]/iu, "symbols" /* Miscellaneous Symbols and Pictographs */],
  [/[\u{1D100}-\u{1D1FF}]/iu, "music" /* Musical Symbols */],
  //
  // [/[\p{Emoji}]/iu, "emoji"],
  [/[\p{Emoji_Presentation}]/iu, "emoji"],
  [/[\p{Extended_Pictographic}]/iu, "emoji"],
  //
  // "en": ASCII, characters of the Common script, and white space.
  [/[\p{ASCII}\p{Script=Common}\p{White_Space}]/iu, "en"],
  // "ext": BOTH patterns must match, i.e. a Latin character that is not covered by the "en" class.
  [[/[\p{Script=Latin}]/iu, /[^\p{ASCII}\p{Script=Common}\p{White_Space}]/iu], "ext"],
  [/\p{Script=Hebrew}/iu, "he"],
  [/\p{Script=Arabic}/iu, "arab"],
  [/\p{Script=Cyrillic}/iu, "cyr"],
  [/\p{Script=Greek}/iu, "grk"],
  // Candidates that are not enabled yet:
  // [/\p{Script=Armenian}/iu, "XXX"],
  // [/\p{Script=Coptic}/iu, "XXX"],
  // [/\p{Script=Georgian}/iu, "georgian"],
  // [/\p{Script=Syriac}/iu, "XXX"],

  // NOTE 1: "han" needs to be AFTER English/Latin/Common, because some characters (like "K") may be recognized as Han.
  [/\p{Script=Han}/iu, "han"]
];

// Memoizes the script detected for each input string.
// A Map is used instead of a plain object so that inputs which happen to be names of
// Object.prototype members ("constructor", "toString", "__proto__", ...) are ordinary
// cache misses instead of returning inherited (non-Script) values.
const _charToScriptCache = new Map<string, Script>();

// Runs the actual detection for `char`, without consulting or updating the cache.
function _detectScriptUncached(char: string): Script {
  // Special case: the empty string is reported as "en".
  if (char === "") {
    return "en";
  }

  // Whitespace is detected before the table so that it gets its own "space" value.
  if (isSpace(char)) {
    return "space";
  }

  // Regular pass through the table: the first entry whose pattern(s) ALL match wins.
  for (const [patterns, script] of SCRIPTS) {
    const tests = Array.isArray(patterns) ? patterns : [patterns];

    if (tests.every((r) => r.test(char))) {
      return script;
    }
  }

  return "unknown";
}

/**
 * Returns the script of `char`, memoizing the result per input string.
 *
 * Detection order:
 *  1. the empty string is reported as "en";
 *  2. anything matched by `isSpace` is "space";
 *  3. otherwise the first `SCRIPTS` entry whose pattern(s) all match decides;
 *  4. if nothing matches, the result is "unknown".
 *
 * @param char The string to classify. The patterns are not anchored, so a string longer than
 *             one character is classified by the first rule that matches anywhere in it.
 * @param withSpace When `false` (the default), a "space" result is reported as "en".
 * @returns The detected script.
 */
export function scriptFromChar(char: string, withSpace: boolean = false): Script {
  let script = _charToScriptCache.get(char);

  if (script === undefined) {
    script = _detectScriptUncached(char);
    _charToScriptCache.set(char, script);
  }

  // When we compile, we don't want spaces as a "language"
  if (!withSpace && script === "space") {
    return "en";
  }

  return script;
}

/**
 * Collects the distinct scripts used by the characters of `text`.
 *
 * The text is walked code point by code point and every character is classified with
 * `scriptFromChar` (default arguments, so whitespace counts as "en").
 *
 * @param text The text to inspect.
 * @param onlyLoadableScript When `true`, values listed in `NON_LOADABLE_SCRIPTS` are left out.
 * @returns The distinct scripts, in order of first appearance in `text`.
 */
export function scriptsFromText(text: string, onlyLoadableScript: boolean = false): Script[] {
  // Nothing is excluded unless the caller asked for loadable scripts only.
  const excluded: readonly Script[] = onlyLoadableScript ? NON_LOADABLE_SCRIPTS : [];

  const perCharacter = Array.from(text, (ch) => scriptFromChar(ch));
  const kept = perCharacter.filter((candidate) => !excluded.includes(candidate));

  // A Set keeps insertion order, so duplicates collapse onto their first occurrence.
  return [...new Set(kept)];
}

/**
 * Returns the script of the character with the given Unicode code point.
 *
 * @param codePoint The code point to classify.
 * @returns "unknown" when `codePoint` is not a usable code point (0, NaN, negative,
 *          fractional, infinite or above U+10FFFF); otherwise the result of
 *          `scriptFromChar` for the corresponding character.
 */
export function scriptFromCodePoint(codePoint: number): Script {
  // String.fromCodePoint throws a RangeError for anything that is not an integer in
  // [0, 0x10FFFF]; report such values as "unknown", like 0 and NaN already were.
  if (!Number.isInteger(codePoint) || codePoint <= 0 || codePoint > 0x10ffff) {
    return "unknown";
  }

  return scriptFromChar(String.fromCodePoint(codePoint));
}

/**
 * Tells whether `char` contains a character with the Unicode `White_Space` property.
 *
 * @param char The string to test; the pattern is not anchored, so a match anywhere counts.
 * @returns `true` when a white-space character is found, `false` otherwise.
 */
export function isSpace(char: string): boolean {
  const whiteSpace = /\p{White_Space}/iu;
  return whiteSpace.exec(char) !== null;
}
