string.charCodeAt

์ฃผ์–ด์ง„ ์ธ๋ฑ์Šค์— ๋Œ€ํ•œ UTF-16 code unit์„ ๋‚˜ํƒ€๋‚ด๋Š” 0 - 65535์‚ฌ์ด์˜ ์ •์ˆ˜๋ฅผ ๋ฐ˜ํ™˜ํ•œ๋‹ค.

// Sample string and the position whose UTF-16 code unit is looked up.
const sentence = "dhgn gkstl dhtlqtkqns";
const index = 4;

// Position 4 holds the space character, whose code unit is 32.
const codeUnit = sentence.charCodeAt(index);
console.log(`index: ${index}, ${codeUnit}`); // index: 4, 32

UTF-16 ์ฝ”๋“œ ๋‹จ์œ„๋Š” single UTF-16 ์ฝ”๋“œ ๋‹จ์œ„๋กœ ํ‘œํ˜„ํ•  ์ˆ˜ ์žˆ๋Š” ์ฝ”๋“œ ํฌ์ธํŠธ์˜ Unicode ์ฝ”๋“œ ํฌ์ธํŠธ์™€ ์ผ์น˜ํ•˜์ง€๋งŒ, Unicode ์ฝ”๋“œ ํฌ์ธํŠธ๋ฅผ UTF-16 ์ฝ”๋“œ ๋‹จ์œ„๋กœ ๋‚˜ํƒ€๋‚ผ ์ˆ˜ ์—†๋‹ค๋ฉด (๊ฐ’์ด 0x10000๋ณด๋‹ค ํฌ๊ธฐ ๋•Œ๋ฌธ์—) ์ฝ”๋“œ ํฌ์ธํŠธ์˜ surrogate pair ์ฒซ๋ฒˆ์งธ part๋ฅผ ๋ฐ˜ํ™˜ํ•œ๋‹ค. ์ „์ฒด ์ฝ”๋“œ ํฌ์ธํŠธ ๊ฐ’์„ ์›ํ•œ๋‹ค๋ฉด codePointAt()์„ ์‚ฌ์šฉํ•œ๋‹ค.

Syntax

str.charCodeAt(index)

Parameters

index

0๋ณด๋‹ค ๊ฐ™๊ฑฐ๋‚˜ ํฌ๊ณ , string์˜ length๋ณด๋‹ค ์ž‘์€ ์ •์ˆ˜, number๊ฐ€ ์•„๋‹๊ฒฝ์šฐ default๋Š” 0์ด๋‹ค.

Return value

์ฃผ์–ด์ง„ index์— ๋Œ€ํ•œ ๋ฌธ์ž์˜ UTF-16 code unit ๊ฐ’๋ฅผ ๋‚˜ํƒ€๋‚ธ ์ˆซ์ž

index๊ฐ€ ๋ฒ”์œ„๋ฐ–์„ ๋„˜์—ˆ์„ ๋• NaN

Description

Unicode code point 범위는 0에서 1114111 (0x10FFFF)까지이다.

앞쪽 128개의 Unicode 코드 포인트는 ASCII 문자 인코딩에 대응된다. https://developer.mozilla.org/ko/docs/Web/JavaScript/Guide/Obsolete_Pages/Core_JavaScript_1.5_Guide/Unicode

charCodeAt()는 65536보다 작은 값을 항상 반환한다.

higher code points๋Š” surrogate pair๋กœ ๋‚˜ํƒ€๋‚ธ๋‹ค. 65536์ด์ƒ์ธ ๊ฐœ๋ณ„ ๋ฌธ์ž๋Š” ์ „์ฒด ๋ฌธ์ž๋ฅผ ๊ฒ€์‚ฌํ•˜๊ฑฐ๋‚˜ charCodeAt(i)์™€ charCodeAt(i+1) ๋‘ ๋ฌธ์ž๋ฅผ ๊ฒ€์‚ฌํ•ด์•ผํ•œ๋‹ค.

이럴 땐 codePointAt(i)를 사용한다.

Examples

using charCodeAt()

'ABC'.charCodeAt(0); // returns 65, the Unicode value of 'A'

๋ฌธ์ž์—ด์˜ ์ด์ „ ๋ถ€๋ถ„์— "๋‹ค๊ตญ์–ด ๊ธฐ๋ณธํ‰๋ฉด์ด ์•„๋‹Œ ๋ฌธ์ž"๊ฐ€ ์กด์žฌํ•œ๋‹ค๋Š” ๊ฒƒ์„ ์•Œ์ง€ ๋ชปํ•  ๋•Œ

for 루프 등에 사용될 수 있다.

/**
 * Returns the full Unicode code point that starts at code-unit index `idx`.
 * Intended for walking a string one code unit at a time (e.g. in a for loop)
 * when it is not known whether non-BMP characters occur earlier in the string.
 *
 * @param {string} str - String to inspect.
 * @param {number} [idx=0] - Code-unit index; any falsy value is treated as 0.
 * @returns {number|false} The code point starting at `idx`; `false` when the
 *   unit at `idx` is a low surrogate; `NaN` when `idx` is out of range.
 * @throws {Error} When the unit at `idx` is a high surrogate that is not
 *   immediately followed by a low surrogate.
 */
function fixedCharCodeAt(str, idx) {
  // ex. fixedCharCodeAt('\uD800\uDC00', 0); // 65536
  // ex. fixedCharCodeAt('\uD800\uDC00', 1); // false (index is on the low surrogate)
  const position = idx || 0;
  const code = str.charCodeAt(position);

  // High surrogate: combine it with the low surrogate that must follow.
  if (0xD800 <= code && code <= 0xDBFF) {
    const low = str.charCodeAt(position + 1);
    // The range test also rejects NaN (end of string), so both a missing and
    // a non-surrogate following unit are reported instead of producing a
    // meaningless number.
    if (!(0xDC00 <= low && low <= 0xDFFF)) {
      throw new Error(
        'High surrogate not followed by ' +
          'low surrogate in fixedCharCodeAt()'
      );
    }
    return (code - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;
  }

  // Low surrogate: presumably the second half of a non-BMP character whose
  // high surrogate sits just before it; return false so a loop can skip it.
  if (0xDC00 <= code && code <= 0xDFFF) {
    return false;
  }

  return code;
}

๋ฌธ์ž์—ด์˜ ์ด์ „ ๋ถ€๋ถ„์— "๋‹ค๊ตญ์–ด ๊ธฐ๋ณธํ‰๋ฉด์ด ์•„๋‹Œ ๋ฌธ์ž"๊ฐ€ ์กด์žฌํ•œ๋‹ค๋Š” ๊ฒƒ์„ ์•Œ ๋•Œ

/**
 * Returns the code point of the `idx`-th character of `str`, where every
 * surrogate pair located before that character counts as one character.
 * Intended for strings known to contain non-BMP characters ahead of the
 * position of interest.
 *
 * @param {*} str - Value to inspect; coerced to a string by concatenation.
 * @param {number} idx - Zero-based character index (surrogate pairs count once).
 * @returns {number} The code point at that character, or `NaN` when the
 *   resulting position is negative or past the end of the string. An unpaired
 *   surrogate is returned as its own code-unit value.
 */
function knownCharCodeAt(str, idx) {
  const text = str + '';

  // Convert the character index into a code-unit index: each surrogate pair
  // that starts before the running position pushes it one unit further.
  let unitIndex = idx;
  const surrogatePairs = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
  let match = surrogatePairs.exec(text);
  while (match !== null) {
    if (match.index < unitIndex) {
      unitIndex++;
    } else {
      break;
    }
    match = surrogatePairs.exec(text);
  }

  if (unitIndex >= text.length || unitIndex < 0) {
    return NaN;
  }

  // codePointAt combines a well-formed pair and returns a lone surrogate
  // unchanged, instead of doing pair arithmetic on a unit that is not a low
  // surrogate.
  const codePoint = text.codePointAt(unitIndex);
  // codePointAt yields undefined where charCodeAt would yield NaN (e.g. an
  // empty string with a non-numeric index); keep the numeric NaN contract.
  return codePoint === undefined ? NaN : codePoint;
}

🤔

Last updated

Was this helpful?