kana.ts 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315
  /**
   * This module matches romaji input against the provided kana. While most
   * kana map one-to-one to romaji, some kana can be typed in more than one
   * way. In addition, we also have to handle っ, which causes the next
   * consonant to be repeated.
   *
   * State is managed by building a state machine for each kana; each machine
   * should handle every accepted romaji spelling of that kana. It also keeps
   * track of what is left to be typed, and adjusts itself accordingly if an
   * alternative romaji was used.
   */
  12. /// <reference path="state.ts" />
  13. namespace kana {
  14. import StateMachine = state.StateMachine;
  15. import TransitionResult = state.TransitionResult;
  16. import t = state.makeTransition;
  /**
   * Builds a machine that accepts `source` exactly, one character at a time.
   *
   * Every transition starts at the text that is still left to type, consumes
   * its first character, and moves to the rest of the text. An empty `source`
   * yields a machine with no transitions.
   */
  function literal(source: string): StateMachine {
    // Splitting on '' walks the string by UTF-16 code unit, like charAt does.
    const steps = source.split('').map((ch, index) =>
      t(source.substring(index), ch, source.substring(index + 1))
    );
    return state.buildFromTransitions(source, steps);
  }
  /**
   * Machine for し. Starting from `shi`, the `s` is required; after it either
   * `h` then `i`, or `i` directly, leads to the empty end state, so both `shi`
   * and `si` are spelled out by the transitions.
   */
  function shi(): StateMachine {
    const start = 'shi';
    const afterS = 'hi';
    const vowel = 'i';
    return state.buildFromTransitions(start, [
      t(start, 's', afterS),
      t(afterS, 'h', vowel),
      // skipping the 'h': the vowel is accepted straight from the 'hi' state
      t(afterS, vowel, ''),
      t(vowel, vowel, '')
    ]);
  }
  /**
   * Machine for ち. The transitions spell out two routes from `chi` to the
   * empty end state: `c`, `h`, `i` and the shorter `t`, `i`.
   */
  function chi(): StateMachine {
    const start = 'chi';
    const vowel = 'i';
    const afterC = 'h' + vowel;
    return state.buildFromTransitions(start, [
      t(start, 'c', afterC),
      // 't' jumps directly to the vowel, giving the `ti` spelling
      t(start, 't', vowel),
      t(afterC, 'h', vowel),
      t(vowel, vowel, '')
    ]);
  }
  /**
   * Machine for つ. After the leading `t`, the `s` is optional: the
   * transitions cover both `tsu` and `tu`.
   */
  function tsu(): StateMachine {
    const start = 'tsu';
    const afterT = 'su';
    const vowel = 'u';
    return state.buildFromTransitions(start, [
      t(start, 't', afterT),
      t(afterT, 's', vowel),
      // skipping the 's': the vowel is accepted straight from the 'su' state
      t(afterT, vowel, ''),
      t(vowel, vowel, '')
    ]);
  }
  /**
   * Machine for ふ. Either `f` or `h` moves from `fu` to `u`, so the
   * transitions cover both `fu` and `hu`.
   */
  function fu(): StateMachine {
    const start = 'fu';
    const vowel = 'u';
    const transitions = [
      t(start, 'f', vowel),
      t(start, 'h', vowel),
      t(vowel, vowel, '')
    ];
    return state.buildFromTransitions(start, transitions);
  }
  /**
   * Machine for じ. Either `j` or `z` moves from `ji` to `i`, so the
   * transitions cover both `ji` and `zi`.
   */
  function ji(): StateMachine {
    const start = 'ji';
    const vowel = 'i';
    const transitions = [
      t(start, 'j', vowel),
      t(start, 'z', vowel),
      t(vowel, vowel, '')
    ];
    return state.buildFromTransitions(start, transitions);
  }
  /**
   * Machine for a `sh` + `end` syllable (used in this file for しゃ, しゅ, しょ).
   *
   * After the leading `s`, either `h` or `y` leads to the `end` state, so the
   * transitions cover `sh` + end and `sy` + end.
   *
   * @param end Text that finishes the syllable; the callers in this file pass a
   *     single vowel. It is used both as a state name and as the final input.
   */
  function sh(end: string): StateMachine {
    const afterS = `h${end}`;
    const start = `s${afterS}`;
    const transitions = [
      t(start, 's', afterS),
      t(afterS, 'h', end),
      t(afterS, 'y', end),
      t(end, end, '')
    ];
    return state.buildFromTransitions(start, transitions);
  }
  /**
   * Machine for a `ch` + `end` syllable (used in this file for ちゃ, ちゅ, ちょ).
   *
   * Two routes lead from the start to the `end` state: `c` then `h`, or `t`
   * then `y`. The transitions therefore cover `ch` + end and `ty` + end.
   *
   * @param end Text that finishes the syllable; the callers in this file pass a
   *     single vowel. It is used both as a state name and as the final input.
   */
  function ch(end: string): StateMachine {
    const afterC = `h${end}`;
    const afterT = `y${end}`;
    const start = `c${afterC}`;
    const transitions = [
      t(start, 'c', afterC),
      t(afterC, 'h', end),
      t(start, 't', afterT),
      t(afterT, 'y', end),
      t(end, end, '')
    ];
    return state.buildFromTransitions(start, transitions);
  }
  /**
   * Machine for a `j` + `end` syllable (used in this file for じゃ, じゅ, じょ).
   *
   * `j` moves straight to the `end` state and `z` moves to `y` + end, from
   * which `y` reaches `end`. The optional `y` after `j` is expressed as a
   * transition from `end` back to `end`. The transitions therefore cover
   * `j` + end, `jy` + end and `zy` + end.
   *
   * NOTE(review): because that optional `y` is a self-loop on the `end` state,
   * it looks like any number of `y` would be accepted before the vowel (on
   * both the `j` and the `z` route) - confirm against the state machine
   * implementation.
   *
   * @param end Text that finishes the syllable; the callers in this file pass a
   *     single vowel. It is used both as a state name and as the final input.
   */
  function j(end: string): StateMachine {
    const start = `j${end}`;
    const afterZ = `y${end}`;
    const transitions = [
      t(start, 'j', end),
      t(start, 'z', afterZ),
      t(end, 'y', end),
      t(afterZ, 'y', end),
      t(end, end, '')
    ];
    return state.buildFromTransitions(start, transitions);
  }
  /** Lookup table from a kana string to the state machine for its romaji. */
  interface KanaMapping {
    [kana: string]: StateMachine;
  }
  /**
   * Machines for kana written with a single character, one row of the kana
   * chart per line. Kana with more than one accepted spelling use their
   * dedicated builder; everything else is a plain `literal`.
   */
  const SINGLE_KANA_MAPPING: KanaMapping = {
    // vowels
    'あ': literal('a'), 'い': literal('i'), 'う': literal('u'), 'え': literal('e'), 'お': literal('o'),
    // k
    'か': literal('ka'), 'き': literal('ki'), 'く': literal('ku'), 'け': literal('ke'), 'こ': literal('ko'),
    // s
    'さ': literal('sa'), 'し': shi(), 'す': literal('su'), 'せ': literal('se'), 'そ': literal('so'),
    // t
    'た': literal('ta'), 'ち': chi(), 'つ': tsu(), 'て': literal('te'), 'と': literal('to'),
    // n
    'な': literal('na'), 'に': literal('ni'), 'ぬ': literal('nu'), 'ね': literal('ne'), 'の': literal('no'),
    // h
    'は': literal('ha'), 'ひ': literal('hi'), 'ふ': fu(), 'へ': literal('he'), 'ほ': literal('ho'),
    // m
    'ま': literal('ma'), 'み': literal('mi'), 'む': literal('mu'), 'め': literal('me'), 'も': literal('mo'),
    // y
    'や': literal('ya'), 'ゆ': literal('yu'), 'よ': literal('yo'),
    // r
    'ら': literal('ra'), 'り': literal('ri'), 'る': literal('ru'), 'れ': literal('re'), 'ろ': literal('ro'),
    // w and the syllabic n
    'わ': literal('wa'), 'を': literal('wo'), 'ん': literal('n'),
    // g
    'が': literal('ga'), 'ぎ': literal('gi'), 'ぐ': literal('gu'), 'げ': literal('ge'), 'ご': literal('go'),
    // z
    'ざ': literal('za'), 'じ': ji(), 'ず': literal('zu'), 'ぜ': literal('ze'), 'ぞ': literal('zo'),
    // d
    'だ': literal('da'), 'ぢ': literal('di'), 'づ': literal('du'), 'で': literal('de'), 'ど': literal('do'),
    // b
    'ば': literal('ba'), 'び': literal('bi'), 'ぶ': literal('bu'), 'べ': literal('be'), 'ぼ': literal('bo'),
    // p
    'ぱ': literal('pa'), 'ぴ': literal('pi'), 'ぷ': literal('pu'), 'ぺ': literal('pe'), 'ぽ': literal('po')
  };
  /**
   * Machines for kana written with two characters (a kana followed by a small
   * ゃ, ゅ or ょ), one consonant family per line. Families with more than one
   * accepted spelling use their dedicated builder; the rest are plain
   * `literal`s.
   */
  const DOUBLE_KANA_MAPPING: KanaMapping = {
    // ky
    'きゃ': literal('kya'), 'きゅ': literal('kyu'), 'きょ': literal('kyo'),
    // sh
    'しゃ': sh('a'), 'しゅ': sh('u'), 'しょ': sh('o'),
    // ch
    'ちゃ': ch('a'), 'ちゅ': ch('u'), 'ちょ': ch('o'),
    // ny
    'にゃ': literal('nya'), 'にゅ': literal('nyu'), 'にょ': literal('nyo'),
    // hy
    'ひゃ': literal('hya'), 'ひゅ': literal('hyu'), 'ひょ': literal('hyo'),
    // my
    'みゃ': literal('mya'), 'みゅ': literal('myu'), 'みょ': literal('myo'),
    // ry
    'りゃ': literal('rya'), 'りゅ': literal('ryu'), 'りょ': literal('ryo'),
    // gy
    'ぎゃ': literal('gya'), 'ぎゅ': literal('gyu'), 'ぎょ': literal('gyo'),
    // j
    'じゃ': j('a'), 'じゅ': j('u'), 'じょ': j('o'),
    // dy
    'ぢゃ': literal('dya'), 'ぢゅ': literal('dyu'), 'ぢょ': literal('dyo'),
    // by
    'びゃ': literal('bya'), 'びゅ': literal('byu'), 'びょ': literal('byo'),
    // py
    'ぴゃ': literal('pya'), 'ぴゅ': literal('pyu'), 'ぴょ': literal('pyo')
  };
  /** Matches one whitespace character; hoisted so the scan loop does not rebuild it. */
  const WHITESPACE = /\s/;

  /**
   * Builds the machine for a っ that has no following kana to attach to.
   * It is spelled the way IMEs spell a standalone small tsu: `xtu` or `ltu`.
   */
  function standaloneSmallTsu(): StateMachine {
    return state.buildFromTransitions('xtu', [
      t('xtu', 'x', 'tu'),
      t('xtu', 'l', 'tu'),
      t('tu', 't', 'u'),
      t('u', 'u', '')
    ]);
  }

  /**
   * Tracks typing progress through a kana string.
   *
   * The input is split into units: a single kana, a two-character kana such
   * as きゃ, either of those prefixed by っ, or any other single character.
   * `kana[i]` holds the text of unit `i` and `stateMachines[i]` the machine
   * that accepts its romaji, so the two arrays always have the same length.
   */
  export class KanaInputState {
    /** Text of each unit, parallel to `stateMachines`. */
    kana: string[];
    /** One machine per unit, parallel to `kana`. */
    stateMachines: StateMachine[];
    /** Index of the unit being typed; equals `stateMachines.length` once done. */
    currentIndex: number;

    /**
     * @param input Text to type. It is lower-cased and whitespace is skipped.
     *     A character with no kana mapping becomes a unit that must be typed
     *     literally.
     */
    constructor(input: string) {
      const kana: string[] = [];
      const machines: StateMachine[] = [];
      // Always push to both arrays together so they cannot drift apart.
      const emit = (text: string, machine: StateMachine): void => {
        kana.push(text);
        machines.push(machine);
      };

      // True while a っ has been read but not yet attached to a following kana.
      let pendingTsu = false;
      // A っ that cannot double a following kana used to be dropped silently;
      // emit it as its own unit so it is neither lost from `kana` nor skipped.
      const flushPendingTsu = (): void => {
        if (pendingTsu) {
          emit('っ', standaloneSmallTsu());
          pendingTsu = false;
        }
      };

      const text = input.toLowerCase();
      let pos = 0;
      while (pos < text.length) {
        const one = text.charAt(pos);
        if (WHITESPACE.test(one)) {
          // Whitespace is skipped; a pending っ carries over it.
          pos += 1;
          continue;
        }

        // Two-character kana take priority over single ones.
        const two = text.substring(pos, pos + 2);
        const doubleKana = DOUBLE_KANA_MAPPING[two];
        if (doubleKana !== undefined) {
          if (pendingTsu) {
            emit('っ' + two, doubleKana.extend());
            pendingTsu = false;
          } else {
            emit(two, doubleKana.clone());
          }
          pos += 2;
          continue;
        }

        if (one === 'っ') {
          // Two っ in a row: the earlier one has nothing to attach to.
          flushPendingTsu();
          pendingTsu = true;
          pos += 1;
          continue;
        }

        const singleKana = SINGLE_KANA_MAPPING[one];
        if (singleKana !== undefined) {
          if (pendingTsu) {
            emit('っ' + one, singleKana.extend());
          } else {
            emit(one, singleKana.clone());
          }
        } else {
          // Unmapped character: it cannot absorb a っ, so emit that first.
          flushPendingTsu();
          emit(one, literal(one));
        }
        pendingTsu = false;
        pos += 1;
      }
      // A っ at the very end of the input.
      flushPendingTsu();

      this.kana = kana;
      this.stateMachines = machines;
      this.currentIndex = 0;
    }

    /**
     * Feeds one piece of typed input to the current unit's machine.
     *
     * @returns `true` when the machine accepted the input (moving on to the
     *     next unit if that finished the current one); `false` when it was
     *     rejected or when every unit has already been completed.
     */
    handleInput(input: string): boolean {
      if (this.currentIndex >= this.stateMachines.length) return false;

      const result = this.stateMachines[this.currentIndex].transition(input);
      switch (result) {
        case TransitionResult.FINISHED:
          this.currentIndex += 1;
          return true;
        case TransitionResult.SUCCESS:
          return true;
        default:
          // FAILED, or any result this class does not know about.
          return false;
      }
    }

    /**
     * @returns The display text of the current and all later machines,
     *     concatenated in order.
     */
    getRemainingInput(): string {
      return this.stateMachines
        .slice(this.currentIndex)
        .map(machine => machine.getDisplay())
        .join('');
    }
  }
  289. }