Ruby 2.0.0p451 (2014-02-24 revision 45167)
encoding.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  encoding.c -
4 
5  $Author: nagachika $
6  created at: Thu May 24 17:23:27 JST 2007
7 
8  Copyright (C) 2007 Yukihiro Matsumoto
9 
10 **********************************************************************/
11 
12 #include "ruby/ruby.h"
13 #include "ruby/encoding.h"
14 #include "internal.h"
15 #include "regenc.h"
16 #include <ctype.h>
17 #ifndef NO_LOCALE_CHARMAP
18 #ifdef __CYGWIN__
19 #include <windows.h>
20 #endif
21 #ifdef HAVE_LANGINFO_H
22 #include <langinfo.h>
23 #endif
24 #endif
25 #include "ruby/util.h"
26 
27 #if defined __GNUC__ && __GNUC__ >= 4
28 #pragma GCC visibility push(default)
29 int rb_enc_register(const char *name, rb_encoding *encoding);
30 void rb_enc_set_base(const char *name, const char *orig);
31 void rb_encdb_declare(const char *name);
32 int rb_encdb_replicate(const char *name, const char *orig);
33 int rb_encdb_dummy(const char *name);
34 int rb_encdb_alias(const char *alias, const char *orig);
35 void rb_encdb_set_unicode(int index);
36 #pragma GCC visibility pop
37 #endif
38 
39 static ID id_encoding;
42 
44  const char *name;
47 };
48 
49 static struct {
51  int count;
52  int size;
54 } enc_table;
55 
56 void rb_enc_init(void);
57 
58 #define ENCODING_COUNT ENCINDEX_BUILTIN_MAX
59 #define UNSPECIFIED_ENCODING INT_MAX
60 
61 #define ENCODING_NAMELEN_MAX 63
62 #define valid_encoding_name_p(name) ((name) && strlen(name) <= ENCODING_NAMELEN_MAX)
63 
64 #define enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc))
65 
66 static int load_encoding(const char *name);
67 
/*
 * Size-reporting callback; it is listed in the type-descriptor
 * initializer that follows this function.
 *
 * The argument is ignored and the reported size is always 0.
 */
static size_t
enc_memsize(const void *p)
{
    (void)p;
    return (size_t)0;
}
73 
75  "encoding",
76  {0, 0, enc_memsize,},
77 };
78 
79 #define is_data_encoding(obj) (RTYPEDDATA_P(obj) && RTYPEDDATA_TYPE(obj) == &encoding_data_type)
80 
81 static VALUE
82 enc_new(rb_encoding *encoding)
83 {
84  return TypedData_Wrap_Struct(rb_cEncoding, &encoding_data_type, encoding);
85 }
86 
87 static VALUE
89 {
90  VALUE list, enc;
91 
92  if (!(list = rb_encoding_list)) {
93  rb_bug("rb_enc_from_encoding_index(%d): no rb_encoding_list", idx);
94  }
95  enc = rb_ary_entry(list, idx);
96  if (NIL_P(enc)) {
97  rb_bug("rb_enc_from_encoding_index(%d): not created yet", idx);
98  }
99  return enc;
100 }
101 
102 VALUE
104 {
105  int idx;
106  if (!encoding) return Qnil;
107  idx = ENC_TO_ENCINDEX(encoding);
108  return rb_enc_from_encoding_index(idx);
109 }
110 
111 static int enc_autoload(rb_encoding *);
112 
113 static int
115 {
116  int index = rb_enc_to_index(enc);
117  if (rb_enc_from_index(index) != enc)
118  return -1;
119  if (enc_autoload_p(enc)) {
120  index = enc_autoload(enc);
121  }
122  return index;
123 }
124 
125 static int
127 {
128  if (SPECIAL_CONST_P(obj) || !rb_typeddata_is_kind_of(obj, &encoding_data_type)) {
129  return -1;
130  }
131  return check_encoding(RDATA(obj)->data);
132 }
133 
134 static int
136 {
137  int index = enc_check_encoding(enc);
138  if (index < 0) {
139  rb_raise(rb_eTypeError, "wrong argument type %s (expected Encoding)",
140  rb_obj_classname(enc));
141  }
142  return index;
143 }
144 
145 int
147 {
148  int idx;
149 
150  idx = enc_check_encoding(enc);
151  if (idx >= 0) {
152  return idx;
153  }
154  else if (NIL_P(enc = rb_check_string_type(enc))) {
155  return -1;
156  }
157  if (!rb_enc_asciicompat(rb_enc_get(enc))) {
158  return -1;
159  }
160  return rb_enc_find_index(StringValueCStr(enc));
161 }
162 
163 /* Returns encoding index or UNSPECIFIED_ENCODING */
164 static int
166 {
167  int idx;
168 
169  StringValue(enc);
170  if (!rb_enc_asciicompat(rb_enc_get(enc))) {
171  rb_raise(rb_eArgError, "invalid name encoding (non ASCII)");
172  }
174  return idx;
175 }
176 
177 static int
179 {
180  int idx = str_find_encindex(enc);
181  if (idx < 0) {
182  rb_raise(rb_eArgError, "unknown encoding name - %s", RSTRING_PTR(enc));
183  }
184  return idx;
185 }
186 
187 static rb_encoding *
189 {
190  return rb_enc_from_index(str_to_encindex(enc));
191 }
192 
193 rb_encoding *
195 {
196  if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data;
197  return str_to_encoding(enc);
198 }
199 
200 rb_encoding *
202 {
203  int idx;
204  if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data;
205  idx = str_find_encindex(enc);
206  if (idx < 0) return NULL;
207  return rb_enc_from_index(idx);
208 }
209 
210 void
212 {
213 }
214 
215 static int
216 enc_table_expand(int newsize)
217 {
218  struct rb_encoding_entry *ent;
219  int count = newsize;
220 
221  if (enc_table.size >= newsize) return newsize;
222  newsize = (newsize + 7) / 8 * 8;
223  ent = realloc(enc_table.list, sizeof(*enc_table.list) * newsize);
224  if (!ent) return -1;
225  memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size));
226  enc_table.list = ent;
227  enc_table.size = newsize;
228  return count;
229 }
230 
231 static int
232 enc_register_at(int index, const char *name, rb_encoding *encoding)
233 {
234  struct rb_encoding_entry *ent = &enc_table.list[index];
235  VALUE list;
236 
237  if (!valid_encoding_name_p(name)) return -1;
238  if (!ent->name) {
239  ent->name = name = strdup(name);
240  }
241  else if (STRCASECMP(name, ent->name)) {
242  return -1;
243  }
244  if (!ent->enc) {
245  ent->enc = xmalloc(sizeof(rb_encoding));
246  }
247  if (encoding) {
248  *ent->enc = *encoding;
249  }
250  else {
251  memset(ent->enc, 0, sizeof(*ent->enc));
252  }
253  encoding = ent->enc;
254  encoding->name = name;
255  encoding->ruby_encoding_index = index;
256  st_insert(enc_table.names, (st_data_t)name, (st_data_t)index);
257  list = rb_encoding_list;
258  if (list && NIL_P(rb_ary_entry(list, index))) {
259  /* initialize encoding data */
260  rb_ary_store(list, index, enc_new(encoding));
261  }
262  return index;
263 }
264 
265 static int
266 enc_register(const char *name, rb_encoding *encoding)
267 {
268  int index = enc_table.count;
269 
270  if ((index = enc_table_expand(index + 1)) < 0) return -1;
271  enc_table.count = index;
272  return enc_register_at(index - 1, name, encoding);
273 }
274 
275 static void set_encoding_const(const char *, rb_encoding *);
276 int rb_enc_registered(const char *name);
277 
278 int
279 rb_enc_register(const char *name, rb_encoding *encoding)
280 {
281  int index = rb_enc_registered(name);
282 
283  if (index >= 0) {
284  rb_encoding *oldenc = rb_enc_from_index(index);
285  if (STRCASECMP(name, rb_enc_name(oldenc))) {
286  index = enc_register(name, encoding);
287  }
288  else if (enc_autoload_p(oldenc) || !ENC_DUMMY_P(oldenc)) {
289  enc_register_at(index, name, encoding);
290  }
291  else {
292  rb_raise(rb_eArgError, "encoding %s is already registered", name);
293  }
294  }
295  else {
296  index = enc_register(name, encoding);
298  }
299  return index;
300 }
301 
302 void
303 rb_encdb_declare(const char *name)
304 {
305  int idx = rb_enc_registered(name);
306  if (idx < 0) {
307  idx = enc_register(name, 0);
308  }
310 }
311 
312 static void
314 {
315  if (rb_enc_registered(name) >= 0) {
316  rb_raise(rb_eArgError, "encoding %s is already registered", name);
317  }
318 }
319 
320 static rb_encoding*
322 {
323  rb_encoding *enc = enc_table.list[index].enc;
324 
325  enc_table.list[index].base = base;
326  if (rb_enc_dummy_p(base)) ENC_SET_DUMMY(enc);
327  return enc;
328 }
329 
/* for encdb.h
 * Set base encoding for encodings which are not replicas
 * but not in their own files.
 *
 * Both `name` and `orig` must already be registered.  An unregistered
 * name yields index -1, which would otherwise be used to index
 * enc_table.list (or hand a NULL base to set_base_encoding), so such
 * a call is reported through rb_bug() instead.
 */
void
rb_enc_set_base(const char *name, const char *orig)
{
    int idx = rb_enc_registered(name);
    int origidx = rb_enc_registered(orig);

    if (idx < 0) {
        rb_bug("rb_enc_set_base: encoding %s is not registered",
               name ? name : "(null)");
    }
    if (origidx < 0) {
        rb_bug("rb_enc_set_base: base encoding %s is not registered",
               orig ? orig : "(null)");
    }
    set_base_encoding(idx, rb_enc_from_index(origidx));
}
341 
342 int
343 rb_enc_replicate(const char *name, rb_encoding *encoding)
344 {
345  int idx;
346 
347  enc_check_duplication(name);
348  idx = enc_register(name, encoding);
349  set_base_encoding(idx, encoding);
351  return idx;
352 }
353 
354 /*
355  * call-seq:
356  * enc.replicate(name) -> encoding
357  *
358  * Returns a replicated encoding of _enc_ whose name is _name_.
359  * The new encoding should have the same byte structure as _enc_.
360  * If _name_ is used by another encoding, raise ArgumentError.
361  *
362  */
363 static VALUE
365 {
368  rb_to_encoding(encoding)));
369 }
370 
371 static int
372 enc_replicate_with_index(const char *name, rb_encoding *origenc, int idx)
373 {
374  if (idx < 0) {
375  idx = enc_register(name, origenc);
376  }
377  else {
378  idx = enc_register_at(idx, name, origenc);
379  }
380  if (idx >= 0) {
381  set_base_encoding(idx, origenc);
383  }
384  return idx;
385 }
386 
/*
 * Declare `name` as a replica of the encoding called `orig`.
 *
 * If `orig` is not registered yet it is registered first with no
 * encoding data.  If `name` is already registered its existing slot is
 * reused; otherwise a new slot is appended.
 *
 * Returns the index of the replica, or -1 on failure (including failure
 * to register `orig`, which would otherwise pass a NULL base encoding on).
 */
int
rb_encdb_replicate(const char *name, const char *orig)
{
    int origidx = rb_enc_registered(orig);
    int idx = rb_enc_registered(name);

    if (origidx < 0) {
        origidx = enc_register(orig, 0);
        if (origidx < 0) return -1;
    }
    return enc_replicate_with_index(name, rb_enc_from_index(origidx), idx);
}
398 
399 int
401 {
402  int index = rb_enc_replicate(name, rb_ascii8bit_encoding());
403  rb_encoding *enc = enc_table.list[index].enc;
404 
405  ENC_SET_DUMMY(enc);
406  return index;
407 }
408 
409 int
410 rb_encdb_dummy(const char *name)
411 {
413  rb_enc_registered(name));
414  rb_encoding *enc = enc_table.list[index].enc;
415 
416  ENC_SET_DUMMY(enc);
417  return index;
418 }
419 
420 /*
421  * call-seq:
422  * enc.dummy? -> true or false
423  *
424  * Returns true for dummy encodings.
425  * A dummy encoding is an encoding for which character handling is not properly
426  * implemented.
427  * It is used for stateful encodings.
428  *
429  * Encoding::ISO_2022_JP.dummy? #=> true
430  * Encoding::UTF_8.dummy? #=> false
431  *
432  */
433 static VALUE
435 {
436  return ENC_DUMMY_P(enc_table.list[must_encoding(enc)].enc) ? Qtrue : Qfalse;
437 }
438 
439 /*
440  * call-seq:
441  * enc.ascii_compatible? -> true or false
442  *
443  * Returns whether ASCII-compatible or not.
444  *
445  * Encoding::UTF_8.ascii_compatible? #=> true
446  * Encoding::UTF_16BE.ascii_compatible? #=> false
447  *
448  */
449 static VALUE
451 {
452  return rb_enc_asciicompat(enc_table.list[must_encoding(enc)].enc) ? Qtrue : Qfalse;
453 }
454 
455 /*
456  * Returns 1 when the encoding is Unicode series other than UTF-7 else 0.
457  */
458 int
460 {
461  return ONIGENC_IS_UNICODE(enc);
462 }
463 
464 static st_data_t
466 {
467  return (st_data_t)strdup((const char *)name);
468 }
469 
470 /*
471  * Returns copied alias name when the key is added for st_table,
472  * else returns NULL.
473  */
474 static int
475 enc_alias_internal(const char *alias, int idx)
476 {
477  return st_insert2(enc_table.names, (st_data_t)alias, (st_data_t)idx,
478  enc_dup_name);
479 }
480 
481 static int
482 enc_alias(const char *alias, int idx)
483 {
484  if (!valid_encoding_name_p(alias)) return -1;
485  if (!enc_alias_internal(alias, idx))
487  return idx;
488 }
489 
490 int
491 rb_enc_alias(const char *alias, const char *orig)
492 {
493  int idx;
494 
495  enc_check_duplication(alias);
496  if (!enc_table.list) {
497  rb_enc_init();
498  }
499  if ((idx = rb_enc_find_index(orig)) < 0) {
500  return -1;
501  }
502  return enc_alias(alias, idx);
503 }
504 
/*
 * Declare `alias` as another name for `orig`.
 *
 * Unlike rb_enc_alias(), an unregistered `orig` is registered on the
 * spot with no encoding data rather than being looked up or loaded.
 *
 * Returns the result of enc_alias(), or -1 when `orig` could not be
 * registered (so that -1 is never stored as the alias target).
 */
int
rb_encdb_alias(const char *alias, const char *orig)
{
    int idx = rb_enc_registered(orig);

    if (idx < 0) {
        idx = enc_register(orig, 0);
        if (idx < 0) return -1;
    }
    return enc_alias(alias, idx);
}
515 
516 void
518 {
520 }
521 
522 enum {
527 };
528 
531 
532 void
534 {
536  if (!enc_table.names) {
538  }
539 #define ENC_REGISTER(enc) enc_register_at(ENCINDEX_##enc, rb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc)
542  ENC_REGISTER(US_ASCII);
543 #undef ENC_REGISTER
545 }
546 
547 rb_encoding *
549 {
550  if (!enc_table.list) {
551  rb_enc_init();
552  }
553  if (index < 0 || enc_table.count <= index) {
554  return 0;
555  }
556  return enc_table.list[index].enc;
557 }
558 
559 int
561 {
562  st_data_t idx = 0;
563 
564  if (!name) return -1;
565  if (!enc_table.list) return -1;
566  if (st_lookup(enc_table.names, (st_data_t)name, &idx)) {
567  return (int)idx;
568  }
569  return -1;
570 }
571 
572 static VALUE
574 {
575  int safe = rb_safe_level();
576  return rb_require_safe(enclib, safe > 3 ? 3 : safe);
577 }
578 
579 static int
580 load_encoding(const char *name)
581 {
582  VALUE enclib = rb_sprintf("enc/%s.so", name);
583  VALUE verbose = ruby_verbose;
585  VALUE errinfo;
586  VALUE loaded;
587  char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib) - 3;
588  int idx;
589 
590  while (s < e) {
591  if (!ISALNUM(*s)) *s = '_';
592  else if (ISUPPER(*s)) *s = (char)TOLOWER(*s);
593  ++s;
594  }
595  FL_UNSET(enclib, FL_TAINT|FL_UNTRUSTED);
596  OBJ_FREEZE(enclib);
598  ruby_debug = Qfalse;
599  errinfo = rb_errinfo();
600  loaded = rb_protect(require_enc, enclib, 0);
601  ruby_verbose = verbose;
602  ruby_debug = debug;
603  rb_set_errinfo(errinfo);
604  if (NIL_P(loaded)) return -1;
605  if ((idx = rb_enc_registered(name)) < 0) return -1;
606  if (enc_autoload_p(enc_table.list[idx].enc)) return -1;
607  return idx;
608 }
609 
610 static int
612 {
613  int i;
614  rb_encoding *base = enc_table.list[ENC_TO_ENCINDEX(enc)].base;
615 
616  if (base) {
617  i = 0;
618  do {
619  if (i >= enc_table.count) return -1;
620  } while (enc_table.list[i].enc != base && (++i, 1));
621  if (enc_autoload_p(base)) {
622  if (enc_autoload(base) < 0) return -1;
623  }
624  i = ENC_TO_ENCINDEX(enc);
625  enc_register_at(i, rb_enc_name(enc), base);
626  }
627  else {
628  i = load_encoding(rb_enc_name(enc));
629  }
630  return i;
631 }
632 
633 /* Return encoding index or UNSPECIFIED_ENCODING from encoding name */
634 int
636 {
637  int i = rb_enc_registered(name);
638  rb_encoding *enc;
639 
640  if (i < 0) {
641  i = load_encoding(name);
642  }
643  else if (!(enc = rb_enc_from_index(i))) {
644  if (i != UNSPECIFIED_ENCODING) {
645  rb_raise(rb_eArgError, "encoding %s is not registered", name);
646  }
647  }
648  else if (enc_autoload_p(enc)) {
649  if (enc_autoload(enc) < 0) {
650  rb_warn("failed to load encoding (%s); use ASCII-8BIT instead",
651  name);
652  return 0;
653  }
654  }
655  return i;
656 }
657 
658 rb_encoding *
659 rb_enc_find(const char *name)
660 {
661  int idx = rb_enc_find_index(name);
662  if (idx < 0) idx = 0;
663  return rb_enc_from_index(idx);
664 }
665 
666 static inline int
668 {
669  if (SPECIAL_CONST_P(obj)) return SYMBOL_P(obj);
670  switch (BUILTIN_TYPE(obj)) {
671  case T_STRING:
672  case T_REGEXP:
673  case T_FILE:
674  return TRUE;
675  case T_DATA:
676  if (is_data_encoding(obj)) return TRUE;
677  default:
678  return FALSE;
679  }
680 }
681 
682 ID
684 {
685  CONST_ID(id_encoding, "encoding");
686  return id_encoding;
687 }
688 
689 int
691 {
692  int i = -1;
693  VALUE tmp;
694 
695  if (SPECIAL_CONST_P(obj)) {
696  if (!SYMBOL_P(obj)) return -1;
697  obj = rb_id2str(SYM2ID(obj));
698  }
699  switch (BUILTIN_TYPE(obj)) {
700  as_default:
701  default:
702  case T_STRING:
703  case T_REGEXP:
704  i = ENCODING_GET_INLINED(obj);
705  if (i == ENCODING_INLINE_MAX) {
706  VALUE iv;
707 
708  iv = rb_ivar_get(obj, rb_id_encoding());
709  i = NUM2INT(iv);
710  }
711  break;
712  case T_FILE:
713  tmp = rb_funcall(obj, rb_intern("internal_encoding"), 0, 0);
714  if (NIL_P(tmp)) obj = rb_funcall(obj, rb_intern("external_encoding"), 0, 0);
715  else obj = tmp;
716  if (NIL_P(obj)) break;
717  case T_DATA:
718  if (is_data_encoding(obj)) {
719  i = enc_check_encoding(obj);
720  }
721  else {
722  goto as_default;
723  }
724  break;
725  }
726  return i;
727 }
728 
729 static void
730 enc_set_index(VALUE obj, int idx)
731 {
732  if (idx < ENCODING_INLINE_MAX) {
733  ENCODING_SET_INLINED(obj, idx);
734  return;
735  }
737  rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx));
738 }
739 
740 void
741 rb_enc_set_index(VALUE obj, int idx)
742 {
743  rb_check_frozen(obj);
744  enc_set_index(obj, idx);
745 }
746 
747 VALUE
749 {
750 /* enc_check_capable(obj);*/
751  rb_check_frozen(obj);
752  if (rb_enc_get_index(obj) == idx)
753  return obj;
754  if (SPECIAL_CONST_P(obj)) {
755  rb_raise(rb_eArgError, "cannot set encoding");
756  }
757  if (!ENC_CODERANGE_ASCIIONLY(obj) ||
759  ENC_CODERANGE_CLEAR(obj);
760  }
761  enc_set_index(obj, idx);
762  return obj;
763 }
764 
765 VALUE
767 {
768  return rb_enc_associate_index(obj, rb_enc_to_index(enc));
769 }
770 
773 {
774  return rb_enc_from_index(rb_enc_get_index(obj));
775 }
776 
779 {
780  rb_encoding *enc = rb_enc_compatible(str1, str2);
781  if (!enc)
782  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
783  rb_enc_name(rb_enc_get(str1)),
784  rb_enc_name(rb_enc_get(str2)));
785  return enc;
786 }
787 
790 {
791  int idx1, idx2;
792  rb_encoding *enc1, *enc2;
793  int isstr1, isstr2;
794 
795  idx1 = rb_enc_get_index(str1);
796  idx2 = rb_enc_get_index(str2);
797 
798  if (idx1 < 0 || idx2 < 0)
799  return 0;
800 
801  if (idx1 == idx2) {
802  return rb_enc_from_index(idx1);
803  }
804  enc1 = rb_enc_from_index(idx1);
805  enc2 = rb_enc_from_index(idx2);
806 
807  isstr2 = RB_TYPE_P(str2, T_STRING);
808  if (isstr2 && RSTRING_LEN(str2) == 0)
809  return enc1;
810  isstr1 = RB_TYPE_P(str1, T_STRING);
811  if (isstr1 && RSTRING_LEN(str1) == 0)
812  return (rb_enc_asciicompat(enc1) && rb_enc_str_asciionly_p(str2)) ? enc1 : enc2;
813  if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2)) {
814  return 0;
815  }
816 
817  /* objects whose encoding is the same of contents */
818  if (!isstr2 && idx2 == ENCINDEX_US_ASCII)
819  return enc1;
820  if (!isstr1 && idx1 == ENCINDEX_US_ASCII)
821  return enc2;
822 
823  if (!isstr1) {
824  VALUE tmp = str1;
825  int idx0 = idx1;
826  str1 = str2;
827  str2 = tmp;
828  idx1 = idx2;
829  idx2 = idx0;
830  idx0 = isstr1;
831  isstr1 = isstr2;
832  isstr2 = idx0;
833  }
834  if (isstr1) {
835  int cr1, cr2;
836 
837  cr1 = rb_enc_str_coderange(str1);
838  if (isstr2) {
839  cr2 = rb_enc_str_coderange(str2);
840  if (cr1 != cr2) {
841  /* may need to handle ENC_CODERANGE_BROKEN */
842  if (cr1 == ENC_CODERANGE_7BIT) return enc2;
843  if (cr2 == ENC_CODERANGE_7BIT) return enc1;
844  }
845  if (cr2 == ENC_CODERANGE_7BIT) {
846  return enc1;
847  }
848  }
849  if (cr1 == ENC_CODERANGE_7BIT)
850  return enc2;
851  }
852  return 0;
853 }
854 
855 void
857 {
859 }
860 
861 
862 /*
863  * call-seq:
864  * obj.encoding -> encoding
865  *
866  * Returns the Encoding object that represents the encoding of obj.
867  */
868 
869 VALUE
871 {
872  int idx = rb_enc_get_index(obj);
873  if (idx < 0) {
874  rb_raise(rb_eTypeError, "unknown encoding");
875  }
876  return rb_enc_from_encoding_index(idx);
877 }
878 
879 int
880 rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
881 {
882  return ONIGENC_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
883 }
884 
885 int
886 rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
887 {
888  int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
889  if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p)
890  return MBCLEN_CHARFOUND_LEN(n);
891  else {
892  int min = rb_enc_mbminlen(enc);
893  return min <= e-p ? min : (int)(e-p);
894  }
895 }
896 
897 int
898 rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
899 {
900  int n;
901  if (e <= p)
903  n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
904  if (e-p < n)
905  return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n-(int)(e-p));
906  return n;
907 }
908 
909 int
910 rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
911 {
912  unsigned int c, l;
913  if (e <= p)
914  return -1;
915  if (rb_enc_asciicompat(enc)) {
916  c = (unsigned char)*p;
917  if (!ISASCII(c))
918  return -1;
919  if (len) *len = 1;
920  return c;
921  }
922  l = rb_enc_precise_mbclen(p, e, enc);
923  if (!MBCLEN_CHARFOUND_P(l))
924  return -1;
925  c = rb_enc_mbc_to_codepoint(p, e, enc);
926  if (!rb_enc_isascii(c, enc))
927  return -1;
928  if (len) *len = l;
929  return c;
930 }
931 
932 unsigned int
933 rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
934 {
935  int r;
936  if (e <= p)
937  rb_raise(rb_eArgError, "empty string");
938  r = rb_enc_precise_mbclen(p, e, enc);
939  if (!MBCLEN_CHARFOUND_P(r)) {
940  rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
941  }
942  if (len_p) *len_p = MBCLEN_CHARFOUND_LEN(r);
943  return rb_enc_mbc_to_codepoint(p, e, enc);
944 }
945 
946 #undef rb_enc_codepoint
947 unsigned int
948 rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
949 {
950  return rb_enc_codepoint_len(p, e, 0, enc);
951 }
952 
953 int
955 {
956  int n = ONIGENC_CODE_TO_MBCLEN(enc,c);
957  if (n == 0) {
958  rb_raise(rb_eArgError, "invalid codepoint 0x%x in %s", c, rb_enc_name(enc));
959  }
960  return n;
961 }
962 
963 int
965 {
967 }
968 
969 int
971 {
973 }
974 
975 /*
976  * call-seq:
977  * enc.inspect -> string
978  *
979  * Returns a string which represents the encoding for programmers.
980  *
981  * Encoding::UTF_8.inspect #=> "#<Encoding:UTF-8>"
982  * Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
983  */
984 static VALUE
986 {
987  VALUE str = rb_sprintf("#<%s:%s%s>", rb_obj_classname(self),
989  (enc_dummy_p(self) ? " (dummy)" : ""));
991  return str;
992 }
993 
994 /*
995  * call-seq:
996  * enc.name -> string
997  *
998  * Returns the name of the encoding.
999  *
1000  * Encoding::UTF_8.name #=> "UTF-8"
1001  */
1002 static VALUE
1004 {
1006 }
1007 
1008 static int
1010 {
1011  VALUE *arg = (VALUE *)args;
1012 
1013  if ((int)idx == (int)arg[0]) {
1014  VALUE str = rb_usascii_str_new2((char *)name);
1015  OBJ_FREEZE(str);
1016  rb_ary_push(arg[1], str);
1017  }
1018  return ST_CONTINUE;
1019 }
1020 
1021 /*
1022  * call-seq:
1023  * enc.names -> array
1024  *
1025  * Returns the list of name and aliases of the encoding.
1026  *
1027  * Encoding::WINDOWS_31J.names #=> ["Windows-31J", "CP932", "csWindows31J"]
1028  */
1029 static VALUE
1031 {
1032  VALUE args[2];
1033 
1034  args[0] = (VALUE)rb_to_encoding_index(self);
1035  args[1] = rb_ary_new2(0);
1036  st_foreach(enc_table.names, enc_names_i, (st_data_t)args);
1037  return args[1];
1038 }
1039 
1040 /*
1041  * call-seq:
1042  * Encoding.list -> [enc1, enc2, ...]
1043  *
1044  * Returns the list of loaded encodings.
1045  *
1046  * Encoding.list
1047  * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1048  * #<Encoding:ISO-2022-JP (dummy)>]
1049  *
1050  * Encoding.find("US-ASCII")
1051  * #=> #<Encoding:US-ASCII>
1052  *
1053  * Encoding.list
1054  * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1055  * #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>]
1056  *
1057  */
1058 static VALUE
1060 {
1061  VALUE ary = rb_ary_new2(0);
1063  return ary;
1064 }
1065 
1066 /*
1067  * call-seq:
1068  * Encoding.find(string) -> enc
1069  * Encoding.find(symbol) -> enc
1070  *
1071  * Search the encoding with specified <i>name</i>.
1072  * <i>name</i> should be a string or symbol.
1073  *
1074  * Encoding.find("US-ASCII") #=> #<Encoding:US-ASCII>
1075  * Encoding.find(:Shift_JIS) #=> #<Encoding:Shift_JIS>
1076  *
1077  * Names which this method accept are encoding names and aliases
1078  * including following special aliases
1079  *
1080  * "external":: default external encoding
1081  * "internal":: default internal encoding
1082  * "locale":: locale encoding
1083  * "filesystem":: filesystem encoding
1084  *
1085  * An ArgumentError is raised when there is no encoding with <i>name</i>.
1086  * Only <code>Encoding.find("internal")</code>, however, returns nil
1087  * when there is no encoding named "internal", in other words, when Ruby
1088  * has no default internal encoding.
1089  */
1090 static VALUE
1092 {
1093  int idx;
1094  if (RB_TYPE_P(enc, T_DATA) && is_data_encoding(enc))
1095  return enc;
1096  idx = str_to_encindex(enc);
1097  if (idx == UNSPECIFIED_ENCODING) return Qnil;
1098  return rb_enc_from_encoding_index(idx);
1099 }
1100 
1101 /*
1102  * call-seq:
1103  * Encoding.compatible?(obj1, obj2) -> enc or nil
1104  *
1105  * Checks the compatibility of two objects.
1106  *
1107  * If the objects are both strings they are compatible when they are
1108  * concatenatable. The encoding of the concatenated string will be returned
1109  * if they are compatible, nil if they are not.
1110  *
1111  * Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b")
1112  * #=> #<Encoding:ISO-8859-1>
1113  *
1114  * Encoding.compatible?(
1115  * "\xa1".force_encoding("iso-8859-1"),
1116  * "\xa1\xa1".force_encoding("euc-jp"))
1117  * #=> nil
1118  *
1119  * If the objects are non-strings their encodings are compatible when they
1120  * have an encoding and:
1121  * * Either encoding is US-ASCII compatible
1122  * * One of the encodings is a 7-bit encoding
1123  *
1124  */
1125 static VALUE
1126 enc_compatible_p(VALUE klass, VALUE str1, VALUE str2)
1127 {
1128  rb_encoding *enc;
1129 
1130  if (!enc_capable(str1)) return Qnil;
1131  if (!enc_capable(str2)) return Qnil;
1132  enc = rb_enc_compatible(str1, str2);
1133  if (!enc) return Qnil;
1134  return rb_enc_from_encoding(enc);
1135 }
1136 
1137 /* :nodoc: */
1138 static VALUE
1140 {
1141  rb_scan_args(argc, argv, "01", 0);
1142  return enc_name(self);
1143 }
1144 
1145 /* :nodoc: */
1146 static VALUE
1147 enc_load(VALUE klass, VALUE str)
1148 {
1149  return enc_find(klass, str);
1150 }
1151 
1152 rb_encoding *
1154 {
1155  if (!enc_table.list) {
1156  rb_enc_init();
1157  }
1158  return enc_table.list[ENCINDEX_ASCII].enc;
1159 }
1160 
1161 int
1163 {
1164  return ENCINDEX_ASCII;
1165 }
1166 
1167 rb_encoding *
1169 {
1170  if (!enc_table.list) {
1171  rb_enc_init();
1172  }
1173  return enc_table.list[ENCINDEX_UTF_8].enc;
1174 }
1175 
1176 int
1178 {
1179  return ENCINDEX_UTF_8;
1180 }
1181 
1182 rb_encoding *
1184 {
1185  if (!enc_table.list) {
1186  rb_enc_init();
1187  }
1188  return enc_table.list[ENCINDEX_US_ASCII].enc;
1189 }
1190 
1191 int
1193 {
1194  return ENCINDEX_US_ASCII;
1195 }
1196 
1197 int
1199 {
1201  int idx;
1202 
1203  if (NIL_P(charmap))
1204  idx = rb_usascii_encindex();
1205  else if ((idx = rb_enc_find_index(StringValueCStr(charmap))) < 0)
1206  idx = rb_ascii8bit_encindex();
1207 
1208  if (rb_enc_registered("locale") < 0) enc_alias_internal("locale", idx);
1209 
1210  return idx;
1211 }
1212 
1213 rb_encoding *
1215 {
1217 }
1218 
1219 static int
1221 {
1222  int idx;
1223 #if defined NO_LOCALE_CHARMAP
1225 #elif defined _WIN32 || defined __CYGWIN__
1226  char cp[sizeof(int) * 8 / 3 + 4];
1227  snprintf(cp, sizeof cp, "CP%d", AreFileApisANSI() ? GetACP() : GetOEMCP());
1228  idx = rb_enc_find_index(cp);
1229  if (idx < 0) idx = rb_ascii8bit_encindex();
1230 #else
1232 #endif
1233 
1234  enc_alias_internal("filesystem", idx);
1235  return idx;
1236 }
1237 
1238 int
1240 {
1241  int idx = rb_enc_registered("filesystem");
1242  if (idx < 0)
1243  idx = rb_ascii8bit_encindex();
1244  return idx;
1245 }
1246 
1247 rb_encoding *
1249 {
1251 }
1252 
1254  int index; /* -2 => not yet set, -1 => nil */
1256 };
1257 
1259 
1260 static int
1261 enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const char *name)
1262 {
1263  int overridden = FALSE;
1264 
1265  if (def->index != -2)
1266  /* Already set */
1267  overridden = TRUE;
1268 
1269  if (NIL_P(encoding)) {
1270  def->index = -1;
1271  def->enc = 0;
1272  st_insert(enc_table.names, (st_data_t)strdup(name),
1274  }
1275  else {
1276  def->index = rb_enc_to_index(rb_to_encoding(encoding));
1277  def->enc = 0;
1278  enc_alias_internal(name, def->index);
1279  }
1280 
1281  if (def == &default_external)
1283 
1284  return overridden;
1285 }
1286 
1287 rb_encoding *
1289 {
1290  if (default_external.enc) return default_external.enc;
1291 
1292  if (default_external.index >= 0) {
1293  default_external.enc = rb_enc_from_index(default_external.index);
1294  return default_external.enc;
1295  }
1296  else {
1297  return rb_locale_encoding();
1298  }
1299 }
1300 
1301 VALUE
1303 {
1305 }
1306 
1307 /*
1308  * call-seq:
1309  * Encoding.default_external -> enc
1310  *
1311  * Returns default external encoding.
1312  *
1313  * The default external encoding is used by default for strings created from
1314  * the following locations:
1315  *
1316  * * CSV
1317  * * File data read from disk
1318  * * SDBM
1319  * * StringIO
1320  * * Zlib::GzipReader
1321  * * Zlib::GzipWriter
1322  * * String#inspect
1323  * * Regexp#inspect
1324  *
1325  * While strings created from these locations will have this encoding, the
1326  * encoding may not be valid. Be sure to check String#valid_encoding?.
1327  *
1328  * File data written to disk will be transcoded to the default external
1329  * encoding when written.
1330  *
1331  * The default external encoding is initialized by the locale or -E option.
1332  */
1333 static VALUE
1335 {
1336  return rb_enc_default_external();
1337 }
1338 
1339 void
1341 {
1342  if (NIL_P(encoding)) {
1343  rb_raise(rb_eArgError, "default external can not be nil");
1344  }
1345  enc_set_default_encoding(&default_external, encoding,
1346  "external");
1347 }
1348 
1349 /*
1350  * call-seq:
1351  * Encoding.default_external = enc
1352  *
1353  * Sets default external encoding. You should not set
1354  * Encoding::default_external in ruby code as strings created before changing
1355  * the value may have a different encoding from strings created after the value
1356  * was changed. Instead, you should use <tt>ruby -E</tt> to invoke ruby with
1357  * the correct default_external.
1358  *
1359  * See Encoding::default_external for information on how the default external
1360  * encoding is used.
1361  */
1362 static VALUE
1364 {
1365  rb_warning("setting Encoding.default_external");
1366  rb_enc_set_default_external(encoding);
1367  return encoding;
1368 }
1369 
1370 static struct default_encoding default_internal = {-2};
1371 
1372 rb_encoding *
1374 {
1375  if (!default_internal.enc && default_internal.index >= 0) {
1376  default_internal.enc = rb_enc_from_index(default_internal.index);
1377  }
1378  return default_internal.enc; /* can be NULL */
1379 }
1380 
1381 VALUE
1383 {
1384  /* Note: These functions cope with default_internal not being set */
1386 }
1387 
1388 /*
1389  * call-seq:
1390  * Encoding.default_internal -> enc
1391  *
1392  * Returns default internal encoding. Strings will be transcoded to the
1393  * default internal encoding in the following places if the default internal
1394  * encoding is not nil:
1395  *
1396  * * CSV
1397  * * Etc.sysconfdir and Etc.systmpdir
1398  * * File data read from disk
1399  * * File names from Dir
1400  * * Integer#chr
1401  * * String#inspect and Regexp#inspect
1402  * * Strings returned from Curses
1403  * * Strings returned from Readline
1404  * * Strings returned from SDBM
1405  * * Time#zone
1406  * * Values from ENV
1407  * * Values in ARGV including $PROGRAM_NAME
1408  * * __FILE__
1409  *
1410  * Additionally String#encode and String#encode! use the default internal
1411  * encoding if no encoding is given.
1412  *
1413  * The locale encoding (__ENCODING__), not default_internal, is used as the
1414  * encoding of created strings.
1415  *
1416  * Encoding::default_internal is initialized by the source file's
1417  * internal_encoding or -E option.
1418  */
1419 static VALUE
1421 {
1422  return rb_enc_default_internal();
1423 }
1424 
1425 void
1427 {
1428  enc_set_default_encoding(&default_internal, encoding,
1429  "internal");
1430 }
1431 
1432 /*
1433  * call-seq:
1434  * Encoding.default_internal = enc or nil
1435  *
1436  * Sets default internal encoding or removes default internal encoding when
1437  * passed nil. You should not set Encoding::default_internal in ruby code as
1438  * strings created before changing the value may have a different encoding
1439  * from strings created after the change. Instead you should use
1440  * <tt>ruby -E</tt> to invoke ruby with the correct default_internal.
1441  *
1442  * See Encoding::default_internal for information on how the default internal
1443  * encoding is used.
1444  */
1445 static VALUE
1447 {
1448  rb_warning("setting Encoding.default_internal");
1449  rb_enc_set_default_internal(encoding);
1450  return encoding;
1451 }
1452 
1453 /*
1454  * call-seq:
1455  * Encoding.locale_charmap -> string
1456  *
1457  * Returns the locale charmap name.
1458  * It returns nil if no appropriate information is available.
1459  *
1460  * Debian GNU/Linux
1461  * LANG=C
1462  * Encoding.locale_charmap #=> "ANSI_X3.4-1968"
1463  * LANG=ja_JP.EUC-JP
1464  * Encoding.locale_charmap #=> "EUC-JP"
1465  *
1466  * SunOS 5
1467  * LANG=C
1468  * Encoding.locale_charmap #=> "646"
1469  * LANG=ja
1470  * Encoding.locale_charmap #=> "eucJP"
1471  *
1472  * The result is highly platform dependent.
1473  * So Encoding.find(Encoding.locale_charmap) may cause an error.
1474  * If you need some encoding object even for unknown locale,
1475  * Encoding.find("locale") can be used.
1476  *
1477  */
1478 VALUE
1480 {
1481 #if defined NO_LOCALE_CHARMAP
1482  return rb_usascii_str_new2("ASCII-8BIT");
1483 #elif defined _WIN32 || defined __CYGWIN__
1484  const char *codeset = 0;
1485  char cp[sizeof(int) * 3 + 4];
1486 # ifdef __CYGWIN__
1487  const char *nl_langinfo_codeset(void);
1488  codeset = nl_langinfo_codeset();
1489 # endif
1490  if (!codeset) {
1491  UINT codepage = GetConsoleCP();
1492  if (!codepage) codepage = GetACP();
1493  snprintf(cp, sizeof(cp), "CP%d", codepage);
1494  codeset = cp;
1495  }
1496  return rb_usascii_str_new2(codeset);
1497 #elif defined HAVE_LANGINFO_H
1498  char *codeset;
1499  codeset = nl_langinfo(CODESET);
1500  return rb_usascii_str_new2(codeset);
1501 #else
1502  return Qnil;
1503 #endif
1504 }
1505 
1506 static void
1508 {
1509  VALUE encoding = rb_enc_from_encoding(enc);
1510  char *s = (char *)name;
1511  int haslower = 0, hasupper = 0, valid = 0;
1512 
1513  if (ISDIGIT(*s)) return;
1514  if (ISUPPER(*s)) {
1515  hasupper = 1;
1516  while (*++s && (ISALNUM(*s) || *s == '_')) {
1517  if (ISLOWER(*s)) haslower = 1;
1518  }
1519  }
1520  if (!*s) {
1521  if (s - name > ENCODING_NAMELEN_MAX) return;
1522  valid = 1;
1523  rb_define_const(rb_cEncoding, name, encoding);
1524  }
1525  if (!valid || haslower) {
1526  size_t len = s - name;
1527  if (len > ENCODING_NAMELEN_MAX) return;
1528  if (!haslower || !hasupper) {
1529  do {
1530  if (ISLOWER(*s)) haslower = 1;
1531  if (ISUPPER(*s)) hasupper = 1;
1532  } while (*++s && (!haslower || !hasupper));
1533  len = s - name;
1534  }
1535  len += strlen(s);
1536  if (len++ > ENCODING_NAMELEN_MAX) return;
1537  MEMCPY(s = ALLOCA_N(char, len), name, char, len);
1538  name = s;
1539  if (!valid) {
1540  if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
1541  for (; *s; ++s) {
1542  if (!ISALNUM(*s)) *s = '_';
1543  }
1544  if (hasupper) {
1545  rb_define_const(rb_cEncoding, name, encoding);
1546  }
1547  }
1548  if (haslower) {
1549  for (s = (char *)name; *s; ++s) {
1550  if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
1551  }
1552  rb_define_const(rb_cEncoding, name, encoding);
1553  }
1554  }
1555 }
1556 
1557 static int
1559 {
1560  VALUE ary = (VALUE)arg;
1561  VALUE str = rb_usascii_str_new2((char *)name);
1562  OBJ_FREEZE(str);
1563  rb_ary_push(ary, str);
1564  return ST_CONTINUE;
1565 }
1566 
1567 /*
1568  * call-seq:
1569  * Encoding.name_list -> ["enc1", "enc2", ...]
1570  *
1571  * Returns the list of available encoding names.
1572  *
1573  * Encoding.name_list
1574  * #=> ["US-ASCII", "ASCII-8BIT", "UTF-8",
1575  * "ISO-8859-1", "Shift_JIS", "EUC-JP",
1576  * "Windows-31J",
1577  * "BINARY", "CP932", "eucJP"]
1578  *
1579  */
1580 
1581 static VALUE
1583 {
1584  VALUE ary = rb_ary_new2(enc_table.names->num_entries);
1586  return ary;
1587 }
1588 
1589 static int
1591 {
1592  VALUE *p = (VALUE *)arg;
1593  VALUE aliases = p[0], ary = p[1];
1594  int idx = (int)orig;
1595  VALUE key, str = rb_ary_entry(ary, idx);
1596 
1597  if (NIL_P(str)) {
1599 
1600  if (!enc) return ST_CONTINUE;
1601  if (STRCASECMP((char*)name, rb_enc_name(enc)) == 0) {
1602  return ST_CONTINUE;
1603  }
1604  str = rb_usascii_str_new2(rb_enc_name(enc));
1605  OBJ_FREEZE(str);
1606  rb_ary_store(ary, idx, str);
1607  }
1608  key = rb_usascii_str_new2((char *)name);
1609  OBJ_FREEZE(key);
1610  rb_hash_aset(aliases, key, str);
1611  return ST_CONTINUE;
1612 }
1613 
1614 /*
1615  * call-seq:
1616  * Encoding.aliases -> {"alias1" => "orig1", "alias2" => "orig2", ...}
1617  *
1618  * Returns a hash that maps each available encoding alias to its original encoding name.
1619  *
1620  * Encoding.aliases
1621  * #=> {"BINARY"=>"ASCII-8BIT", "ASCII"=>"US-ASCII", "ANSI_X3.4-1968"=>"US-ASCII",
1622  * "SJIS"=>"Shift_JIS", "eucJP"=>"EUC-JP", "CP932"=>"Windows-31J"}
1623  *
1624  */
1625 
1626 static VALUE
1628 {
1629  VALUE aliases[2];
1630  aliases[0] = rb_hash_new();
1631  aliases[1] = rb_ary_new();
1633  return aliases[0];
1634 }
1635 
1636 /*
1637  * An Encoding instance represents a character encoding usable in Ruby. It is
1638  * defined as a constant under the Encoding namespace. It has a name and
1639  * optionally, aliases:
1640  *
1641  * Encoding::ISO_8859_1.name
1642  * #=> "ISO-8859-1"
1643  *
1644  * Encoding::ISO_8859_1.names
1645  * #=> ["ISO-8859-1", "ISO8859-1"]
1646  *
1647  * Ruby methods dealing with encodings return or accept Encoding instances as
1648  * arguments (when a method accepts an Encoding instance as an argument, it
1649  * can be passed an Encoding name or alias instead).
1650  *
1651  * "some string".encoding
1652  * #=> #<Encoding:UTF-8>
1653  *
1654  * string = "some string".encode(Encoding::ISO_8859_1)
1655  * #=> "some string"
1656  * string.encoding
1657  * #=> #<Encoding:ISO-8859-1>
1658  *
1659  * "some string".encode "ISO-8859-1"
1660  * #=> "some string"
1661  *
1662  * <code>Encoding::ASCII_8BIT</code> is a special encoding that is usually
1663  * used for a byte string, not a character string. But as the name suggests,
1664  * its characters in the range of ASCII are considered as ASCII characters.
1665  * This is useful when you use ASCII-8BIT characters with other ASCII
1666  * compatible characters.
1667  *
1668  * == Changing an encoding
1669  *
1670  * The associated Encoding of a String can be changed in two different ways.
1671  *
1672  * First, it is possible to set the Encoding of a string to a new Encoding
1673  * without changing the internal byte representation of the string, with
1674  * String#force_encoding. This is how you can tell Ruby the correct encoding
1675  * of a string.
1676  *
1677  * string
1678  * #=> "R\xC3\xA9sum\xC3\xA9"
1679  * string.encoding
1680  * #=> #<Encoding:ISO-8859-1>
1681  * string.force_encoding(Encoding::UTF_8)
1682  * #=> "R\u00E9sum\u00E9"
1683  *
1684  * Second, it is possible to transcode a string, i.e. translate its internal
1685  * byte representation to another encoding. Its associated encoding is also
1686  * set to the other encoding. See String#encode for the various forms of
1687  * transcoding, and the Encoding::Converter class for additional control over
1688  * the transcoding process.
1689  *
1690  * string
1691  * #=> "R\u00E9sum\u00E9"
1692  * string.encoding
1693  * #=> #<Encoding:UTF-8>
1694  * string = string.encode!(Encoding::ISO_8859_1)
1695  * #=> "R\xE9sum\xE9"
1696  * string.encoding
1697  * #=> #<Encoding:ISO-8859-1>
1698  *
1699  * == Script encoding
1700  *
1701  * All Ruby script code has an associated Encoding which any String literal
1702  * created in the source code will be associated to.
1703  *
1704  * The default script encoding is <code>Encoding::US_ASCII</code>, but it can
1705  * be changed by a magic comment on the first line of the source code file (or
1706  * second line, if there is a shebang line on the first). The comment must
1707  * contain the word <code>coding</code> or <code>encoding</code>, followed
1708  * by a colon, space and the Encoding name or alias:
1709  *
1710  * # encoding: UTF-8
1711  *
1712  * "some string".encoding
1713  * #=> #<Encoding:UTF-8>
1714  *
1715  * The <code>__ENCODING__</code> keyword returns the script encoding of the file
1716  * in which the keyword is written:
1717  *
1718  * # encoding: ISO-8859-1
1719  *
1720  * __ENCODING__
1721  * #=> #<Encoding:ISO-8859-1>
1722  *
1723  * <code>ruby -K</code> will change the default locale encoding, but this is
1724  * not recommended. Ruby source files should declare their script encoding by a
1725  * magic comment even when they only depend on US-ASCII strings or regular
1726  * expressions.
1727  *
1728  * == Locale encoding
1729  *
1730  * The default encoding of the environment. Usually derived from locale.
1731  *
1732  * see Encoding.locale_charmap, Encoding.find('locale')
1733  *
1734  * == Filesystem encoding
1735  *
1736  * The default encoding of strings from the filesystem of the environment.
1737  * This is used for strings of file names or paths.
1738  *
1739  * see Encoding.find('filesystem')
1740  *
1741  * == External encoding
1742  *
1743  * Each IO object has an external encoding which indicates the encoding that
1744  * Ruby will use to read its data. By default Ruby sets the external encoding
1745  * of an IO object to the default external encoding. The default external
1746  * encoding is set by locale encoding or the interpreter <code>-E</code> option.
1747  * Encoding.default_external returns the current value of the external
1748  * encoding.
1749  *
1750  * ENV["LANG"]
1751  * #=> "UTF-8"
1752  * Encoding.default_external
1753  * #=> #<Encoding:UTF-8>
1754  *
1755  * $ ruby -E ISO-8859-1 -e "p Encoding.default_external"
1756  * #<Encoding:ISO-8859-1>
1757  *
1758  * $ LANG=C ruby -e 'p Encoding.default_external'
1759  * #<Encoding:US-ASCII>
1760  *
1761  * The default external encoding may also be set through
1762  * Encoding.default_external=, but you should not do this as strings created
1763  * before and after the change will have inconsistent encodings. Instead use
1764  * <code>ruby -E</code> to invoke ruby with the correct external encoding.
1765  *
1766  * When you know that the actual encoding of the data of an IO object is not
1767  * the default external encoding, you can reset its external encoding with
1768  * IO#set_encoding or set it at IO object creation (see IO.new options).
1769  *
1770  * == Internal encoding
1771  *
1772  * To process the data of an IO object which has an encoding different
1773  * from its external encoding, you can set its internal encoding. Ruby will use
1774  * this internal encoding to transcode the data when it is read from the IO
1775  * object.
1776  *
1777  * Conversely, when data is written to the IO object it is transcoded from the
1778  * internal encoding to the external encoding of the IO object.
1779  *
1780  * The internal encoding of an IO object can be set with
1781  * IO#set_encoding or at IO object creation (see IO.new options).
1782  *
1783  * The internal encoding is optional and when not set, the Ruby default
1784  * internal encoding is used. If not explicitly set this default internal
1785  * encoding is +nil+ meaning that by default, no transcoding occurs.
1786  *
1787  * The default internal encoding can be set with the interpreter option
1788  * <code>-E</code>. Encoding.default_internal returns the current internal
1789  * encoding.
1790  *
1791  * $ ruby -e 'p Encoding.default_internal'
1792  * nil
1793  *
1794  * $ ruby -E ISO-8859-1:UTF-8 -e "p [Encoding.default_external, \
1795  * Encoding.default_internal]"
1796  * [#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>]
1797  *
1798  * The default internal encoding may also be set through
1799  * Encoding.default_internal=, but you should not do this as strings created
1800  * before and after the change will have inconsistent encodings. Instead use
1801  * <code>ruby -E</code> to invoke ruby with the correct internal encoding.
1802  *
1803  * == IO encoding example
1804  *
1805  * In the following example a UTF-8 encoded string "R\u00E9sum\u00E9" is transcoded for
1806  * output to ISO-8859-1 encoding, then read back in and transcoded to UTF-8:
1807  *
1808  * string = "R\u00E9sum\u00E9"
1809  *
1810  * open("transcoded.txt", "w:ISO-8859-1") do |io|
1811  * io.write(string)
1812  * end
1813  *
1814  * puts "raw text:"
1815  * p File.binread("transcoded.txt")
1816  * puts
1817  *
1818  * open("transcoded.txt", "r:ISO-8859-1:UTF-8") do |io|
1819  * puts "transcoded text:"
1820  * p io.read
1821  * end
1822  *
1823  * While writing the file, the internal encoding is not specified as it is
1824  * only necessary for reading. While reading the file both the internal and
1825  * external encoding must be specified to obtain the correct result.
1826  *
1827  * $ ruby t.rb
1828  * raw text:
1829  * "R\xE9sum\xE9"
1830  *
1831  * transcoded text:
1832  * "R\u00E9sum\u00E9"
1833  *
1834  */
1835 
1836 void
1838 {
1839 #undef rb_intern
1840 #define rb_intern(str) rb_intern_const(str)
1841  VALUE list;
1842  int i;
1843 
1844  rb_cEncoding = rb_define_class("Encoding", rb_cObject);
1847  rb_define_method(rb_cEncoding, "to_s", enc_name, 0);
1848  rb_define_method(rb_cEncoding, "inspect", enc_inspect, 0);
1849  rb_define_method(rb_cEncoding, "name", enc_name, 0);
1850  rb_define_method(rb_cEncoding, "names", enc_names, 0);
1851  rb_define_method(rb_cEncoding, "dummy?", enc_dummy_p, 0);
1852  rb_define_method(rb_cEncoding, "ascii_compatible?", enc_ascii_compatible_p, 0);
1853  rb_define_method(rb_cEncoding, "replicate", enc_replicate, 1);
1859 
1860  rb_define_method(rb_cEncoding, "_dump", enc_dump, -1);
1862 
1868 
1869  list = rb_ary_new2(enc_table.count);
1870  RBASIC(list)->klass = 0;
1873 
1874  for (i = 0; i < enc_table.count; ++i) {
1875  rb_ary_push(list, enc_new(enc_table.list[i].enc));
1876  }
1877 }
1878 
1879 /* locale insensitive ctype functions: every rb_is*() predicate below expands ctype_test, so it yields 0 for any c that fails rb_isascii(c) and otherwise reports the ONIGENC_IS_ASCII_CODE_CTYPE result for the given ONIGENC_CTYPE_* class */
1880 
1881 #define ctype_test(c, ctype) \
1882  (rb_isascii(c) && ONIGENC_IS_ASCII_CODE_CTYPE((c), (ctype)))
1883 
1884 int rb_isalnum(int c) { return ctype_test(c, ONIGENC_CTYPE_ALNUM); }
1885 int rb_isalpha(int c) { return ctype_test(c, ONIGENC_CTYPE_ALPHA); }
1886 int rb_isblank(int c) { return ctype_test(c, ONIGENC_CTYPE_BLANK); }
1887 int rb_iscntrl(int c) { return ctype_test(c, ONIGENC_CTYPE_CNTRL); }
1888 int rb_isdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_DIGIT); }
1889 int rb_isgraph(int c) { return ctype_test(c, ONIGENC_CTYPE_GRAPH); }
1890 int rb_islower(int c) { return ctype_test(c, ONIGENC_CTYPE_LOWER); }
1891 int rb_isprint(int c) { return ctype_test(c, ONIGENC_CTYPE_PRINT); }
1892 int rb_ispunct(int c) { return ctype_test(c, ONIGENC_CTYPE_PUNCT); }
1893 int rb_isspace(int c) { return ctype_test(c, ONIGENC_CTYPE_SPACE); }
1894 int rb_isupper(int c) { return ctype_test(c, ONIGENC_CTYPE_UPPER); }
1896 
1897 int
1899 {
1901 }
1902 
1903 int
1905 {
1907 }
1908 
static void enc_set_index(VALUE obj, int idx)
Definition: encoding.c:730
static int rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg)
Definition: encoding.c:1558
#define RB_TYPE_P(obj, type)
rb_encoding OnigEncodingUS_ASCII
int rb_enc_codelen(int c, rb_encoding *enc)
Definition: encoding.c:954
int rb_enc_get_index(VALUE obj)
Definition: encoding.c:690
#define ONIGENC_CTYPE_GRAPH
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Definition: encoding.c:778
VALUE rb_ary_entry(VALUE ary, long offset)
Definition: array.c:1101
void rb_bug(const char *fmt,...)
Definition: error.c:290
VALUE rb_require_safe(VALUE, int)
Definition: load.c:934
void rb_enc_copy(VALUE obj1, VALUE obj2)
Definition: encoding.c:856
#define FALSE
Definition: nkf.h:174
void rb_enc_set_base(const char *name, const char *orig)
Definition: encoding.c:335
size_t strlen(const char *)
int i
Definition: win32ole.c:784
unsigned long VALUE
Definition: ripper.y:104
const char * rb_obj_classname(VALUE)
Definition: variable.c:396
VALUE rb_id2str(ID id)
Definition: ripper.c:16992
#define RSTRING_END(str)
VALUE rb_cEncoding
Definition: encoding.c:40
static VALUE enc_load(VALUE klass, VALUE str)
Definition: encoding.c:1147
int count
Definition: encoding.c:51
#define ONIGENC_CTYPE_PUNCT
int st_lookup(st_table *, st_data_t, st_data_t *)
int ruby_encoding_index
Definition: ripper.y:178
void rb_define_singleton_method(VALUE obj, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a singleton method for obj.
Definition: class.c:1497
static int rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg)
Definition: encoding.c:1590
static VALUE enc_inspect(VALUE self)
Definition: encoding.c:985
static rb_encoding * set_base_encoding(int index, rb_encoding *base)
Definition: encoding.c:321
#define ENC_SET_DUMMY(enc)
#define ONIGENC_CTYPE_XDIGIT
static VALUE rb_enc_name_list(VALUE klass)
Definition: encoding.c:1582
static VALUE enc_list(VALUE klass)
Definition: encoding.c:1059
static int enc_register_at(int index, const char *name, rb_encoding *encoding)
Definition: encoding.c:232
#define rb_usascii_str_new2
int rb_toupper(int c)
Definition: encoding.c:1904
void Init_Encoding(void)
Definition: encoding.c:1837
#define UChar
#define rb_check_frozen(obj)
static int str_to_encindex(VALUE enc)
Definition: encoding.c:178
#define rb_enc_name(enc)
#define ONIGENC_CTYPE_ALNUM
rb_encoding * rb_to_encoding(VALUE enc)
Definition: encoding.c:194
VALUE rb_enc_from_encoding(rb_encoding *encoding)
Definition: encoding.c:103
int rb_enc_tolower(int c, rb_encoding *enc)
Definition: encoding.c:970
const char * nl_langinfo_codeset(void)
Definition: langinfo.c:64
VALUE rb_eTypeError
Definition: error.c:511
#define OBJ_FREEZE(x)
st_table * names
Definition: encoding.c:53
rb_encoding * rb_default_internal_encoding(void)
Definition: encoding.c:1373
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:822
#define ONIGENC_CTYPE_LOWER
st_table * st_init_strcasetable(void)
Definition: st.c:296
int st_insert2(st_table *, st_data_t, st_data_t, st_data_t(*)(st_data_t))
int rb_usascii_encindex(void)
Definition: encoding.c:1192
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Definition: encoding.c:789
static VALUE enc_names(VALUE self)
Definition: encoding.c:1030
#define RSTRING_PTR(str)
#define CLASS_OF(v)
int rb_isblank(int c)
Definition: encoding.c:1886
static int enc_table_expand(int newsize)
Definition: encoding.c:216
VALUE rb_protect(VALUE(*proc)(VALUE), VALUE data, int *state)
Definition: eval.c:771
VALUE rb_funcall(VALUE, ID, int,...)
Calls a method.
Definition: vm_eval.c:773
#define Qnil
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
Definition: encoding.c:933
static VALUE enc_new(rb_encoding *encoding)
Definition: encoding.c:82
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:1780
#define ONIGENC_CTYPE_SPACE
unsigned int flags
Definition: ripper.y:179
void rb_enc_set_default_external(VALUE encoding)
Definition: encoding.c:1340
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Definition: encoding.c:766
int rb_isupper(int c)
Definition: encoding.c:1894
#define T_FILE
static VALUE rb_enc_aliases(VALUE klass)
Definition: encoding.c:1627
static VALUE set_default_external(VALUE klass, VALUE encoding)
Definition: encoding.c:1363
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:886
int rb_isprint(int c)
Definition: encoding.c:1891
VALUE rb_locale_charmap(VALUE klass)
Definition: encoding.c:1479
const char * alias
Definition: nkf.c:1151
#define rb_enc_to_index(enc)
#define ENC_REGISTER(enc)
#define FL_UNTRUSTED
int rb_enc_registered(const char *name)
Definition: encoding.c:560
#define ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e)
ID rb_id_encoding(void)
Definition: encoding.c:683
int rb_isdigit(int c)
Definition: encoding.c:1888
static int enc_alias_internal(const char *alias, int idx)
Definition: encoding.c:475
#define ISDIGIT(c)
#define ONIGENC_IS_ASCII_CODE(code)
int rb_filesystem_encindex(void)
Definition: encoding.c:1239
void rb_enc_init(void)
Definition: encoding.c:533
rb_encoding * rb_utf8_encoding(void)
Definition: encoding.c:1168
#define ONIGENC_CTYPE_CNTRL
#define ruby_debug
void rb_undef_method(VALUE klass, const char *name)
Definition: class.c:1362
VALUE rb_ivar_get(VALUE, ID)
Definition: variable.c:1116
#define ENC_CODERANGE_ASCIIONLY(obj)
static VALUE enc_dummy_p(VALUE enc)
Definition: encoding.c:434
const char * name
Definition: ripper.y:163
static VALUE rb_enc_from_encoding_index(int idx)
Definition: encoding.c:88
int rb_enc_toupper(int c, rb_encoding *enc)
Definition: encoding.c:964
Win32OLEIDispatch * p
Definition: win32ole.c:786
#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c)
Definition: nkf.c:87
int args
Definition: win32ole.c:785
unsigned long st_data_t
Definition: ripper.y:35
#define ctype_test(c, ctype)
Definition: encoding.c:1881
void rb_encdb_set_unicode(int index)
Definition: encoding.c:517
static struct @4 enc_table
static int str_find_encindex(VALUE enc)
Definition: encoding.c:165
int rb_to_encoding_index(VALUE enc)
Definition: encoding.c:146
#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n)
rb_encoding * rb_default_external_encoding(void)
Definition: encoding.c:1288
#define ENCODING_INLINE_MAX
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
int rb_isxdigit(int c)
Definition: encoding.c:1895
#define STRCASECMP(s1, s2)
int rb_ispunct(int c)
Definition: encoding.c:1892
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:880
Definition: encoding.c:43
#define Qtrue
return c
Definition: ripper.y:7591
int rb_isspace(int c)
Definition: encoding.c:1893
int rb_typeddata_is_kind_of(VALUE obj, const rb_data_type_t *data_type)
Definition: error.c:473
VALUE rb_ary_replace(VALUE copy, VALUE orig)
Definition: array.c:3168
VALUE rb_ary_new(void)
Definition: array.c:424
#define StringValueCStr(v)
int rb_ascii8bit_encindex(void)
Definition: encoding.c:1162
unsigned long ID
Definition: ripper.y:105
VALUE rb_enc_default_external(void)
Definition: encoding.c:1302
#define ONIGENC_CTYPE_UPPER
void rb_define_const(VALUE, const char *, VALUE)
Definition: variable.c:2202
#define ISASCII(c)
Definition: ruby.h:1629
#define ONIGENC_CTYPE_ALPHA
#define ENC_CODERANGE_CLEAR(obj)
static VALUE enc_name(VALUE self)
Definition: encoding.c:1003
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
Definition: class.c:499
#define RSTRING_LEN(str)
void rb_enc_set_index(VALUE obj, int idx)
Definition: encoding.c:741
int rb_enc_replicate(const char *name, rb_encoding *encoding)
Definition: encoding.c:343
#define Qfalse
void rb_ary_store(VALUE ary, long idx, VALUE val)
Definition: array.c:719
#define ENCODING_COUNT
Definition: encoding.c:58
#define ISALNUM(c)
Definition: ruby.h:1635
static void set_encoding_const(const char *, rb_encoding *)
Definition: encoding.c:1507
static int rb_enc_dummy_p(rb_encoding *enc)
Definition: ripper.y:235
#define T_STRING
#define MBCLEN_CHARFOUND_P(ret)
static VALUE enc_dump(int argc, VALUE *argv, VALUE self)
Definition: encoding.c:1139
int rb_encdb_alias(const char *alias, const char *orig)
Definition: encoding.c:506
#define xmalloc
int argc
Definition: ruby.c:130
#define NIL_P(v)
int rb_locale_encindex(void)
Definition: encoding.c:1198
#define realloc
Definition: ripper.c:99
static rb_encoding * str_to_encoding(VALUE enc)
Definition: encoding.c:188
#define TypedData_Wrap_Struct(klass, data_type, sval)
#define ISUPPER(c)
Definition: ruby.h:1633
VALUE rb_enc_associate_index(VALUE obj, int idx)
Definition: encoding.c:748
VALUE rb_eEncCompatError
Definition: error.c:518
arg
Definition: ripper.y:1316
#define ISLOWER(c)
Definition: ruby.h:1634
#define ALLOCA_N(type, n)
const char * name
Definition: encoding.c:44
#define rb_enc_mbc_to_codepoint(p, e, enc)
#define ONIGENC_CTYPE_BLANK
int rb_isgraph(int c)
Definition: encoding.c:1889
#define ruby_verbose
static int enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const char *name)
Definition: encoding.c:1261
rb_encoding * rb_find_encoding(VALUE enc)
Definition: encoding.c:201
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Definition: encoding.c:910
VALUE rb_hash_aset(VALUE, VALUE, VALUE)
int st_foreach(st_table *, int(*)(ANYARGS), st_data_t)
Definition: st.c:1000
int rb_encdb_dummy(const char *name)
Definition: encoding.c:410
#define rb_enc_mbminlen(enc)
static int enc_register(const char *name, rb_encoding *encoding)
Definition: encoding.c:266
#define TRUE
Definition: nkf.h:175
#define ENC_DUMMY_P(enc)
static int enc_check_encoding(VALUE obj)
Definition: encoding.c:126
#define DATA_PTR(dta)
VALUE rb_sprintf(const char *format,...)
Definition: sprintf.c:1270
#define StringValue(v)
#define RDATA(obj)
#define MBCLEN_CHARFOUND_LEN(ret)
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:898
int rb_enc_unicode_p(rb_encoding *enc)
Definition: encoding.c:459
#define rb_isascii(c)
#define T_REGEXP
#define TOLOWER(c)
#define CONST_ID(var, str)
void rb_gc_register_mark_object(VALUE)
Definition: gc.c:2982
#define strdup(s)
Definition: util.h:69
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Definition: class.c:1570
rb_encoding * rb_usascii_encoding(void)
Definition: encoding.c:1183
int rb_encdb_replicate(const char *name, const char *orig)
Definition: encoding.c:388
#define ENCODING_NAMELEN_MAX
Definition: encoding.c:61
int rb_define_dummy_encoding(const char *name)
Definition: encoding.c:400
#define FL_TAINT
static struct default_encoding default_internal
Definition: encoding.c:1370
#define debug(x)
Definition: _sdbm.c:52
static VALUE enc_compatible_p(VALUE klass, VALUE str1, VALUE str2)
Definition: encoding.c:1126
rb_encoding * rb_locale_encoding(void)
Definition: encoding.c:1214
int rb_utf8_encindex(void)
Definition: encoding.c:1177
#define ENCODING_SET_INLINED(obj, i)
#define ONIGENC_IS_UNICODE(enc)
void rb_undef_alloc_func(VALUE)
Definition: vm_method.c:492
VALUE rb_obj_encoding(VALUE obj)
Definition: encoding.c:870
static int enc_autoload(rb_encoding *)
Definition: encoding.c:611
int rb_islower(int c)
Definition: encoding.c:1890
static VALUE set_default_internal(VALUE klass, VALUE encoding)
Definition: encoding.c:1446
#define enc_autoload_p(enc)
Definition: encoding.c:64
#define ONIGENC_FLAG_UNICODE
#define MEMCPY(p1, p2, type, n)
static void enc_check_duplication(const char *name)
Definition: encoding.c:313
static size_t enc_memsize(const void *p)
Definition: encoding.c:69
VALUE rb_ivar_set(VALUE, ID, VALUE)
Definition: variable.c:1128
static ID id_encoding
Definition: encoding.c:39
static int enc_names_i(st_data_t name, st_data_t idx, st_data_t args)
Definition: encoding.c:1009
#define ENC_CODERANGE_7BIT
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:772
static VALUE get_default_external(VALUE klass)
Definition: encoding.c:1334
int size
Definition: encoding.c:52
static struct default_encoding default_external
Definition: encoding.c:1258
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
#define SYMBOL_P(x)
int rb_enc_str_asciionly_p(VALUE)
Definition: string.c:340
void rb_set_errinfo(VALUE err)
Definition: eval.c:1436
#define ONIGENC_MBC_ENC_LEN(enc, p, e)
static VALUE enc_replicate(VALUE encoding, VALUE name)
Definition: encoding.c:364
rb_encoding * enc
Definition: encoding.c:1255
static int enc_set_filesystem_encoding(void)
Definition: encoding.c:1220
RUBY_EXTERN VALUE rb_cObject
Definition: ripper.y:1426
void rb_enc_set_default_internal(VALUE encoding)
Definition: encoding.c:1426
static VALUE enc_ascii_compatible_p(VALUE enc)
Definition: encoding.c:450
uint8_t key[16]
Definition: random.c:1370
#define RBASIC(obj)
#define valid_encoding_name_p(name)
Definition: encoding.c:62
#define ONIGENC_CTYPE_DIGIT
int rb_enc_alias(const char *alias, const char *orig)
Definition: encoding.c:491
#define INT2NUM(x)
static VALUE require_enc(VALUE enclib)
Definition: encoding.c:573
#define is_data_encoding(obj)
Definition: encoding.c:79
struct rb_encoding_entry * list
Definition: encoding.c:50
rb_encoding * rb_filesystem_encoding(void)
Definition: encoding.c:1248
static int enc_capable(VALUE obj)
Definition: encoding.c:667
static const rb_data_type_t encoding_data_type
Definition: encoding.c:74
int st_insert(st_table *, st_data_t, st_data_t)
static st_data_t enc_dup_name(st_data_t name)
Definition: encoding.c:465
int rb_isalnum(int c)
Definition: encoding.c:1884
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c)
VALUE rb_enc_default_internal(void)
Definition: encoding.c:1382
VALUE rb_ary_new2(long capa)
Definition: array.c:417
static int check_encoding(rb_encoding *enc)
Definition: encoding.c:114
static VALUE get_default_internal(VALUE klass)
Definition: encoding.c:1420
#define rb_safe_level()
Definition: tcltklib.c:94
int rb_tolower(int c)
Definition: encoding.c:1898
const char * name
Definition: nkf.c:208
#define rb_enc_asciicompat(enc)
#define NUM2INT(x)
static int enc_replicate_with_index(const char *name, rb_encoding *origenc, int idx)
Definition: encoding.c:372
VALUE rb_hash_new(void)
Definition: hash.c:234
#define rb_errinfo()
Definition: tcltklib.c:89
unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:948
#define BUILTIN_TYPE(x)
#define rb_intern(str)
#define rb_enc_isascii(c, enc)
static int enc_alias(const char *alias, int idx)
Definition: encoding.c:482
rb_encoding * rb_ascii8bit_encoding(void)
Definition: encoding.c:1153
void rb_warning(const char *fmt,...)
Definition: error.c:229
int rb_enc_find_index(const char *name)
Definition: encoding.c:635
int rb_iscntrl(int c)
Definition: encoding.c:1887
int rb_enc_register(const char *name, rb_encoding *encoding)
Definition: encoding.c:279
Definition: nkf.c:108
static VALUE rb_encoding_list
Definition: encoding.c:41
#define snprintf
#define SPECIAL_CONST_P(x)
void rb_encdb_declare(const char *name)
Definition: encoding.c:303
#define ONIGENC_CTYPE_PRINT
void rb_gc_mark_encodings(void)
Definition: encoding.c:211
#define NULL
Definition: _sdbm.c:103
#define T_DATA
#define UNSPECIFIED_ENCODING
Definition: encoding.c:59
VALUE rb_check_string_type(VALUE)
Definition: string.c:1509
rb_encoding OnigEncodingUTF_8
#define ENC_TO_ENCINDEX(enc)
int rb_enc_str_coderange(VALUE)
Definition: string.c:327
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Definition: class.c:1344
void rb_warn(const char *fmt,...)
Definition: error.c:216
#define SYM2ID(x)
rb_encoding * enc
Definition: encoding.c:45
VALUE rb_eArgError
Definition: error.c:512
static int load_encoding(const char *name)
Definition: encoding.c:580
rb_encoding * rb_enc_find(const char *name)
Definition: encoding.c:659
char ** argv
Definition: ruby.c:131
#define FL_UNSET(x, f)
static VALUE enc_find(VALUE klass, VALUE enc)
Definition: encoding.c:1091
#define ENCODING_GET_INLINED(obj)
int rb_isalpha(int c)
Definition: encoding.c:1885
static int must_encoding(VALUE enc)
Definition: encoding.c:135
rb_encoding * rb_enc_from_index(int index)
Definition: encoding.c:548
rb_encoding * base
Definition: encoding.c:46