Ruby 2.0.0p451 (2014-02-24 revision 45167)
string.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  string.c -
4 
5  $Author: nagachika $
6  created at: Mon Aug 9 17:12:58 JST 1993
7 
8  Copyright (C) 1993-2007 Yukihiro Matsumoto
9  Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
10  Copyright (C) 2000 Information-technology Promotion Agency, Japan
11 
12 **********************************************************************/
13 
14 #include "ruby/ruby.h"
15 #include "ruby/re.h"
16 #include "ruby/encoding.h"
17 #include "vm_core.h"
18 #include "internal.h"
19 #include "probes.h"
20 #include <assert.h>
21 
22 #define BEG(no) (regs->beg[(no)])
23 #define END(no) (regs->end[(no)])
24 
25 #include <math.h>
26 #include <ctype.h>
27 
28 #ifdef HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 
32 #define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
33 
34 #undef rb_str_new_cstr
35 #undef rb_tainted_str_new_cstr
36 #undef rb_usascii_str_new_cstr
37 #undef rb_external_str_new_cstr
38 #undef rb_locale_str_new_cstr
39 #undef rb_str_new2
40 #undef rb_str_new3
41 #undef rb_str_new4
42 #undef rb_str_new5
43 #undef rb_tainted_str_new2
44 #undef rb_usascii_str_new2
45 #undef rb_str_dup_frozen
46 #undef rb_str_buf_new_cstr
47 #undef rb_str_buf_new2
48 #undef rb_str_buf_cat2
49 #undef rb_str_cat2
50 
51 static VALUE rb_str_clear(VALUE str);
52 
55 
56 #define RUBY_MAX_CHAR_LEN 16
57 #define STR_TMPLOCK FL_USER7
58 #define STR_NOEMBED FL_USER1
59 #define STR_SHARED FL_USER2 /* = ELTS_SHARED */
60 #define STR_ASSOC FL_USER3
61 #define STR_SHARED_P(s) FL_ALL((s), STR_NOEMBED|ELTS_SHARED)
62 #define STR_ASSOC_P(s) FL_ALL((s), STR_NOEMBED|STR_ASSOC)
63 #define STR_NOCAPA (STR_NOEMBED|ELTS_SHARED|STR_ASSOC)
64 #define STR_NOCAPA_P(s) (FL_TEST((s),STR_NOEMBED) && FL_ANY((s),ELTS_SHARED|STR_ASSOC))
65 #define STR_UNSET_NOCAPA(s) do {\
66  if (FL_TEST((s),STR_NOEMBED)) FL_UNSET((s),(ELTS_SHARED|STR_ASSOC));\
67 } while (0)
68 
69 
70 #define STR_SET_NOEMBED(str) do {\
71  FL_SET((str), STR_NOEMBED);\
72  STR_SET_EMBED_LEN((str), 0);\
73 } while (0)
74 #define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED)
75 #define STR_EMBED_P(str) (!FL_TEST((str), STR_NOEMBED))
76 #define STR_SET_EMBED_LEN(str, n) do { \
77  long tmp_n = (n);\
78  RBASIC(str)->flags &= ~RSTRING_EMBED_LEN_MASK;\
79  RBASIC(str)->flags |= (tmp_n) << RSTRING_EMBED_LEN_SHIFT;\
80 } while (0)
81 
/*
 * Store n as the byte length of str.  Heap strings keep the length in
 * as.heap.len; embedded strings go through STR_SET_EMBED_LEN.
 * Both arguments may be evaluated more than once.
 */
#define STR_SET_LEN(str, n) do { \
    if (!STR_EMBED_P(str)) \
        RSTRING(str)->as.heap.len = (n); \
    else \
        STR_SET_EMBED_LEN((str), (n)); \
} while (0)
90 
/*
 * Decrease the byte length of str by one, using whichever length field
 * the current representation (embedded or heap) stores it in.
 * No check is made that the length is positive.
 */
#define STR_DEC_LEN(str) do {\
    if (!STR_EMBED_P(str)) {\
        RSTRING(str)->as.heap.len--;\
    }\
    else {\
        STR_SET_EMBED_LEN((str), RSTRING_LEN(str) - 1);\
    }\
} while (0)
101 
/*
 * Change the buffer capacity of str to `capacity' bytes (plus one extra
 * byte that is always allocated on top of it).
 *
 * Embedded string: nothing happens unless `capacity' exceeds
 * RSTRING_EMBED_LEN_MAX; in that case a heap buffer is allocated, the
 * current bytes are copied into it and the string is switched to the
 * non-embedded representation.  heap.len is assigned before
 * STR_SET_NOEMBED() because that macro resets the embedded length that
 * RSTRING_LEN() is still reading at that point.
 *
 * Heap string: the buffer is reallocated; aux.capa is only updated when
 * STR_NOCAPA_P() is false (i.e. the string is not flagged as shared or
 * associated).
 *
 * Both arguments are evaluated several times.
 */
102 #define RESIZE_CAPA(str,capacity) do {\
103  if (STR_EMBED_P(str)) {\
104  if ((capacity) > RSTRING_EMBED_LEN_MAX) {\
105  char *tmp = ALLOC_N(char, (capacity)+1);\
106  memcpy(tmp, RSTRING_PTR(str), RSTRING_LEN(str));\
107  RSTRING(str)->as.heap.ptr = tmp;\
108  RSTRING(str)->as.heap.len = RSTRING_LEN(str);\
109  STR_SET_NOEMBED(str);\
110  RSTRING(str)->as.heap.aux.capa = (capacity);\
111  }\
112  }\
113  else {\
114  REALLOC_N(RSTRING(str)->as.heap.ptr, char, (capacity)+1);\
115  if (!STR_NOCAPA_P(str))\
116  RSTRING(str)->as.heap.aux.capa = (capacity);\
117  }\
118 } while (0)
119 
120 #define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)
121 #define is_broken_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN)
122 
123 #define STR_ENC_GET(str) rb_enc_from_index(ENCODING_GET(str))
124 
125 static inline int
127 {
128  rb_encoding *enc;
129 
130  /* Conservative. It may be ENC_CODERANGE_UNKNOWN. */
131  if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
132  return 1;
133 
134  enc = STR_ENC_GET(str);
135  if (rb_enc_mbmaxlen(enc) == 1)
136  return 1;
137 
138  /* Conservative. Possibly single byte.
139  * "\xa1" in Shift_JIS for example. */
140  return 0;
141 }
142 
144 
/*
 * Locate the first byte in [p, e) for which ISASCII() is false.
 *
 * Returns a pointer to that byte, or NULL when the whole range is ASCII.
 * When SIZEOF_VALUE is 4 or 8 and the range is longer than two VALUE
 * words, the aligned middle part is tested one word at a time against
 * NONASCII_MASK; a word that matches is then re-examined byte by byte.
 *
 * NONASCII_MASK stays defined after this function so later code can
 * test for it with #ifdef.
 */
static inline const char *
search_nonascii(const char *p, const char *e)
{
#if SIZEOF_VALUE == 8
# define NONASCII_MASK 0x8080808080808080ULL
#elif SIZEOF_VALUE == 4
# define NONASCII_MASK 0x80808080UL
#endif
#ifdef NONASCII_MASK
    if ((int)sizeof(VALUE) * 2 < e - p) {
        const VALUE align_bits = sizeof(VALUE) - 1;
        /* first word boundary at or after p, last one at or before e */
        const VALUE *word = (const VALUE *)(~align_bits & ((VALUE)p + align_bits));
        const VALUE *word_end = (const VALUE *)(~align_bits & (VALUE)e);

        /* unaligned head */
        for (; p < (const char *)word; p++) {
            if (!ISASCII(*p))
                return p;
        }
        /* aligned body: stop at the first word holding a high bit */
        for (; word < word_end; word++) {
            if (*word & NONASCII_MASK)
                break;
        }
        p = (const char *)word;
    }
#endif
    /* remaining bytes (or the whole range when the fast path is off) */
    for (; p < e; p++) {
        if (!ISASCII(*p))
            return p;
    }
    return NULL;
}
181 
182 static int
183 coderange_scan(const char *p, long len, rb_encoding *enc)
184 {
185  const char *e = p + len;
186 
187  if (rb_enc_to_index(enc) == 0) {
188  /* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */
189  p = search_nonascii(p, e);
191  }
192 
193  if (rb_enc_asciicompat(enc)) {
194  p = search_nonascii(p, e);
195  if (!p) {
196  return ENC_CODERANGE_7BIT;
197  }
198  while (p < e) {
199  int ret = rb_enc_precise_mbclen(p, e, enc);
200  if (!MBCLEN_CHARFOUND_P(ret)) {
201  return ENC_CODERANGE_BROKEN;
202  }
203  p += MBCLEN_CHARFOUND_LEN(ret);
204  if (p < e) {
205  p = search_nonascii(p, e);
206  if (!p) {
207  return ENC_CODERANGE_VALID;
208  }
209  }
210  }
211  if (e < p) {
212  return ENC_CODERANGE_BROKEN;
213  }
214  return ENC_CODERANGE_VALID;
215  }
216 
217  while (p < e) {
218  int ret = rb_enc_precise_mbclen(p, e, enc);
219 
220  if (!MBCLEN_CHARFOUND_P(ret)) {
221  return ENC_CODERANGE_BROKEN;
222  }
223  p += MBCLEN_CHARFOUND_LEN(ret);
224  }
225  if (e < p) {
226  return ENC_CODERANGE_BROKEN;
227  }
228  return ENC_CODERANGE_VALID;
229 }
230 
231 long
232 rb_str_coderange_scan_restartable(const char *s, const char *e, rb_encoding *enc, int *cr)
233 {
234  const char *p = s;
235 
236  if (*cr == ENC_CODERANGE_BROKEN)
237  return e - s;
238 
239  if (rb_enc_to_index(enc) == 0) {
240  /* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */
241  p = search_nonascii(p, e);
243  return e - s;
244  }
245  else if (rb_enc_asciicompat(enc)) {
246  p = search_nonascii(p, e);
247  if (!p) {
248  if (*cr != ENC_CODERANGE_VALID) *cr = ENC_CODERANGE_7BIT;
249  return e - s;
250  }
251  while (p < e) {
252  int ret = rb_enc_precise_mbclen(p, e, enc);
253  if (!MBCLEN_CHARFOUND_P(ret)) {
255  return p - s;
256  }
257  p += MBCLEN_CHARFOUND_LEN(ret);
258  if (p < e) {
259  p = search_nonascii(p, e);
260  if (!p) {
261  *cr = ENC_CODERANGE_VALID;
262  return e - s;
263  }
264  }
265  }
267  return p - s;
268  }
269  else {
270  while (p < e) {
271  int ret = rb_enc_precise_mbclen(p, e, enc);
272  if (!MBCLEN_CHARFOUND_P(ret)) {
274  return p - s;
275  }
276  p += MBCLEN_CHARFOUND_LEN(ret);
277  }
279  return p - s;
280  }
281 }
282 
283 static inline void
285 {
286  rb_enc_set_index(str1, ENCODING_GET(str2));
287 }
288 
289 static void
291 {
292  /* this function is designed for copying encoding and coderange
293  * from src to new string "dest" which is made from the part of src.
294  */
295  str_enc_copy(dest, src);
296  if (RSTRING_LEN(dest) == 0) {
297  if (!rb_enc_asciicompat(STR_ENC_GET(src)))
299  else
301  return;
302  }
303  switch (ENC_CODERANGE(src)) {
304  case ENC_CODERANGE_7BIT:
306  break;
307  case ENC_CODERANGE_VALID:
308  if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
311  else
313  break;
314  default:
315  break;
316  }
317 }
318 
319 static void
321 {
322  str_enc_copy(dest, src);
323  ENC_CODERANGE_SET(dest, ENC_CODERANGE(src));
324 }
325 
326 int
328 {
329  int cr = ENC_CODERANGE(str);
330 
331  if (cr == ENC_CODERANGE_UNKNOWN) {
332  rb_encoding *enc = STR_ENC_GET(str);
333  cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
334  ENC_CODERANGE_SET(str, cr);
335  }
336  return cr;
337 }
338 
339 int
341 {
342  rb_encoding *enc = STR_ENC_GET(str);
343 
344  if (!rb_enc_asciicompat(enc))
345  return FALSE;
346  else if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)
347  return TRUE;
348  return FALSE;
349 }
350 
351 static inline void
352 str_mod_check(VALUE s, const char *p, long len)
353 {
354  if (RSTRING_PTR(s) != p || RSTRING_LEN(s) != len){
355  rb_raise(rb_eRuntimeError, "string modified");
356  }
357 }
358 
359 size_t
361 {
362  if (STR_EMBED_P(str)) {
363  return RSTRING_EMBED_LEN_MAX;
364  }
365  else if (STR_NOCAPA_P(str)) {
366  return RSTRING(str)->as.heap.len;
367  }
368  else {
369  return RSTRING(str)->as.heap.aux.capa;
370  }
371 }
372 
373 static inline VALUE
375 {
376  NEWOBJ_OF(str, struct RString, klass, T_STRING);
377 
378  str->as.heap.ptr = 0;
379  str->as.heap.len = 0;
380  str->as.heap.aux.capa = 0;
381 
382  return (VALUE)str;
383 }
384 
385 static inline VALUE
387 {
390  }
391  return str_alloc(klass);
392 }
393 
394 static VALUE
395 str_new(VALUE klass, const char *ptr, long len)
396 {
397  VALUE str;
398 
399  if (len < 0) {
400  rb_raise(rb_eArgError, "negative string size (or size too big)");
401  }
402 
405  }
406 
407  str = str_alloc(klass);
408  if (len > RSTRING_EMBED_LEN_MAX) {
409  RSTRING(str)->as.heap.aux.capa = len;
410  RSTRING(str)->as.heap.ptr = ALLOC_N(char,len+1);
411  STR_SET_NOEMBED(str);
412  }
413  else if (len == 0) {
415  }
416  if (ptr) {
417  memcpy(RSTRING_PTR(str), ptr, len);
418  }
419  STR_SET_LEN(str, len);
420  RSTRING_PTR(str)[len] = '\0';
421  return str;
422 }
423 
424 VALUE
425 rb_str_new(const char *ptr, long len)
426 {
427  return str_new(rb_cString, ptr, len);
428 }
429 
430 VALUE
431 rb_usascii_str_new(const char *ptr, long len)
432 {
433  VALUE str = rb_str_new(ptr, len);
435  return str;
436 }
437 
438 VALUE
439 rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
440 {
441  VALUE str = rb_str_new(ptr, len);
442  rb_enc_associate(str, enc);
443  return str;
444 }
445 
446 VALUE
447 rb_str_new_cstr(const char *ptr)
448 {
449  if (!ptr) {
450  rb_raise(rb_eArgError, "NULL pointer given");
451  }
452  return rb_str_new(ptr, strlen(ptr));
453 }
454 
456 #define rb_str_new2 rb_str_new_cstr
457 
458 VALUE
459 rb_usascii_str_new_cstr(const char *ptr)
460 {
461  VALUE str = rb_str_new2(ptr);
463  return str;
464 }
465 
467 #define rb_usascii_str_new2 rb_usascii_str_new_cstr
468 
469 VALUE
470 rb_tainted_str_new(const char *ptr, long len)
471 {
472  VALUE str = rb_str_new(ptr, len);
473 
474  OBJ_TAINT(str);
475  return str;
476 }
477 
478 VALUE
479 rb_tainted_str_new_cstr(const char *ptr)
480 {
481  VALUE str = rb_str_new2(ptr);
482 
483  OBJ_TAINT(str);
484  return str;
485 }
486 
488 #define rb_tainted_str_new2 rb_tainted_str_new_cstr
489 
490 VALUE
491 rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
492 {
494  rb_econv_t *ec;
495  rb_econv_result_t ret;
496  long len, olen;
497  VALUE econv_wrapper;
498  VALUE newstr;
499  const unsigned char *start, *sp;
500  unsigned char *dest, *dp;
501  size_t converted_output = 0;
502 
503  if (!to) return str;
504  if (!from) from = rb_enc_get(str);
505  if (from == to) return str;
506  if ((rb_enc_asciicompat(to) && ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) ||
507  to == rb_ascii8bit_encoding()) {
508  if (STR_ENC_GET(str) != to) {
509  str = rb_str_dup(str);
510  rb_enc_associate(str, to);
511  }
512  return str;
513  }
514 
515  len = RSTRING_LEN(str);
516  newstr = rb_str_new(0, len);
517  olen = len;
518 
519  econv_wrapper = rb_obj_alloc(rb_cEncodingConverter);
520  RBASIC(econv_wrapper)->klass = 0;
521  ec = rb_econv_open_opts(from->name, to->name, ecflags, ecopts);
522  if (!ec) return str;
523  DATA_PTR(econv_wrapper) = ec;
524 
525  sp = (unsigned char*)RSTRING_PTR(str);
526  start = sp;
527  while ((dest = (unsigned char*)RSTRING_PTR(newstr)),
528  (dp = dest + converted_output),
529  (ret = rb_econv_convert(ec, &sp, start + len, &dp, dest + olen, 0)),
531  /* destination buffer short */
532  size_t converted_input = sp - start;
533  size_t rest = len - converted_input;
534  converted_output = dp - dest;
535  rb_str_set_len(newstr, converted_output);
536  if (converted_input && converted_output &&
537  rest < (LONG_MAX / converted_output)) {
538  rest = (rest * converted_output) / converted_input;
539  }
540  else {
541  rest = olen;
542  }
543  olen += rest < 2 ? 2 : rest;
544  rb_str_resize(newstr, olen);
545  }
546  DATA_PTR(econv_wrapper) = 0;
547  rb_econv_close(ec);
548  rb_gc_force_recycle(econv_wrapper);
549  switch (ret) {
550  case econv_finished:
551  len = dp - (unsigned char*)RSTRING_PTR(newstr);
552  rb_str_set_len(newstr, len);
553  rb_enc_associate(newstr, to);
554  return newstr;
555 
556  default:
557  /* some error, return original */
558  return str;
559  }
560 }
561 
562 VALUE
564 {
565  return rb_str_conv_enc_opts(str, from, to, 0, Qnil);
566 }
567 
568 VALUE
569 rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc)
570 {
571  VALUE str;
572 
573  str = rb_tainted_str_new(ptr, len);
574  if (eenc == rb_usascii_encoding() &&
577  return str;
578  }
579  rb_enc_associate(str, eenc);
580  return rb_str_conv_enc(str, eenc, rb_default_internal_encoding());
581 }
582 
583 VALUE
584 rb_external_str_new(const char *ptr, long len)
585 {
587 }
588 
589 VALUE
590 rb_external_str_new_cstr(const char *ptr)
591 {
593 }
594 
595 VALUE
596 rb_locale_str_new(const char *ptr, long len)
597 {
599 }
600 
601 VALUE
602 rb_locale_str_new_cstr(const char *ptr)
603 {
605 }
606 
607 VALUE
608 rb_filesystem_str_new(const char *ptr, long len)
609 {
611 }
612 
613 VALUE
615 {
617 }
618 
619 VALUE
621 {
623 }
624 
625 VALUE
627 {
628  return rb_str_conv_enc(str, STR_ENC_GET(str), rb_locale_encoding());
629 }
630 
631 VALUE
633 {
634  return rb_str_conv_enc(str, STR_ENC_GET(str), enc);
635 }
636 
637 static VALUE
639 {
640  if (RSTRING_LEN(str) <= RSTRING_EMBED_LEN_MAX) {
641  STR_SET_EMBED(str2);
642  memcpy(RSTRING_PTR(str2), RSTRING_PTR(str), RSTRING_LEN(str)+1);
643  STR_SET_EMBED_LEN(str2, RSTRING_LEN(str));
644  }
645  else {
646  str = rb_str_new_frozen(str);
647  FL_SET(str2, STR_NOEMBED);
648  RSTRING(str2)->as.heap.len = RSTRING_LEN(str);
649  RSTRING(str2)->as.heap.ptr = RSTRING_PTR(str);
650  RSTRING(str2)->as.heap.aux.shared = str;
651  FL_SET(str2, ELTS_SHARED);
652  }
653  return str2;
654 }
655 
656 static VALUE
658 {
660  rb_enc_cr_str_exact_copy(str2, str);
661  return str2;
662 }
663 
664 static VALUE
666 {
667  return str_replace_shared(str_alloc(klass), str);
668 }
669 
670 static VALUE
671 str_new3(VALUE klass, VALUE str)
672 {
673  return str_new_shared(klass, str);
674 }
675 
676 VALUE
678 {
679  VALUE str2 = str_new3(rb_obj_class(str), str);
680 
681  OBJ_INFECT(str2, str);
682  return str2;
683 }
684 
686 #define rb_str_new3 rb_str_new_shared
687 
688 static VALUE
689 str_new4(VALUE klass, VALUE str)
690 {
691  VALUE str2;
692 
693  str2 = str_alloc(klass);
694  STR_SET_NOEMBED(str2);
695  RSTRING(str2)->as.heap.len = RSTRING_LEN(str);
696  RSTRING(str2)->as.heap.ptr = RSTRING_PTR(str);
697  if (STR_SHARED_P(str)) {
698  VALUE shared = RSTRING(str)->as.heap.aux.shared;
699  assert(OBJ_FROZEN(shared));
700  FL_SET(str2, ELTS_SHARED);
701  RSTRING(str2)->as.heap.aux.shared = shared;
702  }
703  else {
704  FL_SET(str, ELTS_SHARED);
705  RSTRING(str)->as.heap.aux.shared = str2;
706  }
707  rb_enc_cr_str_exact_copy(str2, str);
708  OBJ_INFECT(str2, str);
709  return str2;
710 }
711 
712 VALUE
714 {
715  VALUE klass, str;
716 
717  if (OBJ_FROZEN(orig)) return orig;
718  klass = rb_obj_class(orig);
719  if (STR_SHARED_P(orig) && (str = RSTRING(orig)->as.heap.aux.shared)) {
720  long ofs;
721  assert(OBJ_FROZEN(str));
722  ofs = RSTRING_LEN(str) - RSTRING_LEN(orig);
723  if ((ofs > 0) || (klass != RBASIC(str)->klass) ||
724  ((RBASIC(str)->flags ^ RBASIC(orig)->flags) & (FL_TAINT|FL_UNTRUSTED)) ||
725  ENCODING_GET(str) != ENCODING_GET(orig)) {
726  str = str_new3(klass, str);
727  RSTRING(str)->as.heap.ptr += ofs;
728  RSTRING(str)->as.heap.len -= ofs;
729  rb_enc_cr_str_exact_copy(str, orig);
730  OBJ_INFECT(str, orig);
731  }
732  }
733  else if (STR_EMBED_P(orig)) {
734  str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
735  rb_enc_cr_str_exact_copy(str, orig);
736  OBJ_INFECT(str, orig);
737  }
738  else if (STR_ASSOC_P(orig)) {
739  VALUE assoc = RSTRING(orig)->as.heap.aux.shared;
740  FL_UNSET(orig, STR_ASSOC);
741  str = str_new4(klass, orig);
742  FL_SET(str, STR_ASSOC);
743  RSTRING(str)->as.heap.aux.shared = assoc;
744  }
745  else {
746  str = str_new4(klass, orig);
747  }
748  OBJ_FREEZE(str);
749  return str;
750 }
751 
753 #define rb_str_new4 rb_str_new_frozen
754 
755 VALUE
756 rb_str_new_with_class(VALUE obj, const char *ptr, long len)
757 {
758  return str_new(rb_obj_class(obj), ptr, len);
759 }
760 
761 RUBY_ALIAS_FUNCTION(rb_str_new5(VALUE obj, const char *ptr, long len),
762  rb_str_new_with_class, (obj, ptr, len))
763 #define rb_str_new5 rb_str_new_with_class
764 
765 static VALUE
766 str_new_empty(VALUE str)
767 {
768  VALUE v = rb_str_new5(str, 0, 0);
769  rb_enc_copy(v, str);
770  OBJ_INFECT(v, str);
771  return v;
772 }
773 
774 #define STR_BUF_MIN_SIZE 128
775 
776 VALUE
777 rb_str_buf_new(long capa)
778 {
779  VALUE str = str_alloc(rb_cString);
780 
781  if (capa < STR_BUF_MIN_SIZE) {
782  capa = STR_BUF_MIN_SIZE;
783  }
784  FL_SET(str, STR_NOEMBED);
785  RSTRING(str)->as.heap.aux.capa = capa;
786  RSTRING(str)->as.heap.ptr = ALLOC_N(char, capa+1);
787  RSTRING(str)->as.heap.ptr[0] = '\0';
788 
789  return str;
790 }
791 
792 VALUE
793 rb_str_buf_new_cstr(const char *ptr)
794 {
795  VALUE str;
796  long len = strlen(ptr);
797 
798  str = rb_str_buf_new(len);
799  rb_str_buf_cat(str, ptr, len);
800 
801  return str;
802 }
803 
805 #define rb_str_buf_new2 rb_str_buf_new_cstr
806 
807 VALUE
808 rb_str_tmp_new(long len)
809 {
810  return str_new(0, 0, len);
811 }
812 
813 void *
814 rb_alloc_tmp_buffer(volatile VALUE *store, long len)
815 {
816  VALUE s = rb_str_tmp_new(len);
817  *store = s;
818  return RSTRING_PTR(s);
819 }
820 
821 void
822 rb_free_tmp_buffer(volatile VALUE *store)
823 {
824  VALUE s = *store;
825  *store = 0;
826  if (s) rb_str_clear(s);
827 }
828 
829 void
831 {
832  if (!STR_EMBED_P(str) && !STR_SHARED_P(str)) {
833  xfree(RSTRING(str)->as.heap.ptr);
834  }
835 }
836 
837 RUBY_FUNC_EXPORTED size_t
839 {
840  if (!STR_EMBED_P(str) && !STR_SHARED_P(str)) {
841  return RSTRING(str)->as.heap.aux.capa;
842  }
843  else {
844  return 0;
845  }
846 }
847 
848 VALUE
850 {
851  return rb_convert_type(str, T_STRING, "String", "to_str");
852 }
853 
854 static inline void str_discard(VALUE str);
855 
856 void
858 {
859  rb_encoding *enc;
860  int cr;
861  if (str == str2) return;
862  enc = STR_ENC_GET(str2);
863  cr = ENC_CODERANGE(str2);
864  str_discard(str);
865  OBJ_INFECT(str, str2);
866  if (RSTRING_LEN(str2) <= RSTRING_EMBED_LEN_MAX) {
867  STR_SET_EMBED(str);
868  memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), RSTRING_LEN(str2)+1);
869  STR_SET_EMBED_LEN(str, RSTRING_LEN(str2));
870  rb_enc_associate(str, enc);
871  ENC_CODERANGE_SET(str, cr);
872  return;
873  }
874  STR_SET_NOEMBED(str);
875  STR_UNSET_NOCAPA(str);
876  RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
877  RSTRING(str)->as.heap.len = RSTRING_LEN(str2);
878  if (STR_NOCAPA_P(str2)) {
879  FL_SET(str, RBASIC(str2)->flags & STR_NOCAPA);
880  RSTRING(str)->as.heap.aux.shared = RSTRING(str2)->as.heap.aux.shared;
881  }
882  else {
883  RSTRING(str)->as.heap.aux.capa = RSTRING(str2)->as.heap.aux.capa;
884  }
885  STR_SET_EMBED(str2); /* abandon str2 */
886  RSTRING_PTR(str2)[0] = 0;
887  STR_SET_EMBED_LEN(str2, 0);
888  rb_enc_associate(str, enc);
889  ENC_CODERANGE_SET(str, cr);
890 }
891 
892 static ID id_to_s;
893 
894 VALUE
896 {
897  VALUE str;
898 
899  if (RB_TYPE_P(obj, T_STRING)) {
900  return obj;
901  }
902  str = rb_funcall(obj, id_to_s, 0);
903  if (!RB_TYPE_P(str, T_STRING))
904  return rb_any_to_s(obj);
905  if (OBJ_TAINTED(obj)) OBJ_TAINT(str);
906  return str;
907 }
908 
909 static VALUE
911 {
912  long len;
913 
914  len = RSTRING_LEN(str2);
915  if (STR_ASSOC_P(str2)) {
916  str2 = rb_str_new4(str2);
917  }
918  if (STR_SHARED_P(str2)) {
919  VALUE shared = RSTRING(str2)->as.heap.aux.shared;
920  assert(OBJ_FROZEN(shared));
921  STR_SET_NOEMBED(str);
922  RSTRING(str)->as.heap.len = len;
923  RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
924  FL_SET(str, ELTS_SHARED);
925  FL_UNSET(str, STR_ASSOC);
926  RSTRING(str)->as.heap.aux.shared = shared;
927  }
928  else {
929  str_replace_shared(str, str2);
930  }
931 
932  OBJ_INFECT(str, str2);
933  rb_enc_cr_str_exact_copy(str, str2);
934  return str;
935 }
936 
937 static VALUE
939 {
940  VALUE dup = str_alloc(klass);
941  str_replace(dup, str);
942  return dup;
943 }
944 
945 VALUE
947 {
948  return str_duplicate(rb_obj_class(str), str);
949 }
950 
951 VALUE
953 {
957  }
958  return str_replace(str_alloc(rb_cString), str);
959 }
960 
961 /*
962  * call-seq:
963  * String.new(str="") -> new_str
964  *
965  * Returns a new string object containing a copy of <i>str</i>.
966  */
967 
968 static VALUE
970 {
971  VALUE orig;
972 
973  if (argc > 0 && rb_scan_args(argc, argv, "01", &orig) == 1)
974  rb_str_replace(str, orig);
975  return str;
976 }
977 
978 static inline long
979 enc_strlen(const char *p, const char *e, rb_encoding *enc, int cr)
980 {
981  long c;
982  const char *q;
983 
984  if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
985  return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
986  }
987  else if (rb_enc_asciicompat(enc)) {
988  c = 0;
989  if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID) {
990  while (p < e) {
991  if (ISASCII(*p)) {
992  q = search_nonascii(p, e);
993  if (!q)
994  return c + (e - p);
995  c += q - p;
996  p = q;
997  }
998  p += rb_enc_fast_mbclen(p, e, enc);
999  c++;
1000  }
1001  }
1002  else {
1003  while (p < e) {
1004  if (ISASCII(*p)) {
1005  q = search_nonascii(p, e);
1006  if (!q)
1007  return c + (e - p);
1008  c += q - p;
1009  p = q;
1010  }
1011  p += rb_enc_mbclen(p, e, enc);
1012  c++;
1013  }
1014  }
1015  return c;
1016  }
1017 
1018  for (c=0; p<e; c++) {
1019  p += rb_enc_mbclen(p, e, enc);
1020  }
1021  return c;
1022 }
1023 
1024 long
1025 rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
1026 {
1027  return enc_strlen(p, e, enc, ENC_CODERANGE_UNKNOWN);
1028 }
1029 
1030 long
1031 rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
1032 {
1033  long c;
1034  const char *q;
1035  int ret;
1036 
1037  *cr = 0;
1038  if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
1039  return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
1040  }
1041  else if (rb_enc_asciicompat(enc)) {
1042  c = 0;
1043  while (p < e) {
1044  if (ISASCII(*p)) {
1045  q = search_nonascii(p, e);
1046  if (!q) {
1047  if (!*cr) *cr = ENC_CODERANGE_7BIT;
1048  return c + (e - p);
1049  }
1050  c += q - p;
1051  p = q;
1052  }
1053  ret = rb_enc_precise_mbclen(p, e, enc);
1054  if (MBCLEN_CHARFOUND_P(ret)) {
1055  *cr |= ENC_CODERANGE_VALID;
1056  p += MBCLEN_CHARFOUND_LEN(ret);
1057  }
1058  else {
1059  *cr = ENC_CODERANGE_BROKEN;
1060  p++;
1061  }
1062  c++;
1063  }
1064  if (!*cr) *cr = ENC_CODERANGE_7BIT;
1065  return c;
1066  }
1067 
1068  for (c=0; p<e; c++) {
1069  ret = rb_enc_precise_mbclen(p, e, enc);
1070  if (MBCLEN_CHARFOUND_P(ret)) {
1071  *cr |= ENC_CODERANGE_VALID;
1072  p += MBCLEN_CHARFOUND_LEN(ret);
1073  }
1074  else {
1075  *cr = ENC_CODERANGE_BROKEN;
1076  if (p + rb_enc_mbminlen(enc) <= e)
1077  p += rb_enc_mbminlen(enc);
1078  else
1079  p = e;
1080  }
1081  }
1082  if (!*cr) *cr = ENC_CODERANGE_7BIT;
1083  return c;
1084 }
1085 
1086 #ifdef NONASCII_MASK
1087 #define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
1088 
1089 /*
1090  * UTF-8 leading bytes have either 0xxxxxxx or 11xxxxxx
1091  * bit represention. (see http://en.wikipedia.org/wiki/UTF-8)
1092  * Therefore, following pseudo code can detect UTF-8 leading byte.
1093  *
1094  * if (!(byte & 0x80))
1095  * byte |= 0x40; // turn on bit6
1096  * return ((byte>>6) & 1); // bit6 represent it's leading byte or not.
1097  *
1098  * This function calculate every bytes in the argument word `s'
1099  * using the above logic concurrently. and gather every bytes result.
1100  */
1101 static inline VALUE
1102 count_utf8_lead_bytes_with_word(const VALUE *s)
1103 {
1104  VALUE d = *s;
1105 
1106  /* Transform into bit0 represent UTF-8 leading or not. */
1107  d |= ~(d>>1);
1108  d >>= 6;
1109  d &= NONASCII_MASK >> 7;
1110 
1111  /* Gather every bytes. */
1112  d += (d>>8);
1113  d += (d>>16);
1114 #if SIZEOF_VALUE == 8
1115  d += (d>>32);
1116 #endif
1117  return (d&0xF);
1118 }
1119 #endif
1120 
1121 static long
1123 {
1124  const char *p, *e;
1125  long n;
1126  int cr;
1127 
1128  if (single_byte_optimizable(str)) return RSTRING_LEN(str);
1129  if (!enc) enc = STR_ENC_GET(str);
1130  p = RSTRING_PTR(str);
1131  e = RSTRING_END(str);
1132  cr = ENC_CODERANGE(str);
1133 #ifdef NONASCII_MASK
1134  if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
1135  enc == rb_utf8_encoding()) {
1136 
1137  VALUE len = 0;
1138  if ((int)sizeof(VALUE) * 2 < e - p) {
1139  const VALUE *s, *t;
1140  const VALUE lowbits = sizeof(VALUE) - 1;
1141  s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
1142  t = (const VALUE*)(~lowbits & (VALUE)e);
1143  while (p < (const char *)s) {
1144  if (is_utf8_lead_byte(*p)) len++;
1145  p++;
1146  }
1147  while (s < t) {
1148  len += count_utf8_lead_bytes_with_word(s);
1149  s++;
1150  }
1151  p = (const char *)s;
1152  }
1153  while (p < e) {
1154  if (is_utf8_lead_byte(*p)) len++;
1155  p++;
1156  }
1157  return (long)len;
1158  }
1159 #endif
1160  n = rb_enc_strlen_cr(p, e, enc, &cr);
1161  if (cr) {
1162  ENC_CODERANGE_SET(str, cr);
1163  }
1164  return n;
1165 }
1166 
1167 long
1169 {
1170  return str_strlen(str, STR_ENC_GET(str));
1171 }
1172 
1173 /*
1174  * call-seq:
1175  * str.length -> integer
1176  * str.size -> integer
1177  *
1178  * Returns the character length of <i>str</i>.
1179  */
1180 
1181 VALUE
1183 {
1184  long len;
1185 
1186  len = str_strlen(str, STR_ENC_GET(str));
1187  return LONG2NUM(len);
1188 }
1189 
1190 /*
1191  * call-seq:
1192  * str.bytesize -> integer
1193  *
1194  * Returns the length of +str+ in bytes.
1195  *
1196  * "\x80\u3042".bytesize #=> 4
1197  * "hello".bytesize #=> 5
1198  */
1199 
1200 static VALUE
1202 {
1203  return LONG2NUM(RSTRING_LEN(str));
1204 }
1205 
1206 /*
1207  * call-seq:
1208  * str.empty? -> true or false
1209  *
1210  * Returns <code>true</code> if <i>str</i> has a length of zero.
1211  *
1212  * "hello".empty? #=> false
1213  * " ".empty? #=> false
1214  * "".empty? #=> true
1215  */
1216 
1217 static VALUE
1219 {
1220  if (RSTRING_LEN(str) == 0)
1221  return Qtrue;
1222  return Qfalse;
1223 }
1224 
1225 /*
1226  * call-seq:
1227  * str + other_str -> new_str
1228  *
1229  * Concatenation---Returns a new <code>String</code> containing
1230  * <i>other_str</i> concatenated to <i>str</i>.
1231  *
1232  * "Hello from " + self.to_s #=> "Hello from main"
1233  */
1234 
1235 VALUE
1237 {
1238  VALUE str3;
1239  rb_encoding *enc;
1240 
1241  StringValue(str2);
1242  enc = rb_enc_check(str1, str2);
1243  str3 = rb_str_new(0, RSTRING_LEN(str1)+RSTRING_LEN(str2));
1244  memcpy(RSTRING_PTR(str3), RSTRING_PTR(str1), RSTRING_LEN(str1));
1245  memcpy(RSTRING_PTR(str3) + RSTRING_LEN(str1),
1246  RSTRING_PTR(str2), RSTRING_LEN(str2));
1247  RSTRING_PTR(str3)[RSTRING_LEN(str3)] = '\0';
1248 
1249  if (OBJ_TAINTED(str1) || OBJ_TAINTED(str2))
1250  OBJ_TAINT(str3);
1253  return str3;
1254 }
1255 
1256 /*
1257  * call-seq:
1258  * str * integer -> new_str
1259  *
1260  * Copy --- Returns a new String containing +integer+ copies of the receiver.
1261  * +integer+ must be greater than or equal to 0.
1262  *
1263  * "Ho! " * 3 #=> "Ho! Ho! Ho! "
1264  * "Ho! " * 0 #=> ""
1265  */
1266 
1267 VALUE
1269 {
1270  VALUE str2;
1271  long n, len;
1272  char *ptr2;
1273 
1274  len = NUM2LONG(times);
1275  if (len < 0) {
1276  rb_raise(rb_eArgError, "negative argument");
1277  }
1278  if (len && LONG_MAX/len < RSTRING_LEN(str)) {
1279  rb_raise(rb_eArgError, "argument too big");
1280  }
1281 
1282  str2 = rb_str_new5(str, 0, len *= RSTRING_LEN(str));
1283  ptr2 = RSTRING_PTR(str2);
1284  if (len) {
1285  n = RSTRING_LEN(str);
1286  memcpy(ptr2, RSTRING_PTR(str), n);
1287  while (n <= len/2) {
1288  memcpy(ptr2 + n, ptr2, n);
1289  n *= 2;
1290  }
1291  memcpy(ptr2 + n, ptr2, len-n);
1292  }
1293  ptr2[RSTRING_LEN(str2)] = '\0';
1294  OBJ_INFECT(str2, str);
1295  rb_enc_cr_str_copy_for_substr(str2, str);
1296 
1297  return str2;
1298 }
1299 
1300 /*
1301  * call-seq:
1302  * str % arg -> new_str
1303  *
1304  * Format---Uses <i>str</i> as a format specification, and returns the result
1305  * of applying it to <i>arg</i>. If the format specification contains more than
1306  * one substitution, then <i>arg</i> must be an <code>Array</code> or <code>Hash</code>
1307  * containing the values to be substituted. See <code>Kernel::sprintf</code> for
1308  * details of the format string.
1309  *
1310  * "%05d" % 123 #=> "00123"
1311  * "%-5s: %08x" % [ "ID", self.object_id ] #=> "ID : 200e14d6"
1312  * "foo = %{foo}" % { :foo => 'bar' } #=> "foo = bar"
1313  */
1314 
1315 static VALUE
1317 {
1318  volatile VALUE tmp = rb_check_array_type(arg);
1319 
1320  if (!NIL_P(tmp)) {
1321  return rb_str_format(RARRAY_LENINT(tmp), RARRAY_PTR(tmp), str);
1322  }
1323  return rb_str_format(1, &arg, str);
1324 }
1325 
1326 static inline void
1328 {
1329  if (FL_TEST(str, STR_TMPLOCK)) {
1330  rb_raise(rb_eRuntimeError, "can't modify string; temporarily locked");
1331  }
1332  rb_check_frozen(str);
1333  if (!OBJ_UNTRUSTED(str) && rb_safe_level() >= 4)
1334  rb_raise(rb_eSecurityError, "Insecure: can't modify string");
1335 }
1336 
1337 static inline int
1339 {
1340  str_modifiable(str);
1341  if (!STR_SHARED_P(str)) return 1;
1342  if (STR_EMBED_P(str)) return 1;
1343  return 0;
1344 }
1345 
1346 static void
1348 {
1349  char *ptr;
1350  long len = RSTRING_LEN(str);
1351  long capa = len + expand;
1352 
1353  if (len > capa) len = capa;
1354  ptr = ALLOC_N(char, capa + 1);
1355  if (RSTRING_PTR(str)) {
1356  memcpy(ptr, RSTRING_PTR(str), len);
1357  }
1358  STR_SET_NOEMBED(str);
1359  STR_UNSET_NOCAPA(str);
1360  ptr[len] = 0;
1361  RSTRING(str)->as.heap.ptr = ptr;
1362  RSTRING(str)->as.heap.len = len;
1363  RSTRING(str)->as.heap.aux.capa = capa;
1364 }
1365 
1366 #define str_make_independent(str) str_make_independent_expand((str), 0L)
1367 
1368 void
1370 {
1371  if (!str_independent(str))
1372  str_make_independent(str);
1373  ENC_CODERANGE_CLEAR(str);
1374 }
1375 
1376 void
1377 rb_str_modify_expand(VALUE str, long expand)
1378 {
1379  if (expand < 0) {
1380  rb_raise(rb_eArgError, "negative expanding string size");
1381  }
1382  if (!str_independent(str)) {
1383  str_make_independent_expand(str, expand);
1384  }
1385  else if (expand > 0) {
1386  long len = RSTRING_LEN(str);
1387  long capa = len + expand;
1388  if (!STR_EMBED_P(str)) {
1389  REALLOC_N(RSTRING(str)->as.heap.ptr, char, capa+1);
1390  STR_UNSET_NOCAPA(str);
1391  RSTRING(str)->as.heap.aux.capa = capa;
1392  }
1393  else if (capa > RSTRING_EMBED_LEN_MAX) {
1394  str_make_independent_expand(str, expand);
1395  }
1396  }
1397  ENC_CODERANGE_CLEAR(str);
1398 }
1399 
1400 /* As rb_str_modify(), but don't clear coderange */
1401 static void
1403 {
1404  if (!str_independent(str))
1405  str_make_independent(str);
1406  if (ENC_CODERANGE(str) == ENC_CODERANGE_BROKEN)
1407  /* Force re-scan later */
1408  ENC_CODERANGE_CLEAR(str);
1409 }
1410 
1411 static inline void
1413 {
1414  str_modifiable(str);
1415  if (!STR_SHARED_P(str) && !STR_EMBED_P(str)) {
1416  xfree(RSTRING_PTR(str));
1417  RSTRING(str)->as.heap.ptr = 0;
1418  RSTRING(str)->as.heap.len = 0;
1419  }
1420 }
1421 
1422 void
1424 {
1425  /* sanity check */
1426  rb_check_frozen(str);
1427  if (STR_ASSOC_P(str)) {
1428  /* already associated */
1429  rb_ary_concat(RSTRING(str)->as.heap.aux.shared, add);
1430  }
1431  else {
1432  if (STR_SHARED_P(str)) {
1433  VALUE assoc = RSTRING(str)->as.heap.aux.shared;
1434  str_make_independent(str);
1435  if (STR_ASSOC_P(assoc)) {
1436  assoc = RSTRING(assoc)->as.heap.aux.shared;
1437  rb_ary_concat(assoc, add);
1438  add = assoc;
1439  }
1440  }
1441  else if (STR_EMBED_P(str)) {
1442  str_make_independent(str);
1443  }
1444  else if (RSTRING(str)->as.heap.aux.capa != RSTRING_LEN(str)) {
1445  RESIZE_CAPA(str, RSTRING_LEN(str));
1446  }
1447  FL_SET(str, STR_ASSOC);
1448  RBASIC(add)->klass = 0;
1449  RSTRING(str)->as.heap.aux.shared = add;
1450  }
1451 }
1452 
1453 VALUE
1455 {
1456  if (STR_SHARED_P(str)) str = RSTRING(str)->as.heap.aux.shared;
1457  if (STR_ASSOC_P(str)) {
1458  return RSTRING(str)->as.heap.aux.shared;
1459  }
1460  return Qfalse;
1461 }
1462 
1463 void
1465 {
1466  rb_encoding *enc = rb_enc_get(str);
1467  if (!rb_enc_asciicompat(enc)) {
1468  rb_raise(rb_eEncCompatError, "ASCII incompatible encoding: %s", rb_enc_name(enc));
1469  }
1470 }
1471 
1472 VALUE
1473 rb_string_value(volatile VALUE *ptr)
1474 {
1475  VALUE s = *ptr;
1476  if (!RB_TYPE_P(s, T_STRING)) {
1477  s = rb_str_to_str(s);
1478  *ptr = s;
1479  }
1480  return s;
1481 }
1482 
1483 char *
1485 {
1486  VALUE str = rb_string_value(ptr);
1487  return RSTRING_PTR(str);
1488 }
1489 
1490 char *
1492 {
1493  VALUE str = rb_string_value(ptr);
1494  char *s = RSTRING_PTR(str);
1495  long len = RSTRING_LEN(str);
1496 
1497  if (!s || memchr(s, 0, len)) {
1498  rb_raise(rb_eArgError, "string contains null byte");
1499  }
1500  if (s[len]) {
1501  rb_str_modify(str);
1502  s = RSTRING_PTR(str);
1503  s[RSTRING_LEN(str)] = 0;
1504  }
1505  return s;
1506 }
1507 
1508 VALUE
1510 {
1511  str = rb_check_convert_type(str, T_STRING, "String", "to_str");
1512  return str;
1513 }
1514 
1515 /*
1516  * call-seq:
1517  * String.try_convert(obj) -> string or nil
1518  *
1519  * Try to convert <i>obj</i> into a String, using to_str method.
1520  * Returns converted string or nil if <i>obj</i> cannot be converted
1521  * for any reason.
1522  *
1523  * String.try_convert("str") #=> "str"
1524  * String.try_convert(/re/) #=> nil
1525  */
1526 static VALUE
1528 {
1529  return rb_check_string_type(str);
1530 }
1531 
/*
 * Move forward from p by *nthp characters of encoding enc, never returning
 * a position past e, and return the position reached.
 *
 * *nthp is overwritten with the local counter on exit:
 *  - single-byte and fixed-width encodings: the counter is never changed,
 *    so *nthp keeps its input value;
 *  - ASCII-compatible variable-width encodings: the number of characters
 *    that could not be consumed before reaching e (0 on success);
 *  - other encodings: the counter as left by the post-decrementing loop.
 */
1532 static char*
1533 str_nth_len(const char *p, const char *e, long *nthp, rb_encoding *enc)
1534 {
1535  long nth = *nthp;
1536  if (rb_enc_mbmaxlen(enc) == 1) {
      /* one byte per character */
1537  p += nth;
1538  }
1539  else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
      /* fixed-width encoding: every character has the same byte size */
1540  p += nth * rb_enc_mbmaxlen(enc);
1541  }
1542  else if (rb_enc_asciicompat(enc)) {
1543  const char *p2, *e2;
1544  int n;
1545 
1546  while (p < e && 0 < nth) {
1547  e2 = p + nth;
      /* fewer bytes remain than characters requested: cannot be satisfied */
1548  if (e < e2) {
1549  *nthp = nth;
1550  return (char *)e;
1551  }
1552  if (ISASCII(*p)) {
      /* presumably finds the first non-ASCII byte in [p, e2), or 0 -- confirm
       * against search_nonascii(), which is defined outside this section */
1553  p2 = search_nonascii(p, e2);
1554  if (!p2) {
      /* the next nth bytes are all ASCII, so nth bytes == nth characters */
1555  nth -= e2 - p;
1556  *nthp = nth;
1557  return (char *)e2;
1558  }
      /* skip the ASCII run in one step */
1559  nth -= p2 - p;
1560  p = p2;
1561  }
      /* step over one character at p */
1562  n = rb_enc_mbclen(p, e, enc);
1563  p += n;
1564  nth--;
1565  }
1566  *nthp = nth;
      /* ran out of bytes before consuming every requested character */
1567  if (nth != 0) {
1568  return (char *)e;
1569  }
1570  return (char *)p;
1571  }
1572  else {
      /* generic path: step one character at a time */
1573  while (p < e && nth--) {
1574  p += rb_enc_mbclen(p, e, enc);
1575  }
1576  }
      /* the pointer arithmetic above may overshoot; clamp to the end */
1577  if (p > e) p = e;
1578  *nthp = nth;
1579  return (char*)p;
1580 }
1581 
1582 char*
1583 rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc)
1584 {
1585  return str_nth_len(p, e, &nth, enc);
1586 }
1587 
1588 static char*
1589 str_nth(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
1590 {
1591  if (singlebyte)
1592  p += nth;
1593  else {
1594  p = str_nth_len(p, e, &nth, enc);
1595  }
1596  if (!p) return 0;
1597  if (p > e) p = e;
1598  return (char *)p;
1599 }
1600 
1601 /* char offset to byte offset */
1602 static long
1603 str_offset(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
1604 {
1605  const char *pp = str_nth(p, e, nth, enc, singlebyte);
1606  if (!pp) return e - p;
1607  return pp - p;
1608 }
1609 
1610 long
1611 rb_str_offset(VALUE str, long pos)
1612 {
1613  return str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
1615 }
1616 
1617 #ifdef NONASCII_MASK
/*
 * Advance p through [p, e) by counting bytes for which is_utf8_lead_byte()
 * is true, and return the position reached; *nthp is overwritten with the
 * count still outstanding.  The final loop stops on the lead byte found
 * when the count is already 0, i.e. at the start of the requested
 * character, or at e.
 *
 * NOTE(review): presumably the caller guarantees valid UTF-8 (the visible
 * caller checks ENC_CODERANGE_VALID and the UTF-8 encoding first) -- confirm
 * for any other call sites.
 */
1618 static char *
1619 str_utf8_nth(const char *p, const char *e, long *nthp)
1620 {
1621  long nth = *nthp;
      /* fast path only when both the buffer and the count exceed two words */
1622  if ((int)SIZEOF_VALUE * 2 < e - p && (int)SIZEOF_VALUE * 2 < nth) {
1623  const VALUE *s, *t;
1624  const VALUE lowbits = sizeof(VALUE) - 1;
      /* s: p rounded up to a VALUE boundary; t: e rounded down to one */
1625  s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
1626  t = (const VALUE*)(~lowbits & (VALUE)e);
      /* byte-wise until the first aligned word */
1627  while (p < (const char *)s) {
1628  if (is_utf8_lead_byte(*p)) nth--;
1629  p++;
1630  }
      /* word-wise while at least one word's worth of count remains;
       * count_utf8_lead_bytes_with_word() is defined outside this section */
1631  do {
1632  nth -= count_utf8_lead_bytes_with_word(s);
1633  s++;
1634  } while (s < t && (int)sizeof(VALUE) <= nth);
1635  p = (char *)s;
1636  }
      /* byte-wise for the remainder */
1637  while (p < e) {
1638  if (is_utf8_lead_byte(*p)) {
1639  if (nth == 0) break;
1640  nth--;
1641  }
1642  p++;
1643  }
1644  *nthp = nth;
1645  return (char *)p;
1646 }
1647 
1648 static long
1649 str_utf8_offset(const char *p, const char *e, long nth)
1650 {
1651  const char *pp = str_utf8_nth(p, e, &nth);
1652  return pp - p;
1653 }
1654 #endif
1655 
1656 /* byte offset to char offset */
1657 long
1658 rb_str_sublen(VALUE str, long pos)
1659 {
1660  if (single_byte_optimizable(str) || pos < 0)
1661  return pos;
1662  else {
1663  char *p = RSTRING_PTR(str);
1664  return enc_strlen(p, p + pos, STR_ENC_GET(str), ENC_CODERANGE(str));
1665  }
1666 }
1667 
1668 VALUE
1669 rb_str_subseq(VALUE str, long beg, long len)
1670 {
1671  VALUE str2;
1672 
1673  if (RSTRING_LEN(str) == beg + len &&
1674  RSTRING_EMBED_LEN_MAX < len) {
1675  str2 = rb_str_new_shared(rb_str_new_frozen(str));
1676  rb_str_drop_bytes(str2, beg);
1677  }
1678  else {
1679  str2 = rb_str_new5(str, RSTRING_PTR(str)+beg, len);
1680  RB_GC_GUARD(str);
1681  }
1682 
1683  rb_enc_cr_str_copy_for_substr(str2, str);
1684  OBJ_INFECT(str2, str);
1685 
1686  return str2;
1687 }
1688 
/*
 * Resolve a character-based substring request on str into a byte range.
 *
 * beg is a character index (negative values count from the end) and *lenp
 * a character count.  On success the return value points at the first
 * byte of the range inside str's buffer and *lenp is replaced by the
 * range's length in bytes.  Returns 0 (leaving *lenp untouched) when the
 * requested length is negative or beg lies outside the string.
 */
1689 static char *
1690 rb_str_subpos(VALUE str, long beg, long *lenp)
1691 {
1692  long len = *lenp;
1693  long slen = -1L;
1694  long blen = RSTRING_LEN(str);
1695  rb_encoding *enc = STR_ENC_GET(str);
1696  char *p, *s = RSTRING_PTR(str), *e = s + blen;
1697 
1698  if (len < 0) return 0;
1699  if (!blen) {
1700  len = 0;
1701  }
      /* bytes and characters coincide: plain index arithmetic */
1702  if (single_byte_optimizable(str)) {
1703  if (beg > blen) return 0;
1704  if (beg < 0) {
1705  beg += blen;
1706  if (beg < 0) return 0;
1707  }
1708  if (beg + len > blen)
1709  len = blen - beg;
1710  if (len < 0) return 0;
1711  p = s + beg;
1712  goto end;
1713  }
1714  if (beg < 0) {
      /* the range cannot extend past the end of the string */
1715  if (len > -beg) len = -beg;
      /* start is close to the end: walk backwards character by character
       * instead of counting the whole string */
1716  if (-beg * rb_enc_mbmaxlen(enc) < RSTRING_LEN(str) / 8) {
1717  beg = -beg;
      /* first move e back to the end of the range ... */
1718  while (beg-- > len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
1719  p = e;
1720  if (!p) return 0;
      /* ... then move p back len characters to the start of the range */
1721  while (len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
1722  if (!p) return 0;
1723  len = e - p;
1724  goto end;
1725  }
1726  else {
      /* otherwise turn beg into a non-negative character index */
1727  slen = str_strlen(str, enc);
1728  beg += slen;
1729  if (beg < 0) return 0;
      /* this p is only returned on the len == 0 path; every later branch
       * that runs for len != 0 assigns p again */
1730  p = s + beg;
1731  if (len == 0) goto end;
1732  }
1733  }
      /* a character index can never exceed the byte length */
1734  else if (beg > 0 && beg > RSTRING_LEN(str)) {
1735  return 0;
1736  }
1737  if (len == 0) {
1738  if (beg > str_strlen(str, enc)) return 0;
1739  p = s + beg;
1740  }
1741 #ifdef NONASCII_MASK
      /* known-valid UTF-8: use the lead-byte counting helpers */
1742  else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
1743  enc == rb_utf8_encoding()) {
1744  p = str_utf8_nth(s, e, &beg);
      /* characters left over means beg is past the end */
1745  if (beg > 0) return 0;
1746  len = str_utf8_offset(p, e, len);
1747  }
1748 #endif
      /* fixed-width encoding: scale indexes by the character size */
1749  else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
1750  int char_sz = rb_enc_mbmaxlen(enc);
1751 
1752  p = s + beg * char_sz;
1753  if (p > e) {
1754  return 0;
1755  }
1756  else if (len * char_sz > e - p)
1757  len = e - p;
1758  else
1759  len *= char_sz;
1760  }
      /* general case: locate the start; reaching e means an empty result,
       * or failure when characters are still outstanding */
1761  else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
1762  if (beg > 0) return 0;
1763  len = 0;
1764  }
1765  else {
      /* convert the character count into a byte count from p */
1766  len = str_offset(p, e, len, enc, 0);
1767  }
1768  end:
1769  *lenp = len;
1770  RB_GC_GUARD(str);
1771  return p;
1772 }
1773 
1774 VALUE
1775 rb_str_substr(VALUE str, long beg, long len)
1776 {
1777  VALUE str2;
1778  char *p = rb_str_subpos(str, beg, &len);
1779 
1780  if (!p) return Qnil;
1781  if (len > RSTRING_EMBED_LEN_MAX && p + len == RSTRING_END(str)) {
1782  str2 = rb_str_new4(str);
1783  str2 = str_new3(rb_obj_class(str2), str2);
1784  RSTRING(str2)->as.heap.ptr += RSTRING(str2)->as.heap.len - len;
1785  RSTRING(str2)->as.heap.len = len;
1786  }
1787  else {
1788  str2 = rb_str_new5(str, p, len);
1789  rb_enc_cr_str_copy_for_substr(str2, str);
1790  OBJ_INFECT(str2, str);
1791  RB_GC_GUARD(str);
1792  }
1793 
1794  return str2;
1795 }
1796 
1797 VALUE
1799 {
1800  if (STR_ASSOC_P(str)) {
1801  VALUE ary = RSTRING(str)->as.heap.aux.shared;
1802  OBJ_FREEZE(ary);
1803  }
1804  return rb_obj_freeze(str);
1805 }
1806 
1808 #define rb_str_dup_frozen rb_str_new_frozen
1809 
1810 VALUE
1811 rb_str_locktmp(VALUE str)
1812 {
1813  if (FL_TEST(str, STR_TMPLOCK)) {
1814  rb_raise(rb_eRuntimeError, "temporal locking already locked string");
1815  }
1816  FL_SET(str, STR_TMPLOCK);
1817  return str;
1818 }
1819 
1820 VALUE
1822 {
1823  if (!FL_TEST(str, STR_TMPLOCK)) {
1824  rb_raise(rb_eRuntimeError, "temporal unlocking already unlocked string");
1825  }
1826  FL_UNSET(str, STR_TMPLOCK);
1827  return str;
1828 }
1829 
1830 VALUE
1832 {
1833  rb_str_locktmp(str);
1834  return rb_ensure(func, arg, rb_str_unlocktmp, str);
1835 }
1836 
1837 void
1838 rb_str_set_len(VALUE str, long len)
1839 {
1840  long capa;
1841 
1842  str_modifiable(str);
1843  if (STR_SHARED_P(str)) {
1844  rb_raise(rb_eRuntimeError, "can't set length of shared string");
1845  }
1846  if (len > (capa = (long)rb_str_capacity(str))) {
1847  rb_bug("probable buffer overflow: %ld for %ld", len, capa);
1848  }
1849  STR_SET_LEN(str, len);
1850  RSTRING_PTR(str)[len] = '\0';
1851 }
1852 
/*
 * Change the length of str to len bytes and return str.
 *
 * Raises ArgumentError when len is negative.  The cached code range is
 * cleared even when the length does not change.  Depending on the current
 * representation and the target length, the string stays embedded, is
 * moved from the embedded area to the heap, is moved from the heap back
 * into the embedded area, is given its own buffer, or has its heap buffer
 * reallocated.  Bytes added by growing are not initialized here; only the
 * terminating '\0' is written.
 */
1853 VALUE
1854 rb_str_resize(VALUE str, long len)
1855 {
1856  long slen;
1857  int independent;
1858 
1859  if (len < 0) {
1860  rb_raise(rb_eArgError, "negative string size (or size too big)");
1861  }
1862 
      /* remembered so the shrink-to-embedded path knows whether it owns
       * (and must free) the old heap buffer */
1863  independent = str_independent(str);
1864  ENC_CODERANGE_CLEAR(str);
1865  slen = RSTRING_LEN(str);
1866  if (len != slen) {
1867  if (STR_EMBED_P(str)) {
      /* still fits in the embedded area: just adjust the length */
1868  if (len <= RSTRING_EMBED_LEN_MAX) {
1869  STR_SET_EMBED_LEN(str, len);
1870  RSTRING(str)->as.ary[len] = '\0';
1871  return str;
1872  }
      /* outgrew the embedded area: move to a heap buffer */
1873  str_make_independent_expand(str, len - slen);
1874  STR_SET_NOEMBED(str);
1875  }
1876  else if (len <= RSTRING_EMBED_LEN_MAX) {
      /* heap string shrinking enough to be embedded: copy the surviving
       * bytes into the embedded area */
1877  char *ptr = RSTRING(str)->as.heap.ptr;
1878  STR_SET_EMBED(str);
1879  if (slen > len) slen = len;
1880  if (slen > 0) MEMCPY(RSTRING(str)->as.ary, ptr, char, slen);
1881  RSTRING(str)->as.ary[len] = '\0';
1882  STR_SET_EMBED_LEN(str, len);
      /* free the old buffer only when this string owned it */
1883  if (independent) xfree(ptr);
1884  return str;
1885  }
1886  else if (!independent) {
1887  str_make_independent_expand(str, len - slen);
1888  }
      /* reallocate when growing, or when shrinking by more than 1024 bytes;
       * smaller shrinks keep the existing buffer */
1889  else if (slen < len || slen - len > 1024) {
1890  REALLOC_N(RSTRING(str)->as.heap.ptr, char, len+1);
1891  }
      /* aux holds a capacity only when the NOCAPA condition is false */
1892  if (!STR_NOCAPA_P(str)) {
1893  RSTRING(str)->as.heap.aux.capa = len;
1894  }
1895  RSTRING(str)->as.heap.len = len;
1896  RSTRING(str)->as.heap.ptr[len] = '\0'; /* sentinel */
1897  }
1898  return str;
1899 }
1900 
1901 static VALUE
1902 str_buf_cat(VALUE str, const char *ptr, long len)
1903 {
1904  long capa, total, off = -1;
1905 
1906  if (ptr >= RSTRING_PTR(str) && ptr <= RSTRING_END(str)) {
1907  off = ptr - RSTRING_PTR(str);
1908  }
1909  rb_str_modify(str);
1910  if (len == 0) return 0;
1911  if (STR_ASSOC_P(str)) {
1912  FL_UNSET(str, STR_ASSOC);
1913  capa = RSTRING(str)->as.heap.aux.capa = RSTRING_LEN(str);
1914  }
1915  else if (STR_EMBED_P(str)) {
1916  capa = RSTRING_EMBED_LEN_MAX;
1917  }
1918  else {
1919  capa = RSTRING(str)->as.heap.aux.capa;
1920  }
1921  if (RSTRING_LEN(str) >= LONG_MAX - len) {
1922  rb_raise(rb_eArgError, "string sizes too big");
1923  }
1924  total = RSTRING_LEN(str)+len;
1925  if (capa <= total) {
1926  while (total > capa) {
1927  if (capa + 1 >= LONG_MAX / 2) {
1928  capa = (total + 4095) / 4096;
1929  break;
1930  }
1931  capa = (capa + 1) * 2;
1932  }
1933  RESIZE_CAPA(str, capa);
1934  }
1935  if (off != -1) {
1936  ptr = RSTRING_PTR(str) + off;
1937  }
1938  memcpy(RSTRING_PTR(str) + RSTRING_LEN(str), ptr, len);
1939  STR_SET_LEN(str, total);
1940  RSTRING_PTR(str)[total] = '\0'; /* sentinel */
1941 
1942  return str;
1943 }
1944 
1945 #define str_buf_cat2(str, ptr) str_buf_cat((str), (ptr), strlen(ptr))
1946 
1947 VALUE
1948 rb_str_buf_cat(VALUE str, const char *ptr, long len)
1949 {
1950  if (len == 0) return str;
1951  if (len < 0) {
1952  rb_raise(rb_eArgError, "negative string size (or size too big)");
1953  }
1954  return str_buf_cat(str, ptr, len);
1955 }
1956 
1957 VALUE
1958 rb_str_buf_cat2(VALUE str, const char *ptr)
1959 {
1960  return rb_str_buf_cat(str, ptr, strlen(ptr));
1961 }
1962 
1963 VALUE
1964 rb_str_cat(VALUE str, const char *ptr, long len)
1965 {
1966  if (len < 0) {
1967  rb_raise(rb_eArgError, "negative string size (or size too big)");
1968  }
1969  if (STR_ASSOC_P(str)) {
1970  char *p;
1971  rb_str_modify_expand(str, len);
1972  p = RSTRING(str)->as.heap.ptr;
1973  memcpy(p + RSTRING(str)->as.heap.len, ptr, len);
1974  len = RSTRING(str)->as.heap.len += len;
1975  p[len] = '\0'; /* sentinel */
1976  return str;
1977  }
1978 
1979  return rb_str_buf_cat(str, ptr, len);
1980 }
1981 
1982 VALUE
1983 rb_str_cat2(VALUE str, const char *ptr)
1984 {
1985  return rb_str_cat(str, ptr, strlen(ptr));
1986 }
1987 
/*
 * Append len bytes at ptr to str, taking encodings and code ranges into
 * account, and return str.
 *
 *   ptr_encindex  encoding index of the bytes at ptr
 *   ptr_cr        code range of those bytes (may be ENC_CODERANGE_UNKNOWN)
 *   ptr_cr_ret    when non-null, receives the code range finally used for
 *                 ptr (possibly the result of a scan done here)
 *
 * Raises Encoding::CompatibilityError when the two encodings cannot be
 * combined, and ArgumentError when len is negative.  Note that the
 * negative-length check happens only after the compatibility checks.
 * On success str's encoding index and code range are set to the combined
 * result.
 */
1988 static VALUE
1989 rb_enc_cr_str_buf_cat(VALUE str, const char *ptr, long len,
1990  int ptr_encindex, int ptr_cr, int *ptr_cr_ret)
1991 {
1992  int str_encindex = ENCODING_GET(str);
1993  int res_encindex;
1994  int str_cr, res_cr;
1995 
1996  str_cr = ENC_CODERANGE(str);
1997 
1998  if (str_encindex == ptr_encindex) {
      /* same encoding: when str's range is unknown the result will be
       * unknown as well, so scanning ptr would be wasted work */
1999  if (str_cr == ENC_CODERANGE_UNKNOWN)
2000  ptr_cr = ENC_CODERANGE_UNKNOWN;
2001  else if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
2002  ptr_cr = coderange_scan(ptr, len, rb_enc_from_index(ptr_encindex));
2003  }
2004  }
2005  else {
2006  rb_encoding *str_enc = rb_enc_from_index(str_encindex);
2007  rb_encoding *ptr_enc = rb_enc_from_index(ptr_encindex);
      /* different encodings, at least one not ASCII-compatible: only
       * allowed when one side is empty */
2008  if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
2009  if (len == 0)
2010  return str;
2011  if (RSTRING_LEN(str) == 0) {
      /* empty receiver simply adopts ptr's encoding and code range */
2012  rb_str_buf_cat(str, ptr, len);
2013  ENCODING_CODERANGE_SET(str, ptr_encindex, ptr_cr);
2014  return str;
2015  }
2016  goto incompatible;
2017  }
2018  if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
2019  ptr_cr = coderange_scan(ptr, len, ptr_enc);
2020  }
      /* str's range only matters when it can decide compatibility */
2021  if (str_cr == ENC_CODERANGE_UNKNOWN) {
2022  if (ENCODING_IS_ASCII8BIT(str) || ptr_cr != ENC_CODERANGE_7BIT) {
2023  str_cr = rb_enc_str_coderange(str);
2024  }
2025  }
2026  }
2027  if (ptr_cr_ret)
2028  *ptr_cr_ret = ptr_cr;
2029 
      /* differing encodings are compatible only if at least one side is
       * 7-bit; the label is also the target of the goto above */
2030  if (str_encindex != ptr_encindex &&
2031  str_cr != ENC_CODERANGE_7BIT &&
2032  ptr_cr != ENC_CODERANGE_7BIT) {
2033  incompatible:
2034  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
2035  rb_enc_name(rb_enc_from_index(str_encindex)),
2036  rb_enc_name(rb_enc_from_index(ptr_encindex)));
2037  }
2038 
      /* work out the encoding index and code range of the result */
2039  if (str_cr == ENC_CODERANGE_UNKNOWN) {
2040  res_encindex = str_encindex;
2041  res_cr = ENC_CODERANGE_UNKNOWN;
2042  }
2043  else if (str_cr == ENC_CODERANGE_7BIT) {
2044  if (ptr_cr == ENC_CODERANGE_7BIT) {
2045  res_encindex = str_encindex;
2046  res_cr = ENC_CODERANGE_7BIT;
2047  }
2048  else {
      /* 7-bit receiver takes over the encoding of non-7-bit data */
2049  res_encindex = ptr_encindex;
2050  res_cr = ptr_cr;
2051  }
2052  }
2053  else if (str_cr == ENC_CODERANGE_VALID) {
2054  res_encindex = str_encindex;
2055  if (ptr_cr == ENC_CODERANGE_7BIT || ptr_cr == ENC_CODERANGE_VALID)
2056  res_cr = str_cr;
2057  else
2058  res_cr = ptr_cr;
2059  }
2060  else { /* str_cr == ENC_CODERANGE_BROKEN */
2061  res_encindex = str_encindex;
2062  res_cr = str_cr;
      /* appending bytes to a broken string forces a later re-scan */
2063  if (0 < len) res_cr = ENC_CODERANGE_UNKNOWN;
2064  }
2065 
2066  if (len < 0) {
2067  rb_raise(rb_eArgError, "negative string size (or size too big)");
2068  }
2069  str_buf_cat(str, ptr, len);
2070  ENCODING_CODERANGE_SET(str, res_encindex, res_cr);
2071  return str;
2072 }
2073 
2074 VALUE
2075 rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *ptr_enc)
2076 {
2077  return rb_enc_cr_str_buf_cat(str, ptr, len,
2079 }
2080 
2081 VALUE
2082 rb_str_buf_cat_ascii(VALUE str, const char *ptr)
2083 {
2084  /* ptr must reference NUL terminated ASCII string. */
2085  int encindex = ENCODING_GET(str);
2086  rb_encoding *enc = rb_enc_from_index(encindex);
2087  if (rb_enc_asciicompat(enc)) {
2088  return rb_enc_cr_str_buf_cat(str, ptr, strlen(ptr),
2089  encindex, ENC_CODERANGE_7BIT, 0);
2090  }
2091  else {
2092  char *buf = ALLOCA_N(char, rb_enc_mbmaxlen(enc));
2093  while (*ptr) {
2094  unsigned int c = (unsigned char)*ptr;
2095  int len = rb_enc_codelen(c, enc);
2096  rb_enc_mbcput(c, buf, enc);
2097  rb_enc_cr_str_buf_cat(str, buf, len,
2098  encindex, ENC_CODERANGE_VALID, 0);
2099  ptr++;
2100  }
2101  return str;
2102  }
2103 }
2104 
2105 VALUE
2107 {
2108  int str2_cr;
2109 
2110  str2_cr = ENC_CODERANGE(str2);
2111 
2112  rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
2113  ENCODING_GET(str2), str2_cr, &str2_cr);
2114 
2115  OBJ_INFECT(str, str2);
2116  ENC_CODERANGE_SET(str2, str2_cr);
2117 
2118  return str;
2119 }
2120 
2121 VALUE
2123 {
2124  rb_encoding *enc;
2125  int cr, cr2;
2126  long len2;
2127 
2128  StringValue(str2);
2129  if ((len2 = RSTRING_LEN(str2)) > 0 && STR_ASSOC_P(str)) {
2130  long len = RSTRING_LEN(str) + len2;
2131  enc = rb_enc_check(str, str2);
2132  cr = ENC_CODERANGE(str);
2133  if ((cr2 = ENC_CODERANGE(str2)) > cr) cr = cr2;
2134  rb_str_modify_expand(str, len2);
2135  memcpy(RSTRING(str)->as.heap.ptr + RSTRING(str)->as.heap.len,
2136  RSTRING_PTR(str2), len2+1);
2137  RSTRING(str)->as.heap.len = len;
2138  rb_enc_associate(str, enc);
2139  ENC_CODERANGE_SET(str, cr);
2140  OBJ_INFECT(str, str2);
2141  return str;
2142  }
2143  return rb_str_buf_append(str, str2);
2144 }
2145 
2146 /*
2147  * call-seq:
2148  * str << integer -> str
2149  * str.concat(integer) -> str
2150  * str << obj -> str
2151  * str.concat(obj) -> str
2152  *
2153  * Append---Concatenates the given object to <i>str</i>. If the object is a
2154  * <code>Integer</code>, it is considered as a codepoint, and is converted
2155  * to a character before concatenation.
2156  *
2157  * a = "hello "
2158  * a << "world" #=> "hello world"
2159  * a.concat(33) #=> "hello world!"
2160  */
2161 
2162 VALUE
2164 {
2165  unsigned int code;
2166  rb_encoding *enc = STR_ENC_GET(str1);
2167 
2168  if (FIXNUM_P(str2) || RB_TYPE_P(str2, T_BIGNUM)) {
2169  if (rb_num_to_uint(str2, &code) == 0) {
2170  }
2171  else if (FIXNUM_P(str2)) {
2172  rb_raise(rb_eRangeError, "%ld out of char range", FIX2LONG(str2));
2173  }
2174  else {
2175  rb_raise(rb_eRangeError, "bignum out of char range");
2176  }
2177  }
2178  else {
2179  return rb_str_append(str1, str2);
2180  }
2181 
2182  if (enc == rb_usascii_encoding()) {
2183  /* US-ASCII automatically extended to ASCII-8BIT */
2184  char buf[1];
2185  buf[0] = (char)code;
2186  if (code > 0xFF) {
2187  rb_raise(rb_eRangeError, "%u out of char range", code);
2188  }
2189  rb_str_cat(str1, buf, 1);
2190  if (code > 127) {
2193  }
2194  }
2195  else {
2196  long pos = RSTRING_LEN(str1);
2197  int cr = ENC_CODERANGE(str1);
2198  int len;
2199  char *buf;
2200 
2201  switch (len = rb_enc_codelen(code, enc)) {
2203  rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
2204  break;
2206  case 0:
2207  rb_raise(rb_eRangeError, "%u out of char range", code);
2208  break;
2209  }
2210  buf = ALLOCA_N(char, len + 1);
2211  rb_enc_mbcput(code, buf, enc);
2212  if (rb_enc_precise_mbclen(buf, buf + len + 1, enc) != len) {
2213  rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
2214  }
2215  rb_str_resize(str1, pos+len);
2216  memcpy(RSTRING_PTR(str1) + pos, buf, len);
2217  if (cr == ENC_CODERANGE_7BIT && code > 127)
2218  cr = ENC_CODERANGE_VALID;
2219  ENC_CODERANGE_SET(str1, cr);
2220  }
2221  return str1;
2222 }
2223 
2224 /*
2225  * call-seq:
2226  * str.prepend(other_str) -> str
2227  *
2228  * Prepend---Prepend the given string to <i>str</i>.
2229  *
2230  * a = "world"
2231  * a.prepend("hello ") #=> "hello world"
2232  * a #=> "hello world"
2233  */
2234 
2235 static VALUE
2237 {
2238  StringValue(str2);
2239  StringValue(str);
2240  rb_str_update(str, 0L, 0L, str2);
2241  return str;
2242 }
2243 
2244 st_index_t
2246 {
2247  int e = ENCODING_GET(str);
2248  if (e && rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) {
2249  e = 0;
2250  }
2251  return rb_memhash((const void *)RSTRING_PTR(str), RSTRING_LEN(str)) ^ e;
2252 }
2253 
2254 int
2256 {
2257  long len;
2258 
2259  if (!rb_str_comparable(str1, str2)) return 1;
2260  if (RSTRING_LEN(str1) == (len = RSTRING_LEN(str2)) &&
2261  memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len) == 0) {
2262  return 0;
2263  }
2264  return 1;
2265 }
2266 
2267 /*
2268  * call-seq:
2269  * str.hash -> fixnum
2270  *
2271  * Return a hash based on the string's length and content.
2272  */
2273 
2274 static VALUE
2276 {
2277  st_index_t hval = rb_str_hash(str);
2278  return INT2FIX(hval);
2279 }
2280 
2281 #define lesser(a,b) (((a)>(b))?(b):(a))
2282 
2283 int
2285 {
2286  int idx1, idx2;
2287  int rc1, rc2;
2288 
2289  if (RSTRING_LEN(str1) == 0) return TRUE;
2290  if (RSTRING_LEN(str2) == 0) return TRUE;
2291  idx1 = ENCODING_GET(str1);
2292  idx2 = ENCODING_GET(str2);
2293  if (idx1 == idx2) return TRUE;
2294  rc1 = rb_enc_str_coderange(str1);
2295  rc2 = rb_enc_str_coderange(str2);
2296  if (rc1 == ENC_CODERANGE_7BIT) {
2297  if (rc2 == ENC_CODERANGE_7BIT) return TRUE;
2299  return TRUE;
2300  }
2301  if (rc2 == ENC_CODERANGE_7BIT) {
2303  return TRUE;
2304  }
2305  return FALSE;
2306 }
2307 
2308 int
2310 {
2311  long len1, len2;
2312  const char *ptr1, *ptr2;
2313  int retval;
2314 
2315  if (str1 == str2) return 0;
2316  RSTRING_GETMEM(str1, ptr1, len1);
2317  RSTRING_GETMEM(str2, ptr2, len2);
2318  if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
2319  if (len1 == len2) {
2320  if (!rb_str_comparable(str1, str2)) {
2321  if (ENCODING_GET(str1) > ENCODING_GET(str2))
2322  return 1;
2323  return -1;
2324  }
2325  return 0;
2326  }
2327  if (len1 > len2) return 1;
2328  return -1;
2329  }
2330  if (retval > 0) return 1;
2331  return -1;
2332 }
2333 
2334 /* expect tail call optimization */
2335 static VALUE
2336 str_eql(const VALUE str1, const VALUE str2)
2337 {
2338  const long len = RSTRING_LEN(str1);
2339  const char *ptr1, *ptr2;
2340 
2341  if (len != RSTRING_LEN(str2)) return Qfalse;
2342  if (!rb_str_comparable(str1, str2)) return Qfalse;
2343  if ((ptr1 = RSTRING_PTR(str1)) == (ptr2 = RSTRING_PTR(str2)))
2344  return Qtrue;
2345  if (memcmp(ptr1, ptr2, len) == 0)
2346  return Qtrue;
2347  return Qfalse;
2348 }
2349 
2350 /*
2351  * call-seq:
2352  * str == obj -> true or false
2353  *
2354  * Equality---If <i>obj</i> is not a <code>String</code>, returns
2355  * <code>false</code>. Otherwise, returns <code>true</code> if <i>str</i>
2356  * <code><=></code> <i>obj</i> returns zero.
2357  */
2358 
2359 VALUE
2361 {
2362  if (str1 == str2) return Qtrue;
2363  if (!RB_TYPE_P(str2, T_STRING)) {
2364  if (!rb_respond_to(str2, rb_intern("to_str"))) {
2365  return Qfalse;
2366  }
2367  return rb_equal(str2, str1);
2368  }
2369  return str_eql(str1, str2);
2370 }
2371 
2372 /*
2373  * call-seq:
2374  * str.eql?(other) -> true or false
2375  *
2376  * Two strings are equal if they have the same length and content.
2377  */
2378 
2379 static VALUE
2381 {
2382  if (str1 == str2) return Qtrue;
2383  if (!RB_TYPE_P(str2, T_STRING)) return Qfalse;
2384  return str_eql(str1, str2);
2385 }
2386 
2387 /*
2388  * call-seq:
2389  * string <=> other_string -> -1, 0, +1 or nil
2390  *
2391  *
2392  * Comparison---Returns -1, 0, +1 or nil depending on whether +string+ is less
2393  * than, equal to, or greater than +other_string+.
2394  *
2395  * +nil+ is returned if the two values are incomparable.
2396  *
2397  * If the strings are of different lengths, and the strings are equal when
2398  * compared up to the shortest length, then the longer string is considered
2399  * greater than the shorter one.
2400  *
2401  * <code><=></code> is the basis for the methods <code><</code>,
2402  * <code><=</code>, <code>></code>, <code>>=</code>, and
2403  * <code>between?</code>, included from module Comparable. The method
2404  * String#== does not use Comparable#==.
2405  *
2406  * "abcdef" <=> "abcde" #=> 1
2407  * "abcdef" <=> "abcdef" #=> 0
2408  * "abcdef" <=> "abcdefg" #=> -1
2409  * "abcdef" <=> "ABCDEF" #=> 1
2410  */
2411 
2412 static VALUE
2414 {
2415  int result;
2416 
2417  if (!RB_TYPE_P(str2, T_STRING)) {
2418  VALUE tmp = rb_check_funcall(str2, rb_intern("to_str"), 0, 0);
2419  if (RB_TYPE_P(tmp, T_STRING)) {
2420  result = rb_str_cmp(str1, tmp);
2421  }
2422  else {
2423  return rb_invcmp(str1, str2);
2424  }
2425  }
2426  else {
2427  result = rb_str_cmp(str1, str2);
2428  }
2429  return INT2FIX(result);
2430 }
2431 
2432 /*
2433  * call-seq:
2434  * str.casecmp(other_str) -> -1, 0, +1 or nil
2435  *
2436  * Case-insensitive version of <code>String#<=></code>.
2437  *
2438  * "abcdef".casecmp("abcde") #=> 1
2439  * "aBcDeF".casecmp("abcdef") #=> 0
2440  * "abcdef".casecmp("abcdefg") #=> -1
2441  * "abcdef".casecmp("ABCDEF") #=> 0
2442  */
2443 
2444 static VALUE
2446 {
2447  long len;
2448  rb_encoding *enc;
2449  char *p1, *p1end, *p2, *p2end;
2450 
2451  StringValue(str2);
2452  enc = rb_enc_compatible(str1, str2);
2453  if (!enc) {
2454  return Qnil;
2455  }
2456 
2457  p1 = RSTRING_PTR(str1); p1end = RSTRING_END(str1);
2458  p2 = RSTRING_PTR(str2); p2end = RSTRING_END(str2);
2459  if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
2460  while (p1 < p1end && p2 < p2end) {
2461  if (*p1 != *p2) {
2462  unsigned int c1 = TOUPPER(*p1 & 0xff);
2463  unsigned int c2 = TOUPPER(*p2 & 0xff);
2464  if (c1 != c2)
2465  return INT2FIX(c1 < c2 ? -1 : 1);
2466  }
2467  p1++;
2468  p2++;
2469  }
2470  }
2471  else {
2472  while (p1 < p1end && p2 < p2end) {
2473  int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
2474  int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
2475 
2476  if (0 <= c1 && 0 <= c2) {
2477  c1 = TOUPPER(c1);
2478  c2 = TOUPPER(c2);
2479  if (c1 != c2)
2480  return INT2FIX(c1 < c2 ? -1 : 1);
2481  }
2482  else {
2483  int r;
2484  l1 = rb_enc_mbclen(p1, p1end, enc);
2485  l2 = rb_enc_mbclen(p2, p2end, enc);
2486  len = l1 < l2 ? l1 : l2;
2487  r = memcmp(p1, p2, len);
2488  if (r != 0)
2489  return INT2FIX(r < 0 ? -1 : 1);
2490  if (l1 != l2)
2491  return INT2FIX(l1 < l2 ? -1 : 1);
2492  }
2493  p1 += l1;
2494  p2 += l2;
2495  }
2496  }
2497  if (RSTRING_LEN(str1) == RSTRING_LEN(str2)) return INT2FIX(0);
2498  if (RSTRING_LEN(str1) > RSTRING_LEN(str2)) return INT2FIX(1);
2499  return INT2FIX(-1);
2500 }
2501 
/*
 * Search str for sub, starting at character index offset (a negative
 * offset counts from the end).
 *
 * Returns the BYTE offset of the first match, or a negative value when
 * there is no match, when sub is a broken string, or when offset is out
 * of range.  An empty sub matches at the (byte-converted) start offset.
 * rb_enc_check() is called first; presumably it raises for incompatible
 * encodings -- it is defined outside this section.
 */
2502 static long
2503 rb_str_index(VALUE str, VALUE sub, long offset)
2504 {
2505  long pos;
2506  char *s, *sptr, *e;
2507  long len, slen;
2508  rb_encoding *enc;
2509 
2510  enc = rb_enc_check(str, sub);
2511  if (is_broken_string(sub)) {
2512  return -1;
2513  }
      /* lengths in characters, used for the range checks below */
2514  len = str_strlen(str, enc);
2515  slen = str_strlen(sub, enc);
2516  if (offset < 0) {
2517  offset += len;
2518  if (offset < 0) return -1;
2519  }
      /* not enough characters left for sub to fit */
2520  if (len - offset < slen) return -1;
2521  s = RSTRING_PTR(str);
2522  e = s + RSTRING_LEN(str);
2523  if (offset) {
      /* from here on offset is a byte offset */
2524  offset = str_offset(s, RSTRING_END(str), offset, enc, single_byte_optimizable(str));
2525  s += offset;
2526  }
2527  if (slen == 0) return offset;
2528  /* need proceed one character at a time */
2529  sptr = RSTRING_PTR(sub);
      /* slen and len switch to byte lengths for the memory search */
2530  slen = RSTRING_LEN(sub);
2531  len = RSTRING_LEN(str) - offset;
2532  for (;;) {
2533  char *t;
2534  pos = rb_memsearch(sptr, slen, s, len, enc);
2535  if (pos < 0) return pos;
      /* accept the hit only if it starts on a character boundary;
       * otherwise resume the search from the next character head */
2536  t = rb_enc_right_char_head(s, s+pos, e, enc);
2537  if (t == s + pos) break;
2538  if ((len -= t - s) <= 0) return -1;
2539  offset += t - s;
2540  s = t;
2541  }
2542  return pos + offset;
2543 }
2544 
2545 
2546 /*
2547  * call-seq:
2548  * str.index(substring [, offset]) -> fixnum or nil
2549  * str.index(regexp [, offset]) -> fixnum or nil
2550  *
2551  * Returns the index of the first occurrence of the given <i>substring</i> or
2552  * pattern (<i>regexp</i>) in <i>str</i>. Returns <code>nil</code> if not
2553  * found. If the second parameter is present, it specifies the position in the
2554  * string to begin the search.
2555  *
2556  * "hello".index('e') #=> 1
2557  * "hello".index('lo') #=> 3
2558  * "hello".index('a') #=> nil
2559  * "hello".index(?e) #=> 1
2560  * "hello".index(/[aeiou]/, -3) #=> 4
2561  */
2562 
2563 static VALUE
2565 {
2566  VALUE sub;
2567  VALUE initpos;
2568  long pos;
2569 
2570  if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
2571  pos = NUM2LONG(initpos);
2572  }
2573  else {
2574  pos = 0;
2575  }
2576  if (pos < 0) {
2577  pos += str_strlen(str, STR_ENC_GET(str));
2578  if (pos < 0) {
2579  if (RB_TYPE_P(sub, T_REGEXP)) {
2581  }
2582  return Qnil;
2583  }
2584  }
2585 
2586  if (SPECIAL_CONST_P(sub)) goto generic;
2587  switch (BUILTIN_TYPE(sub)) {
2588  case T_REGEXP:
2589  if (pos > str_strlen(str, STR_ENC_GET(str)))
2590  return Qnil;
2591  pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
2592  rb_enc_check(str, sub), single_byte_optimizable(str));
2593 
2594  pos = rb_reg_search(sub, str, pos, 0);
2595  pos = rb_str_sublen(str, pos);
2596  break;
2597 
2598  generic:
2599  default: {
2600  VALUE tmp;
2601 
2602  tmp = rb_check_string_type(sub);
2603  if (NIL_P(tmp)) {
2604  rb_raise(rb_eTypeError, "type mismatch: %s given",
2605  rb_obj_classname(sub));
2606  }
2607  sub = tmp;
2608  }
2609  /* fall through */
2610  case T_STRING:
2611  pos = rb_str_index(str, sub, pos);
2612  pos = rb_str_sublen(str, pos);
2613  break;
2614  }
2615 
2616  if (pos == -1) return Qnil;
2617  return LONG2NUM(pos);
2618 }
2619 
/*
 * Search str backwards for sub, starting at character index pos and moving
 * towards the beginning.
 *
 * Returns the CHARACTER index of the match, or -1 when there is no match,
 * when sub is a broken string, or when sub has more characters than str.
 * A pos too close to the end for sub to fit is pulled back to the last
 * position where it can.  rb_enc_check() is called first; presumably it
 * raises for incompatible encodings -- it is defined outside this section.
 */
2620 static long
2621 rb_str_rindex(VALUE str, VALUE sub, long pos)
2622 {
2623  long len, slen;
2624  char *s, *sbeg, *e, *t;
2625  rb_encoding *enc;
2626  int singlebyte = single_byte_optimizable(str);
2627 
2628  enc = rb_enc_check(str, sub);
2629  if (is_broken_string(sub)) {
2630  return -1;
2631  }
      /* lengths in characters */
2632  len = str_strlen(str, enc);
2633  slen = str_strlen(sub, enc);
2634  /* substring longer than string */
2635  if (len < slen) return -1;
2636  if (len - pos < slen) {
2637  pos = len - slen;
2638  }
      /* empty str (so sub is empty as well, given the check above) */
2639  if (len == 0) {
2640  return pos;
2641  }
2642  sbeg = RSTRING_PTR(str);
2643  e = RSTRING_END(str);
2644  t = RSTRING_PTR(sub);
      /* slen switches to a byte length for memcmp() */
2645  slen = RSTRING_LEN(sub);
2646  s = str_nth(sbeg, e, pos, enc, singlebyte);
      /* compare at s, then step back one character at a time */
2647  while (s) {
2648  if (memcmp(s, t, slen) == 0) {
2649  return pos;
2650  }
2651  if (pos == 0) break;
2652  pos--;
2653  s = rb_enc_prev_char(sbeg, s, e, enc);
2654  }
2655  return -1;
2656 }
2657 
2658 
2659 /*
2660  * call-seq:
2661  * str.rindex(substring [, fixnum]) -> fixnum or nil
2662  * str.rindex(regexp [, fixnum]) -> fixnum or nil
2663  *
2664  * Returns the index of the last occurrence of the given <i>substring</i> or
2665  * pattern (<i>regexp</i>) in <i>str</i>. Returns <code>nil</code> if not
2666  * found. If the second parameter is present, it specifies the position in the
2667  * string to end the search---characters beyond this point will not be
2668  * considered.
2669  *
2670  * "hello".rindex('e') #=> 1
2671  * "hello".rindex('l') #=> 3
2672  * "hello".rindex('a') #=> nil
2673  * "hello".rindex(?e) #=> 1
2674  * "hello".rindex(/[aeiou]/, -2) #=> 1
2675  */
2676 
2677 static VALUE
2679 {
2680  VALUE sub;
2681  VALUE vpos;
2682  rb_encoding *enc = STR_ENC_GET(str);
2683  long pos, len = str_strlen(str, enc);
2684 
2685  if (rb_scan_args(argc, argv, "11", &sub, &vpos) == 2) {
2686  pos = NUM2LONG(vpos);
2687  if (pos < 0) {
2688  pos += len;
2689  if (pos < 0) {
2690  if (RB_TYPE_P(sub, T_REGEXP)) {
2692  }
2693  return Qnil;
2694  }
2695  }
2696  if (pos > len) pos = len;
2697  }
2698  else {
2699  pos = len;
2700  }
2701 
2702  if (SPECIAL_CONST_P(sub)) goto generic;
2703  switch (BUILTIN_TYPE(sub)) {
2704  case T_REGEXP:
2705  /* enc = rb_get_check(str, sub); */
2706  pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
2708 
2709  if (!RREGEXP(sub)->ptr || RREGEXP_SRC_LEN(sub)) {
2710  pos = rb_reg_search(sub, str, pos, 1);
2711  pos = rb_str_sublen(str, pos);
2712  }
2713  if (pos >= 0) return LONG2NUM(pos);
2714  break;
2715 
2716  generic:
2717  default: {
2718  VALUE tmp;
2719 
2720  tmp = rb_check_string_type(sub);
2721  if (NIL_P(tmp)) {
2722  rb_raise(rb_eTypeError, "type mismatch: %s given",
2723  rb_obj_classname(sub));
2724  }
2725  sub = tmp;
2726  }
2727  /* fall through */
2728  case T_STRING:
2729  pos = rb_str_rindex(str, sub, pos);
2730  if (pos >= 0) return LONG2NUM(pos);
2731  break;
2732  }
2733  return Qnil;
2734 }
2735 
2736 /*
2737  * call-seq:
2738  * str =~ obj -> fixnum or nil
2739  *
2740  * Match---If <i>obj</i> is a <code>Regexp</code>, uses it as a pattern to match
2741  * against <i>str</i>, and returns the position where the match starts, or
2742  * <code>nil</code> if there is no match. Otherwise, invokes
2743  * <i>obj.=~</i>, passing <i>str</i> as an argument. The default
2744  * <code>=~</code> in <code>Object</code> returns <code>nil</code>.
2745  *
2746  * Note: <code>str =~ regexp</code> is not the same as
2747  * <code>regexp =~ str</code>. Strings captured from named capture groups
2748  * are assigned to local variables only in the second case.
2749  *
2750  * "cat o' 9 tails" =~ /\d/ #=> 7
2751  * "cat o' 9 tails" =~ 9 #=> nil
2752  */
2753 
2754 static VALUE
2756 {
2757  if (SPECIAL_CONST_P(y)) goto generic;
2758  switch (BUILTIN_TYPE(y)) {
2759  case T_STRING:
2760  rb_raise(rb_eTypeError, "type mismatch: String given");
2761 
2762  case T_REGEXP:
2763  return rb_reg_match(y, x);
2764 
2765  generic:
2766  default:
2767  return rb_funcall(y, rb_intern("=~"), 1, x);
2768  }
2769 }
2770 
2771 
2772 static VALUE get_pat(VALUE, int);
2773 
2774 
2775 /*
2776  * call-seq:
2777  * str.match(pattern) -> matchdata or nil
2778  * str.match(pattern, pos) -> matchdata or nil
2779  *
2780  * Converts <i>pattern</i> to a <code>Regexp</code> (if it isn't already one),
2781  * then invokes its <code>match</code> method on <i>str</i>. If the second
2782  * parameter is present, it specifies the position in the string to begin the
2783  * search.
2784  *
2785  * 'hello'.match('(.)\1') #=> #<MatchData "ll" 1:"l">
2786  * 'hello'.match('(.)\1')[0] #=> "ll"
2787  * 'hello'.match(/(.)\1/)[0] #=> "ll"
2788  * 'hello'.match('xx') #=> nil
2789  *
2790  * If a block is given, invokes the block with the MatchData if the match succeeds, so
2791  * that you can write
2792  *
2793  * str.match(pat) {|m| ...}
2794  *
2795  * instead of
2796  *
2797  * if m = str.match(pat)
2798  * ...
2799  * end
2800  *
2801  * The return value is a value from block execution in this case.
2802  */
2803 
2804 static VALUE
2806 {
2807  VALUE re, result;
2808  if (argc < 1)
2809  rb_check_arity(argc, 1, 2);
2810  re = argv[0];
2811  argv[0] = str;
2812  result = rb_funcall2(get_pat(re, 0), rb_intern("match"), argc, argv);
2813  if (!NIL_P(result) && rb_block_given_p()) {
2814  return rb_yield(result);
2815  }
2816  return result;
2817 }
2818 
2823 };
2824 
2825 static enum neighbor_char
2826 enc_succ_char(char *p, long len, rb_encoding *enc)
2827 {
2828  long i;
2829  int l;
2830  while (1) {
2831  for (i = len-1; 0 <= i && (unsigned char)p[i] == 0xff; i--)
2832  p[i] = '\0';
2833  if (i < 0)
2834  return NEIGHBOR_WRAPPED;
2835  ++((unsigned char*)p)[i];
2836  l = rb_enc_precise_mbclen(p, p+len, enc);
2837  if (MBCLEN_CHARFOUND_P(l)) {
2838  l = MBCLEN_CHARFOUND_LEN(l);
2839  if (l == len) {
2840  return NEIGHBOR_FOUND;
2841  }
2842  else {
2843  memset(p+l, 0xff, len-l);
2844  }
2845  }
2846  if (MBCLEN_INVALID_P(l) && i < len-1) {
2847  long len2;
2848  int l2;
2849  for (len2 = len-1; 0 < len2; len2--) {
2850  l2 = rb_enc_precise_mbclen(p, p+len2, enc);
2851  if (!MBCLEN_INVALID_P(l2))
2852  break;
2853  }
2854  memset(p+len2+1, 0xff, len-(len2+1));
2855  }
2856  }
2857 }
2858 
2859 static enum neighbor_char
2860 enc_pred_char(char *p, long len, rb_encoding *enc)
2861 {
2862  long i;
2863  int l;
2864  while (1) {
2865  for (i = len-1; 0 <= i && (unsigned char)p[i] == 0; i--)
2866  p[i] = '\xff';
2867  if (i < 0)
2868  return NEIGHBOR_WRAPPED;
2869  --((unsigned char*)p)[i];
2870  l = rb_enc_precise_mbclen(p, p+len, enc);
2871  if (MBCLEN_CHARFOUND_P(l)) {
2872  l = MBCLEN_CHARFOUND_LEN(l);
2873  if (l == len) {
2874  return NEIGHBOR_FOUND;
2875  }
2876  else {
2877  memset(p+l, 0, len-l);
2878  }
2879  }
2880  if (MBCLEN_INVALID_P(l) && i < len-1) {
2881  long len2;
2882  int l2;
2883  for (len2 = len-1; 0 < len2; len2--) {
2884  l2 = rb_enc_precise_mbclen(p, p+len2, enc);
2885  if (!MBCLEN_INVALID_P(l2))
2886  break;
2887  }
2888  memset(p+len2+1, 0, len-(len2+1));
2889  }
2890  }
2891 }
2892 
2893 /*
2894  overwrite +p+ by succeeding letter in +enc+ and returns
2895  NEIGHBOR_FOUND or NEIGHBOR_WRAPPED.
2896  When NEIGHBOR_WRAPPED, carried-out letter is stored into carry.
2897  assuming each ranges are successive, and mbclen
2898  never change in each ranges.
2899  NEIGHBOR_NOT_CHAR is returned if invalid character or the range has only one
2900  character.
2901  */
2902 static enum neighbor_char
2903 enc_succ_alnum_char(char *p, long len, rb_encoding *enc, char *carry)
2904 {
2905  enum neighbor_char ret;
2906  unsigned int c;
2907  int ctype;
2908  int range;
2909  char save[ONIGENC_CODE_TO_MBC_MAXLEN];
2910 
2911  c = rb_enc_mbc_to_codepoint(p, p+len, enc);
2912  if (rb_enc_isctype(c, ONIGENC_CTYPE_DIGIT, enc))
2913  ctype = ONIGENC_CTYPE_DIGIT;
2914  else if (rb_enc_isctype(c, ONIGENC_CTYPE_ALPHA, enc))
2915  ctype = ONIGENC_CTYPE_ALPHA;
2916  else
2917  return NEIGHBOR_NOT_CHAR;
2918 
2919  MEMCPY(save, p, char, len);
2920  ret = enc_succ_char(p, len, enc);
2921  if (ret == NEIGHBOR_FOUND) {
2922  c = rb_enc_mbc_to_codepoint(p, p+len, enc);
2923  if (rb_enc_isctype(c, ctype, enc))
2924  return NEIGHBOR_FOUND;
2925  }
2926  MEMCPY(p, save, char, len);
2927  range = 1;
2928  while (1) {
2929  MEMCPY(save, p, char, len);
2930  ret = enc_pred_char(p, len, enc);
2931  if (ret == NEIGHBOR_FOUND) {
2932  c = rb_enc_mbc_to_codepoint(p, p+len, enc);
2933  if (!rb_enc_isctype(c, ctype, enc)) {
2934  MEMCPY(p, save, char, len);
2935  break;
2936  }
2937  }
2938  else {
2939  MEMCPY(p, save, char, len);
2940  break;
2941  }
2942  range++;
2943  }
2944  if (range == 1) {
2945  return NEIGHBOR_NOT_CHAR;
2946  }
2947 
2948  if (ctype != ONIGENC_CTYPE_DIGIT) {
2949  MEMCPY(carry, p, char, len);
2950  return NEIGHBOR_WRAPPED;
2951  }
2952 
2953  MEMCPY(carry, p, char, len);
2954  enc_succ_char(carry, len, enc);
2955  return NEIGHBOR_WRAPPED;
2956 }
2957 
2958 
2959 /*
2960  * call-seq:
2961  * str.succ -> new_str
2962  * str.next -> new_str
2963  *
2964  * Returns the successor to <i>str</i>. The successor is calculated by
2965  * incrementing characters starting from the rightmost alphanumeric (or
2966  * the rightmost character if there are no alphanumerics) in the
2967  * string. Incrementing a digit always results in another digit, and
2968  * incrementing a letter results in another letter of the same case.
2969  * Incrementing nonalphanumerics uses the underlying character set's
2970  * collating sequence.
2971  *
2972  * If the increment generates a ``carry,'' the character to the left of
2973  * it is incremented. This process repeats until there is no carry,
2974  * adding an additional character if necessary.
2975  *
2976  * "abcd".succ #=> "abce"
2977  * "THX1138".succ #=> "THX1139"
2978  * "<<koala>>".succ #=> "<<koalb>>"
2979  * "1999zzz".succ #=> "2000aaa"
2980  * "ZZZ9999".succ #=> "AAAA0000"
2981  * "***".succ #=> "**+"
2982  */
2983 
2984 VALUE
2986 {
2987  rb_encoding *enc;
2988  VALUE str;
2989  char *sbeg, *s, *e, *last_alnum = 0;
2990  int c = -1;
2991  long l;
2992  char carry[ONIGENC_CODE_TO_MBC_MAXLEN] = "\1";
2993  long carry_pos = 0, carry_len = 1;
2994  enum neighbor_char neighbor = NEIGHBOR_FOUND;
2995 
2996  str = rb_str_new5(orig, RSTRING_PTR(orig), RSTRING_LEN(orig));
2997  rb_enc_cr_str_copy_for_substr(str, orig);
2998  OBJ_INFECT(str, orig);
2999  if (RSTRING_LEN(str) == 0) return str;
3000 
3001  enc = STR_ENC_GET(orig);
3002  sbeg = RSTRING_PTR(str);
3003  s = e = sbeg + RSTRING_LEN(str);
3004 
3005  while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
3006  if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
3007  if (ISALPHA(*last_alnum) ? ISDIGIT(*s) :
3008  ISDIGIT(*last_alnum) ? ISALPHA(*s) : 0) {
3009  s = last_alnum;
3010  break;
3011  }
3012  }
3013  if ((l = rb_enc_precise_mbclen(s, e, enc)) <= 0) continue;
3014  neighbor = enc_succ_alnum_char(s, l, enc, carry);
3015  switch (neighbor) {
3016  case NEIGHBOR_NOT_CHAR:
3017  continue;
3018  case NEIGHBOR_FOUND:
3019  return str;
3020  case NEIGHBOR_WRAPPED:
3021  last_alnum = s;
3022  break;
3023  }
3024  c = 1;
3025  carry_pos = s - sbeg;
3026  carry_len = l;
3027  }
3028  if (c == -1) { /* str contains no alnum */
3029  s = e;
3030  while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
3031  enum neighbor_char neighbor;
3032  if ((l = rb_enc_precise_mbclen(s, e, enc)) <= 0) continue;
3033  neighbor = enc_succ_char(s, l, enc);
3034  if (neighbor == NEIGHBOR_FOUND)
3035  return str;
3036  if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
3037  /* wrapped to \0...\0. search next valid char. */
3038  enc_succ_char(s, l, enc);
3039  }
3040  if (!rb_enc_asciicompat(enc)) {
3041  MEMCPY(carry, s, char, l);
3042  carry_len = l;
3043  }
3044  carry_pos = s - sbeg;
3045  }
3046  }
3047  RESIZE_CAPA(str, RSTRING_LEN(str) + carry_len);
3048  s = RSTRING_PTR(str) + carry_pos;
3049  memmove(s + carry_len, s, RSTRING_LEN(str) - carry_pos);
3050  memmove(s, carry, carry_len);
3051  STR_SET_LEN(str, RSTRING_LEN(str) + carry_len);
3052  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
3053  rb_enc_str_coderange(str);
3054  return str;
3055 }
3056 
3057 
3058 /*
3059  * call-seq:
3060  * str.succ! -> str
3061  * str.next! -> str
3062  *
3063  * Equivalent to <code>String#succ</code>, but modifies the receiver in
3064  * place.
3065  */
3066 
3067 static VALUE
3069 {
3071 
3072  return str;
3073 }
3074 
3075 
3076 /*
3077  * call-seq:
3078  * str.upto(other_str, exclusive=false) {|s| block } -> str
3079  * str.upto(other_str, exclusive=false) -> an_enumerator
3080  *
3081  * Iterates through successive values, starting at <i>str</i> and
3082  * ending at <i>other_str</i> inclusive, passing each value in turn to
3083  * the block. The <code>String#succ</code> method is used to generate
3084  * each value. If optional second argument exclusive is omitted or is false,
3085  * the last value will be included; otherwise it will be excluded.
3086  *
3087  * If no block is given, an enumerator is returned instead.
3088  *
3089  * "a8".upto("b6") {|s| print s, ' ' }
3090  * for s in "a8".."b6"
3091  * print s, ' '
3092  * end
3093  *
3094  * <em>produces:</em>
3095  *
3096  * a8 a9 b0 b1 b2 b3 b4 b5 b6
3097  * a8 a9 b0 b1 b2 b3 b4 b5 b6
3098  *
3099  * If <i>str</i> and <i>other_str</i> contain only ASCII numeric characters,
3100  * both are recognized as decimal numbers. In addition, the width of
3101  * string (e.g. leading zeros) is handled appropriately.
3102  *
3103  * "9".upto("11").to_a #=> ["9", "10", "11"]
3104  * "25".upto("5").to_a #=> []
3105  * "07".upto("11").to_a #=> ["07", "08", "09", "10", "11"]
3106  */
3107 
3108 static VALUE
3110 {
3111  VALUE end, exclusive;
3112  VALUE current, after_end;
3113  ID succ;
3114  int n, excl, ascii;
3115  rb_encoding *enc;
3116 
3117  rb_scan_args(argc, argv, "11", &end, &exclusive);
3118  RETURN_ENUMERATOR(beg, argc, argv);
3119  excl = RTEST(exclusive);
3120  CONST_ID(succ, "succ");
3121  StringValue(end);
3122  enc = rb_enc_check(beg, end);
3123  ascii = (is_ascii_string(beg) && is_ascii_string(end));
3124  /* single character */
3125  if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
3126  char c = RSTRING_PTR(beg)[0];
3127  char e = RSTRING_PTR(end)[0];
3128 
3129  if (c > e || (excl && c == e)) return beg;
3130  for (;;) {
3131  rb_yield(rb_enc_str_new(&c, 1, enc));
3132  if (!excl && c == e) break;
3133  c++;
3134  if (excl && c == e) break;
3135  }
3136  return beg;
3137  }
3138  /* both edges are all digits */
3139  if (ascii && ISDIGIT(RSTRING_PTR(beg)[0]) && ISDIGIT(RSTRING_PTR(end)[0])) {
3140  char *s, *send;
3141  VALUE b, e;
3142  int width;
3143 
3144  s = RSTRING_PTR(beg); send = RSTRING_END(beg);
3145  width = rb_long2int(send - s);
3146  while (s < send) {
3147  if (!ISDIGIT(*s)) goto no_digits;
3148  s++;
3149  }
3150  s = RSTRING_PTR(end); send = RSTRING_END(end);
3151  while (s < send) {
3152  if (!ISDIGIT(*s)) goto no_digits;
3153  s++;
3154  }
3155  b = rb_str_to_inum(beg, 10, FALSE);
3156  e = rb_str_to_inum(end, 10, FALSE);
3157  if (FIXNUM_P(b) && FIXNUM_P(e)) {
3158  long bi = FIX2LONG(b);
3159  long ei = FIX2LONG(e);
3160  rb_encoding *usascii = rb_usascii_encoding();
3161 
3162  while (bi <= ei) {
3163  if (excl && bi == ei) break;
3164  rb_yield(rb_enc_sprintf(usascii, "%.*ld", width, bi));
3165  bi++;
3166  }
3167  }
3168  else {
3169  ID op = excl ? '<' : rb_intern("<=");
3170  VALUE args[2], fmt = rb_obj_freeze(rb_usascii_str_new_cstr("%.*d"));
3171 
3172  args[0] = INT2FIX(width);
3173  while (rb_funcall(b, op, 1, e)) {
3174  args[1] = b;
3175  rb_yield(rb_str_format(numberof(args), args, fmt));
3176  b = rb_funcall(b, succ, 0, 0);
3177  }
3178  }
3179  return beg;
3180  }
3181  /* normal case */
3182  no_digits:
3183  n = rb_str_cmp(beg, end);
3184  if (n > 0 || (excl && n == 0)) return beg;
3185 
3186  after_end = rb_funcall(end, succ, 0, 0);
3187  current = rb_str_dup(beg);
3188  while (!rb_str_equal(current, after_end)) {
3189  VALUE next = Qnil;
3190  if (excl || !rb_str_equal(current, end))
3191  next = rb_funcall(current, succ, 0, 0);
3192  rb_yield(current);
3193  if (NIL_P(next)) break;
3194  current = next;
3195  StringValue(current);
3196  if (excl && rb_str_equal(current, end)) break;
3197  if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
3198  break;
3199  }
3200 
3201  return beg;
3202 }
3203 
3204 static VALUE
3205 rb_str_subpat(VALUE str, VALUE re, VALUE backref)
3206 {
3207  if (rb_reg_search(re, str, 0, 0) >= 0) {
3209  int nth = rb_reg_backref_number(match, backref);
3210  return rb_reg_nth_match(nth, match);
3211  }
3212  return Qnil;
3213 }
3214 
3215 static VALUE
3217 {
3218  long idx;
3219 
3220  if (FIXNUM_P(indx)) {
3221  idx = FIX2LONG(indx);
3222 
3223  num_index:
3224  str = rb_str_substr(str, idx, 1);
3225  if (!NIL_P(str) && RSTRING_LEN(str) == 0) return Qnil;
3226  return str;
3227  }
3228 
3229  if (SPECIAL_CONST_P(indx)) goto generic;
3230  switch (BUILTIN_TYPE(indx)) {
3231  case T_REGEXP:
3232  return rb_str_subpat(str, indx, INT2FIX(0));
3233 
3234  case T_STRING:
3235  if (rb_str_index(str, indx, 0) != -1)
3236  return rb_str_dup(indx);
3237  return Qnil;
3238 
3239  generic:
3240  default:
3241  /* check if indx is Range */
3242  {
3243  long beg, len;
3244  VALUE tmp;
3245 
3246  len = str_strlen(str, STR_ENC_GET(str));
3247  switch (rb_range_beg_len(indx, &beg, &len, len, 0)) {
3248  case Qfalse:
3249  break;
3250  case Qnil:
3251  return Qnil;
3252  default:
3253  tmp = rb_str_substr(str, beg, len);
3254  return tmp;
3255  }
3256  }
3257  idx = NUM2LONG(indx);
3258  goto num_index;
3259  }
3260 
3261  UNREACHABLE;
3262 }
3263 
3264 
3265 /*
3266  * call-seq:
3267  * str[index] -> new_str or nil
3268  * str[start, length] -> new_str or nil
3269  * str[range] -> new_str or nil
3270  * str[regexp] -> new_str or nil
3271  * str[regexp, capture] -> new_str or nil
3272  * str[match_str] -> new_str or nil
3273  * str.slice(index) -> new_str or nil
3274  * str.slice(start, length) -> new_str or nil
3275  * str.slice(range) -> new_str or nil
3276  * str.slice(regexp) -> new_str or nil
3277  * str.slice(regexp, capture) -> new_str or nil
3278  * str.slice(match_str) -> new_str or nil
3279  *
3280  * Element Reference --- If passed a single +index+, returns a substring of
3281  * one character at that index. If passed a +start+ index and a +length+,
3282  * returns a substring containing +length+ characters starting at the
3283  * +index+. If passed a +range+, its beginning and end are interpreted as
3284  * offsets delimiting the substring to be returned.
3285  *
3286  * In these three cases, if an index is negative, it is counted from the end
3287  * of the string. For the +start+ and +range+ cases the starting index
3288  * refers to the position just before a character, so an index equal to the string's size is also valid.
3289  * Additionally, an empty string is returned when the starting index for a
3290  * character range is at the end of the string.
3291  *
3292  * Returns +nil+ if the initial index falls outside the string or the length
3293  * is negative.
3294  *
3295  * If a +Regexp+ is supplied, the matching portion of the string is
3296  * returned. If a +capture+, which may be a capture group index or name,
3297  * follows the regular expression, that component
3298  * of the MatchData is returned instead.
3299  *
3300  * If a +match_str+ is given, that string is returned if it occurs in
3301  * the string.
3302  *
3303  * Returns +nil+ if the regular expression does not match or the match string
3304  * cannot be found.
3305  *
3306  * a = "hello there"
3307  *
3308  * a[1] #=> "e"
3309  * a[2, 3] #=> "llo"
3310  * a[2..3] #=> "ll"
3311  *
3312  * a[-3, 2] #=> "er"
3313  * a[7..-2] #=> "her"
3314  * a[-4..-2] #=> "her"
3315  * a[-2..-4] #=> ""
3316  *
3317  * a[11, 0] #=> ""
3318  * a[11] #=> nil
3319  * a[12, 0] #=> nil
3320  * a[12..-1] #=> nil
3321  *
3322  * a[/[aeiou](.)\1/] #=> "ell"
3323  * a[/[aeiou](.)\1/, 0] #=> "ell"
3324  * a[/[aeiou](.)\1/, 1] #=> "l"
3325  * a[/[aeiou](.)\1/, 2] #=> nil
3326  *
3327  * a[/(?<vowel>[aeiou])(?<non_vowel>[^aeiou])/, "non_vowel"] #=> "l"
3328  * a[/(?<vowel>[aeiou])(?<non_vowel>[^aeiou])/, "vowel"] #=> "e"
3329  *
3330  * a["lo"] #=> "lo"
3331  * a["bye"] #=> nil
3332  */
3333 
3334 static VALUE
3336 {
3337  if (argc == 2) {
3338  if (RB_TYPE_P(argv[0], T_REGEXP)) {
3339  return rb_str_subpat(str, argv[0], argv[1]);
3340  }
3341  return rb_str_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]));
3342  }
3343  rb_check_arity(argc, 1, 2);
3344  return rb_str_aref(str, argv[0]);
3345 }
3346 
3347 VALUE
3348 rb_str_drop_bytes(VALUE str, long len)
3349 {
3350  char *ptr = RSTRING_PTR(str);
3351  long olen = RSTRING_LEN(str), nlen;
3352 
3353  str_modifiable(str);
3354  if (len > olen) len = olen;
3355  nlen = olen - len;
3356  if (nlen <= RSTRING_EMBED_LEN_MAX) {
3357  char *oldptr = ptr;
3358  int fl = (int)(RBASIC(str)->flags & (STR_NOEMBED|ELTS_SHARED));
3359  STR_SET_EMBED(str);
3360  STR_SET_EMBED_LEN(str, nlen);
3361  ptr = RSTRING(str)->as.ary;
3362  memmove(ptr, oldptr + len, nlen);
3363  if (fl == STR_NOEMBED) xfree(oldptr);
3364  }
3365  else {
3366  if (!STR_SHARED_P(str)) rb_str_new4(str);
3367  ptr = RSTRING(str)->as.heap.ptr += len;
3368  RSTRING(str)->as.heap.len = nlen;
3369  }
3370  ptr[nlen] = 0;
3371  ENC_CODERANGE_CLEAR(str);
3372  return str;
3373 }
3374 
3375 static void
3376 rb_str_splice_0(VALUE str, long beg, long len, VALUE val)
3377 {
3378  if (beg == 0 && RSTRING_LEN(val) == 0) {
3379  rb_str_drop_bytes(str, len);
3380  OBJ_INFECT(str, val);
3381  return;
3382  }
3383 
3384  rb_str_modify(str);
3385  if (len < RSTRING_LEN(val)) {
3386  /* expand string */
3387  RESIZE_CAPA(str, RSTRING_LEN(str) + RSTRING_LEN(val) - len + 1);
3388  }
3389 
3390  if (RSTRING_LEN(val) != len) {
3391  memmove(RSTRING_PTR(str) + beg + RSTRING_LEN(val),
3392  RSTRING_PTR(str) + beg + len,
3393  RSTRING_LEN(str) - (beg + len));
3394  }
3395  if (RSTRING_LEN(val) < beg && len < 0) {
3396  MEMZERO(RSTRING_PTR(str) + RSTRING_LEN(str), char, -len);
3397  }
3398  if (RSTRING_LEN(val) > 0) {
3399  memmove(RSTRING_PTR(str)+beg, RSTRING_PTR(val), RSTRING_LEN(val));
3400  }
3401  STR_SET_LEN(str, RSTRING_LEN(str) + RSTRING_LEN(val) - len);
3402  if (RSTRING_PTR(str)) {
3403  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
3404  }
3405  OBJ_INFECT(str, val);
3406 }
3407 
3408 static void
3409 rb_str_splice(VALUE str, long beg, long len, VALUE val)
3410 {
3411  long slen;
3412  char *p, *e;
3413  rb_encoding *enc;
3414  int singlebyte = single_byte_optimizable(str);
3415  int cr;
3416 
3417  if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
3418 
3419  StringValue(val);
3420  enc = rb_enc_check(str, val);
3421  slen = str_strlen(str, enc);
3422 
3423  if (slen < beg) {
3424  out_of_range:
3425  rb_raise(rb_eIndexError, "index %ld out of string", beg);
3426  }
3427  if (beg < 0) {
3428  if (-beg > slen) {
3429  goto out_of_range;
3430  }
3431  beg += slen;
3432  }
3433  if (slen < len || slen < beg + len) {
3434  len = slen - beg;
3435  }
3436  str_modify_keep_cr(str);
3437  p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc, singlebyte);
3438  if (!p) p = RSTRING_END(str);
3439  e = str_nth(p, RSTRING_END(str), len, enc, singlebyte);
3440  if (!e) e = RSTRING_END(str);
3441  /* error check */
3442  beg = p - RSTRING_PTR(str); /* physical position */
3443  len = e - p; /* physical length */
3444  rb_str_splice_0(str, beg, len, val);
3445  rb_enc_associate(str, enc);
3447  if (cr != ENC_CODERANGE_BROKEN)
3448  ENC_CODERANGE_SET(str, cr);
3449 }
3450 
3451 void
3452 rb_str_update(VALUE str, long beg, long len, VALUE val)
3453 {
3454  rb_str_splice(str, beg, len, val);
3455 }
3456 
3457 static void
3459 {
3460  int nth;
3461  VALUE match;
3462  long start, end, len;
3463  rb_encoding *enc;
3464  struct re_registers *regs;
3465 
3466  if (rb_reg_search(re, str, 0, 0) < 0) {
3467  rb_raise(rb_eIndexError, "regexp not matched");
3468  }
3469  match = rb_backref_get();
3470  nth = rb_reg_backref_number(match, backref);
3471  regs = RMATCH_REGS(match);
3472  if (nth >= regs->num_regs) {
3473  out_of_range:
3474  rb_raise(rb_eIndexError, "index %d out of regexp", nth);
3475  }
3476  if (nth < 0) {
3477  if (-nth >= regs->num_regs) {
3478  goto out_of_range;
3479  }
3480  nth += regs->num_regs;
3481  }
3482 
3483  start = BEG(nth);
3484  if (start == -1) {
3485  rb_raise(rb_eIndexError, "regexp group %d not matched", nth);
3486  }
3487  end = END(nth);
3488  len = end - start;
3489  StringValue(val);
3490  enc = rb_enc_check(str, val);
3491  rb_str_splice_0(str, start, len, val);
3492  rb_enc_associate(str, enc);
3493 }
3494 
3495 static VALUE
3497 {
3498  long idx, beg;
3499 
3500  if (FIXNUM_P(indx)) {
3501  idx = FIX2LONG(indx);
3502  num_index:
3503  rb_str_splice(str, idx, 1, val);
3504  return val;
3505  }
3506 
3507  if (SPECIAL_CONST_P(indx)) goto generic;
3508  switch (TYPE(indx)) {
3509  case T_REGEXP:
3510  rb_str_subpat_set(str, indx, INT2FIX(0), val);
3511  return val;
3512 
3513  case T_STRING:
3514  beg = rb_str_index(str, indx, 0);
3515  if (beg < 0) {
3516  rb_raise(rb_eIndexError, "string not matched");
3517  }
3518  beg = rb_str_sublen(str, beg);
3519  rb_str_splice(str, beg, str_strlen(indx, 0), val);
3520  return val;
3521 
3522  generic:
3523  default:
3524  /* check if indx is Range */
3525  {
3526  long beg, len;
3527  if (rb_range_beg_len(indx, &beg, &len, str_strlen(str, 0), 2)) {
3528  rb_str_splice(str, beg, len, val);
3529  return val;
3530  }
3531  }
3532  idx = NUM2LONG(indx);
3533  goto num_index;
3534  }
3535 }
3536 
3537 /*
3538  * call-seq:
3539  * str[fixnum] = new_str
3540  * str[fixnum, fixnum] = new_str
3541  * str[range] = new_str
3542  * str[regexp] = new_str
3543  * str[regexp, fixnum] = new_str
3544  * str[regexp, name] = new_str
3545  * str[other_str] = new_str
3546  *
3547  * Element Assignment---Replaces some or all of the content of <i>str</i>. The
3548  * portion of the string affected is determined using the same criteria as
3549  * <code>String#[]</code>. If the replacement string is not the same length as
3550  * the text it is replacing, the string will be adjusted accordingly. If the
3551  * regular expression or string used as the index doesn't match a position
3552  * in the string, <code>IndexError</code> is raised. If the regular expression
3553  * form is used, the optional second <code>Fixnum</code> allows you to specify
3554  * which portion of the match to replace (effectively using the
3555  * <code>MatchData</code> indexing rules). The forms that take a
3556  * <code>Fixnum</code> will raise an <code>IndexError</code> if the value is
3557  * out of range; the <code>Range</code> form will raise a
3558  * <code>RangeError</code>, and the <code>Regexp</code> and <code>String</code>
3559  * will raise an <code>IndexError</code> on negative match.
3560  */
3561 
3562 static VALUE
3564 {
3565  if (argc == 3) {
3566  if (RB_TYPE_P(argv[0], T_REGEXP)) {
3567  rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
3568  }
3569  else {
3570  rb_str_splice(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]), argv[2]);
3571  }
3572  return argv[2];
3573  }
3574  rb_check_arity(argc, 2, 3);
3575  return rb_str_aset(str, argv[0], argv[1]);
3576 }
3577 
3578 /*
3579  * call-seq:
3580  * str.insert(index, other_str) -> str
3581  *
3582  * Inserts <i>other_str</i> before the character at the given
3583  * <i>index</i>, modifying <i>str</i>. Negative indices count from the
3584  * end of the string, and insert <em>after</em> the given character.
3585  * The intent is to insert <i>other_str</i> so that it starts at the given
3586  * <i>index</i>.
3587  *
3588  * "abcd".insert(0, 'X') #=> "Xabcd"
3589  * "abcd".insert(3, 'X') #=> "abcXd"
3590  * "abcd".insert(4, 'X') #=> "abcdX"
3591  * "abcd".insert(-3, 'X') #=> "abXcd"
3592  * "abcd".insert(-1, 'X') #=> "abcdX"
3593  */
3594 
3595 static VALUE
3597 {
3598  long pos = NUM2LONG(idx);
3599 
3600  if (pos == -1) {
3601  return rb_str_append(str, str2);
3602  }
3603  else if (pos < 0) {
3604  pos++;
3605  }
3606  rb_str_splice(str, pos, 0, str2);
3607  return str;
3608 }
3609 
3610 
3611 /*
3612  * call-seq:
3613  * str.slice!(fixnum) -> new_str or nil
3614  * str.slice!(fixnum, fixnum) -> new_str or nil
3615  * str.slice!(range) -> new_str or nil
3616  * str.slice!(regexp) -> new_str or nil
3617  * str.slice!(other_str) -> new_str or nil
3618  *
3619  * Deletes the specified portion from <i>str</i>, and returns the portion
3620  * deleted.
3621  *
3622  * string = "this is a string"
3623  * string.slice!(2) #=> "i"
3624  * string.slice!(3..6) #=> " is "
3625  * string.slice!(/s.*t/) #=> "sa st"
3626  * string.slice!("r") #=> "r"
3627  * string #=> "thing"
3628  */
3629 
3630 static VALUE
3632 {
3633  VALUE result;
3634  VALUE buf[3];
3635  int i;
3636 
3637  rb_check_arity(argc, 1, 2);
3638  for (i=0; i<argc; i++) {
3639  buf[i] = argv[i];
3640  }
3641  str_modify_keep_cr(str);
3642  result = rb_str_aref_m(argc, buf, str);
3643  if (!NIL_P(result)) {
3644  buf[i] = rb_str_new(0,0);
3645  rb_str_aset_m(argc+1, buf, str);
3646  }
3647  return result;
3648 }
3649 
3650 static VALUE
3651 get_pat(VALUE pat, int quote)
3652 {
3653  VALUE val;
3654 
3655  switch (TYPE(pat)) {
3656  case T_REGEXP:
3657  return pat;
3658 
3659  case T_STRING:
3660  break;
3661 
3662  default:
3663  val = rb_check_string_type(pat);
3664  if (NIL_P(val)) {
3665  Check_Type(pat, T_REGEXP);
3666  }
3667  pat = val;
3668  }
3669 
3670  if (quote) {
3671  pat = rb_reg_quote(pat);
3672  }
3673 
3674  return rb_reg_regcomp(pat);
3675 }
3676 
3677 
3678 /*
3679  * call-seq:
3680  * str.sub!(pattern, replacement) -> str or nil
3681  * str.sub!(pattern) {|match| block } -> str or nil
3682  *
3683  * Performs the same substitution as String#sub in-place.
3684  *
3685  * Returns +str+ if a substitution was performed or +nil+ if no substitution
3686  * was performed.
3687  */
3688 
3689 static VALUE
3691 {
3692  VALUE pat, repl, hash = Qnil;
3693  int iter = 0;
3694  int tainted = 0;
3695  int untrusted = 0;
3696  long plen;
3697  int min_arity = rb_block_given_p() ? 1 : 2;
3698 
3699  rb_check_arity(argc, min_arity, 2);
3700  if (argc == 1) {
3701  iter = 1;
3702  }
3703  else {
3704  repl = argv[1];
3705  hash = rb_check_hash_type(argv[1]);
3706  if (NIL_P(hash)) {
3707  StringValue(repl);
3708  }
3709  if (OBJ_TAINTED(repl)) tainted = 1;
3710  if (OBJ_UNTRUSTED(repl)) untrusted = 1;
3711  }
3712 
3713  pat = get_pat(argv[0], 1);
3714  str_modifiable(str);
3715  if (rb_reg_search(pat, str, 0, 0) >= 0) {
3716  rb_encoding *enc;
3717  int cr = ENC_CODERANGE(str);
3719  struct re_registers *regs = RMATCH_REGS(match);
3720  long beg0 = BEG(0);
3721  long end0 = END(0);
3722  char *p, *rp;
3723  long len, rlen;
3724 
3725  if (iter || !NIL_P(hash)) {
3726  p = RSTRING_PTR(str); len = RSTRING_LEN(str);
3727 
3728  if (iter) {
3729  repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
3730  }
3731  else {
3732  repl = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
3733  repl = rb_obj_as_string(repl);
3734  }
3735  str_mod_check(str, p, len);
3736  rb_check_frozen(str);
3737  }
3738  else {
3739  repl = rb_reg_regsub(repl, str, regs, pat);
3740  }
3741  enc = rb_enc_compatible(str, repl);
3742  if (!enc) {
3743  rb_encoding *str_enc = STR_ENC_GET(str);
3744  p = RSTRING_PTR(str); len = RSTRING_LEN(str);
3745  if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
3746  coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
3747  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
3748  rb_enc_name(str_enc),
3749  rb_enc_name(STR_ENC_GET(repl)));
3750  }
3751  enc = STR_ENC_GET(repl);
3752  }
3753  rb_str_modify(str);
3754  rb_enc_associate(str, enc);
3755  if (OBJ_TAINTED(repl)) tainted = 1;
3756  if (OBJ_UNTRUSTED(repl)) untrusted = 1;
3757  if (ENC_CODERANGE_UNKNOWN < cr && cr < ENC_CODERANGE_BROKEN) {
3758  int cr2 = ENC_CODERANGE(repl);
3759  if (cr2 == ENC_CODERANGE_BROKEN ||
3760  (cr == ENC_CODERANGE_VALID && cr2 == ENC_CODERANGE_7BIT))
3761  cr = ENC_CODERANGE_UNKNOWN;
3762  else
3763  cr = cr2;
3764  }
3765  plen = end0 - beg0;
3766  rp = RSTRING_PTR(repl); rlen = RSTRING_LEN(repl);
3767  len = RSTRING_LEN(str);
3768  if (rlen > plen) {
3769  RESIZE_CAPA(str, len + rlen - plen);
3770  }
3771  p = RSTRING_PTR(str);
3772  if (rlen != plen) {
3773  memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen);
3774  }
3775  memcpy(p + beg0, rp, rlen);
3776  len += rlen - plen;
3777  STR_SET_LEN(str, len);
3778  RSTRING_PTR(str)[len] = '\0';
3779  ENC_CODERANGE_SET(str, cr);
3780  if (tainted) OBJ_TAINT(str);
3781  if (untrusted) OBJ_UNTRUST(str);
3782 
3783  return str;
3784  }
3785  return Qnil;
3786 }
3787 
3788 
3789 /*
3790  * call-seq:
3791  * str.sub(pattern, replacement) -> new_str
3792  * str.sub(pattern, hash) -> new_str
3793  * str.sub(pattern) {|match| block } -> new_str
3794  *
3795  * Returns a copy of +str+ with the _first_ occurrence of +pattern+
3796  * replaced by the second argument. The +pattern+ is typically a Regexp; if
3797  * given as a String, any regular expression metacharacters it contains will
3798  * be interpreted literally, e.g. <code>'\\\d'</code> will match a backslash
3799  * followed by 'd', instead of a digit.
3800  *
3801  * If +replacement+ is a String it will be substituted for the matched text.
3802  * It may contain back-references to the pattern's capture groups of the form
3803  * <code>"\\d"</code>, where <i>d</i> is a group number, or
3804  * <code>"\\k<n>"</code>, where <i>n</i> is a group name. If it is a
3805  * double-quoted string, both back-references must be preceded by an
3806  * additional backslash. However, within +replacement+ the special match
3807  * variables, such as <code>$&</code>, will not refer to the current match.
3808  *
3809  * If the second argument is a Hash, and the matched text is one of its keys,
3810  * the corresponding value is the replacement string.
3811  *
3812  * In the block form, the current match string is passed in as a parameter,
3813  * and variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
3814  * <code>$&</code>, and <code>$'</code> will be set appropriately. The value
3815  * returned by the block will be substituted for the match on each call.
3816  *
3817  * The result inherits any tainting in the original string or any supplied
3818  * replacement string.
3819  *
3820  * "hello".sub(/[aeiou]/, '*') #=> "h*llo"
3821  * "hello".sub(/([aeiou])/, '<\1>') #=> "h<e>llo"
3822  * "hello".sub(/./) {|s| s.ord.to_s + ' ' } #=> "104 ello"
3823  * "hello".sub(/(?<foo>[aeiou])/, '*\k<foo>*') #=> "h*e*llo"
3824  * 'Is SHELL your preferred shell?'.sub(/[[:upper:]]{2,}/, ENV)
3825  * #=> "Is /bin/bash your preferred shell?"
3826  */
3827 
3828 static VALUE
3830 {
 /*
  * Non-destructive variant: duplicate the receiver and run
  * rb_str_sub_bang() on the duplicate.  The return value of
  * rb_str_sub_bang() is ignored, so the duplicate is returned whether
  * or not a substitution took place.
  *
  * NOTE(review): the listing line that carried this function's name and
  * parameter list (3829) is absent from this extract; presumably this is
  * the String#sub implementation documented above -- restore the line
  * from the upstream source.
  */
3831  str = rb_str_dup(str);
3832  rb_str_sub_bang(argc, argv, str);
3833  return str;
3834 }
3835 
/*
 * Global substitution worker; called in this file with bang = 1 and with
 * bang = 0.
 *
 * argc/argv: argv[0] is the pattern (passed through get_pat()).  With two
 *            arguments argv[1] is the replacement: a Hash if
 *            rb_check_hash_type() accepts it, otherwise it is converted
 *            with StringValue().  With one argument RETURN_ENUMERATOR()
 *            is invoked and every match is yielded to the block.
 *            Any other argc is rejected by rb_check_arity(argc, 1, 2).
 * str:       the string that is searched.
 * bang:      non-zero -> the contents of str are replaced with the result
 *            and str is returned, or Qnil when the pattern never matched;
 *            zero     -> a new string is returned (rb_str_dup(str) when
 *            the pattern never matched).
 *
 * Raises RuntimeError ("block should not cheat") when the block or the
 * hash lookup hands back the internal destination buffer itself.
 *
 * NOTE(review): listing line 3882 (right after the rb_enc_associate()
 * call) is absent from this extract.
 */
3836 static VALUE
3837 str_gsub(int argc, VALUE *argv, VALUE str, int bang)
3838 {
3839  VALUE pat, val, repl, match, dest, hash = Qnil;
3840  struct re_registers *regs;
3841  long beg, n;
3842  long beg0, end0;
3843  long offset, blen, slen, len, last;
3844  int iter = 0;
3845  char *sp, *cp;
3846  int tainted = 0;
3847  rb_encoding *str_enc;
3848 
 /* Decide where replacement text comes from: block, hash or string. */
3849  switch (argc) {
3850  case 1:
3851  RETURN_ENUMERATOR(str, argc, argv);
3852  iter = 1;
3853  break;
3854  case 2:
3855  repl = argv[1];
3856  hash = rb_check_hash_type(argv[1]);
3857  if (NIL_P(hash)) {
3858  StringValue(repl);
3859  }
3860  if (OBJ_TAINTED(repl)) tainted = 1;
3861  break;
3862  default:
3863  rb_check_arity(argc, 1, 2);
3864  }
3865 
3866  pat = get_pat(argv[0], 1);
3867  beg = rb_reg_search(pat, str, 0, 0);
3868  if (beg < 0) {
3869  if (bang) return Qnil; /* no match, no substitution */
3870  return rb_str_dup(str);
3871  }
3872 
 /*
  * offset: byte position in str up to which input has been consumed.
  * cp:     pointer to that position (start of the next unmatched run).
  * sp/slen: pointer and length of str as seen before any callback, used
  *          by str_mod_check() after the block or hash lookup has run.
  * n:      incremented once per match; not read anywhere in this function.
  */
3873  offset = 0;
3874  n = 0;
3875  blen = RSTRING_LEN(str) + 30; /* len + margin */
3876  dest = rb_str_buf_new(blen);
3877  sp = RSTRING_PTR(str);
3878  slen = RSTRING_LEN(str);
3879  cp = sp;
3880  str_enc = STR_ENC_GET(str);
3881  rb_enc_associate(dest, str_enc);
3883 
3884  do {
3885  n++;
3886  match = rb_backref_get();
3887  regs = RMATCH_REGS(match);
 /* Bounds of the whole match, captured before any callback can run. */
3888  beg0 = BEG(0);
3889  end0 = END(0);
3890  if (iter || !NIL_P(hash)) {
3891  if (iter) {
3892  val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
3893  }
3894  else {
3895  val = rb_hash_aref(hash, rb_str_subseq(str, BEG(0), END(0) - BEG(0)));
3896  val = rb_obj_as_string(val);
3897  }
 /* The callback ran arbitrary code; re-validate str against sp/slen. */
3898  str_mod_check(str, sp, slen);
3899  if (val == dest) { /* paranoid check [ruby-dev:24827] */
3900  rb_raise(rb_eRuntimeError, "block should not cheat");
3901  }
3902  }
3903  else {
3904  val = rb_reg_regsub(repl, str, regs, pat);
3905  }
3906 
3907  if (OBJ_TAINTED(val)) tainted = 1;
3908 
3909  len = beg0 - offset; /* copy pre-match substr */
3910  if (len) {
3911  rb_enc_str_buf_cat(dest, cp, len, str_enc);
3912  }
3913 
3914  rb_str_buf_append(dest, val);
3915 
 /* Remember where this iteration's search started (used after the loop). */
3916  last = offset;
3917  offset = end0;
3918  if (beg0 == end0) {
3919  /*
3920  * Always consume at least one character of the input string
3921  * in order to prevent infinite loops.
3922  */
3923  if (RSTRING_LEN(str) <= end0) break;
3924  len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0, RSTRING_END(str), str_enc);
3925  rb_enc_str_buf_cat(dest, RSTRING_PTR(str)+end0, len, str_enc);
3926  offset = end0 + len;
3927  }
3928  cp = RSTRING_PTR(str) + offset;
3929  if (offset > RSTRING_LEN(str)) break;
3930  beg = rb_reg_search(pat, str, offset, 0);
3931  } while (beg >= 0);
 /* Append whatever follows the last match. */
3932  if (RSTRING_LEN(str) > offset) {
3933  rb_enc_str_buf_cat(dest, cp, RSTRING_LEN(str) - offset, str_enc);
3934  }
 /*
  * NOTE(review): the result of this search is discarded; presumably it is
  * repeated from `last` so that the back-reference state describes the
  * final successful match rather than the failed search that ended the
  * loop -- confirm against rb_reg_search().
  */
3935  rb_reg_search(pat, str, last, 0);
3936  if (bang) {
3937  rb_str_shared_replace(str, dest);
3938  }
3939  else {
 /* Give the new string the receiver's class and propagate its flags. */
3940  RBASIC(dest)->klass = rb_obj_class(str);
3941  OBJ_INFECT(dest, str);
3942  str = dest;
3943  }
3944 
3945  if (tainted) OBJ_TAINT(str);
3946  return str;
3947 }
3948 
3949 
3950 /*
3951  * call-seq:
3952  * str.gsub!(pattern, replacement) -> str or nil
3953  * str.gsub!(pattern) {|match| block } -> str or nil
3954  * str.gsub!(pattern) -> an_enumerator
3955  *
3956  * Performs the substitutions of <code>String#gsub</code> in place, returning
3957  * <i>str</i>, or <code>nil</code> if no substitutions were performed.
3958  * If no block and no <i>replacement</i> is given, an enumerator is returned instead.
3959  */
3960 
3961 static VALUE
3963 {
 /*
  * In-place form: str_modify_keep_cr() is called on the receiver first,
  * then the work is delegated to str_gsub() with bang = 1, which returns
  * str or Qnil when nothing matched.
  *
  * NOTE(review): the listing line with this function's name and
  * parameter list (3962) is absent from this extract.
  */
3964  str_modify_keep_cr(str);
3965  return str_gsub(argc, argv, str, 1);
3966 }
3967 
3968 
3969 /*
3970  * call-seq:
3971  * str.gsub(pattern, replacement) -> new_str
3972  * str.gsub(pattern, hash) -> new_str
3973  * str.gsub(pattern) {|match| block } -> new_str
3974  * str.gsub(pattern) -> enumerator
3975  *
3976  * Returns a copy of <i>str</i> with <em>all</em> occurrences of
3977  * <i>pattern</i> substituted for the second argument. The <i>pattern</i> is
3978  * typically a <code>Regexp</code>; if given as a <code>String</code>, any
3979  * regular expression metacharacters it contains will be interpreted
3980  * literally, e.g. <code>'\\\d'</code> will match a backslash followed by 'd',
3981  * instead of a digit.
3982  *
3983  * If <i>replacement</i> is a <code>String</code> it will be substituted for
3984  * the matched text. It may contain back-references to the pattern's capture
3985  * groups of the form <code>\\\d</code>, where <i>d</i> is a group number, or
3986  * <code>\\\k<n></code>, where <i>n</i> is a group name. If it is a
3987  * double-quoted string, both back-references must be preceded by an
3988  * additional backslash. However, within <i>replacement</i> the special match
3989  * variables, such as <code>$&</code>, will not refer to the current match.
3990  *
3991  * If the second argument is a <code>Hash</code>, and the matched text is one
3992  * of its keys, the corresponding value is the replacement string.
3993  *
3994  * In the block form, the current match string is passed in as a parameter,
3995  * and variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
3996  * <code>$&</code>, and <code>$'</code> will be set appropriately. The value
3997  * returned by the block will be substituted for the match on each call.
3998  *
3999  * The result inherits any tainting in the original string or any supplied
4000  * replacement string.
4001  *
4002  * When neither a block nor a second argument is supplied, an
4003  * <code>Enumerator</code> is returned.
4004  *
4005  * "hello".gsub(/[aeiou]/, '*') #=> "h*ll*"
4006  * "hello".gsub(/([aeiou])/, '<\1>') #=> "h<e>ll<o>"
4007  * "hello".gsub(/./) {|s| s.ord.to_s + ' '} #=> "104 101 108 108 111 "
4008  * "hello".gsub(/(?<foo>[aeiou])/, '{\k<foo>}') #=> "h{e}ll{o}"
4009  * 'hello'.gsub(/[eo]/, 'e' => 3, 'o' => '*') #=> "h3ll*"
4010  */
4011 
4012 static VALUE
4014 {
 /*
  * Non-destructive form: delegates to str_gsub() with bang = 0, which
  * always returns a string distinct from the receiver.
  *
  * NOTE(review): the listing line with this function's name and
  * parameter list (4013) is absent from this extract.
  */
4015  return str_gsub(argc, argv, str, 0);
4016 }
4017 
4018 
4019 /*
4020  * call-seq:
4021  * str.replace(other_str) -> str
4022  *
4023  * Replaces the contents and taintedness of <i>str</i> with the corresponding
4024  * values in <i>other_str</i>.
4025  *
4026  * s = "hello" #=> "hello"
4027  * s.replace "world" #=> "world"
4028  */
4029 
4030 VALUE
4032 {
 /*
  * NOTE(review): the listing line with this function's name and
  * parameter list (4031) is absent from this extract; the body uses two
  * values, str (the receiver) and str2 (the new contents).
  */
4033  str_modifiable(str);
 /* Replacing a string with itself is a no-op once the check above passed. */
4034  if (str == str2) return str;
4035 
 /* Convert the argument first, then drop the old contents and copy. */
4036  StringValue(str2);
4037  str_discard(str);
4038  return str_replace(str, str2);
4039 }
4040 
4041 /*
4042  * call-seq:
4043  * string.clear -> string
4044  *
4045  * Makes string empty.
4046  *
4047  * a = "abcde"
4048  * a.clear #=> ""
4049  */
4050 
4051 static VALUE
4053 {
 /*
  * Discards the current contents, switches the string to its embedded
  * representation with length 0 and writes a terminating NUL byte.
  *
  * NOTE(review): listing lines 4052 (function name and parameters), 4059
  * and 4061 (the statements belonging to the if/else below) are absent
  * from this extract, so the block is not complete C as shown.
  */
4054  str_discard(str);
4055  STR_SET_EMBED(str);
4056  STR_SET_EMBED_LEN(str, 0);
4057  RSTRING_PTR(str)[0] = 0;
 /* Branches on whether the string's encoding is ASCII-compatible. */
4058  if (rb_enc_asciicompat(STR_ENC_GET(str)))
4060  else
4062  return str;
4063 }
4064 
4065 /*
4066  * call-seq:
4067  * string.chr -> string
4068  *
4069  * Returns a one-character string at the beginning of the string.
4070  *
4071  * a = "abcde"
4072  * a.chr #=> "a"
4073  */
4074 
4075 static VALUE
4077 {
 /*
  * Returns rb_str_substr(str, 0, 1), i.e. the substring of length 1
  * starting at index 0.
  *
  * NOTE(review): the listing line with this function's name and
  * parameter list (4076) is absent from this extract.
  */
4078  return rb_str_substr(str, 0, 1);
4079 }
4080 
4081 /*
4082  * call-seq:
4083  * str.getbyte(index) -> 0 .. 255
4084  *
4085  * returns the <i>index</i>th byte as an integer.
4086  */
4087 static VALUE
4089 {
 /*
  * NOTE(review): the listing line with this function's name and
  * parameter list (4088) is absent from this extract; the body reads
  * `str` and `index`.
  */
4090  long pos = NUM2LONG(index);
4091 
 /* A negative index counts back from the end of the string. */
4092  if (pos < 0)
4093  pos += RSTRING_LEN(str);
 /* Out-of-range positions yield nil instead of raising. */
4094  if (pos < 0 || RSTRING_LEN(str) <= pos)
4095  return Qnil;
4096 
 /* Cast through unsigned char so the result is always in 0..255. */
4097  return INT2FIX((unsigned char)RSTRING_PTR(str)[pos]);
4098 }
4099 
4100 /*
4101  * call-seq:
4102  * str.setbyte(index, integer) -> integer
4103  *
4104  * Sets the <i>index</i>th byte of <i>str</i> to <i>integer</i>.
4105  */
4106 static VALUE
4107 rb_str_setbyte(VALUE str, VALUE index, VALUE value)
4108 {
4109  long pos = NUM2LONG(index);
4110  int byte = NUM2INT(value);
4111 
4112  rb_str_modify(str);
4113 
4114  if (pos < -RSTRING_LEN(str) || RSTRING_LEN(str) <= pos)
4115  rb_raise(rb_eIndexError, "index %ld out of string", pos);
4116  if (pos < 0)
4117  pos += RSTRING_LEN(str);
4118 
4119  RSTRING_PTR(str)[pos] = byte;
4120 
4121  return value;
4122 }
4123 
/*
 * Byte-oriented substring helper.
 *
 * str: source string.
 * beg: starting byte offset; a negative value counts back from the end.
 * len: requested number of bytes; clamped to what is available.
 *
 * Returns Qnil when beg lies past the end of the string, when beg is
 * still negative after adding the length, or when len is negative.
 * Otherwise returns a new string carrying str's encoding (str_enc_copy)
 * and flags (OBJ_INFECT).
 *
 * NOTE(review): listing lines 4159, 4161, 4166 and 4169 (the statements
 * inside the if/else and the switch arms near the end) are absent from
 * this extract.
 */
4124 static VALUE
4125 str_byte_substr(VALUE str, long beg, long len)
4126 {
4127  char *p, *s = RSTRING_PTR(str);
4128  long n = RSTRING_LEN(str);
4129  VALUE str2;
4130 
4131  if (beg > n || len < 0) return Qnil;
4132  if (beg < 0) {
4133  beg += n;
4134  if (beg < 0) return Qnil;
4135  }
 /* Clamp the length so the slice never runs past the end of str. */
4136  if (beg + len > n)
4137  len = n - beg;
4138  if (len <= 0) {
4139  len = 0;
4140  p = 0;
4141  }
4142  else
4143  p = s + beg;
4144 
 /*
  * A slice too long to embed that reaches the end of str is built from
  * rb_str_new4()/str_new3() and then narrowed by moving the heap pointer
  * forward and shrinking the heap length; every other slice is created
  * from (p, len) with rb_str_new5().
  * NOTE(review): presumably the first path shares str's buffer instead of
  * copying it -- confirm against rb_str_new4()/str_new3().
  */
4145  if (len > RSTRING_EMBED_LEN_MAX && beg + len == n) {
4146  str2 = rb_str_new4(str);
4147  str2 = str_new3(rb_obj_class(str2), str2);
4148  RSTRING(str2)->as.heap.ptr += RSTRING(str2)->as.heap.len - len;
4149  RSTRING(str2)->as.heap.len = len;
4150  }
4151  else {
4152  str2 = rb_str_new5(str, p, len);
4153  }
4154 
4155  str_enc_copy(str2, str);
4156 
 /* Empty result: branch on ASCII compatibility of the source encoding. */
4157  if (RSTRING_LEN(str2) == 0) {
4158  if (!rb_enc_asciicompat(STR_ENC_GET(str)))
4160  else
4162  }
4163  else {
 /* Non-empty result: branch on the code range recorded for the source. */
4164  switch (ENC_CODERANGE(str)) {
4165  case ENC_CODERANGE_7BIT:
4167  break;
4168  default:
4170  break;
4171  }
4172  }
4173 
4174  OBJ_INFECT(str2, str);
4175 
4176  return str2;
4177 }
4178 
/*
 * Single-argument byte indexing: the argument (`indx`) is either an
 * integer position or a Range.
 *
 * NOTE(review): the listing line with this function's name and parameter
 * list (4180) is absent from this extract; the call near the end of this
 * chunk names it str_byte_aref(str, argv[0]).
 */
4179 static VALUE
4181 {
4182  long idx;
4183  switch (TYPE(indx)) {
4184  case T_FIXNUM:
4185  idx = FIX2LONG(indx);
4186 
 /* Integer index: one byte, or nil when out of range or empty. */
4187  num_index:
4188  str = str_byte_substr(str, idx, 1);
4189  if (NIL_P(str) || RSTRING_LEN(str) == 0) return Qnil;
4190  return str;
4191 
4192  default:
4193  /* check if indx is Range */
4194  {
4195  long beg, len = RSTRING_LEN(str);
4196 
 /*
  * Qfalse falls through to the integer conversion below, Qnil is
  * returned as nil, any other result means beg/len were filled in.
  */
4197  switch (rb_range_beg_len(indx, &beg, &len, len, 0)) {
4198  case Qfalse:
4199  break;
4200  case Qnil:
4201  return Qnil;
4202  default:
4203  return str_byte_substr(str, beg, len);
4204  }
4205  }
 /* Not a Range: convert to a long and reuse the integer path above. */
4206  idx = NUM2LONG(indx);
4207  goto num_index;
4208  }
4209 
4210  UNREACHABLE;
4211 }
4212 
4213 /*
4214  * call-seq:
4215  * str.byteslice(fixnum) -> new_str or nil
4216  * str.byteslice(fixnum, fixnum) -> new_str or nil
4217  * str.byteslice(range) -> new_str or nil
4218  *
4219  * Byte Reference---If passed a single <code>Fixnum</code>, returns a
4220  * substring of one byte at that position. If passed two <code>Fixnum</code>
4221  * objects, returns a substring starting at the offset given by the first, and
4222  * a length given by the second. If given a <code>Range</code>, a substring containing
4223  * bytes at offsets given by the range is returned. In all three cases, if
4224  * an offset is negative, it is counted from the end of <i>str</i>. Returns
4225  * <code>nil</code> if the initial offset falls outside the string, the length
4226  * is negative, or the beginning of the range is greater than the end.
4227  * The resulting string keeps the encoding of the original string.
4228  *
4229  * "hello".byteslice(1) #=> "e"
4230  * "hello".byteslice(-1) #=> "o"
4231  * "hello".byteslice(1, 2) #=> "el"
4232  * "\x80\u3042".byteslice(1, 3) #=> "\u3042"
4233  * "\x03\u3042\xff".byteslice(1..3) #=> "\u3042"
4234  */
4235 
4236 static VALUE
4238 {
 /*
  * Two arguments: (offset, length), both converted with NUM2LONG.
  * One argument: an integer or Range, handled by str_byte_aref().
  * Any other count is rejected by rb_check_arity(argc, 1, 2).
  *
  * NOTE(review): the listing line with this function's name and
  * parameter list (4237) is absent from this extract.
  */
4239  if (argc == 2) {
4240  return str_byte_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]));
4241  }
4242  rb_check_arity(argc, 1, 2);
4243  return str_byte_aref(str, argv[0]);
4244 }
4245 
4246 /*
4247  * call-seq:
4248  * str.reverse -> new_str
4249  *
4250  * Returns a new string with the characters from <i>str</i> in reverse order.
4251  *
4252  * "stressed".reverse #=> "desserts"
4253  */
4254 
4255 static VALUE
4257 {
 /*
  * Builds a new string of the same byte length and fills it from the
  * back: p starts at the end of `rev` and moves towards its start while
  * s walks forward over `str`, one character at a time.
  *
  * NOTE(review): listing lines 4256 (function name and parameters), 4300,
  * 4303 and 4306 (statements in and after the code-range block near the
  * end) are absent from this extract.
  */
4258  rb_encoding *enc;
4259  VALUE rev;
4260  char *s, *e, *p;
 /* Cleared as soon as a multi-byte character or a byte >= 0x80 is seen. */
4261  int single = 1;
4262 
 /* Strings of zero or one byte are their own reverse. */
4263  if (RSTRING_LEN(str) <= 1) return rb_str_dup(str);
4264  enc = STR_ENC_GET(str);
4265  rev = rb_str_new5(str, 0, RSTRING_LEN(str));
4266  s = RSTRING_PTR(str); e = RSTRING_END(str);
4267  p = RSTRING_END(rev);
4268 
 /* Always true here: shorter strings already returned above. */
4269  if (RSTRING_LEN(str) > 1) {
4270  if (single_byte_optimizable(str)) {
 /* Byte-wise copy. */
4271  while (s < e) {
4272  *--p = *s++;
4273  }
4274  }
4275  else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID) {
 /* Code range is VALID: use rb_enc_fast_mbclen() for the char length. */
4276  while (s < e) {
4277  int clen = rb_enc_fast_mbclen(s, e, enc);
4278 
4279  if (clen > 1 || (*s & 0x80)) single = 0;
4280  p -= clen;
4281  memcpy(p, s, clen);
4282  s += clen;
4283  }
4284  }
4285  else {
 /* Any other code range: use rb_enc_mbclen() for the char length. */
4286  while (s < e) {
4287  int clen = rb_enc_mbclen(s, e, enc);
4288 
4289  if (clen > 1 || (*s & 0x80)) single = 0;
4290  p -= clen;
4291  memcpy(p, s, clen);
4292  s += clen;
4293  }
4294  }
4295  }
4296  STR_SET_LEN(rev, RSTRING_LEN(str));
4297  OBJ_INFECT(rev, str);
 /* Only entered when no code range was recorded for the source. */
4298  if (ENC_CODERANGE(str) == ENC_CODERANGE_UNKNOWN) {
4299  if (single) {
4301  }
4302  else {
4304  }
4305  }
4307 
4308  return rev;
4309 }
4310 
4311 
4312 /*
4313  * call-seq:
4314  * str.reverse! -> str
4315  *
4316  * Reverses <i>str</i> in place.
4317  */
4318 
4319 static VALUE
4321 {
 /*
  * In-place reversal.  Single-byte-optimizable strings are reversed by
  * swapping bytes from both ends towards the middle.
  *
  * NOTE(review): listing lines 4320 (function name and parameters) and
  * 4336 (the statement of the else branch that handles all other strings
  * of more than one byte) are absent from this extract.
  */
4322  if (RSTRING_LEN(str) > 1) {
4323  if (single_byte_optimizable(str)) {
4324  char *s, *e, c;
4325 
4326  str_modify_keep_cr(str);
4327  s = RSTRING_PTR(str);
4328  e = RSTRING_END(str) - 1;
 /* Classic two-pointer swap; stops when the pointers meet or cross. */
4329  while (s < e) {
4330  c = *s;
4331  *s++ = *e;
4332  *e-- = c;
4333  }
4334  }
4335  else {
4337  }
4338  }
4339  else {
 /* Nothing to reverse, but str_modify_keep_cr() is still called. */
4340  str_modify_keep_cr(str);
4341  }
4342  return str;
4343 }
4344 
4345 
4346 /*
4347  * call-seq:
4348  * str.include? other_str -> true or false
4349  *
4350  * Returns <code>true</code> if <i>str</i> contains the given string or
4351  * character.
4352  *
4353  * "hello".include? "lo" #=> true
4354  * "hello".include? "ol" #=> false
4355  * "hello".include? ?h #=> true
4356  */
4357 
4358 static VALUE
4360 {
 /*
  * NOTE(review): the listing line with this function's name and
  * parameter list (4359) is absent from this extract; the body reads
  * `str` and `arg`.
  */
4361  long i;
4362 
 /* The argument must be (convertible to) a String. */
4363  StringValue(arg);
4364  i = rb_str_index(str, arg, 0);
4365 
 /* rb_str_index() reports "not found" as -1. */
4366  if (i == -1) return Qfalse;
4367  return Qtrue;
4368 }
4369 
4370 
4371 /*
4372  * call-seq:
4373  * str.to_i(base=10) -> integer
4374  *
4375  * Returns the result of interpreting leading characters in <i>str</i> as an
4376  * integer base <i>base</i> (between 2 and 36). Extraneous characters past the
4377  * end of a valid number are ignored. If there is not a valid number at the
4378  * start of <i>str</i>, <code>0</code> is returned. This method never raises an
4379  * exception when <i>base</i> is valid.
4380  *
4381  * "12345".to_i #=> 12345
4382  * "99 red balloons".to_i #=> 99
4383  * "0a".to_i #=> 0
4384  * "0a".to_i(16) #=> 10
4385  * "hello".to_i #=> 0
4386  * "1100101".to_i(2) #=> 101
4387  * "1100101".to_i(8) #=> 294977
4388  * "1100101".to_i(10) #=> 1100101
4389  * "1100101".to_i(16) #=> 17826049
4390  */
4391 
4392 static VALUE
4394 {
 /*
  * NOTE(review): the listing line with this function's name and
  * parameter list (4393) is absent from this extract; the body reads
  * `argc`, `argv` and `str`.
  */
4395  int base;
4396 
 /* The base defaults to 10 when no argument is given. */
4397  if (argc == 0) base = 10;
4398  else {
4399  VALUE b;
4400 
4401  rb_scan_args(argc, argv, "01", &b);
4402  base = NUM2INT(b);
4403  }
 /*
  * Only negative bases are rejected here (ArgumentError).
  * NOTE(review): other invalid bases are presumably handled inside
  * rb_str_to_inum() -- confirm.
  */
4404  if (base < 0) {
4405  rb_raise(rb_eArgError, "invalid radix %d", base);
4406  }
4407  return rb_str_to_inum(str, base, FALSE);
4408 }
4409 
4410 
4411 /*
4412  * call-seq:
4413  * str.to_f -> float
4414  *
4415  * Returns the result of interpreting leading characters in <i>str</i> as a
4416  * floating point number. Extraneous characters past the end of a valid number
4417  * are ignored. If there is not a valid number at the start of <i>str</i>,
4418  * <code>0.0</code> is returned. This method never raises an exception.
4419  *
4420  * "123.45e1".to_f #=> 1234.5
4421  * "45.67 degrees".to_f #=> 45.67
4422  * "thx1138".to_f #=> 0.0
4423  */
4424 
4425 static VALUE
4427 {
 /*
  * Converts with rb_str_to_dbl(str, FALSE) and wraps the double with
  * DBL2NUM().
  *
  * NOTE(review): the listing line with this function's name and
  * parameter list (4426) is absent from this extract.
  */
4428  return DBL2NUM(rb_str_to_dbl(str, FALSE));
4429 }
4430 
4431 
4432 /*
4433  * call-seq:
4434  * str.to_s -> str
4435  * str.to_str -> str
4436  *
4437  * Returns the receiver.
4438  */
4439 
4440 static VALUE
4442 {
 /*
  * Returns the receiver itself only when its class is exactly
  * rb_cString; for any other class a duplicate created with
  * str_duplicate(rb_cString, str) is returned instead.
  *
  * NOTE(review): the listing line with this function's name and
  * parameter list (4441) is absent from this extract.
  */
4443  if (rb_obj_class(str) != rb_cString) {
4444  return str_duplicate(rb_cString, str);
4445  }
4446  return str;
4447 }
4448 
/*
 * Compiled out by the surrounding #if 0 / #endif pair.
 * Encodes code point c in encoding enc into a stack buffer of
 * RUBY_MAX_CHAR_LEN bytes and appends the resulting bytes to str.
 */
4449 #if 0
4450 static void
4451 str_cat_char(VALUE str, unsigned int c, rb_encoding *enc)
4452 {
4453  char s[RUBY_MAX_CHAR_LEN];
 /* Number of bytes c occupies in enc. */
4454  int n = rb_enc_codelen(c, enc);
4455 
4456  rb_enc_mbcput(c, s, enc);
4457  rb_enc_str_buf_cat(str, s, n, enc);
4458 }
4459 #endif
4460 
4461 #define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
4462 
4463 int
4464 rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p)
4465 {
4466  char buf[CHAR_ESC_LEN + 1];
4467  int l;
4468 
4469 #if SIZEOF_INT > 4
4470  c &= 0xffffffff;
4471 #endif
4472  if (unicode_p) {
4473  if (c < 0x7F && ISPRINT(c)) {
4474  snprintf(buf, CHAR_ESC_LEN, "%c", c);
4475  }
4476  else if (c < 0x10000) {
4477  snprintf(buf, CHAR_ESC_LEN, "\\u%04X", c);
4478  }
4479  else {
4480  snprintf(buf, CHAR_ESC_LEN, "\\u{%X}", c);
4481  }
4482  }
4483  else {
4484  if (c < 0x100) {
4485  snprintf(buf, CHAR_ESC_LEN, "\\x%02X", c);
4486  }
4487  else {
4488  snprintf(buf, CHAR_ESC_LEN, "\\x{%X}", c);
4489  }
4490  }
4491  l = (int)strlen(buf); /* CHAR_ESC_LEN cannot exceed INT_MAX */
4492  rb_str_buf_cat(result, buf, l);
4493  return l;
4494 }
4495 
4496 /*
4497  * call-seq:
4498  * str.inspect -> string
4499  *
4500  * Returns a printable version of _str_, surrounded by quote marks,
4501  * with special characters escaped.
4502  *
4503  * str = "hello"
4504  * str[3] = "\b"
4505  * str.inspect #=> "\"hel\\bo\""
4506  */
4507 
4508 VALUE
4510 {
 /*
  * Builds a double-quoted, escaped rendering of str in `result`.
  * Bytes that need no escaping are not copied one by one: `prev` marks
  * the start of the pending run and the run is flushed with
  * str_buf_cat() whenever an escape has to be emitted.
  *
  * NOTE(review): listing lines 4509 (function name and parameters), 4514,
  * 4515 (presumably the declarations of `result` and `resenc`, which are
  * used below but not declared in the visible text) and 4571 (one operand
  * of the '#' condition) are absent from this extract.
  */
4511  rb_encoding *enc = STR_ENC_GET(str);
4512  const char *p, *pend, *prev;
4513  char buf[CHAR_ESC_LEN + 1];
4516  int unicode_p = rb_enc_unicode_p(enc);
4517  int asciicompat = rb_enc_asciicompat(enc);
 /* Looked up once and cached across calls. */
4518  static rb_encoding *utf16, *utf32;
4519 
4520  if (!utf16) utf16 = rb_enc_find("UTF-16");
4521  if (!utf32) utf32 = rb_enc_find("UTF-32");
 /* The result encoding must be ASCII-compatible; fall back to US-ASCII. */
4522  if (resenc == NULL) resenc = rb_default_external_encoding();
4523  if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
4524  rb_enc_associate(result, resenc);
4525  str_buf_cat2(result, "\"");
4526 
4527  p = RSTRING_PTR(str); pend = RSTRING_END(str);
4528  prev = p;
 /*
  * For "UTF-16" / "UTF-32" the leading bytes are inspected as a byte
  * order mark to pick the BE or LE variant; without one the string is no
  * longer treated as Unicode.
  * NOTE(review): q[0]..q[3] are read without comparing against pend --
  * confirm this is safe for strings shorter than the mark.
  */
4529  if (enc == utf16) {
4530  const unsigned char *q = (const unsigned char *)p;
4531  if (q[0] == 0xFE && q[1] == 0xFF)
4532  enc = rb_enc_find("UTF-16BE");
4533  else if (q[0] == 0xFF && q[1] == 0xFE)
4534  enc = rb_enc_find("UTF-16LE");
4535  else
4536  unicode_p = 0;
4537  }
4538  else if (enc == utf32) {
4539  const unsigned char *q = (const unsigned char *)p;
4540  if (q[0] == 0 && q[1] == 0 && q[2] == 0xFE && q[3] == 0xFF)
4541  enc = rb_enc_find("UTF-32BE");
4542  else if (q[3] == 0 && q[2] == 0 && q[1] == 0xFE && q[0] == 0xFF)
4543  enc = rb_enc_find("UTF-32LE");
4544  else
4545  unicode_p = 0;
4546  }
4547  while (p < pend) {
4548  unsigned int c, cc;
4549  int n;
4550 
4551  n = rb_enc_precise_mbclen(p, pend, enc);
4552  if (!MBCLEN_CHARFOUND_P(n)) {
 /*
  * No valid character here: flush the pending run, then emit up to
  * rb_enc_mbminlen(enc) raw bytes (fewer at the end of the string) as
  * \xXX escapes.
  */
4553  if (p > prev) str_buf_cat(result, prev, p - prev);
4554  n = rb_enc_mbminlen(enc);
4555  if (pend < p + n)
4556  n = (int)(pend - p);
4557  while (n--) {
4558  snprintf(buf, CHAR_ESC_LEN, "\\x%02X", *p & 0377);
4559  str_buf_cat(result, buf, strlen(buf));
4560  prev = ++p;
4561  }
4562  continue;
4563  }
4564  n = MBCLEN_CHARFOUND_LEN(n);
4565  c = rb_enc_mbc_to_codepoint(p, pend, enc);
 /* From here on p points past the character; p - n is its first byte. */
4566  p += n;
 /*
  * A double quote, a backslash, or a '#' followed by '$', '@' or '{'
  * gets a backslash in front of it.
  */
4567  if ((asciicompat || unicode_p) &&
4568  (c == '"'|| c == '\\' ||
4569  (c == '#' &&
4570  p < pend &&
4572  (cc = rb_enc_codepoint(p,pend,enc),
4573  (cc == '$' || cc == '@' || cc == '{'))))) {
4574  if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
4575  str_buf_cat2(result, "\\");
 /*
  * When the character's own bytes can be copied as they are, restart the
  * pending run at the character; otherwise fall through so that it is
  * escaped by the code below.
  */
4576  if (asciicompat || enc == resenc) {
4577  prev = p - n;
4578  continue;
4579  }
4580  }
 /* Control characters that have a short one-letter escape. */
4581  switch (c) {
4582  case '\n': cc = 'n'; break;
4583  case '\r': cc = 'r'; break;
4584  case '\t': cc = 't'; break;
4585  case '\f': cc = 'f'; break;
4586  case '\013': cc = 'v'; break;
4587  case '\010': cc = 'b'; break;
4588  case '\007': cc = 'a'; break;
4589  case 033: cc = 'e'; break;
4590  default: cc = 0; break;
4591  }
4592  if (cc) {
4593  if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
4594  buf[0] = '\\';
4595  buf[1] = (char)cc;
4596  str_buf_cat(result, buf, 2);
4597  prev = p;
4598  continue;
4599  }
 /* Printable characters stay in the pending run; the rest are escaped. */
4600  if ((enc == resenc && rb_enc_isprint(c, enc)) ||
4601  (asciicompat && rb_enc_isascii(c, enc) && ISPRINT(c))) {
4602  continue;
4603  }
4604  else {
4605  if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
4606  rb_str_buf_cat_escaped_char(result, c, unicode_p);
4607  prev = p;
4608  continue;
4609  }
4610  }
 /* Flush the final pending run and close the quote. */
4611  if (p > prev) str_buf_cat(result, prev, p - prev);
4612  str_buf_cat2(result, "\"");
4613 
4614  OBJ_INFECT(result, str);
4615  return result;
4616 }
4617 
/*
 * True when p is still before e and *p is '$', '@' or '{'.  The dump code
 * below applies it to the byte following a '#' to decide whether that '#'
 * needs a backslash in front of it.
 */
4618 #define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
4619 
4620 /*
4621  * call-seq:
4622  * str.dump -> new_str
4623  *
4624  * Produces a version of +str+ with all non-printing characters replaced by
4625  * <code>\xNN</code> notation and all special characters escaped.
4626  *
4627  * "hello \n ''".dump #=> "\"hello \\n ''\""
4628  */
4629 
4630 VALUE
4632 {
 /*
  * Two passes over the bytes of str: the first computes the exact length
  * of the escaped output, the second writes it into a string allocated
  * with that length.  Both passes must classify every byte the same way,
  * otherwise the second pass would run past the buffer.
  *
  * NOTE(review): listing lines 4631 (function name and parameters) and
  * 4757 (a statement just before the final return) are absent from this
  * extract.
  */
4633  rb_encoding *enc = rb_enc_get(str);
4634  long len;
4635  const char *p, *pend;
4636  char *q, *qend;
4637  VALUE result;
 /* UTF-8 strings get \u{...} escapes for valid multi-byte characters. */
4638  int u8 = (enc == rb_utf8_encoding());
4639 
 /* Pass 1: measure. */
4640  len = 2; /* "" */
4641  p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
4642  while (p < pend) {
4643  unsigned char c = *p++;
4644  switch (c) {
4645  case '"': case '\\':
4646  case '\n': case '\r':
4647  case '\t': case '\f':
4648  case '\013': case '\010': case '\007': case '\033':
4649  len += 2;
4650  break;
4651 
4652  case '#':
4653  len += IS_EVSTR(p, pend) ? 2 : 1;
4654  break;
4655 
4656  default:
4657  if (ISPRINT(c)) {
4658  len++;
4659  }
4660  else {
4661  if (u8) { /* \u{NN} */
4662  int n = rb_enc_precise_mbclen(p-1, pend, enc);
4663  if (MBCLEN_CHARFOUND_P(n-1)) {
4664  unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
 /* One extra byte per hex digit beyond the first, plus 5 for \u{X}. */
4665  while (cc >>= 4) len++;
4666  len += 5;
4667  p += MBCLEN_CHARFOUND_LEN(n)-1;
4668  break;
4669  }
4670  }
4671  len += 4; /* \xNN */
4672  }
4673  break;
4674  }
4675  }
4676  if (!rb_enc_asciicompat(enc)) {
4677  len += 19; /* ".force_encoding('')" */
4678  len += strlen(enc->name);
4679  }
4680 
 /* Pass 2: write.  qend lies one past the terminating NUL position. */
4681  result = rb_str_new5(str, 0, len);
4682  p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
4683  q = RSTRING_PTR(result); qend = q + len + 1;
4684 
4685  *q++ = '"';
4686  while (p < pend) {
4687  unsigned char c = *p++;
4688 
4689  if (c == '"' || c == '\\') {
4690  *q++ = '\\';
4691  *q++ = c;
4692  }
4693  else if (c == '#') {
4694  if (IS_EVSTR(p, pend)) *q++ = '\\';
4695  *q++ = '#';
4696  }
4697  else if (c == '\n') {
4698  *q++ = '\\';
4699  *q++ = 'n';
4700  }
4701  else if (c == '\r') {
4702  *q++ = '\\';
4703  *q++ = 'r';
4704  }
4705  else if (c == '\t') {
4706  *q++ = '\\';
4707  *q++ = 't';
4708  }
4709  else if (c == '\f') {
4710  *q++ = '\\';
4711  *q++ = 'f';
4712  }
4713  else if (c == '\013') {
4714  *q++ = '\\';
4715  *q++ = 'v';
4716  }
4717  else if (c == '\010') {
4718  *q++ = '\\';
4719  *q++ = 'b';
4720  }
4721  else if (c == '\007') {
4722  *q++ = '\\';
4723  *q++ = 'a';
4724  }
4725  else if (c == '\033') {
4726  *q++ = '\\';
4727  *q++ = 'e';
4728  }
4729  else if (ISPRINT(c)) {
4730  *q++ = c;
4731  }
4732  else {
4733  *q++ = '\\';
4734  if (u8) {
 /* n is the number of bytes that follow the lead byte already consumed. */
4735  int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
4736  if (MBCLEN_CHARFOUND_P(n)) {
4737  int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
4738  p += n;
4739  snprintf(q, qend-q, "u{%x}", cc);
4740  q += strlen(q);
4741  continue;
4742  }
4743  }
 /* "xNN" is always three characters after the backslash written above. */
4744  snprintf(q, qend-q, "x%02X", c);
4745  q += 3;
4746  }
4747  }
4748  *q++ = '"';
4749  *q = '\0';
 /*
  * For encodings that are not ASCII-compatible the original encoding name
  * is appended as a .force_encoding("...") suffix and the result itself is
  * tagged with the ASCII-8BIT encoding.
  */
4750  if (!rb_enc_asciicompat(enc)) {
4751  snprintf(q, qend-q, ".force_encoding(\"%s\")", enc->name);
4752  enc = rb_ascii8bit_encoding();
4753  }
4754  OBJ_INFECT(result, str);
4755  /* result from dump is ASCII */
4756  rb_enc_associate(result, enc);
4758  return result;
4759 }
4760 
4761 
/*
 * Raises EncCompatError ("incompatible encoding with this operation")
 * when `enc` is a dummy encoding according to rb_enc_dummy_p(); returns
 * normally otherwise.
 *
 * NOTE(review): the listing line with this function's name and parameter
 * list (4763) is absent from this extract.
 */
4762 static void
4764 {
4765  if (rb_enc_dummy_p(enc)) {
4766  rb_raise(rb_eEncCompatError, "incompatible encoding with this operation: %s",
4767  rb_enc_name(enc));
4768  }
4769 }
4770 
4771 /*
4772  * call-seq:
4773  * str.upcase! -> str or nil
4774  *
4775  * Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes
4776  * were made.
4777  * Note: case replacement is effective only in ASCII region.
4778  */
4779 
4780 static VALUE
4782 {
 /*
  * In-place upcase.  Returns str when at least one character was changed
  * and Qnil otherwise.
  *
  * NOTE(review): listing lines 4781 (function name and parameters) and
  * 4790 (a statement between fetching `enc` and the scan) are absent from
  * this extract.
  */
4783  rb_encoding *enc;
4784  char *s, *send;
4785  int modify = 0;
4786  int n;
4787 
4788  str_modify_keep_cr(str);
4789  enc = STR_ENC_GET(str);
4791  s = RSTRING_PTR(str); send = RSTRING_END(str);
4792  if (single_byte_optimizable(str)) {
 /* Byte-wise path: only ASCII 'a'..'z' are converted. */
4793  while (s < send) {
4794  unsigned int c = *(unsigned char*)s;
4795 
4796  if (rb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') {
4797  *s = 'A' + (c - 'a');
4798  modify = 1;
4799  }
4800  s++;
4801  }
4802  }
4803  else {
4804  int ascompat = rb_enc_asciicompat(enc);
4805 
4806  while (s < send) {
4807  unsigned int c;
4808 
 /* In ASCII-compatible encodings bytes below 0x80 are handled directly. */
4809  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
4810  if (rb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') {
4811  *s = 'A' + (c - 'a');
4812  modify = 1;
4813  }
4814  s++;
4815  }
4816  else {
 /* Otherwise decode a full character; n receives its byte length. */
4817  c = rb_enc_codepoint_len(s, send, &n, enc);
4818  if (rb_enc_islower(c, enc)) {
4819  /* assuming toupper returns codepoint with same size */
4820  rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
4821  modify = 1;
4822  }
4823  s += n;
4824  }
4825  }
4826  }
4827 
4828  if (modify) return str;
4829  return Qnil;
4830 }
4831 
4832 
4833 /*
4834  * call-seq:
4835  * str.upcase -> new_str
4836  *
4837  * Returns a copy of <i>str</i> with all lowercase letters replaced with their
4838  * uppercase counterparts. The operation is locale insensitive---only
4839  * characters ``a'' to ``z'' are affected.
4840  * Note: case replacement is effective only in ASCII region.
4841  *
4842  * "hEllO".upcase #=> "HELLO"
4843  */
4844 
4845 static VALUE
4847 {
 /*
  * Non-destructive form: upcases a duplicate of the receiver.  The return
  * value of rb_str_upcase_bang() is ignored, so the duplicate is returned
  * even when nothing changed.
  *
  * NOTE(review): the listing line with this function's name and
  * parameter list (4846) is absent from this extract.
  */
4848  str = rb_str_dup(str);
4849  rb_str_upcase_bang(str);
4850  return str;
4851 }
4852 
4853 
4854 /*
4855  * call-seq:
4856  * str.downcase! -> str or nil
4857  *
4858  * Downcases the contents of <i>str</i>, returning <code>nil</code> if no
4859  * changes were made.
4860  * Note: case replacement is effective only in ASCII region.
4861  */
4862 
4863 static VALUE
4865 {
 /*
  * In-place downcase.  Returns str when at least one character was
  * changed and Qnil otherwise.
  *
  * NOTE(review): listing lines 4864 (function name and parameters) and
  * 4872 (a statement between fetching `enc` and the scan) are absent from
  * this extract.
  */
4866  rb_encoding *enc;
4867  char *s, *send;
4868  int modify = 0;
4869 
4870  str_modify_keep_cr(str);
4871  enc = STR_ENC_GET(str);
4873  s = RSTRING_PTR(str); send = RSTRING_END(str);
4874  if (single_byte_optimizable(str)) {
 /* Byte-wise path: only ASCII 'A'..'Z' are converted. */
4875  while (s < send) {
4876  unsigned int c = *(unsigned char*)s;
4877 
4878  if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
4879  *s = 'a' + (c - 'A');
4880  modify = 1;
4881  }
4882  s++;
4883  }
4884  }
4885  else {
4886  int ascompat = rb_enc_asciicompat(enc);
4887 
4888  while (s < send) {
4889  unsigned int c;
4890  int n;
4891 
 /* In ASCII-compatible encodings bytes below 0x80 are handled directly. */
4892  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
4893  if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
4894  *s = 'a' + (c - 'A');
4895  modify = 1;
4896  }
4897  s++;
4898  }
4899  else {
 /* Otherwise decode a full character; n receives its byte length. */
4900  c = rb_enc_codepoint_len(s, send, &n, enc);
4901  if (rb_enc_isupper(c, enc)) {
4902  /* assuming tolower returns codepoint with same size */
4903  rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
4904  modify = 1;
4905  }
4906  s += n;
4907  }
4908  }
4909  }
4910 
4911  if (modify) return str;
4912  return Qnil;
4913 }
4914 
4915 
4916 /*
4917  * call-seq:
4918  * str.downcase -> new_str
4919  *
4920  * Returns a copy of <i>str</i> with all uppercase letters replaced with their
4921  * lowercase counterparts. The operation is locale insensitive---only
4922  * characters ``A'' to ``Z'' are affected.
4923  * Note: case replacement is effective only in ASCII region.
4924  *
4925  * "hEllO".downcase #=> "hello"
4926  */
4927 
4928 static VALUE
4930 {
 /*
  * Non-destructive form: downcases a duplicate of the receiver.  The
  * return value of rb_str_downcase_bang() is ignored, so the duplicate is
  * returned even when nothing changed.
  *
  * NOTE(review): the listing line with this function's name and
  * parameter list (4929) is absent from this extract.
  */
4931  str = rb_str_dup(str);
4932  rb_str_downcase_bang(str);
4933  return str;
4934 }
4935 
4936 
4937 /*
4938  * call-seq:
4939  * str.capitalize! -> str or nil
4940  *
4941  * Modifies <i>str</i> by converting the first character to uppercase and the
4942  * remainder to lowercase. Returns <code>nil</code> if no changes are made.
4943  * Note: case conversion is effective only in ASCII region.
4944  *
4945  * a = "hello"
4946  * a.capitalize! #=> "Hello"
4947  * a #=> "Hello"
4948  * a.capitalize! #=> nil
4949  */
4950 
4951 static VALUE
4953 {
 /*
  * In-place capitalize: the first character is upcased when it is lower
  * case, every following character is downcased when it is upper case.
  * Returns str when anything changed and Qnil otherwise (including for an
  * empty string).
  *
  * NOTE(review): listing lines 4952 (function name and parameters) and
  * 4962 (a statement between fetching `enc` and the emptiness check) are
  * absent from this extract.
  */
4954  rb_encoding *enc;
4955  char *s, *send;
4956  int modify = 0;
4957  unsigned int c;
4958  int n;
4959 
4960  str_modify_keep_cr(str);
4961  enc = STR_ENC_GET(str);
4963  if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
4964  s = RSTRING_PTR(str); send = RSTRING_END(str);
4965 
 /* First character; n receives its byte length. */
4966  c = rb_enc_codepoint_len(s, send, &n, enc);
4967  if (rb_enc_islower(c, enc)) {
4968  rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
4969  modify = 1;
4970  }
4971  s += n;
 /* Remaining characters. */
4972  while (s < send) {
4973  c = rb_enc_codepoint_len(s, send, &n, enc);
4974  if (rb_enc_isupper(c, enc)) {
4975  rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
4976  modify = 1;
4977  }
4978  s += n;
4979  }
4980 
4981  if (modify) return str;
4982  return Qnil;
4983 }
4984 
4985 
4986 /*
4987  * call-seq:
4988  * str.capitalize -> new_str
4989  *
4990  * Returns a copy of <i>str</i> with the first character converted to uppercase
4991  * and the remainder to lowercase.
4992  * Note: case conversion is effective only in ASCII region.
4993  *
4994  * "hello".capitalize #=> "Hello"
4995  * "HELLO".capitalize #=> "Hello"
4996  * "123ABC".capitalize #=> "123abc"
4997  */
4998 
4999 static VALUE
5001 {
 /*
  * Non-destructive form: duplicates the receiver and returns the
  * duplicate.
  *
  * NOTE(review): listing lines 5000 (function name and parameters) and
  * 5003 are absent from this extract; 5003 is presumably the call that
  * capitalizes the duplicate in place, as in the sibling wrappers --
  * restore both lines from the upstream source.
  */
5002  str = rb_str_dup(str);
5004  return str;
5005 }
5006 
5007 
5008 /*
5009  * call-seq:
5010  * str.swapcase! -> str or nil
5011  *
5012  * Equivalent to <code>String#swapcase</code>, but modifies the receiver in
5013  * place, returning <i>str</i>, or <code>nil</code> if no changes were made.
5014  * Note: case conversion is effective only in ASCII region.
5015  */
5016 
5017 static VALUE
5019 {
 /*
  * In-place case swap.  Returns str when at least one character was
  * changed and Qnil otherwise.
  *
  * NOTE(review): listing lines 5018 (function name and parameters) and
  * 5027 (a statement between fetching `enc` and the scan) are absent from
  * this extract.
  */
5020  rb_encoding *enc;
5021  char *s, *send;
5022  int modify = 0;
5023  int n;
5024 
5025  str_modify_keep_cr(str);
5026  enc = STR_ENC_GET(str);
5028  s = RSTRING_PTR(str); send = RSTRING_END(str);
5029  while (s < send) {
 /* Decode one character; n receives its byte length. */
5030  unsigned int c = rb_enc_codepoint_len(s, send, &n, enc);
5031 
5032  if (rb_enc_isupper(c, enc)) {
5033  /* assuming tolower returns codepoint with same size */
5034  rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
5035  modify = 1;
5036  }
5037  else if (rb_enc_islower(c, enc)) {
5038  /* assuming toupper returns codepoint with same size */
5039  rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
5040  modify = 1;
5041  }
5042  s += n;
5043  }
5044 
5045  if (modify) return str;
5046  return Qnil;
5047 }
5048 
5049 
5050 /*
5051  * call-seq:
5052  * str.swapcase -> new_str
5053  *
5054  * Returns a copy of <i>str</i> with uppercase alphabetic characters converted
5055  * to lowercase and lowercase characters converted to uppercase.
5056  * Note: case conversion is effective only in ASCII region.
5057  *
5058  * "Hello".swapcase #=> "hELLO"
5059  * "cYbEr_PuNk11".swapcase #=> "CyBeR_pUnK11"
5060  */
5061 
5062 static VALUE
5064 {
5065  str = rb_str_dup(str);
5066  rb_str_swapcase_bang(str);
5067  return str;
5068 }
5069 
typedef unsigned char *USTR;

/* Cursor over a tr-style character specification (see trnext). */
struct tr {
    int gen;            /* non-zero while a "c1-c2" range is being expanded */
    unsigned int now;   /* codepoint most recently produced */
    unsigned int max;   /* upper bound of the range being expanded */
    char *p;            /* next unread byte of the specification */
    char *pend;         /* one past the last byte of the specification */
};
5077 
5078 static unsigned int
5079 trnext(struct tr *t, rb_encoding *enc)
5080 {
5081  int n;
5082 
5083  for (;;) {
5084  if (!t->gen) {
5085 nextpart:
5086  if (t->p == t->pend) return -1;
5087  if (rb_enc_ascget(t->p, t->pend, &n, enc) == '\\' && t->p + n < t->pend) {
5088  t->p += n;
5089  }
5090  t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
5091  t->p += n;
5092  if (rb_enc_ascget(t->p, t->pend, &n, enc) == '-' && t->p + n < t->pend) {
5093  t->p += n;
5094  if (t->p < t->pend) {
5095  unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
5096  t->p += n;
5097  if (t->now > c) {
5098  if (t->now < 0x80 && c < 0x80) {
5100  "invalid range \"%c-%c\" in string transliteration",
5101  t->now, c);
5102  }
5103  else {
5104  rb_raise(rb_eArgError, "invalid range in string transliteration");
5105  }
5106  continue; /* not reached */
5107  }
5108  t->gen = 1;
5109  t->max = c;
5110  }
5111  }
5112  return t->now;
5113  }
5114  else {
5115  while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
5116  if (t->now == t->max) {
5117  t->gen = 0;
5118  goto nextpart;
5119  }
5120  }
5121  if (t->now < t->max) {
5122  return t->now;
5123  }
5124  else {
5125  t->gen = 0;
5126  return t->max;
5127  }
5128  }
5129  }
5130 }
5131 
5132 static VALUE rb_str_delete_bang(int,VALUE*,VALUE);
5133 
5134 static VALUE
5135 tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
5136 {
5137  const unsigned int errc = -1;
5138  unsigned int trans[256];
5139  rb_encoding *enc, *e1, *e2;
5140  struct tr trsrc, trrepl;
5141  int cflag = 0;
5142  unsigned int c, c0, last = 0;
5143  int modify = 0, i, l;
5144  char *s, *send;
5145  VALUE hash = 0;
5146  int singlebyte = single_byte_optimizable(str);
5147  int cr;
5148 
5149 #define CHECK_IF_ASCII(c) \
5150  (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
5151  (cr = ENC_CODERANGE_VALID) : 0)
5152 
5153  StringValue(src);
5154  StringValue(repl);
5155  if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
5156  if (RSTRING_LEN(repl) == 0) {
5157  return rb_str_delete_bang(1, &src, str);
5158  }
5159 
5160  cr = ENC_CODERANGE(str);
5161  e1 = rb_enc_check(str, src);
5162  e2 = rb_enc_check(str, repl);
5163  if (e1 == e2) {
5164  enc = e1;
5165  }
5166  else {
5167  enc = rb_enc_check(src, repl);
5168  }
5169  trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
5170  if (RSTRING_LEN(src) > 1 &&
5171  rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) == '^' &&
5172  trsrc.p + l < trsrc.pend) {
5173  cflag = 1;
5174  trsrc.p += l;
5175  }
5176  trrepl.p = RSTRING_PTR(repl);
5177  trrepl.pend = trrepl.p + RSTRING_LEN(repl);
5178  trsrc.gen = trrepl.gen = 0;
5179  trsrc.now = trrepl.now = 0;
5180  trsrc.max = trrepl.max = 0;
5181 
5182  if (cflag) {
5183  for (i=0; i<256; i++) {
5184  trans[i] = 1;
5185  }
5186  while ((c = trnext(&trsrc, enc)) != errc) {
5187  if (c < 256) {
5188  trans[c] = errc;
5189  }
5190  else {
5191  if (!hash) hash = rb_hash_new();
5192  rb_hash_aset(hash, UINT2NUM(c), Qtrue);
5193  }
5194  }
5195  while ((c = trnext(&trrepl, enc)) != errc)
5196  /* retrieve last replacer */;
5197  last = trrepl.now;
5198  for (i=0; i<256; i++) {
5199  if (trans[i] != errc) {
5200  trans[i] = last;
5201  }
5202  }
5203  }
5204  else {
5205  unsigned int r;
5206 
5207  for (i=0; i<256; i++) {
5208  trans[i] = errc;
5209  }
5210  while ((c = trnext(&trsrc, enc)) != errc) {
5211  r = trnext(&trrepl, enc);
5212  if (r == errc) r = trrepl.now;
5213  if (c < 256) {
5214  trans[c] = r;
5215  if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
5216  }
5217  else {
5218  if (!hash) hash = rb_hash_new();
5219  rb_hash_aset(hash, UINT2NUM(c), UINT2NUM(r));
5220  }
5221  }
5222  }
5223 
5224  if (cr == ENC_CODERANGE_VALID)
5225  cr = ENC_CODERANGE_7BIT;
5226  str_modify_keep_cr(str);
5227  s = RSTRING_PTR(str); send = RSTRING_END(str);
5228  if (sflag) {
5229  int clen, tlen;
5230  long offset, max = RSTRING_LEN(str);
5231  unsigned int save = -1;
5232  char *buf = ALLOC_N(char, max), *t = buf;
5233 
5234  while (s < send) {
5235  int may_modify = 0;
5236 
5237  c0 = c = rb_enc_codepoint_len(s, send, &clen, e1);
5238  tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
5239 
5240  s += clen;
5241  if (c < 256) {
5242  c = trans[c];
5243  }
5244  else if (hash) {
5245  VALUE tmp = rb_hash_lookup(hash, UINT2NUM(c));
5246  if (NIL_P(tmp)) {
5247  if (cflag) c = last;
5248  else c = errc;
5249  }
5250  else if (cflag) c = errc;
5251  else c = NUM2INT(tmp);
5252  }
5253  else {
5254  c = errc;
5255  }
5256  if (c != (unsigned int)-1) {
5257  if (save == c) {
5258  CHECK_IF_ASCII(c);
5259  continue;
5260  }
5261  save = c;
5262  tlen = rb_enc_codelen(c, enc);
5263  modify = 1;
5264  }
5265  else {
5266  save = -1;
5267  c = c0;
5268  if (enc != e1) may_modify = 1;
5269  }
5270  while (t - buf + tlen >= max) {
5271  offset = t - buf;
5272  max *= 2;
5273  REALLOC_N(buf, char, max);
5274  t = buf + offset;
5275  }
5276  rb_enc_mbcput(c, t, enc);
5277  if (may_modify && memcmp(s, t, tlen) != 0) {
5278  modify = 1;
5279  }
5280  CHECK_IF_ASCII(c);
5281  t += tlen;
5282  }
5283  if (!STR_EMBED_P(str)) {
5284  xfree(RSTRING(str)->as.heap.ptr);
5285  }
5286  *t = '\0';
5287  RSTRING(str)->as.heap.ptr = buf;
5288  RSTRING(str)->as.heap.len = t - buf;
5289  STR_SET_NOEMBED(str);
5290  RSTRING(str)->as.heap.aux.capa = max;
5291  }
5292  else if (rb_enc_mbmaxlen(enc) == 1 || (singlebyte && !hash)) {
5293  while (s < send) {
5294  c = (unsigned char)*s;
5295  if (trans[c] != errc) {
5296  if (!cflag) {
5297  c = trans[c];
5298  *s = c;
5299  modify = 1;
5300  }
5301  else {
5302  *s = last;
5303  modify = 1;
5304  }
5305  }
5306  CHECK_IF_ASCII(c);
5307  s++;
5308  }
5309  }
5310  else {
5311  int clen, tlen, max = (int)(RSTRING_LEN(str) * 1.2);
5312  long offset;
5313  char *buf = ALLOC_N(char, max), *t = buf;
5314 
5315  while (s < send) {
5316  int may_modify = 0;
5317  c0 = c = rb_enc_codepoint_len(s, send, &clen, e1);
5318  tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
5319 
5320  if (c < 256) {
5321  c = trans[c];
5322  }
5323  else if (hash) {
5324  VALUE tmp = rb_hash_lookup(hash, UINT2NUM(c));
5325  if (NIL_P(tmp)) {
5326  if (cflag) c = last;
5327  else c = errc;
5328  }
5329  else if (cflag) c = errc;
5330  else c = NUM2INT(tmp);
5331  }
5332  else {
5333  c = cflag ? last : errc;
5334  }
5335  if (c != errc) {
5336  tlen = rb_enc_codelen(c, enc);
5337  modify = 1;
5338  }
5339  else {
5340  c = c0;
5341  if (enc != e1) may_modify = 1;
5342  }
5343  while (t - buf + tlen >= max) {
5344  offset = t - buf;
5345  max *= 2;
5346  REALLOC_N(buf, char, max);
5347  t = buf + offset;
5348  }
5349  if (s != t) {
5350  rb_enc_mbcput(c, t, enc);
5351  if (may_modify && memcmp(s, t, tlen) != 0) {
5352  modify = 1;
5353  }
5354  }
5355  CHECK_IF_ASCII(c);
5356  s += clen;
5357  t += tlen;
5358  }
5359  if (!STR_EMBED_P(str)) {
5360  xfree(RSTRING(str)->as.heap.ptr);
5361  }
5362  *t = '\0';
5363  RSTRING(str)->as.heap.ptr = buf;
5364  RSTRING(str)->as.heap.len = t - buf;
5365  STR_SET_NOEMBED(str);
5366  RSTRING(str)->as.heap.aux.capa = max;
5367  }
5368 
5369  if (modify) {
5370  if (cr != ENC_CODERANGE_BROKEN)
5371  ENC_CODERANGE_SET(str, cr);
5372  rb_enc_associate(str, enc);
5373  return str;
5374  }
5375  return Qnil;
5376 }
5377 
5378 
5379 /*
5380  * call-seq:
5381  * str.tr!(from_str, to_str) -> str or nil
5382  *
5383  * Translates <i>str</i> in place, using the same rules as
5384  * <code>String#tr</code>. Returns <i>str</i>, or <code>nil</code> if no
5385  * changes were made.
5386  */
5387 
5388 static VALUE
5390 {
5391  return tr_trans(str, src, repl, 0);
5392 }
5393 
5394 
5395 /*
5396  * call-seq:
5397  * str.tr(from_str, to_str) -> new_str
5398  *
5399  * Returns a copy of +str+ with the characters in +from_str+ replaced by the
5400  * corresponding characters in +to_str+. If +to_str+ is shorter than
5401  * +from_str+, it is padded with its last character in order to maintain the
5402  * correspondence.
5403  *
5404  * "hello".tr('el', 'ip') #=> "hippo"
5405  * "hello".tr('aeiou', '*') #=> "h*ll*"
5406  * "hello".tr('aeiou', 'AA*') #=> "hAll*"
5407  *
5408  * Both strings may use the <code>c1-c2</code> notation to denote ranges of
5409  * characters, and +from_str+ may start with a <code>^</code>, which denotes
5410  * all characters except those listed.
5411  *
5412  * "hello".tr('a-y', 'b-z') #=> "ifmmp"
5413  * "hello".tr('^aeiou', '*') #=> "*e**o"
5414  *
5415  * The backslash character <code>\</code> can be used to escape
5416  * <code>^</code> or <code>-</code> and is otherwise ignored unless it
5417  * appears at the end of a range or the end of the +from_str+ or +to_str+:
5418  *
5419  * "hello^world".tr("\\^aeiou", "*") #=> "h*ll**w*rld"
5420  * "hello-world".tr("a\\-eo", "*") #=> "h*ll**w*rld"
5421  *
5422  * "hello\r\nworld".tr("\r", "") #=> "hello\nworld"
5423  * "hello\r\nworld".tr("\\r", "") #=> "hello\r\nwold"
5424  * "hello\r\nworld".tr("\\\r", "") #=> "hello\nworld"
5425  *
5426  * "X['\\b']".tr("X\\", "") #=> "['b']"
5427  * "X['\\b']".tr("X-\\]", "") #=> "'b'"
5428  */
5429 
5430 static VALUE
5431 rb_str_tr(VALUE str, VALUE src, VALUE repl)
5432 {
5433  str = rb_str_dup(str);
5434  tr_trans(str, src, repl, 0);
5435  return str;
5436 }
5437 
5438 #define TR_TABLE_SIZE 257
5439 static void
5440 tr_setup_table(VALUE str, char stable[TR_TABLE_SIZE], int first,
5441  VALUE *tablep, VALUE *ctablep, rb_encoding *enc)
5442 {
5443  const unsigned int errc = -1;
5444  char buf[256];
5445  struct tr tr;
5446  unsigned int c;
5447  VALUE table = 0, ptable = 0;
5448  int i, l, cflag = 0;
5449 
5450  tr.p = RSTRING_PTR(str); tr.pend = tr.p + RSTRING_LEN(str);
5451  tr.gen = tr.now = tr.max = 0;
5452 
5453  if (RSTRING_LEN(str) > 1 && rb_enc_ascget(tr.p, tr.pend, &l, enc) == '^') {
5454  cflag = 1;
5455  tr.p += l;
5456  }
5457  if (first) {
5458  for (i=0; i<256; i++) {
5459  stable[i] = 1;
5460  }
5461  stable[256] = cflag;
5462  }
5463  else if (stable[256] && !cflag) {
5464  stable[256] = 0;
5465  }
5466  for (i=0; i<256; i++) {
5467  buf[i] = cflag;
5468  }
5469 
5470  while ((c = trnext(&tr, enc)) != errc) {
5471  if (c < 256) {
5472  buf[c & 0xff] = !cflag;
5473  }
5474  else {
5475  VALUE key = UINT2NUM(c);
5476 
5477  if (!table && (first || *tablep || stable[256])) {
5478  if (cflag) {
5479  ptable = *ctablep;
5480  table = ptable ? ptable : rb_hash_new();
5481  *ctablep = table;
5482  }
5483  else {
5484  table = rb_hash_new();
5485  ptable = *tablep;
5486  *tablep = table;
5487  }
5488  }
5489  if (table && (!ptable || (cflag ^ !NIL_P(rb_hash_aref(ptable, key))))) {
5490  rb_hash_aset(table, key, Qtrue);
5491  }
5492  }
5493  }
5494  for (i=0; i<256; i++) {
5495  stable[i] = stable[i] && buf[i];
5496  }
5497  if (!table && !cflag) {
5498  *tablep = 0;
5499  }
5500 }
5501 
5502 
5503 static int
5504 tr_find(unsigned int c, char table[TR_TABLE_SIZE], VALUE del, VALUE nodel)
5505 {
5506  if (c < 256) {
5507  return table[c] != 0;
5508  }
5509  else {
5510  VALUE v = UINT2NUM(c);
5511 
5512  if (del) {
5513  if (!NIL_P(rb_hash_lookup(del, v)) &&
5514  (!nodel || NIL_P(rb_hash_lookup(nodel, v)))) {
5515  return TRUE;
5516  }
5517  }
5518  else if (nodel && !NIL_P(rb_hash_lookup(nodel, v))) {
5519  return FALSE;
5520  }
5521  return table[256] ? TRUE : FALSE;
5522  }
5523 }
5524 
5525 /*
5526  * call-seq:
5527  * str.delete!([other_str]+) -> str or nil
5528  *
5529  * Performs a <code>delete</code> operation in place, returning <i>str</i>, or
5530  * <code>nil</code> if <i>str</i> was not modified.
5531  */
5532 
5533 static VALUE
5535 {
5536  char squeez[TR_TABLE_SIZE];
5537  rb_encoding *enc = 0;
5538  char *s, *send, *t;
5539  VALUE del = 0, nodel = 0;
5540  int modify = 0;
5541  int i, ascompat, cr;
5542 
5543  if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
5545  for (i=0; i<argc; i++) {
5546  VALUE s = argv[i];
5547 
5548  StringValue(s);
5549  enc = rb_enc_check(str, s);
5550  tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
5551  }
5552 
5553  str_modify_keep_cr(str);
5554  ascompat = rb_enc_asciicompat(enc);
5555  s = t = RSTRING_PTR(str);
5556  send = RSTRING_END(str);
5557  cr = ascompat ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
5558  while (s < send) {
5559  unsigned int c;
5560  int clen;
5561 
5562  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
5563  if (squeez[c]) {
5564  modify = 1;
5565  }
5566  else {
5567  if (t != s) *t = c;
5568  t++;
5569  }
5570  s++;
5571  }
5572  else {
5573  c = rb_enc_codepoint_len(s, send, &clen, enc);
5574 
5575  if (tr_find(c, squeez, del, nodel)) {
5576  modify = 1;
5577  }
5578  else {
5579  if (t != s) rb_enc_mbcput(c, t, enc);
5580  t += clen;
5581  if (cr == ENC_CODERANGE_7BIT) cr = ENC_CODERANGE_VALID;
5582  }
5583  s += clen;
5584  }
5585  }
5586  *t = '\0';
5587  STR_SET_LEN(str, t - RSTRING_PTR(str));
5588  ENC_CODERANGE_SET(str, cr);
5589 
5590  if (modify) return str;
5591  return Qnil;
5592 }
5593 
5594 
5595 /*
5596  * call-seq:
5597  * str.delete([other_str]+) -> new_str
5598  *
5599  * Returns a copy of <i>str</i> with all characters in the intersection of its
5600  * arguments deleted. Uses the same rules for building the set of characters as
5601  * <code>String#count</code>.
5602  *
5603  * "hello".delete "l","lo" #=> "heo"
5604  * "hello".delete "lo" #=> "he"
5605  * "hello".delete "aeiou", "^e" #=> "hell"
5606  * "hello".delete "ej-m" #=> "ho"
5607  */
5608 
5609 static VALUE
5611 {
5612  str = rb_str_dup(str);
5613  rb_str_delete_bang(argc, argv, str);
5614  return str;
5615 }
5616 
5617 
5618 /*
5619  * call-seq:
5620  * str.squeeze!([other_str]*) -> str or nil
5621  *
5622  * Squeezes <i>str</i> in place, returning either <i>str</i>, or
5623  * <code>nil</code> if no changes were made.
5624  */
5625 
5626 static VALUE
5628 {
5629  char squeez[TR_TABLE_SIZE];
5630  rb_encoding *enc = 0;
5631  VALUE del = 0, nodel = 0;
5632  char *s, *send, *t;
5633  int i, modify = 0;
5634  int ascompat, singlebyte = single_byte_optimizable(str);
5635  unsigned int save;
5636 
5637  if (argc == 0) {
5638  enc = STR_ENC_GET(str);
5639  }
5640  else {
5641  for (i=0; i<argc; i++) {
5642  VALUE s = argv[i];
5643 
5644  StringValue(s);
5645  enc = rb_enc_check(str, s);
5646  if (singlebyte && !single_byte_optimizable(s))
5647  singlebyte = 0;
5648  tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
5649  }
5650  }
5651 
5652  str_modify_keep_cr(str);
5653  s = t = RSTRING_PTR(str);
5654  if (!s || RSTRING_LEN(str) == 0) return Qnil;
5655  send = RSTRING_END(str);
5656  save = -1;
5657  ascompat = rb_enc_asciicompat(enc);
5658 
5659  if (singlebyte) {
5660  while (s < send) {
5661  unsigned int c = *(unsigned char*)s++;
5662  if (c != save || (argc > 0 && !squeez[c])) {
5663  *t++ = save = c;
5664  }
5665  }
5666  } else {
5667  while (s < send) {
5668  unsigned int c;
5669  int clen;
5670 
5671  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
5672  if (c != save || (argc > 0 && !squeez[c])) {
5673  *t++ = save = c;
5674  }
5675  s++;
5676  }
5677  else {
5678  c = rb_enc_codepoint_len(s, send, &clen, enc);
5679 
5680  if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
5681  if (t != s) rb_enc_mbcput(c, t, enc);
5682  save = c;
5683  t += clen;
5684  }
5685  s += clen;
5686  }
5687  }
5688  }
5689 
5690  *t = '\0';
5691  if (t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
5692  STR_SET_LEN(str, t - RSTRING_PTR(str));
5693  modify = 1;
5694  }
5695 
5696  if (modify) return str;
5697  return Qnil;
5698 }
5699 
5700 
5701 /*
5702  * call-seq:
5703  * str.squeeze([other_str]*) -> new_str
5704  *
5705  * Builds a set of characters from the <i>other_str</i> parameter(s) using the
5706  * procedure described for <code>String#count</code>. Returns a new string
5707  * where runs of the same character that occur in this set are replaced by a
5708  * single character. If no arguments are given, all runs of identical
5709  * characters are replaced by a single character.
5710  *
5711  * "yellow moon".squeeze #=> "yelow mon"
5712  * " now is the".squeeze(" ") #=> " now is the"
5713  * "putters shoot balls".squeeze("m-z") #=> "puters shot balls"
5714  */
5715 
5716 static VALUE
5718 {
5719  str = rb_str_dup(str);
5720  rb_str_squeeze_bang(argc, argv, str);
5721  return str;
5722 }
5723 
5724 
5725 /*
5726  * call-seq:
5727  * str.tr_s!(from_str, to_str) -> str or nil
5728  *
5729  * Performs <code>String#tr_s</code> processing on <i>str</i> in place,
5730  * returning <i>str</i>, or <code>nil</code> if no changes were made.
5731  */
5732 
5733 static VALUE
5735 {
5736  return tr_trans(str, src, repl, 1);
5737 }
5738 
5739 
5740 /*
5741  * call-seq:
5742  * str.tr_s(from_str, to_str) -> new_str
5743  *
5744  * Processes a copy of <i>str</i> as described under <code>String#tr</code>,
5745  * then removes duplicate characters in regions that were affected by the
5746  * translation.
5747  *
5748  * "hello".tr_s('l', 'r') #=> "hero"
5749  * "hello".tr_s('el', '*') #=> "h*o"
5750  * "hello".tr_s('el', 'hx') #=> "hhxo"
5751  */
5752 
5753 static VALUE
5754 rb_str_tr_s(VALUE str, VALUE src, VALUE repl)
5755 {
5756  str = rb_str_dup(str);
5757  tr_trans(str, src, repl, 1);
5758  return str;
5759 }
5760 
5761 
5762 /*
5763  * call-seq:
5764  * str.count([other_str]+) -> fixnum
5765  *
5766  * Each +other_str+ parameter defines a set of characters to count. The
5767  * intersection of these sets defines the characters to count in +str+. Any
5768  * +other_str+ that starts with a caret <code>^</code> is negated. The
5769  * sequence <code>c1-c2</code> means all characters between c1 and c2. The
5770  * backslash character <code>\</code> can be used to escape <code>^</code> or
5771  * <code>-</code> and is otherwise ignored unless it appears at the end of a
5772  * sequence or the end of an +other_str+.
5773  *
5774  * a = "hello world"
5775  * a.count "lo" #=> 5
5776  * a.count "lo", "o" #=> 2
5777  * a.count "hello", "^l" #=> 4
5778  * a.count "ej-m" #=> 4
5779  *
5780  * "hello^world".count "\\^aeiou" #=> 4
5781  * "hello-world".count "a\\-eo" #=> 4
5782  *
5783  * c = "hello world\\r\\n"
5784  * c.count "\\" #=> 2
5785  * c.count "\\A" #=> 0
5786  * c.count "X-\\w" #=> 3
5787  */
5788 
5789 static VALUE
5791 {
5792  char table[TR_TABLE_SIZE];
5793  rb_encoding *enc = 0;
5794  VALUE del = 0, nodel = 0;
5795  char *s, *send;
5796  int i;
5797  int ascompat;
5798 
5800  for (i=0; i<argc; i++) {
5801  VALUE tstr = argv[i];
5802  unsigned char c;
5803 
5804  StringValue(tstr);
5805  enc = rb_enc_check(str, tstr);
5806  if (argc == 1 && RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
5807  (c = RSTRING_PTR(tstr)[0]) < 0x80 && !is_broken_string(str)) {
5808  int n = 0;
5809 
5810  s = RSTRING_PTR(str);
5811  if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
5812  send = RSTRING_END(str);
5813  while (s < send) {
5814  if (*(unsigned char*)s++ == c) n++;
5815  }
5816  return INT2NUM(n);
5817  }
5818  tr_setup_table(tstr, table, i==0, &del, &nodel, enc);
5819  }
5820 
5821  s = RSTRING_PTR(str);
5822  if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
5823  send = RSTRING_END(str);
5824  ascompat = rb_enc_asciicompat(enc);
5825  i = 0;
5826  while (s < send) {
5827  unsigned int c;
5828 
5829  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
5830  if (table[c]) {
5831  i++;
5832  }
5833  s++;
5834  }
5835  else {
5836  int clen;
5837  c = rb_enc_codepoint_len(s, send, &clen, enc);
5838  if (tr_find(c, table, del, nodel)) {
5839  i++;
5840  }
5841  s += clen;
5842  }
5843  }
5844 
5845  return INT2NUM(i);
5846 }
5847 
/*
 * Byte-indexed whitespace table: the entry is 1 for the six ASCII
 * whitespace bytes (0x09-0x0D and 0x20) and 0 for every other value.
 */
static const char isspacetable[256] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

/* The cast keeps a negative plain char from indexing before the table. */
#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
5868 
5869 /*
5870  * call-seq:
5871  * str.split(pattern=$;, [limit]) -> anArray
5872  *
5873  * Divides <i>str</i> into substrings based on a delimiter, returning an array
5874  * of these substrings.
5875  *
5876  * If <i>pattern</i> is a <code>String</code>, then its contents are used as
5877  * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
5878  * space, <i>str</i> is split on whitespace, with leading whitespace and runs
5879  * of contiguous whitespace characters ignored.
5880  *
5881  * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
5882  * pattern matches. Whenever the pattern matches a zero-length string,
5883  * <i>str</i> is split into individual characters. If <i>pattern</i> contains
5884  * groups, the respective matches will be returned in the array as well.
5885  *
5886  * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If
5887  * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
5888  * split on whitespace as if ` ' were specified.
5889  *
5890  * If the <i>limit</i> parameter is omitted, trailing null fields are
5891  * suppressed. If <i>limit</i> is a positive number, at most that number of
5892  * fields will be returned (if <i>limit</i> is <code>1</code>, the entire
5893  * string is returned as the only entry in an array). If negative, there is no
5894  * limit to the number of fields returned, and trailing null fields are not
5895  * suppressed.
5896  *
5897  * When the input +str+ is empty an empty Array is returned as the string is
5898  * considered to have no fields to split.
5899  *
5900  * " now's the time".split #=> ["now's", "the", "time"]
5901  * " now's the time".split(' ') #=> ["now's", "the", "time"]
5902  * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"]
5903  * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
5904  * "hello".split(//) #=> ["h", "e", "l", "l", "o"]
5905  * "hello".split(//, 3) #=> ["h", "e", "llo"]
5906  * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"]
5907  *
5908  * "mellow yellow".split("ello") #=> ["m", "w y", "w"]
5909  * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
5910  * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"]
5911  * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
5912  *
5913  * "".split(',', -1) #=> []
5914  */
5915 
5916 static VALUE
5918 {
5919  rb_encoding *enc;
5920  VALUE spat;
5921  VALUE limit;
5922  enum {awk, string, regexp} split_type;
5923  long beg, end, i = 0;
5924  int lim = 0;
5925  VALUE result, tmp;
5926 
5927  if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
5928  lim = NUM2INT(limit);
5929  if (lim <= 0) limit = Qnil;
5930  else if (lim == 1) {
5931  if (RSTRING_LEN(str) == 0)
5932  return rb_ary_new2(0);
5933  return rb_ary_new3(1, str);
5934  }
5935  i = 1;
5936  }
5937 
5938  enc = STR_ENC_GET(str);
5939  if (NIL_P(spat)) {
5940  if (!NIL_P(rb_fs)) {
5941  spat = rb_fs;
5942  goto fs_set;
5943  }
5944  split_type = awk;
5945  }
5946  else {
5947  fs_set:
5948  if (RB_TYPE_P(spat, T_STRING)) {
5949  rb_encoding *enc2 = STR_ENC_GET(spat);
5950 
5951  split_type = string;
5952  if (RSTRING_LEN(spat) == 0) {
5953  /* Special case - split into chars */
5954  spat = rb_reg_regcomp(spat);
5955  split_type = regexp;
5956  }
5957  else if (rb_enc_asciicompat(enc2) == 1) {
5958  if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){
5959  split_type = awk;
5960  }
5961  }
5962  else {
5963  int l;
5964  if (rb_enc_ascget(RSTRING_PTR(spat), RSTRING_END(spat), &l, enc2) == ' ' &&
5965  RSTRING_LEN(spat) == l) {
5966  split_type = awk;
5967  }
5968  }
5969  }
5970  else {
5971  spat = get_pat(spat, 1);
5972  split_type = regexp;
5973  }
5974  }
5975 
5976  result = rb_ary_new();
5977  beg = 0;
5978  if (split_type == awk) {
5979  char *ptr = RSTRING_PTR(str);
5980  char *eptr = RSTRING_END(str);
5981  char *bptr = ptr;
5982  int skip = 1;
5983  unsigned int c;
5984 
5985  end = beg;
5986  if (is_ascii_string(str)) {
5987  while (ptr < eptr) {
5988  c = (unsigned char)*ptr++;
5989  if (skip) {
5990  if (ascii_isspace(c)) {
5991  beg = ptr - bptr;
5992  }
5993  else {
5994  end = ptr - bptr;
5995  skip = 0;
5996  if (!NIL_P(limit) && lim <= i) break;
5997  }
5998  }
5999  else if (ascii_isspace(c)) {
6000  rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
6001  skip = 1;
6002  beg = ptr - bptr;
6003  if (!NIL_P(limit)) ++i;
6004  }
6005  else {
6006  end = ptr - bptr;
6007  }
6008  }
6009  }
6010  else {
6011  while (ptr < eptr) {
6012  int n;
6013 
6014  c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
6015  ptr += n;
6016  if (skip) {
6017  if (rb_isspace(c)) {
6018  beg = ptr - bptr;
6019  }
6020  else {
6021  end = ptr - bptr;
6022  skip = 0;
6023  if (!NIL_P(limit) && lim <= i) break;
6024  }
6025  }
6026  else if (rb_isspace(c)) {
6027  rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
6028  skip = 1;
6029  beg = ptr - bptr;
6030  if (!NIL_P(limit)) ++i;
6031  }
6032  else {
6033  end = ptr - bptr;
6034  }
6035  }
6036  }
6037  }
6038  else if (split_type == string) {
6039  char *ptr = RSTRING_PTR(str);
6040  char *temp = ptr;
6041  char *eptr = RSTRING_END(str);
6042  char *sptr = RSTRING_PTR(spat);
6043  long slen = RSTRING_LEN(spat);
6044 
6045  if (is_broken_string(str)) {
6046  rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
6047  }
6048  if (is_broken_string(spat)) {
6049  rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(spat)));
6050  }
6051  enc = rb_enc_check(str, spat);
6052  while (ptr < eptr &&
6053  (end = rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
6054  /* Check we are at the start of a char */
6055  char *t = rb_enc_right_char_head(ptr, ptr + end, eptr, enc);
6056  if (t != ptr + end) {
6057  ptr = t;
6058  continue;
6059  }
6060  rb_ary_push(result, rb_str_subseq(str, ptr - temp, end));
6061  ptr += end + slen;
6062  if (!NIL_P(limit) && lim <= ++i) break;
6063  }
6064  beg = ptr - temp;
6065  }
6066  else {
6067  char *ptr = RSTRING_PTR(str);
6068  long len = RSTRING_LEN(str);
6069  long start = beg;
6070  long idx;
6071  int last_null = 0;
6072  struct re_registers *regs;
6073 
6074  while ((end = rb_reg_search(spat, str, start, 0)) >= 0) {
6075  regs = RMATCH_REGS(rb_backref_get());
6076  if (start == end && BEG(0) == END(0)) {
6077  if (!ptr) {
6078  rb_ary_push(result, str_new_empty(str));
6079  break;
6080  }
6081  else if (last_null == 1) {
6082  rb_ary_push(result, rb_str_subseq(str, beg,
6083  rb_enc_fast_mbclen(ptr+beg,
6084  ptr+len,
6085  enc)));
6086  beg = start;
6087  }
6088  else {
6089  if (ptr+start == ptr+len)
6090  start++;
6091  else
6092  start += rb_enc_fast_mbclen(ptr+start,ptr+len,enc);
6093  last_null = 1;
6094  continue;
6095  }
6096  }
6097  else {
6098  rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
6099  beg = start = END(0);
6100  }
6101  last_null = 0;
6102 
6103  for (idx=1; idx < regs->num_regs; idx++) {
6104  if (BEG(idx) == -1) continue;
6105  if (BEG(idx) == END(idx))
6106  tmp = str_new_empty(str);
6107  else
6108  tmp = rb_str_subseq(str, BEG(idx), END(idx)-BEG(idx));
6109  rb_ary_push(result, tmp);
6110  }
6111  if (!NIL_P(limit) && lim <= ++i) break;
6112  }
6113  }
6114  if (RSTRING_LEN(str) > 0 && (!NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
6115  if (RSTRING_LEN(str) == beg)
6116  tmp = str_new_empty(str);
6117  else
6118  tmp = rb_str_subseq(str, beg, RSTRING_LEN(str)-beg);
6119  rb_ary_push(result, tmp);
6120  }
6121  if (NIL_P(limit) && lim == 0) {
6122  long len;
6123  while ((len = RARRAY_LEN(result)) > 0 &&
6124  (tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0))
6125  rb_ary_pop(result);
6126  }
6127 
6128  return result;
6129 }
6130 
6131 VALUE
6132 rb_str_split(VALUE str, const char *sep0)
6133 {
6134  VALUE sep;
6135 
6136  StringValue(str);
6137  sep = rb_str_new2(sep0);
6138  return rb_str_split_m(1, &sep, str);
6139 }
6140 
6141 
6142 static VALUE
6143 rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, int wantarray)
6144 {
6145  rb_encoding *enc;
6146  VALUE rs;
6147  unsigned int newline;
6148  const char *p, *pend, *s, *ptr;
6149  long len, rslen;
6150  VALUE line;
6151  int n;
6152  VALUE orig = str;
6153  VALUE UNINITIALIZED_VAR(ary);
6154 
6155  if (argc == 0) {
6156  rs = rb_rs;
6157  }
6158  else {
6159  rb_scan_args(argc, argv, "01", &rs);
6160  }
6161 
6162  if (rb_block_given_p()) {
6163  if (wantarray) {
6164 #if 0 /* next major */
6165  rb_warn("given block not used");
6166  ary = rb_ary_new();
6167 #else
6168  rb_warning("passing a block to String#lines is deprecated");
6169  wantarray = 0;
6170 #endif
6171  }
6172  }
6173  else {
6174  if (wantarray)
6175  ary = rb_ary_new();
6176  else
6177  RETURN_ENUMERATOR(str, argc, argv);
6178  }
6179 
6180  if (NIL_P(rs)) {
6181  if (wantarray) {
6182  rb_ary_push(ary, str);
6183  return ary;
6184  }
6185  else {
6186  rb_yield(str);
6187  return orig;
6188  }
6189  }
6190  str = rb_str_new4(str);
6191  ptr = p = s = RSTRING_PTR(str);
6192  pend = p + RSTRING_LEN(str);
6193  len = RSTRING_LEN(str);
6194  StringValue(rs);
6195  if (rs == rb_default_rs) {
6196  enc = rb_enc_get(str);
6197  while (p < pend) {
6198  char *p0;
6199 
6200  p = memchr(p, '\n', pend - p);
6201  if (!p) break;
6202  p0 = rb_enc_left_char_head(s, p, pend, enc);
6203  if (!rb_enc_is_newline(p0, pend, enc)) {
6204  p++;
6205  continue;
6206  }
6207  p = p0 + rb_enc_mbclen(p0, pend, enc);
6208  line = rb_str_subseq(str, s - ptr, p - s);
6209  if (wantarray)
6210  rb_ary_push(ary, line);
6211  else
6212  rb_yield(line);
6213  str_mod_check(str, ptr, len);
6214  s = p;
6215  }
6216  goto finish;
6217  }
6218 
6219  enc = rb_enc_check(str, rs);
6220  rslen = RSTRING_LEN(rs);
6221  if (rslen == 0) {
6222  newline = '\n';
6223  }
6224  else {
6225  newline = rb_enc_codepoint(RSTRING_PTR(rs), RSTRING_END(rs), enc);
6226  }
6227 
6228  while (p < pend) {
6229  unsigned int c = rb_enc_codepoint_len(p, pend, &n, enc);
6230 
6231  again:
6232  if (rslen == 0 && c == newline) {
6233  p += n;
6234  if (p < pend && (c = rb_enc_codepoint_len(p, pend, &n, enc)) != newline) {
6235  goto again;
6236  }
6237  while (p < pend && rb_enc_codepoint(p, pend, enc) == newline) {
6238  p += n;
6239  }
6240  p -= n;
6241  }
6242  if (c == newline &&
6243  (rslen <= 1 ||
6244  (pend - p >= rslen && memcmp(RSTRING_PTR(rs), p, rslen) == 0))) {
6245  const char *pp = p + (rslen ? rslen : n);
6246  line = rb_str_subseq(str, s - ptr, pp - s);
6247  if (wantarray)
6248  rb_ary_push(ary, line);
6249  else
6250  rb_yield(line);
6251  str_mod_check(str, ptr, len);
6252  s = pp;
6253  }
6254  p += n;
6255  }
6256 
6257  finish:
6258  if (s != pend) {
6259  line = rb_str_subseq(str, s - ptr, pend - s);
6260  if (wantarray)
6261  rb_ary_push(ary, line);
6262  else
6263  rb_yield(line);
6264  RB_GC_GUARD(str);
6265  }
6266 
6267  if (wantarray)
6268  return ary;
6269  else
6270  return orig;
6271 }
6272 
6273 /*
6274  * call-seq:
6275  * str.each_line(separator=$/) {|substr| block } -> str
6276  * str.each_line(separator=$/) -> an_enumerator
6277  *
6278  * Splits <i>str</i> using the supplied parameter as the record
6279  * separator (<code>$/</code> by default), passing each substring in
6280  * turn to the supplied block. If a zero-length record separator is
6281  * supplied, the string is split into paragraphs delimited by
6282  * multiple successive newlines.
6283  *
6284  * If no block is given, an enumerator is returned instead.
6285  *
6286  * print "Example one\n"
6287  * "hello\nworld".each_line {|s| p s}
6288  * print "Example two\n"
6289  * "hello\nworld".each_line('l') {|s| p s}
6290  * print "Example three\n"
6291  * "hello\n\n\nworld".each_line('') {|s| p s}
6292  *
6293  * <em>produces:</em>
6294  *
6295  * Example one
6296  * "hello\n"
6297  * "world"
6298  * Example two
6299  * "hel"
6300  * "l"
6301  * "o\nworl"
6302  * "d"
6303  * Example three
6304  * "hello\n\n\n"
6305  * "world"
6306  */
6307 
6308 static VALUE
6310 {
6311  return rb_str_enumerate_lines(argc, argv, str, 0);
6312 }
6313 
6314 /*
6315  * call-seq:
6316  * str.lines(separator=$/) -> an_array
6317  *
6318  * Returns an array of lines in <i>str</i> split using the supplied
6319  * record separator (<code>$/</code> by default). This is a
6320  * shorthand for <code>str.each_line(separator).to_a</code>.
6321  *
6322  * If a block is given, which is a deprecated form, works the same as
6323  * <code>each_line</code>.
6324  */
6325 
6326 static VALUE
6328 {
6329  return rb_str_enumerate_lines(argc, argv, str, 1);
6330 }
6331 
/*
 * Returns the byte length of str as a Fixnum.
 *
 * NOTE(review): the declarator line (function name and parameter list) that
 * belongs between the return type and the opening brace is absent from this
 * listing.  Presumably this is the size callback used by the byte
 * enumerator -- confirm against upstream and restore it before compiling.
 */
6332 static VALUE
6334 {
6335  return LONG2FIX(RSTRING_LEN(str));
6336 }
6337 
6338 static VALUE
6339 rb_str_enumerate_bytes(VALUE str, int wantarray)
6340 {
6341  long i;
6342  VALUE UNINITIALIZED_VAR(ary);
6343 
6344  if (rb_block_given_p()) {
6345  if (wantarray) {
6346 #if 0 /* next major */
6347  rb_warn("given block not used");
6348  ary = rb_ary_new();
6349 #else
6350  rb_warning("passing a block to String#bytes is deprecated");
6351  wantarray = 0;
6352 #endif
6353  }
6354  }
6355  else {
6356  if (wantarray)
6357  ary = rb_ary_new2(RSTRING_LEN(str));
6358  else
6360  }
6361 
6362  for (i=0; i<RSTRING_LEN(str); i++) {
6363  if (wantarray)
6364  rb_ary_push(ary, INT2FIX(RSTRING_PTR(str)[i] & 0xff));
6365  else
6366  rb_yield(INT2FIX(RSTRING_PTR(str)[i] & 0xff));
6367  }
6368  if (wantarray)
6369  return ary;
6370  else
6371  return str;
6372 }
6373 
6374 /*
6375  * call-seq:
6376  * str.each_byte {|fixnum| block } -> str
6377  * str.each_byte -> an_enumerator
6378  *
6379  * Passes each byte in <i>str</i> to the given block, or returns an
6380  * enumerator if no block is given.
6381  *
6382  * "hello".each_byte {|c| print c, ' ' }
6383  *
6384  * <em>produces:</em>
6385  *
6386  * 104 101 108 108 111
6387  */
6388 
6389 static VALUE
6391 {
6392  return rb_str_enumerate_bytes(str, 0);
6393 }
6394 
6395 /*
6396  * call-seq:
6397  * str.bytes -> an_array
6398  *
6399  * Returns an array of bytes in <i>str</i>. This is a shorthand for
6400  * <code>str.each_byte.to_a</code>.
6401  *
6402  * If a block is given, which is a deprecated form, works the same as
6403  * <code>each_byte</code>.
6404  */
6405 
6406 static VALUE
6408 {
6409  return rb_str_enumerate_bytes(str, 1);
6410 }
6411 
/*
 * Returns the number of characters in str as a Fixnum.  When the string is
 * single-byte optimizable the byte length is used directly; otherwise the
 * string is walked one character at a time with rb_enc_mbclen.
 *
 * NOTE(review): the declarator line (function name and parameter list) that
 * belongs between the return type and the opening brace is absent from this
 * listing.  Presumably this is the size callback used by the character
 * enumerator -- confirm against upstream and restore it before compiling.
 */
6412 static VALUE
6414 {
6415  long len = RSTRING_LEN(str);
6416  if (!single_byte_optimizable(str)) {
6417  const char *ptr = RSTRING_PTR(str);
6418  rb_encoding *enc = rb_enc_get(str);
6419  const char *end_ptr = ptr + len;
 /* len is reused as the character counter from here on */
6420  for (len = 0; ptr < end_ptr; ++len) {
6421  ptr += rb_enc_mbclen(ptr, end_ptr, enc);
6422  }
6423  }
6424  return LONG2FIX(len);
6425 }
6426 
6427 static VALUE
6428 rb_str_enumerate_chars(VALUE str, int wantarray)
6429 {
6430  VALUE orig = str;
6431  VALUE substr;
6432  long i, len, n;
6433  const char *ptr;
6434  rb_encoding *enc;
6435  VALUE UNINITIALIZED_VAR(ary);
6436 
6437  if (rb_block_given_p()) {
6438  if (wantarray) {
6439 #if 0 /* next major */
6440  rb_warn("given block not used");
6441  ary = rb_ary_new();
6442 #else
6443  rb_warning("passing a block to String#chars is deprecated");
6444  wantarray = 0;
6445 #endif
6446  }
6447  }
6448  else {
6449  if (wantarray)
6450  ary = rb_ary_new();
6451  else
6453  }
6454 
6455  str = rb_str_new4(str);
6456  ptr = RSTRING_PTR(str);
6457  len = RSTRING_LEN(str);
6458  enc = rb_enc_get(str);
6459  switch (ENC_CODERANGE(str)) {
6460  case ENC_CODERANGE_VALID:
6461  case ENC_CODERANGE_7BIT:
6462  for (i = 0; i < len; i += n) {
6463  n = rb_enc_fast_mbclen(ptr + i, ptr + len, enc);
6464  substr = rb_str_subseq(str, i, n);
6465  if (wantarray)
6466  rb_ary_push(ary, substr);
6467  else
6468  rb_yield(substr);
6469  }
6470  break;
6471  default:
6472  for (i = 0; i < len; i += n) {
6473  n = rb_enc_mbclen(ptr + i, ptr + len, enc);
6474  substr = rb_str_subseq(str, i, n);
6475  if (wantarray)
6476  rb_ary_push(ary, substr);
6477  else
6478  rb_yield(substr);
6479  }
6480  }
6481  RB_GC_GUARD(str);
6482  if (wantarray)
6483  return ary;
6484  else
6485  return orig;
6486 }
6487 
6488 /*
6489  * call-seq:
6490  * str.each_char {|cstr| block } -> str
6491  * str.each_char -> an_enumerator
6492  *
6493  * Passes each character in <i>str</i> to the given block, or returns
6494  * an enumerator if no block is given.
6495  *
6496  * "hello".each_char {|c| print c, ' ' }
6497  *
6498  * <em>produces:</em>
6499  *
6500  * h e l l o
6501  */
6502 
6503 static VALUE
6505 {
6506  return rb_str_enumerate_chars(str, 0);
6507 }
6508 
6509 /*
6510  * call-seq:
6511  * str.chars -> an_array
6512  *
6513  * Returns an array of characters in <i>str</i>. This is a shorthand
6514  * for <code>str.each_char.to_a</code>.
6515  *
6516  * If a block is given, which is a deprecated form, works the same as
6517  * <code>each_char</code>.
6518  */
6519 
6520 static VALUE
6522 {
6523  return rb_str_enumerate_chars(str, 1);
6524 }
6525 
6526 
6527 static VALUE
6529 {
6530  VALUE orig = str;
6531  int n;
6532  unsigned int c;
6533  const char *ptr, *end;
6534  rb_encoding *enc;
6535  VALUE UNINITIALIZED_VAR(ary);
6536 
6537  if (single_byte_optimizable(str))
6538  return rb_str_enumerate_bytes(str, wantarray);
6539 
6540  if (rb_block_given_p()) {
6541  if (wantarray) {
6542 #if 0 /* next major */
6543  rb_warn("given block not used");
6544  ary = rb_ary_new();
6545 #else
6546  rb_warning("passing a block to String#codepoints is deprecated");
6547  wantarray = 0;
6548 #endif
6549  }
6550  }
6551  else {
6552  if (wantarray)
6553  ary = rb_ary_new();
6554  else
6556  }
6557 
6558  str = rb_str_new4(str);
6559  ptr = RSTRING_PTR(str);
6560  end = RSTRING_END(str);
6561  enc = STR_ENC_GET(str);
6562  while (ptr < end) {
6563  c = rb_enc_codepoint_len(ptr, end, &n, enc);
6564  if (wantarray)
6565  rb_ary_push(ary, UINT2NUM(c));
6566  else
6567  rb_yield(UINT2NUM(c));
6568  ptr += n;
6569  }
6570  RB_GC_GUARD(str);
6571  if (wantarray)
6572  return ary;
6573  else
6574  return orig;
6575 }
6576 
6577 /*
6578  * call-seq:
6579  * str.each_codepoint {|integer| block } -> str
6580  * str.each_codepoint -> an_enumerator
6581  *
6582  * Passes the <code>Integer</code> ordinal of each character in <i>str</i>,
6583  * also known as a <i>codepoint</i> when applied to Unicode strings, to the
6584  * given block.
6585  *
6586  * If no block is given, an enumerator is returned instead.
6587  *
6588  * "hello\u0639".each_codepoint {|c| print c, ' ' }
6589  *
6590  * <em>produces:</em>
6591  *
6592  * 104 101 108 108 111 1593
6593  */
6594 
6595 static VALUE
6597 {
6598  return rb_str_enumerate_codepoints(str, 0);
6599 }
6600 
6601 /*
6602  * call-seq:
6603  * str.codepoints -> an_array
6604  *
6605  * Returns an array of the <code>Integer</code> ordinals of the
6606  * characters in <i>str</i>. This is a shorthand for
6607  * <code>str.each_codepoint.to_a</code>.
6608  *
6609  * If a block is given, which is a deprecated form, works the same as
6610  * <code>each_codepoint</code>.
6611  */
6612 
6613 static VALUE
6615 {
6616  return rb_str_enumerate_codepoints(str, 1);
6617 }
6618 
6619 
6620 static long
6622 {
6623  rb_encoding *enc = STR_ENC_GET(str);
6624  const char *p, *p2, *beg, *end;
6625 
6626  beg = RSTRING_PTR(str);
6627  end = beg + RSTRING_LEN(str);
6628  if (beg > end) return 0;
6629  p = rb_enc_prev_char(beg, end, end, enc);
6630  if (!p) return 0;
6631  if (p > beg && rb_enc_ascget(p, end, 0, enc) == '\n') {
6632  p2 = rb_enc_prev_char(beg, p, end, enc);
6633  if (p2 && rb_enc_ascget(p2, end, 0, enc) == '\r') p = p2;
6634  }
6635  return p - beg;
6636 }
6637 
6638 /*
6639  * call-seq:
6640  * str.chop! -> str or nil
6641  *
6642  * Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
6643  * or <code>nil</code> if <i>str</i> is the empty string. See also
6644  * <code>String#chomp!</code>.
6645  */
6646 
6647 static VALUE
6649 {
6650  str_modify_keep_cr(str);
6651  if (RSTRING_LEN(str) > 0) {
6652  long len;
6653  len = chopped_length(str);
6654  STR_SET_LEN(str, len);
6655  RSTRING_PTR(str)[len] = '\0';
6656  if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
6657  ENC_CODERANGE_CLEAR(str);
6658  }
6659  return str;
6660  }
6661  return Qnil;
6662 }
6663 
6664 
6665 /*
6666  * call-seq:
6667  * str.chop -> new_str
6668  *
6669  * Returns a new <code>String</code> with the last character removed. If the
6670  * string ends with <code>\r\n</code>, both characters are removed. Applying
6671  * <code>chop</code> to an empty string returns an empty
6672  * string. <code>String#chomp</code> is often a safer alternative, as it leaves
6673  * the string unchanged if it doesn't end in a record separator.
6674  *
6675  * "string\r\n".chop #=> "string"
6676  * "string\n\r".chop #=> "string\n"
6677  * "string\n".chop #=> "string"
6678  * "string".chop #=> "strin"
6679  * "x".chop.chop #=> ""
6680  */
6681 
6682 static VALUE
6684 {
6685  return rb_str_subseq(str, 0, chopped_length(str));
6686 }
6687 
6688 
6689 /*
6690  * call-seq:
6691  * str.chomp!(separator=$/) -> str or nil
6692  *
6693  * Modifies <i>str</i> in place as described for <code>String#chomp</code>,
6694  * returning <i>str</i>, or <code>nil</code> if no modifications were made.
6695  */
6696 
6697 static VALUE
6699 {
6700  rb_encoding *enc;
6701  VALUE rs;
6702  int newline;
6703  char *p, *pp, *e;
6704  long len, rslen;
6705 
6706  str_modify_keep_cr(str);
6707  len = RSTRING_LEN(str);
6708  if (len == 0) return Qnil;
6709  p = RSTRING_PTR(str);
6710  e = p + len;
6711  if (argc == 0) {
6712  rs = rb_rs;
6713  if (rs == rb_default_rs) {
6714  smart_chomp:
6715  enc = rb_enc_get(str);
6716  if (rb_enc_mbminlen(enc) > 1) {
6717  pp = rb_enc_left_char_head(p, e-rb_enc_mbminlen(enc), e, enc);
6718  if (rb_enc_is_newline(pp, e, enc)) {
6719  e = pp;
6720  }
6721  pp = e - rb_enc_mbminlen(enc);
6722  if (pp >= p) {
6723  pp = rb_enc_left_char_head(p, pp, e, enc);
6724  if (rb_enc_ascget(pp, e, 0, enc) == '\r') {
6725  e = pp;
6726  }
6727  }
6728  if (e == RSTRING_END(str)) {
6729  return Qnil;
6730  }
6731  len = e - RSTRING_PTR(str);
6732  STR_SET_LEN(str, len);
6733  }
6734  else {
6735  if (RSTRING_PTR(str)[len-1] == '\n') {
6736  STR_DEC_LEN(str);
6737  if (RSTRING_LEN(str) > 0 &&
6738  RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') {
6739  STR_DEC_LEN(str);
6740  }
6741  }
6742  else if (RSTRING_PTR(str)[len-1] == '\r') {
6743  STR_DEC_LEN(str);
6744  }
6745  else {
6746  return Qnil;
6747  }
6748  }
6749  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
6750  return str;
6751  }
6752  }
6753  else {
6754  rb_scan_args(argc, argv, "01", &rs);
6755  }
6756  if (NIL_P(rs)) return Qnil;
6757  StringValue(rs);
6758  rslen = RSTRING_LEN(rs);
6759  if (rslen == 0) {
6760  while (len>0 && p[len-1] == '\n') {
6761  len--;
6762  if (len>0 && p[len-1] == '\r')
6763  len--;
6764  }
6765  if (len < RSTRING_LEN(str)) {
6766  STR_SET_LEN(str, len);
6767  RSTRING_PTR(str)[len] = '\0';
6768  return str;
6769  }
6770  return Qnil;
6771  }
6772  if (rslen > len) return Qnil;
6773  newline = RSTRING_PTR(rs)[rslen-1];
6774  if (rslen == 1 && newline == '\n')
6775  goto smart_chomp;
6776 
6777  enc = rb_enc_check(str, rs);
6778  if (is_broken_string(rs)) {
6779  return Qnil;
6780  }
6781  pp = e - rslen;
6782  if (p[len-1] == newline &&
6783  (rslen <= 1 ||
6784  memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
6785  if (rb_enc_left_char_head(p, pp, e, enc) != pp)
6786  return Qnil;
6787  if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
6788  ENC_CODERANGE_CLEAR(str);
6789  }
6790  STR_SET_LEN(str, RSTRING_LEN(str) - rslen);
6791  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
6792  return str;
6793  }
6794  return Qnil;
6795 }
6796 
6797 
6798 /*
6799  * call-seq:
6800  * str.chomp(separator=$/) -> new_str
6801  *
6802  * Returns a new <code>String</code> with the given record separator removed
6803  * from the end of <i>str</i> (if present). If <code>$/</code> has not been
6804  * changed from the default Ruby record separator, then <code>chomp</code> also
6805  * removes carriage return characters (that is it will remove <code>\n</code>,
6806  * <code>\r</code>, and <code>\r\n</code>).
6807  *
6808  * "hello".chomp #=> "hello"
6809  * "hello\n".chomp #=> "hello"
6810  * "hello\r\n".chomp #=> "hello"
6811  * "hello\n\r".chomp #=> "hello\n"
6812  * "hello\r".chomp #=> "hello"
6813  * "hello \n there".chomp #=> "hello \n there"
6814  * "hello".chomp("llo") #=> "he"
6815  */
6816 
6817 static VALUE
6819 {
6820  str = rb_str_dup(str);
6821  rb_str_chomp_bang(argc, argv, str);
6822  return str;
6823 }
6824 
6825 /*
6826  * call-seq:
6827  * str.lstrip! -> self or nil
6828  *
6829  * Removes leading whitespace from <i>str</i>, returning <code>nil</code> if no
6830  * change was made. See also <code>String#rstrip!</code> and
6831  * <code>String#strip!</code>.
6832  *
6833  * " hello ".lstrip! #=> "hello "
6834  * "hello".lstrip! #=> nil
6835  */
6836 
6837 static VALUE
6839 {
6840  rb_encoding *enc;
6841  char *s, *t, *e;
6842 
6843  str_modify_keep_cr(str);
6844  enc = STR_ENC_GET(str);
6845  s = RSTRING_PTR(str);
6846  if (!s || RSTRING_LEN(str) == 0) return Qnil;
6847  e = t = RSTRING_END(str);
6848  /* remove spaces at head */
6849  while (s < e) {
6850  int n;
6851  unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
6852 
6853  if (!rb_isspace(cc)) break;
6854  s += n;
6855  }
6856 
6857  if (s > RSTRING_PTR(str)) {
6858  STR_SET_LEN(str, t-s);
6859  memmove(RSTRING_PTR(str), s, RSTRING_LEN(str));
6860  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
6861  return str;
6862  }
6863  return Qnil;
6864 }
6865 
6866 
6867 /*
6868  * call-seq:
6869  * str.lstrip -> new_str
6870  *
6871  * Returns a copy of <i>str</i> with leading whitespace removed. See also
6872  * <code>String#rstrip</code> and <code>String#strip</code>.
6873  *
6874  * " hello ".lstrip #=> "hello "
6875  * "hello".lstrip #=> "hello"
6876  */
6877 
6878 static VALUE
6880 {
6881  str = rb_str_dup(str);
6882  rb_str_lstrip_bang(str);
6883  return str;
6884 }
6885 
6886 
6887 /*
6888  * call-seq:
6889  * str.rstrip! -> self or nil
6890  *
6891  * Removes trailing whitespace from <i>str</i>, returning <code>nil</code> if
6892  * no change was made. See also <code>String#lstrip!</code> and
6893  * <code>String#strip!</code>.
6894  *
6895  * " hello ".rstrip! #=> " hello"
6896  * "hello".rstrip! #=> nil
6897  */
6898 
6899 static VALUE
6901 {
 /*
  * Removes trailing whitespace and NUL bytes from str in place; returns
  * str when something was removed and Qnil otherwise.
  *
  * NOTE(review): two lines are absent from this listing -- the declarator
  * (name and parameter list) between the return type and the opening brace,
  * and one statement between the STR_ENC_GET assignment and the first read
  * of RSTRING_PTR.  Restore both from upstream before compiling.
  */
6902  rb_encoding *enc;
6903  char *s, *t, *e;
6904 
6905  str_modify_keep_cr(str);
6906  enc = STR_ENC_GET(str);
6908  s = RSTRING_PTR(str);
6909  if (!s || RSTRING_LEN(str) == 0) return Qnil;
6910  t = e = RSTRING_END(str);
6911 
6912  /* remove trailing spaces or '\0's */
6913  if (single_byte_optimizable(str)) {
6914  unsigned char c;
 /* byte-wise scan backwards from the end */
6915  while (s < t && ((c = *(t-1)) == '\0' || ascii_isspace(c))) t--;
6916  }
6917  else {
6918  char *tp;
6919 
 /* step backwards one character at a time; stop at the first character
  * that is neither NUL nor whitespace */
6920  while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
6921  unsigned int c = rb_enc_codepoint(tp, e, enc);
6922  if (c && !rb_isspace(c)) break;
6923  t = tp;
6924  }
6925  }
6926  if (t < e) {
6927  long len = t-RSTRING_PTR(str);
6928 
6929  STR_SET_LEN(str, len);
6930  RSTRING_PTR(str)[len] = '\0';
6931  return str;
6932  }
6933  return Qnil;
6934 }
6935 
6936 
6937 /*
6938  * call-seq:
6939  * str.rstrip -> new_str
6940  *
6941  * Returns a copy of <i>str</i> with trailing whitespace removed. See also
6942  * <code>String#lstrip</code> and <code>String#strip</code>.
6943  *
6944  * " hello ".rstrip #=> " hello"
6945  * "hello".rstrip #=> "hello"
6946  */
6947 
6948 static VALUE
6950 {
6951  str = rb_str_dup(str);
6952  rb_str_rstrip_bang(str);
6953  return str;
6954 }
6955 
6956 
6957 /*
6958  * call-seq:
6959  * str.strip! -> str or nil
6960  *
6961  * Removes leading and trailing whitespace from <i>str</i>. Returns
6962  * <code>nil</code> if <i>str</i> was not altered.
6963  */
6964 
6965 static VALUE
6967 {
6968  VALUE l = rb_str_lstrip_bang(str);
6969  VALUE r = rb_str_rstrip_bang(str);
6970 
6971  if (NIL_P(l) && NIL_P(r)) return Qnil;
6972  return str;
6973 }
6974 
6975 
6976 /*
6977  * call-seq:
6978  * str.strip -> new_str
6979  *
6980  * Returns a copy of <i>str</i> with leading and trailing whitespace removed.
6981  *
6982  * " hello ".strip #=> "hello"
6983  * "\tgoodbye\r\n".strip #=> "goodbye"
6984  */
6985 
6986 static VALUE
6988 {
6989  str = rb_str_dup(str);
6990  rb_str_strip_bang(str);
6991  return str;
6992 }
6993 
6994 static VALUE
6995 scan_once(VALUE str, VALUE pat, long *start)
6996 {
6997  VALUE result, match;
6998  struct re_registers *regs;
6999  int i;
7000 
7001  if (rb_reg_search(pat, str, *start, 0) >= 0) {
7002  match = rb_backref_get();
7003  regs = RMATCH_REGS(match);
7004  if (BEG(0) == END(0)) {
7005  rb_encoding *enc = STR_ENC_GET(str);
7006  /*
7007  * Always consume at least one character of the input string
7008  */
7009  if (RSTRING_LEN(str) > END(0))
7010  *start = END(0)+rb_enc_fast_mbclen(RSTRING_PTR(str)+END(0),
7011  RSTRING_END(str), enc);
7012  else
7013  *start = END(0)+1;
7014  }
7015  else {
7016  *start = END(0);
7017  }
7018  if (regs->num_regs == 1) {
7019  return rb_reg_nth_match(0, match);
7020  }
7021  result = rb_ary_new2(regs->num_regs);
7022  for (i=1; i < regs->num_regs; i++) {
7023  rb_ary_push(result, rb_reg_nth_match(i, match));
7024  }
7025 
7026  return result;
7027  }
7028  return Qnil;
7029 }
7030 
7031 
7032 /*
7033  * call-seq:
7034  * str.scan(pattern) -> array
7035  * str.scan(pattern) {|match, ...| block } -> str
7036  *
7037  * Both forms iterate through <i>str</i>, matching the pattern (which may be a
7038  * <code>Regexp</code> or a <code>String</code>). For each match, a result is
7039  * generated and either added to the result array or passed to the block. If
7040  * the pattern contains no groups, each individual result consists of the
7041  * matched string, <code>$&</code>. If the pattern contains groups, each
7042  * individual result is itself an array containing one entry per group.
7043  *
7044  * a = "cruel world"
7045  * a.scan(/\w+/) #=> ["cruel", "world"]
7046  * a.scan(/.../) #=> ["cru", "el ", "wor"]
7047  * a.scan(/(...)/) #=> [["cru"], ["el "], ["wor"]]
7048  * a.scan(/(..)(..)/) #=> [["cr", "ue"], ["l ", "wo"]]
7049  *
7050  * And the block form:
7051  *
7052  * a.scan(/\w+/) {|w| print "<<#{w}>> " }
7053  * print "\n"
7054  * a.scan(/(.)(.)/) {|x,y| print y, x }
7055  * print "\n"
7056  *
7057  * <em>produces:</em>
7058  *
7059  * <<cruel>> <<world>>
7060  * rceu lowlr
7061  */
7062 
7063 static VALUE
7065 {
7066  VALUE result;
7067  long start = 0;
7068  long last = -1, prev = 0;
7069  char *p = RSTRING_PTR(str); long len = RSTRING_LEN(str);
7070 
7071  pat = get_pat(pat, 1);
7072  if (!rb_block_given_p()) {
7073  VALUE ary = rb_ary_new();
7074 
7075  while (!NIL_P(result = scan_once(str, pat, &start))) {
7076  last = prev;
7077  prev = start;
7078  rb_ary_push(ary, result);
7079  }
7080  if (last >= 0) rb_reg_search(pat, str, last, 0);
7081  return ary;
7082  }
7083 
7084  while (!NIL_P(result = scan_once(str, pat, &start))) {
7085  last = prev;
7086  prev = start;
7087  rb_yield(result);
7088  str_mod_check(str, p, len);
7089  }
7090  if (last >= 0) rb_reg_search(pat, str, last, 0);
7091  return str;
7092 }
7093 
7094 
7095 /*
7096  * call-seq:
7097  * str.hex -> integer
7098  *
7099  * Treats leading characters from <i>str</i> as a string of hexadecimal digits
7100  * (with an optional sign and an optional <code>0x</code>) and returns the
7101  * corresponding number. Zero is returned on error.
7102  *
7103  * "0x0a".hex #=> 10
7104  * "-1234".hex #=> -4660
7105  * "0".hex #=> 0
7106  * "wombat".hex #=> 0
7107  */
7108 
7109 static VALUE
7111 {
7112  return rb_str_to_inum(str, 16, FALSE);
7113 }
7114 
7115 
7116 /*
7117  * call-seq:
7118  * str.oct -> integer
7119  *
7120  * Treats leading characters of <i>str</i> as a string of octal digits (with an
7121  * optional sign) and returns the corresponding number. Returns 0 if the
7122  * conversion fails.
7123  *
7124  * "123".oct #=> 83
7125  * "-377".oct #=> -255
7126  * "bad".oct #=> 0
7127  * "0377bad".oct #=> 255
7128  */
7129 
7130 static VALUE
7132 {
7133  return rb_str_to_inum(str, -8, FALSE);
7134 }
7135 
7136 
7137 /*
7138  * call-seq:
7139  * str.crypt(salt_str) -> new_str
7140  *
7141  * Applies a one-way cryptographic hash to <i>str</i> by invoking the
7142  * standard library function <code>crypt(3)</code> with the given
7143  * salt string. While the format and the result are system and
7144  * implementation dependent, using a salt matching the regular
7145  * expression <code>\A[a-zA-Z0-9./]{2}</code> should be valid and
7146  * safe on any platform, in which only the first two characters are
7147  * significant.
7148  *
7149  * This method is for use in system specific scripts, so if you want
7150  * a cross-platform hash function consider using Digest or OpenSSL
7151  * instead.
7152  */
7153 
7154 static VALUE
7156 {
7157  extern char *crypt(const char *, const char *);
7158  VALUE result;
7159  const char *s, *saltp;
7160  char *res;
7161 #ifdef BROKEN_CRYPT
7162  char salt_8bit_clean[3];
7163 #endif
7164 
7165  StringValue(salt);
7166  if (RSTRING_LEN(salt) < 2)
7167  rb_raise(rb_eArgError, "salt too short (need >=2 bytes)");
7168 
7169  s = RSTRING_PTR(str);
7170  if (!s) s = "";
7171  saltp = RSTRING_PTR(salt);
7172 #ifdef BROKEN_CRYPT
7173  if (!ISASCII((unsigned char)saltp[0]) || !ISASCII((unsigned char)saltp[1])) {
7174  salt_8bit_clean[0] = saltp[0] & 0x7f;
7175  salt_8bit_clean[1] = saltp[1] & 0x7f;
7176  salt_8bit_clean[2] = '\0';
7177  saltp = salt_8bit_clean;
7178  }
7179 #endif
7180  res = crypt(s, saltp);
7181  if (!res) {
7182  rb_sys_fail("crypt");
7183  }
7184  result = rb_str_new2(res);
7185  OBJ_INFECT(result, str);
7186  OBJ_INFECT(result, salt);
7187  return result;
7188 }
7189 
7190 
7191 /*
7192  * call-seq:
7193  * str.intern -> symbol
7194  * str.to_sym -> symbol
7195  *
7196  * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
7197  * symbol if it did not previously exist. See <code>Symbol#id2name</code>.
7198  *
7199  * "Koala".intern #=> :Koala
7200  * s = 'cat'.to_sym #=> :cat
7201  * s == :cat #=> true
7202  * s = '@cat'.to_sym #=> :@cat
7203  * s == :@cat #=> true
7204  *
7205  * This can also be used to create symbols that cannot be represented using the
7206  * <code>:xxx</code> notation.
7207  *
7208  * 'cat and dog'.to_sym #=> :"cat and dog"
7209  */
7210 
7211 VALUE
7213 {
7214  VALUE str = RB_GC_GUARD(s);
7215  ID id;
7216 
7217  id = rb_intern_str(str);
7218  return ID2SYM(id);
7219 }
7220 
7221 
7222 /*
7223  * call-seq:
7224  * str.ord -> integer
7225  *
7226  * Return the <code>Integer</code> ordinal of a one-character string.
7227  *
7228  * "a".ord #=> 97
7229  */
7230 
7231 VALUE
7233 {
7234  unsigned int c;
7235 
7237  return UINT2NUM(c);
7238 }
7239 /*
7240  * call-seq:
7241  * str.sum(n=16) -> integer
7242  *
7243  * Returns a basic <em>n</em>-bit checksum of the bytes in <i>str</i>,
7244  * where <em>n</em> is the optional <code>Fixnum</code> parameter, defaulting
7245  * to 16. The result is simply the sum of the binary value of each byte in
7246  * <i>str</i> modulo <code>2**n</code> (that is, masked with <code>2**n - 1</code>).
7247  * This is not a particularly good checksum.
7248  */
7249 
7250 static VALUE
7252 {
7253  VALUE vbits;
7254  int bits;
7255  char *ptr, *p, *pend;
7256  long len;
7257  VALUE sum = INT2FIX(0);
7258  unsigned long sum0 = 0;
7259 
7260  if (argc == 0) {
7261  bits = 16;
7262  }
7263  else {
7264  rb_scan_args(argc, argv, "01", &vbits);
7265  bits = NUM2INT(vbits);
7266  }
7267  ptr = p = RSTRING_PTR(str);
7268  len = RSTRING_LEN(str);
7269  pend = p + len;
7270 
7271  while (p < pend) {
7272  if (FIXNUM_MAX - UCHAR_MAX < sum0) {
7273  sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
7274  str_mod_check(str, ptr, len);
7275  sum0 = 0;
7276  }
7277  sum0 += (unsigned char)*p;
7278  p++;
7279  }
7280 
7281  if (bits == 0) {
7282  if (sum0) {
7283  sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
7284  }
7285  }
7286  else {
7287  if (sum == INT2FIX(0)) {
7288  if (bits < (int)sizeof(long)*CHAR_BIT) {
7289  sum0 &= (((unsigned long)1)<<bits)-1;
7290  }
7291  sum = LONG2FIX(sum0);
7292  }
7293  else {
7294  VALUE mod;
7295 
7296  if (sum0) {
7297  sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
7298  }
7299 
7300  mod = rb_funcall(INT2FIX(1), rb_intern("<<"), 1, INT2FIX(bits));
7301  mod = rb_funcall(mod, '-', 1, INT2FIX(1));
7302  sum = rb_funcall(sum, '&', 1, mod);
7303  }
7304  }
7305  return sum;
7306 }
7307 
7308 static VALUE
7309 rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
7310 {
7311  rb_encoding *enc;
7312  VALUE w;
7313  long width, len, flen = 1, fclen = 1;
7314  VALUE res;
7315  char *p;
7316  const char *f = " ";
7317  long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
7318  volatile VALUE pad;
7319  int singlebyte = 1, cr;
7320 
7321  rb_scan_args(argc, argv, "11", &w, &pad);
7322  enc = STR_ENC_GET(str);
7323  width = NUM2LONG(w);
7324  if (argc == 2) {
7325  StringValue(pad);
7326  enc = rb_enc_check(str, pad);
7327  f = RSTRING_PTR(pad);
7328  flen = RSTRING_LEN(pad);
7329  fclen = str_strlen(pad, enc);
7330  singlebyte = single_byte_optimizable(pad);
7331  if (flen == 0 || fclen == 0) {
7332  rb_raise(rb_eArgError, "zero width padding");
7333  }
7334  }
7335  len = str_strlen(str, enc);
7336  if (width < 0 || len >= width) return rb_str_dup(str);
7337  n = width - len;
7338  llen = (jflag == 'l') ? 0 : ((jflag == 'r') ? n : n/2);
7339  rlen = n - llen;
7340  cr = ENC_CODERANGE(str);
7341  if (flen > 1) {
7342  llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
7343  rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
7344  }
7345  size = RSTRING_LEN(str);
7346  if ((len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
7347  (len *= flen) >= LONG_MAX - llen2 - rlen2 ||
7348  (len += llen2 + rlen2) >= LONG_MAX - size) {
7349  rb_raise(rb_eArgError, "argument too big");
7350  }
7351  len += size;
7352  res = rb_str_new5(str, 0, len);
7353  p = RSTRING_PTR(res);
7354  if (flen <= 1) {
7355  memset(p, *f, llen);
7356  p += llen;
7357  }
7358  else {
7359  while (llen >= fclen) {
7360  memcpy(p,f,flen);
7361  p += flen;
7362  llen -= fclen;
7363  }
7364  if (llen > 0) {
7365  memcpy(p, f, llen2);
7366  p += llen2;
7367  }
7368  }
7369  memcpy(p, RSTRING_PTR(str), size);
7370  p += size;
7371  if (flen <= 1) {
7372  memset(p, *f, rlen);
7373  p += rlen;
7374  }
7375  else {
7376  while (rlen >= fclen) {
7377  memcpy(p,f,flen);
7378  p += flen;
7379  rlen -= fclen;
7380  }
7381  if (rlen > 0) {
7382  memcpy(p, f, rlen2);
7383  p += rlen2;
7384  }
7385  }
7386  *p = '\0';
7387  STR_SET_LEN(res, p-RSTRING_PTR(res));
7388  OBJ_INFECT(res, str);
7389  if (!NIL_P(pad)) OBJ_INFECT(res, pad);
7390  rb_enc_associate(res, enc);
7391  if (argc == 2)
7392  cr = ENC_CODERANGE_AND(cr, ENC_CODERANGE(pad));
7393  if (cr != ENC_CODERANGE_BROKEN)
7394  ENC_CODERANGE_SET(res, cr);
7395  return res;
7396 }
7397 
7398 
7399 /*
7400  * call-seq:
7401  * str.ljust(integer, padstr=' ') -> new_str
7402  *
7403  * If <i>integer</i> is greater than the length of <i>str</i>, returns a new
7404  * <code>String</code> of length <i>integer</i> with <i>str</i> left justified
7405  * and padded with <i>padstr</i>; otherwise, returns <i>str</i>.
7406  *
7407  * "hello".ljust(4) #=> "hello"
7408  * "hello".ljust(20) #=> "hello "
7409  * "hello".ljust(20, '1234') #=> "hello123412341234123"
7410  */
7411 
7412 static VALUE
7414 {
7415  return rb_str_justify(argc, argv, str, 'l');
7416 }
7417 
7418 
7419 /*
7420  * call-seq:
7421  * str.rjust(integer, padstr=' ') -> new_str
7422  *
7423  * If <i>integer</i> is greater than the length of <i>str</i>, returns a new
7424  * <code>String</code> of length <i>integer</i> with <i>str</i> right justified
7425  * and padded with <i>padstr</i>; otherwise, returns <i>str</i>.
7426  *
7427  * "hello".rjust(4) #=> "hello"
7428  * "hello".rjust(20) #=> " hello"
7429  * "hello".rjust(20, '1234') #=> "123412341234123hello"
7430  */
7431 
7432 static VALUE
7434 {
7435  return rb_str_justify(argc, argv, str, 'r');
7436 }
7437 
7438 
7439 /*
7440  * call-seq:
7441  * str.center(width, padstr=' ') -> new_str
7442  *
7443  * Centers +str+ in +width+. If +width+ is greater than the length of +str+,
7444  * returns a new String of length +width+ with +str+ centered and padded with
7445  * +padstr+; otherwise, returns +str+.
7446  *
7447  * "hello".center(4) #=> "hello"
7448  * "hello".center(20) #=> " hello "
7449  * "hello".center(20, '123') #=> "1231231hello12312312"
7450  */
7451 
7452 static VALUE
7454 {
7455  return rb_str_justify(argc, argv, str, 'c');
7456 }
7457 
7458 /*
7459  * call-seq:
7460  * str.partition(sep) -> [head, sep, tail]
7461  * str.partition(regexp) -> [head, match, tail]
7462  *
7463  * Searches <i>sep</i> or pattern (<i>regexp</i>) in the string
7464  * and returns the part before it, the match, and the part
7465  * after it.
7466  * If it is not found, returns <i>str</i> and two empty strings.
7467  *
7468  * "hello".partition("l") #=> ["he", "l", "lo"]
7469  * "hello".partition("x") #=> ["hello", "", ""]
7470  * "hello".partition(/.l/) #=> ["h", "el", "lo"]
7471  */
7472 
7473 static VALUE
7475 {
7476  long pos;
7477  int regex = FALSE;
7478 
7479  if (RB_TYPE_P(sep, T_REGEXP)) {
7480  pos = rb_reg_search(sep, str, 0, 0);
7481  regex = TRUE;
7482  }
7483  else {
7484  VALUE tmp;
7485 
7486  tmp = rb_check_string_type(sep);
7487  if (NIL_P(tmp)) {
7488  rb_raise(rb_eTypeError, "type mismatch: %s given",
7489  rb_obj_classname(sep));
7490  }
7491  sep = tmp;
7492  pos = rb_str_index(str, sep, 0);
7493  }
7494  if (pos < 0) {
7495  failed:
7496  return rb_ary_new3(3, str, str_new_empty(str), str_new_empty(str));
7497  }
7498  if (regex) {
7499  sep = rb_str_subpat(str, sep, INT2FIX(0));
7500  if (pos == 0 && RSTRING_LEN(sep) == 0) goto failed;
7501  }
7502  return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
7503  sep,
7504  rb_str_subseq(str, pos+RSTRING_LEN(sep),
7505  RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
7506 }
7507 
7508 /*
7509  * call-seq:
7510  * str.rpartition(sep) -> [head, sep, tail]
7511  * str.rpartition(regexp) -> [head, match, tail]
7512  *
7513  * Searches <i>sep</i> or pattern (<i>regexp</i>) in the string from the end
7514  * of the string, and returns the part before it, the match, and the part
7515  * after it.
7516  * If it is not found, returns two empty strings and <i>str</i>.
7517  *
7518  * "hello".rpartition("l") #=> ["hel", "l", "o"]
7519  * "hello".rpartition("x") #=> ["", "", "hello"]
7520  * "hello".rpartition(/.l/) #=> ["he", "ll", "o"]
7521  */
7522 
7523 static VALUE
7525 {
7526  long pos = RSTRING_LEN(str);
7527  int regex = FALSE;
7528 
7529  if (RB_TYPE_P(sep, T_REGEXP)) {
7530  pos = rb_reg_search(sep, str, pos, 1);
7531  regex = TRUE;
7532  }
7533  else {
7534  VALUE tmp;
7535 
7536  tmp = rb_check_string_type(sep);
7537  if (NIL_P(tmp)) {
7538  rb_raise(rb_eTypeError, "type mismatch: %s given",
7539  rb_obj_classname(sep));
7540  }
7541  sep = tmp;
7542  pos = rb_str_sublen(str, pos);
7543  pos = rb_str_rindex(str, sep, pos);
7544  }
7545  if (pos < 0) {
7546  return rb_ary_new3(3, str_new_empty(str), str_new_empty(str), str);
7547  }
7548  if (regex) {
7549  sep = rb_reg_nth_match(0, rb_backref_get());
7550  }
7551  return rb_ary_new3(3, rb_str_substr(str, 0, pos),
7552  sep,
7553  rb_str_substr(str,pos+str_strlen(sep,STR_ENC_GET(sep)),RSTRING_LEN(str)));
7554 }
7555 
7556 /*
7557  * call-seq:
7558  * str.start_with?([prefixes]+) -> true or false
7559  *
7560  * Returns true if +str+ starts with one of the +prefixes+ given.
7561  *
7562  * "hello".start_with?("hell") #=> true
7563  *
7564  * # returns true if one of the prefixes matches.
7565  * "hello".start_with?("heaven", "hell") #=> true
7566  * "hello".start_with?("heaven", "paradise") #=> false
7567  */
7568 
7569 static VALUE
7571 {
7572  int i;
7573 
7574  for (i=0; i<argc; i++) {
7575  VALUE tmp = argv[i];
7576  StringValue(tmp);
7577  rb_enc_check(str, tmp);
7578  if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
7579  if (memcmp(RSTRING_PTR(str), RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
7580  return Qtrue;
7581  }
7582  return Qfalse;
7583 }
7584 
7585 /*
7586  * call-seq:
7587  * str.end_with?([suffixes]+) -> true or false
7588  *
7589  * Returns true if +str+ ends with one of the +suffixes+ given.
7590  */
7591 
7592 static VALUE
7594 {
7595  int i;
7596  char *p, *s, *e;
7597  rb_encoding *enc;
7598 
7599  for (i=0; i<argc; i++) {
7600  VALUE tmp = argv[i];
7601  StringValue(tmp);
7602  enc = rb_enc_check(str, tmp);
7603  if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
7604  p = RSTRING_PTR(str);
7605  e = p + RSTRING_LEN(str);
7606  s = e - RSTRING_LEN(tmp);
7607  if (rb_enc_left_char_head(p, s, e, enc) != s)
7608  continue;
7609  if (memcmp(s, RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
7610  return Qtrue;
7611  }
7612  return Qfalse;
7613 }
7614 
7615 void
7617 {
7618  if (!NIL_P(val) && !RB_TYPE_P(val, T_STRING)) {
7619  rb_raise(rb_eTypeError, "value of %s must be String", rb_id2name(id));
7620  }
7621  *var = val;
7622 }
7623 
7624 
7625 /*
7626  * call-seq:
7627  * str.force_encoding(encoding) -> str
7628  *
7629  * Changes the encoding to +encoding+ and returns self.
7630  */
7631 
7632 static VALUE
7634 {
7635  str_modifiable(str);
7636  rb_enc_associate(str, rb_to_encoding(enc));
7637  ENC_CODERANGE_CLEAR(str);
7638  return str;
7639 }
7640 
7641 /*
7642  * call-seq:
7643  * str.b -> str
7644  *
7645  * Returns a copied string whose encoding is ASCII-8BIT.
7646  */
7647 
7648 static VALUE
7650 {
7651  VALUE str2 = str_alloc(rb_cString);
7652  str_replace_shared_without_enc(str2, str);
7653  OBJ_INFECT(str2, str);
7655  return str2;
7656 }
7657 
7658 /*
7659  * call-seq:
7660  * str.valid_encoding? -> true or false
7661  *
7662  * Returns true for a string which encoded correctly.
7663  *
7664  * "\xc2\xa1".force_encoding("UTF-8").valid_encoding? #=> true
7665  * "\xc2".force_encoding("UTF-8").valid_encoding? #=> false
7666  * "\x80".force_encoding("UTF-8").valid_encoding? #=> false
7667  */
7668 
7669 static VALUE
7671 {
7672  int cr = rb_enc_str_coderange(str);
7673 
7674  return cr == ENC_CODERANGE_BROKEN ? Qfalse : Qtrue;
7675 }
7676 
7677 /*
7678  * call-seq:
7679  * str.ascii_only? -> true or false
7680  *
7681  * Returns true for a string which has only ASCII characters.
7682  *
7683  * "abc".force_encoding("UTF-8").ascii_only? #=> true
7684  * "abc\u{6666}".force_encoding("UTF-8").ascii_only? #=> false
7685  */
7686 
7687 static VALUE
7689 {
7690  int cr = rb_enc_str_coderange(str);
7691 
7692  return cr == ENC_CODERANGE_7BIT ? Qtrue : Qfalse;
7693 }
7694 
7709 VALUE
7710 rb_str_ellipsize(VALUE str, long len)
7711 {
7712  static const char ellipsis[] = "...";
7713  const long ellipsislen = sizeof(ellipsis) - 1;
7714  rb_encoding *const enc = rb_enc_get(str);
7715  const long blen = RSTRING_LEN(str);
7716  const char *const p = RSTRING_PTR(str), *e = p + blen;
7717  VALUE estr, ret = 0;
7718 
7719  if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
7720  if (len * rb_enc_mbminlen(enc) >= blen ||
7721  (e = rb_enc_nth(p, e, len, enc)) - p == blen) {
7722  ret = str;
7723  }
7724  else if (len <= ellipsislen ||
7725  !(e = rb_enc_step_back(p, e, e, len = ellipsislen, enc))) {
7726  if (rb_enc_asciicompat(enc)) {
7727  ret = rb_str_new_with_class(str, ellipsis, len);
7728  rb_enc_associate(ret, enc);
7729  }
7730  else {
7731  estr = rb_usascii_str_new(ellipsis, len);
7732  ret = rb_str_encode(estr, rb_enc_from_encoding(enc), 0, Qnil);
7733  }
7734  }
7735  else if (ret = rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
7736  rb_str_cat(ret, ellipsis, ellipsislen);
7737  }
7738  else {
7739  estr = rb_str_encode(rb_usascii_str_new(ellipsis, ellipsislen),
7740  rb_enc_from_encoding(enc), 0, Qnil);
7741  rb_str_append(ret, estr);
7742  }
7743  return ret;
7744 }
7745 
7746 /**********************************************************************
7747  * Document-class: Symbol
7748  *
7749  * <code>Symbol</code> objects represent names and some strings
7750  * inside the Ruby
7751  * interpreter. They are generated using the <code>:name</code> and
7752  * <code>:"string"</code> literals
7753  * syntax, and by the various <code>to_sym</code> methods. The same
7754  * <code>Symbol</code> object will be created for a given name or string
7755  * for the duration of a program's execution, regardless of the context
7756  * or meaning of that name. Thus if <code>Fred</code> is a constant in
7757  * one context, a method in another, and a class in a third, the
7758  * <code>Symbol</code> <code>:Fred</code> will be the same object in
7759  * all three contexts.
7760  *
7761  * module One
7762  * class Fred
7763  * end
7764  * $f1 = :Fred
7765  * end
7766  * module Two
7767  * Fred = 1
7768  * $f2 = :Fred
7769  * end
7770  * def Fred()
7771  * end
7772  * $f3 = :Fred
7773  * $f1.object_id #=> 2514190
7774  * $f2.object_id #=> 2514190
7775  * $f3.object_id #=> 2514190
7776  *
7777  */
7778 
7779 
7780 /*
7781  * call-seq:
7782  * sym == obj -> true or false
7783  *
7784  * Equality---If <i>sym</i> and <i>obj</i> are exactly the same
7785  * symbol, returns <code>true</code>.
7786  */
7787 
7788 static VALUE
7789 sym_equal(VALUE sym1, VALUE sym2)
7790 {
7791  if (sym1 == sym2) return Qtrue;
7792  return Qfalse;
7793 }
7794 
7795 
7796 static int
7797 sym_printable(const char *s, const char *send, rb_encoding *enc)
7798 {
7799  while (s < send) {
7800  int n;
7801  int c = rb_enc_codepoint_len(s, send, &n, enc);
7802 
7803  if (!rb_enc_isprint(c, enc)) return FALSE;
7804  s += n;
7805  }
7806  return TRUE;
7807 }
7808 
7809 int
7811 {
7812  rb_encoding *enc;
7813  const char *ptr;
7814  long len;
7816 
7817  if (resenc == NULL) resenc = rb_default_external_encoding();
7818  enc = STR_ENC_GET(sym);
7819  ptr = RSTRING_PTR(sym);
7820  len = RSTRING_LEN(sym);
7821  if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) || len != (long)strlen(ptr) ||
7822  !rb_enc_symname_p(ptr, enc) || !sym_printable(ptr, ptr + len, enc)) {
7823  return FALSE;
7824  }
7825  return TRUE;
7826 }
7827 
7828 VALUE
7830 {
7831  rb_encoding *enc;
7832  const char *ptr;
7833  long len;
7834  rb_encoding *resenc;
7835 
7836  Check_Type(str, T_STRING);
7837  resenc = rb_default_internal_encoding();
7838  if (resenc == NULL) resenc = rb_default_external_encoding();
7839  enc = STR_ENC_GET(str);
7840  ptr = RSTRING_PTR(str);
7841  len = RSTRING_LEN(str);
7842  if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
7843  !sym_printable(ptr, ptr + len, enc)) {
7844  return rb_str_inspect(str);
7845  }
7846  return str;
7847 }
7848 
7849 VALUE
7851 {
7852  return rb_str_quote_unprintable(rb_id2str(id));
7853 }
7854 
7855 /*
7856  * call-seq:
7857  * sym.inspect -> string
7858  *
7859  * Returns the representation of <i>sym</i> as a symbol literal.
7860  *
7861  * :fred.inspect #=> ":fred"
7862  */
7863 
7864 static VALUE
7866 {
7867  VALUE str;
7868  const char *ptr;
7869  long len;
7870  ID id = SYM2ID(sym);
7871  char *dest;
7872 
7873  sym = rb_id2str(id);
7874  if (!rb_str_symname_p(sym)) {
7875  str = rb_str_inspect(sym);
7876  len = RSTRING_LEN(str);
7877  rb_str_resize(str, len + 1);
7878  dest = RSTRING_PTR(str);
7879  memmove(dest + 1, dest, len);
7880  dest[0] = ':';
7881  }
7882  else {
7883  rb_encoding *enc = STR_ENC_GET(sym);
7884  ptr = RSTRING_PTR(sym);
7885  len = RSTRING_LEN(sym);
7886  str = rb_enc_str_new(0, len + 1, enc);
7887  dest = RSTRING_PTR(str);
7888  dest[0] = ':';
7889  memcpy(dest + 1, ptr, len);
7890  }
7891  return str;
7892 }
7893 
7894 
7895 /*
7896  * call-seq:
7897  * sym.id2name -> string
7898  * sym.to_s -> string
7899  *
7900  * Returns the name or string corresponding to <i>sym</i>.
7901  *
7902  * :fred.id2name #=> "fred"
7903  */
7904 
7905 
7906 VALUE
7908 {
7909  ID id = SYM2ID(sym);
7910 
7911  return str_new3(rb_cString, rb_id2str(id));
7912 }
7913 
7914 
7915 /*
7916  * call-seq:
7917  * sym.to_sym -> sym
7918  * sym.intern -> sym
7919  *
7920  * In general, <code>to_sym</code> returns the <code>Symbol</code> corresponding
7921  * to an object. As <i>sym</i> is already a symbol, <code>self</code> is returned
7922  * in this case.
7923  */
7924 
7925 static VALUE
7927 {
7928  return sym;
7929 }
7930 
7931 static VALUE
7932 sym_call(VALUE args, VALUE sym, int argc, VALUE *argv, VALUE passed_proc)
7933 {
7934  VALUE obj;
7935 
7936  if (argc < 1) {
7937  rb_raise(rb_eArgError, "no receiver given");
7938  }
7939  obj = argv[0];
7940  return rb_funcall_with_block(obj, (ID)sym, argc - 1, argv + 1, passed_proc);
7941 }
7942 
7943 /*
7944  * call-seq:
7945  * sym.to_proc
7946  *
7947  * Returns a _Proc_ object which respond to the given method by _sym_.
7948  *
7949  * (1..3).collect(&:to_s) #=> ["1", "2", "3"]
7950  */
7951 
7952 static VALUE
7954 {
7955  static VALUE sym_proc_cache = Qfalse;
7956  enum {SYM_PROC_CACHE_SIZE = 67};
7957  VALUE proc;
7958  long id, index;
7959  VALUE *aryp;
7960 
7961  if (!sym_proc_cache) {
7962  sym_proc_cache = rb_ary_tmp_new(SYM_PROC_CACHE_SIZE * 2);
7963  rb_gc_register_mark_object(sym_proc_cache);
7964  rb_ary_store(sym_proc_cache, SYM_PROC_CACHE_SIZE*2 - 1, Qnil);
7965  }
7966 
7967  id = SYM2ID(sym);
7968  index = (id % SYM_PROC_CACHE_SIZE) << 1;
7969 
7970  aryp = RARRAY_PTR(sym_proc_cache);
7971  if (aryp[index] == sym) {
7972  return aryp[index + 1];
7973  }
7974  else {
7975  proc = rb_proc_new(sym_call, (VALUE)id);
7976  aryp[index] = sym;
7977  aryp[index + 1] = proc;
7978  return proc;
7979  }
7980 }
7981 
7982 /*
7983  * call-seq:
7984  *
7985  * sym.succ
7986  *
7987  * Same as <code>sym.to_s.succ.intern</code>.
7988  */
7989 
7990 static VALUE
7992 {
7993  return rb_str_intern(rb_str_succ(rb_sym_to_s(sym)));
7994 }
7995 
7996 /*
7997  * call-seq:
7998  *
7999  * symbol <=> other_symbol -> -1, 0, +1 or nil
8000  *
8001  * Compares +symbol+ with +other_symbol+ after calling #to_s on each of the
8002  * symbols. Returns -1, 0, +1 or nil depending on whether +symbol+ is less
8003  * than, equal to, or greater than +other_symbol+.
8004  *
8005  * +nil+ is returned if the two values are incomparable.
8006  *
8007  * See String#<=> for more information.
8008  */
8009 
8010 static VALUE
8012 {
8013  if (!SYMBOL_P(other)) {
8014  return Qnil;
8015  }
8016  return rb_str_cmp_m(rb_sym_to_s(sym), rb_sym_to_s(other));
8017 }
8018 
8019 /*
8020  * call-seq:
8021  *
8022  * sym.casecmp(other) -> -1, 0, +1 or nil
8023  *
8024  * Case-insensitive version of <code>Symbol#<=></code>.
8025  */
8026 
8027 static VALUE
8029 {
8030  if (!SYMBOL_P(other)) {
8031  return Qnil;
8032  }
8033  return rb_str_casecmp(rb_sym_to_s(sym), rb_sym_to_s(other));
8034 }
8035 
8036 /*
8037  * call-seq:
8038  * sym =~ obj -> fixnum or nil
8039  *
8040  * Returns <code>sym.to_s =~ obj</code>.
8041  */
8042 
8043 static VALUE
8045 {
8046  return rb_str_match(rb_sym_to_s(sym), other);
8047 }
8048 
8049 /*
8050  * call-seq:
8051  * sym[idx] -> char
8052  * sym[b, n] -> char
8053  *
8054  * Returns <code>sym.to_s[]</code>.
8055  */
8056 
8057 static VALUE
8059 {
8060  return rb_str_aref_m(argc, argv, rb_sym_to_s(sym));
8061 }
8062 
8063 /*
8064  * call-seq:
8065  * sym.length -> integer
8066  *
8067  * Same as <code>sym.to_s.length</code>.
8068  */
8069 
8070 static VALUE
8072 {
8073  return rb_str_length(rb_id2str(SYM2ID(sym)));
8074 }
8075 
8076 /*
8077  * call-seq:
8078  * sym.empty? -> true or false
8079  *
8080  * Returns that _sym_ is :"" or not.
8081  */
8082 
8083 static VALUE
8085 {
8086  return rb_str_empty(rb_id2str(SYM2ID(sym)));
8087 }
8088 
8089 /*
8090  * call-seq:
8091  * sym.upcase -> symbol
8092  *
8093  * Same as <code>sym.to_s.upcase.intern</code>.
8094  */
8095 
8096 static VALUE
8098 {
8100 }
8101 
8102 /*
8103  * call-seq:
8104  * sym.downcase -> symbol
8105  *
8106  * Same as <code>sym.to_s.downcase.intern</code>.
8107  */
8108 
8109 static VALUE
8111 {
8113 }
8114 
8115 /*
8116  * call-seq:
8117  * sym.capitalize -> symbol
8118  *
8119  * Same as <code>sym.to_s.capitalize.intern</code>.
8120  */
8121 
8122 static VALUE
8124 {
8126 }
8127 
8128 /*
8129  * call-seq:
8130  * sym.swapcase -> symbol
8131  *
8132  * Same as <code>sym.to_s.swapcase.intern</code>.
8133  */
8134 
8135 static VALUE
8137 {
8139 }
8140 
8141 /*
8142  * call-seq:
8143  * sym.encoding -> encoding
8144  *
8145  * Returns the Encoding object that represents the encoding of _sym_.
8146  */
8147 
8148 static VALUE
8150 {
8151  return rb_obj_encoding(rb_id2str(SYM2ID(sym)));
8152 }
8153 
8154 ID
8156 {
8157  VALUE tmp;
8158 
8159  switch (TYPE(name)) {
8160  default:
8161  tmp = rb_check_string_type(name);
8162  if (NIL_P(tmp)) {
8163  tmp = rb_inspect(name);
8164  rb_raise(rb_eTypeError, "%s is not a symbol",
8165  RSTRING_PTR(tmp));
8166  }
8167  name = tmp;
8168  /* fall through */
8169  case T_STRING:
8170  name = rb_str_intern(name);
8171  /* fall through */
8172  case T_SYMBOL:
8173  return SYM2ID(name);
8174  }
8175 
8176  UNREACHABLE;
8177 }
8178 
8179 /*
8180  * A <code>String</code> object holds and manipulates an arbitrary sequence of
8181  * bytes, typically representing characters. String objects may be created
8182  * using <code>String::new</code> or as literals.
8183  *
8184  * Because of aliasing issues, users of strings should be aware of the methods
8185  * that modify the contents of a <code>String</code> object. Typically,
8186  * methods with names ending in ``!'' modify their receiver, while those
8187  * without a ``!'' return a new <code>String</code>. However, there are
8188  * exceptions, such as <code>String#[]=</code>.
8189  *
8190  */
8191 
8192 void
8194 {
8195 #undef rb_intern
8196 #define rb_intern(str) rb_intern_const(str)
8197 
8198  rb_cString = rb_define_class("String", rb_cObject);
8202  rb_define_method(rb_cString, "initialize", rb_str_init, -1);
8203  rb_define_method(rb_cString, "initialize_copy", rb_str_replace, 1);
8207  rb_define_method(rb_cString, "eql?", rb_str_eql, 1);
8209  rb_define_method(rb_cString, "casecmp", rb_str_casecmp, 1);
8215  rb_define_method(rb_cString, "insert", rb_str_insert, 2);
8216  rb_define_method(rb_cString, "length", rb_str_length, 0);
8218  rb_define_method(rb_cString, "bytesize", rb_str_bytesize, 0);
8219  rb_define_method(rb_cString, "empty?", rb_str_empty, 0);
8226  rb_define_method(rb_cString, "upto", rb_str_upto, -1);
8229  rb_define_method(rb_cString, "replace", rb_str_replace, 1);
8232  rb_define_method(rb_cString, "getbyte", rb_str_getbyte, 1);
8233  rb_define_method(rb_cString, "setbyte", rb_str_setbyte, 2);
8234  rb_define_method(rb_cString, "byteslice", rb_str_byteslice, -1);
8235 
8236  rb_define_method(rb_cString, "to_i", rb_str_to_i, -1);
8239  rb_define_method(rb_cString, "to_str", rb_str_to_s, 0);
8240  rb_define_method(rb_cString, "inspect", rb_str_inspect, 0);
8242 
8243  rb_define_method(rb_cString, "upcase", rb_str_upcase, 0);
8244  rb_define_method(rb_cString, "downcase", rb_str_downcase, 0);
8245  rb_define_method(rb_cString, "capitalize", rb_str_capitalize, 0);
8246  rb_define_method(rb_cString, "swapcase", rb_str_swapcase, 0);
8247 
8252 
8256  rb_define_method(rb_cString, "lines", rb_str_lines, -1);
8259  rb_define_method(rb_cString, "codepoints", rb_str_codepoints, 0);
8260  rb_define_method(rb_cString, "reverse", rb_str_reverse, 0);
8262  rb_define_method(rb_cString, "concat", rb_str_concat, 1);
8264  rb_define_method(rb_cString, "prepend", rb_str_prepend, 1);
8266  rb_define_method(rb_cString, "intern", rb_str_intern, 0);
8267  rb_define_method(rb_cString, "to_sym", rb_str_intern, 0);
8269 
8270  rb_define_method(rb_cString, "include?", rb_str_include, 1);
8271  rb_define_method(rb_cString, "start_with?", rb_str_start_with, -1);
8272  rb_define_method(rb_cString, "end_with?", rb_str_end_with, -1);
8273 
8275 
8276  rb_define_method(rb_cString, "ljust", rb_str_ljust, -1);
8277  rb_define_method(rb_cString, "rjust", rb_str_rjust, -1);
8278  rb_define_method(rb_cString, "center", rb_str_center, -1);
8279 
8280  rb_define_method(rb_cString, "sub", rb_str_sub, -1);
8281  rb_define_method(rb_cString, "gsub", rb_str_gsub, -1);
8283  rb_define_method(rb_cString, "chomp", rb_str_chomp, -1);
8285  rb_define_method(rb_cString, "lstrip", rb_str_lstrip, 0);
8286  rb_define_method(rb_cString, "rstrip", rb_str_rstrip, 0);
8287 
8295 
8298  rb_define_method(rb_cString, "delete", rb_str_delete, -1);
8299  rb_define_method(rb_cString, "squeeze", rb_str_squeeze, -1);
8300  rb_define_method(rb_cString, "count", rb_str_count, -1);
8301 
8306 
8307  rb_define_method(rb_cString, "each_line", rb_str_each_line, -1);
8308  rb_define_method(rb_cString, "each_byte", rb_str_each_byte, 0);
8309  rb_define_method(rb_cString, "each_char", rb_str_each_char, 0);
8310  rb_define_method(rb_cString, "each_codepoint", rb_str_each_codepoint, 0);
8311 
8312  rb_define_method(rb_cString, "sum", rb_str_sum, -1);
8313 
8314  rb_define_method(rb_cString, "slice", rb_str_aref_m, -1);
8316 
8317  rb_define_method(rb_cString, "partition", rb_str_partition, 1);
8318  rb_define_method(rb_cString, "rpartition", rb_str_rpartition, 1);
8319 
8320  rb_define_method(rb_cString, "encoding", rb_obj_encoding, 0); /* in encoding.c */
8321  rb_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1);
8323  rb_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, 0);
8325 
8326  id_to_s = rb_intern("to_s");
8327 
8328  rb_fs = Qnil;
8329  rb_define_variable("$;", &rb_fs);
8330  rb_define_variable("$-F", &rb_fs);
8331 
8332  rb_cSymbol = rb_define_class("Symbol", rb_cObject);
8336  rb_define_singleton_method(rb_cSymbol, "all_symbols", rb_sym_all_symbols, 0); /* in parse.y */
8337 
8340  rb_define_method(rb_cSymbol, "inspect", sym_inspect, 0);
8342  rb_define_method(rb_cSymbol, "id2name", rb_sym_to_s, 0);
8343  rb_define_method(rb_cSymbol, "intern", sym_to_sym, 0);
8344  rb_define_method(rb_cSymbol, "to_sym", sym_to_sym, 0);
8345  rb_define_method(rb_cSymbol, "to_proc", sym_to_proc, 0);
8346  rb_define_method(rb_cSymbol, "succ", sym_succ, 0);
8347  rb_define_method(rb_cSymbol, "next", sym_succ, 0);
8348 
8349  rb_define_method(rb_cSymbol, "<=>", sym_cmp, 1);
8350  rb_define_method(rb_cSymbol, "casecmp", sym_casecmp, 1);
8352 
8353  rb_define_method(rb_cSymbol, "[]", sym_aref, -1);
8354  rb_define_method(rb_cSymbol, "slice", sym_aref, -1);
8355  rb_define_method(rb_cSymbol, "length", sym_length, 0);
8356  rb_define_method(rb_cSymbol, "size", sym_length, 0);
8357  rb_define_method(rb_cSymbol, "empty?", sym_empty, 0);
8358  rb_define_method(rb_cSymbol, "match", sym_match, 1);
8359 
8360  rb_define_method(rb_cSymbol, "upcase", sym_upcase, 0);
8361  rb_define_method(rb_cSymbol, "downcase", sym_downcase, 0);
8362  rb_define_method(rb_cSymbol, "capitalize", sym_capitalize, 0);
8363  rb_define_method(rb_cSymbol, "swapcase", sym_swapcase, 0);
8364 
8365  rb_define_method(rb_cSymbol, "encoding", sym_encoding, 0);
8366 }
static int str_independent(VALUE str)
Definition: string.c:1338
#define rb_enc_islower(c, enc)
#define FIXNUM_MAX
#define RB_TYPE_P(obj, type)
#define rb_usascii_str_new2
int rb_enc_str_asciionly_p(VALUE str)
Definition: string.c:340
static VALUE sym_upcase(VALUE sym)
Definition: string.c:8097
static long chopped_length(VALUE str)
Definition: string.c:6621
VALUE rb_str_resize(VALUE str, long len)
Definition: string.c:1854
static VALUE str_replace_shared_without_enc(VALUE str2, VALUE str)
Definition: string.c:638
Definition: string.c:5072
int rb_enc_codelen(int c, rb_encoding *enc)
Definition: encoding.c:954
static VALUE rb_str_bytesize(VALUE str)
Definition: string.c:1201
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE
#define rb_tainted_str_new2
static VALUE str_buf_cat(VALUE str, const char *ptr, long len)
Definition: string.c:1902
#define RSTRING(obj)
static long rb_str_rindex(VALUE str, VALUE sub, long pos)
Definition: string.c:2621
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Definition: encoding.c:778
VALUE rb_ary_pop(VALUE ary)
Definition: array.c:879
#define RESIZE_CAPA(str, capacity)
Definition: string.c:102
VALUE rb_any_to_s(VALUE)
Definition: object.c:384
void rb_bug(const char *fmt,...)
Definition: error.c:290
void rb_enc_copy(VALUE obj1, VALUE obj2)
Definition: encoding.c:856
#define FALSE
Definition: nkf.h:174
#define rb_hash_lookup
Definition: tcltklib.c:268
#define rb_intern(str)
size_t strlen(const char *)
#define OBJ_INFECT(x, s)
int i
Definition: win32ole.c:784
#define CHECK_IF_ASCII(c)
#define TOUPPER(c)
unsigned long VALUE
Definition: ripper.y:104
const char * rb_obj_classname(VALUE)
Definition: variable.c:396
VALUE rb_id2str(ID id)
Definition: ripper.c:16992
#define RSTRING_END(str)
static int sym_printable(const char *s, const char *send, rb_encoding *enc)
Definition: string.c:7797
VALUE rb_str_equal(VALUE str1, VALUE str2)
Definition: string.c:2360
#define UNLIMITED_ARGUMENTS
#define FL_TEST(x, f)
static int max(int a, int b)
Definition: strftime.c:141
VALUE rb_locale_str_new_cstr(const char *ptr)
Definition: string.c:602
VALUE rb_sym_to_s(VALUE sym)
Definition: string.c:7907
#define ascii_isspace(c)
Definition: string.c:5867
static int coderange_scan(const char *p, long len, rb_encoding *enc)
Definition: string.c:183
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc)
Definition: string.c:569
VALUE rb_str_tmp_new(long)
void rb_define_singleton_method(VALUE obj, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a singleton method for obj.
Definition: class.c:1497
static VALUE rb_str_to_f(VALUE str)
Definition: string.c:4426
VALUE rb_str_new_frozen(VALUE orig)
Definition: string.c:713
static VALUE rb_str_oct(VALUE str)
Definition: string.c:7131
#define FL_SET(x, f)
st_index_t rb_str_hash(VALUE str)
Definition: string.c:2245
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *ptr_enc)
Definition: string.c:2075
#define STR_NOCAPA
Definition: string.c:63
static VALUE rb_str_scan(VALUE str, VALUE pat)
Definition: string.c:7064
VALUE rb_locale_str_new(const char *ptr, long len)
Definition: string.c:596
static VALUE rb_str_gsub(int argc, VALUE *argv, VALUE str)
Definition: string.c:4013
static VALUE rb_str_match(VALUE x, VALUE y)
Definition: string.c:2755
#define rb_enc_codepoint(p, e, enc)
RUBY_ALIAS_FUNCTION(rb_str_new2(const char *ptr), rb_str_new_cstr,(ptr))
Definition: string.c:455
void rb_str_set_len(VALUE str, long len)
Definition: string.c:1838
static void rb_enc_cr_str_copy_for_substr(VALUE dest, VALUE src)
Definition: string.c:290
void rb_gc_force_recycle(VALUE)
Definition: gc.c:2963
unsigned char * USTR
Definition: string.c:5070
static unsigned int trnext(struct tr *t, rb_encoding *enc)
Definition: string.c:5079
VALUE rb_str_locktmp(VALUE)
#define rb_check_frozen(obj)
#define is_broken_string(str)
Definition: string.c:121
RUBY_EXTERN void * memmove(void *, const void *, size_t)
Definition: memmove.c:7
#define rb_enc_right_char_head(s, p, e, enc)
static VALUE sym_swapcase(VALUE sym)
Definition: string.c:8136
#define rb_enc_name(enc)
static VALUE rb_str_b(VALUE str)
Definition: string.c:7649
char * pend
Definition: string.c:5075
const int id
Definition: nkf.c:209
void Init_String(void)
Definition: string.c:8193
static VALUE rb_str_clear(VALUE str)
Definition: string.c:4052
rb_encoding * rb_to_encoding(VALUE enc)
Definition: encoding.c:194
#define STR_UNSET_NOCAPA(s)
Definition: string.c:65
VALUE rb_enc_from_encoding(rb_encoding *encoding)
Definition: encoding.c:103
int rb_enc_tolower(int c, rb_encoding *enc)
Definition: encoding.c:970
VALUE rb_obj_freeze(VALUE)
Definition: object.c:989
VALUE rb_eTypeError
Definition: error.c:511
int rb_num_to_uint(VALUE val, unsigned int *ret)
Definition: numeric.c:122
#define OBJ_FREEZE(x)
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
#define OBJ_TAINTED(x)
static VALUE str_gsub(int argc, VALUE *argv, VALUE str, int bang)
Definition: string.c:3837
#define UNREACHABLE
Definition: ruby.h:40
static VALUE rb_str_succ_bang(VALUE str)
Definition: string.c:3068
static VALUE rb_str_enumerate_bytes(VALUE str, int wantarray)
Definition: string.c:6339
static VALUE rb_str_each_line(int argc, VALUE *argv, VALUE str)
Definition: string.c:6309
#define rb_enc_prev_char(s, p, e, enc)
rb_encoding * rb_default_internal_encoding(void)
Definition: encoding.c:1373
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:822
static VALUE str_new3(VALUE klass, VALUE str)
Definition: string.c:671
VALUE rb_reg_regsub(VALUE, VALUE, struct re_registers *, VALUE)
Definition: re.c:3286
int rb_usascii_encindex(void)
Definition: encoding.c:1192
#define TYPE(x)
VALUE rb_str_split(VALUE str, const char *sep0)
Definition: string.c:6132
static VALUE rb_str_prepend(VALUE str, VALUE str2)
Definition: string.c:2236
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Definition: encoding.c:789
static VALUE rb_str_gsub_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:3962
VALUE rb_ary_tmp_new(long capa)
Definition: array.c:465
VALUE rb_str_export_to_enc(VALUE str, rb_encoding *enc)
Definition: string.c:632
#define RSTRING_PTR(str)
#define CLASS_OF(v)
static VALUE rb_str_codepoints(VALUE str)
Definition: string.c:6614
#define str_buf_cat2(str, ptr)
Definition: string.c:1945
static VALUE rb_str_swapcase_bang(VALUE str)
Definition: string.c:5018
static VALUE rb_str_rstrip(VALUE str)
Definition: string.c:6949
VALUE rb_filesystem_str_new(const char *ptr, long len)
Definition: string.c:608
VALUE rb_str_export(VALUE str)
Definition: string.c:620
static VALUE rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
Definition: string.c:7309
static VALUE rb_str_include(VALUE str, VALUE arg)
Definition: string.c:4359
static void rb_str_check_dummy_enc(rb_encoding *enc)
Definition: string.c:4763
#define xfree
#define str_make_independent(str)
Definition: string.c:1366
VALUE rb_str_freeze(VALUE str)
Definition: string.c:1798
VALUE rb_funcall(VALUE, ID, int,...)
Calls a method.
Definition: vm_eval.c:773
#define Qnil
long rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
Definition: string.c:1025
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
Definition: encoding.c:933
long rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
Definition: string.c:1031
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:1780
VALUE rb_funcall_with_block(VALUE, ID, int, const VALUE *, VALUE)
Definition: vm_eval.c:833
char * p
Definition: string.c:5075
static VALUE sym_downcase(VALUE sym)
Definition: string.c:8110
VALUE rb_proc_new(VALUE(*)(ANYARGS), VALUE)
Definition: proc.c:2022
static VALUE str_replace(VALUE str, VALUE str2)
Definition: string.c:910
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Definition: encoding.c:766
VALUE rb_obj_class(VALUE)
Definition: object.c:194
#define RETURN_ENUMERATOR(obj, argc, argv)
static VALUE rb_str_to_i(int argc, VALUE *argv, VALUE str)
Definition: string.c:4393
char * rb_string_value_ptr(volatile VALUE *ptr)
Definition: string.c:1484
static VALUE rb_str_slice_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:3631
#define rb_enc_left_char_head(s, p, e, enc)
VALUE rb_str_intern(VALUE s)
Definition: string.c:7212
#define STR_NOEMBED
Definition: string.c:58
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:886
VALUE rb_ary_new3(long n,...)
Definition: array.c:432
static VALUE rb_str_empty(VALUE str)
Definition: string.c:1218
static VALUE rb_str_chars(VALUE str)
Definition: string.c:6521
VALUE rb_eSecurityError
Definition: error.c:520
static VALUE rb_str_reverse_bang(VALUE str)
Definition: string.c:4320
void rb_include_module(VALUE klass, VALUE module)
Definition: class.c:695
#define rb_enc_to_index(enc)
static VALUE rb_str_center(int argc, VALUE *argv, VALUE str)
Definition: string.c:7453
static VALUE rb_str_each_char_size(VALUE str)
Definition: string.c:6413
#define FL_UNTRUSTED
RUBY_FUNC_EXPORTED size_t rb_str_memsize(VALUE str)
Definition: string.c:838
VALUE rb_reg_nth_match(int, VALUE)
Definition: re.c:1457
static VALUE rb_str_subpat(VALUE str, VALUE re, VALUE backref)
Definition: string.c:3205
VALUE rb_str_new(const char *ptr, long len)
Definition: string.c:425
static VALUE rb_str_aset_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:3563
static VALUE rb_str_upcase_bang(VALUE str)
Definition: string.c:4781
#define ISDIGIT(c)
unsigned int last
Definition: nkf.c:4310
static VALUE rb_str_format_m(VALUE str, VALUE arg)
Definition: string.c:1316
#define STR_SET_NOEMBED(str)
Definition: string.c:70
#define ENCODING_IS_ASCII8BIT(obj)
#define STR_DEC_LEN(str)
Definition: string.c:91
#define numberof(array)
Definition: string.c:32
static long str_strlen(VALUE str, rb_encoding *enc)
Definition: string.c:1122
static VALUE rb_str_chomp(int argc, VALUE *argv, VALUE str)
Definition: string.c:6818
rb_encoding * rb_utf8_encoding(void)
Definition: encoding.c:1168
#define ID2SYM(x)
VALUE rb_str_export_locale(VALUE str)
Definition: string.c:626
#define BEG(no)
Definition: string.c:22
static VALUE sym_length(VALUE sym)
Definition: string.c:8071
VALUE rb_str_new_shared(VALUE str)
Definition: string.c:677
void rb_undef_method(VALUE klass, const char *name)
Definition: class.c:1362
#define CHAR_ESC_LEN
Definition: string.c:4461
#define ENC_CODERANGE_BROKEN
VALUE rb_sym_all_symbols(void)
Definition: ripper.c:17090
static VALUE empty_str_alloc(VALUE klass)
Definition: string.c:386
static VALUE rb_str_upcase(VALUE str)
Definition: string.c:4846
#define LONG2NUM(x)
static VALUE rb_str_hash_m(VALUE str)
Definition: string.c:2275
static int tr_find(unsigned int c, char table[TR_TABLE_SIZE], VALUE del, VALUE nodel)
Definition: string.c:5504
VALUE rb_cString
Definition: string.c:53
int rb_reg_backref_number(VALUE match, VALUE backref)
Definition: re.c:1075
#define rb_enc_isctype(c, t, enc)
VALUE rb_equal(VALUE, VALUE)
Definition: object.c:56
static VALUE rb_str_aset(VALUE str, VALUE indx, VALUE val)
Definition: string.c:3496
#define rb_str_new5
VALUE rb_eRangeError
Definition: error.c:515
VALUE rb_enc_sprintf(rb_encoding *enc, const char *format,...)
Definition: sprintf.c:1251
#define rb_str_buf_new2
const char * name
Definition: ripper.y:163
#define ENCODING_GET(obj)
int rb_enc_toupper(int c, rb_encoding *enc)
Definition: encoding.c:964
#define sym(x)
Definition: date_core.c:3715
static VALUE rb_str_insert(VALUE str, VALUE idx, VALUE str2)
Definition: string.c:3596
VALUE rb_str_append(VALUE str, VALUE str2)
Definition: string.c:2122
Win32OLEIDispatch * p
Definition: win32ole.c:786
#define ISALPHA(c)
Definition: ruby.h:1636
#define MEMZERO(p, type, n)
static VALUE sym_equal(VALUE sym1, VALUE sym2)
Definition: string.c:7789
static VALUE sym_inspect(VALUE sym)
Definition: string.c:7865
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Definition: string.c:2255
int args
Definition: win32ole.c:785
static VALUE rb_str_partition(VALUE str, VALUE sep)
Definition: string.c:7474
static long str_offset(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
Definition: string.c:1603
static VALUE rb_str_ljust(int argc, VALUE *argv, VALUE str)
Definition: string.c:7413
int rb_enc_str_coderange(VALUE str)
Definition: string.c:327
#define STR_SHARED_P(s)
Definition: string.c:61
VALUE rb_str_plus(VALUE str1, VALUE str2)
Definition: string.c:1236
static VALUE rb_str_setbyte(VALUE str, VALUE index, VALUE value)
Definition: string.c:4107
rb_encoding * rb_default_external_encoding(void)
Definition: encoding.c:1288
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
VALUE rb_mComparable
Definition: compar.c:14
neighbor_char
Definition: string.c:2819
static VALUE rb_str_capitalize_bang(VALUE str)
Definition: string.c:4952
static VALUE rb_str_strip(VALUE str)
Definition: string.c:6987
#define FIXNUM_P(f)
#define rb_intern_str(string)
Definition: generator.h:17
unsigned int now
Definition: string.c:5074
int rb_block_given_p(void)
Definition: eval.c:672
#define RARRAY_LEN(a)
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Definition: transcode.c:1446
#define rb_enc_step_back(s, p, e, n, enc)
static VALUE rb_str_split_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:5917
#define val
static int single_byte_optimizable(VALUE str)
Definition: string.c:126
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:880
static void rb_str_splice_0(VALUE str, long beg, long len, VALUE val)
Definition: string.c:3376
VALUE rb_eRuntimeError
Definition: error.c:510
#define Qtrue
static VALUE sym_to_sym(VALUE sym)
Definition: string.c:7926
return c
Definition: ripper.y:7591
void * rb_alloc_tmp_buffer(volatile VALUE *store, long len)
Definition: string.c:814
VALUE rb_str_to_inum(VALUE str, int base, int badcheck)
Definition: bignum.c:777
static VALUE str_new_shared(VALUE klass, VALUE str)
Definition: string.c:665
VALUE rb_str_length(VALUE str)
Definition: string.c:1182
static VALUE rb_str_rpartition(VALUE str, VALUE sep)
Definition: string.c:7524
int rb_isspace(int c)
Definition: encoding.c:1893
static VALUE rb_str_crypt(VALUE str, VALUE salt)
Definition: string.c:7155
static VALUE rb_str_cmp_m(VALUE str1, VALUE str2)
Definition: string.c:2413
int rb_str_symname_p(VALUE sym)
Definition: string.c:7810
VALUE rb_ary_new(void)
Definition: array.c:424
#define Check_Type(v, t)
VALUE rb_str_new_cstr(const char *ptr)
Definition: string.c:447
static void str_modify_keep_cr(VALUE str)
Definition: string.c:1402
#define dp(v)
Definition: vm_debug.h:23
unsigned long ID
Definition: ripper.y:105
#define STR_BUF_MIN_SIZE
Definition: string.c:774
#define rb_str_new3
#define STR_SET_EMBED(str)
Definition: string.c:74
static VALUE rb_str_tr_s(VALUE str, VALUE src, VALUE repl)
Definition: string.c:5754
#define ISASCII(c)
Definition: ruby.h:1629
#define ONIGENC_CTYPE_ALPHA
#define ENC_CODERANGE_CLEAR(obj)
#define add(x, y)
Definition: date_strftime.c:23
static VALUE rb_str_delete(int argc, VALUE *argv, VALUE str)
Definition: string.c:5610
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
Definition: class.c:499
#define RSTRING_LEN(str)
void rb_enc_set_index(VALUE obj, int idx)
Definition: encoding.c:741
#define INT2FIX(i)
static VALUE rb_str_each_byte_size(VALUE str, VALUE args)
Definition: string.c:6333
#define Qfalse
static VALUE rb_str_enumerate_chars(VALUE str, int wantarray)
Definition: string.c:6428
static VALUE rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, int wantarray)
Definition: string.c:6143
void rb_ary_store(VALUE ary, long idx, VALUE val)
Definition: array.c:719
static VALUE rb_str_tr_s_bang(VALUE str, VALUE src, VALUE repl)
Definition: string.c:5734
#define RUBY_DTRACE_STRING_CREATE_ENABLED()
Definition: probes.h:63
VALUE rb_str_concat(VALUE str1, VALUE str2)
Definition: string.c:2163
#define FIX2LONG(x)
void rb_backref_set(VALUE)
Definition: vm.c:768
static int rb_enc_dummy_p(rb_encoding *enc)
Definition: ripper.y:235
#define T_STRING
#define END(no)
Definition: string.c:23
#define MBCLEN_CHARFOUND_P(ret)
#define ENC_CODERANGE_AND(a, b)
#define rb_enc_isprint(c, enc)
#define STR_ENC_GET(str)
Definition: string.c:123
static VALUE rb_str_strip_bang(VALUE str)
Definition: string.c:6966
int argc
Definition: ruby.c:130
#define NIL_P(v)
double rb_str_to_dbl(VALUE, int)
Definition: object.c:2626
VALUE rb_cEncodingConverter
Definition: transcode.c:25
long rb_str_offset(VALUE str, long pos)
Definition: string.c:1611
#define rb_sourcefile()
Definition: tcltklib.c:97
#define STR_SET_EMBED_LEN(str, n)
Definition: string.c:76
#define range(low, item, hi)
Definition: date_strftime.c:21
VALUE rb_check_hash_type(VALUE)
Definition: hash.c:461
#define LONG_MAX
Definition: ruby.h:201
static VALUE rb_str_tr_bang(VALUE str, VALUE src, VALUE repl)
Definition: string.c:5389
#define RUBY_FUNC_EXPORTED
Definition: defines.h:184
VALUE rb_eEncCompatError
Definition: error.c:518
arg
Definition: ripper.y:1316
void rb_str_update(VALUE str, long beg, long len, VALUE val)
Definition: string.c:3452
unsigned int max
Definition: string.c:5074
#define DBL2NUM(dbl)
#define ALLOCA_N(type, n)
VALUE rb_check_funcall(VALUE, ID, int, VALUE *)
Definition: vm_eval.c:408
static VALUE sym_call(VALUE args, VALUE sym, int argc, VALUE *argv, VALUE passed_proc)
Definition: string.c:7932
#define ENC_CODERANGE_UNKNOWN
VALUE rb_eIndexError
Definition: error.c:513
static VALUE rb_str_rjust(int argc, VALUE *argv, VALUE str)
Definition: string.c:7433
#define rb_enc_mbc_to_codepoint(p, e, enc)
#define ENC_CODERANGE_SET(obj, cr)
VALUE rb_reg_match(VALUE, VALUE)
Definition: re.c:2746
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
long rb_str_sublen(VALUE str, long pos)
Definition: string.c:1658
static VALUE sym_capitalize(VALUE sym)
Definition: string.c:8123
VALUE rb_str_times(VALUE str, VALUE times)
Definition: string.c:1268
#define rb_long2int(n)
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Definition: encoding.c:910
static VALUE sym_cmp(VALUE sym, VALUE other)
Definition: string.c:8011
void rb_str_modify_expand(VALUE str, long expand)
Definition: string.c:1377
#define sub(x, y)
Definition: date_strftime.c:24
VALUE rb_hash_aset(VALUE, VALUE, VALUE)
static void rb_str_splice(VALUE str, long beg, long len, VALUE val)
Definition: string.c:3409
VALUE rb_yield(VALUE)
Definition: vm_eval.c:933
VALUE rb_tainted_str_new(const char *, long)
static VALUE str_eql(const VALUE str1, const VALUE str2)
Definition: string.c:2336
#define RTEST(v)
static VALUE sym_encoding(VALUE sym)
Definition: string.c:8149
VALUE rb_str_format(int, const VALUE *, VALUE)
Definition: sprintf.c:439
static VALUE rb_str_swapcase(VALUE str)
Definition: string.c:5063
VALUE rb_obj_as_string(VALUE obj)
Definition: string.c:895
#define rb_enc_mbminlen(enc)
VALUE rb_str_subseq(VALUE str, long beg, long len)
Definition: string.c:1669
char * rb_string_value_cstr(volatile VALUE *ptr)
Definition: string.c:1491
#define RUBY_MAX_CHAR_LEN
Definition: string.c:56
SSL_METHOD *(* func)(void)
Definition: ossl_ssl.c:108
#define TRUE
Definition: nkf.h:175
static VALUE rb_str_byteslice(int argc, VALUE *argv, VALUE str)
Definition: string.c:4237
#define DATA_PTR(dta)
#define StringValue(v)
void rb_econv_close(rb_econv_t *ec)
Definition: transcode.c:1702
#define rb_enc_mbcput(c, buf, enc)
long rb_memsearch(const void *, long, const void *, long, rb_encoding *)
Definition: re.c:227
#define MBCLEN_CHARFOUND_LEN(ret)
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:898
int rb_enc_unicode_p(rb_encoding *enc)
Definition: encoding.c:459
#define STR_TMPLOCK
Definition: string.c:57
#define T_REGEXP
int rb_enc_symname_p(const char *name, rb_encoding *enc)
Definition: ripper.c:16676
static VALUE rb_str_tr(VALUE str, VALUE src, VALUE repl)
Definition: string.c:5431
#define CONST_ID(var, str)
static VALUE rb_str_chop_bang(VALUE str)
Definition: string.c:6648
void rb_gc_register_mark_object(VALUE)
Definition: gc.c:2982
#define STR_ASSOC
Definition: string.c:60
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Definition: class.c:1570
static VALUE rb_str_enumerate_codepoints(VALUE str, int wantarray)
Definition: string.c:6528
static VALUE rb_str_squeeze(int argc, VALUE *argv, VALUE str)
Definition: string.c:5717
long rb_reg_search(VALUE, VALUE, long, int)
Definition: re.c:1352
static VALUE str_duplicate(VALUE klass, VALUE str)
Definition: string.c:938
int rb_str_cmp(VALUE str1, VALUE str2)
Definition: string.c:2309
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4308
#define no_digits()
VALUE rb_str_buf_new_cstr(const char *ptr)
Definition: string.c:793
rb_encoding * rb_usascii_encoding(void)
Definition: encoding.c:1183
static VALUE rb_str_aref_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:3335
#define RARRAY_PTR(a)
static VALUE sym_to_proc(VALUE sym)
Definition: string.c:7953
#define OBJ_FROZEN(x)
#define rb_str_new2
#define RB_GC_GUARD(v)
static VALUE rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:5627
RUBY_EXTERN char * crypt(const char *, const char *)
Definition: crypt.c:500
#define FL_TAINT
static VALUE get_pat(VALUE, int)
Definition: string.c:3651
VALUE rb_str_buf_cat(VALUE str, const char *ptr, long len)
Definition: string.c:1948
#define T_FIXNUM
static enum neighbor_char enc_pred_char(char *p, long len, rb_encoding *enc)
Definition: string.c:2860
VALUE rb_cSymbol
Definition: string.c:54
rb_encoding * rb_locale_encoding(void)
Definition: encoding.c:1214
static VALUE result
Definition: nkf.c:40
VALUE rb_str_replace(VALUE str, VALUE str2)
Definition: string.c:4031
static VALUE rb_str_lstrip_bang(VALUE str)
Definition: string.c:6838
static VALUE str_new(VALUE klass, const char *ptr, long len)
Definition: string.c:395
static VALUE str_alloc(VALUE klass)
Definition: string.c:374
#define UNINITIALIZED_VAR(x)
Definition: vm_core.h:121
#define ELTS_SHARED
static VALUE rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:3690
VALUE rb_str_buf_cat2(VALUE str, const char *ptr)
Definition: string.c:1958
static VALUE rb_str_is_ascii_only_p(VALUE str)
Definition: string.c:7688
void rb_str_shared_replace(VALUE str, VALUE str2)
Definition: string.c:857
void rb_undef_alloc_func(VALUE)
Definition: vm_method.c:492
VALUE rb_obj_encoding(VALUE obj)
Definition: encoding.c:870
#define RUBY_DTRACE_STRING_CREATE(arg0, arg1, arg2)
Definition: probes.h:64
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Definition: transcode.c:2570
static VALUE rb_str_chomp_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:6698
VALUE rb_ensure(VALUE(*b_proc)(ANYARGS), VALUE data1, VALUE(*e_proc)(ANYARGS), VALUE data2)
Definition: eval.c:804
VALUE rb_str_buf_cat_ascii(VALUE str, const char *ptr)
Definition: string.c:2082
int memcmp(const void *s1, const void *s2, size_t len)
Definition: memcmp.c:7
VALUE rb_str_quote_unprintable(VALUE str)
Definition: string.c:7829
static VALUE sym_casecmp(VALUE sym, VALUE other)
Definition: string.c:8028
long rb_str_coderange_scan_restartable(const char *s, const char *e, rb_encoding *enc, int *cr)
Definition: string.c:232
static char * str_nth_len(const char *p, const char *e, long *nthp, rb_encoding *enc)
Definition: string.c:1533
int rb_sourceline(void)
Definition: vm.c:816
static VALUE rb_str_getbyte(VALUE str, VALUE index)
Definition: string.c:4088
static void rb_enc_cr_str_exact_copy(VALUE dest, VALUE src)
Definition: string.c:320
void rb_sys_fail(const char *mesg)
Definition: error.c:1899
static VALUE rb_str_chr(VALUE str)
Definition: string.c:4076
#define rb_str_new4
static const char * search_nonascii(const char *p, const char *e)
Definition: string.c:146
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Definition: transcode.c:2867
static void str_modifiable(VALUE str)
Definition: string.c:1327
static VALUE rb_str_bytes(VALUE str)
Definition: string.c:6407
static VALUE rb_str_index_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:2564
#define CHAR_BIT
Definition: ruby.h:208
VALUE rb_str_to_str(VALUE str)
Definition: string.c:849
static VALUE rb_str_match_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:2805
#define RSTRING_EMBED_LEN_MAX
static void str_mod_check(VALUE s, const char *p, long len)
Definition: string.c:352
VALUE rb_string_value(volatile VALUE *ptr)
Definition: string.c:1473
static VALUE rb_str_lines(int argc, VALUE *argv, VALUE str)
Definition: string.c:6327
VALUE rb_tainted_str_new_cstr(const char *ptr)
Definition: string.c:479
static const char isspacetable[256]
Definition: string.c:5848
#define T_BIGNUM
#define MEMCPY(p1, p2, type, n)
static VALUE scan_once(VALUE str, VALUE pat, long *start)
Definition: string.c:6995
static VALUE rb_str_sub(int argc, VALUE *argv, VALUE str)
Definition: string.c:3829
VALUE rb_usascii_str_new(const char *ptr, long len)
Definition: string.c:431
VALUE rb_str_buf_append(VALUE str, VALUE str2)
Definition: string.c:2106
#define ENC_CODERANGE_VALID
static VALUE rb_str_s_try_convert(VALUE dummy, VALUE str)
Definition: string.c:1527
#define RMATCH_REGS(obj)
Definition: re.h:54
static VALUE sym_succ(VALUE sym)
Definition: string.c:7991
void rb_str_free(VALUE str)
Definition: string.c:830
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Definition: string.c:614
static VALUE rb_str_end_with(int argc, VALUE *argv, VALUE str)
Definition: string.c:7593
static void str_enc_copy(VALUE str1, VALUE str2)
Definition: string.c:284
#define NEWOBJ_OF(obj, type, klass, flags)
#define T_SYMBOL
static ID id_to_s
Definition: string.c:892
#define ENC_CODERANGE_7BIT
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:772
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
int size
Definition: encoding.c:52
static VALUE rb_str_hex(VALUE str)
Definition: string.c:7110
char * rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc)
Definition: string.c:1583
static char * str_nth(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
Definition: string.c:1589
#define f
#define NUM2LONG(x)
static VALUE rb_str_reverse(VALUE str)
Definition: string.c:4256
static VALUE rb_str_downcase(VALUE str)
Definition: string.c:4929
#define SYMBOL_P(x)
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
RUBY_EXTERN VALUE rb_default_rs
Definition: ripper.y:490
VALUE rb_str_unlocktmp(VALUE str)
Definition: string.c:1821
static VALUE rb_str_valid_encoding_p(VALUE str)
Definition: string.c:7670
static VALUE rb_str_each_byte(VALUE str)
Definition: string.c:6390
static VALUE rb_str_chop(VALUE str)
Definition: string.c:6683
static VALUE rb_str_count(int argc, VALUE *argv, VALUE str)
Definition: string.c:5790
#define STR_SET_LEN(str, n)
Definition: string.c:82
static VALUE rb_str_eql(VALUE str1, VALUE str2)
Definition: string.c:2380
static void rb_str_subpat_set(VALUE str, VALUE re, VALUE backref, VALUE val)
Definition: string.c:3458
static long enc_strlen(const char *p, const char *e, rb_encoding *enc, int cr)
Definition: string.c:979
static VALUE rb_str_lstrip(VALUE str)
Definition: string.c:6879
#define MBCLEN_INVALID_P(ret)
st_index_t rb_memhash(const void *ptr, long len)
Definition: random.c:1422
int num_regs
Definition: ripper.y:615
#define ENC_CODERANGE(obj)
#define lesser(a, b)
Definition: string.c:2281
VALUE rb_check_array_type(VALUE ary)
Definition: array.c:557
static enum neighbor_char enc_succ_alnum_char(char *p, long len, rb_encoding *enc, char *carry)
Definition: string.c:2903
static VALUE sym_match(VALUE sym, VALUE other)
Definition: string.c:8044
VALUE rb_reg_quote(VALUE)
Definition: re.c:2965
static long rb_str_index(VALUE str, VALUE sub, long offset)
Definition: string.c:2503
RUBY_EXTERN VALUE rb_cObject
Definition: ripper.y:1426
static VALUE rb_str_upto(int argc, VALUE *argv, VALUE beg)
Definition: string.c:3109
st_data_t st_index_t
Definition: ripper.y:63
static VALUE str_byte_substr(VALUE str, long beg, long len)
Definition: string.c:4125
#define ALLOC_N(type, n)
#define LONG2FIX(i)
uint8_t key[16]
Definition: random.c:1370
VALUE rb_str_cat(VALUE str, const char *ptr, long len)
Definition: string.c:1964
#define RBASIC(obj)
long rb_str_strlen(VALUE str)
Definition: string.c:1168
VALUE rb_str_new_with_class(VALUE, const char *, long)
rb_econv_result_t
Definition: ripper.y:242
#define STR_EMBED_P(str)
Definition: string.c:75
static VALUE tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
Definition: string.c:5135
#define STR_NOCAPA_P(s)
Definition: string.c:64
int rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p)
Definition: string.c:4464
#define RARRAY_LENINT(ary)
#define ONIGENC_CTYPE_DIGIT
#define UINT2NUM(x)
#define INT2NUM(x)
static VALUE rb_str_capitalize(VALUE str)
Definition: string.c:5000
VALUE rb_str_drop_bytes(VALUE str, long len)
Definition: string.c:3348
size_t rb_str_capacity(VALUE str)
Definition: string.c:360
void rb_define_variable(const char *, VALUE *)
Definition: variable.c:594
rb_encoding * rb_filesystem_encoding(void)
Definition: encoding.c:1248
static VALUE rb_str_init(int argc, VALUE *argv, VALUE str)
Definition: string.c:969
v
Definition: win32ole.c:798
void rb_str_setter(VALUE val, ID id, VALUE *var)
Definition: string.c:7616
int rb_respond_to(VALUE, ID)
Definition: vm_method.c:1583
#define ONIGENC_CODE_TO_MBC_MAXLEN
RUBY_EXTERN VALUE rb_rs
Definition: ripper.y:489
static VALUE rb_str_rstrip_bang(VALUE str)
Definition: string.c:6900
static VALUE rb_str_each_char(VALUE str)
Definition: string.c:6504
VALUE rb_fs
Definition: string.c:143
#define ISPRINT(c)
Definition: ruby.h:1631
static VALUE str_replace_shared(VALUE str2, VALUE str)
Definition: string.c:657
VALUE rb_backref_get(void)
Definition: vm.c:762
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Definition: string.c:563
static void str_make_independent_expand(VALUE str, long expand)
Definition: string.c:1347
VALUE rb_ary_concat(VALUE x, VALUE y)
Definition: array.c:3382
static unsigned int hash(const char *str, unsigned int len)
Definition: lex.c:56
static VALUE rb_str_start_with(int argc, VALUE *argv, VALUE str)
Definition: string.c:7570
VALUE rb_ary_new2(long capa)
Definition: array.c:417
#define OBJ_UNTRUST(x)
#define rb_safe_level()
Definition: tcltklib.c:94
VALUE rb_str_substr(VALUE str, long beg, long len)
Definition: string.c:1775
#define rb_enc_is_newline(p, end, enc)
static void str_discard(VALUE str)
Definition: string.c:1412
void rb_must_asciicompat(VALUE str)
Definition: string.c:1464
#define assert(condition)
Definition: ossl.h:45
const char * name
Definition: nkf.c:208
VALUE rb_range_beg_len(VALUE, long *, long *, long, int)
Definition: range.c:990
VALUE rb_str_associated(VALUE str)
Definition: string.c:1454
#define rb_enc_asciicompat(enc)
#define NUM2INT(x)
VALUE rb_hash_new(void)
Definition: hash.c:234
VALUE rb_obj_alloc(VALUE)
Definition: object.c:1721
const char * rb_id2name(ID id)
Definition: ripper.c:17058
int gen
Definition: string.c:5073
static VALUE sym_empty(VALUE sym)
Definition: string.c:8084
static VALUE rb_str_to_s(VALUE str)
Definition: string.c:4441
#define rb_enc_isupper(c, enc)
#define rb_check_arity(argc, min, max)
#define BUILTIN_TYPE(x)
static VALUE str_byte_aref(VALUE str, VALUE indx)
Definition: string.c:4180
#define OBJ_UNTRUSTED(x)
VALUE rb_external_str_new(const char *ptr, long len)
Definition: string.c:584
void rb_str_associate(VALUE str, VALUE add)
Definition: string.c:1423
#define rb_enc_isascii(c, enc)
#define SIZEOF_VALUE
VALUE rb_hash_aref(VALUE, VALUE)
Definition: hash.c:570
VALUE rb_funcall2(VALUE, ID, int, const VALUE *)
Calls a method.
Definition: vm_eval.c:804
VALUE rb_str_succ(VALUE orig)
Definition: string.c:2985
rb_encoding * rb_ascii8bit_encoding(void)
Definition: encoding.c:1153
static VALUE rb_str_downcase_bang(VALUE str)
Definition: string.c:4864
static VALUE rb_enc_cr_str_buf_cat(VALUE str, const char *ptr, long len, int ptr_encindex, int ptr_cr, int *ptr_cr_ret)
Definition: string.c:1989
void rb_warning(const char *fmt,...)
Definition: error.c:229
#define ONIGERR_INVALID_CODE_POINT_VALUE
#define RREGEXP(obj)
#define RSTRING_GETMEM(str, ptrvar, lenvar)
static VALUE rb_str_sum(int argc, VALUE *argv, VALUE str)
Definition: string.c:7251
VALUE rb_str_inspect(VALUE str)
Definition: string.c:4509
void rb_free_tmp_buffer(volatile VALUE *store)
Definition: string.c:822
static void tr_setup_table(VALUE str, char stable[TR_TABLE_SIZE], int first, VALUE *tablep, VALUE *ctablep, rb_encoding *enc)
Definition: string.c:5440
#define is_ascii_string(str)
Definition: string.c:120
VALUE rb_str_buf_new(long capa)
Definition: string.c:777
#define RREGEXP_SRC_LEN(r)
#define snprintf
#define SPECIAL_CONST_P(x)
#define OBJ_TAINT(x)
static VALUE rb_str_casecmp(VALUE str1, VALUE str2)
Definition: string.c:2445
int rb_str_comparable(VALUE str1, VALUE str2)
Definition: string.c:2284
VALUE rb_str_cat2(VALUE str, const char *ptr)
Definition: string.c:1983
#define mod(x, y)
Definition: date_strftime.c:28
static char * rb_str_subpos(VALUE str, long beg, long *lenp)
Definition: string.c:1690
VALUE rb_str_ord(VALUE s)
Definition: string.c:7232
VALUE rb_str_locktmp_ensure(VALUE str, VALUE(*func)(VALUE), VALUE arg)
Definition: string.c:1831
#define rb_str_dup_frozen
#define STR_ASSOC_P(s)
Definition: string.c:62
static VALUE sym_aref(int argc, VALUE *argv, VALUE sym)
Definition: string.c:8058
#define NULL
Definition: _sdbm.c:103
VALUE rb_invcmp(VALUE x, VALUE y)
Definition: compar.c:42
VALUE rb_str_resurrect(VALUE str)
Definition: string.c:952
static VALUE rb_str_aref(VALUE str, VALUE indx)
Definition: string.c:3216
VALUE rb_check_string_type(VALUE str)
Definition: string.c:1509
#define REALLOC_N(var, type, n)
VALUE rb_id_quote_unprintable(ID id)
Definition: string.c:7850
VALUE rb_reg_regcomp(VALUE)
Definition: re.c:2547
static int match(VALUE str, VALUE pat, VALUE hash, int(*cb)(VALUE, VALUE))
Definition: date_parse.c:273
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Definition: class.c:1344
#define rb_enc_mbmaxlen(enc)
static VALUE rb_str_delete_bang(int, VALUE *, VALUE)
Definition: string.c:5534
void rb_warn(const char *fmt,...)
Definition: error.c:216
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
Definition: string.c:7710
#define SYM2ID(x)
VALUE rb_eArgError
Definition: error.c:512
VALUE rb_convert_type(VALUE, int, const char *, const char *)
Definition: object.c:2381
static VALUE rb_str_force_encoding(VALUE str, VALUE enc)
Definition: string.c:7633
rb_encoding * rb_enc_find(const char *name)
Definition: encoding.c:659
#define IS_EVSTR(p, e)
Definition: string.c:4618
VALUE rb_check_convert_type(VALUE, int, const char *, const char *)
Definition: object.c:2394
VALUE rb_str_dump(VALUE str)
Definition: string.c:4631
static VALUE rb_str_rindex_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:2678
VALUE rb_usascii_str_new_cstr(const char *)
#define TR_TABLE_SIZE
Definition: string.c:5438
static VALUE rb_str_each_codepoint(VALUE str)
Definition: string.c:6596
void rb_str_modify(VALUE str)
Definition: string.c:1369
char ** argv
Definition: ruby.c:131
ID rb_to_id(VALUE name)
Definition: string.c:8155
#define FL_UNSET(x, f)
VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
Definition: string.c:439
static enum neighbor_char enc_succ_char(char *p, long len, rb_encoding *enc)
Definition: string.c:2826
VALUE rb_external_str_new_cstr(const char *ptr)
Definition: string.c:590
VALUE rb_inspect(VALUE)
Definition: object.c:402
rb_encoding * rb_enc_from_index(int index)
Definition: encoding.c:548
VALUE rb_str_dup(VALUE str)
Definition: string.c:946