14#include "ruby/internal/config.h"
24#include "debug_counter.h"
29#include "internal/array.h"
30#include "internal/compar.h"
31#include "internal/compilers.h"
32#include "internal/encoding.h"
33#include "internal/error.h"
34#include "internal/gc.h"
35#include "internal/numeric.h"
36#include "internal/object.h"
37#include "internal/proc.h"
38#include "internal/re.h"
39#include "internal/sanitizers.h"
40#include "internal/string.h"
41#include "internal/transcode.h"
46#include "ruby_assert.h"
49#if defined HAVE_CRYPT_R
50# if defined HAVE_CRYPT_H
53#elif !defined HAVE_CRYPT
54# include "missing/crypt.h"
55# define HAVE_CRYPT_R 1
58#define BEG(no) (regs->beg[(no)])
59#define END(no) (regs->end[(no)])
62#undef rb_usascii_str_new
66#undef rb_usascii_str_new_cstr
67#undef rb_utf8_str_new_cstr
68#undef rb_enc_str_new_cstr
69#undef rb_external_str_new_cstr
70#undef rb_locale_str_new_cstr
71#undef rb_str_dup_frozen
72#undef rb_str_buf_new_cstr
103#define RUBY_MAX_CHAR_LEN 16
104#define STR_SHARED_ROOT FL_USER5
105#define STR_BORROWED FL_USER6
106#define STR_TMPLOCK FL_USER7
107#define STR_NOFREE FL_USER18
108#define STR_FAKESTR FL_USER19
110#define STR_SET_NOEMBED(str) do {\
111 FL_SET((str), STR_NOEMBED);\
113 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
116 STR_SET_EMBED_LEN((str), 0);\
119#define STR_SET_EMBED(str) FL_UNSET((str), (STR_NOEMBED|STR_NOFREE))
121# define STR_SET_EMBED_LEN(str, n) do { \
122 assert(str_embed_capa(str) > (n));\
123 RSTRING(str)->as.embed.len = (n);\
126# define STR_SET_EMBED_LEN(str, n) do { \
128 RBASIC(str)->flags &= ~RSTRING_EMBED_LEN_MASK;\
129 RBASIC(str)->flags |= (tmp_n) << RSTRING_EMBED_LEN_SHIFT;\
133#define STR_SET_LEN(str, n) do { \
134 if (STR_EMBED_P(str)) {\
135 STR_SET_EMBED_LEN((str), (n));\
138 RSTRING(str)->as.heap.len = (n);\
142#define STR_DEC_LEN(str) do {\
143 if (STR_EMBED_P(str)) {\
144 long n = RSTRING_LEN(str);\
146 STR_SET_EMBED_LEN((str), n);\
149 RSTRING(str)->as.heap.len--;\
154str_enc_fastpath(
VALUE str)
158 case ENCINDEX_ASCII_8BIT:
160 case ENCINDEX_US_ASCII:
167#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
168#define TERM_FILL(ptr, termlen) do {\
169 char *const term_fill_ptr = (ptr);\
170 const int term_fill_len = (termlen);\
171 *term_fill_ptr = '\0';\
172 if (UNLIKELY(term_fill_len > 1))\
173 memset(term_fill_ptr, 0, term_fill_len);\
176#define RESIZE_CAPA(str,capacity) do {\
177 const int termlen = TERM_LEN(str);\
178 RESIZE_CAPA_TERM(str,capacity,termlen);\
180#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
181 if (STR_EMBED_P(str)) {\
182 if (str_embed_capa(str) < capacity + termlen) {\
183 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
184 const long tlen = RSTRING_LEN(str);\
185 memcpy(tmp, RSTRING_PTR(str), tlen);\
186 RSTRING(str)->as.heap.ptr = tmp;\
187 RSTRING(str)->as.heap.len = tlen;\
188 STR_SET_NOEMBED(str);\
189 RSTRING(str)->as.heap.aux.capa = (capacity);\
193 assert(!FL_TEST((str), STR_SHARED)); \
194 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
195 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
196 RSTRING(str)->as.heap.aux.capa = (capacity);\
200#define STR_SET_SHARED(str, shared_str) do { \
201 if (!FL_TEST(str, STR_FAKESTR)) { \
202 assert(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
203 assert(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
204 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
205 FL_SET((str), STR_SHARED); \
206 FL_SET((shared_str), STR_SHARED_ROOT); \
207 if (RBASIC_CLASS((shared_str)) == 0) \
208 FL_SET_RAW((shared_str), STR_BORROWED); \
212#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
213#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
216#define STR_ENC_GET(str) get_encoding(str)
218#if !defined SHARABLE_MIDDLE_SUBSTRING
219# define SHARABLE_MIDDLE_SUBSTRING 0
221#if !SHARABLE_MIDDLE_SUBSTRING
222#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
224#define SHARABLE_SUBSTRING_P(beg, len, end) 1
229str_embed_capa(
VALUE str)
232 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.
ary);
239rb_str_reembeddable_p(
VALUE str)
241 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
245rb_str_embed_size(
long capa)
251rb_str_size_as_embedded(
VALUE str)
255 if (STR_EMBED_P(str)) {
256 real_size = rb_str_embed_size(
RSTRING(str)->as.embed.len) + TERM_LEN(str);
260 else if (rb_str_reembeddable_p(str)) {
261 real_size = rb_str_embed_size(
RSTRING(str)->as.heap.aux.capa) + TERM_LEN(str);
265 real_size =
sizeof(
struct RString);
273STR_EMBEDDABLE_P(
long len,
long termlen)
276 return rb_gc_size_allocatable_p(rb_str_embed_size(
len + termlen));
284static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
285static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
287static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
288static inline void str_modifiable(
VALUE str);
292str_make_independent(
VALUE str)
295 int termlen = TERM_LEN(str);
296 str_make_independent_expand((str),
len, 0L, termlen);
299static inline int str_dependent_p(
VALUE str);
302rb_str_make_independent(
VALUE str)
304 if (str_dependent_p(str)) {
305 str_make_independent(str);
310rb_str_make_embedded(
VALUE str)
315 char *buf =
RSTRING(str)->as.heap.ptr;
319 STR_SET_EMBED_LEN(str,
len);
326 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
333 if (new_root == old_root) {
339 if (!STR_EMBED_P(new_root)) {
343 size_t offset = (size_t)((uintptr_t)
RSTRING(str)->
as.heap.ptr - (uintptr_t)
RSTRING(old_root)->
as.embed.ary);
346 RSTRING(str)->as.heap.ptr =
RSTRING(new_root)->as.embed.ary + offset;
350rb_debug_rstring_null_ptr(
const char *func)
352 fprintf(stderr,
"%s is returning NULL!! "
353 "SIGSEGV is highly expected to follow immediately.\n"
354 "If you could reproduce, attach your debugger here, "
355 "and look at the passed string.\n",
360static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
363get_encoding(
VALUE str)
369mustnot_broken(
VALUE str)
371 if (is_broken_string(str)) {
377mustnot_wchar(
VALUE str)
387static VALUE register_fstring(
VALUE str,
bool copy);
394#define BARE_STRING_P(str) (!FL_ANY_RAW(str, FL_EXIVAR) && RBASIC_CLASS(str) == rb_cString)
402fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t data,
int existing)
412 if (rb_objspace_garbage_object_p(str)) {
424 rb_enc_copy(new_str, str);
437 if (STR_SHARED_P(str)) {
439 str_make_independent(str);
442 if (!BARE_STRING_P(str)) {
446 RBASIC(str)->flags |= RSTRING_FSTR;
448 *key = *value = arg->fstr = str;
462 if (
FL_TEST(str, RSTRING_FSTR))
465 bare = BARE_STRING_P(str);
467 if (STR_EMBED_P(str)) {
471 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_SHARED_ROOT|STR_SHARED) == (STR_NOEMBED|STR_SHARED_ROOT)) {
480 fstr = register_fstring(str, FALSE);
483 str_replace_shared_without_enc(str, fstr);
491register_fstring(
VALUE str,
bool copy)
498 st_table *frozen_strings = rb_vm_fstring_table();
501 st_update(frozen_strings, (st_data_t)str, fstr_update_callback, (st_data_t)&args);
502 }
while (UNDEF_P(args.fstr));
514setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
530 return (
VALUE)fake_str;
537rb_setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
rb_encoding *enc)
539 return setup_fake_str(fake_str, name, len, rb_enc_to_index(enc));
547MJIT_FUNC_EXPORTED
VALUE
548rb_fstring_new(
const char *ptr,
long len)
551 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII), FALSE);
558 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc), FALSE);
562rb_fstring_cstr(
const char *
ptr)
564 return rb_fstring_new(
ptr, strlen(
ptr));
568fstring_set_class_i(st_data_t key, st_data_t val, st_data_t arg)
578 const char *aptr, *bptr;
581 return (alen != blen ||
583 memcmp(aptr, bptr, alen) != 0);
587single_byte_optimizable(
VALUE str)
595 enc = STR_ENC_GET(str);
606static inline const char *
607search_nonascii(
const char *p,
const char *e)
609 const uintptr_t *s, *t;
611#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
612# if SIZEOF_UINTPTR_T == 8
613# define NONASCII_MASK UINT64_C(0x8080808080808080)
614# elif SIZEOF_UINTPTR_T == 4
615# define NONASCII_MASK UINT32_C(0x80808080)
617# error "don't know what to do."
620# if SIZEOF_UINTPTR_T == 8
621# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
622# elif SIZEOF_UINTPTR_T == 4
623# define NONASCII_MASK 0x80808080UL
625# error "don't know what to do."
629 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
630#if !UNALIGNED_WORD_ACCESS
631 if ((uintptr_t)p % SIZEOF_VOIDP) {
632 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
637 case 7:
if (p[-7]&0x80)
return p-7;
638 case 6:
if (p[-6]&0x80)
return p-6;
639 case 5:
if (p[-5]&0x80)
return p-5;
640 case 4:
if (p[-4]&0x80)
return p-4;
642 case 3:
if (p[-3]&0x80)
return p-3;
643 case 2:
if (p[-2]&0x80)
return p-2;
644 case 1:
if (p[-1]&0x80)
return p-1;
649#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
650#define aligned_ptr(value) \
651 __builtin_assume_aligned((value), sizeof(uintptr_t))
653#define aligned_ptr(value) (uintptr_t *)(value)
656 t = (uintptr_t *)(e - (SIZEOF_VOIDP-1));
659 if (*s & NONASCII_MASK) {
660#ifdef WORDS_BIGENDIAN
661 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
663 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
673 case 7:
if (e[-7]&0x80)
return e-7;
674 case 6:
if (e[-6]&0x80)
return e-6;
675 case 5:
if (e[-5]&0x80)
return e-5;
676 case 4:
if (e[-4]&0x80)
return e-4;
678 case 3:
if (e[-3]&0x80)
return e-3;
679 case 2:
if (e[-2]&0x80)
return e-2;
680 case 1:
if (e[-1]&0x80)
return e-1;
688 const char *e = p +
len;
690 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
692 p = search_nonascii(p, e);
697 p = search_nonascii(p, e);
700 int ret = rb_enc_precise_mbclen(p, e, enc);
704 p = search_nonascii(p, e);
710 int ret = rb_enc_precise_mbclen(p, e, enc);
726 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
729 p = search_nonascii(p, e);
734 p = search_nonascii(p, e);
740 int ret = rb_enc_precise_mbclen(p, e, enc);
747 p = search_nonascii(p, e);
753 int ret = rb_enc_precise_mbclen(p, e, enc);
772rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
777 str_enc_copy(dest, src);
802rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
804 str_enc_copy(dest, src);
817 return enc_coderange_scan(str, enc);
826 cr = enc_coderange_scan(str, get_encoding(str));
839 else if (is_ascii_string(str))
845str_mod_check(
VALUE s,
const char *p,
long len)
853str_capacity(
VALUE str,
const int termlen)
855 if (STR_EMBED_P(str)) {
857 return str_embed_capa(str) - termlen;
862 else if (
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
863 return RSTRING(str)->as.heap.len;
866 return RSTRING(str)->as.heap.aux.capa;
873 return str_capacity(str, TERM_LEN(str));
877must_not_null(
const char *
ptr)
887 size_t size = rb_str_embed_size(
capa);
889 assert(rb_gc_size_allocatable_p(size));
891 assert(size <=
sizeof(
struct RString));
894 RVARGC_NEWOBJ_OF(str,
struct RString, klass,
901str_alloc_heap(
VALUE klass)
903 RVARGC_NEWOBJ_OF(str,
struct RString, klass,
910empty_str_alloc(
VALUE klass)
912 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
913 VALUE str = str_alloc_embed(klass, 0);
914 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
919str_new0(
VALUE klass,
const char *
ptr,
long len,
int termlen)
927 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
929 if (STR_EMBEDDABLE_P(
len, termlen)) {
930 str = str_alloc_embed(klass,
len + termlen);
936 str = str_alloc_heap(klass);
942 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
947 STR_SET_LEN(str,
len);
955 return str_new0(klass,
ptr,
len, 1);
976 rb_enc_associate_index(str, rb_utf8_encindex());
988 rb_enc_associate(str, enc);
1000 __msan_unpoison_string(
ptr);
1016 rb_enc_associate_index(str, rb_utf8_encindex());
1031str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
1040 rb_encoding *enc = rb_enc_get_from_index(encindex);
1044 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1045 str = str_alloc_heap(klass);
1049 RBASIC(str)->flags |= STR_NOFREE;
1051 rb_enc_associate_index(str, encindex);
1079static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1081 int ecflags,
VALUE ecopts);
1086 int encidx = rb_enc_to_index(enc);
1087 if (rb_enc_get_index(str) == encidx)
1088 return is_ascii_string(str);
1099 if (!to)
return str;
1100 if (!from) from = rb_enc_get(str);
1101 if (from == to)
return str;
1103 rb_is_ascii8bit_enc(to)) {
1104 if (STR_ENC_GET(str) != to) {
1106 rb_enc_associate(str, to);
1113 from, to, ecflags, ecopts);
1114 if (
NIL_P(newstr)) {
1122rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1128 if (ofs < -olen || olen < ofs)
1130 if (ofs < 0) ofs += olen;
1132 STR_SET_LEN(newstr, ofs);
1137 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1145 STR_SET_LEN(str, 0);
1146 rb_enc_associate(str, enc);
1152str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1154 int ecflags,
VALUE ecopts)
1159 VALUE econv_wrapper;
1160 const unsigned char *start, *sp;
1161 unsigned char *dest, *dp;
1162 size_t converted_output = (size_t)ofs;
1167 RBASIC_CLEAR_CLASS(econv_wrapper);
1169 if (!ec)
return Qnil;
1172 sp = (
unsigned char*)
ptr;
1174 while ((dest = (
unsigned char*)
RSTRING_PTR(newstr)),
1175 (dp = dest + converted_output),
1179 size_t converted_input = sp - start;
1180 size_t rest =
len - converted_input;
1181 converted_output = dp - dest;
1183 if (converted_input && converted_output &&
1184 rest < (LONG_MAX / converted_output)) {
1185 rest = (rest * converted_output) / converted_input;
1190 olen += rest < 2 ? 2 : rest;
1199 rb_enc_associate(newstr, to);
1218 const int eidx = rb_enc_to_index(eenc);
1225 if ((eidx == rb_ascii8bit_encindex()) ||
1226 (eidx == rb_usascii_encindex() && search_nonascii(
ptr,
ptr +
len))) {
1230 ienc = rb_default_internal_encoding();
1231 if (!ienc || eenc == ienc) {
1236 if ((eidx == rb_ascii8bit_encindex()) ||
1237 (eidx == rb_usascii_encindex()) ||
1245 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1246 rb_str_initialize(str,
ptr,
len, eenc);
1254 int eidx = rb_enc_to_index(eenc);
1255 if (eidx == rb_usascii_encindex() &&
1256 !is_ascii_string(str)) {
1257 rb_enc_associate_index(str, rb_ascii8bit_encindex());
1260 rb_enc_associate_index(str, eidx);
1319str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1321 const int termlen = TERM_LEN(str);
1326 if (str_embed_capa(str2) >=
len + termlen) {
1327 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1328 STR_SET_EMBED(str2);
1330 STR_SET_EMBED_LEN(str2,
len);
1331 TERM_FILL(ptr2+
len, termlen);
1335 if (STR_SHARED_P(str)) {
1336 root =
RSTRING(str)->as.heap.aux.shared;
1344 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1346 rb_fatal(
"about to free a possible shared root");
1348 char *ptr2 = STR_HEAP_PTR(str2);
1350 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1353 FL_SET(str2, STR_NOEMBED);
1356 STR_SET_SHARED(str2, root);
1364 str_replace_shared_without_enc(str2, str);
1365 rb_enc_cr_str_exact_copy(str2, str);
1372 return str_replace_shared(str_alloc_heap(klass), str);
1389rb_str_new_frozen_String(
VALUE orig)
1396rb_str_tmp_frozen_acquire(
VALUE orig)
1399 return str_new_frozen_buffer(0, orig, FALSE);
1403rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1408 if (STR_EMBED_P(tmp)) {
1421 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1422 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1427 STR_SET_EMBED_LEN(tmp, 0);
1435 return str_new_frozen_buffer(klass, orig, TRUE);
1441 assert(!STR_EMBED_P(orig));
1442 assert(!STR_SHARED_P(orig));
1444 VALUE str = str_alloc_heap(klass);
1447 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1448 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1449 RBASIC(orig)->flags &= ~STR_NOFREE;
1450 STR_SET_SHARED(orig, str);
1457str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1462 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1464 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1466 assert(STR_EMBED_P(str));
1477 assert(!STR_EMBED_P(
shared));
1481 if ((ofs > 0) || (rest > 0) ||
1484 str = str_new_shared(klass,
shared);
1485 assert(!STR_EMBED_P(str));
1486 RSTRING(str)->as.heap.ptr += ofs;
1487 RSTRING(str)->as.heap.len -= ofs + rest;
1495 else if (STR_EMBEDDABLE_P(
RSTRING_LEN(orig), TERM_LEN(orig))) {
1496 str = str_alloc_embed(klass,
RSTRING_LEN(orig) + TERM_LEN(orig));
1503 str = heap_str_make_shared(klass, orig);
1507 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1519str_new_empty_String(
VALUE str)
1522 rb_enc_copy(v, str);
1526#define STR_BUF_MIN_SIZE 63
1534 if (STR_EMBEDDABLE_P(
capa, 1)) {
1541 if (
capa < STR_BUF_MIN_SIZE) {
1542 capa = STR_BUF_MIN_SIZE;
1547 RSTRING(str)->as.heap.ptr[0] =
'\0';
1567 return str_new(0, 0,
len);
1573 if (
FL_TEST(str, RSTRING_FSTR)) {
1574 st_data_t fstr = (st_data_t)str;
1578 st_delete(rb_vm_fstring_table(), &fstr, NULL);
1579 RB_DEBUG_COUNTER_INC(obj_str_fstr);
1584 if (STR_EMBED_P(str)) {
1585 RB_DEBUG_COUNTER_INC(obj_str_embed);
1587 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1588 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1589 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1592 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1593 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
1597RUBY_FUNC_EXPORTED
size_t
1598rb_str_memsize(
VALUE str)
1600 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1601 return STR_HEAP_SIZE(str);
1611 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
1614static inline void str_discard(
VALUE str);
1615static void str_shared_replace(
VALUE str,
VALUE str2);
1620 if (str != str2) str_shared_replace(str, str2);
1631 enc = STR_ENC_GET(str2);
1636 if (str_embed_capa(str) >=
RSTRING_LEN(str2) + termlen) {
1640 rb_enc_associate(str, enc);
1645 if (STR_EMBED_P(str2)) {
1646 assert(!
FL_TEST(str2, STR_SHARED));
1648 assert(
len + termlen <= str_embed_capa(str2));
1650 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1651 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1652 RSTRING(str2)->as.heap.ptr = new_ptr;
1655 STR_SET_NOEMBED(str2);
1659 STR_SET_NOEMBED(str);
1664 if (
FL_TEST(str2, STR_SHARED)) {
1666 STR_SET_SHARED(str,
shared);
1669 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1673 STR_SET_EMBED(str2);
1675 STR_SET_EMBED_LEN(str2, 0);
1676 rb_enc_associate(str, enc);
1690 return rb_obj_as_string_result(str, obj);
1693MJIT_FUNC_EXPORTED
VALUE
1707 if (STR_SHARED_P(str2)) {
1710 STR_SET_NOEMBED(str);
1713 STR_SET_SHARED(str,
shared);
1714 rb_enc_cr_str_exact_copy(str, str2);
1717 str_replace_shared(str, str2);
1726 size_t size = rb_str_embed_size(
capa);
1728 assert(rb_gc_size_allocatable_p(size));
1730 assert(size <=
sizeof(
struct RString));
1733 RB_RVARGC_EC_NEWOBJ_OF(ec, str,
struct RString, klass,
1742 RB_RVARGC_EC_NEWOBJ_OF(ec, str,
struct RString, klass,
1751 const VALUE flag_mask =
1753 RSTRING_NOEMBED | RSTRING_EMBED_LEN_MASK |
1760 if (STR_EMBED_P(str)) {
1763 assert(STR_EMBED_P(dup));
1764 assert(str_embed_capa(dup) >=
len + 1);
1765 STR_SET_EMBED_LEN(dup,
len);
1771 root =
RSTRING(str)->as.heap.aux.shared;
1773 else if (UNLIKELY(!(flags &
FL_FREEZE))) {
1774 root = str = str_new_frozen(klass, str);
1777 assert(!STR_SHARED_P(root));
1781 else if (STR_EMBED_P(root)) {
1790 FL_SET(root, STR_SHARED_ROOT);
1792 flags |= RSTRING_NOEMBED | STR_SHARED;
1797 encidx = rb_enc_get_index(str);
1798 flags &= ~ENCODING_MASK;
1801 if (encidx) rb_enc_associate_index(dup, encidx);
1809 if (
FL_TEST(str, STR_NOEMBED)) {
1810 dup = ec_str_alloc_heap(ec, klass);
1816 return str_duplicate_setup(klass, str, dup);
1823 if (
FL_TEST(str, STR_NOEMBED)) {
1824 dup = str_alloc_heap(klass);
1830 return str_duplicate_setup(klass, str, dup);
1842 RUBY_DTRACE_CREATE_HOOK(STRING,
RSTRING_LEN(str));
1849 RUBY_DTRACE_CREATE_HOOK(STRING,
RSTRING_LEN(str));
1850 return ec_str_duplicate(ec,
rb_cString, str);
1865 static ID keyword_ids[2];
1866 VALUE orig, opt, venc, vcapa;
1871 if (!keyword_ids[0]) {
1872 keyword_ids[0] = rb_id_encoding();
1873 CONST_ID(keyword_ids[1],
"capacity");
1881 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
1882 enc = rb_to_encoding(venc);
1884 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
1889 if (
capa < STR_BUF_MIN_SIZE) {
1890 capa = STR_BUF_MIN_SIZE;
1898 if (orig == str) n = 0;
1900 str_modifiable(str);
1901 if (STR_EMBED_P(str)) {
1902 char *new_ptr =
ALLOC_N(
char, (
size_t)
capa + termlen);
1904 assert(
RSTRING(str)->
as.embed.len + 1 <= str_embed_capa(str));
1909 RSTRING(str)->as.heap.ptr = new_ptr;
1911 else if (
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
1912 const size_t size = (size_t)
capa + termlen;
1914 const size_t osize =
RSTRING(str)->as.heap.len + TERM_LEN(str);
1915 char *new_ptr =
ALLOC_N(
char, (
size_t)
capa + termlen);
1916 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
1918 RSTRING(str)->as.heap.ptr = new_ptr;
1920 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
1921 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
1922 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
1928 rb_enc_cr_str_exact_copy(str, orig);
1930 FL_SET(str, STR_NOEMBED);
1937 rb_enc_associate(str, enc);
1948#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
1963static inline uintptr_t
1964count_utf8_lead_bytes_with_word(
const uintptr_t *s)
1969 d = (d>>6) | (~d>>7);
1970 d &= NONASCII_MASK >> 7;
1973#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
1975 return rb_popcount_intptr(d);
1979# if SIZEOF_VOIDP == 8
1988enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
1994 long diff = (long)(e - p);
2000 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2001 const uintptr_t *s, *t;
2002 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2003 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2004 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2005 while (p < (
const char *)s) {
2006 if (is_utf8_lead_byte(*p))
len++;
2010 len += count_utf8_lead_bytes_with_word(s);
2013 p = (
const char *)s;
2016 if (is_utf8_lead_byte(*p))
len++;
2027 q = search_nonascii(p, e);
2033 p += rb_enc_fast_mbclen(p, e, enc);
2040 q = search_nonascii(p, e);
2046 p += rb_enc_mbclen(p, e, enc);
2053 for (c=0; p<e; c++) {
2054 p += rb_enc_mbclen(p, e, enc);
2069rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2077 long diff = (long)(e - p);
2084 q = search_nonascii(p, e);
2092 ret = rb_enc_precise_mbclen(p, e, enc);
2107 for (c=0; p<e; c++) {
2108 ret = rb_enc_precise_mbclen(p, e, enc);
2132 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2133 if (!enc) enc = STR_ENC_GET(str);
2139 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2144 return enc_strlen(p, e, enc, cr);
2151 return str_strlen(str, NULL);
2165 return LONG2NUM(str_strlen(str, NULL));
2177rb_str_bytesize(
VALUE str)
2195rb_str_empty(
VALUE str)
2215 char *ptr1, *ptr2, *ptr3;
2220 enc = rb_enc_check_str(str1, str2);
2224 if (len1 > LONG_MAX - len2) {
2227 str3 = str_new0(
rb_cString, 0, len1+len2, termlen);
2229 memcpy(ptr3, ptr1, len1);
2230 memcpy(ptr3+len1, ptr2, len2);
2231 TERM_FILL(&ptr3[len1+len2], termlen);
2241MJIT_FUNC_EXPORTED
VALUE
2247 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2250 int enc1 = rb_enc_get_index(str1);
2251 int enc2 = rb_enc_get_index(str2);
2256 else if (enc2 < 0) {
2259 else if (enc1 != enc2) {
2262 else if (len1 > LONG_MAX - len2) {
2295 rb_enc_copy(str2, str);
2303 if (STR_EMBEDDABLE_P(
len, 1)) {
2312 STR_SET_LEN(str2,
len);
2313 rb_enc_copy(str2, str);
2321 termlen = TERM_LEN(str);
2327 while (n <=
len/2) {
2328 memcpy(ptr2 + n, ptr2, n);
2331 memcpy(ptr2 + n, ptr2,
len-n);
2333 STR_SET_LEN(str2,
len);
2334 TERM_FILL(&ptr2[
len], termlen);
2335 rb_enc_cr_str_copy_for_substr(str2, str);
2361 VALUE tmp = rb_check_array_type(arg);
2370rb_check_lockedtmp(
VALUE str)
2372 if (
FL_TEST(str, STR_TMPLOCK)) {
2378str_modifiable(
VALUE str)
2380 rb_check_lockedtmp(str);
2385str_dependent_p(
VALUE str)
2387 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2396str_independent(
VALUE str)
2398 str_modifiable(str);
2399 return !str_dependent_p(str);
2403str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2411 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2416 STR_SET_EMBED_LEN(str,
len);
2423 memcpy(
ptr, oldptr,
len);
2425 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2428 STR_SET_NOEMBED(str);
2429 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2430 TERM_FILL(
ptr +
len, termlen);
2439 if (!str_independent(str))
2440 str_make_independent(str);
2447 int termlen = TERM_LEN(str);
2453 if (expand >= LONG_MAX -
len) {
2457 if (!str_independent(str)) {
2458 str_make_independent_expand(str,
len, expand, termlen);
2460 else if (expand > 0) {
2461 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2468str_modify_keep_cr(
VALUE str)
2470 if (!str_independent(str))
2471 str_make_independent(str);
2478str_discard(
VALUE str)
2480 str_modifiable(str);
2481 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2482 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2483 RSTRING(str)->as.heap.ptr = 0;
2484 RSTRING(str)->as.heap.len = 0;
2519zero_filled(
const char *s,
int n)
2521 for (; n > 0; --n) {
2528str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2530 const char *e = s +
len;
2532 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
2533 if (zero_filled(s, minlen))
return s;
2539str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2544 if (str_dependent_p(str)) {
2545 if (!zero_filled(s +
len, termlen))
2546 str_make_independent_expand(str,
len, 0L, termlen);
2549 TERM_FILL(s +
len, termlen);
2556rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2558 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2563 rb_check_lockedtmp(str);
2564 str_make_independent_expand(str,
len, 0L, termlen);
2566 else if (str_dependent_p(str)) {
2567 if (termlen > oldtermlen)
2568 str_make_independent_expand(str,
len, 0L, termlen);
2571 if (!STR_EMBED_P(str)) {
2573 assert(!
FL_TEST((str), STR_SHARED));
2576 if (termlen > oldtermlen) {
2585str_null_check(
VALUE str,
int *w)
2594 if (str_null_char(s,
len, minlen, enc)) {
2597 return str_fill_term(str, s,
len, minlen);
2600 if (!s || memchr(s, 0,
len)) {
2604 s = str_fill_term(str, s,
len, minlen);
2610rb_str_to_cstr(
VALUE str)
2613 return str_null_check(str, &w);
2621 char *s = str_null_check(str, &w);
2632rb_str_fill_terminator(
VALUE str,
const int newminlen)
2636 return str_fill_term(str, s,
len, newminlen);
2642 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2666str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2676 const char *p2, *e2;
2679 while (p < e && 0 < nth) {
2686 p2 = search_nonascii(p, e2);
2695 n = rb_enc_mbclen(p, e, enc);
2706 while (p < e && nth--) {
2707 p += rb_enc_mbclen(p, e, enc);
2718 return str_nth_len(p, e, &nth, enc);
2722str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2727 p = str_nth_len(p, e, &nth, enc);
2736str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2738 const char *pp = str_nth(p, e, nth, enc, singlebyte);
2739 if (!pp)
return e - p;
2747 STR_ENC_GET(str), single_byte_optimizable(str));
2752str_utf8_nth(
const char *p,
const char *e,
long *nthp)
2755 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
2756 const uintptr_t *s, *t;
2757 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
2758 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2759 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2760 while (p < (
const char *)s) {
2761 if (is_utf8_lead_byte(*p)) nth--;
2765 nth -= count_utf8_lead_bytes_with_word(s);
2767 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
2771 if (is_utf8_lead_byte(*p)) {
2772 if (nth == 0)
break;
2782str_utf8_offset(
const char *p,
const char *e,
long nth)
2784 const char *pp = str_utf8_nth(p, e, &nth);
2793 if (single_byte_optimizable(str) || pos < 0)
2797 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
2802str_subseq(
VALUE str,
long beg,
long len)
2806 const long rstring_embed_capa_max = ((
sizeof(
struct RString) - offsetof(struct
RString,
as.
embed.
ary)) / sizeof(char)) - 1;
2809 len <= rstring_embed_capa_max) {
2816 RSTRING(str2)->as.heap.ptr += beg;
2828 VALUE str2 = str_subseq(str, beg,
len);
2829 rb_enc_cr_str_copy_for_substr(str2, str);
2842 if (
len < 0)
return 0;
2846 if (single_byte_optimizable(str)) {
2847 if (beg > blen)
return 0;
2850 if (beg < 0)
return 0;
2852 if (
len > blen - beg)
2854 if (
len < 0)
return 0;
2859 if (
len > -beg)
len = -beg;
2871 slen = str_strlen(str, enc);
2873 if (beg < 0)
return 0;
2875 if (
len == 0)
goto end;
2882 if (beg > str_strlen(str, enc))
return 0;
2887 enc == rb_utf8_encoding()) {
2888 p = str_utf8_nth(s, e, &beg);
2889 if (beg > 0)
return 0;
2890 len = str_utf8_offset(p, e,
len);
2896 p = s + beg * char_sz;
2900 else if (
len * char_sz > e - p)
2905 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
2906 if (beg > 0)
return 0;
2910 len = str_offset(p, e,
len, enc, 0);
2918static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
2923 return str_substr(str, beg,
len, TRUE);
2927str_substr(
VALUE str,
long beg,
long len,
int empty)
2931 if (!p)
return Qnil;
2932 if (!
len && !empty)
return Qnil;
2936 VALUE str2 = str_subseq(str, beg,
len);
2937 rb_enc_cr_str_copy_for_substr(str2, str);
2981str_uminus(
VALUE str)
2986 return rb_fstring(str);
2990#define rb_str_dup_frozen rb_str_new_frozen
2995 if (
FL_TEST(str, STR_TMPLOCK)) {
2998 FL_SET(str, STR_TMPLOCK);
3005 if (!
FL_TEST(str, STR_TMPLOCK)) {
3012RUBY_FUNC_EXPORTED
VALUE
3023 const int termlen = TERM_LEN(str);
3025 str_modifiable(str);
3026 if (STR_SHARED_P(str)) {
3029 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3059 STR_SET_LEN(str,
len);
3070 int independent = str_independent(str);
3079 const int termlen = TERM_LEN(str);
3080 if (STR_EMBED_P(str)) {
3081 if (
len == slen)
return str;
3082 if (str_embed_capa(str) >=
len + termlen) {
3083 STR_SET_EMBED_LEN(str,
len);
3087 str_make_independent_expand(str, slen,
len - slen, termlen);
3089 else if (str_embed_capa(str) >=
len + termlen) {
3090 char *
ptr = STR_HEAP_PTR(str);
3092 if (slen >
len) slen =
len;
3095 STR_SET_EMBED_LEN(str,
len);
3096 if (independent) ruby_xfree(
ptr);
3099 else if (!independent) {
3100 if (
len == slen)
return str;
3101 str_make_independent_expand(str, slen,
len - slen, termlen);
3105 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3106 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3109 else if (
len == slen)
return str;
3117str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3120 str_modify_keep_cr(str);
3125 if (
len == 0)
return 0;
3127 long capa, total, olen, off = -1;
3129 const int termlen = TERM_LEN(str);
3135 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3139 if (STR_EMBED_P(str)) {
3140 capa = str_embed_capa(str) - termlen;
3141 sptr =
RSTRING(str)->as.embed.ary;
3146 sptr =
RSTRING(str)->as.heap.ptr;
3147 olen =
RSTRING(str)->as.heap.len;
3149 if (olen > LONG_MAX -
len) {
3154 if (total >= LONG_MAX / 2) {
3157 while (total >
capa) {
3160 RESIZE_CAPA_TERM(str,
capa, termlen);
3166 memcpy(sptr + olen,
ptr,
len);
3167 STR_SET_LEN(str, total);
3168 TERM_FILL(sptr + total, termlen);
3173#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
3174#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3179 if (
len == 0)
return str;
3183 return str_buf_cat(str,
ptr,
len);
3198rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3199 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3208 if (str_encindex == ptr_encindex) {
3210 ptr_cr = coderange_scan(
ptr,
len, rb_enc_from_index(ptr_encindex));
3214 str_enc = rb_enc_from_index(str_encindex);
3215 ptr_enc = rb_enc_from_index(ptr_encindex);
3228 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3237 *ptr_cr_ret = ptr_cr;
3239 if (str_encindex != ptr_encindex &&
3242 str_enc = rb_enc_from_index(str_encindex);
3243 ptr_enc = rb_enc_from_index(ptr_encindex);
3248 res_encindex = str_encindex;
3253 res_encindex = str_encindex;
3257 res_encindex = ptr_encindex;
3262 res_encindex = str_encindex;
3269 res_encindex = str_encindex;
3277 str_buf_cat(str,
ptr,
len);
3290 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3301 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3307 unsigned int c = (
unsigned char)*
ptr;
3308 int len = rb_enc_codelen(c, enc);
3310 rb_enc_cr_str_buf_cat(str, buf,
len,
3323 if (str_enc_fastpath(str)) {
/* Length threshold used by rb_str_concat_literals(): its LIKELY branch is
 * taken when the computed len is below this value. */
3359#define MIN_PRE_ALLOC_SIZE 48
3361MJIT_FUNC_EXPORTED
VALUE
3362rb_str_concat_literals(
size_t num,
const VALUE *strary)
3372 if (LIKELY(
len < MIN_PRE_ALLOC_SIZE)) {
3378 rb_enc_copy(str, strary[0]);
3382 for (i = s; i < num; ++i) {
3383 const VALUE v = strary[i];
3387 if (encidx != ENCINDEX_US_ASCII) {
3389 rb_enc_set_index(str, encidx);
3414rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3416 str_modifiable(str);
3421 else if (argc > 1) {
3424 rb_enc_copy(arg_str, str);
3425 for (i = 0; i < argc; i++) {
3460 if (rb_num_to_uint(str2, &code) == 0) {
3473 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
3476 buf[0] = (char)code;
3478 if (encidx != rb_enc_to_index(enc)) {
3479 rb_enc_associate_index(str1, encidx);
3489 switch (
len = rb_enc_codelen(code, enc)) {
3490 case ONIGERR_INVALID_CODE_POINT_VALUE:
3493 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
3500 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
/*
 * Decides which encoding index applies when `code` is appended to a string
 * whose encoding is `enc`.
 *
 * Visible logic: the first branch applies only when enc is ASCII-8BIT or
 * US-ASCII; within it, a US-ASCII string receiving a code above 127 yields
 * ENCINDEX_ASCII_8BIT.
 * NOTE(review): the remaining return paths are not visible in this excerpt.
 */
3517rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
3519 int encidx = rb_enc_to_index(enc);
3521 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
/* A value above 127 does not fit US-ASCII: report ASCII-8BIT instead. */
3526 if (encidx == ENCINDEX_US_ASCII && code > 127) {
3527 return ENCINDEX_ASCII_8BIT;
3550rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
3552 str_modifiable(str);
3557 else if (argc > 1) {
3560 rb_enc_copy(arg_str, str);
3561 for (i = 0; i < argc; i++) {
3574 if (e && is_ascii_string(str)) {
3584 const char *ptr1, *ptr2;
3587 return (len1 != len2 ||
3589 memcmp(ptr1, ptr2, len1) != 0);
3603rb_str_hash_m(
VALUE str)
/* Yields b when a > b, otherwise a (so a when they compare equal). One of the
 * arguments is evaluated twice; do not pass expressions with side effects. */
3609#define lesser(a,b) (((a)>(b))?(b):(a))
3621 if (idx1 == idx2)
return TRUE;
3640 const char *ptr1, *ptr2;
3643 if (str1 == str2)
return 0;
3646 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
3655 if (len1 > len2)
return 1;
3658 if (retval > 0)
return 1;
3685 if (str1 == str2)
return Qtrue;
3692 return rb_str_eql_internal(str1, str2);
3713MJIT_FUNC_EXPORTED
VALUE
3716 if (str1 == str2)
return Qtrue;
3718 return rb_str_eql_internal(str1, str2);
3749 return rb_invcmp(str1, str2);
3791 return str_casecmp(str1, s);
3799 const char *p1, *p1end, *p2, *p2end;
3801 enc = rb_enc_compatible(str1, str2);
3808 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
3809 while (p1 < p1end && p2 < p2end) {
3811 unsigned int c1 =
TOLOWER(*p1 & 0xff);
3812 unsigned int c2 =
TOLOWER(*p2 & 0xff);
3814 return INT2FIX(c1 < c2 ? -1 : 1);
3821 while (p1 < p1end && p2 < p2end) {
3822 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
3823 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
3825 if (0 <= c1 && 0 <= c2) {
3829 return INT2FIX(c1 < c2 ? -1 : 1);
3833 l1 = rb_enc_mbclen(p1, p1end, enc);
3834 l2 = rb_enc_mbclen(p2, p2end, enc);
3835 len = l1 < l2 ? l1 : l2;
3836 r = memcmp(p1, p2,
len);
3838 return INT2FIX(r < 0 ? -1 : 1);
3840 return INT2FIX(l1 < l2 ? -1 : 1);
3881 return str_casecmp_p(str1, s);
3888 VALUE folded_str1, folded_str2;
3889 VALUE fold_opt = sym_fold;
3891 enc = rb_enc_compatible(str1, str2);
3896 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
3897 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
3899 return rb_str_eql(folded_str1, folded_str2);
/*
 * Core substring search on raw buffers.
 *
 * str_ptr/str_ptr_end/str_len - the haystack and its byte length
 * sub_ptr/sub_len             - the needle and its byte length
 * offset                      - starting offset; the searched length is
 *                               str_len - offset
 * enc                         - encoding passed to rb_memsearch()
 *
 * Returns pos + offset for an accepted hit, or a negative value when
 * rb_memsearch() reports no match or the remaining length runs out.
 */
3903strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
3904 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
3906 const char *search_start = str_ptr;
3907 long pos, search_len = str_len - offset;
3911 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
/* No match: hand the negative result back to the caller. */
3912 if (pos < 0)
return pos;
/* NOTE(review): `t` is computed on a line not visible here; presumably it is
 * the character boundary at or after the hit - confirm. The hit is accepted
 * only when it coincides with t. */
3914 if (t == search_start + pos)
break;
/* Otherwise shrink the remaining window by the distance to t and retry. */
3915 search_len -= t - search_start;
3916 if (search_len <= 0)
return -1;
3917 offset += t - search_start;
3920 return pos + offset;
/* Forwards to rb_strseq_index() with in_byte = 0, i.e. the offset is converted
 * through str_offset() there instead of being used as a raw byte offset. */
3923#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
3926rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
3928 const char *str_ptr, *str_ptr_end, *sub_ptr;
3929 long str_len, sub_len;
3932 enc = rb_enc_check(str, sub);
3933 if (is_broken_string(sub))
return -1;
3941 if (str_len < sub_len)
return -1;
3944 long str_len_char, sub_len_char;
3945 int single_byte = single_byte_optimizable(str);
3946 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
3947 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
3949 offset += str_len_char;
3950 if (offset < 0)
return -1;
3952 if (str_len_char - offset < sub_len_char)
return -1;
3953 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
3956 if (sub_len == 0)
return offset;
3959 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
3973rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
3979 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
3986 pos += str_strlen(str, NULL);
3996 if (pos > str_strlen(str, NULL)) {
4001 rb_enc_check(str, sub), single_byte_optimizable(str));
4015 pos = rb_str_index(str, sub, pos);
4019 if (pos == -1)
return Qnil;
4028str_check_byte_pos(
VALUE str,
long pos)
4032 const char *p = s + pos;
4079rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4085 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4091 if (pos < 0 || pos > slen) {
4102 if (!str_check_byte_pos(str, pos)) {
4104 "offset %ld does not land on character boundary", pos);
4120 pos = rb_strseq_index(str, sub, pos, 1);
4123 if (pos == -1)
return Qnil;
4131 char *hit, *adjusted;
4133 long slen, searchlen;
4138 if (slen == 0)
return s - sbeg;
4142 searchlen = s - sbeg + 1;
4145 hit = memrchr(sbeg, c, searchlen);
4148 if (hit != adjusted) {
4149 searchlen = adjusted - sbeg;
4152 if (memcmp(hit, t, slen) == 0)
4154 searchlen = adjusted - sbeg;
4155 }
while (searchlen > 0);
4172 if (memcmp(s, t, slen) == 0) {
4175 if (s <= sbeg)
break;
4191 enc = rb_enc_check(str, sub);
4192 if (is_broken_string(sub))
return -1;
4193 singlebyte = single_byte_optimizable(str);
4194 len = singlebyte ?
RSTRING_LEN(str) : str_strlen(str, enc);
4195 slen = str_strlen(sub, enc);
4198 if (len < slen)
return -1;
4199 if (len - pos < slen) pos = len - slen;
4200 if (len == 0)
return pos;
4211 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
4273rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4278 long pos, len = str_strlen(str, enc);
4280 if (
rb_scan_args(argc, argv,
"11", &sub, &vpos) == 2) {
4291 if (pos > len) pos = len;
4300 enc, single_byte_optimizable(str));
4311 pos = rb_str_rindex(str, sub, pos);
4312 if (pos >= 0)
return LONG2NUM(pos);
4318rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4324 enc = rb_enc_check(str, sub);
4325 if (is_broken_string(sub))
return -1;
4330 if (len < slen)
return -1;
4331 if (len - pos < slen) pos = len - slen;
4332 if (len == 0)
return pos;
4344 return str_rindex(str, sub, s, enc);
4409rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
4415 if (
rb_scan_args(argc, argv,
"11", &sub, &vpos) == 2) {
4426 if (pos > len) pos = len;
4432 if (!str_check_byte_pos(str, pos)) {
4434 "offset %ld does not land on character boundary", pos);
4447 pos = rb_str_byterindex(str, sub, pos);
4448 if (pos >= 0)
return LONG2NUM(pos);
4484 switch (OBJ_BUILTIN_TYPE(y)) {
4536rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
4543 result = rb_funcallv(get_pat(re),
rb_intern(
"match"), argc, argv);
4575rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
4579 re = get_pat(argv[0]);
4580 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
4589static enum neighbor_char
4597 int r = rb_enc_precise_mbclen(p, p + len, enc), c;
4599 return NEIGHBOR_NOT_CHAR;
4603 if (!l)
return NEIGHBOR_NOT_CHAR;
4604 if (l != len)
return NEIGHBOR_WRAPPED;
4606 r = rb_enc_precise_mbclen(p, p + len, enc);
4608 return NEIGHBOR_NOT_CHAR;
4610 return NEIGHBOR_FOUND;
4613 for (i = len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
4616 return NEIGHBOR_WRAPPED;
4617 ++((
unsigned char*)p)[i];
4618 l = rb_enc_precise_mbclen(p, p+len, enc);
4622 return NEIGHBOR_FOUND;
4625 memset(p+l, 0xff, len-l);
4631 for (len2 = len-1; 0 < len2; len2--) {
4632 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
4636 memset(p+len2+1, 0xff, len-(len2+1));
4641static enum neighbor_char
4648 int r = rb_enc_precise_mbclen(p, p + len, enc), c;
4650 return NEIGHBOR_NOT_CHAR;
4653 if (!c)
return NEIGHBOR_NOT_CHAR;
4656 if (!l)
return NEIGHBOR_NOT_CHAR;
4657 if (l != len)
return NEIGHBOR_WRAPPED;
4659 r = rb_enc_precise_mbclen(p, p + len, enc);
4661 return NEIGHBOR_NOT_CHAR;
4663 return NEIGHBOR_FOUND;
4666 for (i = len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
4669 return NEIGHBOR_WRAPPED;
4670 --((
unsigned char*)p)[i];
4671 l = rb_enc_precise_mbclen(p, p+len, enc);
4675 return NEIGHBOR_FOUND;
4678 memset(p+l, 0, len-l);
4684 for (len2 = len-1; 0 < len2; len2--) {
4685 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
4689 memset(p+len2+1, 0, len-(len2+1));
4703static enum neighbor_char
4704enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
4706 enum neighbor_char ret;
4710 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
4714 const int max_gaps = 1;
4718 ctype = ONIGENC_CTYPE_DIGIT;
4720 ctype = ONIGENC_CTYPE_ALPHA;
4722 return NEIGHBOR_NOT_CHAR;
4724 MEMCPY(save, p,
char, len);
4725 for (
try = 0;
try <= max_gaps; ++
try) {
4726 ret = enc_succ_char(p, len, enc);
4727 if (ret == NEIGHBOR_FOUND) {
4730 return NEIGHBOR_FOUND;
4733 MEMCPY(p, save,
char, len);
4736 MEMCPY(save, p,
char, len);
4737 ret = enc_pred_char(p, len, enc);
4738 if (ret == NEIGHBOR_FOUND) {
4741 MEMCPY(p, save,
char, len);
4746 MEMCPY(p, save,
char, len);
4752 return NEIGHBOR_NOT_CHAR;
4755 if (ctype != ONIGENC_CTYPE_DIGIT) {
4756 MEMCPY(carry, p,
char, len);
4757 return NEIGHBOR_WRAPPED;
4760 MEMCPY(carry, p,
char, len);
4761 enc_succ_char(carry, len, enc);
4762 return NEIGHBOR_WRAPPED;
4832 rb_enc_cr_str_copy_for_substr(str, orig);
4833 return str_succ(str);
4840 char *sbeg, *s, *e, *last_alnum = 0;
4841 int found_alnum = 0;
4843 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
4844 long carry_pos = 0, carry_len = 1;
4845 enum neighbor_char neighbor = NEIGHBOR_FOUND;
4848 if (slen == 0)
return str;
4850 enc = STR_ENC_GET(str);
4852 s = e = sbeg + slen;
4855 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
4861 l = rb_enc_precise_mbclen(s, e, enc);
4862 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
4863 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
4864 neighbor = enc_succ_alnum_char(s, l, enc, carry);
4866 case NEIGHBOR_NOT_CHAR:
4868 case NEIGHBOR_FOUND:
4870 case NEIGHBOR_WRAPPED:
4875 carry_pos = s - sbeg;
4881 enum neighbor_char neighbor;
4882 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
4883 l = rb_enc_precise_mbclen(s, e, enc);
4884 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
4885 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
4887 neighbor = enc_succ_char(tmp, l, enc);
4889 case NEIGHBOR_FOUND:
4893 case NEIGHBOR_WRAPPED:
4896 case NEIGHBOR_NOT_CHAR:
4899 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
4901 enc_succ_char(s, l, enc);
4904 MEMCPY(carry, s,
char, l);
4907 carry_pos = s - sbeg;
4911 RESIZE_CAPA(str, slen + carry_len);
4913 s = sbeg + carry_pos;
4914 memmove(s + carry_len, s, slen - carry_pos);
4915 memmove(s, carry, carry_len);
4917 STR_SET_LEN(str, slen);
4934rb_str_succ_bang(
VALUE str)
4942all_digits_p(
const char *s,
long len)
4996 VALUE end, exclusive;
5000 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5006 VALUE current, after_end;
5013 enc = rb_enc_check(beg, end);
5014 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5020 if (c > e || (excl && c == e))
return beg;
5023 if (!excl && c == e)
break;
5025 if (excl && c == e)
break;
5037 b = rb_str_to_inum(beg, 10, FALSE);
5038 e = rb_str_to_inum(end, 10, FALSE);
5045 if (excl && bi == ei)
break;
5046 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5051 ID op = excl ?
'<' : idLE;
5052 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5057 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5058 b = rb_funcallv(b, succ, 0, 0);
5065 if (n > 0 || (excl && n == 0))
return beg;
5067 after_end = rb_funcallv(end, succ, 0, 0);
5072 next = rb_funcallv(current, succ, 0, 0);
5073 if ((*each)(current, arg))
break;
5074 if (
NIL_P(next))
break;
5095 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5097 b = rb_str_to_inum(beg, 10, FALSE);
5103 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5111 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5112 b = rb_funcallv(b, succ, 0, 0);
5118 VALUE next = rb_funcallv(current, succ, 0, 0);
5119 if ((*each)(current, arg))
break;
5133 if (!
rb_equal(str, *argp))
return 0;
5162 if (b <= v && v < e)
return Qtrue;
5163 return RBOOL(!
RTEST(exclusive) && v == e);
5176 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5178 return RBOOL(
NIL_P(val));
5201 return rb_str_subpat(str, indx,
INT2FIX(0));
5204 if (rb_str_index(str, indx, 0) != -1)
5210 long beg, len = str_strlen(str, NULL);
5222 return str_substr(str, idx, 1, FALSE);
5241rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5245 return rb_str_subpat(str, argv[0], argv[1]);
5254 return rb_str_aref(str, argv[0]);
5263 str_modifiable(str);
5264 if (len > olen) len = olen;
5266 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5268 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5270 STR_SET_EMBED_LEN(str, nlen);
5271 ptr =
RSTRING(str)->as.embed.ary;
5272 memmove(ptr, oldptr + len, nlen);
5273 if (fl == STR_NOEMBED)
xfree(oldptr);
5276 if (!STR_SHARED_P(str)) {
5278 rb_enc_cr_str_exact_copy(shared, str);
5281 ptr =
RSTRING(str)->as.heap.ptr += len;
5282 RSTRING(str)->as.heap.len = nlen;
5290rb_str_splice_0(
VALUE str,
long beg,
long len,
VALUE val)
5296 if (beg == 0 && vlen == 0) {
5301 str_modify_keep_cr(str);
5305 RESIZE_CAPA(str, slen + vlen - len);
5315 memmove(sptr + beg + vlen,
5317 slen - (beg + len));
5319 if (vlen < beg && len < 0) {
5320 MEMZERO(sptr + slen,
char, -len);
5326 STR_SET_LEN(str, slen);
5327 TERM_FILL(&sptr[slen], TERM_LEN(str));
5337 int singlebyte = single_byte_optimizable(str);
5343 enc = rb_enc_check(str, val);
5344 slen = str_strlen(str, enc);
5346 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5353 assert(beg <= slen);
5354 if (len > slen - beg) {
5359 e = str_nth(p,
RSTRING_END(str), len, enc, singlebyte);
5364 rb_str_splice_0(str, beg, len, val);
5365 rb_enc_associate(str, enc);
/* Alias: rb_str_splice() expands to rb_str_update() with the same arguments. */
5371#define rb_str_splice(str, beg, len, val) rb_str_update(str, beg, len, val)
5378 long start, end, len;
5388 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5392 nth += regs->num_regs;
5402 enc = rb_enc_check_str(str, val);
5403 rb_str_splice_0(str, start, len, val);
5404 rb_enc_associate(str, enc);
5412 switch (
TYPE(indx)) {
5414 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5418 beg = rb_str_index(str, indx, 0);
5423 rb_str_splice(str, beg, str_strlen(indx, NULL), val);
5431 rb_str_splice(str, beg, len, val);
5439 rb_str_splice(str, idx, 1, val);
5474rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5478 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5486 return rb_str_aset(str, argv[0], argv[1]);
5518 rb_str_splice(str, pos, 0, str2);
5546rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
5554 str_modify_keep_cr(str);
5562 if ((nth += regs->num_regs) <= 0)
return Qnil;
5564 else if (nth >= regs->num_regs)
return Qnil;
5566 len = END(nth) - beg;
5569 else if (argc == 2) {
5577 if (!len)
return Qnil;
5582 beg = rb_str_index(str, indx, 0);
5583 if (beg == -1)
return Qnil;
5595 if (!len)
return Qnil;
5609 rb_enc_cr_str_copy_for_substr(result, str);
5619 if (beg + len > slen)
5623 slen - (beg + len));
5625 STR_SET_LEN(str, slen);
5626 TERM_FILL(&sptr[slen], TERM_LEN(str));
5637 switch (OBJ_BUILTIN_TYPE(pat)) {
5656get_pat_quoted(
VALUE pat,
int check)
5660 switch (OBJ_BUILTIN_TYPE(pat)) {
5674 if (check && is_broken_string(pat)) {
5681rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
5684 pos = rb_strseq_index(str, pat, pos, 1);
5685 if (set_backref_str) {
5687 str = rb_str_new_frozen_String(str);
5688 rb_backref_set_string(str, pos,
RSTRING_LEN(pat));
5697 return rb_reg_search0(pat, str, pos, 0, set_backref_str);
5717rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
5731 hash = rb_check_hash_type(argv[1]);
5737 pat = get_pat_quoted(argv[0], 1);
5739 str_modifiable(str);
5740 beg = rb_pat_search(pat, str, 0, 1);
5763 if (iter || !
NIL_P(hash)) {
5770 repl = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
5773 str_mod_check(str, p, len);
5780 enc = rb_enc_compatible(str, repl);
5790 enc = STR_ENC_GET(repl);
5793 rb_enc_associate(str, enc);
5806 RESIZE_CAPA(str, len + rlen - plen);
5810 memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen);
5813 memmove(p + beg0, rp, rlen);
5815 STR_SET_LEN(str, len);
5843 rb_str_sub_bang(argc, argv, str);
5848str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
5852 long beg, beg0, end0;
5853 long offset, blen, slen, len, last;
5854 enum {STR, ITER, MAP} mode = STR;
5856 int need_backref = -1;
5866 hash = rb_check_hash_type(argv[1]);
5875 rb_error_arity(argc, 1, 2);
5878 pat = get_pat_quoted(argv[0], 1);
5879 beg = rb_pat_search(pat, str, 0, need_backref);
5881 if (bang)
return Qnil;
5891 str_enc = STR_ENC_GET(str);
5892 rb_enc_associate(dest, str_enc);
5914 val = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
5917 str_mod_check(str, sp, slen);
5922 else if (need_backref) {
5924 if (need_backref < 0) {
5925 need_backref = val != repl;
5932 len = beg0 - offset;
5949 offset = end0 + len;
5953 beg = rb_pat_search(pat, str, offset, need_backref);
5958 rb_pat_search(pat, str, last, 1);
5960 str_shared_replace(str, dest);
5988rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
5990 str_modify_keep_cr(str);
5991 return str_gsub(argc, argv, str, 1);
6014 return str_gsub(argc, argv, str, 0);
6032 str_modifiable(str);
6033 if (str == str2)
return str;
6037 return str_replace(str, str2);
6052rb_str_clear(
VALUE str)
6056 STR_SET_EMBED_LEN(str, 0);
6077rb_str_chr(
VALUE str)
6125 char *ptr, *head, *left = 0;
6129 if (pos < -len || len <= pos)
6136 char byte = (char)(
NUM2INT(w) & 0xFF);
6138 if (!str_independent(str))
6139 str_make_independent(str);
6140 enc = STR_ENC_GET(str);
6143 if (!STR_EMBED_P(str)) {
6150 nlen = rb_enc_precise_mbclen(left, head+len, enc);
6158 width = rb_enc_precise_mbclen(left, head+len, enc);
6160 nlen = rb_enc_precise_mbclen(left, head+len, enc);
6176str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6180 if (beg > n || len < 0)
return Qnil;
6183 if (beg < 0)
return Qnil;
6188 if (!empty)
return Qnil;
6192 VALUE str2 = str_subseq(str, beg, len);
6194 str_enc_copy(str2, str);
6233 return str_byte_substr(str, beg, len, TRUE);
6238 return str_byte_substr(str, idx, 1, FALSE);
6285rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6290 return str_byte_substr(str, beg, len, TRUE);
6293 return str_byte_aref(str, argv[0]);
6313rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6315 long beg, end, len, slen;
6324 rb_builtin_class_name(argv[0]));
6335 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
6342 assert(beg <= slen);
6343 if (len > slen - beg) {
6347 if (!str_check_byte_pos(str, beg)) {
6349 "offset %ld does not land on character boundary", beg);
6351 if (!str_check_byte_pos(str, end)) {
6353 "offset %ld does not land on character boundary", end);
6356 enc = rb_enc_check(str, val);
6357 str_modify_keep_cr(str);
6358 rb_str_splice_0(str, beg, len, val);
6359 rb_enc_associate(str, enc);
6377rb_str_reverse(
VALUE str)
6385 enc = STR_ENC_GET(str);
6392 if (single_byte_optimizable(str)) {
6399 int clen = rb_enc_fast_mbclen(s, e, enc);
6410 int clen = rb_enc_mbclen(s, e, enc);
6420 str_enc_copy(rev, str);
6440rb_str_reverse_bang(
VALUE str)
6443 if (single_byte_optimizable(str)) {
6446 str_modify_keep_cr(str);
6456 str_shared_replace(str, rb_str_reverse(str));
6460 str_modify_keep_cr(str);
6485 i = rb_str_index(str, arg, 0);
6487 return RBOOL(i != -1);
6531 return rb_str_to_inum(str, base, FALSE);
6555rb_str_to_f(
VALUE str)
6573rb_str_to_s(
VALUE str)
6585 char s[RUBY_MAX_CHAR_LEN];
6586 int n = rb_enc_codelen(c, enc);
/* Size limit passed to snprintf() when formatting a single escaped character;
 * the receiving buffers are declared with CHAR_ESC_LEN + 1 bytes. */
6593#define CHAR_ESC_LEN 13
/*
 * Formats the character code `c` as text into a local buffer (and, judging by
 * the name and the `result` parameter, appends it to `result` - the append
 * itself is not visible in this excerpt).
 *
 * Formats seen below: the raw character ("%c"), a 4-digit \u escape for
 * c < 0x10000, a braced \u{...} escape, a 2-digit \x escape and a braced
 * \x{...} escape.
 * NOTE(review): the conditions selecting between the \u and \x families are
 * not all visible; presumably `unicode_p` chooses the \u forms - confirm.
 */
6596rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
6598 char buf[CHAR_ESC_LEN + 1];
6606 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
6608 else if (c < 0x10000) {
6609 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
6612 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
6617 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
6620 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
/* Length of the text just formatted into buf. */
6623 l = (int)strlen(buf);
/*
 * Maps selected control characters to their backslash-escaped spelling.
 *
 * Cases visible below: NUL, newline, carriage return, tab, form feed,
 * vertical tab (013), backspace (010), bell (007), escape (033) and
 * DEL (0x7f, spelled as backslash-c-question-mark).
 * NOTE(review): the result for any other value of `c` is not visible in this
 * excerpt.
 */
6629ruby_escaped_char(
int c)
6632 case '\0':
return "\\0";
6633 case '\n':
return "\\n";
6634 case '\r':
return "\\r";
6635 case '\t':
return "\\t";
6636 case '\f':
return "\\f";
6637 case '\013':
return "\\v";
6638 case '\010':
return "\\b";
6639 case '\007':
return "\\a";
6640 case '\033':
return "\\e";
6641 case '\x7f':
return "\\c?";
6647rb_str_escape(
VALUE str)
6653 const char *prev = p;
6654 char buf[CHAR_ESC_LEN + 1];
6656 int unicode_p = rb_enc_unicode_p(enc);
6662 int n = rb_enc_precise_mbclen(p, pend, enc);
6664 if (p > prev) str_buf_cat(result, prev, p - prev);
6667 n = (int)(pend - p);
6669 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
6670 str_buf_cat(result, buf, strlen(buf));
6678 cc = ruby_escaped_char(c);
6680 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6681 str_buf_cat(result, cc, strlen(cc));
6687 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6688 rb_str_buf_cat_escaped_char(result, c, unicode_p);
6692 if (p > prev) str_buf_cat(result, prev, p - prev);
6716 const char *p, *pend, *prev;
6717 char buf[CHAR_ESC_LEN + 1];
6719 rb_encoding *resenc = rb_default_internal_encoding();
6720 int unicode_p = rb_enc_unicode_p(enc);
6723 if (resenc == NULL) resenc = rb_default_external_encoding();
6725 rb_enc_associate(result, resenc);
6726 str_buf_cat2(result,
"\"");
6734 n = rb_enc_precise_mbclen(p, pend, enc);
6736 if (p > prev) str_buf_cat(result, prev, p - prev);
6739 n = (int)(pend - p);
6741 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
6742 str_buf_cat(result, buf, strlen(buf));
6750 if ((asciicompat || unicode_p) &&
6751 (c ==
'"'|| c ==
'\\' ||
6756 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
6757 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6758 str_buf_cat2(result,
"\\");
6759 if (asciicompat || enc == resenc) {
6765 case '\n': cc =
'n';
break;
6766 case '\r': cc =
'r';
break;
6767 case '\t': cc =
't';
break;
6768 case '\f': cc =
'f';
break;
6769 case '\013': cc =
'v';
break;
6770 case '\010': cc =
'b';
break;
6771 case '\007': cc =
'a';
break;
6772 case 033: cc =
'e';
break;
6773 default: cc = 0;
break;
6776 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6779 str_buf_cat(result, buf, 2);
6796 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6797 rb_str_buf_cat_escaped_char(result, c, unicode_p);
6802 if (p > prev) str_buf_cat(result, prev, p - prev);
6803 str_buf_cat2(result,
"\"");
/* True when p is still before e and *p is '$', '@' or '{'. The dump code tests
 * it right after a '#' to decide whether a backslash must be emitted. `p` is
 * expanded several times, so pass a plain pointer expression. */
6808#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
6828 int encidx = rb_enc_get_index(str);
6831 const char *p, *pend;
6834 int u8 = (encidx == rb_utf8_encindex());
6835 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
6840 len += strlen(enc->name);
6846 unsigned char c = *p++;
6849 case '"':
case '\\':
6850 case '\n':
case '\r':
6851 case '\t':
case '\f':
6852 case '\013':
case '\010':
case '\007':
case '\033':
6857 clen = IS_EVSTR(p, pend) ? 2 : 1;
6865 if (u8 && c > 0x7F) {
6866 int n = rb_enc_precise_mbclen(p-1, pend, enc);
6871 else if (cc <= 0xFFFFF)
6884 if (clen > LONG_MAX - len) {
6896 unsigned char c = *p++;
6898 if (c ==
'"' || c ==
'\\') {
6902 else if (c ==
'#') {
6903 if (IS_EVSTR(p, pend)) *q++ =
'\\';
6906 else if (c ==
'\n') {
6910 else if (c ==
'\r') {
6914 else if (c ==
'\t') {
6918 else if (c ==
'\f') {
6922 else if (c ==
'\013') {
6926 else if (c ==
'\010') {
6930 else if (c ==
'\007') {
6934 else if (c ==
'\033') {
6944 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
6949 snprintf(q, qend-q,
"u%04X", cc);
6951 snprintf(q, qend-q,
"u{%X}", cc);
6956 snprintf(q, qend-q,
"x%02X", c);
6963 snprintf(q, qend-q, nonascii_suffix, enc->name);
6964 encidx = rb_ascii8bit_encindex();
6967 rb_enc_associate_index(result, encidx);
6973unescape_ascii(
unsigned int c)
6997undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
6999 const char *s = *ss;
7003 unsigned char buf[6];
7021 *buf = unescape_ascii(*s);
7033 if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
7034 if (*penc != enc_utf8) {
7036 rb_enc_associate(undumped, enc_utf8);
7053 if (hexlen == 0 || hexlen > 6) {
7059 if (0xd800 <= c && c <= 0xdfff) {
7072 if (0xd800 <= c && c <= 0xdfff) {
7103static VALUE rb_str_is_ascii_only_p(
VALUE str);
7121str_undump(
VALUE str)
7128 bool binary =
false;
7132 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7135 if (!str_null_check(str, &w)) {
7139 if (*s !=
'"')
goto invalid_format;
7157 static const char force_encoding_suffix[] =
".force_encoding(\"";
7158 static const char dup_suffix[] =
".dup";
7159 const char *encname;
7164 size =
sizeof(dup_suffix) - 1;
7165 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
7167 size =
sizeof(force_encoding_suffix) - 1;
7168 if (s_end - s <= size)
goto invalid_format;
7169 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7177 s = memchr(s,
'"', s_end-s);
7179 if (!s)
goto invalid_format;
7180 if (s_end - s != 2)
goto invalid_format;
7181 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7183 encidx = rb_enc_find_index2(encname, (
long)size);
7187 rb_enc_associate_index(undumped, encidx);
7197 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7206 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
7212 if (rb_enc_dummy_p(enc)) {
7219str_true_enc(
VALUE str)
7222 rb_str_check_dummy_enc(enc);
/*
 * Translates the option symbols in argv into case-mapping flag bits that are
 * OR'ed (or, for fold, XOR'ed) into `flags`.
 *
 * argc  - number of option arguments
 * argv  - option symbols (turkic, lithuanian, ascii, fold)
 * flags - initial flags supplied by the caller (e.g. ONIGENC_CASE_UPCASE)
 *
 * Callers assign the result back to their own `flags`; the return statement
 * and the argc/error handling are not visible in this excerpt.
 */
7226static OnigCaseFoldType
7227check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
/* turkic first, optionally followed by lithuanian. */
7233 if (argv[0]==sym_turkic) {
7234 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7236 if (argv[1]==sym_lithuanian)
7237 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
/* lithuanian first, optionally followed by turkic. */
7242 else if (argv[0]==sym_lithuanian) {
7243 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7245 if (argv[1]==sym_turkic)
7246 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7253 else if (argv[0]==sym_ascii)
7254 flags |= ONIGENC_CASE_ASCII_ONLY;
/* fold: when flags carries DOWNCASE without UPCASE, toggle both the FOLD and
 * DOWNCASE bits (DOWNCASE goes off, FOLD flips). */
7255 else if (argv[0]==sym_fold) {
7256 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
7257 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7269 if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() ||
rb_enc_mbmaxlen(enc) == 1))
/* Constant number of extra bytes added to every mapping buffer's capacity
 * (see the `capa` computation in the case-mapping loop). */
7275#define CASE_MAPPING_ADDITIONAL_LENGTH 20
/* Debug switch, 0 unless defined beforehand: when non-zero the case-mapping
 * code prints buffer diagnostics to stderr. */
7276#ifndef CASEMAP_DEBUG
7277# define CASEMAP_DEBUG 0
7285 OnigUChar space[FLEX_ARY_LEN];
/*
 * Releases a chain of mapping buffers: each node is freed with
 * ruby_sized_xfree(), passing the node's `capa` as the size.
 * NOTE(review): `current_buffer` is presumably initialised from `p`; that
 * line is not visible in this excerpt.
 */
7289mapping_buffer_free(
void *p)
7293 while (current_buffer) {
/* Read `next` before the node is released. */
7294 previous_buffer = current_buffer;
7295 current_buffer = current_buffer->next;
7296 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7302 {0, mapping_buffer_free,}
7310 const OnigUChar *source_current, *source_end;
7311 int target_length = 0;
7312 VALUE buffer_anchor;
7315 size_t buffer_count = 0;
7316 int buffer_length_or_invalid;
7325 while (source_current < source_end) {
7327 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7328 if (CASEMAP_DEBUG) {
7329 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n", capa);
/* Link the current buffer onto the end of the chain: store it through the
 * tail pointer, then advance the tail pointer to that buffer's `next` field
 * and terminate the chain there. */
7332 *pre_buffer = current_buffer;
7333 pre_buffer = &current_buffer->next;
7334 current_buffer->next = NULL;
7335 current_buffer->capa = capa;
7336 buffer_length_or_invalid = enc->case_map(flags,
7337 &source_current, source_end,
7338 current_buffer->space,
7339 current_buffer->space+current_buffer->capa,
7341 if (buffer_length_or_invalid < 0) {
7342 current_buffer =
DATA_PTR(buffer_anchor);
7344 mapping_buffer_free(current_buffer);
7347 target_length += current_buffer->used = buffer_length_or_invalid;
7349 if (CASEMAP_DEBUG) {
7350 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
7353 if (buffer_count==1) {
7354 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7357 char *target_current;
7361 current_buffer =
DATA_PTR(buffer_anchor);
7362 while (current_buffer) {
7363 memcpy(target_current, current_buffer->space, current_buffer->used);
7364 target_current += current_buffer->used;
7365 current_buffer = current_buffer->next;
7368 current_buffer =
DATA_PTR(buffer_anchor);
7370 mapping_buffer_free(current_buffer);
7375 str_enc_copy(target, source);
7384 const OnigUChar *source_current, *source_end;
7385 OnigUChar *target_current, *target_end;
7387 int length_or_invalid;
7389 if (old_length == 0)
return Qnil;
7393 if (source == target) {
7394 target_current = (OnigUChar*)source_current;
7395 target_end = (OnigUChar*)source_end;
7402 length_or_invalid = onigenc_ascii_only_case_map(flags,
7403 &source_current, source_end,
7404 target_current, target_end, enc);
7405 if (length_or_invalid < 0)
7407 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7408 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7409 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7411 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7414 str_enc_copy(target, source);
/*
 * In-place upcasing restricted to ASCII letters: a byte in 'a'..'z' is
 * rewritten to the corresponding 'A'..'Z'.
 * NOTE(review): `modified` starts false and is presumably set when a byte
 * changes and then returned - one caller tests the result to set
 * ONIGENC_CASE_MODIFIED; the loop and return are not visible here.
 */
7420upcase_single(
VALUE str)
7423 bool modified =
false;
/* Read the byte as unsigned so the range test is not affected by char sign. */
7426 unsigned int c = *(
unsigned char*)s;
7428 if (
'a' <= c && c <=
'z') {
7429 *s =
'A' + (c -
'a');
7457rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
7460 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7462 flags = check_case_options(argc, argv, flags);
7463 str_modify_keep_cr(str);
7464 enc = str_true_enc(str);
7465 if (case_option_single_p(flags, enc, str)) {
7466 if (upcase_single(str))
7467 flags |= ONIGENC_CASE_MODIFIED;
7469 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7470 rb_str_ascii_casemap(str, str, &flags, enc);
7472 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7474 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7496rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
7499 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7502 flags = check_case_options(argc, argv, flags);
7503 enc = str_true_enc(str);
7504 if (case_option_single_p(flags, enc, str)) {
7506 str_enc_copy(ret, str);
7509 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7511 rb_str_ascii_casemap(str, ret, &flags, enc);
7514 ret = rb_str_casemap(str, &flags, enc);
/*
 * In-place downcasing restricted to ASCII letters: a byte in 'A'..'Z' is
 * rewritten to the corresponding 'a'..'z'.
 * NOTE(review): `modified` starts false and is presumably set when a byte
 * changes and then returned - one caller tests the result to set
 * ONIGENC_CASE_MODIFIED; the loop and return are not visible here.
 */
7521downcase_single(
VALUE str)
7524 bool modified =
false;
/* Read the byte as unsigned so the range test is not affected by char sign. */
7527 unsigned int c = *(
unsigned char*)s;
7529 if (
'A' <= c && c <=
'Z') {
7530 *s =
'a' + (c -
'A');
7559rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
7562 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
7564 flags = check_case_options(argc, argv, flags);
7565 str_modify_keep_cr(str);
7566 enc = str_true_enc(str);
7567 if (case_option_single_p(flags, enc, str)) {
7568 if (downcase_single(str))
7569 flags |= ONIGENC_CASE_MODIFIED;
7571 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7572 rb_str_ascii_casemap(str, str, &flags, enc);
7574 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7576 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7598rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
7601 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
7604 flags = check_case_options(argc, argv, flags);
7605 enc = str_true_enc(str);
7606 if (case_option_single_p(flags, enc, str)) {
7608 str_enc_copy(ret, str);
7609 downcase_single(ret);
7611 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7613 rb_str_ascii_casemap(str, ret, &flags, enc);
7616 ret = rb_str_casemap(str, &flags, enc);
7644rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
7647 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
7649 flags = check_case_options(argc, argv, flags);
7650 str_modify_keep_cr(str);
7651 enc = str_true_enc(str);
7653 if (flags&ONIGENC_CASE_ASCII_ONLY)
7654 rb_str_ascii_casemap(str, str, &flags, enc);
7656 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7658 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7682rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
7685 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
7688 flags = check_case_options(argc, argv, flags);
7689 enc = str_true_enc(str);
7691 if (flags&ONIGENC_CASE_ASCII_ONLY) {
7693 rb_str_ascii_casemap(str, ret, &flags, enc);
7696 ret = rb_str_casemap(str, &flags, enc);
7723rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
7726 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
7728 flags = check_case_options(argc, argv, flags);
7729 str_modify_keep_cr(str);
7730 enc = str_true_enc(str);
7731 if (flags&ONIGENC_CASE_ASCII_ONLY)
7732 rb_str_ascii_casemap(str, str, &flags, enc);
7734 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7736 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7760rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
7763 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
7766 flags = check_case_options(argc, argv, flags);
7767 enc = str_true_enc(str);
7769 if (flags&ONIGENC_CASE_ASCII_ONLY) {
7771 rb_str_ascii_casemap(str, ret, &flags, enc);
7774 ret = rb_str_casemap(str, &flags, enc);
7779typedef unsigned char *USTR;
7783 unsigned int now, max;
7795 if (t->p == t->pend)
return -1;
7796 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
7799 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
7801 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
7803 if (t->p < t->pend) {
7804 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
7807 if (t->now < 0x80 && c < 0x80) {
7809 "invalid range \"%c-%c\" in string transliteration",
7824 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
7825 if (t->now == t->max) {
7830 if (t->now < t->max) {
7846 const unsigned int errc = -1;
7847 unsigned int trans[256];
7849 struct tr trsrc, trrepl;
7851 unsigned int c, c0, last = 0;
7852 int modify = 0, i, l;
7853 unsigned char *s, *send;
7855 int singlebyte = single_byte_optimizable(str);
7859#define CHECK_IF_ASCII(c) \
7860 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
7861 (cr = ENC_CODERANGE_VALID) : 0)
7867 return rb_str_delete_bang(1, &src, str);
7871 e1 = rb_enc_check(str, src);
7872 e2 = rb_enc_check(str, repl);
7877 enc = rb_enc_check(src, repl);
7881 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
7882 trsrc.p + l < trsrc.pend) {
7888 trsrc.gen = trrepl.gen = 0;
7889 trsrc.now = trrepl.now = 0;
7890 trsrc.max = trrepl.max = 0;
7893 for (i=0; i<256; i++) {
7896 while ((c = trnext(&trsrc, enc)) != errc) {
7901 if (!hash) hash = rb_hash_new();
7905 while ((c = trnext(&trrepl, enc)) != errc)
7908 for (i=0; i<256; i++) {
7909 if (trans[i] != errc) {
7917 for (i=0; i<256; i++) {
7920 while ((c = trnext(&trsrc, enc)) != errc) {
7921 r = trnext(&trrepl, enc);
7922 if (r == errc) r = trrepl.now;
7925 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
7928 if (!hash) hash = rb_hash_new();
7936 str_modify_keep_cr(str);
7942 unsigned int save = -1;
7943 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
7948 c0 = c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, e1);
7949 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
7958 if (cflag) c = last;
7961 else if (cflag) c = errc;
7967 if (c != (
unsigned int)-1) {
7973 tlen = rb_enc_codelen(c, enc);
7979 if (enc != e1) may_modify = 1;
7981 if ((offset = t - buf) + tlen > max) {
7982 size_t MAYBE_UNUSED(old) = max + termlen;
7983 max = offset + tlen + (send - s);
7984 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
7988 if (may_modify && memcmp(s, t, tlen) != 0) {
7994 if (!STR_EMBED_P(str)) {
7995 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
7997 TERM_FILL((
char *)t, termlen);
7998 RSTRING(str)->as.heap.ptr = (
char *)buf;
7999 RSTRING(str)->as.heap.len = t - buf;
8000 STR_SET_NOEMBED(str);
8001 RSTRING(str)->as.heap.aux.capa = max;
8005 c = (
unsigned char)*s;
8006 if (trans[c] != errc) {
8023 long offset, max = (long)((send - s) * 1.2);
8024 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8028 c0 = c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, e1);
8029 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8037 if (cflag) c = last;
8040 else if (cflag) c = errc;
8044 c = cflag ? last : errc;
8047 tlen = rb_enc_codelen(c, enc);
8052 if (enc != e1) may_modify = 1;
8054 if ((offset = t - buf) + tlen > max) {
8055 size_t MAYBE_UNUSED(old) = max + termlen;
8056 max = offset + tlen + (long)((send - s) * 1.2);
8057 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8062 if (may_modify && memcmp(s, t, tlen) != 0) {
8070 if (!STR_EMBED_P(str)) {
8071 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8073 TERM_FILL((
char *)t, termlen);
8074 RSTRING(str)->as.heap.ptr = (
char *)buf;
8075 RSTRING(str)->as.heap.len = t - buf;
8076 STR_SET_NOEMBED(str);
8077 RSTRING(str)->as.heap.aux.capa = max;
8083 rb_enc_associate(str, enc);
8102 return tr_trans(str, src, repl, 0);
8149 tr_trans(str, src, repl, 0);
8153#define TR_TABLE_MAX (UCHAR_MAX+1)
8154#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
8156tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8159 const unsigned int errc = -1;
8160 char buf[TR_TABLE_MAX];
8163 VALUE table = 0, ptable = 0;
8164 int i, l, cflag = 0;
8167 tr.gen =
tr.now =
tr.max = 0;
8169 if (
RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
8174 for (i=0; i<TR_TABLE_MAX; i++) {
8177 stable[TR_TABLE_MAX] = cflag;
8179 else if (stable[TR_TABLE_MAX] && !cflag) {
8180 stable[TR_TABLE_MAX] = 0;
8182 for (i=0; i<TR_TABLE_MAX; i++) {
8186 while ((c = trnext(&
tr, enc)) != errc) {
8187 if (c < TR_TABLE_MAX) {
8188 buf[(
unsigned char)c] = !cflag;
8193 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8196 table = ptable ? ptable : rb_hash_new();
8200 table = rb_hash_new();
8205 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
8206 rb_hash_aset(table, key,
Qtrue);
8210 for (i=0; i<TR_TABLE_MAX; i++) {
8211 stable[i] = stable[i] && buf[i];
8213 if (!table && !cflag) {
8220tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8222 if (c < TR_TABLE_MAX) {
8223 return table[c] != 0;
8229 if (!
NIL_P(rb_hash_lookup(del, v)) &&
8230 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
8234 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
8237 return table[TR_TABLE_MAX] ? TRUE : FALSE;
8251rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8253 char squeez[TR_TABLE_SIZE];
8256 VALUE del = 0, nodel = 0;
8258 int i, ascompat, cr;
8262 for (i=0; i<argc; i++) {
8266 enc = rb_enc_check(str, s);
8267 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8270 str_modify_keep_cr(str);
8279 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8290 c = rb_enc_codepoint_len(s, send, &clen, enc);
8292 if (tr_find(c, squeez, del, nodel)) {
8303 TERM_FILL(t, TERM_LEN(str));
8307 if (modify)
return str;
8327rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8330 rb_str_delete_bang(argc, argv, str);
8344rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8346 char squeez[TR_TABLE_SIZE];
8348 VALUE del = 0, nodel = 0;
8349 unsigned char *s, *send, *t;
8351 int ascompat, singlebyte = single_byte_optimizable(str);
8355 enc = STR_ENC_GET(str);
8358 for (i=0; i<argc; i++) {
8362 enc = rb_enc_check(str, s);
8363 if (singlebyte && !single_byte_optimizable(s))
8365 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8369 str_modify_keep_cr(str);
8378 unsigned int c = *s++;
8379 if (c != save || (argc > 0 && !squeez[c])) {
8389 if (ascompat && (c = *s) < 0x80) {
8390 if (c != save || (argc > 0 && !squeez[c])) {
8396 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
8398 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8408 TERM_FILL((
char *)t, TERM_LEN(str));
8414 if (modify)
return str;
8437rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8440 rb_str_squeeze_bang(argc, argv, str);
8458 return tr_trans(str, src, repl, 1);
8481 tr_trans(str, src, repl, 1);
8510rb_str_count(
int argc,
VALUE *argv,
VALUE str)
8512 char table[TR_TABLE_SIZE];
8514 VALUE del = 0, nodel = 0, tstr;
8524 enc = rb_enc_check(str, tstr);
8529 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
8530 !is_broken_string(str)) {
8532 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
8538 if (*(
unsigned char*)s++ == c) n++;
8544 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
8545 for (i=1; i<argc; i++) {
8548 enc = rb_enc_check(str, tstr);
8549 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
8559 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8567 c = rb_enc_codepoint_len(s, send, &clen, enc);
8568 if (tr_find(c, table, del, nodel)) {
8579rb_fs_check(
VALUE val)
8583 if (
NIL_P(val))
return 0;
8588static const char isspacetable[256] = {
8589 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
8590 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8591 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8594 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8597 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8598 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8599 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8600 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8601 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8602 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8603 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
8607#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
8610split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
8612 if (empty_count >= 0 && len == 0) {
8613 return empty_count + 1;
8615 if (empty_count > 0) {
8619 rb_ary_push(result, str_new_empty_String(str));
8620 }
while (--empty_count > 0);
8624 rb_yield(str_new_empty_String(str));
8625 }
while (--empty_count > 0);
8630 rb_ary_push(result, str);
8639 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
8643literal_split_pattern(
VALUE spat, split_type_t default_type)
8651 return SPLIT_TYPE_CHARS;
8654 if (len == 1 && ptr[0] ==
' ') {
8655 return SPLIT_TYPE_AWK;
8660 if (rb_enc_ascget(ptr, ptr + len, &l, enc) ==
' ' && len == l) {
8661 return SPLIT_TYPE_AWK;
8664 return default_type;
8677rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
8682 split_type_t split_type;
8683 long beg, end, i = 0, empty_count = -1;
8688 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
8690 if (lim <= 0) limit =
Qnil;
8691 else if (lim == 1) {
8703 if (
NIL_P(limit) && !lim) empty_count = 0;
8705 enc = STR_ENC_GET(str);
8706 split_type = SPLIT_TYPE_REGEXP;
8708 spat = get_pat_quoted(spat, 0);
8710 else if (
NIL_P(spat = rb_fs)) {
8711 split_type = SPLIT_TYPE_AWK;
8713 else if (!(spat = rb_fs_check(spat))) {
8719 if (split_type != SPLIT_TYPE_AWK) {
8724 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
8725 if (split_type == SPLIT_TYPE_AWK) {
8727 split_type = SPLIT_TYPE_STRING;
8732 mustnot_broken(spat);
8733 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
8741#define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))
8743 if (result) result = rb_ary_new();
8747 if (split_type == SPLIT_TYPE_AWK) {
8753 if (is_ascii_string(str)) {
8754 while (ptr < eptr) {
8755 c = (
unsigned char)*ptr++;
8757 if (ascii_isspace(c)) {
8763 if (!
NIL_P(limit) && lim <= i)
break;
8766 else if (ascii_isspace(c)) {
8767 SPLIT_STR(beg, end-beg);
8770 if (!
NIL_P(limit)) ++i;
8778 while (ptr < eptr) {
8781 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
8790 if (!
NIL_P(limit) && lim <= i)
break;
8794 SPLIT_STR(beg, end-beg);
8797 if (!
NIL_P(limit)) ++i;
8805 else if (split_type == SPLIT_TYPE_STRING) {
8806 char *str_start = ptr;
8807 char *substr_start = ptr;
8811 mustnot_broken(str);
8812 enc = rb_enc_check(str, spat);
8813 while (ptr < eptr &&
8814 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
8817 if (t != ptr + end) {
8821 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
8824 if (!
NIL_P(limit) && lim <= ++i)
break;
8826 beg = ptr - str_start;
8828 else if (split_type == SPLIT_TYPE_CHARS) {
8829 char *str_start = ptr;
8832 mustnot_broken(str);
8833 enc = rb_enc_get(str);
8834 while (ptr < eptr &&
8835 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
8836 SPLIT_STR(ptr - str_start, n);
8838 if (!
NIL_P(limit) && lim <= ++i)
break;
8840 beg = ptr - str_start;
8851 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
8856 if (start == end && BEG(0) == END(0)) {
8861 else if (last_null == 1) {
8862 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
8869 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
8875 SPLIT_STR(beg, end-beg);
8876 beg = start = END(0);
8880 for (idx=1; idx < regs->num_regs; idx++) {
8881 if (BEG(idx) == -1)
continue;
8882 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
8884 if (!
NIL_P(limit) && lim <= ++i)
break;
8886 if (match) rb_match_unbusy(match);
8892 return result ? result : str;
8902 return rb_str_split_m(1, &sep, str);
8905#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
8911 rb_ary_push(ary, e);
8920#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
8923chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
8929 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
8948#define rb_rs get_rs()
8955 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
8956 long pos, len, rslen;
8962 static ID keywords[1];
8967 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
8971 if (!ENUM_ELEM(ary, str)) {
8988 enc = rb_enc_get(str);
8990 enc = rb_enc_check(str, rs);
8995 const char *eol = NULL;
8997 while (subend < pend) {
8998 long chomp_rslen = 0;
9000 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
9002 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
9004 if (eol == subend)
break;
9008 chomp_rslen = -rslen;
9012 if (!subptr) subptr = subend;
9016 }
while (subend < pend);
9018 if (rslen == 0) chomp_rslen = 0;
9020 subend - subptr + (chomp ? chomp_rslen : rslen));
9021 if (ENUM_ELEM(ary, line)) {
9022 str_mod_check(str, ptr, len);
9024 subptr = eol = NULL;
9043 while (subptr < pend) {
9044 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9048 if (hit != adjusted) {
9052 subend = hit += rslen;
9055 subend = chomp_newline(subptr, subend, enc);
9062 if (ENUM_ELEM(ary, line)) {
9063 str_mod_check(str, ptr, len);
9068 if (subptr != pend) {
9071 pend = chomp_newline(subptr, pend, enc);
9073 else if (pend - subptr >= rslen &&
9074 memcmp(pend - rslen, rsptr, rslen) == 0) {
9079 ENUM_ELEM(ary, line);
9100rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9103 return rb_str_enumerate_lines(argc, argv, str, 0);
9116rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9118 VALUE ary = WANTARRAY(
"lines", 0);
9119 return rb_str_enumerate_lines(argc, argv, str, ary);
9152rb_str_each_byte(
VALUE str)
9155 return rb_str_enumerate_bytes(str, 0);
9167rb_str_bytes(
VALUE str)
9170 return rb_str_enumerate_bytes(str, ary);
9190 enc = rb_enc_get(str);
9193 for (i = 0; i < len; i += n) {
9194 n = rb_enc_fast_mbclen(ptr + i, ptr + len, enc);
9199 for (i = 0; i < len; i += n) {
9200 n = rb_enc_mbclen(ptr + i, ptr + len, enc);
9221rb_str_each_char(
VALUE str)
9224 return rb_str_enumerate_chars(str, 0);
9236rb_str_chars(
VALUE str)
9239 return rb_str_enumerate_chars(str, ary);
9243rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9248 const char *ptr, *end;
9251 if (single_byte_optimizable(str))
9252 return rb_str_enumerate_bytes(str, ary);
9257 enc = STR_ENC_GET(str);
9260 c = rb_enc_codepoint_len(ptr, end, &n, enc);
9281rb_str_each_codepoint(
VALUE str)
9284 return rb_str_enumerate_codepoints(str, 0);
9296rb_str_codepoints(
VALUE str)
9299 return rb_str_enumerate_codepoints(str, ary);
9305 int encidx = rb_enc_to_index(enc);
9307 const OnigUChar source_ascii[] =
"\\X";
9308 const OnigUChar *source = source_ascii;
9309 size_t source_len =
sizeof(source_ascii) - 1;
9312#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9313#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9314#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9315#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9316#define CASE_UTF(e) \
9317 case ENCINDEX_UTF_##e: { \
9318 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9319 source = source_UTF_##e; \
9320 source_len = sizeof(source_UTF_##e); \
9323 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9331 regex_t *reg_grapheme_cluster;
9333 int r = onig_new(®_grapheme_cluster, source, source + source_len,
9334 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9336 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9337 onig_error_code_to_str(message, r, &einfo);
9338 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9341 return reg_grapheme_cluster;
9347 int encidx = rb_enc_to_index(enc);
9348 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9350 if (encidx == rb_utf8_encindex()) {
9351 if (!reg_grapheme_cluster_utf8) {
9352 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
9355 return reg_grapheme_cluster_utf8;
9364 size_t grapheme_cluster_count = 0;
9366 const char *ptr, *end;
9368 if (!rb_enc_unicode_p(enc)) {
9372 bool cached_reg_grapheme_cluster =
true;
9373 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9374 if (!reg_grapheme_cluster) {
9375 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9376 cached_reg_grapheme_cluster =
false;
9383 OnigPosition len = onig_match(reg_grapheme_cluster,
9384 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9385 (
const OnigUChar *)ptr, NULL, 0);
9386 if (len <= 0)
break;
9387 grapheme_cluster_count++;
9391 if (!cached_reg_grapheme_cluster) {
9392 onig_free(reg_grapheme_cluster);
9395 return SIZET2NUM(grapheme_cluster_count);
9399rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9403 const char *ptr0, *ptr, *end;
9405 if (!rb_enc_unicode_p(enc)) {
9406 return rb_str_enumerate_chars(str, ary);
9411 bool cached_reg_grapheme_cluster =
true;
9412 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9413 if (!reg_grapheme_cluster) {
9414 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9415 cached_reg_grapheme_cluster =
false;
9422 OnigPosition len = onig_match(reg_grapheme_cluster,
9423 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9424 (
const OnigUChar *)ptr, NULL, 0);
9425 if (len <= 0)
break;
9430 if (!cached_reg_grapheme_cluster) {
9431 onig_free(reg_grapheme_cluster);
9451rb_str_each_grapheme_cluster(
VALUE str)
9454 return rb_str_enumerate_grapheme_clusters(str, 0);
9466rb_str_grapheme_clusters(
VALUE str)
9469 return rb_str_enumerate_grapheme_clusters(str, ary);
9473chopped_length(
VALUE str)
9476 const char *p, *p2, *beg, *end;
9480 if (beg >= end)
return 0;
9483 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
9485 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
9501rb_str_chop_bang(
VALUE str)
9503 str_modify_keep_cr(str);
9506 len = chopped_length(str);
9507 STR_SET_LEN(str, len);
9527rb_str_chop(
VALUE str)
9533smart_chomp(
VALUE str,
const char *e,
const char *p)
9544 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
9552 if (--e > p && *(e-1) ==
'\r') {
9569 char *pp, *e, *rsptr;
9574 if (len == 0)
return 0;
9577 return smart_chomp(str, e, p);
9580 enc = rb_enc_get(str);
9591 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
9598 while (e > p && *(e-1) ==
'\n') {
9600 if (e > p && *(e-1) ==
'\r')
9606 if (rslen > len)
return len;
9608 enc = rb_enc_get(rs);
9609 newline = rsptr[rslen-1];
9612 if (newline ==
'\n')
9613 return smart_chomp(str, e, p);
9617 return smart_chomp(str, e, p);
9621 enc = rb_enc_check(str, rs);
9622 if (is_broken_string(rs)) {
9626 if (p[len-1] == newline &&
9628 memcmp(rsptr, pp, rslen) == 0)) {
9642chomp_rs(
int argc,
const VALUE *argv)
9659 long len = chompped_length(str, rs);
9660 if (len >= olen)
return Qnil;
9661 str_modify_keep_cr(str);
9662 STR_SET_LEN(str, len);
9680rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
9683 str_modifiable(str);
9685 rs = chomp_rs(argc, argv);
9687 return rb_str_chomp_string(str, rs);
9700rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
9702 VALUE rs = chomp_rs(argc, argv);
9710 const char *
const start = s;
9712 if (!s || s >= e)
return 0;
9715 if (single_byte_optimizable(str)) {
9716 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
9721 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
9741rb_str_lstrip_bang(
VALUE str)
9747 str_modify_keep_cr(str);
9748 enc = STR_ENC_GET(str);
9750 loffset = lstrip_offset(str, start, start+olen, enc);
9752 long len = olen-loffset;
9753 s = start + loffset;
9754 memmove(start, s, len);
9755 STR_SET_LEN(str, len);
9779rb_str_lstrip(
VALUE str)
9784 loffset = lstrip_offset(str, start, start+len, STR_ENC_GET(str));
9785 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
9794 rb_str_check_dummy_enc(enc);
9798 if (!s || s >= e)
return 0;
9802 if (single_byte_optimizable(str)) {
9804 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
9829rb_str_rstrip_bang(
VALUE str)
9835 str_modify_keep_cr(str);
9836 enc = STR_ENC_GET(str);
9838 roffset = rstrip_offset(str, start, start+olen, enc);
9840 long len = olen - roffset;
9842 STR_SET_LEN(str, len);
9866rb_str_rstrip(
VALUE str)
9872 enc = STR_ENC_GET(str);
9874 roffset = rstrip_offset(str, start, start+olen, enc);
9876 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
9892rb_str_strip_bang(
VALUE str)
9895 long olen, loffset, roffset;
9898 str_modify_keep_cr(str);
9899 enc = STR_ENC_GET(str);
9901 loffset = lstrip_offset(str, start, start+olen, enc);
9902 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
9904 if (loffset > 0 || roffset > 0) {
9905 long len = olen-roffset;
9908 memmove(start, start + loffset, len);
9910 STR_SET_LEN(str, len);
9934rb_str_strip(
VALUE str)
9937 long olen, loffset, roffset;
9941 loffset = lstrip_offset(str, start, start+olen, enc);
9942 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
9944 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
9949scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
9951 VALUE result, match;
9954 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
9972 *start = end + rb_enc_fast_mbclen(
RSTRING_PTR(str) + end,
9980 if (!regs || regs->num_regs == 1) {
9985 for (i=1; i < regs->num_regs; i++) {
9990 rb_ary_push(result, s);
10043 long last = -1, prev = 0;
10046 pat = get_pat_quoted(pat, 1);
10047 mustnot_broken(str);
10049 VALUE ary = rb_ary_new();
10051 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10054 rb_ary_push(ary, result);
10056 if (last >= 0) rb_pat_search(pat, str, last, 1);
10061 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10065 str_mod_check(str, p, len);
10067 if (last >= 0) rb_pat_search(pat, str, last, 1);
10091rb_str_hex(
VALUE str)
10093 return rb_str_to_inum(str, 16, FALSE);
10118rb_str_oct(
VALUE str)
10120 return rb_str_to_inum(str, -8, FALSE);
10123#ifndef HAVE_CRYPT_R
10128 rb_nativethread_lock_t lock;
10129} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10132crypt_mutex_initialize(
void)
10203# define CRYPT_END() ALLOCV_END(databuf)
10205 extern char *crypt(
const char *,
const char *);
10206# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10209 const char *s, *saltp;
10212 char salt_8bit_clean[3];
10216 mustnot_wchar(str);
10217 mustnot_wchar(salt);
10220 if (
RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10225 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10226 salt_8bit_clean[0] = saltp[0] & 0x7f;
10227 salt_8bit_clean[1] = saltp[1] & 0x7f;
10228 salt_8bit_clean[2] =
'\0';
10229 saltp = salt_8bit_clean;
10234# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10235 data->initialized = 0;
10237 res = crypt_r(s, saltp, data);
10239 crypt_mutex_initialize();
10241 res = crypt(s, saltp);
10282 char *ptr, *p, *pend;
10285 unsigned long sum0 = 0;
10297 str_mod_check(str, ptr, len);
10300 sum0 += (
unsigned char)*p;
10311 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
10312 sum0 &= (((
unsigned long)1)<<bits)-1;
10332rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
10336 long width, len, flen = 1, fclen = 1;
10339 const char *f =
" ";
10340 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
10342 int singlebyte = 1, cr;
10346 enc = STR_ENC_GET(str);
10351 enc = rb_enc_check(str, pad);
10354 fclen = str_strlen(pad, enc);
10355 singlebyte = single_byte_optimizable(pad);
10356 if (flen == 0 || fclen == 0) {
10360 len = str_strlen(str, enc);
10361 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
10363 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
10367 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
10368 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
10371 if ((len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
10372 (len *= flen) >= LONG_MAX - llen2 - rlen2 ||
10373 (len += llen2 + rlen2) >= LONG_MAX - size) {
10377 res = str_new0(
rb_cString, 0, len, termlen);
10380 memset(p, *f, llen);
10384 while (llen >= fclen) {
10390 memcpy(p, f, llen2);
10397 memset(p, *f, rlen);
10401 while (rlen >= fclen) {
10407 memcpy(p, f, rlen2);
10411 TERM_FILL(p, termlen);
10413 rb_enc_associate(res, enc);
10435rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
10437 return rb_str_justify(argc, argv, str,
'l');
10451rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
10453 return rb_str_justify(argc, argv, str,
'r');
10468rb_str_center(
int argc,
VALUE *argv,
VALUE str)
10470 return rb_str_justify(argc, argv, str,
'c');
10486 sep = get_pat_quoted(sep, 0);
10498 pos = rb_str_index(str, sep, 0);
10499 if (pos < 0)
goto failed;
10507 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
10523 sep = get_pat_quoted(sep, 0);
10536 pos = rb_str_rindex(str, sep, pos);
10548 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
10560rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
10564 for (i=0; i<argc; i++) {
10565 VALUE tmp = argv[i];
10567 if (rb_reg_start_with_p(tmp, str))
10572 rb_enc_check(str, tmp);
10590rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
10596 for (i=0; i<argc; i++) {
10597 VALUE tmp = argv[i];
10600 enc = rb_enc_check(str, tmp);
10624deleted_prefix_length(
VALUE str,
VALUE prefix)
10626 char *strptr, *prefixptr;
10627 long olen, prefixlen;
10630 if (is_broken_string(prefix))
return 0;
10631 rb_enc_check(str, prefix);
10635 if (prefixlen <= 0)
return 0;
10637 if (olen < prefixlen)
return 0;
10640 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
10655rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
10658 str_modify_keep_cr(str);
10660 prefixlen = deleted_prefix_length(str, prefix);
10661 if (prefixlen <= 0)
return Qnil;
10675rb_str_delete_prefix(
VALUE str,
VALUE prefix)
10679 prefixlen = deleted_prefix_length(str, prefix);
10680 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
10695deleted_suffix_length(
VALUE str,
VALUE suffix)
10697 char *strptr, *suffixptr, *s;
10698 long olen, suffixlen;
10702 if (is_broken_string(suffix))
return 0;
10703 enc = rb_enc_check(str, suffix);
10707 if (suffixlen <= 0)
return 0;
10709 if (olen < suffixlen)
return 0;
10712 s = strptr + olen - suffixlen;
10713 if (memcmp(s, suffixptr, suffixlen) != 0)
return 0;
10729rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
10731 long olen, suffixlen, len;
10732 str_modifiable(str);
10734 suffixlen = deleted_suffix_length(str, suffix);
10735 if (suffixlen <= 0)
return Qnil;
10738 str_modify_keep_cr(str);
10739 len = olen - suffixlen;
10740 STR_SET_LEN(str, len);
10741 TERM_FILL(&
RSTRING_PTR(str)[len], TERM_LEN(str));
10757rb_str_delete_suffix(
VALUE str,
VALUE suffix)
10761 suffixlen = deleted_suffix_length(str, suffix);
10762 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
10779 val = rb_fs_check(val);
10782 "value of %"PRIsVALUE
" must be String or Regexp",
10786 rb_warn_deprecated(
"`$;'", NULL);
10803 str_modifiable(str);
10804 rb_enc_associate(str, rb_to_encoding(enc));
10821 if (
FL_TEST(str, STR_NOEMBED)) {
10827 str_replace_shared_without_enc(str2, str);
10862rb_str_valid_encoding_p(
VALUE str)
10882rb_str_is_ascii_only_p(
VALUE str)
10892 static const char ellipsis[] =
"...";
10893 const long ellipsislen =
sizeof(ellipsis) - 1;
10896 const char *
const p =
RSTRING_PTR(str), *e = p + blen;
10897 VALUE estr, ret = 0;
10901 (e =
rb_enc_nth(p, e, len, enc)) - p == blen) {
10904 else if (len <= ellipsislen ||
10908 rb_enc_associate(ret, enc);
10920 rb_enc_from_encoding(enc), 0,
Qnil);
10958 if (enc == STR_ENC_GET(str)) {
10963 return enc_str_scrub(enc, str, repl, cr);
10971 const char *rep, *p, *e, *p1, *sp;
10984 if (!
NIL_P(repl)) {
10985 repl = str_compat_and_valid(repl, enc);
10988 if (rb_enc_dummy_p(enc)) {
10991 encidx = rb_enc_to_index(enc);
10993#define DEFAULT_REPLACE_CHAR(str) do { \
10994 static const char replace[sizeof(str)-1] = str; \
10995 rep = replace; replen = (int)sizeof(replace); \
11010 else if (!
NIL_P(repl)) {
11015 else if (encidx == rb_utf8_encindex()) {
11016 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
11020 DEFAULT_REPLACE_CHAR(
"?");
11025 p = search_nonascii(p, e);
11030 int ret = rb_enc_precise_mbclen(p, e, enc);
11049 if (e - p < clen) clen = e - p;
11056 for (; clen > 1; clen--) {
11057 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11069 str_mod_check(str, sp, slen);
11070 repl = str_compat_and_valid(repl, enc);
11077 p = search_nonascii(p, e);
11104 str_mod_check(str, sp, slen);
11105 repl = str_compat_and_valid(repl, enc);
11118 else if (!
NIL_P(repl)) {
11122 else if (encidx == ENCINDEX_UTF_16BE) {
11123 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11125 else if (encidx == ENCINDEX_UTF_16LE) {
11126 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11128 else if (encidx == ENCINDEX_UTF_32BE) {
11129 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11131 else if (encidx == ENCINDEX_UTF_32LE) {
11132 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11135 DEFAULT_REPLACE_CHAR(
"?");
11139 int ret = rb_enc_precise_mbclen(p, e, enc);
11152 if (e - p < clen) clen = e - p;
11153 if (clen <= mbminlen * 2) {
11158 for (; clen > mbminlen; clen-=mbminlen) {
11159 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11170 str_mod_check(str, sp, slen);
11171 repl = str_compat_and_valid(repl, enc);
11197 str_mod_check(str, sp, slen);
11198 repl = str_compat_and_valid(repl, enc);
11234str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
11242static ID id_normalize;
11243static ID id_normalized_p;
11244static VALUE mUnicodeNormalize;
11247unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
11249 static int UnicodeNormalizeRequired = 0;
11252 if (!UnicodeNormalizeRequired) {
11253 rb_require(
"unicode_normalize/normalize.rb");
11254 UnicodeNormalizeRequired = 1;
11258 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
11295rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
11297 return unicode_normalize_common(argc, argv, str, id_normalize);
11311rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
11313 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
11340rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
11342 return unicode_normalize_common(argc, argv, str, id_normalized_p);
11477#define sym_equal rb_obj_equal
11480sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
11484 int c = rb_enc_precise_mbclen(s, send, enc);
11496rb_str_symname_p(
VALUE sym)
11501 rb_encoding *resenc = rb_default_internal_encoding();
11503 if (resenc == NULL) resenc = rb_default_external_encoding();
11504 enc = STR_ENC_GET(sym);
11507 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) || len != (
long)strlen(ptr) ||
11515rb_str_quote_unprintable(
VALUE str)
11523 resenc = rb_default_internal_encoding();
11524 if (resenc == NULL) resenc = rb_default_external_encoding();
11525 enc = STR_ENC_GET(str);
11528 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
11529 !sym_printable(ptr, ptr + len, enc)) {
11530 return rb_str_escape(str);
11535MJIT_FUNC_EXPORTED
VALUE
11536rb_id_quote_unprintable(
ID id)
11538 VALUE str = rb_id2str(
id);
11539 if (!rb_str_symname_p(str)) {
11540 return rb_str_escape(str);
11558sym_inspect(
VALUE sym)
11565 if (!rb_str_symname_p(str)) {
11570 memmove(dest + 1, dest, len);
11577 memcpy(dest + 1, ptr, len);
11602MJIT_FUNC_EXPORTED
VALUE
11603rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
11707 return rb_str_match(
rb_sym2str(sym), other);
11722sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
11724 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
11737sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
11739 return rb_str_match_m_p(argc, argv, sym);
11757 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
11771sym_length(
VALUE sym)
11785sym_empty(
VALUE sym)
11819sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
11835sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
11851sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
11865sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
11867 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
11880sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
11882 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
11894sym_encoding(
VALUE sym)
11900string_for_symbol(
VALUE name)
11919 name = string_for_symbol(name);
11929 name = string_for_symbol(name);
11953 return rb_fstring(str);
11960 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII), TRUE);
11972 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
11973 rb_enc_autoload(enc);
11977 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc), TRUE);
11990 assert(rb_vm_fstring_table());
11991 st_foreach(rb_vm_fstring_table(), fstring_set_class_i,
rb_cString);
12154 rb_gc_register_address(&
rb_fs);
#define RUBY_ASSERT(expr)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
#define RUBY_ASSERT_ALWAYS(expr)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_isascii(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isascii(), except it additionally takes an encoding.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
@ RUBY_FL_FREEZE
This flag has something to do with data immutability.
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define FL_EXIVAR
Old name of RUBY_FL_EXIVAR.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define OBJ_FREEZE_RAW
Old name of RB_OBJ_FREEZE_RAW.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it always reports, regardless of the runtime -W flag.
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
void rb_bug(const char *fmt,...)
Interpreter panic switch.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
void rb_fatal(const char *fmt,...)
Raises the unsung "fatal" exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eArgError
ArgumentError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby's string instead of C's.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
static const char * rb_enc_name(rb_encoding *enc)
Queries the (canonical) name of the passed encoding.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
static char * rb_enc_prev_char(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the previous (left) character.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
static bool rb_enc_asciicompat(rb_encoding *enc)
Queries if the passed encoding is in some sense compatible with ASCII.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbcput(unsigned int c, void *buf, rb_encoding *enc)
Identical to rb_enc_uint_chr(), except it writes back to the passed buffer instead of allocating one.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static OnigCodePoint rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_codepoint(), except it assumes the passed character is not broken.
static int rb_enc_mbminlen(rb_encoding *enc)
Queries the minimum number of bytes that the passed encoding needs to represent a character.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_new(), except it additionally takes an encoding.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
#define rb_check_frozen
Just another name of rb_check_frozen_inline.
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e. "\n".
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single bits of symbols that have ever interned in the entire history of the current pr...
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
VALUE rb_str_to_interned_str(VALUE str)
Identical to rb_interned_str(), except it takes a Ruby's string instead of C's.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby's string instead of C's.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "defaultexternal" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
void rb_str_modify(VALUE str)
Declares that the string is about to be modified.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
I guess there is no use case of this function in extension libraries, but this is a routine identical...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C's string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "defaultexternal" encoding.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks if two strings are comparable with each other or not.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_str_resize(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
ID rb_intern(const char *name)
Finds or creates a symbol of the given name.
VALUE rb_sym2str(VALUE id)
Identical to rb_id2str(), except it takes an instance of rb_cSymbol rather than an ID.
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
ID rb_to_id(VALUE str)
Identical to rb_intern(), except it takes an instance of rb_cString.
ID rb_intern_str(VALUE str)
Identical to rb_intern(), except it takes an instance of rb_cString.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Define a function-backended global variable.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of ensure clause.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
@ RSTRING_EMBED_LEN_MAX
Max possible number of characters that can be embedded.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
static long RSTRING_EMBED_LEN(VALUE str)
Queries the length of the string.
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
static long RSTRING_LEN(VALUE str)
Queries the length of the string.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
static char * RSTRING_PTR(VALUE str)
Queries the contents pointer of the string.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
struct RString::@49::@51 embed
Embedded contents.
union RString::@49 as
String's specific fields.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
struct RString::@49::@50 heap
Strings that use separated memory region for contents use this pattern.
long len
Length of the string, not including terminating NUL character.
char ary[RSTRING_EMBED_LEN_MAX+1]
When a string is short enough, it uses this area to store the contents themselves.
VALUE shared
Parent of the string.
char * ptr
Pointer to the contents of the string.
union RString::@49::@50::@52 aux
Auxiliary info.
This is the struct that holds necessary info for a struct.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.