jscript: Store concatenated strings as a rope string to avoid useless copying.

oldstable
Jacek Caban 2013-04-23 15:02:37 +02:00 committed by Alexandre Julliard
parent 019a1af76e
commit 8541f94e2c
2 changed files with 295 additions and 26 deletions

View File

@ -16,13 +16,54 @@
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include <assert.h>
#include "jscript.h"
#include "wine/debug.h"
/*
* This is the length of a string that is considered to be long enough to be
* worth the rope to avoid copy.
* This probably could be tuned, but keep it low for a while to better test rope's code.
*/
#define JSSTR_SHORT_STRING_LENGTH 8
/*
* This is the max rope depth. While faster to allocate, ropes may become slow at access.
*/
#define JSSTR_MAX_ROPE_DEPTH 100
const char *debugstr_jsstr(jsstr_t *str)
{
return debugstr_wn(jsstr_as_inline(str)->buf, jsstr_length(str));
return jsstr_is_inline(str) ? debugstr_wn(jsstr_as_inline(str)->buf, jsstr_length(str))
: jsstr_is_heap(str) ? debugstr_wn(jsstr_as_heap(str)->buf, jsstr_length(str))
: wine_dbg_sprintf("%s...", debugstr_jsstr(jsstr_as_rope(str)->left));
}
void jsstr_free(jsstr_t *str)
{
switch(jsstr_tag(str)) {
case JSSTR_HEAP:
heap_free(jsstr_as_heap(str)->buf);
break;
case JSSTR_ROPE: {
jsstr_rope_t *rope = jsstr_as_rope(str);
jsstr_release(rope->left);
jsstr_release(rope->right);
break;
}
case JSSTR_INLINE:
break;
}
heap_free(str);
}
static inline void jsstr_init(jsstr_t *str, unsigned len, jsstr_tag_t tag)
{
str->length_flags = len << JSSTR_LENGTH_SHIFT | tag;
str->ref = 1;
}
WCHAR *jsstr_alloc_buf(unsigned len, jsstr_t **r)
@ -36,8 +77,7 @@ WCHAR *jsstr_alloc_buf(unsigned len, jsstr_t **r)
if(!ret)
return NULL;
ret->str.length_flags = len << JSSTR_LENGTH_SHIFT;
ret->str.ref = 1;
jsstr_init(&ret->str, len, JSSTR_INLINE);
ret->buf[len] = 0;
*r = &ret->str;
return ret->buf;
@ -55,17 +95,116 @@ jsstr_t *jsstr_alloc_len(const WCHAR *buf, unsigned len)
return ret;
}
int jsstr_cmp(jsstr_t *str1, jsstr_t *str2)
static void jsstr_rope_extract(jsstr_rope_t *str, unsigned off, unsigned len, WCHAR *buf)
{
unsigned left_len = jsstr_length(str->left);
if(left_len <= off) {
jsstr_extract(str->right, off-left_len, len, buf);
}else if(left_len >= len+off) {
jsstr_extract(str->left, off, len, buf);
}else {
left_len -= off;
jsstr_extract(str->left, off, left_len, buf);
jsstr_extract(str->right, 0, len-left_len, buf+left_len);
}
}
void jsstr_extract(jsstr_t *str, unsigned off, unsigned len, WCHAR *buf)
{
switch(jsstr_tag(str)) {
case JSSTR_INLINE:
memcpy(buf, jsstr_as_inline(str)->buf+off, len*sizeof(WCHAR));
return;
case JSSTR_HEAP:
memcpy(buf, jsstr_as_heap(str)->buf+off, len*sizeof(WCHAR));
return;
case JSSTR_ROPE:
return jsstr_rope_extract(jsstr_as_rope(str), off, len, buf);
}
}
static int jsstr_cmp_str(jsstr_t *jsstr, const WCHAR *str, unsigned len)
{
int len1 = jsstr_length(str1);
int len2 = jsstr_length(str2);
int ret;
ret = memcmp(jsstr_as_inline(str1)->buf, jsstr_as_inline(str2)->buf, min(len1, len2)*sizeof(WCHAR));
if(!ret)
ret = len1 - len2;
switch(jsstr_tag(jsstr)) {
case JSSTR_INLINE:
ret = memcmp(jsstr_as_inline(jsstr)->buf, str, len*sizeof(WCHAR));
return ret || jsstr_length(jsstr) == len ? ret : 1;
case JSSTR_HEAP:
ret = memcmp(jsstr_as_heap(jsstr)->buf, str, len*sizeof(WCHAR));
return ret || jsstr_length(jsstr) == len ? ret : 1;
case JSSTR_ROPE: {
jsstr_rope_t *rope = jsstr_as_rope(jsstr);
unsigned left_len = jsstr_length(rope->left);
return ret;
ret = jsstr_cmp_str(rope->left, str, min(len, left_len));
if(ret || len <= left_len)
return ret;
return jsstr_cmp_str(rope->right, str+left_len, len-left_len);
}
}
assert(0);
return 0;
}
#define TMP_BUF_SIZE 256
static int ropes_cmp(jsstr_rope_t *left, jsstr_rope_t *right)
{
WCHAR left_buf[TMP_BUF_SIZE], right_buf[TMP_BUF_SIZE];
unsigned left_len = jsstr_length(&left->str);
unsigned right_len = jsstr_length(&right->str);
unsigned cmp_off = 0, cmp_size;
int ret;
/* FIXME: We can avoid temporary buffers here. */
while(cmp_off < min(left_len, right_len)) {
cmp_size = min(left_len, right_len) - cmp_off;
if(cmp_size > TMP_BUF_SIZE)
cmp_size = TMP_BUF_SIZE;
jsstr_rope_extract(left, cmp_off, cmp_size, left_buf);
jsstr_rope_extract(right, cmp_off, cmp_size, right_buf);
ret = memcmp(left_buf, right_buf, cmp_size);
if(ret)
return ret;
cmp_off += cmp_size;
}
return left_len - right_len;
}
static inline const WCHAR *jsstr_try_flat(jsstr_t *str)
{
return jsstr_is_inline(str) ? jsstr_as_inline(str)->buf
: jsstr_is_heap(str) ? jsstr_as_heap(str)->buf
: NULL;
}
int jsstr_cmp(jsstr_t *str1, jsstr_t *str2)
{
unsigned len1 = jsstr_length(str1);
unsigned len2 = jsstr_length(str2);
const WCHAR *str;
int ret;
str = jsstr_try_flat(str2);
if(str) {
ret = jsstr_cmp_str(str1, str, min(len1, len2));
return ret || len1 == len2 ? ret : -1;
}
str = jsstr_try_flat(str1);
if(str) {
ret = jsstr_cmp_str(str2, str, min(len1, len2));
return ret || len1 == len2 ? -ret : 1;
}
return ropes_cmp(jsstr_as_rope(str1), jsstr_as_rope(str2));
}
jsstr_t *jsstr_concat(jsstr_t *str1, jsstr_t *str2)
@ -82,6 +221,31 @@ jsstr_t *jsstr_concat(jsstr_t *str1, jsstr_t *str2)
if(!len2)
return jsstr_addref(str1);
if(len1 + len2 >= JSSTR_SHORT_STRING_LENGTH) {
unsigned depth, depth2;
jsstr_rope_t *rope;
depth = jsstr_is_rope(str1) ? jsstr_as_rope(str1)->depth : 0;
depth2 = jsstr_is_rope(str2) ? jsstr_as_rope(str2)->depth : 0;
if(depth2 > depth)
depth = depth2;
if(depth++ < JSSTR_MAX_ROPE_DEPTH) {
if(len1+len2 > JSSTR_MAX_LENGTH)
return NULL;
rope = heap_alloc(sizeof(*rope));
if(!rope)
return NULL;
jsstr_init(&rope->str, len1+len2, JSSTR_ROPE);
rope->left = jsstr_addref(str1);
rope->right = jsstr_addref(str2);
rope->depth = depth;
return &rope->str;
}
}
ptr = jsstr_alloc_buf(len1+len2, &ret);
if(!ret)
return NULL;
@ -89,6 +253,28 @@ jsstr_t *jsstr_concat(jsstr_t *str1, jsstr_t *str2)
jsstr_flush(str1, ptr);
jsstr_flush(str2, ptr+len1);
return ret;
}
C_ASSERT(sizeof(jsstr_heap_t) <= sizeof(jsstr_rope_t));
const WCHAR *jsstr_rope_flatten(jsstr_rope_t *str)
{
WCHAR *buf;
buf = heap_alloc((jsstr_length(&str->str)+1) * sizeof(WCHAR));
if(!buf)
return NULL;
jsstr_flush(str->left, buf);
jsstr_flush(str->right, buf+jsstr_length(str->left));
buf[jsstr_length(&str->str)] = 0;
/* Trasform to heap string */
jsstr_release(str->left);
jsstr_release(str->right);
str->str.length_flags |= JSSTR_FLAG_FLAT;
return jsstr_as_heap(&str->str)->buf = buf;
}
static jsstr_t *empty_str, *nan_str, *undefined_str;

View File

@ -16,6 +16,23 @@
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
/*
* This is a common header for all string representations. The exact layout of the string
* representation may be:
*
* - inline string - string bytes directly follow string headers.
* - heap string - a structure containing a pointer to buffer on the heap.
* - roper string - a product of concatenation of two strings. Instead of copying whole
* buffers, we may store just references to concatenated strings.
*
* String layout may change over life time of the string. Currently possible transformation
* is when a rope string becomes a heap stream. That happens when we need a real, linear
* zero-terminated buffer (a flat buffer). At this point the type of the string is changed
* and the new buffer is stored in the string, so that subsequent operations requiring
* a flat string won't need to flatten it again.
*
* In the future more layouts and transformations may be added.
*/
struct _jsstr_t {
unsigned length_flags;
unsigned ref;
@ -25,18 +42,60 @@ struct _jsstr_t {
#define JSSTR_MAX_LENGTH (1 << (32-JSSTR_LENGTH_SHIFT))
#define JSSTR_FLAGS_MASK ((1 << JSSTR_LENGTH_SHIFT)-1)
#define JSSTR_FLAG_NULLBSTR 1
#define JSSTR_FLAG_NULLBSTR 4
#define JSSTR_FLAG_LBIT 1
#define JSSTR_FLAG_FLAT 2
#define JSSTR_FLAG_TAG_MASK 3
typedef enum {
JSSTR_INLINE = JSSTR_FLAG_FLAT,
JSSTR_HEAP = JSSTR_FLAG_FLAT|JSSTR_FLAG_LBIT,
JSSTR_ROPE = JSSTR_FLAG_LBIT
} jsstr_tag_t;
static inline unsigned jsstr_length(jsstr_t *str)
{
return str->length_flags >> JSSTR_LENGTH_SHIFT;
}
static inline jsstr_tag_t jsstr_tag(jsstr_t *str)
{
return str->length_flags & JSSTR_FLAG_TAG_MASK;
}
static inline BOOL jsstr_is_inline(jsstr_t *str)
{
return jsstr_tag(str) == JSSTR_INLINE;
}
static inline BOOL jsstr_is_heap(jsstr_t *str)
{
return jsstr_tag(str) == JSSTR_HEAP;
}
static inline BOOL jsstr_is_rope(jsstr_t *str)
{
return jsstr_tag(str) == JSSTR_ROPE;
}
typedef struct {
jsstr_t str;
WCHAR buf[1];
} jsstr_inline_t;
typedef struct {
jsstr_t str;
WCHAR *buf;
} jsstr_heap_t;
typedef struct {
jsstr_t str;
jsstr_t *left;
jsstr_t *right;
unsigned depth;
} jsstr_rope_t;
jsstr_t *jsstr_alloc_len(const WCHAR*,unsigned) DECLSPEC_HIDDEN;
WCHAR *jsstr_alloc_buf(unsigned,jsstr_t**) DECLSPEC_HIDDEN;
@ -45,10 +104,16 @@ static inline jsstr_t *jsstr_alloc(const WCHAR *str)
return jsstr_alloc_len(str, strlenW(str));
}
void jsstr_free(jsstr_t*) DECLSPEC_HIDDEN;
static inline void jsstr_release(jsstr_t *str)
{
if(!--str->ref)
heap_free(str);
if(!--str->ref) {
if(jsstr_is_inline(str))
heap_free(str);
else
jsstr_free(str);
}
}
static inline jsstr_t *jsstr_addref(jsstr_t *str)
@ -62,27 +127,39 @@ static inline jsstr_inline_t *jsstr_as_inline(jsstr_t *str)
return CONTAINING_RECORD(str, jsstr_inline_t, str);
}
/* This will be failable in the future. */
static inline jsstr_heap_t *jsstr_as_heap(jsstr_t *str)
{
return CONTAINING_RECORD(str, jsstr_heap_t, str);
}
static inline jsstr_rope_t *jsstr_as_rope(jsstr_t *str)
{
return CONTAINING_RECORD(str, jsstr_rope_t, str);
}
const WCHAR *jsstr_rope_flatten(jsstr_rope_t*) DECLSPEC_HIDDEN;
static inline const WCHAR *jsstr_flatten(jsstr_t *str)
{
return jsstr_as_inline(str)->buf;
return jsstr_is_inline(str) ? jsstr_as_inline(str)->buf
: jsstr_is_heap(str) ? jsstr_as_heap(str)->buf
: jsstr_rope_flatten(jsstr_as_rope(str));
}
static inline BOOL jsstr_eq(jsstr_t *str1, jsstr_t *str2)
{
unsigned len = jsstr_length(str1);
return len == jsstr_length(str2) && !memcmp(jsstr_as_inline(str1)->buf, jsstr_as_inline(str2)->buf, len*sizeof(WCHAR));
}
static inline void jsstr_extract(jsstr_t *str, unsigned off, unsigned len, WCHAR *buf)
{
memcpy(buf, jsstr_as_inline(str)->buf+off, len*sizeof(WCHAR));
}
void jsstr_extract(jsstr_t*,unsigned,unsigned,WCHAR*) DECLSPEC_HIDDEN;
static inline unsigned jsstr_flush(jsstr_t *str, WCHAR *buf)
{
unsigned len = jsstr_length(str);
memcpy(buf, jsstr_as_inline(str)->buf, len*sizeof(WCHAR));
if(jsstr_is_inline(str)) {
memcpy(buf, jsstr_as_inline(str)->buf, len*sizeof(WCHAR));
}else if(jsstr_is_heap(str)) {
memcpy(buf, jsstr_as_heap(str)->buf, len*sizeof(WCHAR));
}else {
jsstr_rope_t *rope = jsstr_as_rope(str);
jsstr_flush(rope->left, buf);
jsstr_flush(rope->right, buf+jsstr_length(rope->left));
}
return len;
}
@ -98,6 +175,12 @@ static inline jsstr_t *jsstr_substr(jsstr_t *str, unsigned off, unsigned len)
}
int jsstr_cmp(jsstr_t*,jsstr_t*) DECLSPEC_HIDDEN;
static inline BOOL jsstr_eq(jsstr_t *left, jsstr_t *right)
{
return jsstr_length(left) == jsstr_length(right) && !jsstr_cmp(left, right);
}
jsstr_t *jsstr_concat(jsstr_t*,jsstr_t*) DECLSPEC_HIDDEN;
jsstr_t *jsstr_nan(void) DECLSPEC_HIDDEN;