Skip to content

Commit

Permalink
fix(NODE-5363): defer byte slicing to utf8 decoding API in nodejs (#585)
Browse files Browse the repository at this point in the history
Co-authored-by: Durran Jordan <[email protected]>
  • Loading branch information
W-A-James and durran authored Jul 3, 2023
1 parent 2ea58cf commit e087042
Show file tree
Hide file tree
Showing 8 changed files with 73 additions and 20 deletions.
4 changes: 0 additions & 4 deletions etc/benchmarks/bson_versions.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
{
"versions": [
"1.1.6",
"4.6",
"5.0",
"5.1",
"5.2",
"5.3"
]
}
3 changes: 2 additions & 1 deletion etc/benchmarks/install_bson_versions.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/bin/bash
versions=$(jq '.versions' < bson_versions.json | sed -E 's/(\[|\]|,|")//g')
# To be run from repo root
versions=$(jq '.versions' < etc/benchmarks/bson_versions.json | sed -E 's/(\[|\]|,|")//g')
installVersions=''
for bson in $versions; do
versionNoDot=$(echo $bson | tr -d '.')
Expand Down
55 changes: 55 additions & 0 deletions etc/benchmarks/main.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,61 @@ await runner({
}
});

// Benchmark (currently skipped via `skip: true`): deserialize one large, pre-serialized batch
// many times. The benchmark name describes the array elements as Int32s; they are produced
// here as the plain JS integers 0..99.
await runner({
  skip: true,
  name: 'deserialize a large batch of documents each with an array of many Int32s',
  iterations,
  // Build the input once with the first library in `libs`: a `nextBatch` of 1000 documents,
  // each holding a fresh ObjectId and a 100-element integer array. Returns the serialized bytes.
  setup(libs) {
    const bson = libs[0].lib;
    return bson.serialize({
      nextBatch: Array.from({ length: 1000 }, () => ({
        _id: new bson.ObjectId(),
        arrayField: Array.from({ length: 100 }, (_, i) => i)
      }))
    });
  },
  // Schedule 100 deserializations of the same buffer, each from its own 20ms timer, and wait
  // for all of them. `i` (first argument) is part of the runner's callback signature and is
  // unused here.
  async run(i, bson, document) {
    await Promise.all(
      Array.from(
        { length: 100 },
        () =>
          new Promise((resolve, reject) => {
            setTimeout(() => {
              // A throw inside a timer callback is not captured by the Promise executor,
              // so forward failures explicitly instead of raising an uncaught exception.
              try {
                resolve(bson.lib.deserialize(document, { validation: { utf8: false } }));
              } catch (error) {
                reject(error);
              }
            }, 20);
          })
      )
    );
  }
});

// Benchmark (currently skipped via `skip: true`): deserialize one large, pre-serialized batch
// many times. Each array element is created with `Long.fromInt`, which the benchmark name
// describes as Int64s.
await runner({
  skip: true,
  name: 'deserialize a large batch of documents each with an array of many Int64s',
  iterations,
  // Build the input once with the first library in `libs`: a `nextBatch` of 1000 documents,
  // each holding a fresh ObjectId and a 100-element array of Long values 0..99.
  // Returns the serialized bytes.
  setup(libs) {
    const bson = libs[0].lib;
    return bson.serialize({
      nextBatch: Array.from({ length: 1000 }, () => ({
        _id: new bson.ObjectId(),
        arrayField: Array.from({ length: 100 }, (_, i) => bson.Long.fromInt(i))
      }))
    });
  },
  // Schedule 100 deserializations of the same buffer, each from its own 20ms timer, and wait
  // for all of them. `i` (first argument) is part of the runner's callback signature and is
  // unused here.
  async run(i, bson, document) {
    await Promise.all(
      Array.from(
        { length: 100 },
        () =>
          new Promise((resolve, reject) => {
            setTimeout(() => {
              // A throw inside a timer callback is not captured by the Promise executor,
              // so forward failures explicitly instead of raising an uncaught exception.
              try {
                resolve(bson.lib.deserialize(document, { validation: { utf8: false } }));
              } catch (error) {
                reject(error);
              }
            }, 20);
          })
      )
    );
  }
});
// End
console.log(
'Total time taken to benchmark:',
Expand Down
5 changes: 3 additions & 2 deletions src/binary.ts
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,9 @@ export class Binary extends BSONValue {
toString(encoding?: 'hex' | 'base64' | 'utf8' | 'utf-8'): string {
if (encoding === 'hex') return ByteUtils.toHex(this.buffer);
if (encoding === 'base64') return ByteUtils.toBase64(this.buffer);
if (encoding === 'utf8' || encoding === 'utf-8') return ByteUtils.toUTF8(this.buffer);
return ByteUtils.toUTF8(this.buffer);
if (encoding === 'utf8' || encoding === 'utf-8')
return ByteUtils.toUTF8(this.buffer, 0, this.buffer.byteLength);
return ByteUtils.toUTF8(this.buffer, 0, this.buffer.byteLength);
}

/** @internal */
Expand Down
14 changes: 7 additions & 7 deletions src/parser/deserializer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ function deserializeObject(
if (i >= buffer.byteLength) throw new BSONError('Bad BSON Document: illegal CString');

// Represents the key
const name = isArray ? arrayIndex++ : ByteUtils.toUTF8(buffer.subarray(index, i));
const name = isArray ? arrayIndex++ : ByteUtils.toUTF8(buffer, index, i);

// shouldValidateKey is true if the key should be validated, false otherwise
let shouldValidateKey = true;
Expand Down Expand Up @@ -476,7 +476,7 @@ function deserializeObject(
// If we are at the end of the buffer there is a problem with the document
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
// Return the C string
const source = ByteUtils.toUTF8(buffer.subarray(index, i));
const source = ByteUtils.toUTF8(buffer, index, i);
// Create the regexp
index = i + 1;

Expand All @@ -489,7 +489,7 @@ function deserializeObject(
// If we are at the end of the buffer there is a problem with the document
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
// Return the C string
const regExpOptions = ByteUtils.toUTF8(buffer.subarray(index, i));
const regExpOptions = ByteUtils.toUTF8(buffer, index, i);
index = i + 1;

// For each option add the corresponding one for javascript
Expand Down Expand Up @@ -521,7 +521,7 @@ function deserializeObject(
// If we are at the end of the buffer there is a problem with the document
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
// Return the C string
const source = ByteUtils.toUTF8(buffer.subarray(index, i));
const source = ByteUtils.toUTF8(buffer, index, i);
index = i + 1;

// Get the start search index
Expand All @@ -533,7 +533,7 @@ function deserializeObject(
// If we are at the end of the buffer there is a problem with the document
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
// Return the C string
const regExpOptions = ByteUtils.toUTF8(buffer.subarray(index, i));
const regExpOptions = ByteUtils.toUTF8(buffer, index, i);
index = i + 1;

// Set the object
Expand Down Expand Up @@ -678,7 +678,7 @@ function deserializeObject(
throw new BSONError('Invalid UTF-8 string in BSON document');
}
}
const namespace = ByteUtils.toUTF8(buffer.subarray(index, index + stringSize - 1));
const namespace = ByteUtils.toUTF8(buffer, index, index + stringSize - 1);
// Update parse index position
index = index + stringSize;

Expand Down Expand Up @@ -735,7 +735,7 @@ function getValidatedString(
end: number,
shouldValidateUtf8: boolean
) {
const value = ByteUtils.toUTF8(buffer.subarray(start, end));
const value = ByteUtils.toUTF8(buffer, start, end);
// if utf8 validation is on, do the check
if (shouldValidateUtf8) {
for (let i = 0; i < value.length; i++) {
Expand Down
2 changes: 1 addition & 1 deletion src/utils/byte_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ export type ByteUtils = {
/** Create a Uint8Array containing utf8 code units from a string */
fromUTF8: (text: string) => Uint8Array;
/** Create a string from the utf8 code units of `buffer` between byte offsets `start` (inclusive) and `end` (exclusive) */
toUTF8: (buffer: Uint8Array) => string;
toUTF8: (buffer: Uint8Array, start: number, end: number) => string;
/** Get the utf8 code unit count from a string if it were to be transformed to utf8 */
utf8ByteLength: (input: string) => number;
/** Encode UTF8 bytes generated from `source` string into `destination` at byteOffset. Returns the number of bytes encoded. */
Expand Down
6 changes: 3 additions & 3 deletions src/utils/node_byte_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ type NodeJsBuffer = ArrayBufferView &
Uint8Array & {
write(string: string, offset: number, length: undefined, encoding: 'utf8'): number;
copy(target: Uint8Array, targetStart: number, sourceStart: number, sourceEnd: number): number;
toString: (this: Uint8Array, encoding: NodeJsEncoding) => string;
toString: (this: Uint8Array, encoding: NodeJsEncoding, start?: number, end?: number) => string;
equals: (this: Uint8Array, other: Uint8Array) => boolean;
};
type NodeJsBufferConstructor = Omit<Uint8ArrayConstructor, 'from'> & {
Expand Down Expand Up @@ -125,8 +125,8 @@ export const nodeJsByteUtils = {
return Buffer.from(text, 'utf8');
},

toUTF8(buffer: Uint8Array): string {
return nodeJsByteUtils.toLocalBufferType(buffer).toString('utf8');
toUTF8(buffer: Uint8Array, start: number, end: number): string {
return nodeJsByteUtils.toLocalBufferType(buffer).toString('utf8', start, end);
},

utf8ByteLength(input: string): number {
Expand Down
4 changes: 2 additions & 2 deletions src/utils/web_byte_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,8 @@ export const webByteUtils = {
return new TextEncoder().encode(text);
},

toUTF8(uint8array: Uint8Array): string {
return new TextDecoder('utf8', { fatal: false }).decode(uint8array);
toUTF8(uint8array: Uint8Array, start: number, end: number): string {
return new TextDecoder('utf8', { fatal: false }).decode(uint8array.slice(start, end));
},

utf8ByteLength(input: string): number {
Expand Down

0 comments on commit e087042

Please sign in to comment.