Appearance
3.5 BSON 数据格式
概述
BSON(Binary JSON)是MongoDB使用的数据存储和传输格式,它是JSON的二进制表示形式,提供了比JSON更丰富的数据类型和更高的性能。BSON设计用于高效存储、遍历和操作数据,同时保留JSON的灵活性(作为二进制格式,其可读性不如JSON)。本章节将深入探讨BSON数据格式的特点、结构、优势以及在PHP中的应用。
BSON不仅支持JSON的所有数据类型,还增加了日期、二进制数据、ObjectId、正则表达式等特殊类型。这些扩展使得BSON能够更好地满足现代应用程序的数据存储需求,特别是在处理复杂文档结构和高性能数据操作方面。
基本概念
BSON定义
BSON是一种二进制序列化格式,具有以下特点:
- 二进制格式:高效的存储和传输
- 类型丰富:支持JSON之外的数据类型
- 可遍历性:支持高效的遍历操作
- 可扩展性:支持自定义类型
- 跨语言:支持多种编程语言
BSON与JSON对比
| 特性 | JSON | BSON |
|---|---|---|
| 格式 | 文本 | 二进制 |
| 数据类型 | 有限 | 丰富 |
| 解析速度 | 较慢 | 较快 |
| 存储效率 | 较低 | 较高 |
| 可读性 | 高 | 低 |
| 遍历效率 | 较低 | 较高 |
BSON数据类型
BSON支持以下数据类型:
- Double:64位浮点数
- String:UTF-8字符串
- Object:嵌入文档
- Array:数组
- Binary data:二进制数据
- ObjectId:文档唯一标识符
- Boolean:布尔值
- Date:日期时间
- Null:空值
- Regular Expression:正则表达式
- JavaScript:JavaScript代码
- Symbol:符号(已废弃)
- JavaScript (with scope):带作用域的JavaScript
- 32-bit integer:32位整数
- Timestamp:时间戳
- 64-bit integer:64位整数
- Decimal128:128位十进制数
- Min key:最小键
- Max key:最大键
原理深度解析
BSON文档结构
BSON文档采用特定的二进制结构:
php
<?php
// BSON文档结构分析
/**
 * Demonstrates how PHP values map onto BSON: per-field size analysis of a
 * stored document, encode/decode round trips, and a size comparison with JSON.
 *
 * NOTE(review): MongoDB\BSON\fromPHP()/toPHP() are reported as deprecated in
 * recent ext-mongodb releases - confirm against the installed driver version.
 */
class BSONDocumentStructure {
    /**
     * Bytes a BSON document adds around its elements: a 4-byte int32 length
     * prefix plus a 1-byte trailing NUL terminator.
     */
    const DOCUMENT_OVERHEAD_BYTES = 5;

    private $database;

    /**
     * @param string $databaseName Database selected on the MongoDB server at localhost:27017.
     */
    public function __construct($databaseName) {
        $client = new MongoDB\Client("mongodb://localhost:27017");
        $this->database = $client->selectDatabase($databaseName);
    }

    /**
     * Reports the encoded size of one document and a per-field breakdown.
     *
     * @param string $collectionName Collection to read from.
     * @param string $documentId     ObjectId of the document, as a hex string.
     * @return array ['error' => ...] when no document matches, otherwise the size report.
     */
    public function analyzeDocumentStructure($collectionName, $documentId) {
        $collection = $this->database->selectCollection($collectionName);
        $document = $collection->findOne(['_id' => new MongoDB\BSON\ObjectId($documentId)]);
        if ($document === null) {
            return ['error' => 'Document not found'];
        }

        $bsonSize = strlen(MongoDB\BSON\fromPHP($document));
        $fieldSizes = [];
        foreach ($document as $field => $value) {
            // Each field is encoded inside a throwaway one-element document, so
            // the wrapper's own overhead is subtracted to get the element size.
            // Without this the percentages add up to more than 100%.
            $wrapped = MongoDB\BSON\fromPHP([$field => $value]);
            $elementSize = strlen($wrapped) - self::DOCUMENT_OVERHEAD_BYTES;
            $fieldSizes[$field] = [
                'type' => $this->getBSONTypeName($value),
                'size_bytes' => $elementSize,
                'percentage' => round(($elementSize / $bsonSize) * 100, 2),
            ];
        }
        $fieldCount = count($fieldSizes);

        return [
            'document_id' => $documentId,
            'total_size_bytes' => $bsonSize,
            'total_size_kb' => round($bsonSize / 1024, 2),
            'field_count' => $fieldCount,
            'field_sizes' => $fieldSizes,
            'average_field_size' => $fieldCount > 0 ? round($bsonSize / $fieldCount, 2) : 0.0,
        ];
    }

    /**
     * Encodes several sample documents to BSON and decodes them again.
     *
     * @return array One entry per sample with its BSON size, the decoded value
     *               and whether the round trip preserved the data.
     */
    public function demonstrateBSONEncoding() {
        $examples = [
            [
                'name' => 'simple_document',
                'data' => [
                    'name' => 'John Doe',
                    'age' => 30,
                    'active' => true,
                ],
            ],
            [
                'name' => 'nested_document',
                'data' => [
                    'user' => [
                        'name' => 'Jane Smith',
                        'contact' => [
                            'email' => 'jane@example.com',
                            'phone' => '555-1234',
                        ],
                    ],
                ],
            ],
            [
                'name' => 'array_document',
                'data' => [
                    'tags' => ['mongodb', 'database', 'nosql'],
                    'scores' => [85, 90, 78, 92],
                ],
            ],
            [
                'name' => 'complex_document',
                'data' => [
                    '_id' => new MongoDB\BSON\ObjectId(),
                    'name' => 'Complex Document',
                    'created_at' => new MongoDB\BSON\UTCDateTime(),
                    'metadata' => [
                        'version' => 1.0,
                        'tags' => ['test', 'sample'],
                    ],
                    'items' => [
                        ['id' => 1, 'value' => 'first'],
                        ['id' => 2, 'value' => 'second'],
                    ],
                ],
            ],
        ];

        $results = [];
        foreach ($examples as $example) {
            $bsonData = MongoDB\BSON\fromPHP($example['data']);
            $decodedData = MongoDB\BSON\toPHP($bsonData);
            $results[] = [
                'name' => $example['name'],
                'original_data' => $example['data'],
                'bson_size_bytes' => strlen($bsonData),
                'decoded_data' => $decodedData,
                // The decoded value is an object while the input is an array, so
                // comparing them with == is always false. Re-encoding the decoded
                // value and comparing bytes checks the round trip itself.
                'encoding_successful' => MongoDB\BSON\fromPHP($decodedData) === $bsonData,
            ];
        }
        return $results;
    }

    /**
     * Encodes the same sample document as BSON and as JSON and compares sizes.
     *
     * @return array Sizes and previews for both encodings plus their difference.
     *               'size_difference_percentage' is null if JSON encoding failed.
     */
    public function compareBSONvsJSON() {
        $testData = [
            'name' => 'Test Document',
            'value' => 42,
            'active' => true,
            'tags' => ['tag1', 'tag2', 'tag3'],
            'nested' => [
                'field1' => 'value1',
                'field2' => 123,
            ],
            'created_at' => new MongoDB\BSON\UTCDateTime(),
        ];

        $bsonData = MongoDB\BSON\fromPHP($testData);
        $jsonData = json_encode($testData);
        if ($jsonData === false) {
            // Keep the report shape intact instead of failing on strlen()/division.
            $jsonData = '';
        }
        $bsonSize = strlen($bsonData);
        $jsonSize = strlen($jsonData);
        $difference = $bsonSize - $jsonSize;

        return [
            'test_data' => $testData,
            'bson' => [
                'size_bytes' => $bsonSize,
                'size_kb' => round($bsonSize / 1024, 2),
                'hex_preview' => bin2hex(substr($bsonData, 0, 32)),
            ],
            'json' => [
                'size_bytes' => $jsonSize,
                'size_kb' => round($jsonSize / 1024, 2),
                'preview' => substr($jsonData, 0, 100),
            ],
            'comparison' => [
                'size_difference_bytes' => $difference,
                'size_difference_percentage' => $jsonSize > 0
                    ? round(($difference / $jsonSize) * 100, 2)
                    : null,
                'bson_larger' => $bsonSize > $jsonSize,
            ],
        ];
    }

    /**
     * Maps a PHP value to a human-readable BSON type label.
     *
     * @param mixed $value
     * @return string
     */
    private function getBSONTypeName($value) {
        // Driver and library classes are checked first; any object not listed
        // here falls through to the generic 'Object' label below.
        $classLabels = [
            MongoDB\BSON\ObjectId::class => 'ObjectId',
            MongoDB\BSON\UTCDateTime::class => 'UTCDateTime',
            MongoDB\BSON\Binary::class => 'Binary',
            MongoDB\BSON\Decimal128::class => 'Decimal128',
            MongoDB\BSON\Regex::class => 'Regex',
            MongoDB\BSON\Timestamp::class => 'Timestamp',
            MongoDB\BSON\Int64::class => 'Int64',
            // Arrays read through the library arrive as BSONArray objects, not PHP arrays.
            MongoDB\Model\BSONArray::class => 'Array',
        ];
        foreach ($classLabels as $className => $label) {
            if ($value instanceof $className) {
                return $label;
            }
        }

        if (is_array($value)) {
            return 'Array';
        }
        if (is_object($value)) {
            return 'Object';
        }
        if (is_string($value)) {
            return 'String';
        }
        if (is_int($value)) {
            return 'Integer';
        }
        if (is_float($value)) {
            return 'Double';
        }
        if (is_bool($value)) {
            return 'Boolean';
        }
        if (is_null($value)) {
            return 'Null';
        }
        return 'Unknown';
    }
}
// Usage example
$bsonStructure = new BSONDocumentStructure('testdb');

// Per-field size breakdown of one stored document
$structureAnalysis = $bsonStructure->analyzeDocumentStructure(
    'test_collection',
    '507f1f77bcf86cd799439011'
);
echo print_r($structureAnalysis, true);

// Encode/decode round trips for the sample documents
$encodingDemo = $bsonStructure->demonstrateBSONEncoding();
echo print_r($encodingDemo, true);

// Size comparison between BSON and JSON
$comparison = $bsonStructure->compareBSONvsJSON();
echo print_r($comparison, true);
?>
BSON类型编码
BSON使用特定的字节编码来表示不同的数据类型:
php
<?php
// BSON类型编码详解
/**
 * Reference data and small experiments about how individual BSON types are
 * encoded: type codes, payload sizes, and the cost of storing one value in
 * different types.
 */
class BSONTypeEncoding {
    private $database;

    /**
     * @param string $databaseName Database selected on the MongoDB server at localhost:27017.
     */
    public function __construct($databaseName) {
        $client = new MongoDB\Client("mongodb://localhost:27017");
        $this->database = $client->selectDatabase($databaseName);
    }

    /**
     * Lists BSON types with their type code, an example value and payload size.
     *
     * @return array List of ['type', 'type_code', 'example', 'size_bytes', 'description'] entries;
     *               'size_bytes' is the string 'variable' for variable-length types.
     */
    public function demonstrateTypeEncoding() {
        $sampleString = 'Hello, World!';
        $keys = ['type', 'type_code', 'example', 'size_bytes', 'description'];
        $rows = [
            ['Double', 0x01, 3.14159, 8, '64-bit floating point'],
            // String payload: int32 length + bytes + trailing NUL.
            ['String', 0x02, $sampleString, 4 + strlen($sampleString) + 1, 'UTF-8 string'],
            ['Object', 0x03, ['key' => 'value'], 'variable', 'Embedded document'],
            ['Array', 0x04, [1, 2, 3], 'variable', 'Array'],
            [
                'Binary',
                0x05,
                new MongoDB\BSON\Binary('data', MongoDB\BSON\Binary::TYPE_GENERIC),
                'variable',
                'Binary data',
            ],
            ['ObjectId', 0x07, new MongoDB\BSON\ObjectId(), 12, '12-byte ObjectId'],
            ['Boolean', 0x08, true, 1, 'Boolean value'],
            ['UTCDateTime', 0x09, new MongoDB\BSON\UTCDateTime(), 8, '64-bit UTC datetime'],
            ['Null', 0x0A, null, 0, 'Null value'],
            ['Regex', 0x0B, new MongoDB\BSON\Regex('^pattern$', 'i'), 'variable', 'Regular expression'],
            ['32-bit Integer', 0x10, 2147483647, 4, '32-bit signed integer'],
            ['Timestamp', 0x11, new MongoDB\BSON\Timestamp(1234567890, 1), 8, 'Special timestamp type'],
            ['64-bit Integer', 0x12, 9223372036854775807, 8, '64-bit signed integer'],
            ['Decimal128', 0x13, new MongoDB\BSON\Decimal128('123.456'), 16, '128-bit decimal'],
        ];

        return array_map(function (array $row) use ($keys) {
            return array_combine($keys, $row);
        }, $rows);
    }

    /**
     * Compares the encoded size of the value 42 stored as different BSON types.
     *
     * @return array One entry per type with the encoded size of a one-field
     *               document holding only that value.
     */
    public function analyzeTypeEfficiency() {
        $testDocuments = [
            ['name' => 'integer_test', 'value' => 42, 'bson_type' => '32-bit Integer'],
            ['name' => 'double_test', 'value' => 42.0, 'bson_type' => 'Double'],
            ['name' => 'string_test', 'value' => '42', 'bson_type' => 'String'],
            ['name' => 'decimal_test', 'value' => new MongoDB\BSON\Decimal128('42'), 'bson_type' => 'Decimal128'],
        ];

        $efficiencyAnalysis = [];
        foreach ($testDocuments as $doc) {
            // Encode only the value. Encoding the whole descriptor would also count
            // the 'name' and 'bson_type' strings, whose lengths differ per row and
            // would distort the comparison between types.
            $size = strlen(MongoDB\BSON\fromPHP(['value' => $doc['value']]));
            $efficiencyAnalysis[] = [
                'name' => $doc['name'],
                'value' => $doc['value'],
                'bson_type' => $doc['bson_type'],
                'size_bytes' => $size,
                'efficiency_note' => $this->getEfficiencyNote($doc['bson_type']),
            ];
        }
        return $efficiencyAnalysis;
    }

    /**
     * Encodes a nested sample document and returns descriptive notes about
     * traversal and memory layout alongside its encoded size.
     *
     * @return array
     */
    public function demonstrateBSONTraversal() {
        $complexDocument = [
            'user' => [
                'id' => new MongoDB\BSON\ObjectId(),
                'name' => 'John Doe',
                'contacts' => [
                    ['type' => 'email', 'value' => 'john@example.com'],
                    ['type' => 'phone', 'value' => '555-1234'],
                ],
                'metadata' => [
                    'created' => new MongoDB\BSON\UTCDateTime(),
                    'updated' => new MongoDB\BSON\UTCDateTime(),
                    'tags' => ['active', 'verified'],
                ],
            ],
        ];
        $bsonData = MongoDB\BSON\fromPHP($complexDocument);

        return [
            'original_document' => $complexDocument,
            'bson_size_bytes' => strlen($bsonData),
            'traversal_performance' => [
                // A BSON document is a flat sequence of elements with no index, so
                // finding a key means scanning. Arrays are documents keyed "0", "1", ...
                'field_access' => 'O(n) in the number of fields - elements are scanned sequentially',
                'nested_access' => 'O(n) for nested structures',
                'array_access' => 'O(n) - arrays are stored as documents keyed "0", "1", ...',
                'overall_efficiency' => 'High - optimized for traversal',
            ],
            'memory_efficiency' => [
                'contiguous_storage' => 'Yes - fields stored contiguously',
                'type_overhead' => '1 byte per field for type identifier',
                'string_storage' => 'Length-prefixed for efficient access',
            ],
        ];
    }

    /**
     * Returns a short note on the storage efficiency of a BSON type label.
     *
     * @param string $type One of the labels used in analyzeTypeEfficiency().
     * @return string The note, or 'Standard efficiency' for unknown labels.
     */
    private function getEfficiencyNote($type) {
        $notes = [
            '32-bit Integer' => 'Most efficient for integer values within range',
            'Double' => 'Efficient for floating-point numbers',
            'String' => 'Efficient for text data with length prefix',
            'Decimal128' => 'High precision but larger size (16 bytes)',
        ];
        return $notes[$type] ?? 'Standard efficiency';
    }
}
// Usage example
$typeEncoding = new BSONTypeEncoding('testdb');

// Type codes and payload sizes
$encodingDemo = $typeEncoding->demonstrateTypeEncoding();
echo print_r($encodingDemo, true);

// Encoded size of one value stored as different types
$efficiencyAnalysis = $typeEncoding->analyzeTypeEfficiency();
echo print_r($efficiencyAnalysis, true);

// Traversal notes for a nested document
$traversalDemo = $typeEncoding->demonstrateBSONTraversal();
echo print_r($traversalDemo, true);
?>
BSON性能优化
BSON格式设计考虑了性能优化:
php
<?php
// BSON性能优化分析
/**
 * Small timing and size experiments around BSON encoding and decoding.
 *
 * All timings use microtime(true) over a single run, so they are indicative
 * only and can legitimately measure as zero for tiny documents.
 */
class BSONPerformanceOptimization {
    private $database;

    /**
     * @param string $databaseName Database selected on the MongoDB server at localhost:27017.
     */
    public function __construct($databaseName) {
        $client = new MongoDB\Client("mongodb://localhost:27017");
        $this->database = $client->selectDatabase($databaseName);
    }

    /**
     * Times one encode and one decode for a small, a medium and a large document.
     *
     * @return array One entry per document with size, timings in milliseconds and
     *               throughput in MB/s (null when the elapsed time measured as zero).
     */
    public function analyzeEncodingPerformance() {
        $testData = [
            'small_document' => [
                'name' => 'Test',
                'value' => 42,
            ],
            'medium_document' => [
                'name' => 'Medium Test Document',
                'description' => 'This is a medium-sized document for testing purposes',
                'tags' => ['test', 'performance', 'bson'],
                'metadata' => [
                    'created' => new MongoDB\BSON\UTCDateTime(),
                    'updated' => new MongoDB\BSON\UTCDateTime(),
                ],
            ],
            'large_document' => [
                'name' => 'Large Test Document',
                'description' => str_repeat('This is a large document for performance testing. ', 100),
                'tags' => array_fill(0, 50, 'test'),
                'items' => array_fill(0, 100, ['id' => 1, 'value' => 'test']),
                'metadata' => [
                    'created' => new MongoDB\BSON\UTCDateTime(),
                    'updated' => new MongoDB\BSON\UTCDateTime(),
                    'version' => 1.0,
                    'author' => 'Test Author',
                    'permissions' => ['read', 'write', 'delete'],
                ],
            ],
        ];

        $performanceResults = [];
        foreach ($testData as $name => $data) {
            // Time the encode step
            $encodeStart = microtime(true);
            $bsonData = MongoDB\BSON\fromPHP($data);
            $encodeTime = microtime(true) - $encodeStart;

            // Time the decode step; the decoded value itself is not needed
            $decodeStart = microtime(true);
            MongoDB\BSON\toPHP($bsonData);
            $decodeTime = microtime(true) - $decodeStart;

            $totalTime = $encodeTime + $decodeTime;
            $bsonSize = strlen($bsonData);
            $performanceResults[] = [
                'document_name' => $name,
                'bson_size_bytes' => $bsonSize,
                'encode_time_ms' => round($encodeTime * 1000, 4),
                'decode_time_ms' => round($decodeTime * 1000, 4),
                'total_time_ms' => round($totalTime * 1000, 4),
                'throughput_mb_per_sec' => $this->throughputMbPerSec($bsonSize, $totalTime),
            ];
        }
        return $performanceResults;
    }

    /**
     * Compares the encoded size of a flat document with long field names against
     * a nested document with short names holding the same data.
     *
     * @return array Sizes of both layouts and the absolute and relative reduction.
     */
    public function optimizeDocumentStructure() {
        $unoptimizedDocument = [
            'user_name' => 'John Doe',
            'user_email' => 'john@example.com',
            'user_age' => 30,
            'user_active' => true,
            'user_created' => new MongoDB\BSON\UTCDateTime(),
            'user_updated' => new MongoDB\BSON\UTCDateTime(),
            'user_metadata_version' => 1,
            'user_metadata_tags' => ['active', 'verified'],
        ];
        $optimizedDocument = [
            'user' => [
                'name' => 'John Doe',
                'email' => 'john@example.com',
                'age' => 30,
                'active' => true,
            ],
            'timestamps' => [
                'created' => new MongoDB\BSON\UTCDateTime(),
                'updated' => new MongoDB\BSON\UTCDateTime(),
            ],
            'metadata' => [
                'version' => 1,
                'tags' => ['active', 'verified'],
            ],
        ];

        $unoptimizedSize = strlen(MongoDB\BSON\fromPHP($unoptimizedDocument));
        $optimizedSize = strlen(MongoDB\BSON\fromPHP($optimizedDocument));
        $reduction = $unoptimizedSize - $optimizedSize;

        return [
            'unoptimized' => [
                'size_bytes' => $unoptimizedSize,
                'structure' => 'Flat with long field names',
            ],
            'optimized' => [
                'size_bytes' => $optimizedSize,
                'structure' => 'Nested with logical grouping',
            ],
            'improvement' => [
                'size_reduction_bytes' => $reduction,
                'size_reduction_percentage' => round(($reduction / $unoptimizedSize) * 100, 2),
                'recommendation' => 'Use nested structures and shorter field names',
            ],
        ];
    }

    /**
     * Decodes the same BSON payload ten times and reports the timings.
     *
     * NOTE(review): nothing here touches the server, so the 'caching_benefit'
     * text describes an effect this method does not actually measure.
     *
     * @return array Payload size, the ten decode timings in ms and their avg/min/max.
     */
    public function demonstrateBSONCaching() {
        $testDocument = [
            'name' => 'Cached Document',
            'data' => str_repeat('x', 1000),
            'timestamp' => new MongoDB\BSON\UTCDateTime(),
        ];

        // Encode once, then decode repeatedly
        $bsonData = MongoDB\BSON\fromPHP($testDocument);
        $accessTimes = [];
        for ($i = 0; $i < 10; $i++) {
            $start = microtime(true);
            MongoDB\BSON\toPHP($bsonData);
            $accessTimes[] = (microtime(true) - $start) * 1000;
        }

        return [
            'document_size_bytes' => strlen($bsonData),
            'access_times_ms' => $accessTimes,
            'average_access_time_ms' => round(array_sum($accessTimes) / count($accessTimes), 4),
            'min_access_time_ms' => round(min($accessTimes), 4),
            'max_access_time_ms' => round(max($accessTimes), 4),
            'caching_benefit' => 'Subsequent accesses benefit from OS and database caching',
        ];
    }

    /**
     * Converts a byte count and an elapsed time into MB/s.
     *
     * @param int   $bytes   Number of bytes processed.
     * @param float $seconds Elapsed wall-clock time.
     * @return float|null Throughput rounded to 2 decimals, or null when the time is
     *                    not positive (too fast for microtime() to resolve); dividing
     *                    by it would throw DivisionByZeroError on PHP 8.
     */
    private function throughputMbPerSec($bytes, $seconds) {
        if ($seconds <= 0) {
            return null;
        }
        return round(($bytes / (1024 * 1024)) / $seconds, 2);
    }
}
// Usage example
$performanceOptimization = new BSONPerformanceOptimization('testdb');

// Encode/decode timings for documents of different sizes
$performanceAnalysis = $performanceOptimization->analyzeEncodingPerformance();
echo print_r($performanceAnalysis, true);

// Flat versus nested document layout
$optimizationResult = $performanceOptimization->optimizeDocumentStructure();
echo print_r($optimizationResult, true);

// Repeated decoding of one payload
$cachingDemo = $performanceOptimization->demonstrateBSONCaching();
echo print_r($cachingDemo, true);
?>
常见错误与踩坑点
错误1:BSON大小限制
问题描述:文档大小超过16MB限制导致插入失败。
php
<?php
// Pitfall - a single BSON document may not exceed the 16MB limit
try {
    $client = new MongoDB\Client("mongodb://localhost:27017");
    $database = $client->selectDatabase("testdb");
    $collection = $database->selectCollection('bson_size_errors');

    // Wrong: one document carrying a 20MB string
    $largeDocument = [
        'name' => 'Large Document',
        'data' => str_repeat('x', 20 * 1024 * 1024), // 20MB
    ];
    $result = $collection->insertOne($largeDocument);
} catch (Exception $e) {
    echo "错误: " . $e->getMessage() . "\n";
}

// Correct approach - store large payloads in GridFS
$client = new MongoDB\Client("mongodb://localhost:27017");
$database = $client->selectDatabase("testdb");

// The Bucket constructor expects a driver Manager and a database name, not a
// Database object. Database::selectGridFSBucket() supplies both.
$gridFS = $database->selectGridFSBucket();
$largeData = str_repeat('x', 20 * 1024 * 1024); // 20MB
$stream = $gridFS->openUploadStream('large_file.txt');
fwrite($stream, $largeData);
fclose($stream);
echo "Large file stored in GridFS successfully\n";

// Alternative - split the payload across several documents
$collection = $database->selectCollection('split_documents');
$chunkSize = 10 * 1024 * 1024; // 10MB chunks keep each document under the limit
$chunks = str_split($largeData, $chunkSize);
foreach ($chunks as $index => $chunk) {
    $collection->insertOne([
        'chunk_index' => $index,
        'data' => $chunk,
        'total_chunks' => count($chunks),
    ]);
}
echo "Large document split into " . count($chunks) . " chunks\n";
?>
错误2:BSON类型转换错误
问题描述:PHP类型与BSON类型转换不当导致数据丢失。
php
<?php
// Pitfall - silent type conversion
try {
    $client = new MongoDB\Client("mongodb://localhost:27017");
    $database = $client->selectDatabase("testdb");
    $collection = $database->selectCollection('bson_type_errors');

    // Wrong: PHP_INT_MAX + 1 overflows PHP's integer type and becomes a float,
    // so the value is no longer an exact integer by the time it is stored.
    $largeInteger = PHP_INT_MAX + 1;
    $insertResult = $collection->insertOne([
        'value' => $largeInteger,
        'type' => gettype($largeInteger), // "double", not "integer"
    ]);

    // Read back the document just written. Filtering on type "integer" would
    // match nothing, because gettype() reported "double".
    $result = $collection->findOne(['_id' => $insertResult->getInsertedId()]);
    if ($result !== null) {
        echo "Stored value: " . $result['value'] . "\n";
    }
    echo "Original value: " . $largeInteger . "\n";
} catch (Exception $e) {
    echo "错误: " . $e->getMessage() . "\n";
}

// Correct approach - pick an explicit, suitable BSON type
$client = new MongoDB\Client("mongodb://localhost:27017");
$database = $client->selectDatabase("testdb");
$collection = $database->selectCollection('bson_type_correct');

// A value that fits PHP's integer type stays an integer. Anything larger
// belongs in Decimal128, as shown below.
$largeInteger = PHP_INT_MAX;
$collection->insertOne([
    'value' => $largeInteger,
    'type' => '64-bit integer',
]);

// Decimal128 for high-precision numbers
$decimalValue = new MongoDB\BSON\Decimal128('12345678901234567890.1234567890');
$collection->insertOne([
    'value' => $decimalValue,
    'type' => 'Decimal128',
]);

// UTCDateTime for dates
$dateTime = new MongoDB\BSON\UTCDateTime();
$collection->insertOne([
    'value' => $dateTime,
    'type' => 'UTCDateTime',
]);
echo "Data stored with correct BSON types\n";
?>
错误3:BSON字段名限制
问题描述:使用不合规的字段名导致BSON编码错误。
php
<?php
// Pitfall - unusual field names
// NOTE(review): whether the server rejects these names depends on the MongoDB
// version in use - confirm before relying on an exception here.
try {
    $invalidDocument = [
        'field with spaces' => 'value1',
        'field.with.dots' => 'value2',
        'field$with$dollar' => 'value3',
        '' => 'empty field name'
    ];

    $client = new MongoDB\Client("mongodb://localhost:27017");
    $database = $client->selectDatabase("testdb");
    $collection = $database->selectCollection('bson_field_errors');
    $result = $collection->insertOne($invalidDocument);
} catch (Exception $e) {
    echo "错误: ", $e->getMessage(), "\n";
}

// Correct approach - conventional field names
$validDocument = [
    'field_with_underscores' => 'value1',
    'fieldWithCamelCase' => 'value2',
    'field_with_numbers_123' => 'value3',
    'normal_field' => 'value4'
];

$client = new MongoDB\Client("mongodb://localhost:27017");
$database = $client->selectDatabase("testdb");
$collection = $database->selectCollection('bson_field_correct');
$result = $collection->insertOne($validDocument);
echo "Document with valid field names inserted successfully\n";

// Read the document back
$retrievedDoc = $collection->findOne(['normal_field' => 'value4']);
echo print_r($retrievedDoc, true);
?>
错误4:BSON数组索引问题
问题描述:BSON数组要求键为从0开始的连续整数;PHP数组的键若不连续(或为非数字键),会被编码为嵌入文档(对象)而不是数组。
php
<?php
// Pitfall - array keys
try {
    $client = new MongoDB\Client("mongodb://localhost:27017");
    $database = $client->selectDatabase("testdb");
    $collection = $database->selectCollection('bson_array_errors');

    // Wrong: integer keys with gaps
    $sparseArray = [
        0 => 'first',
        2 => 'third', // skips index 1
        5 => 'fifth', // skips indexes 3 and 4
    ];
    $insertResult = $collection->insertOne([
        'sparse_array' => $sparseArray,
    ]);

    // Look up the document just written. A filterless findOne() may return an
    // older document once the collection holds more than one.
    $result = $collection->findOne(['_id' => $insertResult->getInsertedId()]);
    echo "Stored array: ";
    if ($result !== null) {
        print_r($result['sparse_array']);
    }
} catch (Exception $e) {
    echo "错误: " . $e->getMessage() . "\n";
}

// Correct approach - contiguous indexes
$client = new MongoDB\Client("mongodb://localhost:27017");
$database = $client->selectDatabase("testdb");
$collection = $database->selectCollection('bson_array_correct');

// A plain list gets keys 0..n-1
$continuousArray = [
    'first',
    'second',
    'third',
    'fourth',
    'fifth',
];
$collection->insertOne([
    'continuous_array' => $continuousArray,
]);

// Or use an embedded document for keyed data
$objectData = [
    'first' => 'value1',
    'third' => 'value3',
    'fifth' => 'value5',
];
$collection->insertOne([
    'object_data' => $objectData,
]);
echo "Data stored with correct array structure\n";

// Query by array position
$result = $collection->findOne(['continuous_array.0' => 'first']);
echo "Found document with first element: " . $result['continuous_array'][0] . "\n";
?>
常见应用场景
场景1:高性能数据序列化
使用BSON进行高性能数据序列化和反序列化:
php
<?php
// 高性能BSON序列化系统
/**
 * Serializes PHP data to BSON (optionally zlib-compressed), restores it, and
 * benchmarks BSON encoding against json_encode().
 */
class BSONSerializationSystem {
    private $database;

    /**
     * @param string $databaseName Database selected on the MongoDB server at localhost:27017.
     */
    public function __construct($databaseName) {
        $client = new MongoDB\Client("mongodb://localhost:27017");
        $this->database = $client->selectDatabase($databaseName);
    }

    /**
     * Encodes data as BSON and optionally compresses the result.
     *
     * @param array|object $data        Value accepted by MongoDB\BSON\fromPHP().
     * @param bool         $compression Whether to gzcompress() the BSON bytes.
     * @return array Serialized bytes plus size and timing figures. 'original_size' is
     *               the JSON-encoded size used as the baseline; 'compression_ratio' is
     *               serialized size divided by that baseline, or null when the JSON
     *               baseline could not be produced.
     * @throws RuntimeException When compression fails.
     */
    public function serializeData($data, $compression = false) {
        $startTime = microtime(true);

        // BSON encoding
        $bsonData = MongoDB\BSON\fromPHP($data);

        // Optional compression
        if ($compression) {
            $compressed = gzcompress($bsonData);
            if ($compressed === false) {
                throw new RuntimeException('Failed to compress BSON data');
            }
            $bsonData = $compressed;
        }
        $serializationTime = microtime(true) - $startTime;

        // The JSON baseline is computed once; a json_encode() failure counts as size 0.
        $jsonData = json_encode($data);
        $jsonSize = $jsonData === false ? 0 : strlen($jsonData);
        $serializedSize = strlen($bsonData);

        return [
            'serialized_data' => $bsonData,
            'original_size' => $jsonSize,
            'serialized_size' => $serializedSize,
            // Both branches of the former ternary computed this same value.
            'compression_ratio' => $jsonSize > 0 ? round($serializedSize / $jsonSize, 2) : null,
            'serialization_time_ms' => round($serializationTime * 1000, 4),
            'compression_used' => $compression,
        ];
    }

    /**
     * Reverses serializeData(): optionally decompresses, then decodes the BSON.
     *
     * @param string $bsonData    Bytes produced by serializeData().
     * @param bool   $compression Must match the flag used when serializing.
     * @return array Decoded data, timing, and a 'success' flag. On decompression
     *               failure 'success' is false and 'error' explains why.
     */
    public function deserializeData($bsonData, $compression = false) {
        $startTime = microtime(true);

        // Optional decompression
        if ($compression) {
            $inflated = gzuncompress($bsonData);
            if ($inflated === false) {
                return [
                    'deserialized_data' => null,
                    'deserialization_time_ms' => round((microtime(true) - $startTime) * 1000, 4),
                    'success' => false,
                    'error' => 'Failed to decompress BSON data',
                ];
            }
            $bsonData = $inflated;
        }

        // BSON decoding
        $data = MongoDB\BSON\toPHP($bsonData);
        $deserializationTime = microtime(true) - $startTime;

        return [
            'deserialized_data' => $data,
            'deserialization_time_ms' => round($deserializationTime * 1000, 4),
            'success' => true,
        ];
    }

    /**
     * Times repeated BSON and JSON encoding of one sample document.
     *
     * @param int $iterations Runs per encoder; values below 1 are treated as 1.
     * @return array Timing statistics for both encoders and a comparison.
     */
    public function benchmarkSerialization($iterations = 1000) {
        // At least one run is required: the statistics divide by the run count
        // and call min()/max() on the collected timings.
        $iterations = max(1, (int) $iterations);

        $testData = [
            'user_id' => 'user_001',
            'name' => 'John Doe',
            'email' => 'john@example.com',
            'age' => 30,
            'active' => true,
            'tags' => ['developer', 'mongodb', 'php'],
            'metadata' => [
                'created' => new MongoDB\BSON\UTCDateTime(),
                'updated' => new MongoDB\BSON\UTCDateTime(),
                'version' => 1.0,
            ],
        ];

        // BSON encoding benchmark
        $bsonTimes = [];
        for ($i = 0; $i < $iterations; $i++) {
            $start = microtime(true);
            MongoDB\BSON\fromPHP($testData);
            $bsonTimes[] = (microtime(true) - $start) * 1000;
        }

        // JSON encoding benchmark
        $jsonTimes = [];
        for ($i = 0; $i < $iterations; $i++) {
            $start = microtime(true);
            json_encode($testData);
            $jsonTimes[] = (microtime(true) - $start) * 1000;
        }

        $bsonTotal = array_sum($bsonTimes);
        $jsonTotal = array_sum($jsonTimes);
        // A JSON total of zero (timer resolution) would make the percentage undefined.
        $improvement = $jsonTotal > 0
            ? round((($jsonTotal - $bsonTotal) / $jsonTotal) * 100, 2)
            : 0.0;

        return [
            'iterations' => $iterations,
            'bson_serialization' => $this->summarizeTimings($bsonTimes),
            'json_serialization' => $this->summarizeTimings($jsonTimes),
            'comparison' => [
                'bson_faster' => $bsonTotal < $jsonTotal,
                'speed_improvement' => $improvement . '%',
            ],
        ];
    }

    /**
     * Reduces a non-empty list of millisecond timings to avg/min/max/total.
     *
     * @param float[] $times
     * @return array
     */
    private function summarizeTimings(array $times) {
        $total = array_sum($times);
        return [
            'average_time_ms' => round($total / count($times), 4),
            'min_time_ms' => round(min($times), 4),
            'max_time_ms' => round(max($times), 4),
            'total_time_ms' => round($total, 4),
        ];
    }
}
// Usage example
$serializationSystem = new BSONSerializationSystem('testdb');

// Serialize a sample document with compression enabled
$testData = [
    'name' => 'Test Data',
    'value' => 42,
    'timestamp' => new MongoDB\BSON\UTCDateTime(),
    'tags' => ['test', 'benchmark']
];
$serializationResult = $serializationSystem->serializeData($testData, true);
echo print_r($serializationResult, true);

// Restore it, using the same compression flag
$serializedBytes = $serializationResult['serialized_data'];
$deserializationResult = $serializationSystem->deserializeData($serializedBytes, true);
echo print_r($deserializationResult, true);

// Benchmark BSON against JSON encoding
$benchmark = $serializationSystem->benchmarkSerialization(1000);
echo print_r($benchmark, true);
?>
场景2:跨语言数据交换
使用BSON实现跨语言数据交换:
php
<?php
// 跨语言BSON数据交换系统
/**
 * Stores, retrieves, exports and imports documents in the 'language_neutral'
 * collection so the data can be exchanged as raw BSON.
 */
class CrossLanguageDataExchange {
    private $database;

    /**
     * @param string $databaseName Database selected on the MongoDB server at localhost:27017.
     */
    public function __construct($databaseName) {
        $client = new MongoDB\Client("mongodb://localhost:27017");
        $this->database = $client->selectDatabase($databaseName);
    }

    /**
     * Normalizes the data and stores it wrapped in an envelope document.
     *
     * @param mixed $data Arbitrary PHP data.
     * @return mixed The inserted document's _id.
     */
    public function createLanguageNeutralData($data) {
        $collection = $this->database->selectCollection('language_neutral');
        $document = [
            '_id' => new MongoDB\BSON\ObjectId(),
            'data' => $this->convertToBSONCompatible($data),
            'created_at' => new MongoDB\BSON\UTCDateTime(),
            'source_language' => 'PHP',
            'bson_compatible' => true,
        ];
        return $collection->insertOne($document)->getInsertedId();
    }

    /**
     * Loads an envelope document by id.
     *
     * @param string $documentId ObjectId of the document, as a hex string.
     * @return array ['error' => ...] when not found, otherwise the envelope fields
     *               with 'created_at' formatted as 'Y-m-d H:i:s'.
     */
    public function retrieveLanguageNeutralData($documentId) {
        $collection = $this->database->selectCollection('language_neutral');
        $document = $collection->findOne(['_id' => new MongoDB\BSON\ObjectId($documentId)]);
        if ($document === null) {
            return ['error' => 'Document not found'];
        }
        return [
            'document_id' => $document['_id'],
            'data' => $document['data'],
            'created_at' => $document['created_at']->toDateTime()->format('Y-m-d H:i:s'),
            'source_language' => $document['source_language'],
            'bson_compatible' => $document['bson_compatible'],
        ];
    }

    /**
     * Writes the raw BSON of one stored document to a file.
     *
     * @param string $documentId ObjectId of the document, as a hex string.
     * @param string $filename   Destination path.
     * @return array ['error' => ...] when not found, otherwise the write outcome.
     */
    public function exportBSONToFile($documentId, $filename) {
        $collection = $this->database->selectCollection('language_neutral');
        $document = $collection->findOne(['_id' => new MongoDB\BSON\ObjectId($documentId)]);
        if ($document === null) {
            return ['error' => 'Document not found'];
        }
        $bsonData = MongoDB\BSON\fromPHP($document);
        $bytesWritten = file_put_contents($filename, $bsonData);
        return [
            'success' => $bytesWritten !== false,
            'filename' => $filename,
            'size_bytes' => strlen($bsonData),
            'bytes_written' => $bytesWritten,
        ];
    }

    /**
     * Reads a raw BSON file and inserts the decoded document.
     *
     * The imported document keeps its original _id, so importing into a
     * collection that already holds it fails with a duplicate-key error, which
     * is reported as an insert failure rather than a parse failure.
     *
     * @param string $filename Path of a file written by exportBSONToFile().
     * @return array Success details, or ['error' => ..., 'message' => ...] on failure.
     */
    public function importBSONFromFile($filename) {
        if (!file_exists($filename)) {
            return ['error' => 'File not found'];
        }
        $bsonData = file_get_contents($filename);
        if ($bsonData === false) {
            return ['error' => 'Failed to read file'];
        }

        try {
            $document = MongoDB\BSON\toPHP($bsonData);
        } catch (Exception $e) {
            return [
                'error' => 'Failed to parse BSON data',
                'message' => $e->getMessage(),
            ];
        }

        try {
            $collection = $this->database->selectCollection('language_neutral');
            $result = $collection->insertOne($document);
        } catch (Exception $e) {
            return [
                'error' => 'Failed to insert imported document',
                'message' => $e->getMessage(),
            ];
        }

        return [
            'success' => true,
            'document_id' => $result->getInsertedId(),
            'size_bytes' => strlen($bsonData),
        ];
    }

    /**
     * Recursively converts plain objects to arrays and resources to strings.
     *
     * @param mixed $data
     * @return mixed
     */
    private function convertToBSONCompatible($data) {
        // BSON value objects (ObjectId, UTCDateTime, Decimal128, ...) are already
        // encodable. Passing them through get_object_vars() would lose their type.
        if ($data instanceof MongoDB\BSON\Type) {
            return $data;
        }
        if (is_array($data)) {
            $converted = [];
            foreach ($data as $key => $value) {
                // No key cast: PHP turns integer-like string keys back into
                // integers, so casting to string changes nothing.
                $converted[$key] = $this->convertToBSONCompatible($value);
            }
            return $converted;
        }
        if (is_object($data)) {
            // Plain objects become arrays of their accessible properties
            return $this->convertToBSONCompatible(get_object_vars($data));
        }
        if (is_resource($data)) {
            // Resources cannot be encoded; store their string form
            return (string) $data;
        }
        return $data;
    }
}
// Usage example
$crossLanguageExchange = new CrossLanguageDataExchange('testdb');

// Store a sample document
$neutralData = [
    'user' => [
        'id' => 'user_001',
        'name' => 'John Doe',
        'age' => 30,
        'active' => true,
        'score' => 95.5,
        'created' => new MongoDB\BSON\UTCDateTime()
    ],
    'preferences' => [
        'theme' => 'dark',
        'language' => 'en',
        'notifications' => true
    ],
    'metadata' => [
        'version' => 1.0,
        'tags' => ['user', 'active', 'premium']
    ]
];
$documentId = $crossLanguageExchange->createLanguageNeutralData($neutralData);
echo "Created language-neutral document with ID: ", $documentId, "\n";

// Read it back
$retrievedData = $crossLanguageExchange->retrieveLanguageNeutralData($documentId);
echo print_r($retrievedData, true);

// Export the raw BSON to a file
$exportResult = $crossLanguageExchange->exportBSONToFile($documentId, 'data.bson');
echo print_r($exportResult, true);

// Import from that file
$importResult = $crossLanguageExchange->importBSONFromFile('data.bson');
echo print_r($importResult, true);
?>
常见问题答疑
问题1:BSON和JSON有什么区别?
回答:BSON和JSON的主要区别:
php
<?php
// BSON vs JSON 对比分析
/**
 * Side-by-side summary of BSON and JSON for one sample document.
 */
class BSONvsJSONComparison {
    /**
     * Encodes a sample document both ways and returns sizes, capability flags
     * and a list of key differences.
     *
     * @return array
     */
    public static function compareFormats() {
        $testData = [
            'name' => 'John Doe',
            'age' => 30,
            'active' => true,
            'created_at' => new MongoDB\BSON\UTCDateTime(),
            'tags' => ['developer', 'mongodb'],
            'metadata' => [
                'version' => 1.0,
                'score' => 95.5
            ]
        ];

        // Encoded sizes: BSON first, then JSON
        $bsonSize = strlen(MongoDB\BSON\fromPHP($testData));
        $jsonSize = strlen(json_encode($testData));

        // Every capability flag is true for BSON and false for JSON
        $describe = function ($sizeBytes, $supported) {
            return [
                'size_bytes' => $sizeBytes,
                'supports_dates' => $supported,
                'supports_binary' => $supported,
                'supports_objectid' => $supported,
                'type_rich' => $supported
            ];
        };

        $keyDifferences = [
            'Data Types' => 'BSON supports more data types than JSON',
            'Binary Format' => 'BSON is binary, JSON is text',
            'Performance' => 'BSON is faster for encoding/decoding',
            'Readability' => 'JSON is more human-readable',
            'Size' => 'BSON is typically larger than JSON'
        ];

        return [
            'test_data' => $testData,
            'bson' => $describe($bsonSize, true),
            'json' => $describe($jsonSize, false),
            'key_differences' => $keyDifferences
        ];
    }
}
// Usage example
$comparison = BSONvsJSONComparison::compareFormats();
echo print_r($comparison, true);
?>
实战练习
练习1:BSON文档分析工具
创建一个BSON文档分析工具:
php
<?php
// 练习1:BSON文档分析工具
/**
 * Exercise 1: analyzes a stored document's encoded size, fields and types,
 * and produces structure recommendations.
 */
class BSONDocumentAnalyzer {
    /**
     * Bytes a BSON document adds around its elements: a 4-byte int32 length
     * prefix plus a 1-byte trailing NUL terminator.
     */
    const DOCUMENT_OVERHEAD_BYTES = 5;

    private $database;

    /**
     * @param string $databaseName Database selected on the MongoDB server at localhost:27017.
     */
    public function __construct($databaseName) {
        $client = new MongoDB\Client("mongodb://localhost:27017");
        $this->database = $client->selectDatabase($databaseName);
    }

    /**
     * Builds the full analysis report for one document.
     *
     * @param string $collectionName Collection to read from.
     * @param mixed  $documentId     Value accepted by the ObjectId constructor.
     * @return array ['error' => ...] when not found, otherwise the report.
     */
    public function analyzeDocument($collectionName, $documentId) {
        $collection = $this->database->selectCollection($collectionName);
        $document = $collection->findOne(['_id' => new MongoDB\BSON\ObjectId($documentId)]);
        if ($document === null) {
            return ['error' => 'Document not found'];
        }

        $bsonSize = strlen(MongoDB\BSON\fromPHP($document));
        return [
            'document_id' => $documentId,
            'size_analysis' => [
                'total_bytes' => $bsonSize,
                'total_kb' => round($bsonSize / 1024, 2),
                'total_mb' => round($bsonSize / (1024 * 1024), 4),
                // Share of the 16MB document size limit
                'percentage_of_limit' => round(($bsonSize / (16 * 1024 * 1024)) * 100, 2),
            ],
            'field_analysis' => $this->analyzeFields($document),
            'type_analysis' => $this->analyzeTypes($document),
            'recommendations' => $this->generateRecommendations($document, $bsonSize),
        ];
    }

    /**
     * Lists every top-level field with size, type and nesting depth, largest first.
     *
     * @param array|Traversable $document
     * @return array
     */
    private function analyzeFields($document) {
        $fieldAnalysis = [];
        foreach ($document as $field => $value) {
            // Each field is encoded inside a one-element wrapper document, so the
            // wrapper's overhead is subtracted to report the element's own size.
            $wrapped = MongoDB\BSON\fromPHP([$field => $value]);
            $fieldAnalysis[] = [
                'field_name' => $field,
                'field_size_bytes' => strlen($wrapped) - self::DOCUMENT_OVERHEAD_BYTES,
                'field_type' => $this->getBSONType($value),
                'nesting_level' => $this->calculateNestingLevel($value),
            ];
        }

        // Sort by size, descending
        usort($fieldAnalysis, function ($a, $b) {
            return $b['field_size_bytes'] <=> $a['field_size_bytes'];
        });
        return $fieldAnalysis;
    }

    /**
     * Counts top-level fields per BSON type label.
     *
     * @param array|Traversable $document
     * @return array Map of type label to count.
     */
    private function analyzeTypes($document) {
        $typeCounts = [];
        foreach ($document as $value) {
            $type = $this->getBSONType($value);
            $typeCounts[$type] = ($typeCounts[$type] ?? 0) + 1;
        }
        return $typeCounts;
    }

    /**
     * Produces size and structure recommendations.
     *
     * @param array|Countable $document Top-level document.
     * @param int             $bsonSize Encoded size in bytes.
     * @return array List of ['type', 'message', 'action'] entries.
     */
    private function generateRecommendations($document, $bsonSize) {
        $recommendations = [];

        // Document size
        if ($bsonSize > 10 * 1024 * 1024) {
            $recommendations[] = [
                'type' => 'warning',
                'message' => 'Document is approaching 16MB limit',
                'action' => 'Consider splitting into multiple documents',
            ];
        }

        // Field count
        if (count($document) > 50) {
            $recommendations[] = [
                'type' => 'info',
                'message' => 'Document has many fields',
                'action' => 'Consider using nested structures',
            ];
        }

        // Array sizes
        foreach ($document as $field => $value) {
            if ($this->isListValue($value) && count($value) > 100) {
                $recommendations[] = [
                    'type' => 'info',
                    'message' => "Field '{$field}' has large array",
                    'action' => 'Consider using reference pattern',
                ];
            }
        }
        return $recommendations;
    }

    /**
     * Tells whether a value is an array, either a PHP array or the BSONArray
     * object used for arrays read through the library.
     *
     * @param mixed $value
     * @return bool
     */
    private function isListValue($value) {
        return is_array($value) || $value instanceof MongoDB\Model\BSONArray;
    }

    /**
     * Maps a PHP value to a human-readable BSON type label.
     *
     * @param mixed $value
     * @return string
     */
    private function getBSONType($value) {
        // Specific driver classes first; unlisted objects fall through to 'Object'.
        $classLabels = [
            MongoDB\BSON\ObjectId::class => 'ObjectId',
            MongoDB\BSON\UTCDateTime::class => 'UTCDateTime',
            MongoDB\BSON\Binary::class => 'Binary',
            MongoDB\BSON\Decimal128::class => 'Decimal128',
            MongoDB\BSON\Regex::class => 'Regex',
        ];
        foreach ($classLabels as $className => $label) {
            if ($value instanceof $className) {
                return $label;
            }
        }

        if ($this->isListValue($value)) {
            return 'Array';
        }
        if (is_object($value)) {
            return 'Object';
        }
        if (is_string($value)) {
            return 'String';
        }
        if (is_int($value)) {
            return 'Integer';
        }
        if (is_float($value)) {
            return 'Double';
        }
        if (is_bool($value)) {
            return 'Boolean';
        }
        if (is_null($value)) {
            return 'Null';
        }
        return 'Unknown';
    }

    /**
     * Returns the deepest nesting level reachable from a value.
     *
     * @param mixed $value
     * @param int   $currentLevel Depth of $value itself.
     * @return int $currentLevel for scalars and empty containers, otherwise the
     *             greatest level found among the children.
     */
    private function calculateNestingLevel($value, $currentLevel = 0) {
        if (!is_array($value) && !is_object($value)) {
            return $currentLevel;
        }
        $maxLevel = $currentLevel;
        foreach ($value as $item) {
            $maxLevel = max($maxLevel, $this->calculateNestingLevel($item, $currentLevel + 1));
        }
        return $maxLevel;
    }
}
// Usage example
$analyzer = new BSONDocumentAnalyzer('testdb');

// Insert a sample document to analyze
$testDocument = [
    'name' => 'Test Document',
    'description' => str_repeat('x', 1000),
    'tags' => array_fill(0, 50, 'test'),
    'metadata' => [
        'created' => new MongoDB\BSON\UTCDateTime(),
        'updated' => new MongoDB\BSON\UTCDateTime(),
        'version' => 1.0,
        'author' => 'Test Author'
    ],
    'items' => array_fill(0, 20, ['id' => 1, 'value' => 'test'])
];
$client = new MongoDB\Client("mongodb://localhost:27017");
$database = $client->selectDatabase("testdb");
$collection = $database->selectCollection('test_documents');
$insertResult = $collection->insertOne($testDocument);
$documentId = $insertResult->getInsertedId();

// Analyze the stored document
$analysis = $analyzer->analyzeDocument('test_documents', $documentId);
echo print_r($analysis, true);
?>
知识点总结
BSON核心概念
- 二进制格式:高效的二进制序列化格式
- 类型丰富:支持JSON之外的多种数据类型
- 高性能:优化的编码和解码性能
- 跨语言:支持多种编程语言
BSON结构特点
- 长度前缀:文档、字符串、二进制数据等变长元素带有长度前缀(定长类型如Double、整数则没有)
- 类型标识:每个元素都有类型标识符
- 连续存储:字段连续存储,便于遍历
- 大小限制:单个文档最大16MB
BSON优势
- 性能:比JSON更快的编码和解码
- 类型:支持更丰富的数据类型
- 遍历:优化的遍历性能
- 扩展:支持自定义类型扩展
最佳实践
- 文档大小:控制在1MB以内,避免接近16MB限制
- 字段命名:使用简短、描述性的字段名
- 类型选择:根据需求选择合适的BSON类型
- 结构优化:使用嵌套结构优化文档组织
拓展参考资料
官方文档
- BSON规范:http://bsonspec.org/
- MongoDB BSON类型:https://docs.mongodb.com/manual/reference/bson-types/
- BSON二进制格式:https://docs.mongodb.com/manual/reference/method/BSON/
推荐阅读
- 《BSON格式详解》
- 《MongoDB内部存储机制》
- 《高性能数据序列化技术》
在线资源
- BSON官方网站
- MongoDB University BSON课程
- Stack Overflow BSON相关问题
