解析:使用php mongodb扩展时 需要注意的事项
最近在使用php的mongo 扩展进行数据统计计算,其中有一个时间戳字段,由于精确到了毫秒,长度有13位,但由于开始的时候是以字符串的形式存储:
{ "_id" : ObjectId("504eea97e4b023cf38e34039"), "in_ts" : NumberLong("1347349143699"), "log" : { "guid" : "4D1F3079-7507-F4B0-E7AF-5432D5D8229D", "p" : "View_Prop_YepPage_Zheng", "cid" : "11", "url" : "http://shanghai.haozu.com/rental/broker/n/10481780", "rfpn" : "Listing_V2_IndexPage_All", "site" : "haozu", "agent" : "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)", "stamp" : "1347349162159", "cip" : "116.226.70.44", "referer" : "http://shanghai.haozu.com/shop/1464934/", "cstamp" : "1347349323125", "sessid" : "FA798056-F9E7-F961-41E0-CC95C850FA47", "uguid" : "C00FF55B-3D3D-4B31-4318-12345B0DBE64", "pn" : "View_Prop_YepPage_Zheng", "cstparam" : { "proId" : NumberLong(10481780), "brokerId" : "326792", "tradeType" : "2", "userType" : "0", "channel" : "site", "entry" : "1", "COMMID" : "1666" } }, "out_ts" : NumberLong("1347349466083"), "rule" : 0, "status" : "ok", "txid" : 0 }
后来改成数字格式:
{ "_id" : ObjectId("504eea97e4b023cf38e34039"), "in_ts" : NumberLong("1347349143699"), "log" : { "guid" : "4D1F3079-7507-F4B0-E7AF-5432D5D8229D", "p" : "View_Prop_YepPage_Zheng", "cid" : "11", "url" : "http://shanghai.haozu.com/rental/broker/n/10481780", "rfpn" : "Listing_V2_IndexPage_All", "site" : "haozu", "agent" : "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)", "stamp" : NumberLong("1347349162159"), "cip" : "116.226.70.44", "referer" : "http://shanghai.haozu.com/shop/1464934/", "cstamp" : "1347349323125", "sessid" : "FA798056-F9E7-F961-41E0-CC95C850FA47", "uguid" : "C00FF55B-3D3D-4B31-4318-12345B0DBE64", "pn" : "View_Prop_YepPage_Zheng", "cstparam" : { "proId" : NumberLong(10481780), "brokerId" : "326792", "tradeType" : "2", "userType" : "0", "channel" : "site", "entry" : "1", "COMMID" : "1666" } }, "out_ts" : NumberLong("1347349466083"), "rule" : 0, "status" : "ok", "txid" : 0 }
为字符串时,使用下面的查询是正常的
$query = array ('log.stamp' => array ('$gte' => ‘1347346800000', '$lt' => ‘1347350400000'));
但是改为数字后,使用下面的查询,死活没有结果,但是直接在mongo客户端直接查询是有结果的:
db.haozu_success.find({'log.stamp':{$gte:1347346800000,$lt:1347350400000}})
php手册上也是这么个用法:
$query = array ('log.stamp' => array ('$gte' => 1347346800000, '$lt' => 1347350400000));
花了好大一会找原因,开始时怀疑是php扩展的bug导致,经过一番思考。突然想到可能是类型问题导致,发现手册上有Types 介绍,所以正确的用法如下:
$query = array ('log.stamp' => array ('$gte' => new MongoInt64($time_range['start']), '$lt' => new MongoInt64($time_range['end'])));
另外,在使用mapreduce进行数据统计时,为了防止cursor出现超时异常,还需要设置一下超时时间
$map = new MongoCode ( '
function(){
var prop_id=this.log.cstparam.proId;
var key=this.log.site+prop_id
emit(key,{"channel":this.log.site,"prop_id":prop_id,"count":1});
}
' );
$reduce = new MongoCode ( '
function(key,emits){
var total=0;
for(var i in emits){
total+=emits[i].count;
}
return {"channel":emits[0].channel,"prop_id":eval(emits[0].prop_id),"count":total};
}
' );
$this->mongo_db->command ( array ('mapreduce' => $collection_name, 'map' => $map, 'reduce' => $reduce, 'out' => $tmp_result, 'query' => $query),array('timeout'=>self::MONGO_CURSOR_TIMEOUT) );
复制代码 代码如下:
{ "_id" : ObjectId("504eea97e4b023cf38e34039"), "in_ts" : NumberLong("1347349143699"), "log" : { "guid" : "4D1F3079-7507-F4B0-E7AF-5432D5D8229D", "p" : "View_Prop_YepPage_Zheng", "cid" : "11", "url" : "http://shanghai.haozu.com/rental/broker/n/10481780", "rfpn" : "Listing_V2_IndexPage_All", "site" : "haozu", "agent" : "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)", "stamp" : "1347349162159", "cip" : "116.226.70.44", "referer" : "http://shanghai.haozu.com/shop/1464934/", "cstamp" : "1347349323125", "sessid" : "FA798056-F9E7-F961-41E0-CC95C850FA47", "uguid" : "C00FF55B-3D3D-4B31-4318-12345B0DBE64", "pn" : "View_Prop_YepPage_Zheng", "cstparam" : { "proId" : NumberLong(10481780), "brokerId" : "326792", "tradeType" : "2", "userType" : "0", "channel" : "site", "entry" : "1", "COMMID" : "1666" } }, "out_ts" : NumberLong("1347349466083"), "rule" : 0, "status" : "ok", "txid" : 0 }
后来改成数字格式:
复制代码 代码如下:
{ "_id" : ObjectId("504eea97e4b023cf38e34039"), "in_ts" : NumberLong("1347349143699"), "log" : { "guid" : "4D1F3079-7507-F4B0-E7AF-5432D5D8229D", "p" : "View_Prop_YepPage_Zheng", "cid" : "11", "url" : "http://shanghai.haozu.com/rental/broker/n/10481780", "rfpn" : "Listing_V2_IndexPage_All", "site" : "haozu", "agent" : "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)", "stamp" : NumberLong("1347349162159"), "cip" : "116.226.70.44", "referer" : "http://shanghai.haozu.com/shop/1464934/", "cstamp" : "1347349323125", "sessid" : "FA798056-F9E7-F961-41E0-CC95C850FA47", "uguid" : "C00FF55B-3D3D-4B31-4318-12345B0DBE64", "pn" : "View_Prop_YepPage_Zheng", "cstparam" : { "proId" : NumberLong(10481780), "brokerId" : "326792", "tradeType" : "2", "userType" : "0", "channel" : "site", "entry" : "1", "COMMID" : "1666" } }, "out_ts" : NumberLong("1347349466083"), "rule" : 0, "status" : "ok", "txid" : 0 }
为字符串时,使用下面的查询是正常的
复制代码 代码如下:
$query = array ('log.stamp' => array ('$gte' => ‘1347346800000', '$lt' => ‘1347350400000'));
但是改为数字后,使用下面的查询,死活没有结果,但是直接在mongo客户端直接查询是有结果的:
复制代码 代码如下:
db.haozu_success.find({'log.stamp':{$gte:1347346800000,$lt:1347350400000}})
php手册上也是这么个用法:
复制代码 代码如下:
$query = array ('log.stamp' => array ('$gte' => 1347346800000, '$lt' => 1347350400000));
花了好大一会找原因,开始时怀疑是php扩展的bug导致,经过一番思考。突然想到可能是类型问题导致,发现手册上有Types 介绍,所以正确的用法如下:
复制代码 代码如下:
$query = array ('log.stamp' => array ('$gte' => new MongoInt64($time_range['start']), '$lt' => new MongoInt64($time_range['end'])));
另外,在使用mapreduce进行数据统计时,为了防止cursor出现超时异常,还需要设置一下超时时间
复制代码 代码如下:
$map = new MongoCode ( '
function(){
var prop_id=this.log.cstparam.proId;
var key=this.log.site+prop_id
emit(key,{"channel":this.log.site,"prop_id":prop_id,"count":1});
}
' );
$reduce = new MongoCode ( '
function(key,emits){
var total=0;
for(var i in emits){
total+=emits[i].count;
}
return {"channel":emits[0].channel,"prop_id":eval(emits[0].prop_id),"count":total};
}
' );
$this->mongo_db->command ( array ('mapreduce' => $collection_name, 'map' => $map, 'reduce' => $reduce, 'out' => $tmp_result, 'query' => $query),array('timeout'=>self::MONGO_CURSOR_TIMEOUT) );