淘宝商品采集
该方法是使用页面抓取然后请求的形式获取商品详情。具体用到了curl和phpQuery。采集淘宝商品有被封ip风险,谨慎使用。淘宝页面规则是一直更新的,所以抓取规则也需要对应当时的页面规则。获取淘宝商品
该方法通过请求商品页面并解析抓取到的页面内容来获取商品详情。具体用到了curl和phpQuery。
采集淘宝商品有被封IP的风险,请谨慎使用。
淘宝页面规则是一直更新的,所以抓取规则也需要对应当时的页面规则。
获取淘宝商品的信息:传入淘宝商品id,采集过程中可根据自己的业务逻辑对商品进行判断或处理。
(建议模仿人人商城的商品采集修改)
/**
 * Scrape a Taobao item page and assemble the product data found on it.
 *
 * The method downloads the item page, parses it with DOMDocument/SimpleXML
 * XPath queries, fetches per-SKU stock from the "sibUrl" endpoint referenced
 * in the page's `g_config` script, and finally downloads the item description.
 * The XPath expressions and string offsets mirror the page layout at the time
 * the scraper was written and must be updated whenever the page changes.
 *
 * Side effects: disables PHP error reporting for the remainder of the request,
 * replaces `$this->ihttp_func`, and stores the success payload in `$this->result`.
 *
 * @param string $itemid    Taobao item id.
 * @param string $taobaourl Not used by this method (kept for caller compatibility).
 * @param mixed  $cates     Not used by this method (kept for caller compatibility).
 * @param int    $merchid   Not used by this method (kept for caller compatibility).
 *
 * @return array On success: array('errcode' => 0, 'errmsg' => 'ok', 'data' => $item) where
 *               $item holds the keys specs, options, total, title, subTitle, productPrice,
 *               pics, params and content. On an early failure:
 *               array('result' => '0', 'error' => <message>). The two shapes differ on
 *               purpose here so that existing callers keep working.
 */
public function get_item_taobao($itemid = '', $taobaourl = '', $cates = '', $merchid = 0)
{
    // Scraped markup is irregular; the original code silences every PHP diagnostic.
    // Note that this affects the whole request, not just this method.
    error_reporting(0);

    $item = array();

    // ---------------------------------------------------------------
    // 1. Download the item page.
    // ---------------------------------------------------------------
    $url = $this->get_tmall_page_url($itemid);
    $this->ihttp_func = new App_Func_Ihttp(1);
    $response = $this->ihttp_func->ihttp_get($url);

    // NOTE(review): a *present* Content-Length header is treated as "no item data".
    // This looks inverted but is kept exactly as the original behaved
    // (`strval($header) != NULL` is true only for a non-empty string) - confirm.
    $length = isset($response['headers']['Content-Length']) ? strval($response['headers']['Content-Length']) : '';
    if ($length !== '') {
        return array('result' => '0', 'error' => '未从淘宝获取到商品信息!');
    }

    $content = $response['content'];
    if (function_exists('mb_convert_encoding')) {
        $content = mb_convert_encoding($content, 'UTF-8', 'UTF-8,GBK,GB2312,BIG5');
    }

    if ($this->ihttp_func->strexists($response['content'], 'ERRCODE_QUERY_DETAIL_FAIL')) {
        return array('result' => '0', 'error' => '宝贝不存在!');
    }

    // include_once: a plain include would redeclare the library's symbols if this
    // method runs more than once in the same request.
    include_once $_SERVER['DOCUMENT_ROOT'].'/public/vendor/phpquery/phpQuery.php';

    // The meta tag forces libxml to read the markup as UTF-8.
    $dom = new DOMDocument();
    $dom->loadHTML('<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>' . $content);
    $xml = simplexml_import_dom($dom);

    // ---------------------------------------------------------------
    // 2. Extract the stock endpoint ("sibUrl") from the g_config script.
    // ---------------------------------------------------------------
    preg_match('/var g_config\\s*=(.*);/isU', $content, $match);
    // Strip all whitespace so the fixed offsets below line up.
    $matchOne = isset($match[1]) ? str_replace(array(' ', "\r", "\n", "\t"), '', $match[1]) : '';

    $threeUrl = '';
    $sibPos = stripos($matchOne, 'sibUrl');
    if ($sibPos !== false) {
        $erdr = substr($matchOne, $sibPos);
        $descPos = stripos($erdr, 'descUrl');
        if ($descPos !== false) {
            // Text between "sibUrl" and "descUrl" looks like  sibUrl:'//host/path',
            $asd = explode(':', substr($erdr, 0, $descPos));
            if (isset($asd[1])) {
                // Drop the opening quote, then the closing quote and trailing comma.
                $threeUrl = (string) substr(substr($asd[1], 1), 0, -2);
            }
        }
    }

    // ---------------------------------------------------------------
    // 3. Fetch per-SKU stock and the price from the stock endpoint.
    // ---------------------------------------------------------------
    $detailskip = null;
    if ($threeUrl !== '') {
        $detailResponse = $this->ihttp_func->ihttp_request('https:' . $threeUrl, '', array('referer' => 'https://item.taobao.com?id=' . $itemid, 'accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'accept-encoding' => '', 'accept-language' => 'zh-CN,zh;q=0.9,en;q=0.8', 'user-agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36', 'CURLOPT_USERAGENT' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'));
        $detailskip = json_decode($detailResponse['content'], true);
    }

    $stockArray = array();
    if (is_array($detailskip) && $detailskip['code']['code'] == 0 && $detailskip['code']['message'] == 'SUCCESS') {
        if (isset($detailskip['data']['dynStock']['sku']) && is_array($detailskip['data']['dynStock']['sku'])) {
            $stockArray = $detailskip['data']['dynStock']['sku'];
        }
    }
    $marketPrice = isset($detailskip['data']['price']) ? $detailskip['data']['price'] : null;

    // ---------------------------------------------------------------
    // 4. Parse the spec groups (one <ul> per spec, one <li> per value).
    // ---------------------------------------------------------------
    $specifications = $xml->xpath('//*[@id="J_isku"]/div/dl/dd/ul');
    if (!is_array($specifications)) {
        $specifications = array();
    }

    $specificationsArray = array();
    // $guigeArr[group][n] = array(';propId:valueId', 'value title')
    $guigeArr = array();

    foreach ($specifications as $key => $specificationsInfo) {
        $sizeArray = (array) $specificationsInfo;
        $dataProperty = isset($sizeArray['@attributes']['data-property']) ? $sizeArray['@attributes']['data-property'] : '';
        $sizeAttributesArray = explode(':', $dataProperty);
        $specificationsArray[$key]['title'] = $sizeAttributesArray[0];

        // Casting the <ul> to an array yields a list for several <li> children but a
        // single object for exactly one; normalise both shapes to a list.
        $liNodes = isset($sizeArray['li']) ? $sizeArray['li'] : array();
        if (is_object($liNodes)) {
            $liNodes = array($liNodes);
        } elseif (!is_array($liNodes)) {
            $liNodes = array();
        }
        $specificationsArray[$key]['itemsCount'] = count($liNodes);

        foreach ($liNodes as $j => $liNode) {
            $liArray = (array) $liNode;
            $dataValue = isset($liArray['@attributes']['data-value']) ? $liArray['@attributes']['data-value'] : '';
            $dataValueParts = explode(':', $dataValue);

            $anchor = isset($liArray['a']) ? (array) $liArray['a'] : array();
            $specificationsTitle = isset($anchor['span']) ? (array) $anchor['span'] : array();
            $valueTitle = isset($specificationsTitle[0]) ? $specificationsTitle[0] : null;

            $specificationsArray[$key]['propId'] = $dataValueParts[0];
            $specificationsArray[$key]['items'][$j]['valueId'] = isset($dataValueParts[1]) ? $dataValueParts[1] : null;
            $specificationsArray[$key]['items'][$j]['title'] = $valueTitle;

            $guigeArr[$key][$j] = array(';' . $dataValue, strval($valueTitle));

            // The thumbnail lives in the anchor's inline style: background:url(//host/img.jpg_30x30.jpg)
            $style = isset($anchor['@attributes']['style']) ? $anchor['@attributes']['style'] : '';
            if (!empty($style)) {
                $fromSlashes = substr($style, stripos($style, '//'));
                $styleUrl = substr($fromSlashes, 0, stripos($fromSlashes, ')'));
                $thumb = mb_substr($styleUrl, 0, strpos($styleUrl, '_30x30.jpg'));
                $specificationsArray[$key]['items'][$j]['thumb'] = 'http:' . $thumb;
            } else {
                $specificationsArray[$key]['items'][$j]['thumb'] = '';
            }
        }
    }

    $item['specs'] = $this->my_sort($specificationsArray, 'itemsCount', SORT_ASC, SORT_STRING);

    // ---------------------------------------------------------------
    // 5. Build every SKU combination (Cartesian product of the spec groups).
    //    The first group varies slowest, matching the order of the former
    //    hand-written nested loops, and any number of groups is supported.
    // ---------------------------------------------------------------
    $combos = array();
    if (!empty($guigeArr)) {
        $combos = array(array('value' => '', 'titles' => array()));
        foreach ($guigeArr as $group) {
            $next = array();
            foreach ($combos as $combo) {
                foreach ($group as $option) {
                    $next[] = array(
                        'value' => $combo['value'] . $option[0],
                        'titles' => array_merge($combo['titles'], array($option[1])),
                    );
                }
            }
            $combos = $next;
        }
    }

    $item['options'] = array();
    $item['total'] = 0;
    foreach ($combos as $key => $combo) {
        // Key format used by the stock endpoint: ";propId:valueId;propId:valueId;"
        $skuKey = $combo['value'] . ';';

        $j = 0;
        foreach (array_filter(explode(';', $skuKey)) as $pair) {
            $pairParts = explode(':', $pair);
            $item['options'][$key]['option_specs'][$j]['propId'] = $pairParts[0];
            $item['options'][$key]['option_specs'][$j]['valueId'] = isset($pairParts[1]) ? $pairParts[1] : null;
            ++$j;
        }

        if (!empty($stockArray[$skuKey])) {
            $item['options'][$key]['stock'] = $stockArray[$skuKey]['stock'];
            $item['total'] = $item['total'] + $stockArray[$skuKey]['stock'];
        } else {
            $item['options'][$key]['stock'] = 0;
        }

        // Titles are carried as a list, so a '+' or '|' inside a title is preserved.
        $item['options'][$key]['title'] = $combo['titles'];
        $item['options'][$key]['marketprice'] = $marketPrice;
    }

    // ---------------------------------------------------------------
    // 6. Title, subtitle and list price.
    // ---------------------------------------------------------------
    $prodectNameContent = $xml->xpath('//*[@id="J_Title"]');
    $titleArr = isset($prodectNameContent[0]) ? (array) $prodectNameContent[0] : array();
    $item['title'] = isset($titleArr['h3']) ? trim(strval($titleArr['h3'])) : '';

    $prodectDescContent = $xml->xpath('//div/div/div/div/div/div/div/div/div/div/div[1]');
    $item['subTitle'] = isset($prodectDescContent[1]) ? trim(strval($prodectDescContent[1]->p)) : '';

    $prodectPrice = $xml->xpath('//*[@id="J_StrPrice"]');
    $prodectPriceArr = isset($prodectPrice[0]) ? (array) $prodectPrice[0] : array();
    // The second <em> is read as the price text; for a "low-high" range keep the low end.
    $taoBaoPrice = isset($prodectPriceArr['em'][1]) ? trim(strval($prodectPriceArr['em'][1])) : '';
    $taoBaoPriceArr = explode('-', $taoBaoPrice);
    $item['productPrice'] = $taoBaoPriceArr[0];

    // ---------------------------------------------------------------
    // 7. Gallery images (at most the first five thumbnails).
    // ---------------------------------------------------------------
    $imgs = array();
    for ($i = 1; $i < 6; ++$i) {
        $img = $xml->xpath('//*[@id="J_UlThumb"]/li[' . $i . ']');
        if (empty($img)) {
            continue;
        }
        $img = strval($img[0]->div->a->img['data-src']);
        // Cut the "_50x50.jpg" thumbnail suffix to obtain the full-size image.
        $img = mb_substr($img, 0, strpos($img, '_50x50.jpg'));
        // The source may be "//host/x.jpg" or "http://host/x.jpg"; normalise to http.
        $imgArr = explode(':', $img);
        if (count($imgArr) == 2) {
            $img = 'http:' . $imgArr[1];
        } else {
            $img = 'http:' . $imgArr[0];
        }
        $imgs[] = $img;
    }
    $item['pics'] = $imgs;

    // ---------------------------------------------------------------
    // 8. Attribute list ("name: value" entries).
    // ---------------------------------------------------------------
    $paramsContent = $xml->xpath('//*[@id="attributes"]');
    $paramsContent = isset($paramsContent[0]) ? (array) $paramsContent[0]->ul->li : array();
    if (!empty($paramsContent['@attributes'])) {
        unset($paramsContent['@attributes']);
    }

    $params = array();
    foreach ($paramsContent as $paramitem) {
        $paramitem = strval($paramitem);
        if (empty($paramitem)) {
            continue;
        }
        // Normalise the full-width colon (U+FF1A) to ASCII before splitting.
        $paramitem = trim(str_replace("\xEF\xBC\x9A", ':', $paramitem));
        $p1 = mb_strpos($paramitem, ':');
        if ($p1 === false) {
            // No separator: keep the whole text as the value instead of losing its first character.
            $ptitle = '';
            $pvalue = $paramitem;
        } else {
            $ptitle = mb_substr($paramitem, 0, $p1);
            $pvalue = mb_substr($paramitem, $p1 + 1, mb_strlen($paramitem));
        }
        $params[] = array('title' => $ptitle, 'value' => $pvalue);
    }
    $item['params'] = $params;

    // ---------------------------------------------------------------
    // 9. Item description.
    // ---------------------------------------------------------------
    $url = $this->get_taobao_detail_url($itemid);
    $response = $this->ihttp_func->ihttp_get($url);
    $item['content'] = $this->contentpasswh($response);

    $this->result = array(
        'errcode' => 0,
        'errmsg' => 'ok',
        'data' => $item,
    );

    return $this->result;
}
相关文章
发表评论
评论列表
- 这篇文章还没有收到评论,赶紧来抢沙发吧~