PHP制作百度词典查词采集器
php  /  管理员 发布于 7年前   165
百度dict 采集样本 写的采集百度dict词典翻译后的所有结果数据,当然附带了13.5w单词库和采集简单的案例,这里我把写出的主要类dict.class.php放出来,项目地址http://github.com/widuu/baidu_dict,有需要的直接fork就可以了~么么哒,这东西用的人很少,所以有用的兄弟拿走了哈~
\(?P.*)\<\/strong\>\(?P (?P 以上就是本文的全部内容了,非常实用的功能,希望小伙伴们能够喜欢。 122 在 123 在 原梓番博客 在 博主 在 1111 在

    /**
     * Look up a word and collect every piece of data this class extracts for it.
     *
     * Stores the word on the instance and then calls each extractor in turn.
     * Every extractor visible in this listing calls getContent() itself, so one
     * lookup triggers several HTTP requests for the same page.
     *
     * @param string $word the word to look up
     * @return array array(
     *     "symbol"  => phonetic symbols,
     *     "pro"     => pronunciation,
     *     "example" => example sentences,
     *     "explain" => concise definitions,
     *     "synonym" => synonyms / antonyms,
     *     "phrase"  => array of phrases
     * )
     */
    public function content($word)
    {
        $this->word = $word;

        $symbol  = $this->Pronounced();
        $pro     = $this->getSay();
        $example = $this->getExample();
        $explain = $this->getExplain();
        $synonym = $this->getSynonym();
        $phrase  = $this->getPhrase();

        return array(
            "symbol"  => $symbol,  // phonetic symbols
            "pro"     => $pro,     // pronunciation
            "example" => $example, // example sentences
            "explain" => $explain, // concise definitions
            "synonym" => $synonym, // synonyms / antonyms
            "phrase"  => $phrase,  // array of phrases
        );
    }

    /**
     * Fetch the Baidu dictionary page for the current word over HTTP (cURL GET).
     *
     * On a cURL failure the error text is echoed and the raw curl_exec() result
     * is still returned to the caller.
     *
     * NOTE(review): $this->word is appended to the query string without URL
     * encoding; confirm whether callers pre-encode words containing spaces or
     * non-ASCII characters.
     *
     * @return string|false response body, or false when curl_exec() fails
     */
    private function getContent()
    {
        $useragent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0";
        $url = "http://dict.baidu.com/s?wd=" . $this->word;

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_HTTPGET, 1);
        curl_setopt($ch, CURLOPT_AUTOREFERER, 1);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);

        $result = curl_exec($ch);

        // Fix: the original inspected an undefined $curl variable, so transfer
        // errors were never reported; the handle in use is $ch.
        if (curl_errno($ch)) {
            echo 'Errno' . curl_error($ch);
        }
        curl_close($ch);

        return $result;
    }

    /**
     * Extract the first two captures that follow an "EN-US" marker in the page.
     *
     * @return array array('en' => first capture, 'us' => second capture);
     *               an entry is null when the page yields no such capture
     */
    private function Pronounced()
    {
        $data = $this->getContent();
        preg_match_all("/\"EN\-US\"\>(.*)\<\/b\>/Ui", $data, $pronounced);

        // Guard the indexes so a page without matches yields nulls instead of
        // undefined-offset notices/warnings.
        return array(
            'en' => isset($pronounced[1][0]) ? $pronounced[1][0] : null,
            'us' => isset($pronounced[1][1]) ? $pronounced[1][1] : null,
        );
    }

    /**
     * Extract the first two url="..." attribute values found in the page.
     *
     * @return array array('en' => first capture, 'us' => second capture);
     *               an entry is null when the page yields no such capture
     */
    private function getSay()
    {
        $data = $this->getContent();
        preg_match_all("/url=\"(.*)\"/Ui", $data, $pronounced);

        return array(
            'en' => isset($pronounced[1][0]) ? $pronounced[1][0] : null,
            'us' => isset($pronounced[1][1]) ? $pronounced[1][1] : null,
        );
    }

    /**
     * Extract example sentences from the page's inline "var example_data = ..." script.
     *
     * The captured literal is split on "[[[" and then on "[["; within each piece
     * the leading quoted token of every ["...", entry is collected and joined
     * with spaces to form one sentence. Sentences are then grouped in pairs.
     *
     * @return array list of chunks holding up to two sentences each
     *               (array(array("")) when nothing was found, as before)
     */
    private function getExample()
    {
        $data = $this->getContent();
        preg_match_all("/var example_data = (.*)\]\;/Us", $data, $example);

        // Missing match behaves like an empty literal (the original relied on
        // ltrim(null) producing "" and emitted a notice).
        $raw = isset($example[1][0]) ? $example[1][0] : "";

        $str = "";
        $groups = explode("[[[", "[[[" . ltrim($raw, "["));
        foreach ($groups as $group) {
            $sentences = explode("[[", "[[" . $group);
            foreach ($sentences as $sentence) {
                preg_match_all("/\[\"(.*)\",/Us", "[" . $sentence, $match);
                if (!empty($match[1])) {
                    // Fix: separator first. The legacy implode(array, string)
                    // order is a TypeError on PHP 8.
                    $str .= implode(" ", $match[1]) . "@";
                }
            }
        }

        // "@" is the temporary sentence delimiter; split on it and pair up.
        $flat = explode("@", trim($str, "@"));

        return array_chunk($flat, 2);
    }

/** * 获取简明释义 * return array (x => "词性",b => "附属") * **/private function getExplain(){$data = $this -> getContent();preg_match_all("/id\=\"en\-simple\-means\"\>(.*)\
(?
您可能感兴趣的文章:
Copyright·© 2019 侯体宗版权所有·
粤ICP备20027696号