Wiki CgX

Parce que j'ai un cerveau, mais pas trop.

Outils pour utilisateurs

Outils du site


code:php:classes:cgxcrawler.class.php

CgXCrawler.class.php

Une classe un peu fourre-tout pour utiliser curl plus facilement en PHP.

A documenter un peu quand même

CgXCrawler.class.php
<?php
 
/**
 * Small catch-all helper around the PHP curl extension.
 *
 * Provides a single HTTP entry point (fetch_data) and a string-extraction
 * utility (cutString) for pulling a fragment out of a fetched document.
 *
 * The constants CRAWLER_TIMEOUT and CRAWLER_COOKIEJAR are not defined in this
 * file; they must be defined before the class is instantiated.
 */
class CgXCrawler
	{
	/** @var int Value passed to CURLOPT_TIMEOUT (seconds, per the curl documentation). */
	private $timeout = CRAWLER_TIMEOUT;

	/** @var string Cookie file path, used both for reading (COOKIEFILE) and writing (COOKIEJAR). */
	protected $cookiejar=CRAWLER_COOKIEJAR;

	/**
	 * Performs an HTTP request with curl and returns the response body.
	 *
	 * For POST, $data is handed to curl as the request body (CURLOPT_POSTFIELDS).
	 * For every other method, $data is appended to the URL as a query string;
	 * 'DELETE' and 'PUT' additionally set the custom request verb.
	 *
	 * @param string            $url        Target URL.
	 * @param string            $method     'GET' (default), 'POST', 'PUT' or 'DELETE'; any other value behaves like GET.
	 * @param array|string|bool $data       Request data, or false for none. Arrays are url-encoded for non-POST requests.
	 * @param array|string|bool $headers    One header line, a list of header lines, or false for none.
	 * @param bool              $returnInfo When true, also return the curl_getinfo() details.
	 *
	 * @return string|bool|array The response body (false when curl_exec fails), or
	 *                           array('contents' => body, 'info' => curl_getinfo result) when $returnInfo is true.
	 */
	public function fetch_data($url, $method = 'GET', $data = false, $headers = false, $returnInfo = false)
		{
		$ch = curl_init();

		if ($method === 'POST')
			{
			curl_setopt($ch, CURLOPT_URL, $url);
			curl_setopt($ch, CURLOPT_POST, true);

			if ($data !== false)
				curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
			}
		else
			{
			if ($method === 'DELETE' || $method === 'PUT')
				curl_setopt($ch, CURLOPT_CUSTOMREQUEST, $method);

			curl_setopt($ch, CURLOPT_URL, $this->buildRequestUrl($url, $data));
			}

		curl_setopt($ch, CURLOPT_HEADER, false);
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
		curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
		curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0');

		// The same file is used to load cookies before the request and to
		// store them when the handle is closed.
		curl_setopt($ch, CURLOPT_COOKIESESSION, true);
		curl_setopt($ch, CURLOPT_COOKIEJAR, $this->cookiejar);
		curl_setopt($ch, CURLOPT_COOKIEFILE, $this->cookiejar);

		if (substr($url, 0, 5) === 'https')
			{
			// NOTE(review): certificate and host verification are switched off
			// for https URLs, so the peer's identity is never checked. Kept
			// as-is for backward compatibility; enable verification if the
			// targets have valid certificates.
			curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
			curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
			}

		if ($headers !== false)
			{
			// CURLOPT_HTTPHEADER expects an array; wrap a single header line.
			curl_setopt($ch, CURLOPT_HTTPHEADER, is_array($headers) ? $headers : array($headers));
			}

		$contents = curl_exec($ch);

		if ($contents === false)
			{
			// Failure is reported on standard output and false is returned to the caller.
			echo 'Erreur Curl : ' . curl_error($ch);
			}

		// Transfer details must be read before the handle is closed.
		$info = $returnInfo ? curl_getinfo($ch) : null;

		curl_close($ch);

		if ($returnInfo)
			return array('contents' => $contents, 'info' => $info);

		return $contents;
		}

	/**
	 * Builds the URL for requests whose data travels in the query string.
	 *
	 * @param string            $url  Base URL, possibly already carrying a query string.
	 * @param array|string|bool $data Key/value pairs, a ready-made query string, or false for none.
	 *
	 * @return string The URL with the data appended.
	 */
	private function buildRequestUrl($url, $data)
		{
		if ($data === false)
			return $url;

		if (is_array($data))
			{
			$pairs = array();
			foreach ($data as $key => $value)
				$pairs[] = urlencode($key).'='.urlencode($value);

			$data = implode('&', $pairs);
			}

		// Use '&' when the URL already has a query string, so the result is
		// not a malformed "...?a=1?b=2".
		$separator = (strpos($url, '?') === false) ? '?' : '&';

		return $url.$separator.$data;
		}

	/**
	 * Returns the part of $string located between two delimiters.
	 *
	 * The search for $toString starts right after the first occurrence of
	 * $fromString; neither delimiter is included in the result.
	 *
	 * @param string $string     Text to search in.
	 * @param string $fromString Opening delimiter.
	 * @param string $toString   Closing delimiter.
	 *
	 * @return string|bool "" when $string is empty or whitespace only, false when
	 *                     either delimiter is not found, the enclosed text otherwise.
	 */
	public function cutString($string,$fromString,$toString)
		{
		if (trim($string) === '')
			return "";

		$start = strpos($string, $fromString);

		// Stop here when the opening delimiter is missing: searching for the
		// closing one from an offset past the end of the string raises a
		// ValueError on PHP 8 (a warning on PHP 7).
		if ($start === false)
			return false;

		$start += strlen($fromString);
		$end = strpos($string, $toString, $start);

		if ($end === false)
			return false;

		return substr($string, $start, $end - $start);
		}
	}
 
?>
code/php/classes/cgxcrawler.class.php.txt · Dernière modification : 23 Nov 2020 :: 13:16 de CgX