Using Curl in PHP, is there any way to inspect HTTP response headers before downloading the body?
Let's say I make a GET request to some URI and I want to grab the content only if the Content-Type is text/html.
I know I could make a HEAD request first, and then decide whether or not to use GET, but is it possible in just one request?
I'm basically looking for an equivalent of the following C# code:
// Send the GET request and look at the response headers before touching the body stream.
HttpWebRequest request = (HttpWebRequest)WebRequest.Create("http://some.uri");
// Dispose the response (and its connection) even when the body is never read.
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse()) {
    // ContentType can carry parameters such as "; charset=utf-8", so an exact
    // Equals("text/html") would reject ordinary HTML pages; match the media type prefix instead.
    if (response.ContentType.StartsWith("text/html", System.StringComparison.OrdinalIgnoreCase)) {
        // Only now is the body read from the response stream.
        using (StreamReader reader = new StreamReader(response.GetResponseStream())) {
            string body = reader.ReadToEnd();
        }
    } else {
        // do nothing
    }
}
Is it possible?
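Yes, you can use CURLOPT_WRITEFUNCTION in this case: the write callback is invoked while the response is still arriving, so it can inspect the Content-Type and abort the transfer before the rest of the body is downloaded.
To make things quicker, I use my own curl wrapper class in all my projects.
Let me post that as well.
See my code below.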
<?php
require_once( "curl.class.php" );

/** filled by my_read_callback(): raw response, i.e. the header bytes followed by the body **/
$response_body = null;
/** filled by my_read_callback(): number of header bytes at the front of $response_body **/
$header_size = null;

/** build the request, register the write callback, keep the headers in the output and run it **/
$curl = new ipCurl( "http://example.com/images/someimage.jpg" );
$curl->setReadCallback( "my_read_callback" )
     ->includeHeader()
     ->createCurl();

if ( $curl->getError() === 0 ) {
    /** transfer finished without error: cut the headers off and store what is left **/
    $response_body = substr( $response_body, $header_size );
    file_put_contents( "image.jpg", $response_body );
}
else {
    /** report "<error code>:<error message>" for any failed transfer **/
    echo $curl->getError().":".$curl->getErrorMessage();
    if ( $curl->getError() === 23 ) {
        // 23 is what curl reports when the callback stopped the transfer:
        // its not an image file (Failed writing body....)
    }
}
/**
 * cURL write callback: buffers every chunk it receives and aborts the transfer
 * as soon as the response's Content-Type is known and is not image/jpeg.
 *
 * Results are handed back through the globals $response_body (all chunks
 * appended in arrival order) and $header_size (CURLINFO_HEADER_SIZE as of the
 * most recent call).
 *
 * NOTE(review): when redirects are followed, an intermediate response that
 * carries a non-JPEG Content-Type may abort the transfer here as well — confirm
 * whether that is acceptable for your URLs.
 *
 * @param resource $ch   cURL handle the chunk belongs to. Taken by value: cURL
 *                       does not pass it by reference, and declaring it as a
 *                       reference makes PHP emit a warning on every call.
 * @param string   $data chunk received from cURL
 * @return int strlen( $data ) to continue, -1 to make cURL abort the transfer
 **/
function my_read_callback( $ch, $data ) {
    global $response_body, $header_size;

    /** no usable string is available until a Content-Type header has been parsed **/
    $content_type = trim( (string) curl_getinfo( $ch, CURLINFO_CONTENT_TYPE ) );

    if ( $content_type !== "" ) {
        /** compare the bare media type only, so "image/jpeg; charset=binary" or "Image/JPEG" still pass **/
        $parts      = explode( ";", $content_type );
        $media_type = strtolower( trim( $parts[0] ) );

        if ( $media_type !== "image/jpeg" ) {
            /** any return value different from strlen( $data ) stops the transfer;
                -1 (unlike false / 0) also differs for a zero-length chunk **/
            return -1;
        }
    }

    /** size of the header **/
    $header_size = curl_getinfo( $ch, CURLINFO_HEADER_SIZE );

    /** append response body to $response_body **/
    $response_body .= $data;

    /** tell cURL the whole chunk was consumed **/
    return strlen( $data );
}
?>
The contents of the curl.class.php file:
<?php
/**
 * Small fluent wrapper around one cURL handle.
 *
 * The constructor applies a set of defaults; every setter returns $this so calls
 * can be chained; createCurl() executes the request and stores the result, which
 * is then read through the get*() accessors.
 **/
class ipCurl {
    /** the underlying cURL handle **/
    private $ch = null;
    /** URL given to the constructor, used when createCurl() gets none **/
    private $url = null;
    /** curl_errno() of the last request **/
    private $_error = 0;
    /** curl_error() of the last request, or the ipStatusCodes text when that is empty **/
    private $_errmsg = null;
    /** full curl_getinfo() array of the last request **/
    private $_header = null;
    /** return value of curl_exec() for the last request **/
    private $_webpage = null;
    /** HTTP status code of the last request **/
    private $_status = 0;
    /** stream opened by setFile(); closed again by the destructor **/
    private $_fp = null;

    /**
     * Creates the handle and applies the defaults: return the transfer as a string,
     * 40s total / 30s connect timeout, follow up to 4 redirects, no headers in the
     * output, body included, peer verification off (see verifySSL()), a referer
     * built from the current request and a browser-like user agent.
     *
     * @param string|null $url URL to request; may also be passed to createCurl()
     * @throws Exception when the curl extension is not available
     **/
    public function __construct( $url = null ) {
        $this->url = $url;
        if ( !function_exists( "curl_init" ) ) {
            throw new Exception( "Fatal Error: Module 'Curl' is not installed properly" );
        }
        $this->ch = curl_init();
        curl_setopt( $this->ch, CURLOPT_RETURNTRANSFER, true );
        curl_setopt( $this->ch, CURLOPT_FRESH_CONNECT, false );
        curl_setopt( $this->ch, CURLOPT_FORBID_REUSE, false );
        $this->setTimout( 40 );
        $this->setConnTimout( 30 );
        $this->followLocation();
        $this->setMaxRedirects( 4 );
        $this->excludeHeader();
        $this->includeBody();
        $this->verifySSL();
        $this->setBinaryTransfer();
        /** SERVER_NAME / REQUEST_URI are not set on the command line; fall back to
            an empty referer there instead of raising undefined-index notices **/
        $host = isset( $_SERVER["SERVER_NAME"] ) ? $_SERVER["SERVER_NAME"] : "";
        $uri  = isset( $_SERVER["REQUEST_URI"] ) ? $_SERVER["REQUEST_URI"] : "";
        $this->setReferer( $host.$uri );
        $this->setUserAgent();
        return $this;
    }

    /**
     * Releases the cURL handle and the stream opened by setFile(), if any.
     **/
    public function __destruct() {
        if ( $this->ch ) {
            curl_close( $this->ch );
        }
        $this->ch = null;
        if ( is_resource( $this->_fp ) ) {
            fclose( $this->_fp );
        }
        $this->_fp = null;
    }

    /**
     * Registers $callback as CURLOPT_WRITEFUNCTION. Despite the method name this is
     * cURL's *write* callback, i.e. the one that receives the response data.
     *
     * @param callable|null $callback function( $ch, $data ) returning the bytes handled
     * @return ipCurl $this
     **/
    public function setReadCallback( $callback = null ) {
        curl_setopt( $this->ch, CURLOPT_WRITEFUNCTION, $callback );
        return $this;
    }

    /**
     * Turns progress reporting on and registers $callback for it.
     *
     * @param callable|null $callback progress callback
     * @param int           $buffer   value for CURLOPT_BUFFERSIZE
     * @return ipCurl $this
     **/
    public function setProgressCallback( $callback = null, $buffer = 128 ) {
        curl_setopt( $this->ch, CURLOPT_NOPROGRESS, false );
        curl_setopt( $this->ch, CURLOPT_PROGRESSFUNCTION, $callback );
        curl_setopt( $this->ch, CURLOPT_BUFFERSIZE, $buffer );
        return $this;
    }

    /** Include the response headers in the output. @return ipCurl $this **/
    public function includeHeader() {
        curl_setopt( $this->ch, CURLOPT_HEADER, true );
        return $this;
    }

    /** Leave the response headers out of the output. @return ipCurl $this **/
    public function excludeHeader() {
        curl_setopt( $this->ch, CURLOPT_HEADER, false );
        return $this;
    }

    /** Request the body as well (CURLOPT_NOBODY off). @return ipCurl $this **/
    public function includeBody() {
        curl_setopt( $this->ch, CURLOPT_NOBODY, false );
        return $this;
    }

    /** Do not request the body (CURLOPT_NOBODY on). @return ipCurl $this **/
    public function excludeBody() {
        curl_setopt( $this->ch, CURLOPT_NOBODY, true );
        return $this;
    }

    /**
     * Limits the number of redirects that are followed. Left untouched when
     * is_safe_mode() reports a restricted environment.
     *
     * @param int $redirects maximum number of redirects
     * @return ipCurl $this
     **/
    public function setMaxRedirects( $redirects = 4 ) {
        if ( $this->is_safe_mode() ) {
            return $this;
        }
        curl_setopt( $this->ch, CURLOPT_MAXREDIRS, $redirects );
        return $this;
    }

    /**
     * Follow "Location:" redirects; falls back to unfollowLocation() when
     * is_safe_mode() reports a restricted environment.
     *
     * @return ipCurl $this
     **/
    public function followLocation() {
        if ( $this->is_safe_mode() ) {
            return $this->unfollowLocation();
        }
        curl_setopt( $this->ch, CURLOPT_FOLLOWLOCATION, true );
        return $this;
    }

    /** Do not follow redirects. @return ipCurl $this **/
    public function unfollowLocation() {
        curl_setopt( $this->ch, CURLOPT_FOLLOWLOCATION, false );
        return $this;
    }

    /**
     * @param string|null $referer value for the Referer header
     * @return ipCurl $this
     **/
    public function setReferer( $referer = null ) {
        curl_setopt( $this->ch, CURLOPT_REFERER, $referer );
        return $this;
    }

    /**
     * Sets CURLOPT_BINARYTRANSFER.
     * NOTE(review): recent PHP versions appear to ignore this option — confirm
     * against the curl_setopt() manual before relying on it.
     *
     * @param bool $binary value for the option
     * @return ipCurl $this
     **/
    public function setBinaryTransfer( $binary = false ) {
        curl_setopt( $this->ch, CURLOPT_BINARYTRANSFER, $binary );
        return $this;
    }

    /**
     * @param int $timeout value for CURLOPT_TIMEOUT
     * @return ipCurl $this
     **/
    public function setTimout( $timeout ) {
        curl_setopt( $this->ch, CURLOPT_TIMEOUT, $timeout );
        return $this;
    }

    /**
     * @param int $timeout value for CURLOPT_CONNECTTIMEOUT
     * @return ipCurl $this
     **/
    public function setConnTimout( $timeout ) {
        curl_setopt( $this->ch, CURLOPT_CONNECTTIMEOUT, $timeout );
        return $this;
    }

    /**
     * @param string|null $userAgent User-Agent to send; any empty value selects the
     *                               built-in browser-like default
     * @return ipCurl $this
     **/
    public function setUserAgent( $userAgent = null ) {
        $userAgent = ( !$userAgent ) ? "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31" : $userAgent;
        curl_setopt( $this->ch, CURLOPT_USERAGENT, $userAgent );
        return $this;
    }

    /**
     * Routes the request through a proxy using basic proxy authentication.
     *
     * @param string|null $url      proxy host
     * @param int         $port     proxy port; only applied when greater than 0
     * @param string|null $username proxy user; credentials are only sent when set
     * @param string|null $password proxy password
     * @return ipCurl $this
     **/
    public function setProxy( $url = null, $port = 0, $username = null, $password = null ) {
        curl_setopt( $this->ch, CURLOPT_PROXYAUTH, CURLAUTH_BASIC );
        curl_setopt( $this->ch, CURLOPT_PROXY, $url.( ( $port ) > 0 ? ":".$port : null ) );
        if ( $port > 0 ) {
            curl_setopt( $this->ch, CURLOPT_PROXYPORT, $port );
        }
        if ( $username ) {
            curl_setopt( $this->ch, CURLOPT_PROXYUSERPWD, $username.":".$password );
        }
        return $this;
    }

    /**
     * Sets the "user:password" credentials for the request itself.
     *
     * @param string|null $username
     * @param string|null $password
     * @return ipCurl $this
     **/
    public function setAuth( $username = null, $password = null ) {
        curl_setopt( $this->ch, CURLOPT_USERPWD, $username.':'.$password );
        return $this;
    }

    /**
     * Uses one file both as cookie jar and as cookie source.
     *
     * @param string $file cookie file path; an empty value leaves the handle untouched
     * @return ipCurl $this
     **/
    public function setCookiFile( $file = "cookie.txt" ) {
        if ( !$file ) {
            return $this;
        }
        curl_setopt( $this->ch, CURLOPT_COOKIEJAR, $file );
        curl_setopt( $this->ch, CURLOPT_COOKIEFILE, $file );
        return $this;
    }

    /**
     * Switches TLS peer verification on or off.
     *
     * SECURITY: the default ( false ) disables peer verification, so the remote
     * certificate is not validated. Call verifySSL( true ) for anything that
     * handles sensitive data.
     *
     * @param bool $ssl true to verify the peer certificate
     * @return ipCurl $this
     **/
    public function verifySSL( $ssl = false ) {
        if ( !$ssl ) {
            curl_setopt( $this->ch, CURLOPT_SSL_VERIFYPEER, false );
            curl_setopt( $this->ch, CURLOPT_SSL_VERIFYHOST, 2 );
        }
        else {
            curl_setopt( $this->ch, CURLOPT_SSL_VERIFYPEER, true );
            /** state the host check explicitly instead of relying on the handle's current value **/
            curl_setopt( $this->ch, CURLOPT_SSL_VERIFYHOST, 2 );
        }
        return $this;
    }

    /**
     * Turns the request into a POST.
     *
     * @param array|string|null $postFields fields to send
     * @param bool              $keep_array pass an array through unchanged instead of
     *                                      encoding it with http_build_query()
     * @return ipCurl $this
     **/
    public function setPost( $postFields = null, $keep_array = false ) {
        if ( is_array( $postFields ) && !$keep_array ) {
            $postFields = http_build_query( $postFields );
        }
        curl_setopt( $this->ch, CURLOPT_POST, true );
        curl_setopt( $this->ch, CURLOPT_POSTFIELDS, $postFields );
        return $this;
    }

    /**
     * Writes the response into $file ( CURLOPT_FILE ). The file is opened with
     * mode "w", so it is created when missing and truncated when present.
     * Nothing is changed when the path cannot be resolved or is not writable.
     *
     * @param string|null $file target path
     * @return ipCurl $this
     **/
    public function setFile( $file = null ) {
        if ( !$file ) {
            return $this;
        }
        /** resolve the directory rather than the file itself: realpath() on a file
            that does not exist yet returns false, which used to rule out new files **/
        $dir = realpath( dirname( $file ) );
        if ( $dir === false ) {
            return $this;
        }
        $target = $dir.DIRECTORY_SEPARATOR.basename( $file );
        /** the file is opened for writing, so writability is what has to be checked **/
        $writable = file_exists( $target ) ? is_writable( $target ) : is_writable( $dir );
        if ( !$writable ) {
            return $this;
        }
        $fp = fopen( $target, "w" );
        if ( $fp === false ) {
            return $this;
        }
        /** drop a stream left over from an earlier setFile() call **/
        if ( is_resource( $this->_fp ) ) {
            fclose( $this->_fp );
        }
        $this->_fp = $fp;
        curl_setopt( $this->ch, CURLOPT_FILE, $fp );
        return $this;
    }

    /**
     * @param array $header raw header lines; the default sends an empty "Expect:" header
     * @return ipCurl $this
     **/
    public function setHeader( $header = array( "Expect:" ) ) {
        curl_setopt( $this->ch, CURLOPT_HTTPHEADER, $header );
        return $this;
    }

    /**
     * Executes the request and records response, status code, error number, error
     * message and the curl_getinfo() array. When cURL reports no error message the
     * text for the HTTP status is used instead ( see parse_http_code() ).
     *
     * @param string|null $url URL to request; defaults to the constructor's URL
     * @return ipCurl $this
     * @throws Exception when neither this call nor the constructor supplied a URL
     **/
    public function createCurl( $url = null ) {
        $url = ( $url ) ? trim( $url ) : trim( $this->url );
        if ( !$url ) {
            throw new Exception( "Fatal Error: you must provide a valid url before calling 'createCurl'" );
        }
        curl_setopt( $this->ch, CURLOPT_URL, $url );
        $this->_webpage = curl_exec( $this->ch );
        $this->_status = (int)curl_getinfo( $this->ch, CURLINFO_HTTP_CODE );
        $this->_error = (int)curl_errno( $this->ch );
        $this->_errmsg = curl_error( $this->ch );
        $this->_header = curl_getinfo( $this->ch );
        if ( !$this->_errmsg ) {
            $this->_errmsg = $this->parse_http_code( $this->_status );
        }
        return $this;
    }

    /**
     * Looks up the text for an HTTP status code through the optional ipStatusCodes
     * class ( not part of this file ).
     *
     * @param int $code HTTP status code
     * @return mixed result of ipStatusCodes::info(), or null when that class is not loaded
     **/
    private function parse_http_code( $code = 404 ) {
        $code = (int)$code;
        if ( !class_exists( "ipStatusCodes" ) ) {
            return null;
        }
        return ipStatusCodes::info( $code );
    }

    /**
     * Reports whether PHP runs with open_basedir set or safe_mode switched on.
     * Either one alone counts; the previous "&&" combined with a comparison against
     * the literal 'Off' never detected safe_mode on its own.
     * NOTE(review): presumably these are the setups in which older PHP versions
     * refuse CURLOPT_FOLLOWLOCATION — confirm for the PHP versions you target.
     *
     * @return bool
     **/
    private function is_safe_mode() {
        $open_basedir = (string) ini_get( 'open_basedir' );
        /** ini_get() gives false when the setting does not exist; that filters to false **/
        $safe_mode = filter_var( ini_get( 'safe_mode' ), FILTER_VALIDATE_BOOLEAN );
        return ( $open_basedir !== '' || $safe_mode );
    }

    /** @return int HTTP status code of the last request **/
    public function getStatus() {
        return $this->_status;
    }

    /** @return mixed what curl_exec() returned for the last request **/
    public function getResponse() {
        return $this->_webpage;
    }

    /** @return array|null the curl_getinfo() array of the last request ( not the raw headers ) **/
    public function getHeader() {
        return $this->_header;
    }

    /** @return int cURL error number of the last request, 0 when there was none **/
    public function getError() {
        return $this->_error;
    }

    /** @return string|null error text of the last request **/
    public function getErrorMessage() {
        return $this->_errmsg;
    }
}
?>
You don't have to use my class; you can also register the callback directly on a plain cURL handle, like this:
<?php
/** Register "my_callback" as the write callback; assumes $ch is a cURL handle created earlier by the caller. **/
curl_setopt( $ch, CURLOPT_WRITEFUNCTION, "my_callback" );
/**
 * Skeleton of a cURL write callback.
 *
 * @param resource|null $ch   cURL handle the chunk belongs to
 * @param string        $data chunk received from cURL
 * @return int number of bytes handled; cURL aborts the transfer when this differs
 *             from strlen( $data ), which is what a callback without a return
 *             value would cause on the very first chunk
 **/
function my_callback( $ch = null, $data = "" ) {
    // same like the "my_read_callback" function in my above code
    return strlen( $data );
}
?>
With curl, the headers can also be read separately after you execute the request, by splitting the response as in the following code (note that the whole body has already been downloaded by then):
// Configure the handle so curl_exec() returns the raw headers followed by the body as one string.
$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_VERBOSE        => 1,
    CURLOPT_HEADER         => 1,
));
// Then, after your curl_exec call:
// ($response is assumed to hold the curl_exec() result; it is not assigned in this snippet)
$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
// The first $header_size bytes are the headers ...
$header = substr($response, 0, $header_size);
// ... and everything after them is the body.
$body = substr($response, $header_size);