PHP/Curl: inspecting response headers before downloading body

2020-02-12 08:35发布

问题:

Using Curl in PHP, is there any way to inspect HTTP response headers before downloading the body? Let's say I make a GET request to some URI and I want to grab the content only if Content-type is text/html. I know I could make a HEAD request first, and then decide whether or not to use GET, but is it possible in just one request?

I'm basically looking for an equivalent of the following C# code:

HttpWebRequest request = (HttpWebRequest)WebRequest.Create("http://some.uri");
// Dispose the response on every path so the underlying connection is released
// even when the body is skipped.
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
{
    // The Content-Type value may carry parameters ("text/html; charset=utf-8")
    // and media types are case-insensitive, so match on the prefix instead of
    // requiring an exact string.
    if (response.ContentType != null &&
        response.ContentType.StartsWith("text/html", StringComparison.OrdinalIgnoreCase)) {
        // The reader owns the response stream and closes it when disposed.
        using (StreamReader reader = new StreamReader(response.GetResponseStream())) {
            string body = reader.ReadToEnd();
        }
    } else {
        // do nothing
    }
}

Is it possible?

回答1:

Yes, I see what you mean: you can use CURLOPT_WRITEFUNCTION in this case.
To make things quicker, I use my own curl class in all my projects,
so let me post that as well.
See my code below.

<?php
  require_once( "curl.class.php" );

  /** filled by my_read_callback(): everything cURL hands over (headers, then body) **/
  $response_body  = null;
  /** filled by my_read_callback(): length of the header part of the response **/
  $header_size    = null;

  /** build the request, hook the callback, ask for headers in the output, then run it **/
  $curl = new ipCurl( "http://example.com/images/someimage.jpg" );
  $curl->setReadCallback( "my_read_callback" )
       ->includeHeader()
       ->createCurl();

  $error_code = $curl->getError();

  if ( $error_code === 0 ) {
    /** transfer completed: strip the header bytes and keep only the body **/
    $response_body  = substr( $response_body, $header_size );
    file_put_contents( "image.jpg", $response_body );
  }
  else {
    /** transfer failed or was aborted: report the error code and message **/
    echo $error_code.":".$curl->getErrorMessage();

    if ( $error_code === 23 ) {
      // its not an image file (Failed writing body....)
    }
  }

  /**
   * cURL write callback (registered through CURLOPT_WRITEFUNCTION).
   *
   * Buffers each chunk into the global $response_body and records the header
   * length in the global $header_size. As soon as cURL knows the Content-Type
   * and it is not JPEG, the callback reports 0 bytes consumed, which makes cURL
   * abort the transfer with a write error.
   *
   * NOTE(review): when redirects are followed, the Content-Type of an
   * intermediate response may presumably trigger the abort as well; confirm
   * against the servers you talk to.
   *
   * @param resource|CurlHandle $ch   handle the data belongs to (passed by value by cURL)
   * @param string              $data chunk that was just received
   * @return int strlen( $data ) to continue, 0 to abort the transfer
   */
  function my_read_callback( $ch, $data ) {
    global $response_body, $header_size;
    /** Content-Type is not available until its header line was parsed, so cast before trimming **/
    $content_type = trim( (string)curl_getinfo( $ch, CURLINFO_CONTENT_TYPE ) );
    /** Give it time to get the content-type string from header **/
    if ( $content_type !== "" ) {
      /** ignore parameters such as "; charset=..." and compare case-insensitively **/
      $parts      = explode( ";", $content_type );
      $media_type = strtolower( trim( $parts[0] ) );
      /** stop executing curl if its not an image **/
      if ( $media_type !== "image/jpeg" ) {
        return 0;
      }
    }
    /** size of the header **/
    $header_size  = curl_getinfo( $ch, CURLINFO_HEADER_SIZE );
    /** append response body to $response_body **/
    $response_body  .=  $data;
    /** return current response length **/
    return strlen( $data );
  }
?>

The curl.class.php file contents

<?php
/**
 * Small fluent wrapper around a single cURL handle.
 *
 * Every setter returns $this so calls can be chained. createCurl() runs the
 * request and stores the result, status, error and transfer info, which are
 * then available through the get*() accessors.
 */
class ipCurl {
  /** @var resource|CurlHandle|null the wrapped cURL handle */
  private $ch   = null;
  /** @var string|null URL used by createCurl() when none is passed in */
  private $url  = null;
  /** @var resource|null stream opened by setFile(); closed in the destructor */
  private $fp   = null;

  /** @var int cURL error number of the last request (0 = no error) */
  private $_error   = 0;
  /** @var string|null cURL error text, or the status description when cURL reported none */
  private $_errmsg  = null;
  /** @var array|null full curl_getinfo() array of the last request */
  private $_header  = null;
  /** @var mixed whatever curl_exec() returned for the last request */
  private $_webpage = null;
  /** @var int HTTP status code of the last request */
  private $_status  = 0;

  /**
   * Creates the handle and applies the default options.
   *
   * @param string|null $url default URL for createCurl()
   * @throws Exception when the cURL extension is missing or no handle can be created
   */
  public function __construct( $url = null ) {
    $this->url  = $url;

    if ( !function_exists( "curl_init" ) ) {
      throw new Exception( "Fatal Error: Module 'Curl' is not installed properly" );
    }

    $handle = curl_init();
    if ( !$handle ) {
      /** every later curl_setopt() would fail on a missing handle, so stop here **/
      throw new Exception( "Fatal Error: unable to initialize a cURL handle" );
    }
    $this->ch = $handle;

    curl_setopt( $this->ch, CURLOPT_RETURNTRANSFER, true );
    curl_setopt( $this->ch, CURLOPT_FRESH_CONNECT, false );
    curl_setopt( $this->ch, CURLOPT_FORBID_REUSE, false );

    $this->setTimout( 40 );
    $this->setConnTimout( 30 );
    $this->followLocation();
    $this->setMaxRedirects( 4 );
    $this->excludeHeader();
    $this->includeBody();
    $this->verifySSL();
    $this->setBinaryTransfer();

    /** these keys do not exist on the command line, so read them defensively **/
    $server_name  = isset( $_SERVER["SERVER_NAME"] ) ? $_SERVER["SERVER_NAME"] : "";
    $request_uri  = isset( $_SERVER["REQUEST_URI"] ) ? $_SERVER["REQUEST_URI"] : "";
    $this->setReferer( $server_name.$request_uri );
    $this->setUserAgent();
  }

  /**
   * Releases the cURL handle and the stream opened by setFile(), if any.
   * Also runs when the constructor threw, so the handle may still be null.
   */
  public function __destruct() {
    if ( $this->ch ) {
      /** curl_close() has no effect since PHP 8.0; the handle is freed with its last reference **/
      if ( PHP_VERSION_ID < 80000 ) {
        curl_close( $this->ch );
      }
      $this->ch = null;
    }

    if ( is_resource( $this->fp ) ) {
      fclose( $this->fp );
    }
    $this->fp = null;
  }

  /**
   * Registers the callback that receives the response data.
   * Despite the name, this sets CURLOPT_WRITEFUNCTION.
   *
   * @param callable|null $callback function( $ch, $data ) returning the number of bytes handled
   * @return $this
   */
  public function setReadCallback( $callback = null ) {
    curl_setopt( $this->ch, CURLOPT_WRITEFUNCTION, $callback );
    return $this;
  }

  /**
   * Enables progress reporting through the given callback.
   *
   * @param callable|null $callback progress callback
   * @param int           $buffer   value for CURLOPT_BUFFERSIZE
   * @return $this
   */
  public function setProgressCallback( $callback = null, $buffer = 128 ) {
    curl_setopt( $this->ch, CURLOPT_NOPROGRESS, false );
    curl_setopt( $this->ch, CURLOPT_PROGRESSFUNCTION, $callback );
    curl_setopt( $this->ch, CURLOPT_BUFFERSIZE, $buffer );
    return $this;
  }

  /** Includes the response headers in the output. @return $this */
  public function includeHeader() {
    curl_setopt( $this->ch, CURLOPT_HEADER, true );
    return $this;
  }

  /** Leaves the response headers out of the output. @return $this */
  public function excludeHeader() {
    curl_setopt( $this->ch, CURLOPT_HEADER, false );
    return $this;
  }

  /** Requests the body as well (CURLOPT_NOBODY off). @return $this */
  public function includeBody() {
    curl_setopt( $this->ch, CURLOPT_NOBODY, false );
    return $this;
  }

  /** Skips the body (CURLOPT_NOBODY on). @return $this */
  public function excludeBody() {
    curl_setopt( $this->ch, CURLOPT_NOBODY, true );
    return $this;
  }

  /**
   * Limits the number of redirects; left untouched in restricted environments.
   *
   * @param int $redirects maximum number of redirects to follow
   * @return $this
   */
  public function setMaxRedirects( $redirects = 4 ) {
    if ( $this->is_safe_mode() ) {
      return $this;
    }
    curl_setopt( $this->ch, CURLOPT_MAXREDIRS, $redirects );
    return $this;
  }

  /**
   * Follows "Location:" redirects, unless the environment is restricted,
   * in which case following is switched off explicitly.
   *
   * @return $this
   */
  public function followLocation() {
    if ( $this->is_safe_mode() ) {
      return $this->unfollowLocation();
    }
    curl_setopt( $this->ch, CURLOPT_FOLLOWLOCATION, true );
    return $this;
  }

  /** Stops following redirects. @return $this */
  public function unfollowLocation() {
    curl_setopt( $this->ch, CURLOPT_FOLLOWLOCATION, false );
    return $this;
  }

  /**
   * @param string|null $referer value for the Referer request header
   * @return $this
   */
  public function setReferer( $referer = null ) {
    curl_setopt( $this->ch, CURLOPT_REFERER, $referer );
    return $this;
  }

  /**
   * Sets CURLOPT_BINARYTRANSFER.
   * NOTE(review): recent PHP versions mark this option as deprecated; confirm
   * whether the call can be dropped for the PHP versions you support.
   *
   * @param bool $binary
   * @return $this
   */
  public function setBinaryTransfer( $binary = false ) {
    curl_setopt( $this->ch, CURLOPT_BINARYTRANSFER, $binary );
    return $this;
  }

  /**
   * @param int $timeout value for CURLOPT_TIMEOUT
   * @return $this
   */
  public function setTimout( $timeout ) {
    curl_setopt( $this->ch, CURLOPT_TIMEOUT, $timeout );
    return $this;
  }

  /**
   * @param int $timeout value for CURLOPT_CONNECTTIMEOUT
   * @return $this
   */
  public function setConnTimout( $timeout ) {
    curl_setopt( $this->ch, CURLOPT_CONNECTTIMEOUT, $timeout );
    return $this;
  }

  /**
   * @param string|null $userAgent User-Agent string; a desktop Chrome string is used when empty
   * @return $this
   */
  public function setUserAgent( $userAgent = null ) {
    $userAgent  = ( !$userAgent ) ? "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31" : $userAgent;
    curl_setopt( $this->ch, CURLOPT_USERAGENT, $userAgent );
    return $this;
  }

  /**
   * Routes the request through a proxy using basic proxy authentication.
   *
   * @param string|null $url      proxy host
   * @param int         $port     proxy port; ignored when not positive
   * @param string|null $username proxy user; credentials are only sent when set
   * @param string|null $password proxy password
   * @return $this
   */
  public function setProxy( $url = null, $port = 0, $username = null, $password = null ) {
    curl_setopt( $this->ch, CURLOPT_PROXYAUTH, CURLAUTH_BASIC );
    curl_setopt( $this->ch, CURLOPT_PROXY, $url.( ( $port ) > 0 ? ":".$port : null ) );

    if ( $port > 0 ) {
      curl_setopt( $this->ch, CURLOPT_PROXYPORT, $port );
    }

    if ( $username ) {
      curl_setopt( $this->ch, CURLOPT_PROXYUSERPWD, $username.":".$password );
    }

    return $this;
  }

  /**
   * @param string|null $username
   * @param string|null $password
   * @return $this
   */
  public function setAuth( $username = null, $password = null ) {
    curl_setopt( $this->ch, CURLOPT_USERPWD, $username.':'.$password );
    return $this;
  }

  /**
   * Uses the same file for reading and storing cookies.
   *
   * @param string $file cookie file path; nothing is changed when empty
   * @return $this
   */
  public function setCookiFile( $file = "cookie.txt" ) {
    if ( !$file ) {
      return $this;
    }
    curl_setopt( $this->ch, CURLOPT_COOKIEJAR, $file );
    curl_setopt( $this->ch, CURLOPT_COOKIEFILE, $file );
    return $this;
  }

  /**
   * Switches certificate verification on or off.
   *
   * NOTE(review): the default (kept for backward compatibility) turns peer
   * verification OFF, and the constructor applies that default. Call
   * verifySSL( true ) for anything that must not be intercepted.
   *
   * @param bool $ssl true to verify the peer certificate
   * @return $this
   */
  public function verifySSL( $ssl = false ) {
    curl_setopt( $this->ch, CURLOPT_SSL_VERIFYPEER, $ssl ? true : false );
    /** host name check is requested in both modes, so it is set explicitly in both **/
    curl_setopt( $this->ch, CURLOPT_SSL_VERIFYHOST, 2 );
    return $this;
  }

  /**
   * Turns the request into a POST.
   *
   * @param array|string|null $postFields fields to send
   * @param bool              $keep_array pass arrays through unchanged instead of URL-encoding them
   * @return $this
   */
  public function setPost( $postFields = null, $keep_array = false ) {
    if ( is_array( $postFields ) && !$keep_array ) {
      $postFields = http_build_query( $postFields );
    }

    curl_setopt( $this->ch, CURLOPT_POST, true );
    curl_setopt( $this->ch, CURLOPT_POSTFIELDS, $postFields );

    return $this;
  }

  /**
   * Writes the transfer into the given file instead of returning it.
   *
   * The file is opened for writing (and truncated). It may be a new file as
   * long as its directory is writable. Unusable paths are ignored silently,
   * and the stream is closed when this object is destroyed.
   *
   * @param string|null $file target path
   * @return $this
   */
  public function setFile( $file = null ) {
    if ( $file === null || $file === "" ) {
      return $this;
    }

    /** the target is written to, so what matters is write access, not read access **/
    if ( file_exists( $file ) ) {
      $usable = is_file( $file ) && is_writable( $file );
    }
    else {
      $dir    = dirname( $file );
      $usable = is_dir( $dir ) && is_writable( $dir );
    }
    if ( !$usable ) {
      return $this;
    }

    $fp = fopen( $file, "w" );
    if ( $fp === false ) {
      return $this;
    }

    curl_setopt( $this->ch, CURLOPT_FILE, $fp );

    /** cURL now points at the new stream, so a previously opened one can go **/
    if ( is_resource( $this->fp ) ) {
      fclose( $this->fp );
    }
    $this->fp = $fp;

    return $this;
  }

  /**
   * @param array $header request header lines; the default suppresses "Expect:"
   * @return $this
   */
  public function setHeader( $header = array( "Expect:" ) ) {
    curl_setopt( $this->ch, CURLOPT_HTTPHEADER, $header );
    return $this;
  }

  /**
   * Executes the request and records result, status, error and transfer info.
   *
   * @param string|null $url URL to fetch; falls back to the constructor URL
   * @return $this
   * @throws Exception when no URL is available
   */
  public function createCurl( $url = null ) {
    $url  = ( $url ) ? trim( $url ) : trim( (string)$this->url );

    if ( !$url ) {
      throw new Exception( "Fatal Error: you must provide a valid url before calling 'createCurl'" );
    }
    curl_setopt( $this->ch, CURLOPT_URL, $url );

    $this->_webpage = curl_exec( $this->ch );
    $this->_status  = (int)curl_getinfo( $this->ch, CURLINFO_HTTP_CODE );
    $this->_error   = (int)curl_errno( $this->ch );
    $this->_errmsg  = curl_error( $this->ch );
    $this->_header  = curl_getinfo( $this->ch );

    /** no cURL-level error text: fall back to a description of the HTTP status **/
    if ( !$this->_errmsg ) {
      $this->_errmsg  = $this->parse_http_code( $this->_status );
    }

    return $this;
  }

  /**
   * Looks up a description for an HTTP status code through ipStatusCodes,
   * a class that is not part of this file.
   *
   * @param int $code HTTP status code
   * @return mixed result of ipStatusCodes::info(), or null when that class is not loaded
   */
  private function parse_http_code( $code = 404 ) {
    $code = (int)$code;
    if ( !class_exists( "ipStatusCodes" ) ) {
      return null;
    }
    return ipStatusCodes::info( $code );
  }

  /**
   * Tells whether the environment is restricted by open_basedir or by the
   * legacy safe_mode setting; used to avoid enabling redirect following there.
   *
   * @return bool
   */
  private function is_safe_mode() {
    /** ini_get() returns false for directives that no longer exist, hence the casts **/
    $open_basedir = (string)ini_get( 'open_basedir' );
    $safe_mode    = strtolower( (string)ini_get( 'safe_mode' ) );
    $safe_mode_on = in_array( $safe_mode, array( '1', 'on', 'yes', 'true' ), true );

    return ( $open_basedir !== '' || $safe_mode_on );
  }

  /** @return int HTTP status code of the last request */
  public function getStatus() {
    return $this->_status;
  }

  /** @return mixed value returned by curl_exec() for the last request */
  public function getResponse() {
    return $this->_webpage;
  }

  /** @return array|null curl_getinfo() array of the last request (not the raw header text) */
  public function getHeader() {
    return $this->_header;
  }

  /** @return int cURL error number of the last request, 0 when there was none */
  public function getError() {
    return $this->_error;
  }

  /** @return string|null error text or status description of the last request */
  public function getErrorMessage() {
    return $this->_errmsg;
  }
}
?>

You don't have to use my class; you can do the same thing with plain curl, like this:

<?php
  /** register "my_callback" as the write callback on an existing cURL handle $ch **/
  curl_setopt( $ch, CURLOPT_WRITEFUNCTION, "my_callback" );
  /**
   * Skeleton of a cURL write callback; cURL calls it as my_callback( $ch, $data ).
   *
   * It has to report how many bytes it handled: returning nothing is read as
   * 0 bytes and aborts the transfer with a write error.
   *
   * @param resource|CurlHandle|null $ch   handle the data belongs to
   * @param string                   $data chunk that was just received
   * @return int number of bytes handled
   */
  function my_callback( $ch = null, $data = "" ) {
    // same like the "my_read_callback" function in my above code
    return strlen( $data );
  }
?>


回答2:

With curl, the headers can be read separately after you execute the request, using the following code:

// Configure the handle in one call: return the transfer as a string, log
// verbosely, and put the response headers in front of the body.
$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_VERBOSE        => 1,
    CURLOPT_HEADER         => 1,
));

// Then, after your curl_exec call:
// cURL reports how many leading bytes of $response are headers, so the
// string is cut at that offset.
$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
$header      = substr($response, 0, $header_size);
$body        = substr($response, $header_size);


标签: php http curl