Python 'requests' library - define specific DNS?

2020-01-26 07:18发布

In my project I'm handling all HTTP requests with python requests library.

Now, I need to query the http server using specific DNS - there are two environments, each using its own DNS, and changes are made independently.

So, when the code is running, it should use DNS specific to the environment, and not the DNS specified in my internet connection.

Has anyone tried this using python-requests? I've only found solution for urllib2:
https://stackoverflow.com/questions/4623090/python-set-custom-dns-server-for-urllib-requests

5条回答
我只想做你的唯一
2楼-- · 2020-01-26 08:04

requests uses urllib3, which ultimately uses httplib.HTTPConnection as well, so the techniques from https://stackoverflow.com/questions/4623090/python-set-custom-dns-server-for-urllib-requests (now deleted, it merely linked to Tell urllib2 to use custom DNS) still apply, to a certain extent.

The urllib3.connection module subclasses httplib.HTTPConnection under the same name, having replaced the .connect() method with one that calls self._new_conn. In turn, this delegates to urllib3.util.connection.create_connection(). It is perhaps easiest to patch that function:

from urllib3.util import connection


# Keep a reference to urllib3's original create_connection so the wrapper
# defined below can delegate the actual socket setup to it.
_orig_create_connection = connection.create_connection


def patched_create_connection(address, *args, **kwargs):
    """Resolve the target host ourselves, then defer to urllib3.

    ``address`` is a ``(host, port)`` pair. The host part is translated by
    ``your_dns_resolver`` and the result, together with the unchanged port
    and every other argument, is forwarded to the original
    ``create_connection``.
    """
    name, port = address
    # Plug your own resolver in here; without one the system resolver
    # would be used to look the name up.
    resolved_address = (your_dns_resolver(name), port)
    return _orig_create_connection(resolved_address, *args, **kwargs)


# Install the wrapper in place of urllib3's own create_connection.
connection.create_connection = patched_create_connection

and you'd provide your own code to resolve the host portion of the address into an ip address instead of relying on the connection.create_connection() call (which wraps socket.create_connection()) to resolve the hostname for you.

Like all monkeypatching, be careful that the code hasn't significantly changed in later releases; the patch here was created against urllib3 version 1.21.1, but should work for versions as far back as 1.9.


Note that this answer was re-written to work with newer urllib3 releases, which have added a much more convenient patching location. See the edit history for the old method, applicable to version < 1.9, as a patch to the vendored urllib3 version rather than a stand-alone installation.

查看更多
等我变得足够好
3楼-- · 2020-01-26 08:04

A customized HTTPAdapter will do the trick.

Don't forget to set server_hostname to enable SNI.

import requests


class HostHeaderSSLAdapter(requests.adapters.HTTPAdapter):
    """Transport adapter that connects to a hand-picked IP for known hosts.

    For HTTPS requests whose hostname ``resolve`` knows about, the request
    URL is rewritten to point at the chosen IP address, while SNI, the
    certificate hostname check and the ``Host`` header keep using the
    original name.
    """

    def resolve(self, hostname):
        """Return the IP address to use for *hostname*, or ``None`` if unknown."""
        # a dummy DNS resolver; swap the table for a real lookup
        import random
        ips = [
            '104.16.89.20',  # CloudFlare
            '151.101.2.109',  # Fastly
        ]
        resolutions = {
            'cdn.jsdelivr.net': random.choice(ips),
        }
        return resolutions.get(hostname)

    def send(self, request, **kwargs):
        """Rewrite *request* to target the resolved IP, then send it.

        The pool manager's connection kwargs are updated in place so that
        TLS is negotiated and verified against the original hostname. All
        keyword arguments are passed through to ``HTTPAdapter.send``.
        """
        from urllib.parse import urlparse

        connection_pool_kwargs = self.poolmanager.connection_pool_kw

        result = urlparse(request.url)
        resolved_ip = self.resolve(result.hostname)

        if result.scheme == 'https' and resolved_ip:
            # Rewrite the authority only: count=1 leaves any later
            # occurrence of the same text (e.g. in a query string) intact.
            request.url = request.url.replace(
                'https://' + result.hostname,
                'https://' + resolved_ip,
                1,
            )
            connection_pool_kwargs['server_hostname'] = result.hostname  # SNI
            connection_pool_kwargs['assert_hostname'] = result.hostname

            # overwrite the host header, keeping an explicit non-default
            # port so the server still sees the authority the caller used
            host_header = result.hostname
            if result.port not in (None, 443):
                host_header = '{}:{}'.format(host_header, result.port)
            request.headers['Host'] = host_header
        else:
            # these pool kwargs may have been left over from a previous request
            connection_pool_kwargs.pop('server_hostname', None)
            connection_pool_kwargs.pop('assert_hostname', None)

        return super(HostHeaderSSLAdapter, self).send(request, **kwargs)


url = 'https://cdn.jsdelivr.net/npm/bootstrap/LICENSE'

# Route every https:// request of this session through the custom adapter.
session = requests.Session()
session.mount('https://', HostHeaderSSLAdapter())

# Issue the same request twice through the session and show the
# response headers each time.
for _ in range(2):
    r = session.get(url)
    print(r.headers)
查看更多
smile是对你的礼貌
4楼-- · 2020-01-26 08:09

I know this is an old thread but here is my python3 compatible solution using tldextract and dnspython. I've left some commented out code to illustrate how to debug and set additional session parameters.

#!/usr/bin/env python3

import sys

from pprint import pprint as pp

import requests
import dns.resolver # NOTE: dnspython package
import tldextract

class CustomAdapter(requests.adapters.HTTPAdapter):
    """Transport adapter that resolves the target host via given name servers.

    The host name in the URL is looked up with dnspython against the name
    servers supplied at construction time, and the connection is opened to
    the resulting address instead of the name.

    NOTE(review): requests 2.32+ deprecates ``get_connection`` in favour of
    ``get_connection_with_tls_context``; confirm this override is still
    invoked by the installed requests version.
    """

    def __init__(self, nameservers):
        """Store *nameservers*, the DNS server addresses used for lookups."""
        self.nameservers = nameservers
        super().__init__()

    def resolve(self, host, nameservers, record_type):
        """Return the first *record_type* record of *host* as a string.

        The lookup is sent to *nameservers*. Returns ``None`` if the answer
        contains no records; lookup errors raised by dnspython propagate.
        """
        dns_resolver = dns.resolver.Resolver()
        dns_resolver.nameservers = nameservers
        # dnspython 2.x deprecates query() in favour of resolve(); fall back
        # to query() on older releases that lack resolve().
        lookup = getattr(dns_resolver, 'resolve', None) or dns_resolver.query
        answers = lookup(host, record_type)
        for rdata in answers:
            return str(rdata)
        return None

    def get_connection(self, url, proxies=None):
        """Return a connection for *url* with its host replaced by an A record."""
        ext = tldextract.extract(url)
        # Skip empty parts so a URL without a subdomain (or suffix) does not
        # produce a name with a leading or trailing dot.
        fqdn = ".".join(
            part for part in (ext.subdomain, ext.domain, ext.suffix) if part
        )

        print("FQDN: {}".format(fqdn))
        # Use the name servers this adapter was built with rather than
        # relying on a module-level global of the same name.
        a_record = self.resolve(fqdn, self.nameservers, 'A')
        print("A record: {}".format(a_record))

        resolved_url = url.replace(fqdn, a_record, 1)  # NOTE: Replace first occurrence only
        print("Resolved URL: {}".format(resolved_url))

        return super().get_connection(resolved_url, proxies=proxies)

if __name__ == "__main__":
    # Command-line entry point: fetch the URL given as the only argument
    # through a session whose adapter resolves the host via custom DNS.

    if len(sys.argv) != 2:
        # A usage error is a failure: sys.exit() with a message prints it
        # to stderr and exits with a non-zero status.
        sys.exit("Usage: {} <url>".format(sys.argv[0]))

    url = sys.argv[1]

    nameservers = [
        '208.67.222.222', # NOTE: OpenDNS
        '8.8.8.8'         # NOTE: Google
    ]

    session = requests.Session()
    session.mount(url, CustomAdapter(nameservers))

    # Optional keyword arguments for session.get(); the commented entries
    # illustrate what can be set.
    parameters = {
        # "headers": {'Content-Type': 'application/json'},
        # "timeout" : 45,
        # "stream" : True
        # "proxies" : {
        #   "http": "http://your_http_proxy:8080/",
        #   "https": "http://your_https_proxy:8081/"
        # },
        # "auth": (name, password),
        # ...
    }

    response = session.get(url, **parameters)
    pp(response.__dict__)

And here is the console output:

$ ./run.py http://www.test.com
FQDN: www.test.com
A record: 69.172.200.235
Resolved URL: http://69.172.200.235/
{'_content': b'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3'
             b'.org/TR/html4/strict.dtd">\n<html>\n<head>\n<meta http-equiv="C'
             b'ontent-Type" content="text/html; charset=iso-8859-1">\n<meta '
             b'http-equiv="Content-Script-Type" content="text/javascript">\n'
             b'<script type="text/javascript">\nfunction getCookie(c_name) {'
             b' // Local function for getting a cookie value\n    if (docume'
             b'nt.cookie.length > 0) {\n        c_start = document.cookie.in'
             b'dexOf(c_name + "=");\n        if (c_start!=-1) {\n        c_st'
             b'art=c_start + c_name.length + 1;\n        c_end=document.cook'
             b'ie.indexOf(";", c_start);\n\n        if (c_end==-1) \n         '
             b'   c_end = document.cookie.length;\n\n        return unescape('
             b'document.cookie.substring(c_start,c_end));\n        }\n    }\n '
             b'   return "";\n}\nfunction setCookie(c_name, value, expiredays'
             b') { // Local function for setting a value of a cookie\n    va'
             b'r exdate = new Date();\n    exdate.setDate(exdate.getDate()+e'
             b'xpiredays);\n    document.cookie = c_name + "=" + escape(valu'
             b'e) + ((expiredays==null) ? "" : ";expires=" + exdate.toGMTString'
             b'()) + ";path=/";\n}\nfunction getHostUri() {\n    var loc = doc'
             b"ument.location;\n    return loc.toString();\n}\nsetCookie('YPF8"
             b"827340282Jdskjhfiw_928937459182JAX666', '171.68.244.56', 10)"
             b';\ntry {  \n    location.reload(true);  \n} catch (err1) {  \n  '
             b'  try {  \n        location.reload();  \n    } catch (err2) { '
             b' \n    \tlocation.href = getHostUri();  \n    }  \n}\n</scrip'
             b't>\n</head>\n<body>\n<noscript>This site requires JavaScript an'
             b'd Cookies to be enabled. Please change your browser settings or '
             b'upgrade your browser.</noscript>\n</body>\n</html>\n',
 '_content_consumed': True,
 '_next': None,
 'connection': <requests.adapters.HTTPAdapter object at 0x109130e48>,
 'cookies': <RequestsCookieJar[]>,
 'elapsed': datetime.timedelta(microseconds=992676),
 'encoding': 'ISO-8859-1',
 'headers': {'Server': 'nginx/1.14.2', 'Date': 'Wed, 01 May 2019 18:01:58 GMT', 'Content-Type': 'text/html', 'Transfer-Encoding': 'chunked', 'Connection': 'keep-alive', 'Keep-Alive': 'timeout=20', 'X-DIS-Request-ID': '2a5057a7c7b8a93dd700856c48fda74a', 'P3P': 'CP="NON DSP COR ADMa OUR IND UNI COM NAV INT"', 'Cache-Control': 'no-cache', 'Content-Encoding': 'gzip'},
 'history': [<Response [302]>],
 'raw': <urllib3.response.HTTPResponse object at 0x1095b90b8>,
 'reason': 'OK',
 'request': <PreparedRequest [GET]>,
 'status_code': 200,
 'url': 'https://www.test.com/'}

Hope this helps.

查看更多
叛逆
5楼-- · 2020-01-26 08:10

You should look into the TransportAdapters, including the source code. The documentation on them isn't great, but they give low-level access to a lot of the functionality described in RFC 2818 and RFC 6125. In particular, those documents encourage (require?) client-side code to support application-specific DNS for the purpose of checking certificates' CommonName and SubjectAltName. The keyword argument you need in those calls is "assert_hostname". Here's how to set it with the requests library:

from requests import Session, HTTPError
from requests.adapters import HTTPAdapter, DEFAULT_POOLSIZE, DEFAULT_RETRIES, DEFAULT_POOLBLOCK


class DNSResolverHTTPSAdapter(HTTPAdapter):
    """Transport adapter that connects to *host* but verifies *common_name*.

    ``common_name`` is the name used in request URLs and expected on the
    server certificate; ``host`` is the address actually connected to.

    NOTE(review): requests 2.32+ deprecates ``get_connection`` in favour of
    ``get_connection_with_tls_context``; confirm this override is still
    invoked by the installed requests version.
    """

    def __init__(self, common_name, host, pool_connections=DEFAULT_POOLSIZE, pool_maxsize=DEFAULT_POOLSIZE,
        max_retries=DEFAULT_RETRIES, pool_block=DEFAULT_POOLBLOCK):
        """Store the name mapping, then initialise the base adapter.

        The attributes are assigned before calling the base ``__init__`` so
        they are already available should it call ``init_poolmanager``.
        """
        self.__common_name = common_name
        self.__host = host
        super(DNSResolverHTTPSAdapter, self).__init__(pool_connections=pool_connections, pool_maxsize=pool_maxsize,
            max_retries=max_retries, pool_block=pool_block)

    def get_connection(self, url, proxies=None):
        """Return a connection for *url* with the common name swapped for the host."""
        import re

        # Host names are case-insensitive and the URL may arrive with the
        # host lower-cased, so match the common name ignoring case. Only
        # the first occurrence is replaced; a callable replacement keeps
        # the host text literal.
        redirected_url = re.sub(
            re.escape(self.__common_name),
            lambda _match: self.__host,
            url,
            count=1,
            flags=re.IGNORECASE,
        )
        return super(DNSResolverHTTPSAdapter, self).get_connection(redirected_url, proxies=proxies)

    def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK, **pool_kwargs):
        """Create the pool manager with ``assert_hostname`` set to the common name."""
        pool_kwargs['assert_hostname'] = self.__common_name
        super(DNSResolverHTTPSAdapter, self).init_poolmanager(connections, maxsize, block=block, **pool_kwargs)

# Example usage. NAME, PASSWORD and SSL_OPTIONS are not defined in this
# snippet and must be supplied by the surrounding code.
common_name = 'SuperSecretSarahServer'
host = '192.168.33.51'
port = 666
base_url = 'https://{}:{}/api/'.format(common_name, port)
my_session = Session()
# base_url is a plain module-level name here (there is no ``self``).
my_session.mount(base_url.lower(), DNSResolverHTTPSAdapter(common_name, host))
user_name = 'sarah'
url = '{}users/{}'.format(base_url, user_name)
default_response_kwargs = {
    'auth': (NAME, PASSWORD),
    'headers': {'Content-Type': 'application/json'},
    'verify': SSL_OPTIONS['ca_certs'],
    'cert': (SSL_OPTIONS['certfile'], SSL_OPTIONS['keyfile'])
}
response = my_session.get(url, **default_response_kwargs)

I use common_name for the name expected to be on the certificate and how your code will reference the desired machine. I use host for a name recognized by the external world - FQDN, IP, DNS entry, ... Of course, the SSL_OPTIONS dictionary (in my example) must list appropriate certificate / key filenames on your machine. (Plus, NAME and PASSWORD should resolve to correct strings.)

查看更多
劫难
6楼-- · 2020-01-26 08:24

Or just use subprocess with curl and add --dns-servers

查看更多
登录 后发表回答