Extract hostname from string

2018-12-31 08:55发布

I would like to match just the root of a URL and not the whole URL from a text string. Given:

http://www.youtube.com/watch?v=ClkQA2Lb_iE
http://youtu.be/ClkQA2Lb_iE
http://www.example.com/12xy45
http://example.com/random

I want to match only the last two URLs, i.e. the ones that resolve to the www.example.com or example.com domain.

I heard regex is slow, and this would be my second regular expression on the page, so if there is any way to do it without regex, let me know.

I'm seeking a JS/jQuery version of this solution.

21条回答
琉璃瓶的回忆
2楼-- · 2018-12-31 09:36

There is no need to parse the string, just pass your URL as an argument to URL constructor:

// Let the built-in URL parser do the work, then pull out its `hostname` field.
var url = 'http://www.youtube.com/watch?v=ClkQA2Lb_iE';
var { hostname } = new URL(url);

assert(hostname === 'www.youtube.com');
查看更多
倾城一夜雪
3楼-- · 2018-12-31 09:36

Code:

// Matches "<label>.<tld>" for the TLDs listed in the alternation.
//  - `\.` is escaped so only a literal dot separates label and TLD
//    (an unescaped `.` would match any character, e.g. "welcom" in "welcome.org").
//  - `[\w-]+` lets the label contain hyphens ("my-site.com").
//  - the trailing `\b` stops "example.community" from matching as "example.com".
var regex = /[\w-]+\.(com|co\.kr|be)\b/ig;
var urls = ['http://www.youtube.com/watch?v=ClkQA2Lb_iE',
            'http://youtu.be/ClkQA2Lb_iE',
            'http://www.example.com/12xy45',
            'http://example.com/random'];


// With the `g` flag, String.prototype.match returns an array of every match
// (or null when nothing matches), so each logged value is an array.
urls.forEach(function(url) {
    var convertedUrl = url.match(regex);
    console.log(convertedUrl);
});

Result:

youtube.com
youtu.be
example.com
example.com
查看更多
无色无味的生活
4楼-- · 2018-12-31 09:37

A one-liner with jQuery:

// Create an <a> element with jQuery, set its href to the current page URL,
// and read the element's `hostname` property.
$('<a>').attr('href', document.location.href).prop('hostname');
查看更多
荒废的爱情
5楼-- · 2018-12-31 09:40

Try:

/**
 * Extract the hostname from a URL-like string.
 *
 * Accepts full URLs ("https://host/path"), protocol-relative URLs
 * ("//host/path") and scheme-less strings ("host:8080/path?x=1").
 * The scheme, userinfo ("user:pass@"), port, path, query string and
 * fragment are all removed.
 *
 * Limitation: bracketed IPv6 literals ("[::1]") are not supported, because
 * the port is stripped at the first ":".
 *
 * @param {string} url - The URL or URL-like string to inspect.
 * @returns {string} The hostname, or "" when the input has no host part.
 */
function extractHostname(url) {
    // Remove a leading "scheme://" or protocol-relative "//". The pattern is
    // anchored to the start so that a "//" appearing later (for example in a
    // query string such as "?next=http://other.com") is not mistaken for it.
    const withoutScheme = url.replace(/^(?:[a-z][a-z0-9+.-]*:)?\/\//i, '');

    // The authority part ends at the first "/", "?" or "#".
    const authority = withoutScheme.split(/[\/?#]/)[0];

    // Drop "user:pass@" credentials; when there is no "@", lastIndexOf
    // returns -1 and slice(0) keeps the whole string.
    const hostAndPort = authority.slice(authority.lastIndexOf('@') + 1);

    // Drop the port number.
    return hostAndPort.split(':')[0];
}

// To address those who want the "root domain," use this function:
/**
 * Reduce a URL to its "root domain" using a simple label-count heuristic.
 *
 * Hostnames with one or two labels are returned unchanged. Otherwise the
 * last two labels are kept, or the last three when both of the final two
 * labels are exactly two characters long (treated as a ccTLD such as
 * ".me.uk").
 *
 * @param {string} url - The URL or URL-like string to inspect.
 * @returns {string} The root domain derived from the hostname.
 */
function extractRootDomain(url) {
    const hostname = extractHostname(url);
    const labels = hostname.split('.');
    const count = labels.length;

    // No subdomain present: nothing to trim.
    if (count <= 2) {
        return hostname;
    }

    // Two 2-character trailing labels are assumed to be a ccTLD pair,
    // so one extra label is kept in that case.
    const looksLikeCcTld =
        labels[count - 2].length === 2 && labels[count - 1].length === 2;
    const keep = looksLikeCcTld ? 3 : 2;

    return labels.slice(count - keep).join('.');
}

// Sample inputs shared by both demonstrations below.
const sampleUrls = [
    "http://www.blog.classroom.me.uk/index.php",
    "http://www.youtube.com/watch?v=ClkQA2Lb_iE",
    "https://www.youtube.com/watch?v=ClkQA2Lb_iE",
    "www.youtube.com/watch?v=ClkQA2Lb_iE",
    "ftps://ftp.websitename.com/dir/file.txt",
    "websitename.com:1234/dir/file.txt",
    "ftps://websitename.com:1234/dir/file.txt",
    "example.com?param=value",
    "https://facebook.github.io/jest/",
    "//youtube.com/watch?v=ClkQA2Lb_iE",
    "http://localhost:4200/watch?v=ClkQA2Lb_iE"
];

// Print the hostname extracted from every sample.
console.log("== Testing extractHostname: ==");
for (const sample of sampleUrls) {
    console.log(extractHostname(sample));
}

// Print the root domain extracted from every sample.
console.log("== Testing extractRootDomain: ==");
for (const sample of sampleUrls) {
    console.log(extractRootDomain(sample));
}

*Hit "Run code snippet" to see these methods in action.

Regardless of whether the URL has a protocol or even a port number, you can extract the domain. This is a very simplified, non-regex solution, so I think this will do.

*Thank you @Timmerz, @renoirb, @rineez, @BigDong, @ra00l, @ILikeBeansTacos, @CharlesRobertson for your suggestions! @ross-allen, thank you for reporting the bug!

查看更多
泛滥B
6楼-- · 2018-12-31 09:45

Try this:

// Capture everything between "http://" or "https://" and the first "/", "?" or "#".
var matches = /^https?\:\/\/([^\/?#]+)(?:[\/?#]|$)/i.exec(url);
var domain = matches ? matches[1] : null;  // null when the URL does not match

If you want to exclude the port from your result, use this expression instead:

// Same pattern with ":" added to both character classes, so the captured
// group stops before a ":port" suffix.
/^https?\:\/\/([^\/:?#]+)(?:[\/:?#]|$)/i

Edit: To prevent specific domains from matching, use a negative lookahead. (?!youtube.com)

// The negative lookahead rejects any URL whose host begins with an optional
// "www." followed by "youtube.com" or "youtu.be". It is a prefix test, so a
// host that merely starts with one of those strings is rejected too.
/^https?\:\/\/(?!(?:www\.)?(?:youtube\.com|youtu\.be))([^\/:?#]+)(?:[\/:?#]|$)/i
查看更多
春风洒进眼中
7楼-- · 2018-12-31 09:45

A short way to do it:

var url = "http://www.someurl.com/support/feature";

/**
 * Return the host portion of a URL: the text between the first "//"
 * and the next "/". A port, if present, is kept.
 *
 * When the input contains no "//", the text before the first "/" is
 * returned instead of throwing.
 *
 * @param {string} url - The URL to inspect.
 * @returns {string} The host portion of the URL.
 */
function getDomain(url){
  // Keep the working value local; assigning to an undeclared name creates a
  // global in sloppy mode and throws in strict mode / ES modules.
  const parts = url.split("//");
  const afterProtocol = parts.length > 1 ? parts[1] : parts[0];
  return afterProtocol.split("/")[0];
}
eg:
  getDomain("http://www.example.com/page/1")

  output:
   "www.example.com"

Use the function above to get the domain name.

查看更多
登录 后发表回答