I'm trying to read an HTML file and get the contents of its head and body tags using jQuery, but I've had no luck. This is the code I'm using. It works for finding the content of an element by id or class, but not for the head and body tags. I hope you can help me out. Thanks.
// Fetch the file, show the raw response, then try to read the inner HTML of
// its <body> and <head> through a jQuery-wrapped copy of the markup.
$.get(file, function (markup) {
  alert(markup);

  var bodyHtml = $(markup).filter('body').html();
  alert(bodyHtml); // is null

  var headHtml = $(markup).filter('head').html();
  alert(headHtml); // is null
});
I was using a hand-made filtering function. Basically, it converts the head, html and meta tags into divs and paragraphs so they don't get lost along the way. This is my function; you might want to adapt it for something else.
/**
 * Rewrites a full HTML document string so that its document-level tags
 * survive being wrapped in a <div> and handed to jQuery.
 *
 * Transformations, in order:
 *   - <html> / </html> and <body> / </body> tags are removed (content kept)
 *   - <head ...>  becomes <div id="u_<timestamp>_head">, </head>  becomes </div>
 *   - <title ...> becomes <p id="u_<timestamp>_title">,  </title> becomes </p>
 *   - <meta ...>  becomes <p ...> (attributes are kept)
 *   - HTML comments and <? ... ?> blocks are removed
 *   - <img>, <link> and <style> tags are removed (text inside <style> stays)
 *
 * @param {string} sx Raw HTML markup; null/undefined is treated as ''.
 * @returns {*} Whatever `$` returns for the wrapped markup `<div>...</div>`
 *   (a jQuery object when `$` is jQuery).
 */
function filterInput(sx) {
  // Millisecond timestamp used to build ids unlikely to clash with the document's own.
  var t = new Date().getTime();

  // Matches an opening tag by its exact name. The lookahead stops "head" from
  // matching "<header>", and [^>]* (unlike .) also spans attributes that wrap
  // onto several lines.
  var openTag = function (name) {
    return new RegExp('<' + name + '(?=[\\s/>])[^>]*>', 'gi');
  };

  // Matches the closing tag for the given name, tolerating "</name >".
  var closeTag = function (name) {
    return new RegExp('</' + name + '\\s*>', 'gi');
  };

  // Removes comments, <? ... ?> blocks and every tag listed in `only`
  // (e.g. '<img><link>'); all other tags are left untouched.
  var stripOnly = function (input, only) {
    var listed = (((only || '') + '').toLowerCase().match(/<[a-z][a-z0-9]*>/g) || []).join('');
    var tags = /<\/?([a-z][a-z0-9]*)\b[^>]*>/gi;
    var commentsAndPhpTags = /<!--[\s\S]*?-->|<\?(?:php)?[\s\S]*?\?>/gi;

    return input.replace(commentsAndPhpTags, '').replace(tags, function (whole, tagName) {
      return listed.indexOf('<' + tagName.toLowerCase() + '>') === -1 ? whole : '';
    });
  };

  // Work on a local copy instead of reassigning the parameter.
  var html = sx == null ? '' : String(sx);

  /* drop the html wrapper tags */
  html = html.replace(openTag('html'), '').replace(closeTag('html'), '');

  /* turn head into an identifiable div, drop the body tags */
  html = html
    .replace(openTag('head'), '<div id="u_' + t + '_head">')
    .replace(closeTag('head'), '</div>')
    .replace(openTag('body'), '')
    .replace(closeTag('body'), '');

  /* replace title tag */
  html = html
    .replace(openTag('title'), '<p id="u_' + t + '_title">')
    .replace(closeTag('title'), '</p>');

  /* replace meta tags; only the tag name is swapped so attributes survive */
  html = html.replace(/<meta(?=[\s\/>])/gi, '<p');

  html = stripOnly(html, '<img><link><style>');

  /* wrap */
  // NOTE(review): <script> tags and inline event handlers are not removed here,
  // so do not pass markup from an untrusted source without sanitizing it first.
  return $('<div>' + html + '</div>');
}