开发者

Web scraping with bookmarklet?

开发者 https://www.devze.com 2023-03-02 12:51 出处:网络
I'd like to use a bookmarklet to gather together a few resources from different webpages. I.e., instead of using some browser extensions to get the HTML elements from the pages, I would like to use som…

I'd like to use a bookmarklet to gather together a few resources from different webpages.

I.e., instead of using some browser extensions to get the HTML elements from the pages, I would like to use a JavaScript bookmarklet to capture the code from the sites.

[Edit] How do I get the HTML elements from the page with a JavaScript bookmarklet?

The question is about getting the inner HTML of a page with a bookmarklet, not about bookmarklets in general.


You don't need any libraries to do this. Just create your functionality in Firebug or Chrome Inspector and then format it on one line like this:

javascript:void function(){alert(1);}();

Copy and paste this into the location bar and hit Enter to execute it. Replace the alert(1); with your code. We wrap it in a self-executing anonymous function because otherwise the value returned by the code you execute would replace the web page.

If your code is really long you can write it all in an external javascript file and where you see the alert above just create a script tag with your src and append it to the page.


Because of the bookmarklet length limitation, you might consider having the bookmarklet load the JavaScript code that performs the scraping when someone clicks on it. On accessing DOM elements, see this reference.

Please note that scraping will only be possible for FRAMEs/IFRAMEs that originate from the same domain as the main window, due to cross-frame security restrictions.


This long script I made will give you exactly that, as well as a few other unique enhancements:

javascript:void function(e){var t=function(e){document.writeln("<!DOCTYPE html>"),document.writeln("<html>"),document.writeln("<body>"),document.writeln(""),document.writeln('<p style="font-size:20px"><b>Public Bookmarklet for viewing a whois of a site. Of course this isnt as complex as the real thing, because I got all the data below from scratch.</b></p><p style="font-size:13px"><i>made by shoe%231327</i></p>'),document.writeln('<p style="font-size:20px">DOMAIN INFO:</p>'),document.writeln(""),document.writeln('{"dig":{"header":{"id":"43226","qr":"1","opcode":"Query","aa":"false","tc":"false","rd":"false","ra":"false","ad":"false","cd":"false","rcode":"NXDOMAIN","qdcount":"1","ancount":"0","nscount":"0","arcount":"0"},"answer":[],"additional":[],"authority":[],"bind":";; Security Level : UNCHECKED\n;; HEADER SECTION\n;; id = 43226\n;; qr = 1    opcode = Query    aa = false    tc = false    rd = false\n;; ra = false    ad = false    cd = false    rcode  = NXDOMAIN\n;; qdcount = 1  ancount = 0  nscount = 0  arcount = 0\n\n;; QUESTION SECTION (1  record)\n;; :fqdn.INANY\n"},"error":false}'),document.writeln('<p id="demo"></p>'),document.writeln("<script>"),document.writeln('document.getElementById("demo").innerHTML = '),document.writeln('"DOMAIN:<br>" + window.location.href;'),document.writeln("</script>"),document.writeln("<!--"),document.writeln('<script type="application/javascript">'),document.writeln("  function getIP(json) {"),document.writeln('    document.write("CLIENT IP: ", json.ip);'),document.writeln("  }"),document.writeln("</script>"),document.writeln(""),document.writeln('<script type="application/javascript" src="https://api.ipify.org%3Fformat=jsonp%26callback=getIP"></script>'),document.writeln("-->"),document.writeln("</body>"),document.writeln("</html>"),document.writeln("<p>statuses: [ <br>"),document.writeln('            "clientTransferProhibited"'),document.writeln("            <br>"),document.writeln("            
]"),document.writeln("</p>"),document.writeln('<p style="font-size:20px">CLIENT INFO:</p>'),document.writeln('<pre id="response"></pre>'),document.writeln(""),e.get("https://api.ipdata.co/%3Fapi-key=test",function(t){e("%23response").html(JSON.stringify(t,null,4))},"jsonp"),document.writeln("</body>"),document.writeln("</html>")},n=e%26%26e.fn%26%26parseFloat(e.fn.jquery)>=1.7;if(n)t(e);else{var o=document.createElement("script");o.src="//ajax.googleapis.com/ajax/libs/jquery/1/jquery.js",o.onload=o.onreadystatechange=function(){var e=this.readyState;e%26%26"loaded"!==e%26%26"complete"!==e||t(jQuery.noConflict())}}document.getElementsByTagName("head")[0].appendChild(o)}(window.jQuery);
0

精彩评论

暂无评论...
验证码 换一张
取 消