I'm doing some simple Python + FB Graph training on my own, and I faced a weird problem:
import time
import sys
import urllib2
import urllib
from json import loads
# Base endpoint of the Facebook Graph search API; the URL-quoted query
# string is appended directly to this prefix.
base_url = "https://graph.facebook.com/search?q="
# Module-level placeholders for fields of a post; never assigned in the
# code shown here (presumably filled in elsewhere -- TODO confirm).
post_id = None
post_type = None
user_id = None
message = None
created_time = None
def doit(hour):
page = 1
search_term = "\"Plastic Planet\""
encoded_search_term = urllib.quote(search_term)
print encoded_search_term
type="&type=post"
url = "%s%s%s" % (base_url,encoded_search_term,type)
print url
while(1):
try:
response = urllib2.urlopen(url)
except urllib2.HTTPError, e:
print e
finally:
pass
content = response.read()
content = loads(content)
print "=================================="
for c in content["data"]:
print c
print "****************************************"
try:
content["paging"]
print "current URL"
print url
print "next page!------------"
url = content["paging"]["next"]
print url
except:
pass
finally:开发者_JAVA技巧
pass
"""
print "new URL is ======================="
print url
print "=================================="
"""
print url
What I'm trying to do here is to automatically page through the search results by following content["paging"]["next"] from each response.
But the weird thing is that no data is returned; I received the following:
{"data":[]}
Even in the very first loop.
But when I copied the URL into a browser, a lot of results were returned.
I've also tried a version with my access token, and the same thing happens.
+++++++++++++++++++EDITED and SIMPLIFIED++++++++++++++++++
ok thanks to TryPyPy, here's the simplified and edited version of my previous question:
Why is that:
import urllib2
# NOTE(review): the path reads "searchq=" -- the '?' after "search" looks
# lost in transcription; presumably it should be "search?q=". Verify
# against the original post before relying on this URL.
url = "https://graph.facebook.com/searchq=%22Plastic+Planet%22&type=post&limit=25&until=2010-12-29T19%3A54%3A56%2B0000"
response = urllib2.urlopen(url)
print response.read()
result in {"data":[]}
?
But the same url produces a lot of data in a browser?
Trial and error using Chrome (where I got lots of data) and Firefox (where I got the empty response) made me zero in on the 'Accept-Language' header. Other modifications are supposedly only cosmetic, but I'm not sure about the CookieJar.
import time
import sys
import urllib2
import urllib
from json import loads
import cookielib
# Graph API search endpoint; the URL-quoted query is appended to this prefix.
base_url = "https://graph.facebook.com/search?q="
# Placeholders for post fields; unused in the code shown here.
post_id = None
post_type = None
user_id = None
message = None
created_time = None
# Cookie-aware opener so the request behaves more like a browser session.
jar = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
# Per the author's browser comparison, Accept-Language is the header that
# makes the API return data instead of an empty {"data": []}.
opener.addheaders = [
('Accept-Language', 'en-US,en;q=0.8'),]
def doit(hour):
page = 1
search_term = "\"Plastic Planet\""
encoded_search_term = urllib.quote(search_term)
print encoded_search_term
type="&type=post"
url = "%s%s%s" % (base_url,encoded_search_term,type)
print url
data = True
while data:
response = opener.open(url)
opener.addheaders += [
('Referer', url) ]
content = response.read()
content = loads(content)
print "=================================="
for c in content["data"]:
print c.keys()
print "****************************************"
if "paging" in content:
print "current URL"
print url
print "next page!------------"
url = content["paging"]["next"]
print url
else:
print content
print url
data = False
doit(1)
Here's a cleaned up, minimal working version:
import urllib2
import urllib
from json import loads
import cookielib
def doit(search_term, base_url = "https://graph.facebook.com/search?q="):
opener = urllib2.build_opener()
opener.addheaders = [('Accept-Language', 'en-US,en;q=0.8')]
encoded_search_term = urllib.quote(search_term)
type="&type=post"
url = "%s%s%s" % (base_url,encoded_search_term,type)
print encoded_search_term
print url
data = True
while data:
response = opener.open(url)
content = loads(response.read())
print "=================================="
for c in content["data"]:
print c.keys()
print "****************************************"
if "paging" in content:
url = content["paging"]["next"]
else:
print "Empty response"
print content
data = False
doit('"Plastic Planet"')
Comments