In this script, a loop downloads files and saves them (via curl). But the loop iterates too quickly, so the download-and-save actions don't have time to finish their work, and as a result the files come out broken:
def get_images_thread(table):
    class LoopThread(threading.Thread):
        def run(self):
            global db
            c = db.cursor()
            c.execute("""SELECT * FROM js_stones ORDER BY stone_id LIMIT 1""")
            ec = EasyCurl(table)
            while 1:
                stone = c.fetchone()
                if stone is None:
                    break
                img_fname = stone[2]
                print img_fname
                url = "http://www.jstone.it/" + img_fname
                fname = url.strip("/").split("/")[-1].strip()
                ec.perform(url,
                           filename="D:\\Var\\Python\\Jstone\\downloadeble_pictures\\" + fname,
                           progress=ec.textprogress)
This is an excerpt from the examples that ship with the PycURL library. A pool of worker threads pulls (url, filename) pairs from a queue, and each thread's curl.perform() blocks until its download has completed, so files are never left half-written:
import sys
import threading
import Queue
import pycurl

# In the full example, `urls` is read from a file and `num_conn`
# (the number of concurrent connections) comes from the command line.

# Make a queue with (url, filename) tuples
queue = Queue.Queue()
for url in urls:
    url = url.strip()
    if not url or url[0] == "#":
        continue
    filename = "doc_%03d.dat" % (len(queue.queue) + 1)
    queue.put((url, filename))

# Check args
assert queue.queue, "no URLs given"
num_urls = len(queue.queue)
num_conn = min(num_conn, num_urls)
assert 1 <= num_conn <= 10000, "invalid number of concurrent connections"
print "PycURL %s (compiled against 0x%x)" % (pycurl.version, pycurl.COMPILE_LIBCURL_VERSION_NUM)
print "----- Getting", num_urls, "URLs using", num_conn, "connections -----"

class WorkerThread(threading.Thread):
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        while 1:
            try:
                url, filename = self.queue.get_nowait()
            except Queue.Empty:
                raise SystemExit
            fp = open(filename, "wb")
            curl = pycurl.Curl()
            curl.setopt(pycurl.URL, url)
            curl.setopt(pycurl.FOLLOWLOCATION, 1)
            curl.setopt(pycurl.MAXREDIRS, 5)
            curl.setopt(pycurl.CONNECTTIMEOUT, 30)
            curl.setopt(pycurl.TIMEOUT, 300)
            curl.setopt(pycurl.NOSIGNAL, 1)
            curl.setopt(pycurl.WRITEDATA, fp)
            try:
                curl.perform()
            except:
                import traceback
                traceback.print_exc(file=sys.stderr)
                sys.stderr.flush()
            curl.close()
            fp.close()
            sys.stdout.write(".")
            sys.stdout.flush()

# Start a bunch of threads
threads = []
for dummy in range(num_conn):
    t = WorkerThread(queue)
    t.start()
    threads.append(t)

# Wait for all threads to finish
for thread in threads:
    thread.join()
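To apply that pattern to your code, one approach (a rough sketch, assuming the same db connection and that stone[2] holds the image path, as in your question) is to fill the queue from the js_stones rows and let the workers do the downloading:

# Rough sketch: fill the queue from js_stones (assumes the question's
# db connection and column layout, i.e. stone[2] is the image path).
queue = Queue.Queue()
c = db.cursor()
c.execute("SELECT * FROM js_stones ORDER BY stone_id")
for stone in c.fetchall():
    img_fname = stone[2]
    url = "http://www.jstone.it/" + img_fname
    fname = url.strip("/").split("/")[-1].strip()
    queue.put((url, "D:\\Var\\Python\\Jstone\\downloadeble_pictures\\" + fname))

# Then start and join WorkerThread instances exactly as above; each
# thread's curl.perform() blocks until its download is complete, so
# no file is left half-written by the loop moving on.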
If you're asking what I think you're asking,
from time import sleep
sleep(1)
should "solve"(It's hacky to the max!) your problem. Docs here. I would check that that really is your problem, though. It seems catastrophically unlikely that pausing for a few seconds would stop files from downloading brokenly. Some more detail would be nice too.
os.waitpid() might also help.
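os.waitpid() only helps if each download runs in a separate process; a minimal sketch (POSIX only, with a hypothetical URL and output filename) of shelling out to curl and blocking until it finishes:

import os

pid = os.fork()
if pid == 0:
    # Child: replace this process with a curl download
    os.execvp("curl", ["curl", "-s", "-o", "out.dat", "http://example.com/file"])
else:
    # Parent: block until the child exits, so the file is fully
    # written before the script continues
    finished_pid, status = os.waitpid(pid, 0)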