Python: downloading multiple files in turn


A loop in my script downloads and saves files (with curl). But the loop iterates too quickly, so the downloading and saving never get time to complete their work, and as a result the files come out broken.

def get_images_thread(table):
    class LoopThread ( threading.Thread ):
        def run ( self ):
            global db
            c=db.cursor()
            c.execute(""" SELECT * FROM js_stones ORDER BY stone_id LIMIT 1
                            """)           
            ec = EasyCurl(table)

            while(1):
                stone = c.fetchone()
                if stone == None:
                    break
                img_fname = stone[2]
                print img_fname
                url = "http://www.jstone.it/"+img_fname
                fname = url.strip("/").split("/")[-1].strip()
                ec.perform(url, filename="D:\\Var\\Python\\Jstone\\downloadeble_pictures\\"+fname,
                                    progress=ec.textprogress)


This is an excerpt from the examples for the PycURL library,

# Imports needed by this excerpt (the full example defines `urls` and
# `num_conn` before this point)
import sys, threading, Queue, pycurl

# Make a queue with (url, filename) tuples
queue = Queue.Queue()
for url in urls:
    url = url.strip()
    if not url or url[0] == "#":
        continue
    filename = "doc_%03d.dat" % (len(queue.queue) + 1)
    queue.put((url, filename))


# Check args
assert queue.queue, "no URLs given"
num_urls = len(queue.queue)
num_conn = min(num_conn, num_urls)
assert 1 <= num_conn <= 10000, "invalid number of concurrent connections"
print "PycURL %s (compiled against 0x%x)" % (pycurl.version, pycurl.COMPILE_LIBCURL_VERSION_NUM)
print "----- Getting", num_urls, "URLs using", num_conn, "connections -----"


class WorkerThread(threading.Thread):
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        while 1:
            try:
                url, filename = self.queue.get_nowait()
            except Queue.Empty:
                raise SystemExit
            fp = open(filename, "wb")
            curl = pycurl.Curl()
            curl.setopt(pycurl.URL, url)
            curl.setopt(pycurl.FOLLOWLOCATION, 1)
            curl.setopt(pycurl.MAXREDIRS, 5)
            curl.setopt(pycurl.CONNECTTIMEOUT, 30)
            curl.setopt(pycurl.TIMEOUT, 300)
            curl.setopt(pycurl.NOSIGNAL, 1)
            curl.setopt(pycurl.WRITEDATA, fp)
            try:
                curl.perform()
            except:
                import traceback
                traceback.print_exc(file=sys.stderr)
                sys.stderr.flush()
            curl.close()
            fp.close()
            sys.stdout.write(".")
            sys.stdout.flush()


# Start a bunch of threads
threads = []
for dummy in range(num_conn):
    t = WorkerThread(queue)
    t.start()
    threads.append(t)


# Wait for all threads to finish
for thread in threads:
    thread.join()
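
The excerpt assumes that `urls` (a list of URL strings) and `num_conn` (the number of connections to use) are already defined; in the full example they come from the command line. A rough, non-verbatim sketch of that setup:

# Sketch of the setup the excerpt expects: a file with one URL per line,
# plus an optional connection count. Not copied verbatim from the
# PycURL examples.
import sys

try:
    urls = open(sys.argv[1]).readlines()    # one URL per line
except (IndexError, IOError):
    print "Usage: %s <url-list-file> [<num-connections>]" % sys.argv[0]
    raise SystemExit
num_conn = int(sys.argv[2]) if len(sys.argv) > 2 else 10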


If you're asking what I think you're asking,

from time import sleep
sleep(1)

should "solve"(It's hacky to the max!) your problem. Docs here. I would check that that really is your problem, though. It seems catastrophically unlikely that pausing for a few seconds would stop files from downloading brokenly. Some more detail would be nice too.

os.waitpid()

might also help.
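
That only applies if the downloads are launched as separate curl processes rather than through PycURL; a minimal Unix-style sketch of the idea (the curl invocation and the fname/url variables here are assumptions, not from the question):

import os

# Spawn the curl command-line tool without waiting for it...
pid = os.spawnlp(os.P_NOWAIT, "curl", "curl", "-o", fname, url)
# ...then block until that process exits, so the file is complete
# before the loop continues.
os.waitpid(pid, 0)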
