Implementing resumable transfers and multithreaded downloads in Python


paul

Mar 10, 2008, 8:52:59 AM
to pyth...@googlegroups.com
As the subject says: does anyone have a good reference example to share? Thanks in advance.

--
Paul

Zoom.Quiet

Mar 10, 2008, 8:54:55 AM
to pyth...@googlegroups.com
2008/3/10 paul <cays...@gmail.com>:
> As the subject says: does anyone have a good reference example to share? Thanks in advance.
>
http://www.bittorrent.org/

> --
> Paul

--
'''Time is unimportant, only life important!
Process improvement is about building an organization that grows reliable people!
'''http://zoomquiet.org
Blog @ http://blog.zoomquiet.org/pyblosxom/
Wiki @ http://wiki.woodpecker.org.cn/moin/ZoomQuiet
Douban @ http://www.douban.com/people/zoomq/
Photos @ http://zoomq.haokanbu.com/
Work @ http://trac-hacks.org/wiki/TracChineseTranslation
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Please use OOo to replace MS Office. http://zh.openoffice.org
Please use 7-zip to replace WinRAR/WinZip. http://7-zip.org
That way you get truly free software.

Daniel Lv

Mar 10, 2008, 9:02:10 AM
to pyth...@googlegroups.com
How is wget -c implemented?

2008/3/10 Zoom. Quiet <zoom....@gmail.com>:

est

Mar 10, 2008, 2:24:13 PM
to python-cn:CPyUG
Add an HTTP request header: Content-Range


On Mar 10, 9:02 pm, "Daniel Lv" <lgn2...@gmail.com> wrote:
> How is wget -c implemented?
>

jigloo

Mar 11, 2008, 1:16:50 AM
to python-cn:CPyUG
pycurl is simple, powerful, and fast.
And the header name should be Range, I think.
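
A minimal resume sketch along those lines in Python 2 (the helper name is made up for illustration; Range is the request header, and the server answers 206 Partial Content with a Content-Range header in the response):

import os
import urllib2

def resume_download(url, path):
    # how much we already have on disk
    done = os.path.getsize(path) if os.path.exists(path) else 0
    req = urllib2.Request(url)
    req.add_header('Range', 'bytes=%d-' % done)  # ask only for the missing tail
    resp = urllib2.urlopen(req)
    f = open(path, 'ab')                         # append to the partial file
    f.write(resp.read())
    f.close()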

junyi sun

Mar 11, 2008, 1:20:16 AM
to pyth...@googlegroups.com
Here is a downloader I wrote in C; it supports resuming from breakpoints, multithreading, and server redirects. Written in Python it should be even more concise (a minimal Python sketch of the same idea follows the C listing below).
 

/**
** description:qdown is a multithread downloader
** author:Sunjoy
** from:ICT.CAS.
** date:2007-9-10
**
*/

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#define MAX_THREAD 100

typedef struct URLInfo
{
    char schema[8];
    char host[256];
    char host_name[256];
    unsigned int port;
    char file[256];
}URLInfo;

typedef struct Connection
{
    int sock;
    URLInfo url_info;
    int avaliable;
}Connection;

typedef struct Resource
{
    char file_url[256];
    int file_size;
    char file_name[256];
}Resource;

typedef struct ThreadArg
{
    Resource* res;
    int start_pos;
    int limit;
    int no;
}ThreadArg;

typedef struct BreakPoint
{
    int downloaded;
    int thread_amount;
    int tasks[MAX_THREAD][2];
   
}BreakPoint;

pthread_mutex_t g_mut;
int g_total=0;
int g_downloaded=0;
BreakPoint g_breakpoint;

URLInfo parse_url(const char *url);
Connection open_url(const char * url);
Resource get_resource(const char *url);
void join_url(const char* old_url,const char* redirect,char * new_url);
void download(const char* url,int thread_amount,const char* file_name);
void* download_part(void* args);
void* monitor(void *args);
void store_breakpoint(char * cfgName);

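/* Persist the total downloaded byte count plus each thread's current
   (offset, bytes-remaining) pair, so an interrupted download can be
   restarted from where it stopped. */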
void store_breakpoint(char * cfgName)
{
    int z;
    FILE* f;
    f=fopen(cfgName,"w");
    fprintf(f,"%d\n",g_breakpoint.downloaded);
    fprintf(f,"%d\n",g_breakpoint.thread_amount);
    for(z=0;z<g_breakpoint.thread_amount;z++){
       fprintf(f,"%d-%d\n",g_breakpoint.tasks[z][0],g_breakpoint.tasks[z][1]);
    }
    fclose(f);
}

void join_url(const char* old_url,const char* redirect,char * new_url)
{
    char stack1[256][256]={0},stack2[256][256]={0};
    int i=0,j=0,p1=0,p2=0;
    char seg[256]={0};
    URLInfo temp_urlinfo;
   
    memset(new_url,0,256); /* new_url points to a 256-byte buffer; sizeof(new_url) is only the pointer size */
    if(strstr(redirect,"://")!=NULL){
        strcpy(new_url,redirect);
    }
    else{
        while(1){
            while(redirect[i]!='/' && redirect[i]!=0){
                seg[j++]=redirect[i++];
            }   
            strcpy(stack1[p1++],seg);
            memset(seg,0,sizeof(seg));
            j=0;
            if(redirect[i]==0)
                break;
            i++;
        }
        for(i=0;i<p1;i++){
            if(!strcmp(stack1[i],"..") && p2>-1)
                p2--;
            else if(strcmp(stack1[i],".")){
                strcpy(stack2[p2++],stack1[i]);
            }
        }
        //printf("##%s\n",stack2[0]);
  
        if(!strcmp(stack2[0],"")){
            temp_urlinfo=parse_url(old_url);
            sprintf(new_url,"%s://%s:%d/",temp_urlinfo.schema,temp_urlinfo.host,temp_urlinfo.port);         
        }
        else{
            i=strlen(old_url)-1;
            while(old_url[i]!='/')
                i--;
            //printf("##%c\n",old_url[i]);
            strncpy(new_url,old_url,i+1);
            new_url[i+1]=0;
        }
        //printf("##%s\n",new_url);
        for(j=0;j<p2-1;j++){
            strcat(new_url,stack2[j]);
            strcat(new_url,"/");
        }
        strcat(new_url,stack2[p2-1]);
    }
}

URLInfo parse_url(const char* url){
    int i=0,j=0;
    char schema[8]={0};
    char host[256]={0};
    char port[8]={0};
    char file[256]={0};
    char IP[32]={0};
    URLInfo url_info;
    struct hostent* hptr;
   
    while(url[i]!=':'){
        schema[j++]=url[i++];
    }

    for(i+=3,j=0;url[i]!=':' && url[i]!='/' && url[i]!=0;){
        host[j++]=url[i++];
    }
   
    if(url[i]==':'){
        for(i+=1,j=0;url[i]!='/';){
            port[j++]=url[i++];
        }
        sscanf(port,"%u",&url_info.port); /* port is unsigned */
    }
    else{
        url_info.port=80;
    }
   
    if(url[i]!=0){
        for(j=0;url[i]!=0;){
            file[j++]=url[i++];
        }
    }
    else{
        file[0]='/';
    }
   
    strcpy(url_info.schema,schema);
    strcpy(url_info.file,file);
    strcpy(url_info.host_name,host);
    hptr=gethostbyname(host);
  
    if(hptr!=NULL){
        strcpy(url_info.host,
            inet_ntop(hptr->h_addrtype,*(hptr->h_addr_list),IP,sizeof(IP))
        );
    }
    //printf("%s\n",url_info.host);
    return url_info;
}
Connection open_url(const char* url){
    Connection conn;
    struct sockaddr_in remote_addr,local_addr;

    conn.avaliable=0;
    conn.url_info=parse_url(url);
   
    local_addr.sin_family=AF_INET;
    local_addr.sin_addr.s_addr=htonl(INADDR_ANY);
    local_addr.sin_port=htons(0);
    remote_addr.sin_family=AF_INET;
    remote_addr.sin_addr.s_addr=inet_addr(conn.url_info.host);
    remote_addr.sin_port=htons(conn.url_info.port);
   
    conn.sock=socket(AF_INET,SOCK_STREAM,0);
    if(bind(conn.sock,
        (struct sockaddr*)&local_addr,
        sizeof(local_addr))<0){
            printf("bind error\n");
    }
   
   
   
    if(conn.sock){
        if(
            connect(conn.sock,(struct sockaddr*)&remote_addr,sizeof(remote_addr))!=-1
        ){
            conn.avaliable=1;
        }
    }
   
    return conn;
}

Resource get_resource(const char* url){
    char pack[1024]={0};
    char buf[1024]={0};
    char redirect[256]={0},new_url[256]={0},old_url[256]={0};
    static int redirect_count=0;
    char* i;
    char* j;
    char* z;
    Resource res;
   
    Connection conn=open_url(url);
    if(!conn.avaliable){
        return res;
    }
    sprintf(pack,"GET %s HTTP/1.1\nHost: %s\nAccept: */*\nReferer: http://%s\nUser-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\nPragma: no-cache\nCache-Control: no-cache\nConnection: close\n\n",conn.url_info.file,conn.url_info.host_name,conn.url_info.host_name);
    send(conn.sock,pack,strlen(pack),0);
    recv(conn.sock,buf,sizeof(buf),0);
    //printf("%s\n",buf);
    if(strstr(buf,"HTTP/1.1 404")!=NULL || strstr(buf,"HTTP/1.0 404")!=NULL){
       return res;
    }
    i=(char *)strstr(buf,"Location:");
    if(i!=NULL && redirect_count<5){
        sscanf(i,"Location: %s",redirect);
        sprintf(old_url,"%s://%s:%d%s",conn.url_info.schema,conn.url_info.host_name,conn.url_info.port,conn.url_info.file);
        join_url(old_url,redirect,new_url);
        //printf("@#%s\n",new_url);
        redirect_count++;
        return get_resource(new_url);
    }
    i=(char *)strstr(buf,"Content-Length:");
    if(i!=NULL){
        sscanf(i,"Content-Length: %d",&res.file_size);
    }
    strcpy(res.file_url,url);
    //printf("#%d\n",res.file_size);
    for(z=(char*)url;(j=strstr(z,"/"))!=NULL;){
        z=j+sizeof(char);
    }
    strcpy(res.file_name,z);
    close(conn.sock);
    return res;
}

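/* Worker thread: request one byte range with a Range header, write it at
   the block's own offset in the file, and re-open the connection to resume
   from start_pos+ct whenever the transfer stalls or drops. */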
void* download_part(void * args)
{
    ThreadArg* targ=(ThreadArg*)args;
    Connection conn;
    FILE* f=NULL;
    char pack[1024]={0};
    char buf[1024]={0};
    int i=0,ct=0;
    char* body=NULL;
    //printf("%s,%d-%d\n",targ->res->file_url, targ->start_pos,targ->limit);
    conn=open_url(targ->res->file_url);
    while(!conn.avaliable){
        sleep(1);
        conn=open_url(targ->res->file_url);
    }
    if(conn.avaliable){

        f=fopen(targ->res->file_name,"rb+");
        fseek(f,targ->start_pos,0);
        sprintf(pack,"GET %s HTTP/1.1\nHost: %s\nAccept: */*\nReferer: http://%s\nUser-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\nRange: bytes=%d-%d\nPragma: no-cache\nCache-Control: no-cache\n\n",conn.url_info.file,conn.url_info.host_name,conn.url_info.host_name,targ->start_pos,targ->start_pos+targ->limit-1);
        //printf("%s",pack);
begin_down:
        send(conn.sock,pack,strlen(pack),0);
        i=recv(conn.sock,buf,sizeof(buf),0);
       
        if(strstr(buf,"HTTP/1.1 206")==NULL && strstr(buf,"HTTP/1.0 206")==NULL && strstr(buf,"HTTP/1.1 200")==NULL && strstr(buf,"HTTP/1.0 200")==NULL){
            sleep(2);
            memset(buf,0,sizeof(buf));
            conn=open_url(targ->res->file_url);
            goto begin_down;
        }
        //printf("##%s\n",body);
        body=strstr(buf,"\r\n\r\n"); /* test before skipping the separator; +4 would hide a NULL */
        if(body!=NULL){
            body+=4;
            i=i-(body-buf);
            fwrite(body,sizeof(char),i,f);
            //printf("@@@@%x\n",buf);
            fflush(f);
            ct+=i;
            pthread_mutex_lock(&g_mut);
            g_downloaded+=i;
            pthread_mutex_unlock(&g_mut);
           
            while(ct< targ->limit){
                i=recv(conn.sock,buf,sizeof(buf),0);
                if(i==0){
                    fclose(f);
                    conn.avaliable=0;
                    while(!conn.avaliable){
                        sleep(2);
                        //printf("waiting...\n");
                        conn=open_url(targ->res->file_url);
                    }
                    memset(pack,0,sizeof(pack));
                    memset(buf,0,sizeof(buf));
                    sprintf(pack,"GET %s HTTP/1.1\nHost: %s\nAccept: */*\nReferer: http://%s\nUser-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\nRange: bytes=%d-%d\nPragma: no-cache\nCache-Control: no-cache\n\n",conn.url_info.file,conn.url_info.host_name,conn.url_info.host_name,targ->start_pos+ct,targ->start_pos+targ->limit-1);
                    f=fopen(targ->res->file_name,"rb+");
                    fseek(f,targ->start_pos+ct,0);
                    goto begin_down;
                }
               
                fwrite(buf,sizeof(char),i,f);
                fflush(f);
                ct+=i;
                pthread_mutex_lock(&g_mut);
                g_downloaded+=i;
                g_breakpoint.tasks[targ->no][0]=targ->start_pos+ct;
                g_breakpoint.tasks[targ->no][1]=targ->limit-ct;
                g_breakpoint.downloaded=g_downloaded;
                pthread_mutex_unlock(&g_mut);
            }
            fclose(f);
            g_breakpoint.downloaded=g_downloaded;
            close(conn.sock);
        }
    }
    pthread_exit(NULL);
}
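
/* Progress thread: redraw the progress bar and persist the breakpoint
   file whenever the downloaded byte count changes; remove the file on
   completion. */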
void* monitor(void* args){
    float p;
    int i,j,old=-1; /* old must start out different from g_downloaded */
    char cfgName[256];
    strcpy(cfgName,(char*)args);
    strcat(cfgName,".cfg");
   
    while(1){
        p=g_downloaded/(g_total+0.0);
        if(g_downloaded>=g_total)
                break;
        i=p*100/10;
        if(old!=g_downloaded){
           

            printf("\r");
            for(j=0;j<i;j++){
                printf("==");
            }
            printf("%2.0f%%",p*100);
            fflush(stdout);
       
            store_breakpoint(cfgName);
            old=g_downloaded;
        }
    }
    printf("\r====================100%%\n");
    remove(cfgName);
    pthread_exit(NULL);
}


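/* Entry point for one download: split the file into thread_amount blocks
   and start one worker per block, or restore each block's offset and
   remaining size from the .cfg breakpoint file if one exists. */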
void download(const char* url,int thread_amount,const char* file_name)
{
    ThreadArg targs[MAX_THREAD];
    pthread_attr_t * thAttr = NULL;
    pthread_t tids[MAX_THREAD],monitor_id,controler_id;
    Resource res;
    int i,block_size,t_start_pos,t_limit;
    FILE* f;
    char cfgName[256]={0};
   
    if(thread_amount>MAX_THREAD)
        return;
    res=get_resource(url);
   
    if(!strcmp(res.file_url,""))
        return;
   
    if(strcmp(file_name,""))
        strcpy(res.file_name,file_name);
   
    if(!strcmp(res.file_name,""))
        strcpy(res.file_name,"default_down");
   
    if(res.file_size<1000000)
        thread_amount=1;
   
    block_size=res.file_size/thread_amount;
    pthread_mutex_init(&g_mut,NULL);
   
    strcpy(cfgName,res.file_name);
    strcat(cfgName,".cfg");
    printf("downloading %s,%d bytes... \n",res.file_name,res.file_size);
   
    if(fopen(cfgName,"r")==NULL){
new_task:      
        f=fopen(res.file_name,"wb");
        if(f==NULL){
            strcpy(res.file_name,"default_down");
            f=fopen(res.file_name,"wb");
        }
        fclose(f);
        g_total=res.file_size;

        for(i=0;i<thread_amount;i++){
            targs[i].res=&res;
            targs[i].start_pos=block_size*i;
            targs[i].limit=block_size;
            if(i==thread_amount-1)
                targs[i].limit+= (res.file_size%thread_amount);
           
            targs[i].no=i;
            g_breakpoint.tasks[i][0]=targs[i].start_pos;
            g_breakpoint.tasks[i][1]=block_size;
            pthread_create(&tids[i], thAttr, download_part, (void *)&targs[i]);
        }
       
    }
    else{
        f=fopen(cfgName,"r");
        if(fscanf(f,"%d",&g_downloaded)==-1)
            goto new_task;
        //printf("#%d\n",g_downloaded);
        g_total=res.file_size;
        fscanf(f,"%d",&thread_amount);
        for(i=0;i<thread_amount;i++){
            fscanf(f,"%d-%d",&t_start_pos,&t_limit);
            targs[i].res=&res;
            targs[i].start_pos=t_start_pos;
            targs[i].limit=t_limit;
            targs[i].no=i;
            g_breakpoint.tasks[i][0]=targs[i].start_pos;
            g_breakpoint.tasks[i][1]=t_limit;
            pthread_create(&tids[i], thAttr, download_part, (void *)&targs[i]);
        }
        fclose(f);
    }
   
    pthread_create(&monitor_id,NULL,monitor,(void *)res.file_name);
    g_breakpoint.thread_amount=thread_amount;
    g_breakpoint.downloaded=g_downloaded;
    //printf("#%d\n",g_downloaded);
    /*for(i=0;i<thread_amount;i++){
        pthread_join(tids[i],NULL);
    }*/

    pthread_join(monitor_id,NULL);
}

 

int main (int ac, char * av[])
{
  int thread_amount=5;
  char file_name[256]={0};
  if(ac<2){
        printf("usage: qdown URL [thread_amount] [save as]\n");
        printf("example: qdown http://www.baidu.com/img/logo.gif 5 /home/sunjoy/log.gif\n");
  }
  else{
        if(ac>=3)
            sscanf(av[2],"%d",&thread_amount);
        if(ac>=4){
            strcpy(file_name,av[3]);
        }
        download(av[1],thread_amount,file_name);
       
  }
 
  return 0;
}
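
As noted above, the same idea is shorter in Python. A minimal sketch with urllib2 and threading (names and defaults are made up for illustration; no retries and no breakpoint file, so unlike qdown it does not resume):

import threading
import urllib2

def fetch_part(url, path, start, end):
    req = urllib2.Request(url)
    req.add_header('Range', 'bytes=%d-%d' % (start, end))  # this block only
    data = urllib2.urlopen(req).read()
    f = open(path, 'r+b')
    f.seek(start)                # write at the block's own offset
    f.write(data)
    f.close()

def download(url, path, threads=5):
    # the GET here is used only for its headers; a HEAD request would be cleaner
    size = int(urllib2.urlopen(url).info()['Content-Length'])
    open(path, 'wb').truncate(size)          # pre-allocate the target file
    block = size / threads
    workers = []
    for i in range(threads):
        start = i * block
        end = size - 1 if i == threads - 1 else start + block - 1
        t = threading.Thread(target=fetch_part, args=(url, path, start, end))
        t.start()
        workers.append(t)
    for t in workers:
        t.join()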


Mr Shore

Mar 11, 2008, 3:11:36 AM
to pyth...@googlegroups.com
Long time since I've seen C; it's still as stunning as ever.

On 2008-3-11, junyi sun <ccn...@gmail.com> wrote:

paul

Mar 11, 2008, 5:49:40 AM
to pyth...@googlegroups.com
Thanks! Impressive. People who use C are hard to find these days.

On 2008-3-11, Mr Shore <shore...@gmail.com> wrote:

Zoom.Quiet

Mar 11, 2008, 6:02:43 AM
to pyth...@googlegroups.com, cpug-ea...@googlegroups.com, zp...@googlegroups.com, pyth...@googlegroups.com
2008/3/11 paul <cays...@gmail.com>:

> Thanks! Impressive. People who use C are hard to find these days.
>
Bookmarked!
http://wiki.woodpecker.org.cn/moin/MicroProj/2008-03-11


大熊

Mar 11, 2008, 6:23:21 AM
to pyth...@googlegroups.com
Segmented downloading based on pycurl's multi interface, all inside a single thread.
It is fairly crude:
① resuming is not implemented yet (the current download state would have to be saved so it can be restored)
② error handling is incomplete

Offered just to get the ball rolling :)

# -*- coding: utf8 -*-

import sys, os
import time, logging
import urlparse
import traceback
import pycurl

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

if os.name == 'posix':
    # ignore SIGPIPE so that the pycurl.NOSIGNAL option works as intended
    import signal
    signal.signal(signal.SIGPIPE, signal.SIG_IGN)
    del signal

# supported protocols
VALIDPROTOCOL = ('http', 'ftp')
# HTTP status codes
STATUS_OK = (200, 203, 206)
STATUS_ERROR = range(400, 600)
# minimum piece size (128 KB)
MINPIECESIZE = 131072
# maximum number of connections
MAXCONCOUNT = 10
# maximum number of retries
MAXRETRYCOUNT = 5
# log level
LOGLEVEL = logging.DEBUG
# clear-screen command
CLS = 'cls' if os.name == 'nt' else 'clear'

# download log file
DLOG = 'download.log'

def Traceback():
    s = StringIO()
    traceback.print_exc(file=s)
    return s.getvalue()

class Connection:
    def __init__(self, url):
        self.curl = pycurl.Curl()
        self.curl.setopt(pycurl.FOLLOWLOCATION, 1)
        self.curl.setopt(pycurl.MAXREDIRS, 5)
        self.curl.setopt(pycurl.CONNECTTIMEOUT, 30)
        self.curl.setopt(pycurl.TIMEOUT, 300)
        self.curl.setopt(pycurl.NOSIGNAL, 1)
        self.curl.setopt(pycurl.WRITEFUNCTION, self.write_cb)
        self.curl.setopt(pycurl.URL, url)
        self.curl.connection = self

        # total bytes downloaded
        self.total_downloaded = 0

    def start(self, result, piece):
        if isinstance(piece, list):
            self.id = piece[0]
            self.name = 'Piece%02d' % piece[0]
            self.curl.setopt(pycurl.RANGE, '%d-%d' % (piece[1], piece[2]))
            self.piece_size = piece[2] - piece[1] + 1
            self.piece = piece
        else:
            self.id = 0
            self.name = 'TASK'
            self.piece_size = piece
            self.piece = None

        # bytes downloaded over the current connection
        self.link_downloaded = 0
        # bytes downloaded for the current piece
        self.piece_downloaded = 0
        # retry counter for this connection
        self.retried = 0
        # stop flag
        self.is_stop = False
        # output file object
        self.result = result

    def retry(self):
        self.curl.setopt(pycurl.RANGE, '%d-%d' % (self.piece[1] + self.piece_downloaded, self.piece[2]))
        if self.link_downloaded: # did the last connection return any data?
            self.link_downloaded = 0
        else:
            self.retried += 1

    def close(self):
        self.curl.close()

    def write_cb(self, data):
        if self.piece:
            self.result.seek(self.piece[1] + self.piece_downloaded, 0)
        self.result.write(data)
        self.result.flush()
        size = len(data)
        self.link_downloaded += size
        self.piece_downloaded += size
        self.total_downloaded += size
        if self.is_stop: return -1

class FastDownload:
    def __init__(self):
        file(DLOG, 'w')
        logging.basicConfig(level=LOGLEVEL,
            format='[%(asctime)s][%(levelname)s] %(message)s',
            filename=DLOG,
            filemode='w')

        self.mcurl = pycurl.CurlMulti()

    def execute(self, url):
        '''
        Download entry point.
        '''
        self.url_info = self.url_check(url)
        if self.url_info:
            print 'Download %s, Size %d' % (self.url_info['file'], self.url_info['size'])
            self.pieces = self.make_pieces()
            self.allocate_space()
            self.download()

# ***************************************************************

    def url_check(self, url):
        '''
        Check the download URL.
        '''
        url_info = {}
        proto = urlparse.urlparse(url)[0]
        if proto not in VALIDPROTOCOL:
            print 'Valid protocols are http and ftp, but got %s <%s>!' % (proto, url)
        else:
            ss = StringIO()
            curl = pycurl.Curl()
            curl.setopt(pycurl.FOLLOWLOCATION, 1)
            curl.setopt(pycurl.MAXREDIRS, 5)
            curl.setopt(pycurl.CONNECTTIMEOUT, 30)
            curl.setopt(pycurl.TIMEOUT, 300)
            curl.setopt(pycurl.NOSIGNAL, 1)
            curl.setopt(pycurl.NOPROGRESS, 1)
            curl.setopt(pycurl.NOBODY, 1)
            curl.setopt(pycurl.HEADERFUNCTION, ss.write)
            curl.setopt(pycurl.URL, url)

            try:
                curl.perform()
            except:
                pass

            if curl.errstr() == '' and curl.getinfo(pycurl.RESPONSE_CODE) in STATUS_OK:
                url_info['url'] = curl.getinfo(pycurl.EFFECTIVE_URL)
                url_info['file'] = os.path.split(url_info['url'])[1]
                url_info['size'] = int(curl.getinfo(pycurl.CONTENT_LENGTH_DOWNLOAD))
                url_info['partible'] = (ss.getvalue().find('Accept-Ranges') != -1)

        return url_info

    def make_pieces(self):
        '''
        Build the piece list.
        '''
        if self.url_info['partible']:
            file_size = self.url_info['size']
            num = MAXCONCOUNT
            while num * MINPIECESIZE > file_size and num > 1:
                num -= 1
            piece_size = int(round(self.url_info['size'] * 1.0 / num + 0.5))
            pieces = [[i, i * piece_size, (i + 1) * piece_size - 1] for i in range(num)]
            pieces[-1][2] = self.url_info['size'] - 1
        else:
            pieces = [self.url_info['size']]
        return pieces

    def allocate_space(self):
        '''
        Pre-allocate file space (portable?).
        '''
        afile = file(self.url_info['file'], 'wb')
        afile.truncate(self.url_info['size'])
        afile.close()

# ***************************************************************

    def show_progress(self, downloaded, elapsed):
        '''
        Show download progress.
        '''
        percent = min(100, downloaded * 100.0 / self.url_info['size'])
        if elapsed == 0:
            rate = 0
        else:
            rate = downloaded * 1.0 / 1024.0 / elapsed
        info = ' D/L: %d/%d (%6.2f%%) - Avg: %4.1fkB/s' % (downloaded, self.url_info['size'], percent, rate)
        space = ' ' * (60 - len(info))

        prog_len = int(percent * 20 / 100)
        prog = '|' + 'o' * prog_len + '.' * (20 - prog_len) + '|'

        sys.stdout.write(info + space + prog)
        sys.stdout.flush()
        sys.stdout.write('\b' * 82)

    def close_connection(self, c):
        '''
        Close a connection.
        '''
        self.connections.remove(c)
        c.close()

    def process_curl(self, curl):
        '''
        Handle a finished curl transfer.
        '''
        self.mcurl.remove_handle(curl)
        c = curl.connection
        c.errno = curl.errno
        c.errmsg= curl.errmsg
        self.working_connections.remove(c)
        if c.errno == pycurl.E_OK:
            c.code = curl.getinfo(pycurl.RESPONSE_CODE)
            d = self.process_ok(c)
        else:
            d = self.process_error(c)
        return d

    def process_ok(self, c):
        '''
        Handle a successful transfer.
        '''
        if c.code in STATUS_OK:
            assert c.piece_downloaded == c.piece_size
            msg = '%s: Download succeeded' % c.name
            logging.info(msg)
            msg = '%s: Download %s out of %d' % (c.name, c.piece_downloaded, c.piece_size)
            logging.debug(msg)
            self.free_connections.append(c)
        elif c.code in STATUS_ERROR:
            msg = '%s: Error<%d>! Connection will be closed' % (c.name, c.code)
            logging.warning(msg)
            self.close_connection(c)
            self.pieces.append(c.piece)
        else:
            raise Exception('%s: Unhandled http status code %d' % (c.name, c.code))

    def process_error(self, c):
        '''
        Handle a failed transfer.
        '''
        msg = '%s: Download failed<%s>' % (c.name, c.errmsg)
        logging.error(msg)
        if self.url_info['partible'] and c.retried < MAXRETRYCOUNT:
            c.retry()
            self.working_connections.append(c)
            self.mcurl.add_handle(c.curl)
            msg = '%s: Try again' % c.name
            logging.warning(msg)
        else:
            raise Exception('Download abort~~')

    def download(self):
        '''
        Main download loop.
        '''
        self.result = file(self.url_info['file'], 'r+b')
        self.connections = []
        for i in range(len(self.pieces)):
            c = Connection(self.url_info['url'])
            self.connections.append(c)
        self.free_connections = self.connections[:]
        self.working_connections = []

        ok = True
        start_time = time.time()
        try:
            while 1:
                while self.pieces and self.free_connections:
                    p = self.pieces.pop(0)
                    c = self.free_connections.pop(0)
                    c.start(self.result, p)
                    self.working_connections.append(c)
                    self.mcurl.add_handle(c.curl)
                    msg = '%s: Start downloading' % c.name
                    logging.debug(msg)

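                # pump the multi handle until it no longer asks to be called right away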
                while 1:
                    ret, handles_num = self.mcurl.perform()
                    if ret != pycurl.E_CALL_MULTI_PERFORM: break

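                # drain finished transfers and dispatch success/error handling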
                while 1:
                    queue_num, ok_list, err_list = self.mcurl.info_read()
                    for curl in ok_list:
                        curl.errno = pycurl.E_OK
                        curl.errmsg = ''
                        self.process_curl(curl)
                    for curl, errno, errmsg in err_list:
                        curl.errno = errno
                        curl.errmsg = errmsg
                        self.process_curl(curl)
                    if queue_num == 0: break

                elapsed = time.time() - start_time
                downloaded = sum([c.total_downloaded for c in self.connections])
                self.show_progress(downloaded, elapsed)

                if not self.working_connections: break

                self.mcurl.select(1.0)
        except:
            logging.error('Error:' + Traceback())
            ok = False
        finally:
            for c in self.connections:
                c.close()
            self.mcurl.close()

        if ok:
            msg = 'Download succeeded! Total elapsed %ds' % elapsed
        else:
            msg = 'Download failed!'
        print '\n', msg
        logging.info(msg)

if __name__ == '__main__':
    os.system(CLS)

    if len(sys.argv) > 1:
        url = sys.argv[1]
    else:
        url = 'http://www.python.org/ftp/python/2.5.2/python-2.5.2.msi'
    fd = FastDownload()
    fd.execute(url)


--
In the vast sea of people, you are my one true love

Mr Shore

Mar 11, 2008, 7:48:43 AM
to pyth...@googlegroups.com

It's quite slow though: it took 360 s.

On 2008-3-11, 大熊 <bears...@gmail.com> wrote:

大熊

Mar 11, 2008, 9:04:57 AM
to pyth...@googlegroups.com
I'm on 2M ADSL here. Downloading http://www.python.org/ftp/python/2.5.2/python-2.5.2.msi
with 10 connections took 206 s; with 20 connections, 120 s.

The code is fairly plain and not much thought went into it; after all, my network programming experience is zero, heh.

On 2008-3-11, Mr Shore <shore...@gmail.com> wrote:




Mr Shore

Mar 11, 2008, 4:11:20 PM
to pyth...@googlegroups.com
Writing this with zero experience makes you a prodigy!

On 2008-3-11, 大熊 <bears...@gmail.com> wrote:

大熊

Mar 11, 2008, 9:08:37 PM
to pyth...@googlegroups.com
I hope that's not a dig at me :( I really haven't written anything substantial; it was mostly Google.

Speaking of resumable-download programs, though, there don't seem to be many to learn from. The classic wget is single-threaded, and the likes of axel, prozilla, and multiget all grind away from the raw socket level up.
libcurl counts as one of the better high-level libraries, and it supports a great many protocols.
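
For instance, resuming with pycurl is just a couple of options (a sketch; the function name is made up, and it assumes a partial file already on disk):

import os
import pycurl

def resume_with_pycurl(url, path):
    done = os.path.getsize(path) if os.path.exists(path) else 0
    f = open(path, 'ab')                      # append to the partial file
    c = pycurl.Curl()
    c.setopt(pycurl.URL, url)
    c.setopt(pycurl.FOLLOWLOCATION, 1)
    c.setopt(pycurl.RESUME_FROM, done)        # libcurl sends the Range header for us
    c.setopt(pycurl.WRITEFUNCTION, f.write)
    c.perform()
    c.close()
    f.close()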

On 2008-3-12, Mr Shore <shore...@gmail.com> wrote:
Writing this with zero experience makes you a prodigy!




haur

Mar 11, 2008, 11:32:13 PM
to pyth...@googlegroups.com
Running 大熊's program, CPU usage sits at 50%. Pretty fierce; what causes that?

paul

Mar 11, 2008, 11:33:34 PM
to pyth...@googlegroups.com
So, is there a better approach for multithreaded uploads?


On 2008-3-12, haur <hek...@gmail.com> wrote: