首页手记扒一扒Nodejs formidable的onPart

扒一扒Nodejs formidable的onPart

标签：

Node.js Html5 JavaScript

话说使用Nodejs实现一个文件上传，还是蛮简单的，基于Express4.x一般也就formidable用的多些吧；基本的不多说了，github一下都会的；接着《[也说文件上传之兼容IE789的进度条---丢掉flash][1]》，新版的大文件上传，最后就差断点续传了，业余跟进中...；对于IE789，在文件上传这块，算是与HTML5无缘了，当然我也选择丢掉了flash，就用最原始的input[type="file"]+hideIframe+轮询；OK，IE789可以凉快去了，BSIE！

那么，现代浏览器上就不一样了；大家都知道用HTML5上传大文件必然会选择分段，files API的file.slice(start,end)+formData；简单的将就看吧：

/**
 * Browser-side chunked uploader (article sketch).
 *
 * Exposes `uploader.Files`, which splits one File/Blob into fixed-size slices
 * and POSTs them one after another as multipart form data under the field
 * name "sliceData".
 *
 * Relies on names supplied by the surrounding page: `utils.getRandomStr`,
 * `SparkMD5`, and the browser globals `FileReader`, `FormData`,
 * `XMLHttpRequest`, `localStorage`, `location` and `window`.
 */
var uploader=function(){

  //....

  /**
   * Upload state for a single file.
   *
   * @param {Object} obj
   * @param {Blob} obj.files - the one file to upload (singular, despite the name).
   * @param {string} [obj.url] - target URL; defaults to `location.href`.
   * @param {number} [obj.chunkSize] - bytes per slice; defaults to 200 KiB.
   * @param {function(string)} [obj.onprogress] - receives the overall progress
   *   as a percentage string with two decimals; defaults to logging it.
   */
  function Files(obj){
    this.files=obj.files;
    this.__token__=utils.getRandomStr();
    this.url=obj.url||location.href;
    this.chunkSize=obj.chunkSize||200*1024;
    this.chunks=Math.ceil(this.files.size/this.chunkSize);
    this.index=0;
    this.onprogress=obj.onprogress||function(p){console.log(p);};
  }
  Files.prototype={
    /**
     * Starts the upload. Files larger than 50 MB are first fingerprinted
     * (MD5 of the first 10240 bytes) so an interrupted upload can be
     * identified later; smaller files start sending slices immediately.
     */
    postFiles:function(){
      var self=this;
      // Larger than 50 MB: prepare for resumable upload.
      if (this.files.size>50*1024*1024) {
        var fileReader = new FileReader();
        var spark = new SparkMD5.ArrayBuffer();
        fileReader.onload = function (e) {
          spark.append(e.target.result);
          self.hash=spark.end();
          window.__hash__=self.hash;
          // Placeholder kept from the article: the stored resume info is read
          // here, but the code that consumes it is elided in the post.
          var stored=localStorage.getItem('fileUploadInfos');
          self.postSlice();
        };
        fileReader.readAsArrayBuffer(this.files.slice(0, 10240));
      }else{
        this.postSlice();
      }
    },
    /**
     * Sends the slice at `this.index`, then recurses for the next slice once
     * the request has loaded.
     *
     * @returns {boolean|undefined} `false` when every slice has already been
     *   sent; otherwise `undefined` (the request runs asynchronously).
     */
    postSlice:function(){
      if (this.index>=this.chunks) {
        return false;
      }
      this.start=this.index*this.chunkSize;
      this.end=Math.min(this.files.size,this.start+this.chunkSize);

      var self=this;
      var fd = new FormData();
      fd.append("sliceData", this.files.slice(this.start,this.end));
      // The article elides this step: `this.url` is expected to be extended
      // here with the per-slice data the server needs. The original line was
      // a bare `this.url=` with no right-hand side, which swallowed the next
      // statement and made the snippet unparseable.
      var xhr = new XMLHttpRequest();
      xhr.upload.addEventListener("progress", function(evt){
        if (evt.lengthComputable) {
          var uploaded=self.index*self.chunkSize+evt.loaded;
          // evt.loaded counts the whole multipart body (boundaries included),
          // so clamp to keep the reported percentage from exceeding 100.
          var percent=Math.min(100,uploaded/self.files.size*100).toFixed(2);
          self.onprogress&&self.onprogress(percent);
        }else {
          console.log('unable to compute');
        }
      }, false);
      xhr.addEventListener("load", function(){
        self.index++;
        self.postSlice();
        // NOTE(review): eval() on a server response executes arbitrary code.
        // Kept because the server reply is treated as a script by this
        // design; prefer returning JSON and parsing it with JSON.parse.
        eval(xhr.responseText);
      }, false);
      xhr.open("POST", this.url);
      // xhr.addEventListener("error", uploadFailed, false);
      xhr.addEventListener("abort", function () {
        // Record the resume point here (elided in the article).
      }, false);
      xhr.send(fd);
    }
  };

  return {
    Files:Files
    //.....
  };
}();

// Start a chunked upload of the first selected file, using 10 MB slices and
// forwarding each progress percentage to `callbk`.
if (this.files) {
  var uploadOptions={
    files:this.files[0],
    chunkSize:10*1024*1024,
    onprogress:function(percent){
      callbk(percent);
    }
  };
  var Files=new uploader.Files(uploadOptions);
  Files.postFiles();
}

好吧，其实大家都懂，我就不多BB了；还是说formidable吧，既然用到分段上传，formidable的一般做法肯定是行不通的；不过github上人家也说了，onPart或许可以。。。。。。原谅我英语有点low，一知半解；原文这样的：

You may overwrite this method if you are interested in directly
accessing the multipart stream. Doing so will disable any 'field' /
'file' events processing which would occur otherwise, making you fully
responsible for handling the processing.

// Replace formidable's part handler so every multipart part is delivered
// here; the handler subscribes to the part's 'data' events itself.
form.onPart = function(part) {
part.addListener('data', function() {
// ...
});
}
If you want to use formidable to only handle certain parts for you,
you can do so:

// Selective override: parts without a filename are passed back to
// formidable's own handlePart; parts with a filename get no handling here.
form.onPart = function(part) {
if (!part.filename) {
// let formidable handle all non-file parts
form.handlePart(part);
}
}

也就是我们需要使用onPart来分段接收前端发过来的数据，然后合成一个文件，生成到指定目录；

当使用formData上传时，在浏览器开发者工具的Network面板里，我们会看到请求中有一项Request Payload，也就是我们发送过去的数据，这是未解析的原始数据；那么，难道我们还要自己解析吗？不会玩了。。。

扒一扒formidable的源代码，会发现有好几个_parser结尾的js文件；再看incoming_form.js里有这么一段：

// Selects and initialises the body parser for the current request by
// inspecting `this.bytesExpected` and the Content-Type header. Rules are
// checked in order and the first match wins; problems are reported through
// `this._error` rather than thrown.
IncomingForm.prototype._parseContentType = function() {
  // bytesExpected is 0 (presumably an empty body): use the dummy parser and
  // skip header inspection entirely.
  if (this.bytesExpected === 0) {
    this._parser = dummyParser(this);
    return;
  }

  // A body is expected but no Content-Type header was sent.
  if (!this.headers['content-type']) {
    this._error(new Error('bad content-type header, no content-type'));
    return;
  }

  if (this.headers['content-type'].match(/octet-stream/i)) {
    this._initOctetStream();
    return;
  }

  if (this.headers['content-type'].match(/urlencoded/i)) {
    this._initUrlencoded();
    return;
  }

  if (this.headers['content-type'].match(/multipart/i)) {
    // Extract the boundary parameter: m[1] is the quoted form
    // (boundary="..."), m[2] the bare form running up to the next ';'.
    var m = this.headers['content-type'].match(/boundary=(?:"([^"]+)"|([^;]+))/i);
    if (m) {
      this._initMultipart(m[1] || m[2]);
    } else {
      this._error(new Error('bad content-type header, no multipart boundary'));
    }
    return;
  }

  if (this.headers['content-type'].match(/json/i)) {
    this._initJSONencoded();
    return;
  }

  // None of the known content types matched.
  this._error(new Error('bad content-type header, unknown content-type: '+this.headers['content-type']));
};

这几条if很是让人欣喜啊，有木有？特别是看到这句：

this.headers['content-type'].match(/boundary=(?:"([^"]+)"|([^;]+))/i);

这不是在解决咱在request headers里看到的request payload吗？终于在心中大喜，咱不用自己解析那堆数据了；接着往下看：

// Default part hook: forwards every part straight to handlePart. Assigning a
// different function to `onPart` therefore bypasses handlePart unless the
// replacement calls it explicitly.
IncomingForm.prototype.onPart = function(part) {
  // this method can be overwritten by the user
  this.handlePart(part);
};

// Built-in handling of one multipart part.
// - Parts whose `filename` is undefined are treated as plain fields: their
//   data is decoded to a string and emitted as a 'field' event.
// - All other parts are written to a File object and emitted as a 'file'
//   event once the file has been ended.
IncomingForm.prototype.handlePart = function(part) {
  var self = this;

  if (part.filename === undefined) {
    var value = ''
      , decoder = new StringDecoder(this.encoding);

    part.on('data', function(buffer) {
      // _fieldsSize is kept on the form, so the limit applies to the running
      // total across fields, not to this part alone.
      self._fieldsSize += buffer.length;
      if (self._fieldsSize > self.maxFieldsSize) {
        self._error(new Error('maxFieldsSize exceeded, received '+self._fieldsSize+' bytes of field data'));
        return;
      }
      value += decoder.write(buffer);
    });

    part.on('end', function() {
      self.emit('field', part.name, value);
    });
    return;
  }

  // Count this file as in flight; decremented after file.end() completes.
  this._flushing++;

  var file = new File({
    path: this._uploadPath(part.filename),
    name: part.filename,
    type: part.mime,
    hash: self.hash
  });

  // Emitted before the file is opened, so listeners receive the File object
  // ahead of any write.
  this.emit('fileBegin', part.name, file);

  file.open();
  this.openedFiles.push(file);

  part.on('data', function(buffer) {
    if (buffer.length == 0) {
      return;
    }
    // Pause the form while this buffer is being written and resume from the
    // write callback.
    self.pause();
    file.write(buffer, function() {
      self.resume();
    });
  });

  part.on('end', function() {
    file.end(function() {
      self._flushing--;
      self.emit('file', part.name, file);
      self._maybeEnd();
    });
  });
};

至此，终于明白作者的话了；自己处理上传的数据，是在handlePart中通过part.on('data')和part.on('end')来收集分段数据，然后生成文件的；那么使用分段上传的话，我们就需要在Nodejs里重写form.handlePart了（默认的onPart只是调用this.handlePart(part)，所以重写handlePart和重写onPart的效果是一样的）；

// Overrides formidable's part handling for chunked uploads: the bytes of one
// uploaded slice are buffered in memory and, when the part ends, appended to
// a single target file so successive slices rebuild the original file.
//
// Relies on `fs`, `req`, `res`, `uploadToken` and `bk` from the enclosing
// request-handler scope (not shown in this snippet). The query string is
// expected to carry `name`, `chunks` (total slice count) and `index`
// (zero-based slice number).
//
// NOTE(review): `req.query.name` is client-controlled and is concatenated
// into a filesystem path; validate or sanitise it before production use.
form.handlePart=function(part) {
  var buffers=[],total=0;

  part.on('data', function(data) {
    if (data.length === 0) {
      return;
    }
    buffers.push(data);
    total+=data.length;
  });

  part.on('end', function() {
    var target='./public/imgs/'+uploadToken+'_'+req.query.name;

    // Answer with a 500 instead of throwing: an exception thrown inside an
    // fs callback cannot be caught by the caller, would take the process
    // down and would leave this response hanging.
    function fail(err) {
      console.error(err);
      res.statusCode=500;
      res.end();
    }

    fs.open(target, 'a', function (openErr, fd) {
      if (openErr) {
        fail(openErr);
        return;
      }
      // position=null writes at the current file position, which is the end
      // of the file in append mode. An explicit 0 only appends on Linux,
      // where the kernel ignores the position for files opened with 'a'.
      fs.write(fd, Buffer.concat(buffers,total), 0, total, null, function(writeErr){
        // Always release the descriptor, and finish the response only after
        // the slice has been written and the file closed.
        fs.close(fd,function(closeErr){
          if (writeErr||closeErr) {
            fail(writeErr||closeErr);
            return;
          }
          // Last slice received: send the completion payload.
          if (Number(req.query.chunks)===Number(req.query.index)+1) {
            res.write(bk);
          }
          res.end();
        });
      });
    });
  });
};

拿到data后生成文件并不难，fs.writeFile、stream都可以的；原谅我初入Nodejs，怎么感觉最后一步的写入文件，这两种方式都特慢呢？不能忍啊，再探！

试来试去，最后还是选择在接收到第一段数据时就生成文件，之后接收到的数据直接push进去；即上面的fs.write(fd,buffer,offset,length,position,cb)；话说明显快了不少呢！而且，意外的收获是：想一想接下来还要实现断点续传呢！想一想，貌似这样做，基本等于Nodejs端的断点续传已经实现了呢；前端记录断点的位置，下次上传时从断点位置开始，然后直接push到这个没上传完的文件里；

到这里，Nodejs端的分段接收文件就可以的了，而且还为之后的断点续传做了个很好的铺垫呢；

好了，对于大文件上传，formidable能做的差不多就这么多了，onPart是必须的；如果大家伙有什么更好的方法，欢迎与我分享！简单的记录，与君共勉，谢谢你能看到这儿！

点击查看更多内容