152 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			152 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
 | 
						|
var fs = require('fs');
 | 
						|
 | 
						|
var utf8  = require('./encoding/utf8'),
 | 
						|
  unicode = require('./encoding/unicode'),
 | 
						|
  mbcs    = require('./encoding/mbcs'),
 | 
						|
  sbcs    = require('./encoding/sbcs'),
 | 
						|
  iso2022 = require('./encoding/iso2022');
 | 
						|
 | 
						|
// Top-level `this`; the functions below call each other through it
// (e.g. `self.detect(...)`).
var self = this;

// One instance of every recogniser. `detect` calls `match(context)` on each
// entry in this order, so the order is kept exactly as listed.
var recognisers = [
  // ./encoding/utf8
  new utf8(),

  // ./encoding/unicode
  new unicode.UTF_16BE(),
  new unicode.UTF_16LE(),
  new unicode.UTF_32BE(),
  new unicode.UTF_32LE(),

  // ./encoding/mbcs
  new mbcs.sjis(),
  new mbcs.big5(),
  new mbcs.euc_jp(),
  new mbcs.euc_kr(),
  new mbcs.gb_18030(),

  // ./encoding/iso2022
  new iso2022.ISO_2022_JP(),
  new iso2022.ISO_2022_KR(),
  new iso2022.ISO_2022_CN(),

  // ./encoding/sbcs
  new sbcs.ISO_8859_1(),
  new sbcs.ISO_8859_2(),
  new sbcs.ISO_8859_5(),
  new sbcs.ISO_8859_6(),
  new sbcs.ISO_8859_7(),
  new sbcs.ISO_8859_8(),
  new sbcs.ISO_8859_9(),
  new sbcs.windows_1251(),
  new sbcs.windows_1256(),
  new sbcs.KOI8_R()
];
 | 
						|
 | 
						|
/**
 * Run every recogniser over `buffer` and report the best guess(es).
 *
 * @param {Buffer} buffer - Bytes to analyse (indexed and measured via `.length`).
 * @param {Object} [opts] - Optional settings.
 * @param {boolean} [opts.returnAllMatches] - When strictly `true`, return the
 *   whole list of matches instead of only the best match's name.
 * @returns {Array|*|null} With `returnAllMatches`, the array of truthy
 *   recogniser results sorted by descending `confidence`; otherwise the `name`
 *   of the highest-confidence result, or `null` when nothing matched.
 */
module.exports.detect = function(buffer, opts) {
  var idx;

  // Tally up the byte occurrence statistics: byteCounts[v] is the number of
  // times byte value v appears in the input.
  var byteCounts = [];
  for (idx = 0; idx < 256; idx += 1) {
    byteCounts.push(0);
  }
  for (idx = 0; idx < buffer.length; idx += 1) {
    byteCounts[buffer[idx] & 0xff] += 1;
  }

  // True as soon as any byte value in 0x80..0x9F occurs at least once.
  var sawC1Byte = false;
  for (idx = 0x80; idx <= 0x9F && !sawC1Byte; idx += 1) {
    sawC1Byte = byteCounts[idx] !== 0;
  }

  // Shared input description handed to every recogniser's `match`.
  var context = {
    fByteStats:  byteCounts,
    fC1Bytes:    sawC1Byte,
    fRawInput:   buffer,
    fRawLength:  buffer.length,
    fInputBytes: buffer,
    fInputLen:   buffer.length
  };

  // Collect the truthy results, in recogniser order.
  var found = [];
  recognisers.forEach(function(recogniser) {
    var result = recogniser.match(context);
    if (result) {
      found.push(result);
    }
  });

  // Highest confidence first.
  found.sort(function(left, right) {
    return right.confidence - left.confidence;
  });

  if (opts && opts.returnAllMatches === true) {
    return found;
  }

  if (found.length > 0) {
    return found[0].name;
  }
  return null;
};
 | 
						|
 | 
						|
/**
 * Asynchronously detect the encoding of a file.
 *
 * @param {string} filepath - Path of the file to inspect.
 * @param {Object|Function} [opts] - Options forwarded to `detect`. May be
 *   omitted, in which case the second argument is used as the callback.
 * @param {number} [opts.sampleSize] - When truthy, at most this many bytes are
 *   read from the start of the file and analysed instead of the whole file.
 * @param {Function} cb - Node-style callback `(err, result)`. On success
 *   `result` is whatever `detect` returns for the bytes read; on failure it is
 *   `null`.
 * @throws When `opts.sampleSize` is set, allocating the sample buffer or
 *   opening the file can throw synchronously instead of reporting through `cb`
 *   (the open is done with `fs.openSync`).
 */
module.exports.detectFile = function(filepath, opts, cb) {
  if (typeof opts === 'function') {
    cb = opts;
    opts = undefined;
  }

  var fd;

  var handler = function(err, buffer) {
    // Compare against undefined rather than truthiness: 0 is a valid
    // descriptor and must be closed too.
    if (fd !== undefined) {
      fs.closeSync(fd);
    }

    if (err) return cb(err, null);
    cb(null, self.detect(buffer, opts));
  };

  if (opts && opts.sampleSize) {
    // Declared locally so overlapping calls never share (or overwrite) one
    // another's buffer. Allocated before opening so that an invalid size
    // cannot leave an open descriptor behind.
    var sample = Buffer.allocUnsafe(opts.sampleSize);
    fd = fs.openSync(filepath, 'r');

    fs.read(fd, sample, 0, opts.sampleSize, null, function(err, bytesRead) {
      if (err) return handler(err, null);

      // The buffer is uninitialised; only the bytes actually read belong to
      // the file, so drop the unread tail before analysing.
      handler(null, bytesRead < sample.length ? sample.slice(0, bytesRead) : sample);
    });
    return;
  }

  fs.readFile(filepath, handler);
};
 | 
						|
 | 
						|
/**
 * Synchronously detect the encoding of a file.
 *
 * @param {string} filepath - Path of the file to inspect.
 * @param {Object} [opts] - Options forwarded to `detect`.
 * @param {number} [opts.sampleSize] - When truthy, at most this many bytes are
 *   read from the file and analysed instead of the whole file.
 * @returns {*} Whatever `detect` returns for the bytes read.
 * @throws Any error raised by the underlying `fs` calls or by the buffer
 *   allocation.
 */
module.exports.detectFileSync = function(filepath, opts) {
  if (opts && opts.sampleSize) {
    // Allocate first so that an invalid size cannot leave an open descriptor.
    var sample = Buffer.allocUnsafe(opts.sampleSize);
    var fd = fs.openSync(filepath, 'r');
    var bytesRead;

    try {
      bytesRead = fs.readSync(fd, sample, 0, opts.sampleSize);
    } finally {
      // Release the descriptor even when the read throws.
      fs.closeSync(fd);
    }

    // The buffer is uninitialised; only the bytes actually read belong to the
    // file, so drop the unread tail before analysing.
    if (bytesRead < sample.length) {
      sample = sample.slice(0, bytesRead);
    }
    return self.detect(sample, opts);
  }

  return self.detect(fs.readFileSync(filepath), opts);
};
 | 
						|
 | 
						|
// Wrappers for the previous functions to return all encodings
 | 
						|
/**
 * Like `detect`, but always asks for every match rather than the best one.
 *
 * @param {Buffer} buffer - Bytes to analyse.
 * @param {Object} [opts] - Options forwarded to `detect`. Anything that is not
 *   a non-null object is treated as "no options". The object is copied, not
 *   modified.
 * @returns {*} The result of `detect` called with `returnAllMatches: true`.
 */
module.exports.detectAll = function(buffer, opts) {
  // `typeof null` is 'object', so null has to be excluded explicitly.
  var given = (opts !== null && typeof opts === 'object') ? opts : {};

  // Work on a shallow copy so the caller's options object is left untouched.
  var allOpts = Object.assign({}, given, { returnAllMatches: true });
  return self.detect(buffer, allOpts);
};
 | 
						|
 | 
						|
/**
 * Like `detectFile`, but always asks for every match rather than the best one.
 *
 * @param {string} filepath - Path of the file to inspect.
 * @param {Object|Function} [opts] - Options forwarded to `detectFile`. May be
 *   omitted, in which case the second argument is used as the callback.
 *   Anything that is not a non-null object is treated as "no options". The
 *   object is copied, not modified.
 * @param {Function} cb - Callback handed on to `detectFile`.
 */
module.exports.detectFileAll = function(filepath, opts, cb) {
  if (typeof opts === 'function') {
    cb = opts;
    opts = undefined;
  }

  // `typeof null` is 'object', so null has to be excluded explicitly.
  var given = (opts !== null && typeof opts === 'object') ? opts : {};

  // Work on a shallow copy so the caller's options object is left untouched.
  var allOpts = Object.assign({}, given, { returnAllMatches: true });
  self.detectFile(filepath, allOpts, cb);
};
 | 
						|
 | 
						|
/**
 * Like `detectFileSync`, but always asks for every match rather than the best
 * one.
 *
 * @param {string} filepath - Path of the file to inspect.
 * @param {Object} [opts] - Options forwarded to `detectFileSync`. Anything
 *   that is not a non-null object is treated as "no options". The object is
 *   copied, not modified.
 * @returns {*} The result of `detectFileSync` called with
 *   `returnAllMatches: true`.
 */
module.exports.detectFileAllSync = function(filepath, opts) {
  // `typeof null` is 'object', so null has to be excluded explicitly.
  var given = (opts !== null && typeof opts === 'object') ? opts : {};

  // Work on a shallow copy so the caller's options object is left untouched.
  var allOpts = Object.assign({}, given, { returnAllMatches: true });
  return self.detectFileSync(filepath, allOpts);
};
 |