/**
* CSV String Optimization
* Strongly inspired by OpenRefine
* https://github.com/OpenRefine/OpenRefine
* @class csv-string-optimization
*/
'use strict';
const d3_dsv = require('d3-dsv'),
fs = require('fs'),
fingerprint = require('./src/fingerprint'),
knn = require('./src/knn')
let csv_string_optimization = (function () {
let module = {}
/**
* Load a delimiter separated file.
* @name dsv
* @function
* @memberOf csv-string-optimization
* @param {string} file - Path to file.
* @param {string} delimiter - default:",".
* @return {object} parsed file.
*/
module.dsv = (file, delimiter = ',') => {
let parser = d3_dsv.dsvFormat(delimiter)
return new Promise((resolve, reject) => {
fs.readFile(file, 'utf8', (err, data) => {
if(err){
reject(err)
}else{
try{
let csv = parser.parse(data)
resolve(csv)
}catch(err){
reject(err)
}
}
})
})
}
/**
* Load a JSON file.
* @name json
* @function
* @memberOf csv-string-optimization
* @param {string} file - Path to file.
* @return {object} parsed file.
*/
module.json = (file) => {
return new Promise((resolve, reject) => {
fs.readFile(file, 'utf8', (err, data) => {
if(err){
reject(err)
}else{
try{
let json = JSON.parse(data)
resolve(json)
}catch(err){
reject(err)
}
}
})
})
}
/**
* Save object to CSV file
* @name saveCsv
* @function
* @memberOf csv-string-optimization
* @param {string} path - Path to file.
* @param {array} data - array of objects to be save to CSV.
* @param {string} separator - default:",".
* @return {Void} - .
*/
module.saveCsv = (path, data, separator = ',') => {
let csv = '', keys = []
for(let key in data[0]){
if(csv != '') csv += separator
csv += jfy(key)
keys.push(key)
}
data.forEach(d=>{
csv += '\n'
keys.forEach((k,ki)=>{
if(ki>0) csv += separator
csv += jfy(d[k])
})
})
module.save(path, csv)
}
/**
* Stringify and save JSON
* @name saveJson
* @function
* @memberOf csv-string-optimization
* @param {string} path - Path to file.
* @param {object} data - JSON content.
* @return {Void} - .
*/
module.saveJson = (path, data) => {
module.save(path, JSON.stringify(data))
}
//to be removed
module.loadTemplate = path => {
return module.json(path)
}
/**
* Extract column from object array
* @name extractColumn
* @function
* @memberOf csv-string-optimization
* @param {array} data - array of objects.
* @param {string} column_name - name of to be extracted column.
* @return {array} column.
*/
module.extractColumn = (data, column_name) => {
return data.map( d=> {
return d[column_name ]
})
}
//Exposing the comparison modules
module.fingerprint = fingerprint
module.knn = knn
/**
* Use a template to clean a file
* @name cleanFile
* @function
* @memberOf csv-string-optimization
* @param {array} data - array of objects to be cleaned.
* @param {object} template - Template (parsed JSON).
* @param {string} column_name - Column name of data to be cleaned.
* @return {object} parsed data.
*/
module.cleanFile = (data, template, column_name) => {
//create a hashmap of all replacements
let map = {}
template.forEach(t=>{
let replace = t[0].label
t.forEach(tt =>{
if(tt.ok==2){
replace = tt.label
}
})
t.forEach(tt=>{
map[tt.label] = replace
})
})
data.forEach(d=>{
if(d[column_name] in map) d[column_name] = map[d[column_name]]
})
return data;
}
//To be removed
module.createTemplate = clusters => {
return module.niceFormatting(clusters)
}
/**
* Merge two templates created with this module.
* @name mergeTemplate
* @function
* @memberOf csv-string-optimization
* @param {object} oldTemplate - Old template as parsed json.
* @param {object} newTemplate - New template as parsed json.
* @return {object} merged template.
*/
module.mergeTemplate = (oldTemplate, newTemplate) => {
let map = {}
oldTemplate.forEach((t,ti)=>{
t.forEach((tt,tti)=>{
map[tt.label] = [ti,tti]
})
})
newTemplate.forEach(t=>{
let exists = false
t.forEach(tt=>{
if(tt.label in map){
exists = map[tt.label]
}
})
if(!exists){
oldTemplate.push(t)
}else{
t.forEach(tt=>{
if(!(tt.label in map)){
tt.ok = 0
oldTemplate[exists[0]].push(tt)
}else{
oldTemplate[map[tt.label][0]][map[tt.label][1]].c += tt.c
}
})
}
})
return oldTemplate
}
/**
* Transfor json into a nice to read and edit file.
* @name niceFormatting
* @function
* @memberOf csv-string-optimization
* @param {json} json - to be formatted.
* @return {string} ready to be written to a file.
*/
module.niceFormatting = (json) => {
let template = '[\n'
json.forEach((c,ci)=>{
template += ' [\n'
c.forEach((item,i)=>{
template += ' {"label":'+JSON.stringify(item.label)+',"c":'+item.c+',"ok":'+item.ok+'}'+ ((i==c.length-1)?'':',') +'\n'
})
template += ' ]' + ((ci==json.length-1)?'':',') + '\n'
})
template += ']'
return template
}
/**
* Save a file.
* @name save
* @function
* @memberOf csv-string-optimization
* @param {string} path - Path to destination file.
* @param {string} data - To be written to file.
* @return {Void} - .
*/
module.save = (path, data) => fs.writeFileSync(path, data, 'utf8')
/**
* Shorthand for JSON.stringify.
* @name jfy
* @function
* @memberOf csv-string-optimization
* @param {string} str - to be parsed string
* @return {object} parsed string.
*/
let jfy = (str) => {
return JSON.stringify(str)
}
return module;
})()
module.exports = csv_string_optimization