Help me improve the performance of the following code. When csvInput is large, it takes too much time to run:
/**
 * Parses a CSV string into a 2-D array of field strings.
 * Splits on line breaks (tolerating Windows `\r\n` as well as `\n`, so a
 * trailing `\r` no longer leaks into the last field of each row), then splits
 * each row on commas. Note: fields containing quoted commas are not supported.
 *
 * @param {string} csvString raw CSV text
 * @returns {string[][]} one inner array of fields per row
 */
function parseCsv(csvString) {
  return csvString.split(/\r?\n/).map((row) => row.split(','));
}
/**
 * Serializes a 2-D array back into CSV text: fields joined with commas,
 * rows joined with newlines. No quoting/escaping is performed.
 *
 * @param {Array<Array<*>>} arr rows of field values
 * @returns {string} CSV text
 */
function stringifyCsv(arr) {
  const lines = [];
  for (const row of arr) {
    lines.push(row.join(','));
  }
  return lines.join('\n');
}
/**
 * Flags each CSV row as "settled" or "unSettled".
 * For every vendor, finds the largest subset of that vendor's rows whose
 * amounts sum to exactly zero (ties broken by the smallest sum of parsed
 * dates) and marks those rows "settled"; all other rows stay "unSettled".
 * Input rows are `vendor,date,amount`; the status is appended as a 4th column
 * and the original row order is preserved.
 *
 * Fixes vs. the previous version:
 *  - amounts are summed in integer cents, so binary-float rounding can no
 *    longer prevent a subset from summing to exactly 0;
 *  - the best subset is recorded by ORIGINAL row index (the old code stored
 *    positions local to the vendor group but looked them up by original
 *    index, mislabelling rows of any vendor whose rows were not first);
 *  - each date is parsed once per row instead of once per visited subset;
 *  - the recursion reuses one mutable subset array (push/pop) instead of
 *    copying two arrays at every step.
 * NOTE: the search itself is still exponential (2^n subsets per vendor).
 *
 * @param {string} csvString CSV with rows `vendor,date,amount`
 * @returns {string} the same CSV with a settled/unSettled column appended
 */
function setSettleInfoWithVendorCsv(csvString) {
  const arr = parseCsv(csvString);
  // Group rows by vendor, pre-parsing date and amount exactly once per row.
  const groupedByVendor = arr.reduce((acc, item, index) => {
    const vendor = item[0];
    if (!acc[vendor]) {
      acc[vendor] = [];
    }
    acc[vendor].push({
      item,
      index, // original row index in the file
      date: new Date(item[1]).getTime(),
      cents: Math.round(Number(item[2]) * 100), // exact integer arithmetic
    });
    return acc;
  }, {});
  const settledInfo = Object.entries(groupedByVendor).flatMap(([vendor, itemsWithIndex]) => {
    let bestSettledIndexes = new Set();
    let minDateSum = Number.MAX_SAFE_INTEGER;
    // Record `subset` as the new best if it is strictly larger, or the same
    // size with a smaller total of parsed dates.
    function checkAndUpdateBest(subset) {
      const dateSum = subset.reduce((acc, entry) => acc + entry.date, 0);
      if (subset.length > bestSettledIndexes.size ||
          (subset.length === bestSettledIndexes.size && dateSum < minDateSum)) {
        // Store ORIGINAL row indexes so the lookup below matches.
        bestSettledIndexes = new Set(subset.map((entry) => entry.index));
        minDateSum = dateSum;
      }
    }
    // Exhaustive 0/1 subset search; `currentSubset` is shared and restored
    // via push/pop, avoiding per-step array copies.
    function findSubsets(pos = 0, currentSubset = [], runningSum = 0) {
      if (pos === itemsWithIndex.length) {
        if (runningSum === 0) {
          checkAndUpdateBest(currentSubset);
        }
        return;
      }
      const entry = itemsWithIndex[pos];
      currentSubset.push(entry); // include this row
      findSubsets(pos + 1, currentSubset, runningSum + entry.cents);
      currentSubset.pop(); // exclude this row
      findSubsets(pos + 1, currentSubset, runningSum);
    }
    findSubsets();
    return itemsWithIndex.map(({ item, index }) => ({
      item: [...item, bestSettledIndexes.has(index) ? "settled" : "unSettled"],
      index,
    }));
  });
  // Restore original file order before serializing.
  const sortedResult = settledInfo.sort((a, b) => a.index - b.index).map(({ item }) => item);
  return stringifyCsv(sortedResult);
}
// Sample input: one row per candidate, columns are vendor,date,amount.
const csvInput = `999999,2017-09-30,-43764.00
999999,2017-09-30,81357.21
999999,2017-09-30,-31610.44
9999,2018-05-31,-16711.61`;
// Prints the CSV with a settled/unSettled status appended to every row.
console.log(setSettleInfoWithVendorCsv(csvInput));
Optimizing the performance of this code involves several strategies, such as reducing the complexity of operations, avoiding unnecessary computations, and leveraging more efficient data structures. Here are some key improvements:
Avoid Recursive Subset Search: The recursive subset search has exponential complexity and is not feasible for large datasets. Replace it with dynamic programming over reachable sums; note that a plain greedy scan is not guaranteed to find valid zero-sum subsets.
Efficient Date Parsing: Parsing dates multiple times is inefficient. Parse dates once and reuse the parsed values.
Avoid Deep Copies: When combining arrays or objects, avoid deep copies to reduce overhead.
Here's a revised version of the code with these improvements:
/**
 * Parses a CSV string into a 2-D array of field strings.
 * Splits on line breaks (tolerating Windows `\r\n` as well as `\n`, so a
 * trailing `\r` no longer leaks into the last field of each row), then splits
 * each row on commas. Note: fields containing quoted commas are not supported.
 *
 * @param {string} csvString raw CSV text
 * @returns {string[][]} one inner array of fields per row
 */
function parseCsv(csvString) {
  return csvString.split(/\r?\n/).map((row) => row.split(','));
}
/**
 * Serializes a 2-D array back into CSV text: fields joined with commas,
 * rows joined with newlines. No quoting/escaping is performed.
 *
 * @param {Array<Array<*>>} arr rows of field values
 * @returns {string} CSV text
 */
function stringifyCsv(arr) {
  const lines = [];
  for (const row of arr) {
    lines.push(row.join(','));
  }
  return lines.join('\n');
}
/**
 * Flags each CSV row as "settled" or "unSettled".
 * For every vendor, finds the largest subset of that vendor's rows whose
 * amounts sum to exactly zero (ties broken by the smallest sum of parsed
 * dates) and marks those rows "settled"; all other rows stay "unSettled".
 * Input rows are `vendor,date,amount`; the status is appended as a 4th column
 * and the original row order is preserved.
 *
 * The previous "optimized" greedy scan was incorrect: after sorting by date
 * it only recognised consecutive runs whose running total hit exactly 0, so
 * it missed valid zero-sum subsets and could mark rows that do not settle.
 * This version uses a 0/1 dynamic program over reachable sums instead:
 * one Map entry per distinct reachable cent-sum, keeping the dominant
 * (largest count, then smallest dateSum) state per sum — a state with more
 * rows at the same sum always extends at least as well, so the dominance is
 * safe. Amounts are summed in integer cents so float rounding cannot prevent
 * a subset from reaching exactly 0. Complexity is O(rows × distinct sums)
 * per vendor, pseudo-polynomial instead of the original 2^n recursion.
 *
 * @param {string} csvString CSV with rows `vendor,date,amount`
 * @returns {string} the same CSV with a settled/unSettled column appended
 */
function setSettleInfoWithVendorCsv(csvString) {
  const arr = parseCsv(csvString);
  // Group rows by vendor, pre-parsing date and amount exactly once per row.
  const groupedByVendor = arr.reduce((acc, item, index) => {
    const vendor = item[0];
    if (!acc[vendor]) {
      acc[vendor] = [];
    }
    acc[vendor].push({
      item,
      index, // original row index in the file
      date: new Date(item[1]).getTime(),
      cents: Math.round(Number(item[2]) * 100), // exact integer arithmetic
    });
    return acc;
  }, {});
  const settledInfo = Object.entries(groupedByVendor).flatMap(([vendor, itemsWithIndex]) => {
    // bestBySum: reachable cent-sum -> dominant partial subset for that sum.
    const bestBySum = new Map([[0, { count: 0, dateSum: 0, indexes: [] }]]);
    for (const { index, date, cents } of itemsWithIndex) {
      // Snapshot the entries so each row is added at most once per subset.
      for (const [sum, state] of [...bestBySum.entries()]) {
        const nextSum = sum + cents;
        const candidate = {
          count: state.count + 1,
          dateSum: state.dateSum + date,
          indexes: [...state.indexes, index],
        };
        const current = bestBySum.get(nextSum);
        if (!current ||
            candidate.count > current.count ||
            (candidate.count === current.count && candidate.dateSum < current.dateSum)) {
          bestBySum.set(nextSum, candidate);
        }
      }
    }
    // The winner is the dominant subset that nets to exactly zero cents;
    // the empty subset (count 0) means nothing settles for this vendor.
    const winner = bestBySum.get(0);
    const bestSettledIndexes = new Set(winner ? winner.indexes : []);
    return itemsWithIndex.map(({ item, index }) => ({
      item: [...item, bestSettledIndexes.has(index) ? "settled" : "unSettled"],
      index,
    }));
  });
  // Restore original file order before serializing.
  const sortedResult = settledInfo.sort((a, b) => a.index - b.index).map(({ item }) => item);
  return stringifyCsv(sortedResult);
}
// Sample input: one row per candidate, columns are vendor,date,amount.
const csvInput = `999999,2017-09-30,-43764.00
999999,2017-09-30,81357.21
999999,2017-09-30,-31610.44
9999,2018-05-31,-16711.61`;
// Prints the CSV with a settled/unSettled status appended to every row.
console.log(setSettleInfoWithVendorCsv(csvInput));
This approach should significantly reduce the runtime for large CSV inputs.