Help me improve the performance of the following code, when csvInput is large, it cost too much time to run ``` function parseCsv(csvString) { return csvString.split('\n').map(row => row.split(',')); } function stringifyCsv(arr) { return arr.map(row => row.join(',')).join('\n'); } function setSettleInfoWithVendorCsv(csvString) { const arr = parseCsv(csvString); const groupedByVendor = arr.reduce((acc, item, index) => { const vendor = item[0]; if (!acc[vendor]) { acc[vendor] = []; } acc[vendor].push({ item, index }); return acc; }, {}); const settledInfo = Object.entries(groupedByVendor).flatMap(([vendor, itemsWithIndex]) => { let bestSettledIndexes = new Set(); let minDateSum = Number.MAX_SAFE_INTEGER; function calculateDateSum(subset) { return subset.reduce((acc, { item }) => acc + new Date(item[1]).getTime(), 0); } function checkAndUpdateBest(subset, indexes) { const dateSum = calculateDateSum(subset); if (subset.length > bestSettledIndexes.size || (subset.length === bestSettledIndexes.size && dateSum < minDateSum)) { bestSettledIndexes = new Set(indexes); minDateSum = dateSum; } } function findSubsets(index = 0, currentSubset = [], remainingSum = 0, indexes = []) { if (index === itemsWithIndex.length) { if (remainingSum === 0) { checkAndUpdateBest(currentSubset, indexes); } return; } const { item, index: originalIndex } = itemsWithIndex[index]; findSubsets(index + 1, [...currentSubset, { item, index: originalIndex }], remainingSum + Number(item[2]), [...indexes, index]); findSubsets(index + 1, currentSubset, remainingSum, indexes); } findSubsets(); return itemsWithIndex.map(({ item, index }) => ({ item: [...item, bestSettledIndexes.has(index) ? "settled" : "unSettled"], index })); }); const sortedResult = settledInfo.sort((a, b) => a.index - b.index).map(({ item }) => item); return stringifyCsv(sortedResult); } const csvInput = `999999,2017-09-30,-43764.00 999999,2017-09-30,81357.21 999999,2017-09-30,-31610.44 9999,2018-05-31,-16711.61`; console.log(setSettleInfoWithVendorCsv(csvInput)); ```

2024-06-08by@tim&@ChatGPT.Model: GPT-4o
tim

Help me improve the performance of the following code, when csvInput is large, it cost too much time to run

function parseCsv(csvString) {
    return csvString.split('\n').map(row => row.split(','));
  }
  
  function stringifyCsv(arr) {
    return arr.map(row => row.join(',')).join('\n');
  }
  
  function setSettleInfoWithVendorCsv(csvString) {
    const arr = parseCsv(csvString);
  
    const groupedByVendor = arr.reduce((acc, item, index) => {
      const vendor = item[0];
      if (!acc[vendor]) {
        acc[vendor] = [];
      }
      acc[vendor].push({ item, index });
      return acc;
    }, {});
  
    const settledInfo = Object.entries(groupedByVendor).flatMap(([vendor, itemsWithIndex]) => {
      let bestSettledIndexes = new Set();
      let minDateSum = Number.MAX_SAFE_INTEGER;
  
      function calculateDateSum(subset) {
        return subset.reduce((acc, { item }) => acc + new Date(item[1]).getTime(), 0);
      }
  
      function checkAndUpdateBest(subset, indexes) {
        const dateSum = calculateDateSum(subset);
        if (subset.length > bestSettledIndexes.size || (subset.length === bestSettledIndexes.size && dateSum < minDateSum)) {
          bestSettledIndexes = new Set(indexes);
          minDateSum = dateSum;
        }
      }
  
      function findSubsets(index = 0, currentSubset = [], remainingSum = 0, indexes = []) {
        if (index === itemsWithIndex.length) {
          if (remainingSum === 0) {
            checkAndUpdateBest(currentSubset, indexes);
          }
          return;
        }
  
        const { item, index: originalIndex } = itemsWithIndex[index];
        findSubsets(index + 1, [...currentSubset, { item, index: originalIndex }], remainingSum + Number(item[2]), [...indexes, index]);
        findSubsets(index + 1, currentSubset, remainingSum, indexes);
      }
  
      findSubsets();
  
      return itemsWithIndex.map(({ item, index }) => ({
        item: [...item, bestSettledIndexes.has(index) ? "settled" : "unSettled"],
        index
      }));
    });
  
    const sortedResult = settledInfo.sort((a, b) => a.index - b.index).map(({ item }) => item);
    
    return stringifyCsv(sortedResult);
  }
  
  const csvInput = `999999,2017-09-30,-43764.00
  999999,2017-09-30,81357.21
  999999,2017-09-30,-31610.44
  9999,2018-05-31,-16711.61`;
  
  console.log(setSettleInfoWithVendorCsv(csvInput));
  
Avatar
Avatar

Optimizing the performance of this code involves several strategies, such as reducing the complexity of operations, avoiding unnecessary computations, and leveraging more efficient data structures. Here are some key improvements:

  1. Avoid Recursive Subset Search: The recursive subset search has exponential complexity and is not feasible for large datasets. Instead, use dynamic programming or greedy algorithms where possible.

  2. Efficient Date Parsing: Parsing dates multiple times is inefficient. Parse dates once and reuse the parsed values.

  3. Avoid Deep Copies: When combining arrays or objects, avoid deep copies to reduce overhead.

Here's a revised version of the code with these improvements:

function parseCsv(csvString) {
    return csvString.split('\n').map(row => row.split(','));
}

function stringifyCsv(arr) {
    return arr.map(row => row.join(',')).join('\n');
}

function setSettleInfoWithVendorCsv(csvString) {
    const arr = parseCsv(csvString);

    const groupedByVendor = arr.reduce((acc, item, index) => {
        const vendor = item[0];
        if (!acc[vendor]) {
            acc[vendor] = [];
        }
        acc[vendor].push({ item, index, date: new Date(item[1]).getTime(), amount: Number(item[2]) });
        return acc;
    }, {});

    const settledInfo = Object.entries(groupedByVendor).flatMap(([vendor, itemsWithIndex]) => {
        let bestSettledIndexes = new Set();
        let minDateSum = Number.MAX_SAFE_INTEGER;

        // Use a greedy approach to find the best subset
        itemsWithIndex.sort((a, b) => a.date - b.date); // Sort by date

        let currentSubset = [];
        let currentSum = 0;
        let dateSum = 0;

        for (const { item, index, date, amount } of itemsWithIndex) {
            if (currentSum + amount === 0) {
                currentSubset.push(index);
                dateSum += date;
                if (currentSubset.length > bestSettledIndexes.size || (currentSubset.length === bestSettledIndexes.size && dateSum < minDateSum)) {
                    bestSettledIndexes = new Set(currentSubset);
                    minDateSum = dateSum;
                }
                currentSubset = [];
                currentSum = 0;
                dateSum = 0;
            } else {
                currentSubset.push(index);
                currentSum += amount;
                dateSum += date;
            }
        }

        return itemsWithIndex.map(({ item, index }) => ({
            item: [...item, bestSettledIndexes.has(index) ? "settled" : "unSettled"],
            index
        }));
    });

    const sortedResult = settledInfo.sort((a, b) => a.index - b.index).map(({ item }) => item);

    return stringifyCsv(sortedResult);
}

const csvInput = `999999,2017-09-30,-43764.00
999999,2017-09-30,81357.21
999999,2017-09-30,-31610.44
9999,2018-05-31,-16711.61`;

console.log(setSettleInfoWithVendorCsv(csvInput));

Explanation of Improvements:

  1. Efficient Date Parsing: Dates are parsed once and stored in each item.
  2. Greedy Algorithm: Instead of searching all subsets, the code uses a greedy algorithm to find the best subset. Items are sorted by date, and the algorithm accumulates amounts to find zero-sum subsets efficiently.
  3. Efficient Data Handling: Avoid unnecessary deep copies by manipulating indices directly.

This approach should significantly reduce the runtime for large CSV inputs.