"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.getHighlights = exports.dropDuplicateSubstrings = exports.getWords = exports.getMatchesFromCollection = exports.getMatchesFromString = exports.joinMatchTags = void 0;
var tslib_1 = require("tslib");
var lodash_1 = require("lodash");
/**
 * Merges neighbouring match tags into a single tag. Whenever a closing
 * `</match>` is followed by exactly one non-word character and then an opening
 * `<match>`, the two tags are removed and only the separating character is
 * kept. Tag names are matched case-insensitively. This is needed for
 * elasticsearch's match_phrase query, which tags each word of a phrase
 * separately.
 * @param str string whose match tags should be joined
 * @returns the string with adjacent match tags merged
 * @example
 * joinMatchTags('<match>Point</match> <match>of</match>') // '<match>Point of</match>'
 */
var joinMatchTags = function (str) {
    var adjacentTags = /<\/match>(\W)<match>/gi;
    // Keep only the single separator character captured between the two tags.
    return str.replace(adjacentTags, function (_whole, separator) {
        return separator;
    });
};
exports.joinMatchTags = joinMatchTags;
/**
 * Extracts the text enclosed by each `<match>...</match>` pair in a string.
 * Tag names are matched case-insensitively and the content is matched lazily,
 * so every pair yields its own entry.
 * @param str string to extract matches from
 * @returns array with the content of every match tag, in order of appearance;
 * empty when the string contains no match tags
 * @example
 * getMatchesFromString('a <match>b c</match> d <match>e</match>') // ['b c', 'e']
 */
var getMatchesFromString = function (str) {
    // `[\s\S]` instead of `.` so that tagged content containing a line break is
    // still captured: joinMatchTags can merge tags across a newline (`\W`
    // matches it), and `.` would make such a highlight silently disappear.
    var reg = /<match>([\s\S]*?)<\/match>/gi;
    return Array.from(str.matchAll(reg)).map(function (x) { return x[1]; });
};
exports.getMatchesFromString = getMatchesFromString;
/**
 * Extracts the matches from every string of a collection. Each string first
 * has its adjacent match tags joined (joinMatchTags) and is then scanned for
 * tagged content (getMatchesFromString).
 * @param collection array of strings containing match tags
 * @returns array with one array of matches per input string, in input order
 */
var getMatchesFromCollection = function (collection) {
    return collection.map(function (fragment) {
        var joined = (0, exports.joinMatchTags)(fragment);
        return (0, exports.getMatchesFromString)(joined);
    });
};
exports.getMatchesFromCollection = getMatchesFromCollection;
/**
 * Splits a string into words, using every single non-word character (`\W`) as
 * a separator. Because each separator character splits on its own, leading,
 * trailing or consecutive separators produce empty strings in the result.
 * @param str string to split into words
 * @returns array of words
 * @example
 * getWords('Point-of-Care') // ['Point', 'of', 'Care']
 * getWords('a, b')          // ['a', '', 'b']
 */
var getWords = function (str) {
    var nonWordCharacter = /\W/;
    return str.split(nonWordCharacter);
};
exports.getWords = getWords;
/**
 * Removes single-word matches that are already covered elsewhere in the
 * collection. A single-word entry is dropped when it is one of the words of
 * any multi-word entry (regardless of which comes first), or when the same
 * single word already appeared earlier. E.g. for 'of' and 'point of care',
 * 'of' is dropped because it is a word of 'point of care'. Multi-word entries
 * are always kept, and the relative order of the kept entries is preserved.
 * Comparison is exact (case-sensitive).
 *
 * This avoids showing the same match twice in the UI, since elasticsearch will
 * sometimes tag single words of a literal match_phrase query throughout the
 * whole text.
 * @param collection array of matches to drop duplicate substrings from
 * @returns new array of matches with duplicate substrings dropped
 * @example
 * dropDuplicateSubstrings(['of', 'point of care', 'inpatient']) // ['point of care', 'inpatient']
 */
var dropDuplicateSubstrings = function (collection) {
    // Split every entry exactly once; the result is needed in both passes.
    var wordsPerEntry = collection.map(function (entry) { return (0, exports.getWords)(entry); });
    // First pass: gather every word that occurs inside a multi-word entry, so
    // the membership test below is a constant-time Set lookup instead of
    // re-scanning the accumulated result for each entry.
    var phraseWords = new Set();
    wordsPerEntry.forEach(function (words) {
        if (words.length > 1)
            words.forEach(function (word) { return phraseWords.add(word); });
    });
    // Second pass: keep everything except covered or repeated single words.
    var seenSingleWords = new Set();
    var kept = [];
    collection.forEach(function (entry, index) {
        if (wordsPerEntry[index].length === 1) {
            if (phraseWords.has(entry) || seenSingleWords.has(entry))
                return;
            seenSingleWords.add(entry);
        }
        kept.push(entry);
    });
    return kept;
};
exports.dropDuplicateSubstrings = dropDuplicateSubstrings;
/**
 * Collects the unique highlighted matches from an expert-highlights object.
 * Every highlight fragment has its adjacent match tags joined, the tagged
 * texts are extracted, exact duplicates are removed, and finally single words
 * that are already part of a multi-word match are dropped (e.g. 'of' is
 * dropped when 'point of care' is present). This avoids showing the same match
 * twice in the UI, since elasticsearch will sometimes tag individual words of
 * a literal match_phrase query with the match tag.
 *
 * @param expertHighlights object whose values are arrays of highlight strings
 * @returns array of unique matches, in order of first appearance
 * @example
 * const expertHighlight = {
 *  'experiences.description': [
 *    'Experience with <match>Point</match> <match>of</match> <match>Care</match> testing (POCT) in an <match>inpatient</match> setting.',
 *    'Experience with <match>Point</match>-<match>of</match>-<match>Care</match> testing (POCT) in an <match>inpatient</match> setting.'
 *   ]
 * }
 * const highlights = getHighlights(expertHighlight) // ['Point of Care', 'inpatient', 'Point-of-Care']
 */
var getHighlights = function (expertHighlights) {
    // 1. values + flatten: object of string arrays -> flat array of fragments
    var fragments = (0, lodash_1.flatten)((0, lodash_1.values)(expertHighlights));
    // 2. getMatchesFromCollection + flatten: matches per fragment -> flat array
    var matches = (0, lodash_1.flatten)((0, exports.getMatchesFromCollection)(fragments));
    // 3. uniq: drop exact duplicate matches
    var uniqueMatches = (0, lodash_1.uniq)(matches);
    // 4. drop single words already covered by a multi-word match
    return (0, exports.dropDuplicateSubstrings)(uniqueMatches);
};
exports.getHighlights = getHighlights;
