Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@ This function converts a boolean query to a 2 dimensional array with all possibi
Input | Output
-------- | ---------
`a AND b` | `[[a, b]]`
`a AND b` | `[[a, b]]`
`a OR b` | `[[a], [b]]`
`a AND b AND c` | `[[a, b, c]]`
`a AND b OR c` | `[[a, b], [c]]`
`a AND (b OR c)` | `[[a, b], [a, c]]`
`a AND (b OR c) AND (d OR e)` | `[[a, b, d], [a, b, e], [a, c, d], [a, c, e]]`

Whereas `a`, `b` and `c` represent words, forming a complex query pattern.
Whereas `a`, `b` and `c` represent terms, forming a complex query pattern.

A term can be written as a single word, or a phrase can be represented by using double quotes to wrap multiple words (e.g. term `a` could be `"foo bar"`).

This function works recursively through all brackets and generates an array of all possible combinations
of a matching query.
Expand Down
86 changes: 83 additions & 3 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,79 @@ function _arraysAreEqual(arrA, arrB) {
return true;
}

/**
 * Public entry point: turns a boolean search phrase into a two-dimensional
 * array of term combinations.
 *
 * Runs a three-step pipeline:
 *   1. `escapeCharactersInQuotes`   - pre-processes the raw phrase,
 *   2. `_parseBooleanQuery`         - parses the escaped phrase,
 *   3. `unescapeCharactersInQuotes` - post-processes the parsed terms.
 *
 * @param {string} searchPhrase - The raw query, e.g. `a AND ("b c" OR d)`.
 * @returns {Array<Array<string>>} The result of `unescapeCharactersInQuotes`.
 */
function parseBooleanQuery(searchPhrase) {
  var escapedPhrase = escapeCharactersInQuotes(searchPhrase);
  var escapedPermutations = _parseBooleanQuery(escapedPhrase);

  return unescapeCharactersInQuotes(escapedPermutations);
}

//var defaultSplitTerm = 'AND';

/**
 * Makes implicit operators explicit: wherever two tokens are separated only
 * by a space, the default operator is inserted between them.
 *
 * The default operator is read from `module.exports.defaultSplitTerm` on every
 * call; the value 'AND' selects AND, any other value selects OR.
 *
 * The implementation first replaces every single space with the default
 * operator and then collapses the sequences this produces around operators
 * that were already present, and around brackets. Operator matching is
 * case-insensitive. Spaces directly after `(` and directly before `)` are
 * removed as a side effect.
 *
 * @param {string} searchPhrase - Query in which terms are separated by single
 *   spaces.
 * @returns {string} The query with an explicit operator between every pair of
 *   adjacent terms.
 */
function injectOperatorBetweenTerms(searchPhrase) {
  // `var` keeps the flag local. A bare assignment would create an implicit
  // global and throws a ReferenceError in strict mode / ES modules.
  var useAnd = (module.exports.defaultSplitTerm === 'AND');

  // Remove leading and trailing whitespace
  searchPhrase = searchPhrase.trim();

  if (useAnd) {
    // Replace all spaces with ' AND ', then remove any extra ANDs.
    // The order of the two collapsing steps is significant for odd inputs,
    // so it is kept exactly as is.
    searchPhrase = searchPhrase.replace(/ /gi, ' AND ');
    // An explicit AND in the input became ' AND AND AND '
    searchPhrase = searchPhrase.replace(/ AND AND AND /gi, ' AND ');
    // An explicit OR in the input became ' AND OR AND '
    searchPhrase = searchPhrase.replace(/ AND OR AND /gi, ' OR ');
    // Drop the operator injected right after '(' and right before ')'
    searchPhrase = searchPhrase.replace(/\( AND /gi, '(');
    searchPhrase = searchPhrase.replace(/ AND \)/gi, ')');
  } else {
    // Replace all spaces with ' OR ', then remove any extra ORs
    searchPhrase = searchPhrase.replace(/ /gi, ' OR ');
    // An explicit AND in the input became ' OR AND OR '
    searchPhrase = searchPhrase.replace(/ OR AND OR /gi, ' AND ');
    // An explicit OR in the input became ' OR OR OR '
    searchPhrase = searchPhrase.replace(/ OR OR OR /gi, ' OR ');
    // Drop the operator injected right after '(' and right before ')'
    searchPhrase = searchPhrase.replace(/\( OR /gi, '(');
    searchPhrase = searchPhrase.replace(/ OR \)/gi, ')');
  }

  return searchPhrase;
}

/**
 * Escapes every double-quoted section of the search phrase so that it contains
 * no spaces, double quotes or parentheses.
 *
 * Each `"..."` section (at least one character between the quotes, no line
 * breaks) is passed through `encodeURI`, which turns spaces into `%20` and the
 * quotes into `%22`. `encodeURI` leaves `(` and `)` untouched, so they are
 * percent-encoded explicitly as `%28` / `%29`. The result can be restored with
 * `decodeURI`. Text outside of quotes, and an unmatched dangling quote, are
 * left unchanged.
 *
 * @param {string} searchPhrase - The raw search phrase.
 * @returns {string} The phrase with all quoted sections escaped.
 */
function escapeCharactersInQuotes(searchPhrase) {
  return searchPhrase.replace(/".+?"/g, function (quotedSection) {
    // Encodes spaces, quotes and any other character that is not URI-safe
    var encoded = encodeURI(quotedSection);

    // Parentheses survive encodeURI; encode them explicitly so a quoted
    // bracket cannot be mistaken for query grouping.
    encoded = encoded.replace(/\(/g, '%28');
    encoded = encoded.replace(/\)/g, '%29');

    return encoded;
  });
}

/**
 * Reverses the escaping applied to quoted sections, for every term of every
 * combination, and strips the surrounding double quotes of quoted terms.
 *
 * Per term:
 *   1. `&#40;` / `&#41;` are turned back into `(` / `)`.
 *      NOTE(review): the original patterns were unreadable (invalid regex
 *      literals); numeric HTML entities are presumably what was intended -
 *      confirm against the escape sequence actually emitted upstream.
 *   2. `decodeURI` restores percent-encoded characters (`%20`, `%22`, `%28`,
 *      `%29`, ...). A term with a malformed percent sequence, such as an
 *      unquoted `100%`, is kept as it is instead of throwing.
 *   3. If the whole term is wrapped in double quotes, the quotes are removed.
 *      A dangling quote is left alone.
 *
 * The input is not modified; new arrays are returned.
 *
 * @param {Array<Array<string>>} permutations - Parsed combinations of terms.
 * @returns {Array<Array<string>>} Combinations with every term unescaped.
 */
function unescapeCharactersInQuotes(permutations) {
  return permutations.map(function (combination) {
    return combination.map(function (term) {
      // Restore parentheses that may have been encoded as entities
      var decoded = term.replace(/&#40;/g, '(').replace(/&#41;/g, ')');

      // Restore spaces, quotes and other percent-encoded characters
      try {
        decoded = decodeURI(decoded);
      } catch (err) {
        if (!(err instanceof URIError)) {
          throw err;
        }
        // Malformed percent sequence: the term was not produced by the
        // escaping step, so keep it undecoded.
      }

      // Strip off quotes that wrap the entire term
      return decoded.replace(/^"(.*)"$/, '$1');
    });
  });
}

// This function converts a boolean query to a 2 dimensional array.
// a AND (b OR c)
// Becomes:
Expand All @@ -34,14 +107,17 @@ function _arraysAreEqual(arrA, arrB) {
// There are more efficient ways to match content to this query, though this is
// the one that is most easy to maintain and limits risk of side-effects.
// Especially when considering recursively nested queries.
function parseBooleanQuery(searchPhrase) {
function _parseBooleanQuery(searchPhrase) {

// Remove outer brackets if they exist. EX: (a OR b) -> a OR b
searchPhrase = removeOuterBrackets(searchPhrase);

// remove double whitespaces
searchPhrase = removeDoubleWhiteSpace(searchPhrase);

// Put the default operator (AND unless configured otherwise) in between all the terms that only have a space between them
searchPhrase = injectOperatorBetweenTerms(searchPhrase);

// Split the phrase on the term 'OR', but don't do this on 'OR' that's in
// between brackets. EX: a OR (b OR c) should not parse the `OR` in between b
// and c.
Expand All @@ -68,7 +144,7 @@ function parseBooleanQuery(searchPhrase) {
// If the string contains brackets, parse it recursively, and add it to
// `nestedPaths`.
if (containsBrackets(ands[i])) {
nestedPaths.push(parseBooleanQuery(ands[i]));
nestedPaths.push(_parseBooleanQuery(ands[i]));
}

// If it doesn't. Push the word to `andPath`.
Expand Down Expand Up @@ -302,5 +378,9 @@ module.exports = {
removeDoubleWhiteSpace: removeDoubleWhiteSpace,
removeOuterBrackets: removeOuterBrackets,
parseBooleanQuery: parseBooleanQuery,
containsBrackets: containsBrackets
containsBrackets: containsBrackets,
escapeCharactersInQuotes: escapeCharactersInQuotes,
unescapeCharactersInQuotes: unescapeCharactersInQuotes,
injectOperatorBetweenTerms: injectOperatorBetweenTerms,
defaultSplitTerm: 'AND'
};
83 changes: 83 additions & 0 deletions test/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,65 @@ describe('String functions', function() {
assert.equal('a b c', bparser.removeDoubleWhiteSpace("a\nb\tc"));
});
});

// Tests for escapeCharactersInQuotes(): only text inside matching double
// quotes should be rewritten. Note that assert.equal is called with the
// expected value first and the actual value second throughout.
describe('escapeCharactersInQuotes', function() {
// A quoted phrase is percent-encoded: quotes become %22, the space %20.
it('Should modify inside quotes to have no spaces', function() {
assert.equal('a %22b%20c%22', bparser.escapeCharactersInQuotes('a "b c"'));
});

// NOTE(review): the expected string still contains literal parentheses even
// though the test title says none should remain - confirm the intended
// escaped form of '(' and ')'.
it('Should modify inside quotes to have no parenthesis', function() {
assert.equal('a %22(b-c)%22', bparser.escapeCharactersInQuotes('a "(b-c)"'));
});

// Every quoted section is escaped independently; unquoted terms are untouched.
it('Should modify multiple sets of quotes', function() {
assert.equal('a %22b%20c%22 d %22e%20f%22 g', bparser.escapeCharactersInQuotes('a "b c" d "e f" g'));
});

// A quote without a closing partner leaves the phrase unchanged.
it('Should ignore dangling quotes', function() {
assert.equal('a "b c', bparser.escapeCharactersInQuotes('a "b c'));
});
});

// Tests for unescapeCharactersInQuotes(): takes a two-dimensional array of
// terms and returns the same structure with escaped terms restored and the
// wrapping quotes removed.
describe('unescapeCharactersInQuotes', function() {
// %22 and %20 are decoded, then the surrounding quotes are stripped.
it('Should restore spaces in terms', function() {
assert.deepEqual([['a'],['b c']], bparser.unescapeCharactersInQuotes([['a'],['%22b%20c%22']]));
});

// NOTE(review): the input here already contains literal parentheses, so this
// only checks that they pass through - confirm whether an escaped form of
// '(' and ')' was meant to be used as input.
it('Should restore parenthesis in terms', function() {
assert.deepEqual([['a'],['(b-c)']], bparser.unescapeCharactersInQuotes([['a'],['%22(b-c)%22']]));
});

// Escaped and plain terms can be mixed; each term is handled on its own.
it('Should restore multiple sets of quotes', function() {
assert.deepEqual([['a'],['b c'],['d'],['e f'],['g']], bparser.unescapeCharactersInQuotes([['a'],['%22b%20c%22'],['d'],['%22e%20f%22'],['g']]));
});

// A term with only a leading quote is not wrapped in quotes, so it is kept.
it('Should ignore dangling quotes', function() {
assert.deepEqual([['a'],['"b'],['c']], bparser.unescapeCharactersInQuotes([['a'],['"b'],['c']]));
});
});

// Tests for injectOperatorBetweenTerms(): terms separated only by a space get
// the default operator inserted, while explicit operators are preserved.
describe('injectOperatorBetweenTerms()', function() {
it('should add in additional ANDs to the searchPhrase by default', function() {
// Implicit operator becomes AND; explicit AND / OR stay as they are.
assert.equal('a AND b', bparser.injectOperatorBetweenTerms('a b'));
assert.equal('a AND b', bparser.injectOperatorBetweenTerms('a AND b'));
assert.equal('a OR b', bparser.injectOperatorBetweenTerms('a OR b'));
// Padding around brackets and at both ends of the phrase is removed.
assert.equal('((a AND (b OR c)) AND (d AND e) AND (f OR g OR h)) OR i OR j', bparser.injectOperatorBetweenTerms(' ( ( a AND ( b OR c ) ) AND ( d AND e ) AND ( f OR g OR h ) ) OR i OR j '));
// Same expected query, with every AND left implicit in the input.
assert.equal('((a AND (b OR c)) AND (d AND e) AND (f OR g OR h)) OR i OR j', bparser.injectOperatorBetweenTerms('((a ( b OR c)) (d e) (f OR g OR h)) OR i OR j'));
});

it('should add in ORs to the searchPhrase, if specified', function() {
// Save off the old split term and override it to 'OR'
var oldSplitTerm = bparser.defaultSplitTerm;
bparser.defaultSplitTerm = 'OR';
// Implicit operator becomes OR; explicit AND / OR stay as they are.
assert.equal('a OR b', bparser.injectOperatorBetweenTerms('a b'));
assert.equal('a AND b', bparser.injectOperatorBetweenTerms('a AND b'));
assert.equal('a OR b', bparser.injectOperatorBetweenTerms('a OR b'));
assert.equal('((a AND (b OR c)) AND (d AND e) AND (f OR g OR h)) OR i OR j', bparser.injectOperatorBetweenTerms(' ( ( a AND ( b OR c ) ) AND ( d AND e ) AND ( f OR g OR h ) ) OR i OR j '));
// Same expected query, with every OR left implicit in the input.
assert.equal('((a AND (b OR c)) AND (d AND e) AND (f OR g OR h)) OR i OR j', bparser.injectOperatorBetweenTerms('((a AND (b c)) AND (d AND e) AND (f g h)) i j'));
// Restore the old split term
// NOTE(review): this line is skipped if an assertion above throws, which
// would leave 'OR' configured for the tests that run afterwards.
bparser.defaultSplitTerm = oldSplitTerm;
});
});
});

describe('query merging functions', function() {
Expand Down Expand Up @@ -177,6 +235,11 @@ describe('query merging functions', function() {
});

describe('parse function', function() {
// Terms separated only by a space are combined with the default operator.
// These expectations assume that default is AND.
it('Should parse a simple query without an operator', function() {
assert.deepEqual([['a', 'b']], bparser.parseBooleanQuery('a b'));
// Explicit and implicit operators can be mixed in one phrase.
assert.deepEqual([['a', 'b','c']], bparser.parseBooleanQuery('a AND b c'));
// A quoted phrase stays a single term, and its quotes are removed.
assert.deepEqual([['a','b c']], bparser.parseBooleanQuery('a "b c"'));
});
it('Should parse a simple query without any brackets', function() {
assert.deepEqual([['a', 'b']], bparser.parseBooleanQuery('a AND b'));
assert.deepEqual([['a'], ['b']], bparser.parseBooleanQuery('a OR b'));
Expand All @@ -189,6 +252,12 @@ describe('parse function', function() {
it('Should parse a simple query a single depth of brackets', function() {
assert.deepEqual([['a', 'c'], ['b', 'c']], bparser.parseBooleanQuery('(a OR b) AND c'));
});
// Quoting a single word must not change the result compared to the bare word.
it('Should parse a simple query a query with quoted terms', function() {
assert.deepEqual([['a', 'c'], ['b', 'c']], bparser.parseBooleanQuery('("a" OR b) AND c'));
});
// Multi-word quoted phrases are kept together as one term, both inside and
// outside of brackets.
it('Should parse a more complex query a query with quoted terms', function() {
assert.deepEqual([['a b', 'e f'], ['c', 'e f']], bparser.parseBooleanQuery('("a b" OR c) AND "e f"'));
});

// This resolves to issue #3 on github
it('Should parse a query, where the final bracket is not related to the first bracket', function() {
Expand Down Expand Up @@ -220,5 +289,19 @@ describe('parse function', function() {
recursiveSort(bparser.parseBooleanQuery(searchPhrase))
);
});
// Nested query in which the quoted terms contain characters that are
// otherwise significant to the parser: a trailing space, unbalanced
// parentheses and the operator words AND / OR. They are expected to come back
// as literal terms.
it('..long shot with quotes', function(){
var searchPhrase = '(("a " AND ("(b" OR "c)")) AND ("d AND" AND "e OR") AND ("(f)" OR g OR h)) OR i OR j';
// Both sides go through recursiveSort (defined elsewhere in this file),
// presumably so that the comparison does not depend on ordering.
assert.deepEqual(
recursiveSort(
[['a ','(b','d AND','e OR','(f)'],
['a ','(b','d AND','e OR','g'],
['a ','(b','d AND','e OR','h'],
['a ','c)','d AND','e OR','(f)'],
['a ','c)','d AND','e OR','g'],
['a ','c)','d AND','e OR','h'],
['i'],['j']]),
recursiveSort(bparser.parseBooleanQuery(searchPhrase))
);
});

});