mirror of https://github.com/FreshRSS/FreshRSS.git
264 lines
7.2 KiB
PHP
264 lines
7.2 KiB
PHP
<?php
|
|
|
|
/**
 * Contains Boolean search from the search form.
 *
 * The input string is parsed into a tree: a FreshRSS_BooleanSearch holds either
 * nested FreshRSS_BooleanSearch objects (one per parenthesised group or piece of
 * text around such groups), or a flat series of FreshRSS_Search objects obtained
 * by splitting the text on the OR keyword. Each node remembers the operator
 * ('AND' or 'OR') with which it must be combined with its preceding sibling.
 */
class FreshRSS_BooleanSearch {

	/** @var string Trimmed input as given to the constructor, before any expansion */
	private $raw_input = '';

	/** @var array<FreshRSS_BooleanSearch|FreshRSS_Search> */
	private $searches = [];

	/** @var string 'AND' or 'OR' */
	private $operator;

	/**
	 * @param string $input Search expression to parse; surrounding whitespace is ignored.
	 * @param int $level Nesting depth. Quote normalisation and expansion of saved user queries
	 *                   are only performed at level 0; sub-expressions are created with $level + 1.
	 * @param string $operator 'AND' or 'OR': how this search is combined with the previous one.
	 */
	public function __construct(string $input, int $level = 0, $operator = 'AND') {
		$this->operator = $operator;
		$input = trim($input);
		if ($input === '') {
			// Nothing to parse: keep an empty list of searches and an empty raw input
			return;
		}
		$this->raw_input = $input;

		if ($level === 0) {
			// The input may carry double quotes HTML-encoded as &quot;
			// Decode those that delimit a quoted value (after an operator's colon,
			// or at the start of a word, possibly negated with ! or -).
			// preg_replace() returns null on PCRE failure: keep the input unchanged in that case.
			$input = preg_replace('/:&quot;(.*?)&quot;/', ':"\1"', $input) ?? $input;
			$input = preg_replace('/(?<=[\s!-]|^)&quot;(.*?)&quot;/', '"\1"', $input) ?? $input;

			$input = $this->parseUserQueryNames($input);
			$input = $this->parseUserQueryIds($input);
		}

		// Either parse everything as a series of BooleanSearch's combined by implicit AND
		// or parse everything as a series of Search's combined by explicit OR
		if (!$this->parseParentheses($input, $level)) {
			$this->parseOrSegments($input);
		}
	}

	/**
	 * Parse the user queries (saved searches) by name and expand them in the input string.
	 *
	 * Recognised forms are `search:"some name"`, `search:'some name'` and `search:name`.
	 * Every occurrence whose name matches a saved query is replaced by that query's
	 * search expression wrapped in parentheses; unknown names are left untouched.
	 */
	private function parseUserQueryNames(string $input): string {
		$all_matches = [];
		// Quoted name (ungreedy, so that the first closing delimiter ends the name)
		if (preg_match_all('/\bsearch:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) {
			$all_matches[] = $matches;
		}
		// Unquoted name: everything up to the next whitespace or double quote
		if (preg_match_all('/\bsearch:(?P<search>[^\s"]*)/', $input, $matches)) {
			$all_matches[] = $matches;
		}

		if (empty($all_matches)) {
			return $input;
		}

		// Index the saved queries by name
		/** @var array<string,FreshRSS_UserQuery> */
		$queries = [];
		foreach (FreshRSS_Context::$user_conf->queries as $raw_query) {
			$query = new FreshRSS_UserQuery($raw_query);
			$queries[$query->getName()] = $query;
		}

		$fromS = [];
		$toS = [];
		foreach ($all_matches as $matches) {
			for ($i = count($matches['search']) - 1; $i >= 0; $i--) {
				$name = trim($matches['search'][$i]);
				if (!empty($queries[$name])) {
					// $matches[0] holds the full matched text, e.g. search:"some name"
					$fromS[] = $matches[0][$i];
					$toS[] = '(' . trim($queries[$name]->getSearch()) . ')';
				}
			}
		}

		return str_replace($fromS, $toS, $input);
	}

	/**
	 * Parse the user queries (saved searches) by ID and expand them in the input string.
	 *
	 * The recognised form is `S:n`, where n is the 1-based position of the saved query
	 * in the user configuration. Every occurrence designating an existing query is
	 * replaced by that query's search expression wrapped in parentheses.
	 */
	private function parseUserQueryIds(string $input): string {
		$all_matches = [];

		if (preg_match_all('/\bS:(?P<search>\d+)/', $input, $matches)) {
			$all_matches[] = $matches;
		}

		if (empty($all_matches)) {
			return $input;
		}

		// Saved queries in configuration order, 0-indexed
		/** @var array<int,FreshRSS_UserQuery> */
		$queries = [];
		foreach (FreshRSS_Context::$user_conf->queries as $raw_query) {
			$queries[] = new FreshRSS_UserQuery($raw_query);
		}

		$fromS = [];
		$toS = [];
		foreach ($all_matches as $matches) {
			for ($i = count($matches['search']) - 1; $i >= 0; $i--) {
				// Index starting from 1
				$id = intval(trim($matches['search'][$i])) - 1;
				if (!empty($queries[$id])) {
					$fromS[] = $matches[0][$i];
					$toS[] = '(' . trim($queries[$id]->getSearch()) . ')';
				}
			}
		}

		return str_replace($fromS, $toS, $input);
	}

	/**
	 * Parse $input as a nested BooleanSearch one level deeper and keep it only if it
	 * produced at least one search.
	 *
	 * @param string $input Text of the sub-expression
	 * @param int $level Nesting depth of the current (parent) search
	 * @param string $operator 'AND' or 'OR': how the sub-expression is combined with the previous one
	 */
	private function addSubSearch(string $input, int $level, string $operator) {
		$search = new FreshRSS_BooleanSearch($input, $level + 1, $operator);
		if (count($search->searches()) > 0) {
			$this->searches[] = $search;
		}
	}

	/**
	 * Split the input on its top-level parenthesised groups.
	 *
	 * The text before a group, the content of the group, and the text after the last
	 * group each become a nested BooleanSearch. An OR keyword immediately before an
	 * opening parenthesis, or at the very start of the text following the last group,
	 * makes the next nested search combined with 'OR' instead of the default 'AND'.
	 *
	 * @return bool True if some parenthesis logic took over, false otherwise
	 */
	private function parseParentheses(string $input, int $level): bool {
		$input = trim($input);
		$length = strlen($input);
		$i = 0;
		$before = '';
		$hasParenthesis = false;
		$nextOperator = 'AND';
		while ($i < $length) {
			$c = $input[$i];

			if ($c === '(') {
				$hasParenthesis = true;

				$before = trim($before);
				if (preg_match('/\bOR$/i', $before)) {
					// Trim trailing OR
					$before = substr($before, 0, -2);

					// The text prior to the OR is a BooleanSearch
					$this->addSubSearch($before, $level, $nextOperator);
					$before = '';

					// The next BooleanSearch will have to be combined with OR instead of default AND
					$nextOperator = 'OR';
				} elseif ($before !== '') {
					// The text prior to the opening parenthesis is a BooleanSearch
					$this->addSubSearch($before, $level, $nextOperator);
					$before = '';
				}

				// Search the matching closing parenthesis
				$parentheses = 1;
				$sub = '';
				$i++;
				while ($i < $length) {
					$c = $input[$i];
					if ($c === '(') {
						// One nested level deeper
						$parentheses++;
					} elseif ($c === ')') {
						$parentheses--;
						if ($parentheses === 0) {
							// Found the matching closing parenthesis
							$this->addSubSearch($sub, $level, $nextOperator);
							$nextOperator = 'AND';
							break;
						}
					}
					// Any character other than the matching closing parenthesis belongs to the group
					$sub .= $c;
					$i++;
				}
				// TODO: Consider throwing an error or warning in case of non-matching parenthesis
				// (an unclosed group is currently dropped; a stray ')' is kept as plain text)
			} else {
				$before .= $c;
			}
			$i++;
		}

		if (!$hasParenthesis) {
			// There was no parenthesis logic to apply
			return false;
		}

		$before = trim($before);
		if (preg_match('/^OR\b/i', $before)) {
			// The next BooleanSearch will have to be combined with OR instead of default AND
			$nextOperator = 'OR';
			// Trim leading OR
			$before = substr($before, 2);
		}

		// The remaining text after the last parenthesis is a BooleanSearch
		$this->addSubSearch($before, $level, $nextOperator);
		return true;
	}

	/**
	 * Split the input on the OR keyword (case-insensitive, whole word) and register
	 * one FreshRSS_Search per resulting segment.
	 *
	 * An OR located inside a quoted phrase must not split the phrase: a segment is
	 * only closed when it contains an even number of double quotes, counting both
	 * the literal character and its HTML-encoded form &quot;
	 */
	private function parseOrSegments(string $input) {
		$input = trim($input);
		if ($input === '') {
			return;
		}
		$splits = preg_split('/\b(OR)\b/i', $input, -1, PREG_SPLIT_DELIM_CAPTURE);
		if ($splits === false) {
			// PCRE failure: fall back to treating the whole input as a single segment
			$splits = [$input];
		}

		$segment = '';
		foreach ($splits as $split) {
			$segment .= $split;
			if (trim($segment) === '' || strcasecmp($segment, 'OR') === 0) {
				// Blank piece or a bare OR delimiter: nothing to search for
				$segment = '';
				continue;
			}
			$quotes = substr_count($segment, '"') + substr_count($segment, '&quot;');
			if ($quotes % 2 === 0) {
				// All quotes are balanced: the segment is complete
				$this->searches[] = new FreshRSS_Search(trim($segment));
				$segment = '';
			}
			// Otherwise we are inside a quoted phrase: keep accumulating, OR included
		}

		// Left-over text with unbalanced quotes is still searched as is
		$segment = trim($segment);
		if ($segment !== '') {
			$this->searches[] = new FreshRSS_Search($segment);
		}
	}

	/**
	 * Either a list of FreshRSS_BooleanSearch combined by implicit AND
	 * or a series of FreshRSS_Search combined by explicit OR
	 * @return array<FreshRSS_BooleanSearch|FreshRSS_Search>
	 */
	public function searches() {
		return $this->searches;
	}

	/** @return string 'AND' or 'OR' depending on how this BooleanSearch should be combined */
	public function operator(): string {
		return $this->operator;
	}

	/**
	 * Append an already-built search to this Boolean search.
	 * @param FreshRSS_BooleanSearch|FreshRSS_Search $search
	 */
	public function add($search) {
		$this->searches[] = $search;
	}

	/** @return string Same as getRawInput() */
	public function __toString(): string {
		return $this->getRawInput();
	}

	/** @return string The trimmed input given to the constructor, or '' if it was blank */
	public function getRawInput(): string {
		return $this->raw_input;
	}
}
|