<?php
namespace App\Service;
use App\Enum\OrganizationStatusEnum;
use Elastica\Query;
use Elastica\Query\AbstractQuery;
use Elastica\Query\BoolQuery;
use Elastica\Query\Match;
use Elastica\Query\MatchPhrasePrefix;
use Elastica\Query\MatchQuery;
use Elastica\Query\Nested;
use Elastica\Query\Prefix;
use Elastica\Query\Term;
use Elastica\Query\Terms;
use Elastica\Query\Type;
use Elastica\Query\Wildcard;
use Elastica\Search;
use FOS\ElasticaBundle\Configuration\ConfigManager;
use FOS\ElasticaBundle\Elastica\Client;
use FOS\ElasticaBundle\Transformer\ElasticaToModelTransformerCollection;
/**
 * Builds and runs Elasticsearch queries for the application.
 *
 * Offers a context-dependent, scored search across the configured indexes
 * (see {@see self::search()}) and two helpers dedicated to the letters index.
 */
final class ElasticSearchService
{
    public const CONTEXT_ADMIN = 'admin';
    public const CONTEXT_MAILBOX = 'mailbox';

    private Client $client;
    private ConfigManager $configManager;
    private ElasticaToModelTransformerCollection $transformers;

    public function __construct(
        Client $client,
        ConfigManager $configManager,
        ElasticaToModelTransformerCollection $transformers
    ) {
        $this->client = $client;
        $this->configManager = $configManager;
        $this->transformers = $transformers;
    }

    /**
     * Search documents matching a term, restricted and scored according to the context.
     *
     * @param string      $context  One of the CONTEXT_* constants; any other value applies
     *                              no type filter, no mailbox filter and no scoring clause
     * @param string      $term     Raw search term (trimmed and lowercased before use)
     * @param string|null $postCode When not null, only documents whose `postalCode` matches are kept
     *
     * @return array Result of the model transformer applied to the 10 best hits at most
     */
    public function search(string $context, string $term, ?string $postCode = null): array
    {
        // ES analyzer lowercases indexed data
        $term = mb_strtolower(trim($term));
        $bool = new BoolQuery();

        /* Filter context: keep only documents matching these conditions */

        // Search only on the indexes of this env (as prod & staging share the same ES server)
        $bool->addFilter(new Terms('_index', $this->getRealIndexNames()));

        // Filter on documents type, depending on the context
        foreach ($this->getTypesForContext($context) as $type) {
            $bool->addFilter(new Type($type));
        }

        // If a postcode is provided, keep only documents with it
        if (null !== $postCode) {
            $bool->addFilter(new MatchQuery('postalCode', $postCode));
        }

        if (self::CONTEXT_MAILBOX === $context) {
            $this->addMailboxFilters($bool);
        }

        /* Query context: matching conditions will be used to calculate score */
        foreach ($this->getScoresForContext($context) as $key => $boost) {
            // Keys read "type.field" or "type.nested.field"; reversed, the field always comes first
            $parts = array_reverse(explode('.', $key));
            $field = $parts[0];
            if (isset($parts[2])) {
                $nested = $parts[1];
                $type = $parts[2];
            } else {
                // A key without any dot has no type: skip it instead of reading a missing offset
                $type = $parts[1] ?? null;
                $nested = null;
            }

            if (null === $type) {
                continue;
            }

            $bool->addShould($this->buildScoreClause($type, $field, $nested, $term, $boost));
        }

        $query = (new Query())
            ->setQuery($bool)
            ->setMinScore(1)
            ->setSize(10);

        $resultSet = (new Search($this->client))->search($query);

        // Transform ES results into Doctrine entities
        return $this->transformers->transform($resultSet->getResults());
    }

    /**
     * Search the letters of one organization whose `content` matches the term.
     *
     * @return array<int, array{letterId: mixed}> One entry per hit, holding the `letterId` of its source
     */
    public function searchInLetters(string $organizationId, string $term): array
    {
        $term = mb_strtolower(trim($term));

        $boolQuery = (new BoolQuery())
            ->addMust(new Terms('_index', $this->getLetterIndexNames()))
            ->addFilter(new Term(['organizationId' => $organizationId]))
            ->addMust(new MatchQuery('content', $term));

        $resultSet = (new Search($this->client))->search($boolQuery);

        return array_map(
            static fn ($result) => ['letterId' => $result->getSource()['letterId']],
            $resultSet->getResults()
        );
    }

    /**
     * Tell whether a document exists in the letters index for this organization and letter id.
     */
    public function letterHasAlreadyIndexed(string $organizationId, string $letterId): bool
    {
        $boolQuery = (new BoolQuery())
            ->addMust(new Terms('_index', $this->getLetterIndexNames()))
            ->addFilter(new Term(['organizationId' => $organizationId]))
            ->addMust(new Term(['letterId' => $letterId]));

        $resultSet = (new Search($this->client))->search($boolQuery);

        return $resultSet->count() > 0;
    }

    /**
     * Resolve the Elasticsearch name of every index declared in the configuration.
     *
     * @return string[]
     */
    private function getRealIndexNames(): array
    {
        $names = [];
        foreach ($this->configManager->getIndexNames() as $index) {
            $names[] = $this->configManager->getIndexConfiguration($index)->getElasticSearchName();
        }

        return $names;
    }

    /**
     * Resolve the Elasticsearch names of the indexes whose name contains "app_letters".
     *
     * @return string[]
     */
    private function getLetterIndexNames(): array
    {
        // array_filter() preserves keys: re-index so the names always form a sequential list,
        // whatever position the letters index has in the configuration.
        return array_values(array_filter(
            $this->getRealIndexNames(),
            static fn ($index) => false !== mb_strpos($index, 'app_letters')
        ));
    }

    /**
     * Add the mailbox-only exclusions: lost, deleted, canceled and new without bal.
     *
     * NOTE(review): in the last-but-one filter both must_not clauses apply independently
     * (status NEW on one side, `bal` matching an empty string on the other), which looks
     * broader than "new AND without bal" - confirm the intent before changing it.
     */
    private function addMailboxFilters(BoolQuery $bool): void
    {
        $bool->addFilter(
            (new BoolQuery())
                ->addMustNot(new Terms('status', [
                    OrganizationStatusEnum::LOST,
                    OrganizationStatusEnum::DELETED,
                ]))
        );
        $bool->addFilter(
            (new BoolQuery())
                ->addMustNot(new Terms('status', [OrganizationStatusEnum::NEW]))
                ->addMustNot(new MatchQuery('bal', ''))
        );
        $bool->addFilter(
            (new BoolQuery())
                ->addMustNot(new MatchPhrasePrefix('legalName', 'CANCELED_'))
        );
    }

    /**
     * Build one scoring clause: the document must be of the given type and the field
     * (optionally inside a nested path) must match the search term.
     */
    private function buildScoreClause(string $type, string $field, ?string $nested, string $term, float $boost): BoolQuery
    {
        $path = $nested ? "{$nested}.{$field}" : $field;

        $matcher = (new BoolQuery())
            // Extra boost for exact match
            ->addShould(new MatchQuery($path, $term))
            // Condition depends of the field
            ->addShould($this->getConditionForField($field, $term, $boost * 2, $nested ?: null));

        $clause = (new BoolQuery())->addMust(new Type($type));

        if ($nested) {
            return $clause->addMust(
                (new Nested())
                    ->setParam('ignore_unmapped', true)
                    ->setPath($nested)
                    ->setQuery($matcher)
            );
        }

        return $clause->addMust($matcher);
    }

    /**
     * Return a list of scores to boost some fields during search, depending on the context.
     * Score format: `_type.field => boost` or `_type.nested.field => boost`.
     *
     * @return array<string, int> Sorted by decreasing boost; empty for an unknown context
     */
    private function getScoresForContext(string $context): array
    {
        $scores = [];

        if (self::CONTEXT_MAILBOX === $context) {
            $scores = [
                'organization.bal' => 5,
                'organization.legalName' => 4,
                'organization.alternateName' => 4,
                'organization.legalRepresentatives.legalName' => 3,
                'organization.legalRepresentatives.givenName' => 2,
                'organization.legalRepresentatives.familyName' => 2,
                'legalRepresentatives.children.familyName' => 2,
                'legalRepresentatives.children.givenName' => 2,
            ];
        }

        if (self::CONTEXT_ADMIN === $context) {
            $scores = [
                'organization.legalName' => 4,
                'organization.bal' => 3,
                'person.legalName' => 4,
                'person.givenName' => 2,
                'person.familyName' => 2,
            ];
        }

        // Sort scores decreasingly, as first condition matched should give the highest score.
        // The comparator must return an integer (<0, 0, >0), hence the spaceship operator.
        uasort($scores, static fn ($a, $b): int => $b <=> $a);

        return $scores;
    }

    /**
     * Build the boosted condition used for a field: prefix search for `bal`,
     * "contains" wildcard search for every other field.
     */
    private function getConditionForField(string $field, string $term, float $boost, ?string $nested = null): AbstractQuery
    {
        $path = $nested ? "{$nested}.{$field}" : $field;

        // Prefix search for BAL only
        if ('bal' === $field) {
            return (new Prefix())->setPrefix($path, $term, $boost);
        }

        return (new Wildcard())->setValue($path, "*{$term}*", $boost);
    }

    /**
     * Return the document types the search is restricted to for a context
     * (only the mailbox context restricts them).
     *
     * @return string[]
     */
    private function getTypesForContext(string $context): array
    {
        if (self::CONTEXT_MAILBOX === $context) {
            return ['organization'];
        }

        return [];
    }
}