src/Service/ElasticSearchService.php line 138

Open in your IDE?
  1. <?php
  2. namespace App\Service;
  3. use App\Enum\OrganizationStatusEnum;
  4. use Elastica\Query;
  5. use Elastica\Query\AbstractQuery;
  6. use Elastica\Query\BoolQuery;
  7. use Elastica\Query\Match;
  8. use Elastica\Query\MatchPhrasePrefix;
  9. use Elastica\Query\MatchQuery;
  10. use Elastica\Query\Nested;
  11. use Elastica\Query\Prefix;
  12. use Elastica\Query\Term;
  13. use Elastica\Query\Terms;
  14. use Elastica\Query\Type;
  15. use Elastica\Query\Wildcard;
  16. use Elastica\Search;
  17. use FOS\ElasticaBundle\Configuration\ConfigManager;
  18. use FOS\ElasticaBundle\Elastica\Client;
  19. use FOS\ElasticaBundle\Transformer\ElasticaToModelTransformerCollection;
  /**
   * Builds and runs the Elasticsearch queries used by the application.
   *
   * Two kinds of searches are supported:
   *  - a scored, context-dependent search ("admin" / "mailbox") whose hits are
   *    handed to the injected transformer collection;
   *  - searches restricted to the letters indexes (index names containing
   *    "app_letters"), scoped to a single organization.
   */
  final class ElasticSearchService
  {
      public const CONTEXT_ADMIN = 'admin';
      public const CONTEXT_MAILBOX = 'mailbox';

      private Client $client;
      private ConfigManager $configManager;
      private ElasticaToModelTransformerCollection $transformers;

      public function __construct(
          Client $client,
          ConfigManager $configManager,
          ElasticaToModelTransformerCollection $transformers
      ) {
          $this->client = $client;
          $this->configManager = $configManager;
          $this->transformers = $transformers;
      }

      /**
       * Runs the scored search for the given context and returns the transformed hits.
       *
       * @param string      $context  one of the CONTEXT_* constants; any other value yields
       *                              no type filter and no scoring clause
       * @param string      $term     raw search term (trimmed and lowercased before use)
       * @param string|null $postCode when not null, only documents whose "postalCode" matches are kept
       *
       * @return array whatever the transformer collection returns for the (at most 10) hits
       */
      public function search(string $context, string $term, ?string $postCode = null): array
      {
          // ES analyzer lowercases indexed data
          $term = mb_strtolower(trim($term));

          $bool = new BoolQuery();

          /* Filter context: keep only documents matching these conditions */

          // Search only on the indexes of this env (as prod & staging share the same ES server)
          $realIndexesNames = [];
          foreach ($this->configManager->getIndexNames() as $index) {
              $realIndexesNames[] = $this->configManager->getIndexConfiguration($index)->getElasticSearchName();
          }
          $bool->addFilter(new Terms('_index', $realIndexesNames));

          // Filter on documents type, depending on the context
          foreach ($this->getTypesForContext($context) as $type) {
              $bool->addFilter(new Type($type));
          }

          // If a postcode is provided, keep only documents with it
          if (null !== $postCode) {
              $bool->addFilter(new MatchQuery('postalCode', $postCode));
          }

          // For mailbox, remove lost, deleted, canceled and new without bal
          if (self::CONTEXT_MAILBOX === $context) {
              $bool->addFilter(
                  (new BoolQuery())
                      ->addMustNot(new Terms('status', [
                          OrganizationStatusEnum::LOST,
                          OrganizationStatusEnum::DELETED,
                      ]))
              );
              $bool->addFilter(
                  (new BoolQuery())
                      ->addMustNot(new Terms('status', [OrganizationStatusEnum::NEW]))
                      ->addMustNot(new MatchQuery('bal', ''))
              );
              $bool->addFilter(
                  (new BoolQuery())
                      ->addMustNot(new MatchPhrasePrefix('legalName', 'CANCELED_'))
              );
          }

          /* Query context: matching conditions will be used to calculate score */
          foreach ($this->getScoresForContext($context) as $key => $boost) {
              // Keys read "type.field" or "type.nested.field"; reversed so the field comes first.
              $parts = array_reverse(explode('.', $key));
              $field = $parts[0];
              if (isset($parts[2])) {
                  $nested = $parts[1];
                  $type = $parts[2];
              } else {
                  // A key without any dot has no type segment: it is skipped just below.
                  $type = $parts[1] ?? null;
                  $nested = null;
              }

              if (null === $type || null === $field) {
                  continue;
              }

              // NOTE(review): the operator between $boost and 2 was lost in the captured
              // listing; "*" is assumed here - confirm against the repository.
              $fieldBoost = $boost * 2;

              $must = new BoolQuery();
              if ($nested) {
                  // Document must match the type, and the nested field must contain the search term
                  $must
                      ->addMust(new Type($type))
                      ->addMust(
                          (new Nested())
                              ->setParam('ignore_unmapped', true)
                              ->setPath($nested)
                              ->setQuery(
                                  (new BoolQuery())
                                      // Extra boost for exact match
                                      ->addShould(new MatchQuery("{$nested}.{$field}", $term))
                                      // Condition depends on the field
                                      ->addShould($this->getConditionForField($field, $term, $fieldBoost, $nested))
                              )
                      );
              } else {
                  $must->addMust(new Type($type));
                  $must->addMust(
                      (new BoolQuery())
                          // Extra boost for exact match
                          ->addShould(new MatchQuery($field, $term))
                          // Condition depends on the field
                          ->addShould($this->getConditionForField($field, $term, $fieldBoost))
                  );
              }

              $bool->addShould($must);
          }

          $query = (new Query())
              ->setQuery($bool)
              ->setMinScore(1)
              ->setSize(10);

          $search = new Search($this->client);
          $resultSet = $search->search($query);

          // Transform ES results into Doctrine entities
          return $this->transformers->transform($resultSet->getResults());
      }

      /**
       * Searches the content of the letters of one organization.
       *
       * @param string $organizationId value the "organizationId" field must equal
       * @param string $term           raw search term (trimmed and lowercased before use)
       *
       * @return array list of ['letterId' => ...] rows, one per hit
       */
      public function searchInLetters(string $organizationId, string $term): array
      {
          $term = mb_strtolower(trim($term));

          $boolQuery = (new BoolQuery())
              ->addMust(new Terms('_index', $this->getLetterIndexNames()))
              ->addFilter(new Term(['organizationId' => $organizationId]))
              ->addMust(new MatchQuery('content', $term));

          $resultSet = (new Search($this->client))->search($boolQuery);

          return array_map(
              static fn ($result) => ['letterId' => $result->getSource()['letterId']],
              $resultSet->getResults()
          );
      }

      /**
       * Tells whether a letter of the given organization is already present in the letters indexes.
       *
       * @param string $organizationId value the "organizationId" field must equal
       * @param string $letterId       value the "letterId" field must equal
       */
      public function letterHasAlreadyIndexed(string $organizationId, string $letterId): bool
      {
          $boolQuery = (new BoolQuery())
              ->addMust(new Terms('_index', $this->getLetterIndexNames()))
              ->addFilter(new Term(['organizationId' => $organizationId]))
              ->addMust(new Term(['letterId' => $letterId]));

          $resultSet = (new Search($this->client))->search($boolQuery);

          return $resultSet->count() > 0;
      }

      /**
       * Returns the real Elasticsearch names of the configured indexes whose name contains "app_letters".
       *
       * array_filter() preserves the keys of its input, so the result is re-indexed with
       * array_values(): an array whose keys do not start at 0 is JSON-encoded as an object
       * instead of a list.
       * NOTE(review): presumably the cause of the failure reported on the search call of
       * searchInLetters() - confirm against the Elastica version in use.
       *
       * @return string[]
       */
      private function getLetterIndexNames(): array
      {
          $realIndexesNames = array_map(
              fn ($index) => $this->configManager->getIndexConfiguration($index)->getElasticSearchName(),
              $this->configManager->getIndexNames()
          );

          return array_values(array_filter(
              $realIndexesNames,
              static fn ($index) => false !== mb_strpos($index, 'app_letters')
          ));
      }

      /**
       * Return a list of scores to boost some fields during search, depending on the context.
       * Score format: _type.field (or _type.nested.field) => boost score.
       *
       * NOTE(review): in the mailbox context the two "legalRepresentatives.children.*" keys are
       * parsed by search() as type "legalRepresentatives" / nested path "children", while the
       * mailbox type filter only keeps "organization"; those clauses look unable to match -
       * verify whether "organization.legalRepresentatives.children.*" was intended.
       *
       * @return array<string, int> boosts sorted from highest to lowest, keys preserved
       */
      private function getScoresForContext(string $context): array
      {
          $scores = [];

          if (self::CONTEXT_MAILBOX === $context) {
              $scores = [
                  'organization.bal' => 5,
                  'organization.legalName' => 4,
                  'organization.alternateName' => 4,
                  'organization.legalRepresentatives.legalName' => 3,
                  'organization.legalRepresentatives.givenName' => 2,
                  'organization.legalRepresentatives.familyName' => 2,
                  'legalRepresentatives.children.familyName' => 2,
                  'legalRepresentatives.children.givenName' => 2,
              ];
          }

          if (self::CONTEXT_ADMIN === $context) {
              $scores = [
                  'organization.legalName' => 4,
                  'organization.bal' => 3,
                  'person.legalName' => 4,
                  'person.givenName' => 2,
                  'person.familyName' => 2,
              ];
          }

          // Sort scores decreasingly, as first condition matched should give the highest score.
          // The comparator's operator was lost in the captured listing; "$b <=> $a" implements
          // the descending order this comment asks for.
          uasort($scores, static fn ($a, $b) => $b <=> $a);

          return $scores;
      }

      /**
       * Builds the boosted "contains" condition for one field.
       *
       * @param string      $field  field name, without the nested path
       * @param string      $term   search term
       * @param float       $boost  boost given to the returned query
       * @param string|null $nested nested path prepended to the field name, if any
       *
       * @return AbstractQuery a Prefix query for "bal", a "*term*" Wildcard query for any other field
       */
      private function getConditionForField(string $field, string $term, float $boost, ?string $nested = null): AbstractQuery
      {
          $path = $nested ? "{$nested}.{$field}" : $field;

          // Prefix search for BAL only
          if ('bal' === $field) {
              return (new Prefix())->setPrefix($path, $term, $boost);
          }

          return (new Wildcard())->setValue($path, "*{$term}*", $boost);
      }

      /**
       * Returns the document types a context is restricted to (empty array: no restriction).
       *
       * @return string[]
       */
      private function getTypesForContext(string $context): array
      {
          if (self::CONTEXT_MAILBOX === $context) {
              return ['organization'];
          }

          return [];
      }
  }