Updated the SparseVectorStrategy class to use the `sparse_vector` query

elastic · Sep 10, 2024 · b2ba6f8 · b2ba6f8
1 parent ac64e9f
commit b2ba6f8
Show file tree

Hide file tree

Showing 2 changed files with 24 additions and 24 deletions.
diff --git a/elasticsearch/helpers/vectorstore/_async/strategies.py b/elasticsearch/helpers/vectorstore/_async/strategies.py
@@ -96,7 +96,7 @@ def needs_inference(self) -> bool:
 
 
 class AsyncSparseVectorStrategy(AsyncRetrievalStrategy):
- """Sparse retrieval strategy using the `text_expansion` processor."""
+ """Sparse retrieval strategy using the `sparse_vector` query."""
 
  def __init__(self, model_id: str = ".elser_model_2"):
  self.model_id = model_id
@@ -127,11 +127,10 @@ def es_query(
  "bool": {
  "must": [
  {
- "text_expansion": {
- f"{vector_field}.{self._tokens_field}": {
- "model_id": self.model_id,
- "model_text": query,
- }
+ "sparse_vector": {
+ "field": f"{vector_field}.{self._tokens_field}",
+ "inference_id": self.model_id,
+ "query": query,
  }
  }
  ],
@@ -150,7 +149,7 @@ def es_mappings_settings(
  mappings: Dict[str, Any] = {
  "properties": {
  vector_field: {
- "properties": {self._tokens_field: {"type": "rank_features"}}
+ "properties": {self._tokens_field: {"type": "sparse_vector"}}
  }
  }
  }
@@ -172,11 +171,12 @@ async def before_index_creation(
  {
  "inference": {
  "model_id": self.model_id,
- "target_field": vector_field,
- "field_map": {text_field: "text_field"},
- "inference_config": {
- "text_expansion": {"results_field": self._tokens_field}
- },
+ "input_output": [
+ {
+ "input_field": text_field,
+ "output_field": f"{vector_field}.{self._tokens_field}",
+ },
+ ],
  }
  }
  ],

diff --git a/elasticsearch/helpers/vectorstore/_sync/strategies.py b/elasticsearch/helpers/vectorstore/_sync/strategies.py
@@ -96,7 +96,7 @@ def needs_inference(self) -> bool:
 
 
 class SparseVectorStrategy(RetrievalStrategy):
- """Sparse retrieval strategy using the `text_expansion` processor."""
+ """Sparse retrieval strategy using the `sparse_vector` query."""
 
  def __init__(self, model_id: str = ".elser_model_2"):
  self.model_id = model_id
@@ -127,11 +127,10 @@ def es_query(
  "bool": {
  "must": [
  {
- "text_expansion": {
- f"{vector_field}.{self._tokens_field}": {
- "model_id": self.model_id,
- "model_text": query,
- }
+ "sparse_vector": {
+ "field": f"{vector_field}.{self._tokens_field}",
+ "inference_id": self.model_id,
+ "query": query,
  }
  }
  ],
@@ -150,7 +149,7 @@ def es_mappings_settings(
  mappings: Dict[str, Any] = {
  "properties": {
  vector_field: {
- "properties": {self._tokens_field: {"type": "rank_features"}}
+ "properties": {self._tokens_field: {"type": "sparse_vector"}}
  }
  }
  }
@@ -172,11 +171,12 @@ def before_index_creation(
  {
  "inference": {
  "model_id": self.model_id,
- "target_field": vector_field,
- "field_map": {text_field: "text_field"},
- "inference_config": {
- "text_expansion": {"results_field": self._tokens_field}
- },
+ "input_output": [
+ {
+ "input_field": text_field,
+ "output_field": f"{vector_field}.{self._tokens_field}",
+ },
+ ],
  }
  }
  ],