Skip to content

Commit

Permalink
Merge pull request #92 from mediacloud/timeout
Browse files: browse the repository at this point in the history
Use 'request_timeout' in esopts and set it to 600 (seconds)
  • Loading branch information
pgulley authored Sep 5, 2024
2 parents c22f9e7 + 6d9d8cd commit da1d0bf
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 7 deletions.
3 changes: 1 addition & 2 deletions api.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ class Config(BaseSettings):
eshosts: str = "http://localhost:9200"
termfields: str = "article_title,text_content"
termaggrs: str = "top"
esopts: Dict = {}
title: str = "Interactive API"
description: str = "A wrapper API for ES indexes."
debug: bool = False
Expand Down Expand Up @@ -72,7 +71,7 @@ class ApiVersion(str, Enum):
v1 = "1.3.9"


ES = EsClientWrapper(config.eshosts_list, **config.esopts)
ES = EsClientWrapper(config.eshosts_list)

Collection = Enum("Collection", [f"{kv}:{kv}".split(":")[:2] for kv in ES.get_allowed_collections()]) # type: ignore [misc]
TermField = Enum("TermField", [f"{kv}:{kv}".split(":")[:2] for kv in config.termfields_list]) # type: ignore [misc]
Expand Down
12 changes: 8 additions & 4 deletions client.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@
class ClientConfig(BaseSettings):
maxpage: int = 1000
elasticsearch_index_name_prefix: str = ""
top_term_query_timeout: int = 60
esopts: Dict = {"request_timeout": 600, "max_retries": 3}
debug: bool = False


client_config = ClientConfig()
logger.info(f"Loaded client config: {client_config}")


# used to package paging keys for url transport
Expand Down Expand Up @@ -73,6 +75,8 @@ def __init__(self, query_text):
"original_url",
]
self._expanded_source = self._source + ["text_content", "text_extraction"]
if client_config.debug:
logger.debug(f"Building es query for {self.query_text}")

def _validate_sort_order(self, sort_order: Optional[str]):
if sort_order and sort_order not in self.VALID_SORT_ORDERS:
Expand Down Expand Up @@ -185,8 +189,8 @@ def article_query(self) -> Dict:

class EsClientWrapper:
# A wrapper to actually make the calls to elasticsearch
def __init__(self, eshosts, **esopts):
self.ES = Elasticsearch(eshosts, **esopts)
def __init__(self, eshosts):
self.ES = Elasticsearch(eshosts, **client_config.esopts)
self.maxpage = client_config.maxpage
max_retries = 10
retries = 0
Expand Down Expand Up @@ -387,7 +391,7 @@ def get_terms(
"""
Get top terms associated with a query
"""
res = self.ES.search(index=collection, body=QueryBuilder(q).terms_query(field), request_timeout=client_config.top_term_query_timeout) # type: ignore [call-arg]
res = self.ES.search(index=collection, body=QueryBuilder(q).terms_query(field)) # type: ignore [call-arg]
if (
not res["hits"]["hits"]
or not res["aggregations"]["sample"]["topterms"]["buckets"]
Expand Down
3 changes: 2 additions & 1 deletion deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -161,19 +161,20 @@ esac

DOCKER_COMPOSE_FILE="docker-compose.yml"

export ESOPTS='{"timeout": 60, "max_retries": 3}' # 'timeout' parameter is deprecated
export TERMFIELDS="article_title,text_content"
export TERMAGGRS="top"
export ELASTICSEARCH_INDEX_NAME_PREFIX="mc_search"
export API_PORT
export API_REPLICAS
export UI_PORT
export ESHOSTS
export ESOPTS
export SENTRY_DSN
export SENTRY_ENVIRONMENT
export IMAGE_TAG
export NEWS_SEARCH_UI_TITLE
export DEPLOYMENT_TYPE
export DEBUG

if $USE_LATEST_IMAGE; then
echo "Building Docker images..."
Expand Down

0 comments on commit da1d0bf

Please sign in to comment.