Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ services:
- DEVELOPMENT=1
- PYTHONUNBUFFERED=true
- ELASTICSEARCH_URL=http://elasticsearch:9200
- ELASTICSEARCH_AUTH_USERNAME=elastic
- ELASTICSEARCH_AUTH_PASSWORD=changeme
- WORKERS=2

elasticsearch:
Expand All @@ -25,7 +27,8 @@ services:
- "ES_JAVA_OPTS=-Xms512m -Xmx512m"
- bootstrap.memory_lock=true
- discovery.type=single-node
- xpack.security.enabled=false
- xpack.security.enabled=true
- ELASTIC_PASSWORD=changeme
ports:
- "127.0.0.1:9200:9200"
- "127.0.0.1:9300:9300"
562 changes: 316 additions & 246 deletions poetry.lock

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ pyyaml = "6.0.1"


[tool.poetry.group.dev.dependencies]
mypy = "0.812"
bandit = "1.7.0"
mccabe = "0.6.1"
flake8 = "3.8.4"
pytest = "7.4.0"
pytest-cov = "4.1.0"
responses = "0.12.1"
mypy = "1.18.2"
bandit = "1.8.6"
mccabe = "0.7.0"
flake8 = "7.3.0"
pytest = "8.4.2"
pytest-cov = "7.0.0"
responses = "0.25.8"

[build-system]
requires = ["poetry>=0.12"]
Expand Down
6 changes: 5 additions & 1 deletion src/es_client/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,14 @@

headers = {'Content-Type': 'application/json'}

auth_header_value = config['authorization_header_value']
if auth_header_value:
headers['Authorization'] = auth_header_value

# Allows index exclusion; otherwise there is an error
params = {'allow_no_indices': 'true'}

resp = requests.post(url, data=json.dumps(options), params=params, headers=headers)
resp = requests.post(url, data=json.dumps(options), params=params, headers=headers) # nosec B113

if not resp.ok:
_handle_es_err(resp)
Expand All @@ -100,7 +104,7 @@

def _handle_es_err(resp):
"""Handle a non-2xx response from Elasticsearch."""
logger.error(f"Elasticsearch response error:\n{resp.text}")

Check failure

Code scanning / CodeQL

Clear-text logging of sensitive information High

This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
try:
resp_json = resp.json()
except Exception:
Expand Down
7 changes: 6 additions & 1 deletion src/search2_rpc/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,14 @@
def show_indexes(params, meta):
"""List all index names for our prefix"""
prefix = config['index_prefix']
headers = {'Content-Type': 'application/json'}
auth_header_value = config['authorization_header_value']
if auth_header_value:
headers['Authorization'] = auth_header_value
resp = requests.get(
config['elasticsearch_url'] + '/_cat/indices/' + prefix + '*?format=json',
headers={'Content-Type': 'application/json'},
headers=headers,
timeout=120
)
if not resp.ok:
raise ElasticsearchError(resp.text)
Expand Down
2 changes: 1 addition & 1 deletion src/server/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def _get_status_code(result: dict) -> int:

# Wait for dependencies to start
logger.info('Checking connection to elasticsearch')
wait_for_service(config['elasticsearch_url'], 'Elasticsearch')
wait_for_service(config['elasticsearch_url'], 'Elasticsearch', auth_token=config['authorization_header_value'])
# Start the server
app.run(
host='0.0.0.0', # nosec
Expand Down
27 changes: 22 additions & 5 deletions src/utils/config.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,30 @@
import yaml
import urllib.request
import os
import base64


def auth_header_encoder(username, password):
    """
    Build the value of an HTTP Basic ``Authorization`` header.

    The credentials are joined as ``username:password``, encoded as UTF-8,
    base64 encoded, and prefixed with ``Basic ``.

    Returns None when either the username or the password is missing or empty.
    """
    if not username or not password:
        return None
    raw_pair = "{}:{}".format(username, password).encode('utf-8')
    token = base64.b64encode(raw_pair).decode('utf-8')
    return "Basic " + token


def init_config():
"""
Initialize configuration data for the whole app
Initialize configuration data for the whole app.
"""
# TODO: it might be better to NOT default to testing configuration,
# but rather explicitly set the test environment.
# Reason? A failure to configure one of these in prod could lead to
# confusing failure conditions.
ws_url = os.environ.get('WORKSPACE_URL', 'https://ci.kbase.us/services/ws').strip('/')
es_url = os.environ.get('ELASTICSEARCH_URL', 'http://localhost:9200').strip('/')
es_auth_username = os.environ.get('ELASTICSEARCH_AUTH_USERNAME')
es_auth_password = os.environ.get('ELASTICSEARCH_AUTH_PASSWORD')
index_prefix = os.environ.get('INDEX_PREFIX', 'test')
prefix_delimiter = os.environ.get('INDEX_PREFIX_DELIMITER', '.')
suffix_delimiter = os.environ.get('INDEX_SUFFIX_DELIMITER', '_')
Expand All @@ -24,6 +36,9 @@ def init_config():
'USER_PROFILE_URL',
'https://ci.kbase.us/services/user_profile/rpc/'
)

auth_header_value = auth_header_encoder(es_auth_username, es_auth_password)

# Load the global configuration release (non-environment specific, public config)
allowed_protocols = ('https://', 'http://', 'file://')
matches_protocol = (config_url.startswith(prot) for prot in allowed_protocols)
Expand All @@ -33,10 +48,12 @@ def init_config():
global_config = yaml.safe_load(res)
with open('VERSION') as fd:
app_version = fd.read().replace('\n', '')

return {
'dev': bool(os.environ.get('DEVELOPMENT')),
'global': global_config,
'elasticsearch_url': es_url,
'authorization_header_value': auth_header_value,
'index_prefix': index_prefix,
'prefix_delimiter': prefix_delimiter,
'suffix_delimiter': suffix_delimiter,
Expand Down
1 change: 1 addition & 0 deletions src/utils/user_profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def get_user_profiles(usernames: list, auth_token=None):
url=url,
data=json.dumps(payload),
headers=headers,
timeout=120
)
if not resp.ok:
raise UserProfileError(url, resp.text)
Expand Down
7 changes: 5 additions & 2 deletions src/utils/wait_for_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,21 @@
WAIT_POLL_INTERVAL = 5


def wait_for_service(url, name, timeout=DEFAULT_TIMEOUT):
def wait_for_service(url, name, timeout=DEFAULT_TIMEOUT, auth_token=None):
start = time.time()
headers = {}
if auth_token:
headers['Authorization'] = auth_token
while True:
logger.info(f'Attempting to connect to {name} at {url}')

Check failure

Code scanning / CodeQL

Clear-text logging of sensitive information High

This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
try:
requests.get(url, timeout=timeout).raise_for_status()
requests.get(url, timeout=timeout, headers=headers).raise_for_status()
logger.info(f'{name} is online!')
break
except Exception:
logger.info(f'Waiting for {name} at {url}')

Check failure

Code scanning / CodeQL

Clear-text logging of sensitive information High

This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
total_elapsed = time.time() - start
if total_elapsed > timeout:
logger.error(f'Unable to connect to {name} at {url} after {total_elapsed} seconds')

Check failure

Code scanning / CodeQL

Clear-text logging of sensitive information High

This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
exit(1)
time.sleep(WAIT_POLL_INTERVAL)
21 changes: 16 additions & 5 deletions tests/helpers/init_elasticsearch.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,19 @@
import requests
import json
import os as _os

from src.utils.config import config


def _get_headers():
    """Build the HTTP headers used for Elasticsearch requests.

    The JSON content type is always set. An ``Authorization`` header is added
    only when the app config holds a truthy ``authorization_header_value``.
    """
    auth_value = config.get('authorization_header_value')
    if not auth_value:
        return {'Content-Type': 'application/json'}
    return {'Content-Type': 'application/json', 'Authorization': auth_value}


# TODO use a util for creating index names
narrative_index_name = ''.join([
config['index_prefix'],
Expand All @@ -15,7 +26,7 @@
config['index_prefix'] + config['prefix_delimiter'] + 'index2',
]

_ES_URL = 'http://localhost:9200'
_ES_URL = _os.environ.get('ELASTICSEARCH_URL', 'http://localhost:9200')

# Simple run once semaphore
_COMPLETED = False
Expand Down Expand Up @@ -73,15 +84,15 @@ def init_elasticsearch():
{"add": {"indices": index_names, "alias": alias_name}}
]
}
resp = requests.post(url, data=json.dumps(body), headers={'Content-Type': 'application/json'})
resp = requests.post(url, data=json.dumps(body), headers=_get_headers())
if not resp.ok:
raise RuntimeError("Error creating aliases on ES:", resp.text)
_COMPLETED = True


def create_index(index_name):
# Check if exists
resp = requests.head(_ES_URL + '/' + index_name)
resp = requests.head(_ES_URL + '/' + index_name, headers=_get_headers())
if resp.status_code == 200:
return
resp = requests.put(
Expand All @@ -91,7 +102,7 @@ def create_index(index_name):
'index': {'number_of_shards': 2, 'number_of_replicas': 1}
}
}),
headers={'Content-Type': 'application/json'},
headers=_get_headers(),
)
if not resp.ok and resp.json()['error']['type'] != 'index_already_exists_exception':
raise RuntimeError('Error creating index on ES:', resp.text)
Expand All @@ -106,7 +117,7 @@ def create_doc(index_name, data):
data['name'],
'?refresh=wait_for'
])
headers = {'Content-Type': 'application/json'}
headers = _get_headers()
resp = requests.put(url, data=json.dumps(data), headers=headers)
if not resp.ok:
raise RuntimeError(f"Error creating test doc:\n{resp.text}")
4 changes: 0 additions & 4 deletions tests/helpers/integration_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,6 @@ def start_service(app_url):


def stop_service():
global container_process
global container_out
global container_err

if container_process is not None:
logger.info('Stopping container')

Expand Down
10 changes: 6 additions & 4 deletions tests/helpers/unit_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ def start_service(wait_for_url, wait_for_name):
global container_out
global container_err

# Set environment variables BEFORE importing config module
# This ensures config picks up the auth credentials
os.environ['ELASTICSEARCH_URL'] = 'http://localhost:9200'
os.environ['ELASTICSEARCH_AUTH_USERNAME'] = 'elastic'
os.environ['ELASTICSEARCH_AUTH_PASSWORD'] = 'changeme'

cmd = "docker compose --ansi never up"
logger.info(f'Running command:\n{cmd}')
container_out = open("container.out", "w")
Expand All @@ -33,10 +39,6 @@ def start_service(wait_for_url, wait_for_name):


def stop_service():
global container_process
global container_out
global container_err

if container_process is not None:
logger.info('Stopping container')

Expand Down
13 changes: 10 additions & 3 deletions tests/unit/conftest.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
# content of a/conftest.py
import pytest
from tests.helpers.unit_setup import (
import os
# Set environment variables BEFORE any other imports
# This ensures the config module picks up the auth credentials
os.environ['ELASTICSEARCH_URL'] = 'http://localhost:9200'
os.environ['ELASTICSEARCH_AUTH_USERNAME'] = 'elastic'
os.environ['ELASTICSEARCH_AUTH_PASSWORD'] = 'changeme'

import pytest # noqa: E402
from tests.helpers.unit_setup import ( # noqa: E402
start_service,
stop_service
)
from tests.helpers import init_elasticsearch
from tests.helpers import init_elasticsearch # noqa: E402

# ES_URL = 'http://localhost:9200'
APP_URL = 'http://localhost:5000'
Expand Down
63 changes: 63 additions & 0 deletions tests/unit/es_client/test_es_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,3 +210,66 @@ def test_es_response_error_default(services):
responses.add(responses.POST, url, body=json.dumps(error_response), status=500)
with pytest.raises(ElasticsearchError):
search({}, {'auth': None})


@responses.activate
def test_search_with_basic_auth_header(services):
    """A configured Basic auth value is sent as the Authorization header to Elasticsearch."""
    expected_auth = 'Basic dGVzdHVzZXI6dGVzdHBhc3M='
    with patch('src.es_client.query.ws_auth') as ws_mock, \
            patch.dict('src.utils.config.config', {'authorization_header_value': expected_auth}):
        ws_mock.return_value = [0, 1]

        # The search with empty params targets the "default_search" index under the prefix
        search_url = ''.join([
            config['elasticsearch_url'],
            '/',
            config['index_prefix'],
            config['prefix_delimiter'],
            "default_search",
            '/_search',
        ])

        # Register a minimal successful (empty) ES search response
        empty_result = {
            'took': 5,
            'hits': {
                'total': {'value': 0},
                'hits': []
            }
        }
        responses.add(responses.POST, search_url, json=empty_result, status=200)

        search({}, {'auth': None})

        # Exactly one request went out, and it carried the configured header value
        assert len(responses.calls) == 1
        sent_headers = responses.calls[0].request.headers
        assert 'Authorization' in sent_headers
        assert sent_headers['Authorization'] == 'Basic dGVzdHVzZXI6dGVzdHBhc3M='


@responses.activate
def test_search_without_auth_header(services):
    """No Authorization header is sent to Elasticsearch when no auth value is configured."""
    with patch('src.es_client.query.ws_auth') as ws_mock, \
            patch.dict('src.utils.config.config', {'authorization_header_value': None}):
        ws_mock.return_value = [0, 1]

        # The search with empty params targets the "default_search" index under the prefix
        search_url = ''.join([
            config['elasticsearch_url'],
            '/',
            config['index_prefix'],
            config['prefix_delimiter'],
            "default_search",
            '/_search',
        ])

        # Register a minimal successful (empty) ES search response
        empty_result = {
            'took': 5,
            'hits': {
                'total': {'value': 0},
                'hits': []
            }
        }
        responses.add(responses.POST, search_url, json=empty_result, status=200)

        search({}, {'auth': None})

        # Exactly one request went out, and it carried no Authorization header
        assert len(responses.calls) == 1
        sent_headers = responses.calls[0].request.headers
        assert 'Authorization' not in sent_headers
28 changes: 28 additions & 0 deletions tests/unit/test_auth_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""
Integration tests for Elasticsearch authentication.

These tests verify that the search API can successfully connect to and query
an Elasticsearch instance with authentication enabled (using the docker-compose setup).
"""
import requests
from src.utils.config import config


def test_elasticsearch_requires_auth(services):
    """Elasticsearch must reject an unauthenticated request with HTTP 401.

    Sends a GET with no Authorization header to the configured
    Elasticsearch URL and checks the status code.
    """
    es_url = config['elasticsearch_url']
    # Bound the request so an unresponsive Elasticsearch fails the test
    # instead of stalling the whole run (requests has no default timeout).
    resp_no_auth = requests.get(es_url, timeout=30)
    assert resp_no_auth.status_code == 401, \
        f"Elasticsearch should require authentication, got status {resp_no_auth.status_code}"


def test_elasticsearch_with_auth(services):
    """Elasticsearch must accept a request carrying the configured auth header.

    Sends a GET with the configured ``authorization_header_value`` as the
    Authorization header and expects HTTP 200.
    """
    es_url = config['elasticsearch_url']
    auth_header_value = config['authorization_header_value']
    # Fail with a clear message when credentials are not configured, rather
    # than sending an unauthenticated request and reporting a confusing 401.
    assert auth_header_value, \
        "authorization_header_value is not configured; cannot test authenticated access"
    headers = {'Authorization': auth_header_value}
    # Bound the request so an unresponsive Elasticsearch fails the test
    # instead of stalling the whole run (requests has no default timeout).
    resp_with_auth = requests.get(es_url, headers=headers, timeout=30)
    assert resp_with_auth.status_code == 200, \
        f"Elasticsearch should accept authenticated requests, got status {resp_with_auth.status_code}"
Loading
Loading