This is page 4 of 6. Use http://codebase.md/chillbruhhh/crawl4ai-mcp?page={x} to view the full context.
# Directory Structure
```
├── .dockerignore
├── .env.example
├── .gitattributes
├── .gitignore
├── crawled_pages.sql
├── Dockerfile
├── knowledge_graphs
│ ├── ai_hallucination_detector.py
│ ├── ai_script_analyzer.py
│ ├── hallucination_reporter.py
│ ├── knowledge_graph_validator.py
│ ├── parse_repo_into_neo4j.py
│ ├── query_knowledge_graph.py
│ └── test_script.py
├── LICENSE
├── neo4j
│ └── docker-neo4j
│ ├── .github
│ │ └── ISSUE_TEMPLATE
│ │ └── bug_report.md
│ ├── .gitignore
│ ├── build-docker-image.sh
│ ├── build-utils-common-functions.sh
│ ├── COPYRIGHT
│ ├── DEVELOPMENT.md
│ ├── devenv
│ ├── devenv.local.template
│ ├── docker-image-src
│ │ ├── 2.3
│ │ │ ├── docker-entrypoint.sh
│ │ │ └── Dockerfile
│ │ ├── 3.0
│ │ │ ├── docker-entrypoint.sh
│ │ │ └── Dockerfile
│ │ ├── 3.1
│ │ │ ├── docker-entrypoint.sh
│ │ │ └── Dockerfile
│ │ ├── 3.2
│ │ │ ├── docker-entrypoint.sh
│ │ │ └── Dockerfile
│ │ ├── 3.3
│ │ │ ├── docker-entrypoint.sh
│ │ │ └── Dockerfile
│ │ ├── 3.4
│ │ │ ├── docker-entrypoint.sh
│ │ │ └── Dockerfile
│ │ ├── 3.5
│ │ │ ├── coredb
│ │ │ │ ├── docker-entrypoint.sh
│ │ │ │ ├── Dockerfile
│ │ │ │ └── neo4j-plugins.json
│ │ │ └── neo4j-admin
│ │ │ ├── docker-entrypoint.sh
│ │ │ └── Dockerfile
│ │ ├── 4.0
│ │ │ ├── coredb
│ │ │ │ ├── docker-entrypoint.sh
│ │ │ │ └── Dockerfile
│ │ │ └── neo4j-admin
│ │ │ ├── docker-entrypoint.sh
│ │ │ └── Dockerfile
│ │ ├── 4.1
│ │ │ ├── coredb
│ │ │ │ ├── docker-entrypoint.sh
│ │ │ │ └── Dockerfile
│ │ │ └── neo4j-admin
│ │ │ ├── docker-entrypoint.sh
│ │ │ └── Dockerfile
│ │ ├── 4.2
│ │ │ ├── coredb
│ │ │ │ ├── docker-entrypoint.sh
│ │ │ │ ├── Dockerfile
│ │ │ │ └── neo4j-plugins.json
│ │ │ └── neo4j-admin
│ │ │ ├── docker-entrypoint.sh
│ │ │ └── Dockerfile
│ │ ├── 4.3
│ │ │ ├── coredb
│ │ │ │ ├── docker-entrypoint.sh
│ │ │ │ ├── Dockerfile
│ │ │ │ └── neo4j-plugins.json
│ │ │ └── neo4j-admin
│ │ │ ├── docker-entrypoint.sh
│ │ │ └── Dockerfile
│ │ ├── 4.4
│ │ │ ├── coredb
│ │ │ │ ├── docker-entrypoint.sh
│ │ │ │ ├── Dockerfile-debian
│ │ │ │ ├── Dockerfile-ubi9
│ │ │ │ ├── neo4j-admin-report.sh
│ │ │ │ └── neo4j-plugins.json
│ │ │ └── neo4j-admin
│ │ │ ├── docker-entrypoint.sh
│ │ │ ├── Dockerfile-debian
│ │ │ └── Dockerfile-ubi9
│ │ ├── 5
│ │ │ ├── coredb
│ │ │ │ ├── docker-entrypoint.sh
│ │ │ │ ├── Dockerfile-debian
│ │ │ │ ├── Dockerfile-ubi8
│ │ │ │ ├── Dockerfile-ubi9
│ │ │ │ ├── neo4j-admin-report.sh
│ │ │ │ └── neo4j-plugins.json
│ │ │ └── neo4j-admin
│ │ │ ├── docker-entrypoint.sh
│ │ │ ├── Dockerfile-debian
│ │ │ ├── Dockerfile-ubi8
│ │ │ └── Dockerfile-ubi9
│ │ ├── calver
│ │ │ ├── coredb
│ │ │ │ ├── docker-entrypoint.sh
│ │ │ │ ├── Dockerfile-debian
│ │ │ │ ├── Dockerfile-ubi9
│ │ │ │ ├── neo4j-admin-report.sh
│ │ │ │ └── neo4j-plugins.json
│ │ │ └── neo4j-admin
│ │ │ ├── docker-entrypoint.sh
│ │ │ ├── Dockerfile-debian
│ │ │ └── Dockerfile-ubi9
│ │ └── common
│ │ ├── semver.jq
│ │ └── utilities.sh
│ ├── generate-stub-plugin
│ │ ├── build.gradle.kts
│ │ ├── Dockerfile
│ │ ├── ExampleNeo4jPlugin.java
│ │ ├── Makefile
│ │ ├── README.md
│ │ └── settings.gradle.kts
│ ├── LICENSE
│ ├── Makefile
│ ├── pom.xml
│ ├── publish-neo4j-admin-image.sh
│ ├── publish-neo4j-admin-images.sh
│ ├── README.md
│ └── src
│ ├── main
│ │ └── resources
│ │ └── log4j.properties
│ └── test
│ ├── java
│ │ └── com
│ │ └── neo4j
│ │ └── docker
│ │ ├── coredb
│ │ │ ├── configurations
│ │ │ │ ├── Configuration.java
│ │ │ │ ├── Setting.java
│ │ │ │ ├── TestConfSettings.java
│ │ │ │ ├── TestExtendedConf.java
│ │ │ │ └── TestJVMAdditionalConfig.java
│ │ │ ├── plugins
│ │ │ │ ├── Neo4jPluginEnv.java
│ │ │ │ ├── StubPluginHelper.java
│ │ │ │ ├── TestBundledPluginInstallation.java
│ │ │ │ ├── TestPluginInstallation.java
│ │ │ │ └── TestSemVerPluginMatching.java
│ │ │ ├── TestAdminReport.java
│ │ │ ├── TestAuthentication.java
│ │ │ ├── TestBasic.java
│ │ │ ├── TestCausalCluster.java
│ │ │ ├── TestMounting.java
│ │ │ └── TestUpgrade.java
│ │ ├── neo4jadmin
│ │ │ ├── TestAdminBasic.java
│ │ │ ├── TestBackupRestore.java
│ │ │ ├── TestBackupRestore44.java
│ │ │ ├── TestDumpLoad.java
│ │ │ ├── TestDumpLoad44.java
│ │ │ └── TestReport.java
│ │ ├── TestDeprecationWarning.java
│ │ ├── TestDockerComposeSecrets.java
│ │ └── utils
│ │ ├── DatabaseIO.java
│ │ ├── HostFileHttpHandler.java
│ │ ├── HttpServerTestExtension.java
│ │ ├── Neo4jVersion.java
│ │ ├── Neo4jVersionTest.java
│ │ ├── Network.java
│ │ ├── SetContainerUser.java
│ │ ├── TemporaryFolderManager.java
│ │ ├── TemporaryFolderManagerTest.java
│ │ ├── TestSettings.java
│ │ └── WaitStrategies.java
│ └── resources
│ ├── causal-cluster-compose.yml
│ ├── confs
│ │ ├── before50
│ │ │ ├── ConfsNotOverridden.conf
│ │ │ ├── ConfsReplaced.conf
│ │ │ ├── EnterpriseOnlyNotOverwritten.conf
│ │ │ ├── EnvVarsOverride.conf
│ │ │ ├── ExtendedConf.conf
│ │ │ ├── InvalidExtendedConf.conf
│ │ │ ├── JvmAdditionalNotOverridden.conf
│ │ │ ├── NoNewline.conf
│ │ │ └── ReadConf.conf
│ │ ├── ConfsNotOverridden.conf
│ │ ├── ConfsReplaced.conf
│ │ ├── EnterpriseOnlyNotOverwritten.conf
│ │ ├── EnvVarsOverride.conf
│ │ ├── ExtendedConf.conf
│ │ ├── InvalidExtendedConf.conf
│ │ ├── JvmAdditionalNotOverridden.conf
│ │ ├── NoNewline.conf
│ │ └── ReadConf.conf
│ ├── dockersecrets
│ │ ├── container-compose-with-incorrect-secrets.yml
│ │ ├── container-compose-with-secrets-override.yml
│ │ ├── container-compose-with-secrets.yml
│ │ ├── simple-container-compose-with-external-file-var.yml
│ │ └── simple-container-compose.yml
│ ├── ha-cluster-compose.yml
│ └── stubplugin
│ └── myPlugin.jar
├── pyproject.toml
├── README.md
├── src
│ ├── crawl4ai_mcp.py
│ └── utils.py
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/neo4j/docker-neo4j/docker-image-src/4.2/coredb/docker-entrypoint.sh:
--------------------------------------------------------------------------------
```bash
#!/bin/bash -eu
# First positional argument is the command the container should run
# (e.g. "neo4j", "dump-config", or an arbitrary command to exec).
cmd="$1"
function running_as_root
{
    # True when the effective user id is 0, i.e. the container was started as root.
    [ "$(id -u)" = "0" ]
}
function secure_mode_enabled
{
    # True when SECURE_FILE_PERMISSIONS=yes. Side effect: ':=' assigns the
    # default "no" when the variable is unset, so later reads are safe under -u.
    [ "${SECURE_FILE_PERMISSIONS:=no}" = "yes" ]
}
function containsElement
{
    # Succeed (return 0) when the first argument equals any of the remaining ones.
    local candidate needle="$1"
    shift
    for candidate in "$@"; do
        if [[ "${candidate}" == "${needle}" ]]; then
            return 0
        fi
    done
    return 1
}
function is_readable
{
    # Return 0 if ${1} is readable by the current user (reads globals: userid,
    # groups), judged from the octal mode digits reported by 'stat -c %a'.
    # A digit >= 4 means the read bit is set for that class.
    # this code is fairly ugly but works no matter who this script is running as.
    # It would be nice if the writability tests could use this logic somehow.
    local _file=${1}
    # NOTE(review): 'perm' is not declared local, so it leaks into the caller's
    # scope; also this assumes a three-digit mode — a setuid/sticky fourth digit
    # would shift the string offsets. Confirm against upstream before changing.
    perm=$(stat -c %a "${_file}")
    # everyone permission
    if [[ ${perm:2:1} -ge 4 ]]; then
        return 0
    fi
    # owner permissions
    # Owner read bit only counts if we own the file; compare both the owner
    # name (%U) and numeric uid (%u) because ${userid} may be either form.
    if [[ ${perm:0:1} -ge 4 ]]; then
        if [[ "$(stat -c %U ${_file})" = "${userid}" ]] || [[ "$(stat -c %u ${_file})" = "${userid}" ]]; then
            return 0
        fi
    fi
    # group permissions
    # Group read bit only counts if the file's group (gid %g or name %G) is one
    # of the groups the current user belongs to.
    if [[ ${perm:1:1} -ge 4 ]]; then
        if containsElement "$(stat -c %g ${_file})" "${groups[@]}" || containsElement "$(stat -c %G ${_file})" "${groups[@]}" ; then
            return 0
        fi
    fi
    return 1
}
function is_writable
{
    # Return 0 if ${1} is writable by the current user (reads globals: userid,
    # groups). An octal mode digit of 2, 3, 6 or 7 has the write bit set.
    # It would be nice if this and the is_readable function could combine somehow
    local _file=${1}
    # NOTE(review): 'perm' is not declared local, so it leaks into the caller's scope.
    perm=$(stat -c %a "${_file}")
    # everyone permission
    if containsElement ${perm:2:1} 2 3 6 7; then
        return 0
    fi
    # owner permissions
    # Owner write bit only counts if we own the file (owner name %U or uid %u).
    if containsElement ${perm:0:1} 2 3 6 7; then
        if [[ "$(stat -c %U ${_file})" = "${userid}" ]] || [[ "$(stat -c %u ${_file})" = "${userid}" ]]; then
            return 0
        fi
    fi
    # group permissions
    # Group write bit only counts if the file's group (gid %g or name %G) is
    # one of ours.
    if containsElement ${perm:1:1} 2 3 6 7; then
        if containsElement "$(stat -c %g ${_file})" "${groups[@]}" || containsElement "$(stat -c %G ${_file})" "${groups[@]}" ; then
            return 0
        fi
    fi
    return 1
}
function expand_commands_optionally
{
    # Emit the "--expand-commands" flag when EXTENDED_CONF is set (to any
    # value, even empty); emit nothing otherwise.
    if [ -n "${EXTENDED_CONF+set}" ]; then
        echo "--expand-commands"
    fi
}
function print_permissions_advice_and_fail
{
    # Print (to stderr) advice on fixing permissions of the mounted folder ${1},
    # then terminate the entire script with exit status 1.
    # NOTE(review): '_directory' is not declared local so it leaks globally —
    # harmless here because this function never returns.
    _directory=${1}
    echo >&2 "
Folder ${_directory} is not accessible for user: ${userid} or group ${groupid} or groups ${groups[@]}, this is commonly a file permissions issue on the mounted folder.
Hints to solve the issue:
1) Make sure the folder exists before mounting it. Docker will create the folder using root permissions before starting the Neo4j container. The root permissions disallow Neo4j from writing to the mounted folder.
2) Pass the folder owner's user ID and group ID to docker run, so that docker runs as that user.
If the folder is owned by the current user, this can be done by adding this flag to your docker run command:
--user=\$(id -u):\$(id -g)
"
    exit 1
}
function check_mounted_folder_readable
{
    # Abort the script with permissions advice unless ${1} is readable
    # by the current user.
    local _dir="${1}"
    is_readable "${_dir}" || print_permissions_advice_and_fail "${_dir}"
}
function check_mounted_folder_writable_with_chown
{
    # Ensure the mounted folder ${1} is writable by the effective neo4j user,
    # chown-ing it when running as root (unless secure mode is on), or failing
    # with advice otherwise.
    #
    # The /data and /log directory are a bit different because they are very likely to be mounted by the user but not
    # necessarily writable.
    # This depends on whether a user ID is passed to the container and which folders are mounted.
    #
    # No user ID passed to container:
    # 1) No folders are mounted.
    # The /data and /log folder are owned by neo4j by default, so should be writable already.
    # 2) Both /log and /data are mounted.
    # This means on start up, /data and /logs are owned by an unknown user and we should chown them to neo4j for
    # backwards compatibility.
    #
    # User ID passed to container:
    # 1) Both /data and /logs are mounted
    # The /data and /logs folders are owned by an unknown user but we *should* have rw permission to them.
    # That should be verified and error (helpfully) if not.
    # 2) User mounts /data or /logs *but not both*
    # The unmounted folder is still owned by neo4j, which should already be writable. The mounted folder should
    # have rw permissions through user id. This should be verified.
    # 3) No folders are mounted.
    # The /data and /log folder are owned by neo4j by default, and these are already writable by the user.
    # (This is a very unlikely use case).
    local mountFolder=${1}
    if running_as_root && ! secure_mode_enabled; then
        # check folder permissions
        if ! is_writable "${mountFolder}" ; then
            # warn that we're about to chown the folder and then chown it
            echo "Warning: Folder mounted to \"${mountFolder}\" is not writable from inside container. Changing folder owner to ${userid}."
            chown -R "${userid}":"${groupid}" "${mountFolder}"
        # check permissions on files in the folder
        # Even when the folder itself is writable, individual files inside may
        # not be: run 'find -not -writable' as the target user via gosu.
        elif [ $(gosu "${userid}":"${groupid}" find "${mountFolder}" -not -writable | wc -l) -gt 0 ]; then
            echo "Warning: Some files inside \"${mountFolder}\" are not writable from inside container. Changing folder owner to ${userid}."
            chown -R "${userid}":"${groupid}" "${mountFolder}"
        fi
    else
        # Not root (or secure mode enabled): we must not chown, so fail with
        # advice when the folder is not writable.
        if ! is_writable "${mountFolder}"; then
            #if [[ ! -w "${mountFolder}" ]] && [[ "$(stat -c %U ${mountFolder})" != "neo4j" ]]; then
            echo >&2 "Consider unsetting SECURE_FILE_PERMISSIONS environment variable, to enable docker to write to ${mountFolder}."
            print_permissions_advice_and_fail "${mountFolder}"
        fi
    fi
}
function load_plugin_from_github
{
    # Download a plugin jar into the plugins directory at container start.
    # Load a plugin at runtime. The provided github repository must have a versions.json on the master branch with the
    # correct format.
    local _plugin_name="${1}" #e.g. apoc, graph-algorithms, graph-ql
    local _plugins_dir="${NEO4J_HOME}/plugins"
    # Prefer a user-mounted /plugins folder over the default home directory.
    if [ -d /plugins ]; then
        local _plugins_dir="/plugins"
    fi
    # Look up this plugin's versions.json URL in the bundled /startup/neo4j-plugins.json.
    local _versions_json_url="$(jq --raw-output "with_entries( select(.key==\"${_plugin_name}\") ) | to_entries[] | .value.versions" /startup/neo4j-plugins.json )"
    # Using the same name for the plugin irrespective of version ensures we don't end up with different versions of the same plugin
    local _destination="${_plugins_dir}/${_plugin_name}.jar"
    local _neo4j_version="$(neo4j --version | cut -d' ' -f2)"
    # Now we call out to github to get the versions.json for this plugin and we parse that to find the url for the correct plugin jar for our neo4j version
    echo "Fetching versions.json for Plugin '${_plugin_name}' from ${_versions_json_url}"
    local _versions_json="$(wget -q --timeout 300 --tries 30 -O - "${_versions_json_url}")"
    # Filter entries whose .neo4j semver range matches our version (semver.jq is
    # shipped in /startup, hence -L/startup) and take the lowest matching entry.
    local _plugin_jar_url="$(echo "${_versions_json}" | jq -L/startup --raw-output "import \"semver\" as lib; [ .[] | select(.neo4j|lib::semver(\"${_neo4j_version}\")) ] | min_by(.neo4j) | .jar")"
    if [[ -z "${_plugin_jar_url}" ]]; then
        echo >&2 "Error: No jar URL found for version '${_neo4j_version}' in versions.json from '${_versions_json_url}'"
        exit 1
    fi
    echo "Installing Plugin '${_plugin_name}' from ${_plugin_jar_url} to ${_destination} "
    wget -q --timeout 300 --tries 30 --output-document="${_destination}" "${_plugin_jar_url}"
    # Fail fast if the downloaded jar is not readable by the neo4j user.
    if ! is_readable "${_destination}"; then
        echo >&2 "Plugin at '${_destination}' is not readable"
        exit 1
    fi
}
function apply_plugin_default_configuration
{
    # Apply a plugin's default property values (from /startup/neo4j-plugins.json)
    # to neo4j.conf. Properties the user already set in the reference conf are
    # never overridden.
    local _plugin_name="${1}" #e.g. apoc, graph-algorithms, graph-ql
    local _reference_conf="${2}" # used to determine if we can override properties
    local _neo4j_conf="${NEO4J_HOME}/conf/neo4j.conf"
    local _property _value
    echo "Applying default values for plugin ${_plugin_name} to neo4j.conf"
    for _entry in $(jq --compact-output --raw-output "with_entries( select(.key==\"${_plugin_name}\") ) | to_entries[] | .value.properties | to_entries[]" /startup/neo4j-plugins.json); do
        _property="$(jq --raw-output '.key' <<< "${_entry}")"
        _value="$(jq --raw-output '.value' <<< "${_entry}")"
        # the first grep strips out comments
        if grep -o "^[^#]*" "${_reference_conf}" | grep -q --fixed-strings "${_property}=" ; then
            # property is already set in the user provided config. In this case we don't override what has been set explicitly by the user.
            echo "Skipping ${_property} for plugin ${_plugin_name} because it is already set"
        else
            if grep -o "^[^#]*" "${_neo4j_conf}" | grep -q --fixed-strings "${_property}=" ; then
                # The setting already exists (e.g. added by another plugin):
                # prepend our value to the comma-separated list instead of replacing.
                # NOTE(review): ${_property}/${_value} are interpolated unescaped
                # into the sed expression; values containing '/' or regex
                # metacharacters would break this — presumably plugin defaults
                # never do. Confirm before reuse.
                sed --in-place "s/${_property}=/&${_value},/" "${_neo4j_conf}"
            else
                echo "${_property}=${_value}" >> "${_neo4j_conf}"
            fi
        fi
    done
}
function install_neo4j_labs_plugins
{
    # Install every plugin named in the NEO4JLABS_PLUGINS json array, then apply
    # each plugin's default configuration. A pristine snapshot of neo4j.conf is
    # taken first so values the user set explicitly are never overridden.
    local _pristine_config
    _pristine_config="$(mktemp)"
    cp "${NEO4J_HOME}"/conf/neo4j.conf "${_pristine_config}"
    local _name
    for _name in $(echo "${NEO4JLABS_PLUGINS}" | jq --raw-output '.[]'); do
        load_plugin_from_github "${_name}"
        apply_plugin_default_configuration "${_name}" "${_pristine_config}"
    done
    rm "${_pristine_config}"
}
function add_docker_default_to_conf
{
    # Append a Docker-specific default setting to neo4j.conf, but only when the
    # setting is not already present — existing (user-provided) values win.
    # Arguments: $1 setting name, $2 value, $3 NEO4J_HOME path.
    local _setting="${1}"
    local _value="${2}"
    local _neo4j_home="${3}"
    # NOTE(review): dots in ${_setting} act as regex wildcards in this grep, so
    # the presence check is slightly broader than the literal name — kept as-is
    # for upstream compatibility.
    if ! grep -q "^${_setting}=" "${_neo4j_home}"/conf/neo4j.conf
    then
        # Bug fix: previously 'echo -e "\n"${_setting}=${_value}' expanded the
        # value unquoted (word-splitting and glob-expanding it) and interpreted
        # backslash escapes inside it. printf with quoted expansions writes the
        # blank separator line and the setting verbatim.
        printf '\n%s=%s\n' "${_setting}" "${_value}" >> "${_neo4j_home}"/conf/neo4j.conf
    fi
}
function add_env_setting_to_conf
{
    # Write a setting derived from an environment variable into neo4j.conf,
    # replacing any existing lines for that setting (env vars always win).
    # Arguments: $1 setting name, $2 value, $3 NEO4J_HOME path.
    local _setting=${1}
    local _value=${2}
    local _neo4j_home=${3}
    local _conf="${_neo4j_home}/conf/neo4j.conf"
    if grep -q -F "${_setting}=" "${_conf}"; then
        # Remove any lines containing the setting already.
        # Bug fix: the setting name was previously interpolated unescaped into
        # the sed address, so the '.' characters matched ANY character and could
        # delete unrelated settings; escape BRE metacharacters first.
        local _escaped
        _escaped=$(printf '%s' "${_setting}" | sed -e 's/[][\.*^$/]/\\&/g')
        sed --in-place "/^${_escaped}=.*/d" "${_conf}"
    fi
    # Then always append setting to file
    echo "${_setting}=${_value}" >> "${_conf}"
}
function set_initial_password
{
    # Configure authentication from NEO4J_AUTH (${1}):
    #   "none"                     -> disable authentication entirely
    #   "neo4j/<password>[/true]"  -> set the initial password; the optional
    #                                 trailing "true" forces a password change
    #                                 on first login
    #   anything else non-empty    -> fail with an error
    # Only acts when the container command (global ${cmd}) is "neo4j".
    local _neo4j_auth="${1}"
    # set the neo4j initial password only if you run the database server
    if [ "${cmd}" == "neo4j" ]; then
        if [ "${_neo4j_auth:-}" == "none" ]; then
            add_env_setting_to_conf "dbms.security.auth_enabled" "false" "${NEO4J_HOME}"
            # NEO4J_dbms_security_auth__enabled=false
        elif [[ "${_neo4j_auth:-}" =~ ^([^/]+)\/([^/]+)/?([tT][rR][uU][eE])?$ ]]; then
            admin_user="${BASH_REMATCH[1]}"
            password="${BASH_REMATCH[2]}"
            do_reset="${BASH_REMATCH[3]}"
            if [ "${password}" == "neo4j" ]; then
                echo >&2 "Invalid value for password. It cannot be 'neo4j', which is the default."
                exit 1
            fi
            if [ "${admin_user}" != "neo4j" ]; then
                echo >&2 "Invalid admin username, it must be neo4j"
                exit 1
            fi
            if running_as_root; then
                # running set-initial-password as root will create subfolders to /data as root, causing startup fail when neo4j can't read or write the /data/dbms folder
                # creating the folder first will avoid that
                mkdir -p /data/dbms
                chown "${userid}":"${groupid}" /data/dbms
            fi
            # Will exit with error if users already exist (and print a message explaining that)
            # we probably don't want the message though, since it throws an error message on restarting the container.
            if [ "${do_reset}" == "true" ]; then
                ${neo4j_admin_cmd} set-initial-password "${password}" --require-password-change $(expand_commands_optionally) 2>/dev/null || true
            else
                ${neo4j_admin_cmd} set-initial-password "${password}" $(expand_commands_optionally) 2>/dev/null || true
            fi
        elif [ -n "${_neo4j_auth:-}" ]; then
            # Bug fix: a stray 'echo "$_neo4j_auth is invalid"' also printed the
            # raw NEO4J_AUTH value (which can contain a password) to STDOUT,
            # duplicating the stderr diagnostic and leaking credentials into
            # logs/pipes. Keep the diagnostic on stderr only.
            echo >&2 "Invalid value for NEO4J_AUTH: '${_neo4j_auth}'"
            exit 1
        fi
    fi
}
# ==== RESOLVE THE EFFECTIVE USER ====
# If we're running as root, then run as the neo4j user. Otherwise
# docker is running with --user and we simply use that user. Note
# that su-exec, despite its name, does not replicate the functionality
# of exec, so we need to use both
if running_as_root; then
    userid="neo4j"
    groupid="neo4j"
    groups=($(id -G neo4j))
    exec_cmd="exec gosu neo4j:neo4j"
    neo4j_admin_cmd="gosu neo4j:neo4j neo4j-admin"
else
    userid="$(id -u)"
    groupid="$(id -g)"
    groups=($(id -G))
    exec_cmd="exec"
    neo4j_admin_cmd="neo4j-admin"
fi
# These globals are read by the permission-check helper functions above.
readonly userid
readonly groupid
readonly groups
readonly exec_cmd
readonly neo4j_admin_cmd
# Need to chown the home directory
if running_as_root; then
    chown -R "${userid}":"${groupid}" "${NEO4J_HOME}"
    chmod 700 "${NEO4J_HOME}"
    # Lock down top-level directories and config files to the neo4j user only.
    find "${NEO4J_HOME}" -mindepth 1 -maxdepth 1 -type d -exec chmod -R 700 {} \;
    find "${NEO4J_HOME}"/conf -type f -exec chmod -R 600 {} \;
fi
# ==== CHECK LICENSE AGREEMENT ====
# Only prompt for license agreement if command contains "neo4j" in it
if [[ "${cmd}" == *"neo4j"* ]]; then
    if [ "${NEO4J_EDITION}" == "enterprise" ]; then
        if [ "${NEO4J_ACCEPT_LICENSE_AGREEMENT:=no}" != "yes" ]; then
            echo >&2 "
In order to use Neo4j Enterprise Edition you must accept the license agreement.
(c) Neo4j Sweden AB. 2021. All Rights Reserved.
Use of this Software without a proper commercial license with Neo4j,
Inc. or its affiliates is prohibited.
Email inquiries can be directed to: [email protected]
More information is also available at: https://neo4j.com/licensing/
To accept the license agreement set the environment variable
NEO4J_ACCEPT_LICENSE_AGREEMENT=yes
To do this you can use the following docker argument:
--env=NEO4J_ACCEPT_LICENSE_AGREEMENT=yes
"
            exit 1
        fi
    fi
fi
# ==== RENAME LEGACY ENVIRONMENT CONF VARIABLES ====
# Env variable naming convention:
# - prefix NEO4J_
# - double underscore char '__' instead of single underscore '_' char in the setting name
# - underscore char '_' instead of dot '.' char in the setting name
# Example:
# NEO4J_dbms_tx__log_rotation_retention__policy env variable to set
# dbms.tx_log.rotation.retention_policy setting
# Backward compatibility - map old hardcoded env variables into new naming convention (if they aren't set already)
# Set some to default values if unset
# ':' is the no-op builtin; '${VAR:=x}' assigns only when VAR is unset or empty.
: ${NEO4J_dbms_tx__log_rotation_retention__policy:=${NEO4J_dbms_txLog_rotation_retentionPolicy:-}}
: ${NEO4J_dbms_unmanaged__extension__classes:=${NEO4J_dbms_unmanagedExtensionClasses:-}}
: ${NEO4J_dbms_allow__format__migration:=${NEO4J_dbms_allowFormatMigration:-}}
: ${NEO4J_dbms_connectors_default__advertised__address:=${NEO4J_dbms_connectors_defaultAdvertisedAddress:-}}
if [ "${NEO4J_EDITION}" == "enterprise" ];
then
    : ${NEO4J_causal__clustering_expected__core__cluster__size:=${NEO4J_causalClustering_expectedCoreClusterSize:-}}
    : ${NEO4J_causal__clustering_initial__discovery__members:=${NEO4J_causalClustering_initialDiscoveryMembers:-}}
    : ${NEO4J_causal__clustering_discovery__advertised__address:=${NEO4J_causalClustering_discoveryAdvertisedAddress:-}}
    : ${NEO4J_causal__clustering_transaction__advertised__address:=${NEO4J_causalClustering_transactionAdvertisedAddress:-}}
    : ${NEO4J_causal__clustering_raft__advertised__address:=${NEO4J_causalClustering_raftAdvertisedAddress:-}}
fi
# unset old hardcoded unsupported env variables
unset NEO4J_dbms_txLog_rotation_retentionPolicy NEO4J_UDC_SOURCE \
    NEO4J_dbms_unmanagedExtensionClasses NEO4J_dbms_allowFormatMigration \
    NEO4J_dbms_connectors_defaultAdvertisedAddress NEO4J_ha_serverId \
    NEO4J_ha_initialHosts NEO4J_causalClustering_expectedCoreClusterSize \
    NEO4J_causalClustering_initialDiscoveryMembers \
    NEO4J_causalClustering_discoveryListenAddress \
    NEO4J_causalClustering_discoveryAdvertisedAddress \
    NEO4J_causalClustering_transactionListenAddress \
    NEO4J_causalClustering_transactionAdvertisedAddress \
    NEO4J_causalClustering_raftListenAddress \
    NEO4J_causalClustering_raftAdvertisedAddress
# ==== CHECK FILE PERMISSIONS ON MOUNTED FOLDERS ====
# Each conventional mount point is validated (readable or writable as needed)
# and, when present, wired into the corresponding NEO4J_dbms_directories_*
# variable so it lands in neo4j.conf later.
if [ -d /conf ]; then
    check_mounted_folder_readable "/conf"
    rm -rf "${NEO4J_HOME}"/conf/*
    # Copy mounted config files into the home conf dir, preserving owner/mode.
    find /conf -type f -exec cp --preserve=ownership,mode {} "${NEO4J_HOME}"/conf \;
fi
if [ -d /ssl ]; then
    check_mounted_folder_readable "/ssl"
    rm -rf "${NEO4J_HOME}"/certificates
    ln -s /ssl "${NEO4J_HOME}"/certificates
fi
if [ -d /plugins ]; then
    if [[ -n "${NEO4JLABS_PLUGINS:-}" ]]; then
        # We need write permissions
        check_mounted_folder_writable_with_chown "/plugins"
    fi
    check_mounted_folder_readable "/plugins"
    : ${NEO4J_dbms_directories_plugins:="/plugins"}
fi
if [ -d /import ]; then
    check_mounted_folder_readable "/import"
    : ${NEO4J_dbms_directories_import:="/import"}
fi
if [ -d /metrics ]; then
    # metrics is enterprise only
    if [ "${NEO4J_EDITION}" == "enterprise" ];
    then
        check_mounted_folder_writable_with_chown "/metrics"
        : ${NEO4J_dbms_directories_metrics:="/metrics"}
    fi
fi
if [ -d /logs ]; then
    check_mounted_folder_writable_with_chown "/logs"
    : ${NEO4J_dbms_directories_logs:="/logs"}
fi
if [ -d /data ]; then
    check_mounted_folder_writable_with_chown "/data"
    if [ -d /data/databases ]; then
        check_mounted_folder_writable_with_chown "/data/databases"
    fi
    if [ -d /data/dbms ]; then
        check_mounted_folder_writable_with_chown "/data/dbms"
    fi
    if [ -d /data/transactions ]; then
        check_mounted_folder_writable_with_chown "/data/transactions"
    fi
fi
if [ -d /licenses ]; then
    check_mounted_folder_readable "/licenses"
    : ${NEO4J_dbms_directories_licenses:="/licenses"}
fi
# ==== SET CONFIGURATIONS ====
## == DOCKER SPECIFIC DEFAULT CONFIGURATIONS ===
## these should not override *any* configurations set by the user
add_docker_default_to_conf "dbms.tx_log.rotation.retention_policy" "100M size" "${NEO4J_HOME}"
add_docker_default_to_conf "dbms.memory.pagecache.size" "512M" "${NEO4J_HOME}"
add_docker_default_to_conf "dbms.default_listen_address" "0.0.0.0" "${NEO4J_HOME}"
# set enterprise only docker defaults
if [ "${NEO4J_EDITION}" == "enterprise" ];
then
    add_docker_default_to_conf "causal_clustering.discovery_advertised_address" "$(hostname):5000" "${NEO4J_HOME}"
    add_docker_default_to_conf "causal_clustering.transaction_advertised_address" "$(hostname):6000" "${NEO4J_HOME}"
    add_docker_default_to_conf "causal_clustering.raft_advertised_address" "$(hostname):7000" "${NEO4J_HOME}"
fi
## == ENVIRONMENT VARIABLE CONFIGURATIONS ===
## these override BOTH defaults and any existing values in the neo4j.conf file
# save NEO4J_HOME and NEO4J_AUTH to temp variables that don't begin with NEO4J_ so they don't get added to the conf
temp_neo4j_home="${NEO4J_HOME}"
temp_neo4j_auth="${NEO4J_AUTH:-}"
# list env variables with prefix NEO4J_ and create settings from them
unset NEO4J_AUTH NEO4J_SHA256 NEO4J_TARBALL NEO4J_EDITION NEO4J_ACCEPT_LICENSE_AGREEMENT NEO4J_HOME
for i in $( set | grep ^NEO4J_ | awk -F'=' '{print $1}' | sort -rn ); do
    # Translate the env var name back into a setting name:
    # strip the NEO4J_ prefix, '_' -> '.', then '..' (from '__') -> '_'.
    setting=$(echo "${i}" | sed 's|^NEO4J_||' | sed 's|_|.|g' | sed 's|\.\.|_|g')
    value=$(echo "${!i}")
    # Don't allow settings with no value or settings that start with a number (neo4j converts settings to env variables and you cannot have an env variable that starts with a number)
    if [[ -n ${value} ]]; then
        if [[ ! "${setting}" =~ ^[0-9]+.*$ ]]; then
            add_env_setting_to_conf "${setting}" "${value}" "${temp_neo4j_home}"
        else
            echo >&2 "WARNING: ${setting} not written to conf file because settings that start with a number are not permitted"
        fi
    fi
done
export NEO4J_HOME="${temp_neo4j_home}"
unset temp_neo4j_home
# ==== SET PASSWORD AND PLUGINS ====
set_initial_password "${temp_neo4j_auth}"
if [[ ! -z "${NEO4JLABS_PLUGINS:-}" ]]; then
    # NEO4JLABS_PLUGINS should be a json array of plugins like '["graph-algorithms", "apoc", "streams", "graphql"]'
    install_neo4j_labs_plugins
fi
# ==== INVOKE NEO4J STARTUP ====
# Optionally source a user-provided extension script before handing over control.
[ -f "${EXTENSION_SCRIPT:-}" ] && . ${EXTENSION_SCRIPT}
if [ "${cmd}" == "dump-config" ]; then
    if [ ! -d "/conf" ]; then
        echo >&2 "You must mount a folder to /conf so that the configuration file(s) can be dumped to there."
        exit 1
    fi
    check_mounted_folder_writable_with_chown "/conf"
    cp --recursive "${NEO4J_HOME}"/conf/* /conf
    echo "Config Dumped"
    exit 0
fi
# Use su-exec to drop privileges to neo4j user
# Note that su-exec, despite its name, does not replicate the
# functionality of exec, so we need to use both
if [ "${cmd}" == "neo4j" ]; then
    if [ "${EXTENDED_CONF+"yes"}" == "yes" ]; then
        ${exec_cmd} neo4j console --expand-commands
    else
        ${exec_cmd} neo4j console
    fi
else
    ${exec_cmd} "$@"
fi
```
--------------------------------------------------------------------------------
/neo4j/docker-neo4j/docker-image-src/3.5/coredb/docker-entrypoint.sh:
--------------------------------------------------------------------------------
```bash
#!/bin/bash -eu
# First positional argument is the command the container should run
# (e.g. "neo4j", "dump-config", or an arbitrary command to exec).
cmd="$1"
function running_as_root
{
    # True when the effective user id is 0, i.e. the container was started as root.
    [ "$(id -u)" = "0" ]
}
function secure_mode_enabled
{
    # True when SECURE_FILE_PERMISSIONS=yes. Side effect: ':=' assigns the
    # default "no" when the variable is unset, so later reads are safe under -u.
    [ "${SECURE_FILE_PERMISSIONS:=no}" = "yes" ]
}
function containsElement
{
    # Succeed (return 0) when the first argument equals any of the remaining ones.
    local candidate needle="$1"
    shift
    for candidate in "$@"; do
        if [[ "${candidate}" == "${needle}" ]]; then
            return 0
        fi
    done
    return 1
}
function is_readable
{
    # Return 0 if ${1} is readable by the current user (reads globals: userid,
    # groups), judged from the octal mode digits reported by 'stat -c %a'.
    # A digit >= 4 means the read bit is set for that class.
    # this code is fairly ugly but works no matter who this script is running as.
    # It would be nice if the writability tests could use this logic somehow.
    local _file=${1}
    # NOTE(review): 'perm' is not declared local, so it leaks into the caller's
    # scope; also this assumes a three-digit mode — a setuid/sticky fourth digit
    # would shift the string offsets. Confirm against upstream before changing.
    perm=$(stat -c %a "${_file}")
    # everyone permission
    if [[ ${perm:2:1} -ge 4 ]]; then
        return 0
    fi
    # owner permissions
    # Owner read bit only counts if we own the file; compare both the owner
    # name (%U) and numeric uid (%u) because ${userid} may be either form.
    if [[ ${perm:0:1} -ge 4 ]]; then
        if [[ "$(stat -c %U ${_file})" = "${userid}" ]] || [[ "$(stat -c %u ${_file})" = "${userid}" ]]; then
            return 0
        fi
    fi
    # group permissions
    # Group read bit only counts if the file's group (gid %g or name %G) is one
    # of the groups the current user belongs to.
    if [[ ${perm:1:1} -ge 4 ]]; then
        if containsElement "$(stat -c %g ${_file})" "${groups[@]}" || containsElement "$(stat -c %G ${_file})" "${groups[@]}" ; then
            return 0
        fi
    fi
    return 1
}
function is_writable
{
    # Return 0 if ${1} is writable by the current user (reads globals: userid,
    # groups). An octal mode digit of 2, 3, 6 or 7 has the write bit set.
    # It would be nice if this and the is_readable function could combine somehow
    local _file=${1}
    # NOTE(review): 'perm' is not declared local, so it leaks into the caller's scope.
    perm=$(stat -c %a "${_file}")
    # everyone permission
    if containsElement ${perm:2:1} 2 3 6 7; then
        return 0
    fi
    # owner permissions
    # Owner write bit only counts if we own the file (owner name %U or uid %u).
    if containsElement ${perm:0:1} 2 3 6 7; then
        if [[ "$(stat -c %U ${_file})" = "${userid}" ]] || [[ "$(stat -c %u ${_file})" = "${userid}" ]]; then
            return 0
        fi
    fi
    # group permissions
    # Group write bit only counts if the file's group (gid %g or name %G) is
    # one of ours.
    if containsElement ${perm:1:1} 2 3 6 7; then
        if containsElement "$(stat -c %g ${_file})" "${groups[@]}" || containsElement "$(stat -c %G ${_file})" "${groups[@]}" ; then
            return 0
        fi
    fi
    return 1
}
function print_permissions_advice_and_fail
{
    # Print (to stderr) advice on fixing permissions of the mounted folder ${1},
    # then terminate the entire script with exit status 1.
    # NOTE(review): '_directory' is not declared local so it leaks globally —
    # harmless here because this function never returns.
    _directory=${1}
    echo >&2 "
Folder ${_directory} is not accessible for user: ${userid} or group ${groupid} or groups ${groups[@]}, this is commonly a file permissions issue on the mounted folder.
Hints to solve the issue:
1) Make sure the folder exists before mounting it. Docker will create the folder using root permissions before starting the Neo4j container. The root permissions disallow Neo4j from writing to the mounted folder.
2) Pass the folder owner's user ID and group ID to docker run, so that docker runs as that user.
If the folder is owned by the current user, this can be done by adding this flag to your docker run command:
--user=\$(id -u):\$(id -g)
"
    exit 1
}
function check_mounted_folder_readable
{
    # Abort the script with permissions advice unless ${1} is readable
    # by the current user.
    local _dir="${1}"
    is_readable "${_dir}" || print_permissions_advice_and_fail "${_dir}"
}
function check_mounted_folder_writable_with_chown
{
    # Ensure the mounted folder ${1} is writable by the effective neo4j user,
    # chown-ing it when running as root (unless secure mode is on), or failing
    # with advice otherwise.
    #
    # The /data and /log directory are a bit different because they are very likely to be mounted by the user but not
    # necessarily writable.
    # This depends on whether a user ID is passed to the container and which folders are mounted.
    #
    # No user ID passed to container:
    # 1) No folders are mounted.
    # The /data and /log folder are owned by neo4j by default, so should be writable already.
    # 2) Both /log and /data are mounted.
    # This means on start up, /data and /logs are owned by an unknown user and we should chown them to neo4j for
    # backwards compatibility.
    #
    # User ID passed to container:
    # 1) Both /data and /logs are mounted
    # The /data and /logs folders are owned by an unknown user but we *should* have rw permission to them.
    # That should be verified and error (helpfully) if not.
    # 2) User mounts /data or /logs *but not both*
    # The unmounted folder is still owned by neo4j, which should already be writable. The mounted folder should
    # have rw permissions through user id. This should be verified.
    # 3) No folders are mounted.
    # The /data and /log folder are owned by neo4j by default, and these are already writable by the user.
    # (This is a very unlikely use case).
    local mountFolder=${1}
    if running_as_root && ! secure_mode_enabled; then
        # check folder permissions
        if ! is_writable "${mountFolder}" ; then
            # warn that we're about to chown the folder and then chown it
            echo "Warning: Folder mounted to \"${mountFolder}\" is not writable from inside container. Changing folder owner to ${userid}."
            chown -R "${userid}":"${groupid}" "${mountFolder}"
        # check permissions on files in the folder
        # Even when the folder itself is writable, individual files inside may
        # not be: run 'find -not -writable' as the target user via gosu.
        elif [ $(gosu "${userid}":"${groupid}" find "${mountFolder}" -not -writable | wc -l) -gt 0 ]; then
            echo "Warning: Some files inside \"${mountFolder}\" are not writable from inside container. Changing folder owner to ${userid}."
            chown -R "${userid}":"${groupid}" "${mountFolder}"
        fi
    else
        # Not root (or secure mode enabled): we must not chown, so fail with
        # advice when the folder is not writable.
        if ! is_writable "${mountFolder}"; then
            #if [[ ! -w "${mountFolder}" ]] && [[ "$(stat -c %U ${mountFolder})" != "neo4j" ]]; then
            echo >&2 "Consider unsetting SECURE_FILE_PERMISSIONS environment variable, to enable docker to write to ${mountFolder}."
            print_permissions_advice_and_fail "${mountFolder}"
        fi
    fi
}
function load_plugin_from_github
{
    # Download a plugin jar into the plugins directory at container start.
    # Load a plugin at runtime. The provided github repository must have a versions.json on the master branch with the
    # correct format.
    local _plugin_name="${1}" #e.g. apoc, graph-algorithms, graph-ql
    local _plugins_dir="${NEO4J_HOME}/plugins"
    # Prefer a user-mounted /plugins folder over the default home directory.
    if [ -d /plugins ]; then
        local _plugins_dir="/plugins"
    fi
    # Look up this plugin's versions.json URL in the bundled /startup/neo4j-plugins.json.
    local _versions_json_url="$(jq --raw-output "with_entries( select(.key==\"${_plugin_name}\") ) | to_entries[] | .value.versions" /startup/neo4j-plugins.json )"
    # Using the same name for the plugin irrespective of version ensures we don't end up with different versions of the same plugin
    local _destination="${_plugins_dir}/${_plugin_name}.jar"
    local _neo4j_version="$(neo4j --version | cut -d' ' -f2)"
    # Now we call out to github to get the versions.json for this plugin and we parse that to find the url for the correct plugin jar for our neo4j version
    echo "Fetching versions.json for Plugin '${_plugin_name}' from ${_versions_json_url}"
    local _versions_json="$(wget -q --timeout 300 --tries 30 -O - "${_versions_json_url}")"
    # Filter entries whose .neo4j semver range matches our version (semver.jq is
    # shipped in /startup, hence -L/startup) and take the lowest matching entry.
    local _plugin_jar_url="$(echo "${_versions_json}" | jq -L/startup --raw-output "import \"semver\" as lib; [ .[] | select(.neo4j|lib::semver(\"${_neo4j_version}\")) ] | min_by(.neo4j) | .jar")"
    if [[ -z "${_plugin_jar_url}" ]]; then
        echo >&2 "Error: No jar URL found for version '${_neo4j_version}' in versions.json from '${_versions_json_url}'"
        exit 1
    fi
    echo "Installing Plugin '${_plugin_name}' from ${_plugin_jar_url} to ${_destination} "
    wget -q --timeout 300 --tries 30 --output-document="${_destination}" "${_plugin_jar_url}"
    # Fail fast if the downloaded jar is not readable by the neo4j user.
    if ! is_readable "${_destination}"; then
        echo >&2 "Plugin at '${_destination}' is not readable"
        exit 1
    fi
}
function apply_plugin_default_configuration
{
# Apply the default configuration properties for a plugin (read from
# /startup/neo4j-plugins.json) to neo4j.conf, without overriding any property
# that the user set explicitly in the reference config.
# $1 - plugin name, $2 - reference (pre-plugin) copy of neo4j.conf
local _plugin_name="${1}" #e.g. apoc, graph-algorithms, graph-ql
local _reference_conf="${2}" # used to determine if we can override properties
local _neo4j_conf="${NEO4J_HOME}/conf/neo4j.conf"
local _property _value
echo "Applying default values for plugin ${_plugin_name} to neo4j.conf"
for _entry in $(jq --compact-output --raw-output "with_entries( select(.key==\"${_plugin_name}\") ) | to_entries[] | .value.properties | to_entries[]" /startup/neo4j-plugins.json); do
_property="$(jq --raw-output '.key' <<< "${_entry}")"
_value="$(jq --raw-output '.value' <<< "${_entry}")"
# the first grep strips out comments
if grep -o "^[^#]*" "${_reference_conf}" | grep -q --fixed-strings "${_property}=" ; then
# property is already set in the user provided config. In this case we don't override what has been set explicitly by the user.
echo "Skipping ${_property} for plugin ${_plugin_name} because it is already set"
else
if grep -o "^[^#]*" "${_neo4j_conf}" | grep -q --fixed-strings "${_property}=" ; then
# property exists but was not user-set: prepend the plugin's value to the existing comma-separated list
sed --in-place "s/${_property}=/&${_value},/" "${_neo4j_conf}"
else
echo "${_property}=${_value}" >> "${_neo4j_conf}"
fi
fi
done
}
function install_neo4j_labs_plugins
{
  # Install every plugin named in the NEO4JLABS_PLUGINS json array and apply
  # each plugin's default settings. A pristine copy of neo4j.conf is kept so
  # plugin defaults never override values the user set themselves.
  local _pristine_conf
  _pristine_conf="$(mktemp)"
  cp "${NEO4J_HOME}"/conf/neo4j.conf "${_pristine_conf}"
  for plugin_name in $(jq --raw-output '.[]' <<< "${NEO4JLABS_PLUGINS}"); do
    load_plugin_from_github "${plugin_name}"
    apply_plugin_default_configuration "${plugin_name}" "${_pristine_conf}"
  done
  rm "${_pristine_conf}"
}
function add_docker_default_to_conf
{
  # Append a docker-specific default setting to neo4j.conf, but only when the
  # setting is not already present: user-supplied values must always win.
  #
  # $1 - setting name (e.g. dbms.memory.pagecache.size)
  # $2 - default value
  # $3 - neo4j home directory containing conf/neo4j.conf
  local _setting="${1}"
  local _value="${2}"
  local _neo4j_home="${3}"
  if ! grep -q "^${_setting}=" "${_neo4j_home}"/conf/neo4j.conf
  then
    # Fix: use printf with quoted expansions. The previous unquoted
    # 'echo -e "\n"${_setting}=${_value}' was subject to word-splitting and
    # pathname expansion (a value containing a glob character could expand).
    printf '\n%s=%s\n' "${_setting}" "${_value}" >> "${_neo4j_home}"/conf/neo4j.conf
  fi
}
function add_env_setting_to_conf
{
  # Write a setting derived from an environment variable into neo4j.conf.
  # Unlike docker defaults, these always overwrite any pre-existing value.
  #
  # $1 - setting name, $2 - value, $3 - neo4j home directory
  local _setting=${1}
  local _value=${2}
  local _neo4j_home=${3}
  local _conf="${_neo4j_home}/conf/neo4j.conf"
  # Drop any existing line for this setting first...
  if grep -q -F "${_setting}=" "${_conf}"; then
    sed --in-place "/^${_setting}=.*/d" "${_conf}"
  fi
  # ...then append the new value unconditionally.
  echo "${_setting}=${_value}" >> "${_conf}"
}
function set_initial_password
{
# Configure initial authentication from the NEO4J_AUTH value passed in $1.
# Accepted forms:
#   "none"                  - disable authentication entirely
#   "neo4j/<password>"      - set the initial admin password
#   "neo4j/<password>/true" - set it and require a change on first login
# Any other non-empty value aborts the entrypoint with status 1.
local _neo4j_auth="${1}"
# set the neo4j initial password only if you run the database server
if [ "${cmd}" == "neo4j" ]; then
if [ "${_neo4j_auth:-}" == "none" ]; then
add_env_setting_to_conf "dbms.security.auth_enabled" "false" "${NEO4J_HOME}"
# NEO4J_dbms_security_auth__enabled=false
elif [[ "${_neo4j_auth:-}" =~ ^([^/]+)\/([^/]+)/?([tT][rR][uU][eE])?$ ]]; then
admin_user="${BASH_REMATCH[1]}"
password="${BASH_REMATCH[2]}"
do_reset="${BASH_REMATCH[3]}"
if [ "${password}" == "neo4j" ]; then
echo >&2 "Invalid value for password. It cannot be 'neo4j', which is the default."
exit 1
fi
if [ "${admin_user}" != "neo4j" ]; then
echo >&2 "Invalid admin username, it must be neo4j"
exit 1
fi
if running_as_root; then
# running set-initial-password as root will create subfolders to /data as root, causing startup fail when neo4j can't read or write the /data/dbms folder
# creating the folder first will avoid that
mkdir -p /data/dbms
chown "${userid}":"${groupid}" /data/dbms
fi
# Will exit with error if users already exist (and print a message explaining that)
# we probably don't want the message though, since it throws an error message on restarting the container.
# NOTE(review): the regex accepts any case of "true" but this comparison is
# case-sensitive, so e.g. "TRUE" silently skips --require-password-change - confirm intended.
if [ "${do_reset}" == "true" ]; then
neo4j-admin set-initial-password "${password}" --require-password-change 2>/dev/null || true
else
neo4j-admin set-initial-password "${password}" 2>/dev/null || true
fi
elif [ -n "${_neo4j_auth:-}" ]; then
echo "$_neo4j_auth is invalid"
echo >&2 "Invalid value for NEO4J_AUTH: '${_neo4j_auth}'"
exit 1
fi
fi
}
# If we're running as root, then run as the neo4j user. Otherwise
# docker is running with --user and we simply use that user. Note
# that su-exec, despite its name, does not replicate the functionality
# of exec, so we need to use both
# The resolved userid/groupid/groups are consulted by is_readable/is_writable;
# exec_cmd carries the gosu privilege drop when running as root.
if running_as_root; then
userid="neo4j"
groupid="neo4j"
groups=($(id -G neo4j))
exec_cmd="exec gosu neo4j:neo4j"
else
userid="$(id -u)"
groupid="$(id -g)"
groups=($(id -G))
exec_cmd="exec"
fi
readonly userid
readonly groupid
readonly groups
readonly exec_cmd
# Need to chown the home directory - but a user might have mounted a
# volume here (notably a conf volume). So take care not to chown
# volumes (stuff not owned by neo4j)
if running_as_root; then
# Non-recursive chown for the base directory
chown "${userid}":"${groupid}" "${NEO4J_HOME}"
chmod 700 "${NEO4J_HOME}"
find "${NEO4J_HOME}" -mindepth 1 -maxdepth 1 -type d -exec chown -R ${userid}:${groupid} {} \;
find "${NEO4J_HOME}" -mindepth 1 -maxdepth 1 -type d -exec chmod -R 700 {} \;
fi
# Only prompt for license agreement if command contains "neo4j" in it
# ==== CHECK LICENSE AGREEMENT ====
if [[ "${cmd}" == *"neo4j"* ]]; then
if [ "${NEO4J_EDITION}" == "enterprise" ]; then
if [ "${NEO4J_ACCEPT_LICENSE_AGREEMENT:=no}" != "yes" ]; then
echo >&2 "
In order to use Neo4j Enterprise Edition you must accept the license agreement.
(c) Neo4j Sweden AB. 2022. All Rights Reserved.
Use of this Software without a proper commercial license with Neo4j,
Inc. or its affiliates is prohibited.
Email inquiries can be directed to: [email protected]
More information is also available at: https://neo4j.com/licensing/
To accept the license agreement set the environment variable
NEO4J_ACCEPT_LICENSE_AGREEMENT=yes
To do this you can use the following docker argument:
--env=NEO4J_ACCEPT_LICENSE_AGREEMENT=yes
"
exit 1
fi
fi
fi
# Env variable naming convention:
# ==== RENAME LEGACY ENVIRONMENT CONF VARIABLES ====
# - prefix NEO4J_
# - double underscore char '__' instead of single underscore '_' char in the setting name
# - underscore char '_' instead of dot '.' char in the setting name
# Example:
# NEO4J_dbms_tx__log_rotation_retention__policy env variable to set
# dbms.tx_log.rotation.retention_policy setting
# Backward compatibility - map old hardcoded env variables into new naming convention (if they aren't set already)
# Set some to default values if unset
: ${NEO4J_dbms_tx__log_rotation_retention__policy:=${NEO4J_dbms_txLog_rotation_retentionPolicy:-}}
: ${NEO4J_wrapper_java_additional:=${NEO4J_UDC_SOURCE:-"-Dneo4j.ext.udc.source=docker"}}
: ${NEO4J_dbms_unmanaged__extension__classes:=${NEO4J_dbms_unmanagedExtensionClasses:-}}
: ${NEO4J_dbms_allow__format__migration:=${NEO4J_dbms_allowFormatMigration:-}}
: ${NEO4J_dbms_connectors_default__advertised__address:=${NEO4J_dbms_connectors_defaultAdvertisedAddress:-}}
if [ "${NEO4J_EDITION}" == "enterprise" ];
then
: ${NEO4J_causal__clustering_expected__core__cluster__size:=${NEO4J_causalClustering_expectedCoreClusterSize:-}}
: ${NEO4J_causal__clustering_initial__discovery__members:=${NEO4J_causalClustering_initialDiscoveryMembers:-}}
: ${NEO4J_causal__clustering_discovery__advertised__address:=${NEO4J_causalClustering_discoveryAdvertisedAddress:-}}
: ${NEO4J_causal__clustering_transaction__advertised__address:=${NEO4J_causalClustering_transactionAdvertisedAddress:-}}
: ${NEO4J_causal__clustering_raft__advertised__address:=${NEO4J_causalClustering_raftAdvertisedAddress:-}}
fi
# unset old hardcoded unsupported env variables
unset NEO4J_dbms_txLog_rotation_retentionPolicy NEO4J_UDC_SOURCE \
NEO4J_dbms_unmanagedExtensionClasses NEO4J_dbms_allowFormatMigration \
NEO4J_dbms_connectors_defaultAdvertisedAddress NEO4J_ha_serverId \
NEO4J_ha_initialHosts NEO4J_causalClustering_expectedCoreClusterSize \
NEO4J_causalClustering_initialDiscoveryMembers \
NEO4J_causalClustering_discoveryListenAddress \
NEO4J_causalClustering_discoveryAdvertisedAddress \
NEO4J_causalClustering_transactionListenAddress \
NEO4J_causalClustering_transactionAdvertisedAddress \
NEO4J_causalClustering_raftListenAddress \
NEO4J_causalClustering_raftAdvertisedAddress
# ==== CHECK FILE PERMISSIONS ON MOUNTED FOLDERS ====
if [ -d /conf ]; then
if secure_mode_enabled; then
check_mounted_folder_readable "/conf"
fi
find /conf -type f -exec cp {} "${NEO4J_HOME}"/conf \;
fi
if [ -d /ssl ]; then
if secure_mode_enabled; then
check_mounted_folder_readable "/ssl"
fi
: ${NEO4J_dbms_directories_certificates:="/ssl"}
fi
if [ -d /plugins ]; then
if secure_mode_enabled; then
if [[ ! -z "${NEO4JLABS_PLUGINS:-}" ]]; then
# We need write permissions
check_mounted_folder_writable_with_chown "/plugins"
fi
check_mounted_folder_readable "/plugins"
fi
: ${NEO4J_dbms_directories_plugins:="/plugins"}
fi
if [ -d /import ]; then
if secure_mode_enabled; then
check_mounted_folder_readable "/import"
fi
: ${NEO4J_dbms_directories_import:="/import"}
fi
if [ -d /metrics ]; then
if [ "${NEO4J_EDITION}" == "enterprise" ]; then
if secure_mode_enabled; then
check_mounted_folder_readable "/metrics"
fi
: ${NEO4J_dbms_directories_metrics:="/metrics"}
fi
fi
if [ -d /logs ]; then
check_mounted_folder_writable_with_chown "/logs"
: ${NEO4J_dbms_directories_logs:="/logs"}
fi
if [ -d /data ]; then
check_mounted_folder_writable_with_chown "/data"
if [ -d /data/databases ]; then
check_mounted_folder_writable_with_chown "/data/databases"
fi
if [ -d /data/dbms ]; then
check_mounted_folder_writable_with_chown "/data/dbms"
fi
fi
# ==== SET CONFIGURATIONS ====
## == DOCKER SPECIFIC DEFAULT CONFIGURATIONS ===
## these should not override *any* configurations set by the user
add_docker_default_to_conf "dbms.tx_log.rotation.retention_policy" "100M size" "${NEO4J_HOME}"
add_docker_default_to_conf "dbms.memory.pagecache.size" "512M" "${NEO4J_HOME}"
add_docker_default_to_conf "dbms.connectors.default_listen_address" "0.0.0.0" "${NEO4J_HOME}"
add_docker_default_to_conf "dbms.connector.https.listen_address" "0.0.0.0:7473" "${NEO4J_HOME}"
add_docker_default_to_conf "dbms.connector.http.listen_address" "0.0.0.0:7474" "${NEO4J_HOME}"
add_docker_default_to_conf "dbms.connector.bolt.listen_address" "0.0.0.0:7687" "${NEO4J_HOME}"
# set enterprise only docker defaults
if [ "${NEO4J_EDITION}" == "enterprise" ];
then
add_docker_default_to_conf "causal_clustering.discovery_advertised_address" "$(hostname):5000" "${NEO4J_HOME}"
add_docker_default_to_conf "causal_clustering.transaction_advertised_address" "$(hostname):6000" "${NEO4J_HOME}"
add_docker_default_to_conf "causal_clustering.raft_advertised_address" "$(hostname):7000" "${NEO4J_HOME}"
fi
## == ENVIRONMENT VARIABLE CONFIGURATIONS ===
## these override BOTH defaults and any existing values in the neo4j.conf file
#The udc.source=tarball should be replaced by udc.source=docker in both dbms.jvm.additional and wrapper.java.additional
#Using sed to replace only this part will allow the custom configs to be added after, separated by a ,.
if grep -q "udc.source=tarball" "${NEO4J_HOME}"/conf/neo4j.conf; then
sed -i -e 's/udc.source=tarball/udc.source=docker/g' "${NEO4J_HOME}"/conf/neo4j.conf
fi
#The udc.source should always be set to docker by default and we have to allow also custom configs to be added after that.
#In this case, this piece of code helps to add the default value and a , to support custom configs after.
if ! grep -q "dbms.jvm.additional=-Dunsupported.dbms.udc.source=docker" "${NEO4J_HOME}"/conf/neo4j.conf; then
sed -i -e 's/dbms.jvm.additional=/dbms.jvm.additional=-Dunsupported.dbms.udc.source=docker,/g' "${NEO4J_HOME}"/conf/neo4j.conf
fi
# save NEO4J_HOME and NEO4J_AUTH to temp variables that don't begin with NEO4J_ so they don't get added to the conf
temp_neo4j_home="${NEO4J_HOME}"
temp_neo4j_auth="${NEO4J_AUTH:-}"
# list env variables with prefix NEO4J_ and create settings from them
unset NEO4J_AUTH NEO4J_SHA256 NEO4J_TARBALL NEO4J_EDITION NEO4J_ACCEPT_LICENSE_AGREEMENT NEO4J_HOME
# Convert each remaining NEO4J_* variable into a neo4j.conf setting per the
# naming convention documented above.
for i in $( set | grep ^NEO4J_ | awk -F'=' '{print $1}' | sort -rn ); do
setting=$(echo "${i}" | sed 's|^NEO4J_||' | sed 's|_|.|g' | sed 's|\.\.|_|g')
value=$(echo "${!i}")
# Don't allow settings with no value or settings that start with a number (neo4j converts settings to env variables and you cannot have an env variable that starts with a number)
if [[ -n ${value} ]]; then
if [[ ! "${setting}" =~ ^[0-9]+.*$ ]]; then
add_env_setting_to_conf "${setting}" "${value}" "${temp_neo4j_home}"
else
echo >&2 "WARNING: ${setting} not written to conf file because settings that start with a number are not permitted"
fi
fi
done
export NEO4J_HOME="${temp_neo4j_home}"
unset temp_neo4j_home
# ==== SET PASSWORD AND PLUGINS ====
set_initial_password "${temp_neo4j_auth}"
if [[ ! -z "${NEO4JLABS_PLUGINS:-}" ]]; then
# NEO4JLABS_PLUGINS should be a json array of plugins like '["graph-algorithms", "apoc", "streams", "graphql"]'
install_neo4j_labs_plugins
fi
# ==== INVOKE NEO4J STARTUP ====
# Optional hook: source a user-provided extension script if EXTENSION_SCRIPT points at a file.
[ -f "${EXTENSION_SCRIPT:-}" ] && . ${EXTENSION_SCRIPT}
if [ "${cmd}" == "dump-config" ]; then
if [ ! -d "/conf" ]; then
echo >&2 "You must mount a folder to /conf so that the configuration file(s) can be dumped to there."
exit 1
fi
check_mounted_folder_writable_with_chown "/conf"
cp --recursive "${NEO4J_HOME}"/conf/* /conf
echo "Config Dumped"
exit 0
fi
# Use su-exec to drop privileges to neo4j user
# Note that su-exec, despite its name, does not replicate the
# functionality of exec, so we need to use both
if [ "${cmd}" == "neo4j" ]; then
${exec_cmd} neo4j console
else
${exec_cmd} "$@"
fi
```
--------------------------------------------------------------------------------
/neo4j/docker-neo4j/docker-image-src/4.3/coredb/docker-entrypoint.sh:
--------------------------------------------------------------------------------
```bash
#!/bin/bash -eu
# First positional argument selects what the entrypoint does: "neo4j" starts
# the server, "dump-config" copies the config to /conf, anything else is
# executed verbatim (after privilege handling).
cmd="$1"
function running_as_root
{
  # True when the entrypoint itself is executing with uid 0.
  [ "$(id -u)" -eq 0 ]
}
function secure_mode_enabled
{
  # True when SECURE_FILE_PERMISSIONS=yes. As a side effect, the := expansion
  # defaults the variable to "no" the first time it is consulted.
  case "${SECURE_FILE_PERMISSIONS:=no}" in
    yes) return 0 ;;
    *)   return 1 ;;
  esac
}
function containsElement
{
  # Return 0 if the first argument equals any of the remaining arguments.
  local _needle="$1" _candidate
  shift
  for _candidate in "$@"; do
    if [[ "${_candidate}" == "${_needle}" ]]; then
      return 0
    fi
  done
  return 1
}
function is_readable
{
# this code is fairly ugly but works no matter who this script is running as.
# It would be nice if the writability tests could use this logic somehow.
# Returns 0 when ${1} is readable by the resolved container user (${userid})
# or one of its groups, judged from the file's octal mode and ownership.
local _file=${1}
perm=$(stat -c %a "${_file}")
# everyone permission
# (third octal digit >= 4 means the read bit is set for "others")
if [[ ${perm:2:1} -ge 4 ]]; then
return 0
fi
# owner permissions
if [[ ${perm:0:1} -ge 4 ]]; then
# compare against both the symbolic (%U) and numeric (%u) owner forms
if [[ "$(stat -c %U ${_file})" = "${userid}" ]] || [[ "$(stat -c %u ${_file})" = "${userid}" ]]; then
return 0
fi
fi
# group permissions
if [[ ${perm:1:1} -ge 4 ]]; then
if containsElement "$(stat -c %g ${_file})" "${groups[@]}" || containsElement "$(stat -c %G ${_file})" "${groups[@]}" ; then
return 0
fi
fi
return 1
}
function is_writable
{
# It would be nice if this and the is_readable function could combine somehow
# Returns 0 when ${1} is writable by the resolved container user or one of its
# groups. Octal digits 2, 3, 6 and 7 are exactly those with the write bit set.
local _file=${1}
perm=$(stat -c %a "${_file}")
# everyone permission
if containsElement ${perm:2:1} 2 3 6 7; then
return 0
fi
# owner permissions
if containsElement ${perm:0:1} 2 3 6 7; then
# compare against both the symbolic (%U) and numeric (%u) owner forms
if [[ "$(stat -c %U ${_file})" = "${userid}" ]] || [[ "$(stat -c %u ${_file})" = "${userid}" ]]; then
return 0
fi
fi
# group permissions
if containsElement ${perm:1:1} 2 3 6 7; then
if containsElement "$(stat -c %g ${_file})" "${groups[@]}" || containsElement "$(stat -c %G ${_file})" "${groups[@]}" ; then
return 0
fi
fi
return 1
}
function expand_commands_optionally
{
  # Emit the --expand-commands flag when EXTENDED_CONF is set - to any value,
  # including the empty string (note: this is a set-ness test, not a value
  # test). Emits nothing otherwise.
  if [ -n "${EXTENDED_CONF+set}" ]; then
    echo "--expand-commands"
  fi
}
function print_permissions_advice_and_fail
{
# Print a diagnostic explaining why a mounted folder is inaccessible to the
# resolved container user, then terminate the entrypoint with status 1.
_directory=${1}
echo >&2 "
Folder ${_directory} is not accessible for user: ${userid} or group ${groupid} or groups ${groups[@]}, this is commonly a file permissions issue on the mounted folder.
Hints to solve the issue:
1) Make sure the folder exists before mounting it. Docker will create the folder using root permissions before starting the Neo4j container. The root permissions disallow Neo4j from writing to the mounted folder.
2) Pass the folder owner's user ID and group ID to docker run, so that docker runs as that user.
If the folder is owned by the current user, this can be done by adding this flag to your docker run command:
--user=\$(id -u):\$(id -g)
"
exit 1
}
function check_mounted_folder_readable
{
  # Abort with permissions advice unless the mounted directory is readable by
  # the user/groups this container runs as.
  local _directory=${1}
  is_readable "${_directory}" || print_permissions_advice_and_fail "${_directory}"
}
function check_mounted_folder_writable_with_chown
{
# The /data and /log directory are a bit different because they are very likely to be mounted by the user but not
# necessarily writable.
# This depends on whether a user ID is passed to the container and which folders are mounted.
#
# No user ID passed to container:
# 1) No folders are mounted.
# The /data and /log folder are owned by neo4j by default, so should be writable already.
# 2) Both /log and /data are mounted.
# This means on start up, /data and /logs are owned by an unknown user and we should chown them to neo4j for
# backwards compatibility.
#
# User ID passed to container:
# 1) Both /data and /logs are mounted
# The /data and /logs folders are owned by an unknown user but we *should* have rw permission to them.
# That should be verified and error (helpfully) if not.
# 2) User mounts /data or /logs *but not both*
# The unmounted folder is still owned by neo4j, which should already be writable. The mounted folder should
# have rw permissions through user id. This should be verified.
# 3) No folders are mounted.
# The /data and /log folder are owned by neo4j by default, and these are already writable by the user.
# (This is a very unlikely use case).
local mountFolder=${1}
if running_as_root && ! secure_mode_enabled; then
# check folder permissions
if ! is_writable "${mountFolder}" ; then
# warn that we're about to chown the folder and then chown it
echo "Warning: Folder mounted to \"${mountFolder}\" is not writable from inside container. Changing folder owner to ${userid}."
chown -R "${userid}":"${groupid}" "${mountFolder}"
# check permissions on files in the folder
elif [ $(gosu "${userid}":"${groupid}" find "${mountFolder}" -not -writable | wc -l) -gt 0 ]; then
# the folder itself is writable, but some files inside it are not when checked as the target user
echo "Warning: Some files inside \"${mountFolder}\" are not writable from inside container. Changing folder owner to ${userid}."
chown -R "${userid}":"${groupid}" "${mountFolder}"
fi
else
# not running as root (or secure mode is on): we must not chown, so fail with advice instead
if ! is_writable "${mountFolder}"; then
#if [[ ! -w "${mountFolder}" ]] && [[ "$(stat -c %U ${mountFolder})" != "neo4j" ]]; then
echo >&2 "Consider unsetting SECURE_FILE_PERMISSIONS environment variable, to enable docker to write to ${mountFolder}."
print_permissions_advice_and_fail "${mountFolder}"
fi
fi
}
function load_plugin_from_location
{
  # Install a plugin at runtime from a filesystem location (e.g. a jar shipped
  # inside the image). The location may be a glob; each match is copied to the
  # plugins directory under a version-independent name.
  #
  # $1 - plugin name (e.g. apoc)
  # $2 - source location/glob of the plugin jar
  local _plugin_name="${1}"
  local _location="${2}"
  local _plugins_dir="${NEO4J_HOME}/plugins"
  # Prefer a mounted /plugins folder over the one inside NEO4J_HOME.
  if [ -d /plugins ]; then
    local _plugins_dir="/plugins"
  fi
  # Same destination name irrespective of version, so we never accumulate
  # multiple versions of the same plugin.
  local _destination="${_plugins_dir}/${_plugin_name}.jar"
  # Now we install the plugin that is shipped with Neo4j.
  # ${_location} is intentionally unquoted so that glob patterns expand.
  for filename in ${_location}; do
    echo "Installing Plugin '${_plugin_name}' from ${_location} to ${_destination}"
    # Fix: copy the matched file itself. The previous 'cp --preserve "$(unknown)"'
    # invoked a nonexistent command and never used the loop variable, so no
    # plugin was ever copied from a location.
    cp --preserve "${filename}" "${_destination}"
  done
  if ! is_readable "${_destination}"; then
    echo >&2 "Plugin at '${_destination}' is not readable"
    exit 1
  fi
}
function load_plugin_from_github
{
# Load a plugin at runtime. The provided github repository must have a versions.json on the master branch with the
# correct format.
# $1 - plugin name; must be a key in /startup/neo4j-plugins.json.
# Exits the script with status 1 if no jar matching this neo4j version can be
# resolved, or if the downloaded jar is not readable by the container user.
local _plugin_name="${1}" #e.g. apoc, graph-algorithms, graph-ql
local _plugins_dir="${NEO4J_HOME}/plugins"
# Prefer a mounted /plugins folder over the one inside NEO4J_HOME.
if [ -d /plugins ]; then
local _plugins_dir="/plugins"
fi
local _versions_json_url="$(jq --raw-output "with_entries( select(.key==\"${_plugin_name}\") ) | to_entries[] | .value.versions" /startup/neo4j-plugins.json )"
# Using the same name for the plugin irrespective of version ensures we don't end up with different versions of the same plugin
local _destination="${_plugins_dir}/${_plugin_name}.jar"
local _neo4j_version="$(neo4j --version | cut -d' ' -f2)"
# Now we call out to github to get the versions.json for this plugin and we parse that to find the url for the correct plugin jar for our neo4j version
echo "Fetching versions.json for Plugin '${_plugin_name}' from ${_versions_json_url}"
local _versions_json="$(wget -q --timeout 300 --tries 30 -O - "${_versions_json_url}")"
# Keep only entries whose .neo4j field satisfies this server version (via the
# jq "semver" module in /startup), then take the jar of the lowest such entry.
local _plugin_jar_url="$(echo "${_versions_json}" | jq -L/startup --raw-output "import \"semver\" as lib; [ .[] | select(.neo4j|lib::semver(\"${_neo4j_version}\")) ] | min_by(.neo4j) | .jar")"
if [[ -z "${_plugin_jar_url}" ]]; then
echo >&2 "Error: No jar URL found for version '${_neo4j_version}' in versions.json from '${_versions_json_url}'"
exit 1
fi
echo "Installing Plugin '${_plugin_name}' from ${_plugin_jar_url} to ${_destination} "
wget -q --timeout 300 --tries 30 --output-document="${_destination}" "${_plugin_jar_url}"
if ! is_readable "${_destination}"; then
echo >&2 "Plugin at '${_destination}' is not readable"
exit 1
fi
}
function apply_plugin_default_configuration
{
# Apply the default configuration properties for a plugin (read from
# /startup/neo4j-plugins.json) to neo4j.conf, without overriding any property
# that the user set explicitly in the reference config.
# $1 - plugin name, $2 - reference (pre-plugin) copy of neo4j.conf
local _plugin_name="${1}" #e.g. apoc, graph-algorithms, graph-ql
local _reference_conf="${2}" # used to determine if we can override properties
local _neo4j_conf="${NEO4J_HOME}/conf/neo4j.conf"
local _property _value
echo "Applying default values for plugin ${_plugin_name} to neo4j.conf"
for _entry in $(jq --compact-output --raw-output "with_entries( select(.key==\"${_plugin_name}\") ) | to_entries[] | .value.properties | to_entries[]" /startup/neo4j-plugins.json); do
_property="$(jq --raw-output '.key' <<< "${_entry}")"
_value="$(jq --raw-output '.value' <<< "${_entry}")"
# the first grep strips out comments
if grep -o "^[^#]*" "${_reference_conf}" | grep -q --fixed-strings "${_property}=" ; then
# property is already set in the user provided config. In this case we don't override what has been set explicitly by the user.
echo "Skipping ${_property} for plugin ${_plugin_name} because it is already set"
else
if grep -o "^[^#]*" "${_neo4j_conf}" | grep -q --fixed-strings "${_property}=" ; then
# property exists but was not user-set: prepend the plugin's value to the existing comma-separated list
sed --in-place "s/${_property}=/&${_value},/" "${_neo4j_conf}"
else
echo "${_property}=${_value}" >> "${_neo4j_conf}"
fi
fi
done
}
function install_neo4j_labs_plugins
{
# We store a copy of the config before we modify it for the plugins to allow us to see if there are user-set values in the input config that we shouldn't override
local _old_config="$(mktemp)"
cp "${NEO4J_HOME}"/conf/neo4j.conf "${_old_config}"
for plugin_name in $(echo "${NEO4JLABS_PLUGINS}" | jq --raw-output '.[]'); do
# "location" optionally points at a jar already shipped inside the image (may be a glob)
local _location="$(jq --raw-output "with_entries( select(.key==\"${plugin_name}\") ) | to_entries[] | .value.location" /startup/neo4j-plugins.json )"
# install from the bundled location when it is declared and the glob matches something; otherwise download from github
if [ "${_location}" != "null" -a -n "$(shopt -s nullglob; echo ${_location})" ]; then
load_plugin_from_location "${plugin_name}" "${_location}"
else
load_plugin_from_github "${plugin_name}"
fi
apply_plugin_default_configuration "${plugin_name}" "${_old_config}"
done
rm "${_old_config}"
}
function add_docker_default_to_conf
{
  # Append a docker-specific default setting to neo4j.conf, but only when the
  # setting is not already present: user-supplied values must always win.
  #
  # $1 - setting name (e.g. dbms.memory.pagecache.size)
  # $2 - default value
  # $3 - neo4j home directory containing conf/neo4j.conf
  local _setting="${1}"
  local _value="${2}"
  local _neo4j_home="${3}"
  if ! grep -q "^${_setting}=" "${_neo4j_home}"/conf/neo4j.conf
  then
    # Fix: use printf with quoted expansions. The previous unquoted
    # 'echo -e "\n"${_setting}=${_value}' was subject to word-splitting and
    # pathname expansion (a value containing a glob character could expand).
    printf '\n%s=%s\n' "${_setting}" "${_value}" >> "${_neo4j_home}"/conf/neo4j.conf
  fi
}
function add_env_setting_to_conf
{
# settings from environment variables should overwrite values already in the conf
# $1 - setting name, $2 - value, $3 - neo4j home directory
local _setting=${1}
local _value=${2}
local _neo4j_home=${3}
# NOTE(review): grep matches the setting name anywhere in a line, while the sed
# below only deletes lines anchored at the start - a mid-line-only match
# therefore deletes nothing before the append. Confirm this asymmetry is intended.
if grep -q -F "${_setting}=" "${_neo4j_home}"/conf/neo4j.conf; then
# Remove any lines containing the setting already
sed --in-place "/^${_setting}=.*/d" "${_neo4j_home}"/conf/neo4j.conf
fi
# Then always append setting to file
echo "${_setting}=${_value}" >> "${_neo4j_home}"/conf/neo4j.conf
}
function set_initial_password
{
# Configure initial authentication from the NEO4J_AUTH value passed in $1.
# Accepted forms:
#   "none"                  - disable authentication entirely
#   "neo4j/<password>"      - set the initial admin password
#   "neo4j/<password>/true" - set it and require a change on first login
# Any other non-empty value aborts the entrypoint with status 1.
local _neo4j_auth="${1}"
# set the neo4j initial password only if you run the database server
if [ "${cmd}" == "neo4j" ]; then
if [ "${_neo4j_auth:-}" == "none" ]; then
add_env_setting_to_conf "dbms.security.auth_enabled" "false" "${NEO4J_HOME}"
# NEO4J_dbms_security_auth__enabled=false
elif [[ "${_neo4j_auth:-}" =~ ^([^/]+)\/([^/]+)/?([tT][rR][uU][eE])?$ ]]; then
admin_user="${BASH_REMATCH[1]}"
password="${BASH_REMATCH[2]}"
do_reset="${BASH_REMATCH[3]}"
if [ "${password}" == "neo4j" ]; then
echo >&2 "Invalid value for password. It cannot be 'neo4j', which is the default."
exit 1
fi
if [ "${admin_user}" != "neo4j" ]; then
echo >&2 "Invalid admin username, it must be neo4j"
exit 1
fi
if running_as_root; then
# running set-initial-password as root will create subfolders to /data as root, causing startup fail when neo4j can't read or write the /data/dbms folder
# creating the folder first will avoid that
mkdir -p /data/dbms
chown "${userid}":"${groupid}" /data/dbms
fi
# Will exit with error if users already exist (and print a message explaining that)
# we probably don't want the message though, since it throws an error message on restarting the container.
# neo4j_admin_cmd already carries the gosu privilege drop when running as root;
# expand_commands_optionally adds --expand-commands when EXTENDED_CONF is set.
# NOTE(review): the regex accepts any case of "true" but this comparison is
# case-sensitive, so e.g. "TRUE" silently skips --require-password-change - confirm intended.
if [ "${do_reset}" == "true" ]; then
${neo4j_admin_cmd} set-initial-password "${password}" --require-password-change $(expand_commands_optionally) 2>/dev/null || true
else
${neo4j_admin_cmd} set-initial-password "${password}" $(expand_commands_optionally) 2>/dev/null || true
fi
elif [ -n "${_neo4j_auth:-}" ]; then
echo "$_neo4j_auth is invalid"
echo >&2 "Invalid value for NEO4J_AUTH: '${_neo4j_auth}'"
exit 1
fi
fi
}
# If we're running as root, then run as the neo4j user. Otherwise
# docker is running with --user and we simply use that user. Note
# that su-exec, despite its name, does not replicate the functionality
# of exec, so we need to use both
if running_as_root; then
userid="neo4j"
groupid="neo4j"
groups=($(id -G neo4j))
exec_cmd="exec gosu neo4j:neo4j"
neo4j_admin_cmd="gosu neo4j:neo4j neo4j-admin"
else
userid="$(id -u)"
groupid="$(id -g)"
groups=($(id -G))
exec_cmd="exec"
neo4j_admin_cmd="neo4j-admin"
fi
readonly userid
readonly groupid
readonly groups
readonly exec_cmd
readonly neo4j_admin_cmd
# Need to chown the home directory
if running_as_root; then
chown -R "${userid}":"${groupid}" "${NEO4J_HOME}"
chmod 700 "${NEO4J_HOME}"
find "${NEO4J_HOME}" -mindepth 1 -maxdepth 1 -type d -exec chmod -R 700 {} \;
find "${NEO4J_HOME}"/conf -type f -exec chmod -R 600 {} \;
fi
# ==== CHECK LICENSE AGREEMENT ====
# Only prompt for license agreement if command contains "neo4j" in it
if [[ "${cmd}" == *"neo4j"* ]]; then
if [ "${NEO4J_EDITION}" == "enterprise" ]; then
if [ "${NEO4J_ACCEPT_LICENSE_AGREEMENT:=no}" != "yes" ]; then
echo >&2 "
In order to use Neo4j Enterprise Edition you must accept the license agreement.
(c) Neo4j Sweden AB. 2022. All Rights Reserved.
Use of this Software without a proper commercial license with Neo4j,
Inc. or its affiliates is prohibited.
Email inquiries can be directed to: [email protected]
More information is also available at: https://neo4j.com/licensing/
To accept the license agreement set the environment variable
NEO4J_ACCEPT_LICENSE_AGREEMENT=yes
To do this you can use the following docker argument:
--env=NEO4J_ACCEPT_LICENSE_AGREEMENT=yes
"
exit 1
fi
fi
fi
# ==== RENAME LEGACY ENVIRONMENT CONF VARIABLES ====
# Env variable naming convention:
# - prefix NEO4J_
# - double underscore char '__' instead of single underscore '_' char in the setting name
# - underscore char '_' instead of dot '.' char in the setting name
# Example:
# NEO4J_dbms_tx__log_rotation_retention__policy env variable to set
# dbms.tx_log.rotation.retention_policy setting
# Backward compatibility - map old hardcoded env variables into new naming convention (if they aren't set already)
# Set some to default values if unset
: ${NEO4J_dbms_tx__log_rotation_retention__policy:=${NEO4J_dbms_txLog_rotation_retentionPolicy:-}}
: ${NEO4J_dbms_unmanaged__extension__classes:=${NEO4J_dbms_unmanagedExtensionClasses:-}}
: ${NEO4J_dbms_allow__format__migration:=${NEO4J_dbms_allowFormatMigration:-}}
: ${NEO4J_dbms_connectors_default__advertised__address:=${NEO4J_dbms_connectors_defaultAdvertisedAddress:-}}
if [ "${NEO4J_EDITION}" == "enterprise" ];
then
: ${NEO4J_causal__clustering_expected__core__cluster__size:=${NEO4J_causalClustering_expectedCoreClusterSize:-}}
: ${NEO4J_causal__clustering_initial__discovery__members:=${NEO4J_causalClustering_initialDiscoveryMembers:-}}
: ${NEO4J_causal__clustering_discovery__advertised__address:=${NEO4J_causalClustering_discoveryAdvertisedAddress:-}}
: ${NEO4J_causal__clustering_transaction__advertised__address:=${NEO4J_causalClustering_transactionAdvertisedAddress:-}}
: ${NEO4J_causal__clustering_raft__advertised__address:=${NEO4J_causalClustering_raftAdvertisedAddress:-}}
fi
# unset old hardcoded unsupported env variables
unset NEO4J_dbms_txLog_rotation_retentionPolicy NEO4J_UDC_SOURCE \
NEO4J_dbms_unmanagedExtensionClasses NEO4J_dbms_allowFormatMigration \
NEO4J_dbms_connectors_defaultAdvertisedAddress NEO4J_ha_serverId \
NEO4J_ha_initialHosts NEO4J_causalClustering_expectedCoreClusterSize \
NEO4J_causalClustering_initialDiscoveryMembers \
NEO4J_causalClustering_discoveryListenAddress \
NEO4J_causalClustering_discoveryAdvertisedAddress \
NEO4J_causalClustering_transactionListenAddress \
NEO4J_causalClustering_transactionAdvertisedAddress \
NEO4J_causalClustering_raftListenAddress \
NEO4J_causalClustering_raftAdvertisedAddress
# ==== CHECK FILE PERMISSIONS ON MOUNTED FOLDERS ====
if [ -d /conf ]; then
check_mounted_folder_readable "/conf"
rm -rf "${NEO4J_HOME}"/conf/*
find /conf -type f -exec cp --preserve=ownership,mode {} "${NEO4J_HOME}"/conf \;
fi
if [ -d /ssl ]; then
check_mounted_folder_readable "/ssl"
rm -rf "${NEO4J_HOME}"/certificates
ln -s /ssl "${NEO4J_HOME}"/certificates
fi
if [ -d /plugins ]; then
if [[ -n "${NEO4JLABS_PLUGINS:-}" ]]; then
# We need write permissions
check_mounted_folder_writable_with_chown "/plugins"
fi
check_mounted_folder_readable "/plugins"
: ${NEO4J_dbms_directories_plugins:="/plugins"}
fi
if [ -d /import ]; then
check_mounted_folder_readable "/import"
: ${NEO4J_dbms_directories_import:="/import"}
fi
if [ -d /metrics ]; then
# metrics is enterprise only
if [ "${NEO4J_EDITION}" == "enterprise" ];
then
check_mounted_folder_writable_with_chown "/metrics"
: ${NEO4J_dbms_directories_metrics:="/metrics"}
fi
fi
if [ -d /logs ]; then
check_mounted_folder_writable_with_chown "/logs"
: ${NEO4J_dbms_directories_logs:="/logs"}
fi
if [ -d /data ]; then
check_mounted_folder_writable_with_chown "/data"
if [ -d /data/databases ]; then
check_mounted_folder_writable_with_chown "/data/databases"
fi
if [ -d /data/dbms ]; then
check_mounted_folder_writable_with_chown "/data/dbms"
fi
if [ -d /data/transactions ]; then
check_mounted_folder_writable_with_chown "/data/transactions"
fi
fi
if [ -d /licenses ]; then
check_mounted_folder_readable "/licenses"
: ${NEO4J_dbms_directories_licenses:="/licenses"}
fi
# ==== SET CONFIGURATIONS ====
## == DOCKER SPECIFIC DEFAULT CONFIGURATIONS ===
## these should not override *any* configurations set by the user
add_docker_default_to_conf "dbms.tx_log.rotation.retention_policy" "100M size" "${NEO4J_HOME}"
add_docker_default_to_conf "dbms.memory.pagecache.size" "512M" "${NEO4J_HOME}"
add_docker_default_to_conf "dbms.default_listen_address" "0.0.0.0" "${NEO4J_HOME}"
# set enterprise only docker defaults
if [ "${NEO4J_EDITION}" == "enterprise" ];
then
add_docker_default_to_conf "causal_clustering.discovery_advertised_address" "$(hostname):5000" "${NEO4J_HOME}"
add_docker_default_to_conf "causal_clustering.transaction_advertised_address" "$(hostname):6000" "${NEO4J_HOME}"
add_docker_default_to_conf "causal_clustering.raft_advertised_address" "$(hostname):7000" "${NEO4J_HOME}"
fi
## == ENVIRONMENT VARIABLE CONFIGURATIONS ===
## these override BOTH defaults and any existing values in the neo4j.conf file
# save NEO4J_HOME and NEO4J_AUTH to temp variables that don't begin with NEO4J_ so they don't get added to the conf
temp_neo4j_home="${NEO4J_HOME}"
temp_neo4j_auth="${NEO4J_AUTH:-}"
# list env variables with prefix NEO4J_ and create settings from them
unset NEO4J_AUTH NEO4J_SHA256 NEO4J_TARBALL NEO4J_EDITION NEO4J_ACCEPT_LICENSE_AGREEMENT NEO4J_HOME
for i in $( set | grep ^NEO4J_ | awk -F'=' '{print $1}' | sort -rn ); do
    # NEO4J_dbms_memory_heap_max__size -> dbms.memory.heap.max_size:
    # single underscores become dots, doubled underscores become one literal underscore
    setting=$(echo "${i}" | sed 's|^NEO4J_||' | sed 's|_|.|g' | sed 's|\.\.|_|g')
    # Use indirect expansion directly instead of $(echo "${!i}"): echo would
    # swallow values such as "-n"/"-e" (parsed as echo options) and strip
    # trailing newlines from the value.
    value="${!i}"
    # Don't allow settings with no value or settings that start with a number (neo4j converts settings to env variables and you cannot have an env variable that starts with a number)
    if [[ -n ${value} ]]; then
        if [[ ! "${setting}" =~ ^[0-9]+.*$ ]]; then
            add_env_setting_to_conf "${setting}" "${value}" "${temp_neo4j_home}"
        else
            echo >&2 "WARNING: ${setting} not written to conf file because settings that start with a number are not permitted"
        fi
    fi
done
export NEO4J_HOME="${temp_neo4j_home}"
unset temp_neo4j_home
# ==== SET PASSWORD AND PLUGINS ====
# temp_neo4j_auth was captured before the NEO4J_* variables were unset above.
set_initial_password "${temp_neo4j_auth}"
if [[ ! -z "${NEO4JLABS_PLUGINS:-}" ]]; then
    # NEO4JLABS_PLUGINS should be a json array of plugins like '["graph-algorithms", "apoc", "streams", "graphql"]'
    install_neo4j_labs_plugins
fi
# ==== INVOKE NEO4J STARTUP ====
# Source the optional extension script. Quoted: the previous unquoted
# expansion (`. ${EXTENSION_SCRIPT}`) would word-split a path with spaces.
if [ -f "${EXTENSION_SCRIPT:-}" ]; then
    . "${EXTENSION_SCRIPT}"
fi
# "dump-config" copies the effective configuration out of the container and
# exits; it requires a writable /conf mount.
if [ "${cmd}" == "dump-config" ]; then
    if [ ! -d "/conf" ]; then
        echo >&2 "You must mount a folder to /conf so that the configuration file(s) can be dumped to there."
        exit 1
    fi
    check_mounted_folder_writable_with_chown "/conf"
    cp --recursive "${NEO4J_HOME}"/conf/* /conf
    echo "Config Dumped"
    exit 0
fi
# Prints (via `neo4j console --dry-run`) the full launch command, something
# like: `java ...[lots of java options]... neo4j.mainClass ...[neo4j options]...`
function get_neo4j_run_cmd {
    # Build the dry-run invocation as an array, then run it with or without
    # privilege dropping depending on the current user.
    local launcher=(neo4j console --dry-run)
    if [ "${EXTENDED_CONF+"yes"}" == "yes" ]; then
        # extended configuration requires command expansion at startup
        launcher+=("--expand-commands")
    fi
    if running_as_root; then
        gosu neo4j:neo4j "${launcher[@]}"
    else
        "${launcher[@]}"
    fi
}
# Use su-exec to drop privileges to neo4j user
# Note that su-exec, despite its name, does not replicate the
# functionality of exec, so we need to use both
if [ "${cmd}" == "neo4j" ]; then
    # separate declaration and use of get_neo4j_run_cmd so that error codes are correctly surfaced
    neo4j_console_cmd="$(get_neo4j_run_cmd)"
    # ":?" aborts with the message when the variable is unset OR empty.
    # The previous "?:" only fired when unset (never on an empty string)
    # and prepended a stray colon to the error message.
    eval ${exec_cmd} ${neo4j_console_cmd:?No Neo4j command was generated}
else
    ${exec_cmd} "$@"
fi
```
--------------------------------------------------------------------------------
/knowledge_graphs/ai_script_analyzer.py:
--------------------------------------------------------------------------------
```python
"""
AI Script Analyzer
Parses Python scripts generated by AI coding assistants using AST to extract:
- Import statements and their usage
- Class instantiations and method calls
- Function calls with parameters
- Attribute access patterns
- Variable type tracking
"""
import ast
import logging
from pathlib import Path
from typing import Dict, List, Set, Any, Optional, Tuple
from dataclasses import dataclass, field
logger = logging.getLogger(__name__)
@dataclass
class ImportInfo:
    """Information about an import statement"""
    module: str  # source module ("os.path" for `import os.path`; "x" for `from x import y`)
    name: str  # imported symbol; equals `module` for plain `import` statements
    alias: Optional[str] = None  # name bound via `as ...`, or None when no alias
    is_from_import: bool = False  # True for `from module import name` style imports
    line_number: int = 0  # 1-based line of the import statement (0 = unknown)
@dataclass
class MethodCall:
    """Information about a method call"""
    object_name: str  # receiver expression rendered as a dotted name (e.g. "client.session")
    method_name: str  # attribute being invoked on the receiver
    args: List[str]  # positional arguments as source-level string representations
    kwargs: Dict[str, str]  # keyword arguments: name -> string representation
    line_number: int  # 1-based line of the call site
    object_type: Optional[str] = None  # Inferred class type
@dataclass
class AttributeAccess:
    """Information about attribute access"""
    object_name: str  # object whose attribute is read, as a dotted name
    attribute_name: str  # attribute being accessed
    line_number: int  # 1-based line of the access
    object_type: Optional[str] = None  # Inferred class type
@dataclass
class FunctionCall:
    """Information about a function call"""
    function_name: str  # name as written at the call site
    args: List[str]  # positional arguments as source-level string representations
    kwargs: Dict[str, str]  # keyword arguments: name -> string representation
    line_number: int  # 1-based line of the call site
    full_name: Optional[str] = None  # Module.function_name
@dataclass
class ClassInstantiation:
    """Information about class instantiation"""
    variable_name: str  # variable assigned to, or a synthetic "<name_instance>" for nested uses
    class_name: str  # class name as written at the call site
    args: List[str]  # positional constructor arguments as string representations
    kwargs: Dict[str, str]  # keyword constructor arguments: name -> string representation
    line_number: int  # 1-based line of the instantiation
    full_class_name: Optional[str] = None  # Module.ClassName
@dataclass
class AnalysisResult:
    """Complete analysis results for a Python script"""
    file_path: str  # path of the analyzed script, as passed to the analyzer
    imports: List[ImportInfo] = field(default_factory=list)
    class_instantiations: List[ClassInstantiation] = field(default_factory=list)
    method_calls: List[MethodCall] = field(default_factory=list)
    attribute_accesses: List[AttributeAccess] = field(default_factory=list)
    function_calls: List[FunctionCall] = field(default_factory=list)
    variable_types: Dict[str, str] = field(default_factory=dict)  # variable_name -> class_type
    errors: List[str] = field(default_factory=list)  # human-readable failures (e.g. parse errors)
class AIScriptAnalyzer:
    """Analyzes AI-generated Python scripts for validation against knowledge graph.

    Call analyze_script(path); all per-script state is reset at the start of
    each call, so one analyzer instance can process many scripts in sequence.
    """

    def __init__(self):
        self.import_map: Dict[str, str] = {}  # alias -> actual_module_name
        self.variable_types: Dict[str, str] = {}  # variable_name -> class_type
        self.context_manager_vars: Dict[str, Tuple[int, int, str]] = {}  # var_name -> (start_line, end_line, type)
        # Initialized here (not only inside analyze_script) so that helper
        # methods never raise AttributeError when used before a full analysis.
        self.processed_calls: Set[int] = set()  # id()s of Call nodes already handled
        self.method_call_attributes: Set[int] = set()  # id()s of Attribute nodes consumed by method calls

    def analyze_script(self, script_path: str) -> AnalysisResult:
        """Analyze a Python script and extract all relevant information.

        Returns an AnalysisResult; on any failure (I/O, syntax error) the
        result carries the error message in .errors instead of raising.
        """
        try:
            with open(script_path, 'r', encoding='utf-8') as f:
                content = f.read()

            tree = ast.parse(content)
            result = AnalysisResult(file_path=script_path)

            # Reset state for new analysis
            self.import_map.clear()
            self.variable_types.clear()
            self.context_manager_vars.clear()
            # Track processed nodes to avoid duplicates
            self.processed_calls.clear()
            self.method_call_attributes.clear()

            # First pass: collect imports and build import map
            for node in ast.walk(tree):
                if isinstance(node, (ast.Import, ast.ImportFrom)):
                    self._extract_imports(node, result)

            # Second pass: analyze usage patterns
            for node in ast.walk(tree):
                self._analyze_node(node, result)

            # Set inferred types on method calls and attribute accesses
            self._infer_object_types(result)

            result.variable_types = self.variable_types.copy()
            return result

        except Exception as e:
            error_msg = f"Failed to analyze script {script_path}: {str(e)}"
            logger.error(error_msg)
            result = AnalysisResult(file_path=script_path)
            result.errors.append(error_msg)
            return result

    def _extract_imports(self, node: ast.AST, result: AnalysisResult):
        """Extract import information and build import mapping"""
        line_num = getattr(node, 'lineno', 0)

        if isinstance(node, ast.Import):
            for alias in node.names:
                import_name = alias.name
                alias_name = alias.asname or import_name
                result.imports.append(ImportInfo(
                    module=import_name,
                    name=import_name,
                    alias=alias.asname,
                    is_from_import=False,
                    line_number=line_num
                ))
                self.import_map[alias_name] = import_name

        elif isinstance(node, ast.ImportFrom):
            module = node.module or ""
            for alias in node.names:
                import_name = alias.name
                alias_name = alias.asname or import_name
                result.imports.append(ImportInfo(
                    module=module,
                    name=import_name,
                    alias=alias.asname,
                    is_from_import=True,
                    line_number=line_num
                ))
                # Map alias to full module.name
                if module:
                    full_name = f"{module}.{import_name}"
                    self.import_map[alias_name] = full_name
                else:
                    self.import_map[alias_name] = import_name

    def _analyze_node(self, node: ast.AST, result: AnalysisResult):
        """Analyze individual AST nodes for usage patterns"""
        # Assignments (class instantiations and method call results)
        if isinstance(node, ast.Assign):
            if len(node.targets) == 1 and isinstance(node.targets[0], ast.Name):
                if isinstance(node.value, ast.Call):
                    # Check if it's a class instantiation or method call
                    if isinstance(node.value.func, ast.Name):
                        # Direct function/class call
                        self._extract_class_instantiation(node, result)
                        # Mark this call as processed to avoid duplicate processing
                        self.processed_calls.add(id(node.value))
                    elif isinstance(node.value.func, ast.Attribute):
                        # Method call - track the variable assignment for type inference
                        var_name = node.targets[0].id
                        self._track_method_result_assignment(node.value, var_name)
                        # Still process the method call
                        self._extract_method_call(node.value, result)
                        self.processed_calls.add(id(node.value))

        # AsyncWith statements (context managers)
        elif isinstance(node, ast.AsyncWith):
            self._handle_async_with(node, result)
        elif isinstance(node, ast.With):
            self._handle_with(node, result)

        # Method calls and function calls
        elif isinstance(node, ast.Call):
            # Skip if this call was already processed as part of an assignment
            if id(node) in self.processed_calls:
                return

            if isinstance(node.func, ast.Attribute):
                self._extract_method_call(node, result)
                # Mark this attribute as used in method call to avoid duplicate processing
                self.method_call_attributes.add(id(node.func))
            elif isinstance(node.func, ast.Name):
                # Check if this is likely a class instantiation (based on imported classes)
                func_name = node.func.id
                full_name = self._resolve_full_name(func_name)

                # If this is a known imported class, treat as class instantiation
                if self._is_likely_class_instantiation(func_name, full_name):
                    self._extract_nested_class_instantiation(node, result)
                else:
                    self._extract_function_call(node, result)

        # Attribute access (not in call context)
        elif isinstance(node, ast.Attribute):
            # Skip if this attribute was already processed as part of a method call
            if id(node) in self.method_call_attributes:
                return
            self._extract_attribute_access(node, result)

    def _extract_class_instantiation(self, node: ast.Assign, result: AnalysisResult):
        """Extract class instantiation from assignment"""
        target = node.targets[0]
        call = node.value
        line_num = getattr(node, 'lineno', 0)

        if isinstance(target, ast.Name) and isinstance(call, ast.Call):
            var_name = target.id
            class_name = self._get_name_from_call(call.func)

            if class_name:
                args = [self._get_arg_representation(arg) for arg in call.args]
                kwargs = {
                    kw.arg: self._get_arg_representation(kw.value)
                    for kw in call.keywords if kw.arg
                }

                # Resolve full class name using import map
                full_class_name = self._resolve_full_name(class_name)

                instantiation = ClassInstantiation(
                    variable_name=var_name,
                    class_name=class_name,
                    args=args,
                    kwargs=kwargs,
                    line_number=line_num,
                    full_class_name=full_class_name
                )

                result.class_instantiations.append(instantiation)
                # Track variable type for later method call analysis
                self.variable_types[var_name] = full_class_name or class_name

    def _extract_method_call(self, node: ast.Call, result: AnalysisResult):
        """Extract method call information"""
        if isinstance(node.func, ast.Attribute):
            line_num = getattr(node, 'lineno', 0)

            # Get object and method names
            obj_name = self._get_name_from_node(node.func.value)
            method_name = node.func.attr

            if obj_name and method_name:
                args = [self._get_arg_representation(arg) for arg in node.args]
                kwargs = {
                    kw.arg: self._get_arg_representation(kw.value)
                    for kw in node.keywords if kw.arg
                }

                method_call = MethodCall(
                    object_name=obj_name,
                    method_name=method_name,
                    args=args,
                    kwargs=kwargs,
                    line_number=line_num,
                    object_type=self.variable_types.get(obj_name)
                )

                result.method_calls.append(method_call)

    def _extract_function_call(self, node: ast.Call, result: AnalysisResult):
        """Extract function call information"""
        if isinstance(node.func, ast.Name):
            line_num = getattr(node, 'lineno', 0)
            func_name = node.func.id

            args = [self._get_arg_representation(arg) for arg in node.args]
            kwargs = {
                kw.arg: self._get_arg_representation(kw.value)
                for kw in node.keywords if kw.arg
            }

            # Resolve full function name using import map
            full_func_name = self._resolve_full_name(func_name)

            function_call = FunctionCall(
                function_name=func_name,
                args=args,
                kwargs=kwargs,
                line_number=line_num,
                full_name=full_func_name
            )

            result.function_calls.append(function_call)

    def _extract_attribute_access(self, node: ast.Attribute, result: AnalysisResult):
        """Extract attribute access information"""
        line_num = getattr(node, 'lineno', 0)

        obj_name = self._get_name_from_node(node.value)
        attr_name = node.attr

        if obj_name and attr_name:
            attribute_access = AttributeAccess(
                object_name=obj_name,
                attribute_name=attr_name,
                line_number=line_num,
                object_type=self.variable_types.get(obj_name)
            )

            result.attribute_accesses.append(attribute_access)

    def _infer_object_types(self, result: AnalysisResult):
        """Update object types for method calls and attribute accesses"""
        for method_call in result.method_calls:
            if not method_call.object_type:
                # First check context manager variables
                obj_type = self._get_context_aware_type(method_call.object_name, method_call.line_number)
                if obj_type:
                    method_call.object_type = obj_type
                else:
                    method_call.object_type = self.variable_types.get(method_call.object_name)

        for attr_access in result.attribute_accesses:
            if not attr_access.object_type:
                # First check context manager variables
                obj_type = self._get_context_aware_type(attr_access.object_name, attr_access.line_number)
                if obj_type:
                    attr_access.object_type = obj_type
                else:
                    attr_access.object_type = self.variable_types.get(attr_access.object_name)

    def _get_context_aware_type(self, var_name: str, line_number: int) -> Optional[str]:
        """Get the type of a variable considering its context (e.g., async with scope)"""
        if var_name in self.context_manager_vars:
            start_line, end_line, var_type = self.context_manager_vars[var_name]
            if start_line <= line_number <= end_line:
                return var_type
        return None

    def _get_name_from_call(self, node: ast.AST) -> Optional[str]:
        """Get the name from a call node (for class instantiation)"""
        if isinstance(node, ast.Name):
            return node.id
        elif isinstance(node, ast.Attribute):
            value_name = self._get_name_from_node(node.value)
            if value_name:
                return f"{value_name}.{node.attr}"
        return None

    def _get_name_from_node(self, node: ast.AST) -> Optional[str]:
        """Get string representation of a node (for object names)"""
        if isinstance(node, ast.Name):
            return node.id
        elif isinstance(node, ast.Attribute):
            value_name = self._get_name_from_node(node.value)
            if value_name:
                return f"{value_name}.{node.attr}"
        return None

    def _get_arg_representation(self, node: ast.AST) -> str:
        """Get string representation of an argument"""
        if isinstance(node, ast.Constant):
            return repr(node.value)
        elif isinstance(node, ast.Name):
            return node.id
        elif isinstance(node, ast.Attribute):
            return self._get_name_from_node(node) or "<?>"
        elif isinstance(node, ast.Call):
            func_name = self._get_name_from_call(node.func)
            return f"{func_name}(...)" if func_name else "call(...)"
        else:
            return f"<{type(node).__name__}>"

    def _is_likely_class_instantiation(self, func_name: str, full_name: Optional[str]) -> bool:
        """Determine if a function call is likely a class instantiation"""
        # Check if it's a known imported class (classes typically start with uppercase)
        if func_name and func_name[0].isupper():
            return True

        # Check if the full name suggests a class (contains known class patterns)
        if full_name:
            # Common class patterns in module names (heuristic only)
            class_patterns = [
                'Model', 'Provider', 'Client', 'Agent', 'Manager', 'Handler',
                'Builder', 'Factory', 'Service', 'Controller', 'Processor'
            ]
            return any(pattern in full_name for pattern in class_patterns)

        return False

    def _extract_nested_class_instantiation(self, node: ast.Call, result: AnalysisResult):
        """Extract class instantiation that's not in direct assignment (e.g., as parameter)"""
        line_num = getattr(node, 'lineno', 0)

        if isinstance(node.func, ast.Name):
            class_name = node.func.id

            args = [self._get_arg_representation(arg) for arg in node.args]
            kwargs = {
                kw.arg: self._get_arg_representation(kw.value)
                for kw in node.keywords if kw.arg
            }

            # Resolve full class name using import map
            full_class_name = self._resolve_full_name(class_name)

            # Use a synthetic variable name since this isn't assigned to a variable
            var_name = f"<{class_name.lower()}_instance>"

            instantiation = ClassInstantiation(
                variable_name=var_name,
                class_name=class_name,
                args=args,
                kwargs=kwargs,
                line_number=line_num,
                full_class_name=full_class_name
            )

            result.class_instantiations.append(instantiation)

    def _track_method_result_assignment(self, call_node: ast.Call, var_name: str):
        """Track when a variable is assigned the result of a method call"""
        if isinstance(call_node.func, ast.Attribute):
            # For now, we'll use a generic type hint for method results
            # In a more sophisticated system, we could look up the return type
            self.variable_types[var_name] = "method_result"

    def _handle_async_with(self, node: ast.AsyncWith, result: AnalysisResult):
        """Handle async with statements and track context manager variables"""
        for item in node.items:
            if item.optional_vars and isinstance(item.optional_vars, ast.Name):
                var_name = item.optional_vars.id

                # If the context manager is a method call, track the result type
                if isinstance(item.context_expr, ast.Call) and isinstance(item.context_expr.func, ast.Attribute):
                    # Extract and process the method call
                    self._extract_method_call(item.context_expr, result)
                    self.processed_calls.add(id(item.context_expr))

                    # Track context manager scope for pydantic_ai run_stream calls
                    obj_name = self._get_name_from_node(item.context_expr.func.value)
                    method_name = item.context_expr.func.attr

                    if (obj_name and obj_name in self.variable_types and
                        'pydantic_ai' in str(self.variable_types[obj_name]) and
                        method_name == 'run_stream'):
                        # Calculate the scope of this async with block
                        start_line = getattr(node, 'lineno', 0)
                        end_line = getattr(node, 'end_lineno', start_line + 50)  # fallback estimate

                        # For run_stream, the return type is specifically StreamedRunResult
                        # This is the actual return type, not a generic placeholder
                        self.context_manager_vars[var_name] = (start_line, end_line, "pydantic_ai.StreamedRunResult")

    def _handle_with(self, node: ast.With, result: AnalysisResult):
        """Handle regular with statements and track context manager variables"""
        for item in node.items:
            if item.optional_vars and isinstance(item.optional_vars, ast.Name):
                var_name = item.optional_vars.id

                # If the context manager is a method call, track the result type
                if isinstance(item.context_expr, ast.Call) and isinstance(item.context_expr.func, ast.Attribute):
                    # Extract and process the method call
                    self._extract_method_call(item.context_expr, result)
                    self.processed_calls.add(id(item.context_expr))

                    # Track basic type information
                    self.variable_types[var_name] = "context_manager_result"

    def _resolve_full_name(self, name: str) -> Optional[str]:
        """Resolve a name to its full module.name using import map"""
        # Check if it's a direct import mapping
        if name in self.import_map:
            return self.import_map[name]

        # Check if it's a dotted name with first part in import map
        parts = name.split('.')
        if len(parts) > 1 and parts[0] in self.import_map:
            base_module = self.import_map[parts[0]]
            return f"{base_module}.{'.'.join(parts[1:])}"

        return None
def analyze_ai_script(script_path: str) -> AnalysisResult:
    """Convenience function to analyze a single AI-generated script"""
    # One-shot helper: build a fresh analyzer and run it on the given path.
    return AIScriptAnalyzer().analyze_script(script_path)
if __name__ == "__main__":
    # Example usage: analyze one script and print summary counts to stdout.
    import sys
    if len(sys.argv) != 2:
        print("Usage: python ai_script_analyzer.py <script_path>")
        sys.exit(1)
    script_path = sys.argv[1]
    result = analyze_ai_script(script_path)
    # Counts only; the detailed findings live on the AnalysisResult fields.
    print(f"Analysis Results for: {result.file_path}")
    print(f"Imports: {len(result.imports)}")
    print(f"Class Instantiations: {len(result.class_instantiations)}")
    print(f"Method Calls: {len(result.method_calls)}")
    print(f"Function Calls: {len(result.function_calls)}")
    print(f"Attribute Accesses: {len(result.attribute_accesses)}")
    if result.errors:
        print(f"Errors: {result.errors}")
```
--------------------------------------------------------------------------------
/knowledge_graphs/hallucination_reporter.py:
--------------------------------------------------------------------------------
```python
"""
Hallucination Reporter
Generates comprehensive reports about AI coding assistant hallucinations
detected in Python scripts. Supports multiple output formats.
"""
import json
import logging
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Any, Optional
from knowledge_graph_validator import (
ScriptValidationResult, ValidationStatus, ValidationResult
)
logger = logging.getLogger(__name__)
class HallucinationReporter:
"""Generates reports about detected hallucinations"""
def __init__(self):
self.report_timestamp = datetime.now(timezone.utc)
    def generate_comprehensive_report(self, validation_result: ScriptValidationResult) -> Dict[str, Any]:
        """Generate a comprehensive report in JSON format.

        Only usages belonging to libraries found in the knowledge graph are
        categorized and counted; external-library usages are skipped.
        """
        # Categorize validations by status (knowledge graph items only)
        valid_items = []
        invalid_items = []
        uncertain_items = []
        not_found_items = []
        # Process imports (only knowledge graph ones)
        for val in validation_result.import_validations:
            if not val.validation.details.get('in_knowledge_graph', False):
                continue  # Skip external libraries
            item = {
                'type': 'IMPORT',
                'name': val.import_info.module,
                'line': val.import_info.line_number,
                'status': val.validation.status.value,
                'confidence': val.validation.confidence,
                'message': val.validation.message,
                'details': {
                    'is_from_import': val.import_info.is_from_import,
                    'alias': val.import_info.alias,
                    'available_classes': val.available_classes,
                    'available_functions': val.available_functions
                }
            }
            self._categorize_item(item, val.validation.status, valid_items, invalid_items, uncertain_items, not_found_items)
        # Process classes (only knowledge graph ones)
        for val in validation_result.class_validations:
            class_name = val.class_instantiation.full_class_name or val.class_instantiation.class_name
            if not self._is_from_knowledge_graph(class_name, validation_result):
                continue  # Skip external classes
            item = {
                'type': 'CLASS_INSTANTIATION',
                'name': val.class_instantiation.class_name,
                'full_name': val.class_instantiation.full_class_name,
                'variable': val.class_instantiation.variable_name,
                'line': val.class_instantiation.line_number,
                'status': val.validation.status.value,
                'confidence': val.validation.confidence,
                'message': val.validation.message,
                'details': {
                    'args_provided': val.class_instantiation.args,
                    'kwargs_provided': list(val.class_instantiation.kwargs.keys()),
                    'constructor_params': val.constructor_params,
                    'parameter_validation': self._serialize_validation_result(val.parameter_validation) if val.parameter_validation else None
                }
            }
            self._categorize_item(item, val.validation.status, valid_items, invalid_items, uncertain_items, not_found_items)
        # Track reported items to avoid duplicates
        # (method calls and attribute accesses can refer to the same source
        # location; the first one reported wins)
        reported_items = set()
        # Process methods (only knowledge graph ones)
        for val in validation_result.method_validations:
            if not (val.method_call.object_type and self._is_from_knowledge_graph(val.method_call.object_type, validation_result)):
                continue  # Skip external methods
            # Create unique key to avoid duplicates
            key = (val.method_call.line_number, val.method_call.method_name, val.method_call.object_type)
            if key not in reported_items:
                reported_items.add(key)
                item = {
                    'type': 'METHOD_CALL',
                    'name': val.method_call.method_name,
                    'object': val.method_call.object_name,
                    'object_type': val.method_call.object_type,
                    'line': val.method_call.line_number,
                    'status': val.validation.status.value,
                    'confidence': val.validation.confidence,
                    'message': val.validation.message,
                    'details': {
                        'args_provided': val.method_call.args,
                        'kwargs_provided': list(val.method_call.kwargs.keys()),
                        'expected_params': val.expected_params,
                        'parameter_validation': self._serialize_validation_result(val.parameter_validation) if val.parameter_validation else None,
                        'suggestions': val.validation.suggestions
                    }
                }
                self._categorize_item(item, val.validation.status, valid_items, invalid_items, uncertain_items, not_found_items)
        # Process attributes (only knowledge graph ones) - but skip if already reported as method
        for val in validation_result.attribute_validations:
            if not (val.attribute_access.object_type and self._is_from_knowledge_graph(val.attribute_access.object_type, validation_result)):
                continue  # Skip external attributes
            # Create unique key - if this was already reported as a method, skip it
            key = (val.attribute_access.line_number, val.attribute_access.attribute_name, val.attribute_access.object_type)
            if key not in reported_items:
                reported_items.add(key)
                item = {
                    'type': 'ATTRIBUTE_ACCESS',
                    'name': val.attribute_access.attribute_name,
                    'object': val.attribute_access.object_name,
                    'object_type': val.attribute_access.object_type,
                    'line': val.attribute_access.line_number,
                    'status': val.validation.status.value,
                    'confidence': val.validation.confidence,
                    'message': val.validation.message,
                    'details': {
                        'expected_type': val.expected_type
                    }
                }
                self._categorize_item(item, val.validation.status, valid_items, invalid_items, uncertain_items, not_found_items)
        # Process functions (only knowledge graph ones)
        for val in validation_result.function_validations:
            if not (val.function_call.full_name and self._is_from_knowledge_graph(val.function_call.full_name, validation_result)):
                continue  # Skip external functions
            item = {
                'type': 'FUNCTION_CALL',
                'name': val.function_call.function_name,
                'full_name': val.function_call.full_name,
                'line': val.function_call.line_number,
                'status': val.validation.status.value,
                'confidence': val.validation.confidence,
                'message': val.validation.message,
                'details': {
                    'args_provided': val.function_call.args,
                    'kwargs_provided': list(val.function_call.kwargs.keys()),
                    'expected_params': val.expected_params,
                    'parameter_validation': self._serialize_validation_result(val.parameter_validation) if val.parameter_validation else None
                }
            }
            self._categorize_item(item, val.validation.status, valid_items, invalid_items, uncertain_items, not_found_items)
        # Create library summary
        library_summary = self._create_library_summary(validation_result)
        # Generate report
        report = {
            'analysis_metadata': {
                'script_path': validation_result.script_path,
                'analysis_timestamp': self.report_timestamp.isoformat(),
                'total_imports': len(validation_result.import_validations),
                'total_classes': len(validation_result.class_validations),
                'total_methods': len(validation_result.method_validations),
                'total_attributes': len(validation_result.attribute_validations),
                'total_functions': len(validation_result.function_validations)
            },
            'validation_summary': {
                'overall_confidence': validation_result.overall_confidence,
                'total_validations': len(valid_items) + len(invalid_items) + len(uncertain_items) + len(not_found_items),
                'valid_count': len(valid_items),
                'invalid_count': len(invalid_items),
                'uncertain_count': len(uncertain_items),
                'not_found_count': len(not_found_items),
                # NOTE(review): the rate's denominator excludes uncertain items,
                # unlike total_validations above — confirm this is intentional.
                'hallucination_rate': len(invalid_items + not_found_items) / max(1, len(valid_items) + len(invalid_items) + len(not_found_items))
            },
            'libraries_analyzed': library_summary,
            'validation_details': {
                'valid_items': valid_items,
                'invalid_items': invalid_items,
                'uncertain_items': uncertain_items,
                'not_found_items': not_found_items
            },
            'hallucinations_detected': validation_result.hallucinations_detected,
            'recommendations': self._generate_recommendations(validation_result)
        }
        return report
def _is_from_knowledge_graph(self, item_name: str, validation_result) -> bool:
"""Check if an item is from a knowledge graph module"""
if not item_name:
return False
# Get knowledge graph modules from import validations
kg_modules = set()
for val in validation_result.import_validations:
if val.validation.details.get('in_knowledge_graph', False):
kg_modules.add(val.import_info.module)
if '.' in val.import_info.module:
kg_modules.add(val.import_info.module.split('.')[0])
# Check if the item belongs to any knowledge graph module
if '.' in item_name:
base_module = item_name.split('.')[0]
return base_module in kg_modules
return any(item_name in module or module.endswith(item_name) for module in kg_modules)
    def _serialize_validation_result(self, validation_result) -> Optional[Dict[str, Any]]:
        """Convert ValidationResult to JSON-serializable dictionary.

        Returns None when given None (annotation widened to Optional to
        match the actual behavior).
        """
        if validation_result is None:
            return None

        return {
            'status': validation_result.status.value,
            'confidence': validation_result.confidence,
            'message': validation_result.message,
            'details': validation_result.details,
            'suggestions': validation_result.suggestions
        }
def _categorize_item(self, item: Dict[str, Any], status: ValidationStatus,
valid_items: List, invalid_items: List, uncertain_items: List, not_found_items: List):
"""Categorize validation item by status"""
if status == ValidationStatus.VALID:
valid_items.append(item)
elif status == ValidationStatus.INVALID:
invalid_items.append(item)
elif status == ValidationStatus.UNCERTAIN:
uncertain_items.append(item)
elif status == ValidationStatus.NOT_FOUND:
not_found_items.append(item)
def _create_library_summary(self, validation_result: ScriptValidationResult) -> List[Dict[str, Any]]:
"""Create summary of libraries analyzed"""
library_stats = {}
# Aggregate stats by library/module
for val in validation_result.import_validations:
module = val.import_info.module
if module not in library_stats:
library_stats[module] = {
'module_name': module,
'import_status': val.validation.status.value,
'import_confidence': val.validation.confidence,
'classes_used': [],
'methods_called': [],
'attributes_accessed': [],
'functions_called': []
}
# Add class usage
for val in validation_result.class_validations:
class_name = val.class_instantiation.class_name
full_name = val.class_instantiation.full_class_name
# Try to match to library
if full_name:
parts = full_name.split('.')
if len(parts) > 1:
module = '.'.join(parts[:-1])
if module in library_stats:
library_stats[module]['classes_used'].append({
'class_name': class_name,
'status': val.validation.status.value,
'confidence': val.validation.confidence
})
# Add method usage
for val in validation_result.method_validations:
method_name = val.method_call.method_name
object_type = val.method_call.object_type
if object_type:
parts = object_type.split('.')
if len(parts) > 1:
module = '.'.join(parts[:-1])
if module in library_stats:
library_stats[module]['methods_called'].append({
'method_name': method_name,
'class_name': parts[-1],
'status': val.validation.status.value,
'confidence': val.validation.confidence
})
# Add attribute usage
for val in validation_result.attribute_validations:
attr_name = val.attribute_access.attribute_name
object_type = val.attribute_access.object_type
if object_type:
parts = object_type.split('.')
if len(parts) > 1:
module = '.'.join(parts[:-1])
if module in library_stats:
library_stats[module]['attributes_accessed'].append({
'attribute_name': attr_name,
'class_name': parts[-1],
'status': val.validation.status.value,
'confidence': val.validation.confidence
})
# Add function usage
for val in validation_result.function_validations:
func_name = val.function_call.function_name
full_name = val.function_call.full_name
if full_name:
parts = full_name.split('.')
if len(parts) > 1:
module = '.'.join(parts[:-1])
if module in library_stats:
library_stats[module]['functions_called'].append({
'function_name': func_name,
'status': val.validation.status.value,
'confidence': val.validation.confidence
})
return list(library_stats.values())
def _generate_recommendations(self, validation_result: ScriptValidationResult) -> List[str]:
"""Generate recommendations based on validation results"""
recommendations = []
# Only count actual hallucinations (from knowledge graph libraries)
kg_hallucinations = [h for h in validation_result.hallucinations_detected]
if kg_hallucinations:
method_issues = [h for h in kg_hallucinations if h['type'] == 'METHOD_NOT_FOUND']
attr_issues = [h for h in kg_hallucinations if h['type'] == 'ATTRIBUTE_NOT_FOUND']
param_issues = [h for h in kg_hallucinations if h['type'] == 'INVALID_PARAMETERS']
if method_issues:
recommendations.append(
f"Found {len(method_issues)} non-existent methods in knowledge graph libraries. "
"Consider checking the official documentation for correct method names."
)
if attr_issues:
recommendations.append(
f"Found {len(attr_issues)} non-existent attributes in knowledge graph libraries. "
"Verify attribute names against the class documentation."
)
if param_issues:
recommendations.append(
f"Found {len(param_issues)} parameter mismatches in knowledge graph libraries. "
"Check function signatures for correct parameter names and types."
)
else:
recommendations.append(
"No hallucinations detected in knowledge graph libraries. "
"External library usage appears to be working as expected."
)
if validation_result.overall_confidence < 0.7:
recommendations.append(
"Overall confidence is moderate. Most validations were for external libraries not in the knowledge graph."
)
return recommendations
def save_json_report(self, report: Dict[str, Any], output_path: str):
"""Save report as JSON file"""
with open(output_path, 'w', encoding='utf-8') as f:
json.dump(report, f, indent=2, ensure_ascii=False)
logger.info(f"JSON report saved to: {output_path}")
def save_markdown_report(self, report: Dict[str, Any], output_path: str):
"""Save report as Markdown file"""
md_content = self._generate_markdown_content(report)
with open(output_path, 'w', encoding='utf-8') as f:
f.write(md_content)
logger.info(f"Markdown report saved to: {output_path}")
def _generate_markdown_content(self, report: Dict[str, Any]) -> str:
"""Generate Markdown content from report"""
md = []
# Header
md.append("# AI Hallucination Detection Report")
md.append("")
md.append(f"**Script:** `{report['analysis_metadata']['script_path']}`")
md.append(f"**Analysis Date:** {report['analysis_metadata']['analysis_timestamp']}")
md.append(f"**Overall Confidence:** {report['validation_summary']['overall_confidence']:.2%}")
md.append("")
# Summary
summary = report['validation_summary']
md.append("## Summary")
md.append("")
md.append(f"- **Total Validations:** {summary['total_validations']}")
md.append(f"- **Valid:** {summary['valid_count']} ({summary['valid_count']/summary['total_validations']:.1%})")
md.append(f"- **Invalid:** {summary['invalid_count']} ({summary['invalid_count']/summary['total_validations']:.1%})")
md.append(f"- **Not Found:** {summary['not_found_count']} ({summary['not_found_count']/summary['total_validations']:.1%})")
md.append(f"- **Uncertain:** {summary['uncertain_count']} ({summary['uncertain_count']/summary['total_validations']:.1%})")
md.append(f"- **Hallucination Rate:** {summary['hallucination_rate']:.1%}")
md.append("")
# Hallucinations
if report['hallucinations_detected']:
md.append("## 🚨 Hallucinations Detected")
md.append("")
for i, hallucination in enumerate(report['hallucinations_detected'], 1):
md.append(f"### {i}. {hallucination['type'].replace('_', ' ').title()}")
md.append(f"**Location:** {hallucination['location']}")
md.append(f"**Description:** {hallucination['description']}")
if hallucination.get('suggestion'):
md.append(f"**Suggestion:** {hallucination['suggestion']}")
md.append("")
# Libraries
if report['libraries_analyzed']:
md.append("## 📚 Libraries Analyzed")
md.append("")
for lib in report['libraries_analyzed']:
md.append(f"### {lib['module_name']}")
md.append(f"**Import Status:** {lib['import_status']}")
md.append(f"**Import Confidence:** {lib['import_confidence']:.2%}")
if lib['classes_used']:
md.append("**Classes Used:**")
for cls in lib['classes_used']:
status_emoji = "✅" if cls['status'] == 'VALID' else "❌"
md.append(f" - {status_emoji} `{cls['class_name']}` ({cls['confidence']:.1%})")
if lib['methods_called']:
md.append("**Methods Called:**")
for method in lib['methods_called']:
status_emoji = "✅" if method['status'] == 'VALID' else "❌"
md.append(f" - {status_emoji} `{method['class_name']}.{method['method_name']}()` ({method['confidence']:.1%})")
if lib['attributes_accessed']:
md.append("**Attributes Accessed:**")
for attr in lib['attributes_accessed']:
status_emoji = "✅" if attr['status'] == 'VALID' else "❌"
md.append(f" - {status_emoji} `{attr['class_name']}.{attr['attribute_name']}` ({attr['confidence']:.1%})")
if lib['functions_called']:
md.append("**Functions Called:**")
for func in lib['functions_called']:
status_emoji = "✅" if func['status'] == 'VALID' else "❌"
md.append(f" - {status_emoji} `{func['function_name']}()` ({func['confidence']:.1%})")
md.append("")
# Recommendations
if report['recommendations']:
md.append("## 💡 Recommendations")
md.append("")
for rec in report['recommendations']:
md.append(f"- {rec}")
md.append("")
# Detailed Results
md.append("## 📋 Detailed Validation Results")
md.append("")
# Invalid items
invalid_items = report['validation_details']['invalid_items']
if invalid_items:
md.append("### ❌ Invalid Items")
md.append("")
for item in invalid_items:
md.append(f"- **{item['type']}** `{item['name']}` (Line {item['line']}) - {item['message']}")
md.append("")
# Not found items
not_found_items = report['validation_details']['not_found_items']
if not_found_items:
md.append("### 🔍 Not Found Items")
md.append("")
for item in not_found_items:
md.append(f"- **{item['type']}** `{item['name']}` (Line {item['line']}) - {item['message']}")
md.append("")
# Valid items (sample)
valid_items = report['validation_details']['valid_items']
if valid_items:
md.append("### ✅ Valid Items (Sample)")
md.append("")
for item in valid_items[:10]: # Show first 10
md.append(f"- **{item['type']}** `{item['name']}` (Line {item['line']}) - {item['message']}")
if len(valid_items) > 10:
md.append(f"- ... and {len(valid_items) - 10} more valid items")
md.append("")
return "\n".join(md)
def print_summary(self, report: Dict[str, Any]):
    """Print a concise summary of a validation report to stdout."""
    divider = "=" * 80
    meta = report['analysis_metadata']
    summary = report['validation_summary']

    # Collect every output line first, then emit them in one pass.
    lines = [
        "\n" + divider,
        "🤖 AI HALLUCINATION DETECTION REPORT",
        divider,
        f"Script: {meta['script_path']}",
        f"Overall Confidence: {summary['overall_confidence']:.1%}",
        "\nValidation Results:",
        f" ✅ Valid: {summary['valid_count']}",
        f" ❌ Invalid: {summary['invalid_count']}",
        f" 🔍 Not Found: {summary['not_found_count']}",
        f" ❓ Uncertain: {summary['uncertain_count']}",
        f" 📊 Hallucination Rate: {summary['hallucination_rate']:.1%}",
    ]

    hallucinations = report['hallucinations_detected']
    if hallucinations:
        lines.append(f"\n🚨 {len(hallucinations)} Hallucinations Detected:")
        for hall in hallucinations[:5]:  # cap output at the first 5 entries
            lines.append(f" - {hall['type'].replace('_', ' ').title()} at {hall['location']}")
            lines.append(f" {hall['description']}")

    if report['recommendations']:
        lines.append("\n💡 Recommendations:")
        for rec in report['recommendations'][:3]:  # cap output at the first 3 entries
            lines.append(f" - {rec}")

    lines.append(divider)
    for line in lines:
        print(line)
```
--------------------------------------------------------------------------------
/neo4j/docker-neo4j/src/test/java/com/neo4j/docker/coredb/configurations/TestConfSettings.java:
--------------------------------------------------------------------------------
```java
package com.neo4j.docker.coredb.configurations;
import com.neo4j.docker.coredb.plugins.Neo4jPluginEnv;
import com.neo4j.docker.utils.DatabaseIO;
import com.neo4j.docker.utils.Neo4jVersion;
import com.neo4j.docker.utils.SetContainerUser;
import com.neo4j.docker.utils.TemporaryFolderManager;
import com.neo4j.docker.utils.TestSettings;
import com.neo4j.docker.utils.WaitStrategies;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Assumptions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.RegisterExtension;
import org.neo4j.driver.exceptions.ClientException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testcontainers.containers.GenericContainer;
import org.testcontainers.containers.output.OutputFrame;
import org.testcontainers.containers.output.Slf4jLogConsumer;
import org.testcontainers.containers.wait.strategy.LogMessageWaitStrategy;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Duration;
import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;
import java.util.stream.Stream;
/**
 * Integration tests for how the Neo4j docker image handles configuration:
 * environment-variable overrides, user-mounted neo4j.conf files,
 * edition-specific defaults, old-vs-new config names, and plugin settings.
 * Each test starts a real container (Testcontainers) from TestSettings.IMAGE_ID.
 */
public class TestConfSettings
{
    // AUTH "none" disables authentication for the test containers.
    private static final String PASSWORD = "none";
    private static final String AUTH = "none"; // or "neo4j/"+PASSWORD if we want authentication
    private final Logger log = LoggerFactory.getLogger(TestConfSettings.class);
    // Folder containing the version-specific .conf fixture files used by the tests.
    private static Path confFolder;
    // Maps abstract Setting values to version-specific Configuration entries (name + env var name).
    private static Map<Setting,Configuration> confNames;

    @RegisterExtension
    public static TemporaryFolderManager temporaryFolderManager = new TemporaryFolderManager();

    @BeforeAll
    static void getVersionSpecificConfigurationSettings()
    {
        confFolder = Configuration.getConfigurationResourcesFolder();
        confNames = Configuration.getConfigurationNameMap();
    }

    /** Creates a container with the license accepted, auth configured and both HTTP/Bolt ports exposed. */
    private GenericContainer createContainer()
    {
        return new GenericContainer(TestSettings.IMAGE_ID)
                .withEnv("NEO4J_AUTH", AUTH)
                .withEnv("NEO4J_ACCEPT_LICENSE_AGREEMENT", "yes")
                .withExposedPorts(7474, 7687)
                .withLogConsumer(new Slf4jLogConsumer(log));
    }

    /**
     * Reconfigures a container to run "dump-config" (write the effective config
     * to the mounted /conf folder and exit) instead of starting Neo4j.
     */
    private GenericContainer makeContainerDumpConfig(GenericContainer container)
    {
        SetContainerUser.nonRootUser( container );
        container.setCommand("dump-config");
        WaitStrategies.waitUntilContainerFinished(container, Duration.ofSeconds(30));
        return container;
    }

    /** Parses a neo4j.conf style file into a key/value map; lines without '=' are skipped. */
    private Map<String, String> parseConfFile(File conf) throws FileNotFoundException
    {
        Map<String, String> configurations = new HashMap<>();
        Scanner scanner = new Scanner(conf);
        while ( scanner.hasNextLine() )
        {
            // Split on the first '=' only, so values containing '=' survive intact.
            String[] params = scanner.nextLine().split( "=", 2 );
            if(params.length < 2)
            {
                continue;
            }
            log.debug( params[0] + "\t:\t" + params[1] );
            configurations.put( params[0], params[1] );
        }
        return configurations;
    }

    /**
     * Asserts that the given setting is (or is not) present in the debug log with the given value.
     * Only the first matching line is inspected.
     */
    private void assertConfigurationPresentInDebugLog( Path debugLog, Configuration setting, String value, boolean shouldBeFound ) throws IOException
    {
        // searches the debug log for the given string, returns true if present
        Stream<String> lines = Files.lines(debugLog);
        String actualSetting = lines.filter(s -> s.contains( setting.name ))
                .findFirst()
                .orElse( "" );
        lines.close();
        if(shouldBeFound)
        {
            Assertions.assertTrue( !actualSetting.isEmpty(), setting.name+" was never set" );
            Assertions.assertTrue( actualSetting.contains( value ),
                    setting.name +" is set to the wrong value. Expected: "+
                    value +" Actual: " + actualSetting );
        }
        else
        {
            Assertions.assertTrue( actualSetting.isEmpty(),setting.name+" was set when it should not have been. " +
                    "Actual value: "+actualSetting );
        }
    }

    @Test
    void testIgnoreNumericVars()
    {
        try(GenericContainer container = createContainer())
        {
            // NEO4J_ env vars starting with a digit cannot map to valid setting names.
            container.withEnv( "NEO4J_1a", "1" )
                    .waitingFor( WaitStrategies.waitForBoltReady() );
            container.start();
            Assertions.assertTrue( container.isRunning() );
            String errorLogs = container.getLogs( OutputFrame.OutputType.STDERR);
            Assertions.assertTrue( errorLogs.contains( "WARNING: 1a not written to conf file. Settings that start with a number are not permitted" ),
                    "Neo4j did not warn about invalid numeric config variable `Neo4j_1a`.\n" +
                    "Actual warnings were:\n"+errorLogs);
        }
    }

    @Test
    void testEnvVarsOverrideDefaultConfigurations() throws Exception
    {
        Assumptions.assumeTrue(TestSettings.NEO4J_VERSION.isAtLeastVersion(new Neo4jVersion(3, 0, 0)),
                "No neo4j-admin in 2.3: skipping neo4j-admin-conf-override test");
        File conf;
        Map<Setting,String> expectedValues = new HashMap<Setting,String>() {{
            put( Setting.MEMORY_PAGECACHE_SIZE, "1000m");
            put( Setting.MEMORY_HEAP_INITIALSIZE, "2000m");
            put( Setting.MEMORY_HEAP_MAXSIZE, "3000m");
            put( Setting.DIRECTORIES_LOGS, "/notdefaultlogs" );
            put( Setting.DIRECTORIES_DATA, "/notdefaultdata" );
        }};
        try(GenericContainer container = createContainer())
        {
            for(Setting s : expectedValues.keySet())
            {
                container.withEnv( confNames.get( s ).envName, expectedValues.get( s ) );
            }
            Path confMount = temporaryFolderManager.createFolderAndMountAsVolume(container, "/conf");
            conf = confMount.resolve( "neo4j.conf" ).toFile();
            makeContainerDumpConfig( container );
            container.start();
        }
        // now check the settings we set via env are in the new conf file
        Assertions.assertTrue( conf.exists(), "configuration file not written" );
        Assertions.assertTrue( conf.canRead(), "configuration file not readable for some reason?" );
        Map<String,String> configurations = parseConfFile( conf );
        for(Setting s : expectedValues.keySet())
        {
            Assertions.assertTrue( configurations.containsKey( confNames.get( s ).name ),
                    confNames.get( s ).name + " not set at all" );
            Assertions.assertEquals( expectedValues.get( s ),
                    configurations.get( confNames.get( s ).name ),
                    confNames.get( s ).name + " not overridden" );
        }
    }

    @Test
    void testReadsTheConfFile() throws Exception
    {
        Path debugLog;
        try(GenericContainer container = createContainer().waitingFor(WaitStrategies.waitForNeo4jReady(PASSWORD)))
        {
            //Mount /conf
            Path confMount = temporaryFolderManager.createFolderAndMountAsVolume(container, "/conf");
            Path logMount = temporaryFolderManager.createFolderAndMountAsVolume(container, "/logs");
            debugLog = logMount.resolve("debug.log");
            SetContainerUser.nonRootUser( container );
            //Create ReadConf.conf file with the custom env variables
            Path confFile = confFolder.resolve( "ReadConf.conf" );
            Files.copy( confFile, confMount.resolve( "neo4j.conf" ) );
            //Start the container
            container.start();
        }
        //Check if the container reads the conf file
        assertConfigurationPresentInDebugLog( debugLog, confNames.get( Setting.MEMORY_HEAP_MAXSIZE ),
                "512", true );
    }

    @Test
    void testDefaultsConfigsAreSet() throws Exception
    {
        try(GenericContainer container = createContainer().waitingFor(WaitStrategies.waitForNeo4jReady(PASSWORD)))
        {
            //Mount /logs
            Path logMount = temporaryFolderManager.createFolderAndMountAsVolume(container, "/logs");
            SetContainerUser.nonRootUser( container );
            //Start the container
            container.start();
            DatabaseIO dbio = new DatabaseIO( container );
            Path debugLog = logMount.resolve( "debug.log" );

            String expectedDefaultListenAddress = "0.0.0.0";
            dbio.verifyConfigurationSetting("neo4j", PASSWORD, confNames.get( Setting.DEFAULT_LISTEN_ADDRESS), expectedDefaultListenAddress);
            assertConfigurationPresentInDebugLog(debugLog, confNames.get( Setting.DEFAULT_LISTEN_ADDRESS), expectedDefaultListenAddress, true);

            // test enterprise only default configurations are set
            if (TestSettings.EDITION == TestSettings.Edition.ENTERPRISE) {
                // cluster advertised addresses default to <short container id>:<port>
                String expectedTxAddress = container.getContainerId().substring(0, 12) + ":6000";
                String expectedRaftAddress = container.getContainerId().substring(0, 12) + ":7000";
                String expectedRoutingAddress = container.getContainerId().substring(0, 12) + ":7688";
                dbio.verifyConfigurationSetting("neo4j", PASSWORD, confNames.get( Setting.CLUSTER_TRANSACTION_ADDRESS), expectedTxAddress);
                assertConfigurationPresentInDebugLog(debugLog, confNames.get( Setting.CLUSTER_TRANSACTION_ADDRESS), expectedTxAddress,true);
                dbio.verifyConfigurationSetting("neo4j", PASSWORD, confNames.get( Setting.CLUSTER_RAFT_ADDRESS), expectedRaftAddress);
                assertConfigurationPresentInDebugLog(debugLog, confNames.get( Setting.CLUSTER_RAFT_ADDRESS), expectedRaftAddress,true);
                dbio.verifyConfigurationSetting("neo4j", PASSWORD, confNames.get( Setting.CLUSTER_ROUTING_ADDRESS), expectedRoutingAddress);
                assertConfigurationPresentInDebugLog(debugLog, confNames.get( Setting.CLUSTER_ROUTING_ADDRESS), expectedRoutingAddress,true);
            }
        }
    }

    @Test
    void testCommentedConfigsAreReplacedByDefaultOnes() throws Exception
    {
        File conf;
        try(GenericContainer container = createContainer())
        {
            //Mount /conf
            Path confMount = temporaryFolderManager.createFolderAndMountAsVolume(container, "/conf");
            conf = confMount.resolve( "neo4j.conf" ).toFile();
            SetContainerUser.nonRootUser( container );
            //Create ConfsReplaced.conf file in mounted folder
            Files.copy( confFolder.resolve( "ConfsReplaced.conf" ), conf.toPath() );
            makeContainerDumpConfig( container );
            //Start the container
            container.start();
        }
        //Read the config file to check if the config is set correctly
        Map<String,String> configurations = parseConfFile( conf );
        Assertions.assertTrue( configurations.containsKey( confNames.get( Setting.MEMORY_PAGECACHE_SIZE ).name ),
                "conf settings not set correctly by docker-entrypoint" );
        Assertions.assertEquals( "512M",
                configurations.get(confNames.get( Setting.MEMORY_PAGECACHE_SIZE ).name),
                "conf settings not appended correctly by docker-entrypoint" );
    }

    @Test
    void testConfFileNotOverridenByDockerEntrypoint() throws Exception
    {
        File conf;
        try(GenericContainer container = createContainer())
        {
            //Mount /conf
            Path confMount = temporaryFolderManager.createFolderAndMountAsVolume(container, "/conf");
            conf = confMount.resolve( "neo4j.conf" ).toFile();
            SetContainerUser.nonRootUser( container );
            //Create ConfsNotOverridden.conf file
            Path confFile = confFolder.resolve( "ConfsNotOverridden.conf" );
            Files.copy( confFile, confMount.resolve( "neo4j.conf" ) );
            makeContainerDumpConfig( container );
            container.start();
        }
        //Read the config file to check if the config is not overridden
        Map<String, String> configurations = parseConfFile(conf);
        Assertions.assertTrue(configurations.containsKey(confNames.get( Setting.MEMORY_PAGECACHE_SIZE).name),
                "conf settings not set correctly by docker-entrypoint");
        Assertions.assertEquals("1024M",
                configurations.get(confNames.get( Setting.MEMORY_PAGECACHE_SIZE).name),
                "docker-entrypoint has overridden custom setting set from user's conf");
    }

    @Test
    void testOldConfigNamesNotOverwrittenByDockerDefaults() throws Exception
    {
        Assumptions.assumeTrue( TestSettings.NEO4J_VERSION.isAtLeastVersion( Neo4jVersion.NEO4J_VERSION_500),
                "test only applicable after 5.0." );
        // at some point we will fully deprecate old config names, at which point we add an assume-version-less-than here
        Path logMount;
        // use the 4.4 config-name map so the env vars carry the OLD setting names
        Map<Setting,Configuration> oldConfMap = Configuration.getConfigurationNameMap( new Neo4jVersion( 4, 4, 0 ) );
        Map<Setting,String> expectedValues = new HashMap<Setting,String>() {{
            put( Setting.TXLOG_RETENTION_POLICY, "5M size" );
            put( Setting.MEMORY_PAGECACHE_SIZE, "100.00KiB" );
            put( Setting.DEFAULT_LISTEN_ADDRESS, "127.0.0.1" );
        }};
        if( TestSettings.EDITION == TestSettings.Edition.ENTERPRISE)
        {
            expectedValues.put( Setting.CLUSTER_TRANSACTION_ADDRESS, "1.2.3.4:8000" );
            expectedValues.put( Setting.CLUSTER_RAFT_ADDRESS, "1.2.3.4:9000" );
        }

        try(GenericContainer container = createContainer())
        {
            logMount = temporaryFolderManager.createFolderAndMountAsVolume(container, "/logs");
            SetContainerUser.nonRootUser( container );
            // set configurations using old config names
            for( Setting s : expectedValues.keySet() )
            {
                container.withEnv( oldConfMap.get( s ).envName, expectedValues.get( s ) );
            }
            // the container probably won't start nicely because the clustering settings are invalid.
            // However we only care that the configs were read properly, so we can kill as soon as neo4j logs that it started.
            container.waitingFor( new LogMessageWaitStrategy()
                    .withRegEx( ".*Remote interface available at http://localhost:7474/.*" )
                    .withStartupTimeout( Duration.ofSeconds( 60 ) ));
            container.start();
        }
        for( Setting s : expectedValues.keySet() )
        {
            // configuration should be present in debug log under new configuration name
            assertConfigurationPresentInDebugLog(logMount.resolve( "debug.log" ),
                    confNames.get( s ),
                    expectedValues.get( s ),
                    true );
        }
    }

    @Test
    void testEnvVarsOverrideConfFile() throws Exception
    {
        Assumptions.assumeTrue(TestSettings.NEO4J_VERSION.isAtLeastVersion(new Neo4jVersion(4, 2, 0)),
                "test not applicable in versions before 4.2.");
        Path debugLog;
        try(GenericContainer container = createContainer()
                .withEnv(confNames.get(Setting.MEMORY_PAGECACHE_SIZE).envName, "512.00MiB")
                .waitingFor(WaitStrategies.waitForNeo4jReady(PASSWORD)))
        {
            Path confMount = temporaryFolderManager.createFolderAndMountAsVolume(container, "/conf");
            Path logMount = temporaryFolderManager.createFolderAndMountAsVolume(container, "/logs");
            debugLog = logMount.resolve( "debug.log" );
            SetContainerUser.nonRootUser( container );
            //Create EnvVarsOverride.conf file
            Path confFile = confFolder.resolve("EnvVarsOverride.conf");
            Files.copy( confFile, confMount.resolve( "neo4j.conf" ) );
            //Start the container
            container.start();
        }
        // the env var value should win over the conf file value
        assertConfigurationPresentInDebugLog(debugLog, confNames.get(Setting.MEMORY_PAGECACHE_SIZE), "512.00MiB", true );
    }

    @Test
    void testEnterpriseOnlyDefaultsDontOverrideConfFile() throws Exception
    {
        Assumptions.assumeTrue(TestSettings.EDITION == TestSettings.Edition.ENTERPRISE,
                "This is testing only ENTERPRISE EDITION configs");
        try(GenericContainer container = createContainer().waitingFor(WaitStrategies.waitForNeo4jReady(PASSWORD)))
        {
            Path confMount = temporaryFolderManager.createFolderAndMountAsVolume(container, "/conf");
            Path logMount = temporaryFolderManager.createFolderAndMountAsVolume(container, "/logs");
            // mount a configuration file with enterprise only settings already set
            Path confFile = confFolder.resolve( "EnterpriseOnlyNotOverwritten.conf" );
            Files.copy( confFile, confMount.resolve( "neo4j.conf" ) );
            //Start the container
            SetContainerUser.nonRootUser( container );
            container.start();
            //Read debug.log to check that cluster confs are set successfully
            assertConfigurationPresentInDebugLog( logMount.resolve( "debug.log" ),
                    confNames.get( Setting.CLUSTER_TRANSACTION_ADDRESS ),
                    "localhost:6060", true );
        }
    }

    @Test
    void testMountingMetricsFolderShouldNotSetConfInCommunity() throws Exception
    {
        Assumptions.assumeTrue( TestSettings.EDITION == TestSettings.Edition.COMMUNITY,
                "Test only valid with community edition");
        try ( GenericContainer container = createContainer() )
        {
            temporaryFolderManager.createFolderAndMountAsVolume(container, "/metrics");
            Path confMount = temporaryFolderManager.createFolderAndMountAsVolume(container, "/conf");
            makeContainerDumpConfig( container );
            container.start();

            File conf = confMount.resolve( "neo4j.conf" ).toFile();
            Map<String, String> configurations = parseConfFile(conf);
            Assertions.assertFalse(configurations.containsKey(confNames.get( Setting.DIRECTORIES_METRICS ).name),
                    "should not be setting any metrics configurations in community edition");
        }
    }

    @Test
    void testCommunityDoesNotHaveEnterpriseConfigs() throws Exception
    {
        Assumptions.assumeTrue(TestSettings.EDITION == TestSettings.Edition.COMMUNITY,
                "This is testing only COMMUNITY EDITION configs");
        Path debugLog;
        try(GenericContainer container = createContainer()
                .withEnv(confNames.get(Setting.MEMORY_PAGECACHE_SIZE).envName, "512m")
                .waitingFor(WaitStrategies.waitForNeo4jReady(PASSWORD)))
        {
            //Mount /logs
            Path logMount = temporaryFolderManager.createFolderAndMountAsVolume(container, "/logs");
            debugLog = logMount.resolve( "debug.log" );
            SetContainerUser.nonRootUser( container );
            //Start the container
            container.start();
        }
        //Read debug.log to check that cluster confs are not present
        assertConfigurationPresentInDebugLog( debugLog, confNames.get(Setting.CLUSTER_TRANSACTION_ADDRESS), "*", false );
    }

    @Test
    @Tag("BundleTest")
    void testSettingAppendsToConfFileWithoutEmptyLine_neo4jPlugins() throws Exception
    {
        String expectedPageCacheSize = "1000.00MiB";
        // apoc was renamed from apoc-core when 5.0 split the plugin packaging
        String pluginStr = "[\"apoc\"]";
        if(TestSettings.NEO4J_VERSION.isOlderThan( Neo4jVersion.NEO4J_VERSION_500 ))
        {
            pluginStr = "[\"apoc-core\"]";
        }

        try(GenericContainer container = createContainer().waitingFor(WaitStrategies.waitForNeo4jReady(PASSWORD)))
        {
            // NoNewline.conf deliberately has no trailing newline; appended settings must still parse
            Path confMount = temporaryFolderManager.createFolderAndMountAsVolume(container, "/conf");
            Files.copy( confFolder.resolve( "NoNewline.conf" ), confMount.resolve( "neo4j.conf" ) );
            container.withEnv( Neo4jPluginEnv.get(), pluginStr );
            //Start the container
            container.start();
            DatabaseIO dbio = new DatabaseIO( container );
            try
            {
                dbio.runCypherQuery( "neo4j", PASSWORD, "RETURN apoc.version()" );
            }
            catch( ClientException ex )
            {
                Assertions.fail("Did not load apoc plugin.", ex);
            }
            dbio.verifyConfigurationSetting( "neo4j",
                    PASSWORD,
                    confNames.get( Setting.MEMORY_PAGECACHE_SIZE ),
                    expectedPageCacheSize);
        }
    }

    @Test
    void testSettingAppendsToConfFileWithoutEmptyLine_envSetting() throws Exception
    {
        String expectedHeapSize = "128.00MiB";
        String expectedPageCacheSize = "1000.00MiB";

        try(GenericContainer container = createContainer())
        {
            // NoNewline.conf deliberately has no trailing newline; appended settings must still parse
            Path confMount = temporaryFolderManager.createFolderAndMountAsVolume(container, "/conf");
            Files.copy( confFolder.resolve( "NoNewline.conf" ), confMount.resolve( "neo4j.conf" ) );
            // set an env variable
            container.withEnv( confNames.get( Setting.MEMORY_HEAP_MAXSIZE ).envName, expectedHeapSize )
                    .waitingFor(WaitStrategies.waitForNeo4jReady(PASSWORD));
            //Start the container
            container.start();
            DatabaseIO dbio = new DatabaseIO( container );
            dbio.verifyConfigurationSetting( "neo4j",
                    PASSWORD,
                    confNames.get( Setting.MEMORY_HEAP_MAXSIZE ),
                    expectedHeapSize);
            dbio.verifyConfigurationSetting( "neo4j",
                    PASSWORD,
                    confNames.get( Setting.MEMORY_PAGECACHE_SIZE ),
                    expectedPageCacheSize);
        }
    }

    @Test
    void testApocEnvVarsAreWrittenToApocConf() throws Exception
    {
        Assumptions.assumeTrue( TestSettings.NEO4J_VERSION.isAtLeastVersion( new Neo4jVersion( 5,3, 0 ) ),
                "APOC conf not present before 5.0 and this bug wasn't fixed before 5.3.");
        Path confMount;
        try(GenericContainer container = createContainer())
        {
            container.withEnv( confNames.get( Setting.APOC_EXPORT_FILE_ENABLED ).envName, "true" );
            container.withEnv( Neo4jPluginEnv.get(), "[\"apoc\"]" );
            confMount = temporaryFolderManager.createFolderAndMountAsVolume(container, "/conf");
            makeContainerDumpConfig( container );
            container.start();
        }
        // there's no way to verify that APOC configurations have been set by querying neo4j or the debug log,
        // so the only verification we can do is check that neo4j started ok and that there is an apoc.conf dumped.
        File apocConf = confMount.resolve( "apoc.conf" ).toFile();
        Assertions.assertTrue( apocConf.exists(), "Did not create an apoc.conf to contain the apoc settings." );
        Map<String,String> actualApocSettings = parseConfFile( apocConf );
        Assertions.assertTrue(actualApocSettings.containsKey(confNames.get(Setting.APOC_EXPORT_FILE_ENABLED).name),
                "APOC setting not added to apoc.conf");
        Assertions.assertEquals("true",
                actualApocSettings.get(confNames.get( Setting.APOC_EXPORT_FILE_ENABLED).name),
                "Incorrect value written for APOC setting");
    }

    @Test
    void testShellExpansionAvoided() throws Exception
    {
        Assumptions.assumeTrue( TestSettings.NEO4J_VERSION.isAtLeastVersion( Neo4jVersion.NEO4J_VERSION_400),
                "test only applicable to 4.0 and beyond." );

        Path confMount;
        try(GenericContainer container = createContainer()
                .withEnv(confNames.get(Setting.SECURITY_PROCEDURES_UNRESTRICTED).envName, "*"))
        {
            confMount = temporaryFolderManager.createFolderAndMountAsVolume(container, "/conf");
            makeContainerDumpConfig( container );
            container.start();
        }
        File conf = confMount.resolve( "neo4j.conf" ).toFile();
        Map<String, String> configurations = parseConfFile(conf);
        Assertions.assertTrue(configurations.containsKey(confNames.get( Setting.SECURITY_PROCEDURES_UNRESTRICTED).name),
                "configuration not set from env var");
        Assertions.assertEquals("*",
                configurations.get(confNames.get( Setting.SECURITY_PROCEDURES_UNRESTRICTED).name),
                "Configuration value should be *. If it's not docker-entrypoint.sh probably evaluated it as a glob expression.");
    }
}
```
--------------------------------------------------------------------------------
/neo4j/docker-neo4j/docker-image-src/calver/coredb/docker-entrypoint.sh:
--------------------------------------------------------------------------------
```bash
#!/bin/bash -eu
# NB: the -eu options from the shebang are lost if this script is invoked as `bash docker-entrypoint.sh`.

# first positional argument is the command the container was asked to run (consumed later in this script)
cmd="$1"

# load useful utility functions
. /startup/utilities.sh
function is_readable
{
    # Returns 0 if the given file/folder is readable in the current user context,
    # regardless of which user this script is running as.
    # Relies on the globals userid and groups (set by the entrypoint) and on the
    # containsElement helper from utilities.sh.
    # It would be nice if the writability tests could use this logic somehow.
    local _file=${1}
    perm=$(stat -c %a "${_file}")

    # world-readable: third octal digit carries the read bit (4)
    if [[ ${perm:2:1} -ge 4 ]]; then
        return 0
    fi
    # owner-readable: compare both the symbolic (%U) and numeric (%u) owner.
    # Quote the stat arguments so paths containing spaces do not word-split.
    if [[ ${perm:0:1} -ge 4 ]]; then
        if [[ "$(stat -c %U "${_file}")" = "${userid}" ]] || [[ "$(stat -c %u "${_file}")" = "${userid}" ]]; then
            return 0
        fi
    fi
    # group-readable: check both the numeric (%g) and symbolic (%G) group
    if [[ ${perm:1:1} -ge 4 ]]; then
        if containsElement "$(stat -c %g "${_file}")" "${groups[@]}" || containsElement "$(stat -c %G "${_file}")" "${groups[@]}" ; then
            return 0
        fi
    fi
    return 1
}
function is_writable
{
    # Returns 0 if the given file/folder is writable in the current user context.
    # Mirrors the logic of is_readable but checks digits carrying the write bit (2,3,6,7).
    # It would be nice if this and the is_readable function could combine somehow.
    local _file=${1}
    perm=$(stat -c %a "${_file}")

    # world-writable
    if containsElement ${perm:2:1} 2 3 6 7; then
        return 0
    fi
    # owner-writable: compare both the symbolic (%U) and numeric (%u) owner.
    # Quote the stat arguments so paths containing spaces do not word-split.
    if containsElement ${perm:0:1} 2 3 6 7; then
        if [[ "$(stat -c %U "${_file}")" = "${userid}" ]] || [[ "$(stat -c %u "${_file}")" = "${userid}" ]]; then
            return 0
        fi
    fi
    # group-writable: check both the numeric (%g) and symbolic (%G) group
    if containsElement ${perm:1:1} 2 3 6 7; then
        if containsElement "$(stat -c %g "${_file}")" "${groups[@]}" || containsElement "$(stat -c %G "${_file}")" "${groups[@]}" ; then
            return 0
        fi
    fi
    return 1
}
function check_mounted_folder_readable
{
    # Abort with permission advice if the given mount point is not readable.
    local _directory=${1}
    debug_msg "checking ${_directory} is readable"
    is_readable "${_directory}" || print_permissions_advice_and_fail "${_directory}" "${userid}" "${groupid}"
}
function check_mounted_folder_writable_with_chown
{
    # The /data and /log directory are a bit different because they are very likely to be mounted by the user but not
    # necessarily writable.
    # This depends on whether a user ID is passed to the container and which folders are mounted.
    #
    #   No user ID passed to container:
    #   1) No folders are mounted.
    #      The /data and /log folder are owned by neo4j by default, so should be writable already.
    #   2) Both /log and /data are mounted.
    #      This means on start up, /data and /logs are owned by an unknown user and we should chown them to neo4j for
    #      backwards compatibility.
    #
    #   User ID passed to container:
    #   1) Both /data and /logs are mounted
    #      The /data and /logs folders are owned by an unknown user but we *should* have rw permission to them.
    #      That should be verified and error (helpfully) if not.
    #   2) User mounts /data or /logs *but not both*
    #      The unmounted folder is still owned by neo4j, which should already be writable. The mounted folder should
    #      have rw permissions through user id. This should be verified.
    #   3) No folders are mounted.
    #      The /data and /log folder are owned by neo4j by default, and these are already writable by the user.
    #      (This is a very unlikely use case).
    local mountFolder=${1}
    debug_msg "checking ${mountFolder} is writable"
    if running_as_root && ! secure_mode_enabled; then
        # check folder permissions
        if ! is_writable "${mountFolder}" ; then
            # warn that we're about to chown the folder and then chown it
            echo "Warning: Folder mounted to \"${mountFolder}\" is not writable from inside container. Changing folder owner to ${userid}."
            chown -R "${userid}":"${groupid}" "${mountFolder}"
        # check permissions on files in the folder; quote the substitution so an
        # empty result cannot break the numeric comparison
        elif [ "$(su-exec "${userid}":"${groupid}" find "${mountFolder}" -not -writable | wc -l)" -gt 0 ]; then
            echo "Warning: Some files inside \"${mountFolder}\" are not writable from inside container. Changing folder owner to ${userid}."
            chown -R "${userid}":"${groupid}" "${mountFolder}"
        fi
    else
        # not root (or secure mode): we cannot chown, so verify and fail helpfully instead
        if [[ ! -w "${mountFolder}" ]] && [[ "$(stat -c %U "${mountFolder}")" != "neo4j" ]]; then
            print_permissions_advice_and_fail "${mountFolder}" "${userid}" "${groupid}"
        fi
    fi
}
function load_plugin_from_location
{
    # Install a plugin shipped inside the image from a local path (may be a glob).
    local _plugin_name="${1}"
    local _location="${2}"

    # Prefer a user-mounted /plugins folder over the default one under NEO4J_HOME.
    local _plugins_dir="${NEO4J_HOME}/plugins"
    if [ -d /plugins ]; then
        _plugins_dir="/plugins"
    fi
    local _destination="${_plugins_dir}/${_plugin_name}.jar"

    # Now we install the plugin that is shipped with Neo4j.
    # ${_location} is expanded unquoted on purpose so a glob resolves to the matching file(s).
    for filename in ${_location}; do
        echo "Installing Plugin '${_plugin_name}' from ${_location} to ${_destination}"
        # copy the matched file (the source here was corrupted to "$(unknown)" upstream)
        cp --preserve "${filename}" "${_destination}"
        chmod +rw "${_destination}"
    done

    if ! is_readable "${_destination}"; then
        echo >&2 "Plugin at '${_destination}' is not readable"
        exit 1
    fi
}
function load_plugin_from_url
{
    # Load a plugin at runtime. The provided github repository must have a versions.json on the master branch with the
    # correct format.
    local _plugin_name="${1}" #e.g. apoc, graph-algorithms, graph-ql

    # Prefer a user-mounted /plugins folder over the default one under NEO4J_HOME.
    local _plugins_dir="${NEO4J_HOME}/plugins"
    if [ -d /plugins ]; then
        local _plugins_dir="/plugins"
    fi
    local _versions_json_url="$(jq --raw-output "with_entries( select(.key==\"${_plugin_name}\") ) | to_entries[] | .value.versions" /startup/neo4j-plugins.json )"
    debug_msg "Will read ${_plugin_name} versions.json from ${_versions_json_url}"
    # Using the same name for the plugin irrespective of version ensures we don't end up with different versions of the same plugin
    local _destination="${_plugins_dir}/${_plugin_name}.jar"
    local _neo4j_version="$(neo4j --version | cut -d' ' -f2)"

    # Now we call out to github to get the versions.json for this plugin and we parse that to find the url for the correct plugin jar for our neo4j version
    echo "Fetching versions.json for Plugin '${_plugin_name}' from ${_versions_json_url}"
    local _versions_json
    if ! _versions_json="$(wget -q --timeout 300 --tries 30 -O - "${_versions_json_url}")"; then
        # log the URL we failed to reach (the response variable is empty at this point)
        debug_msg "ERROR: could not fetch '${_versions_json_url}'"
        echo >&2 "ERROR: could not query ${_versions_json_url} for plugin compatibility information.
This could indicate a problem with your network or this container's network settings.
Neo4j will continue to start, but \"${_plugin_name}\" will not be loaded."
        return 1
    fi
    # pick the lowest neo4j entry whose semver range matches our version
    local _plugin_jar_url="$(echo "${_versions_json}" | jq -L/startup --raw-output "import \"semver\" as lib; [ .[] | select(.neo4j|lib::semver(\"${_neo4j_version}\")) ] | min_by(.neo4j) | .jar")"
    if [[ -z "${_plugin_jar_url}" ]] || [[ "${_plugin_jar_url}" == "null" ]]; then
        debug_msg "ERROR: '${_versions_json_url}' does not contain an entry for ${_neo4j_version}"
        echo >&2 "ERROR: No compatible \"${_plugin_name}\" plugin found for Neo4j ${_neo4j_version} ${NEO4J_EDITION}.
This can happen with the newest Neo4j versions when a compatible plugin has not yet been released.
You can either use an older version of Neo4j, or continue without ${_plugin_name}.
Neo4j will continue to start, but \"${_plugin_name}\" will not be loaded."
    else
        echo "Installing Plugin '${_plugin_name}' from ${_plugin_jar_url} to ${_destination} "
        wget -q --timeout 300 --tries 30 --output-document="${_destination}" "${_plugin_jar_url}"
        if ! is_readable "${_destination}"; then
            echo >&2 "Plugin at '${_destination}' is not readable"
            exit 1
        fi
    fi
}
function apply_plugin_default_configuration
{
# Applies the default configuration values a plugin requires (as declared in
# /startup/neo4j-plugins.json) to neo4j.conf, without overriding any property
# the user set explicitly in their own configuration.
# Arguments:
#   $1 - plugin name
#   $2 - reference conf file: a snapshot of the user-provided config taken before
#        any plugin modifications, used to detect explicitly user-set properties
local _plugin_name="${1}" #e.g. apoc, graph-algorithms, graphql
local _reference_conf="${2}" # used to determine if we can override properties
local _neo4j_conf="${NEO4J_HOME}/conf/neo4j.conf"
local _property _value
echo "Applying default values for plugin ${_plugin_name} to neo4j.conf"
# Iterate over each {property: value} pair declared under this plugin's "properties" key
for _entry in $(jq --compact-output --raw-output "with_entries( select(.key==\"${_plugin_name}\") ) | to_entries[] | .value.properties | to_entries[]" /startup/neo4j-plugins.json); do
_property="$(jq --raw-output '.key' <<< "${_entry}")"
_value="$(jq --raw-output '.value' <<< "${_entry}")"
debug_msg "${_plugin_name} requires setting ${_property}=${_value}"
# the first grep strips out comments
if grep -o "^[^#]*" "${_reference_conf}" | grep -q --fixed-strings "${_property}=" ; then
# property is already set in the user provided config. In this case we don't override what has been set explicitly by the user.
echo "Skipping ${_property} for plugin ${_plugin_name} because it is already set."
echo "You may need to add ${_value} to the ${_property} setting in your configuration file."
else
if grep -o "^[^#]*" "${_neo4j_conf}" | grep -q --fixed-strings "${_property}=" ; then
# Property already present (e.g. written by another plugin): prepend this
# plugin's value to the existing comma-separated list rather than replacing it.
sed --in-place "s/${_property}=/&${_value},/" "${_neo4j_conf}"
debug_msg "${_property} was already in the configuration file, so ${_value} was added to it."
else
# Property absent entirely: append it to neo4j.conf.
echo -e "\n${_property}=${_value}" >> "${_neo4j_conf}"
debug_msg "${_property}=${_value} has been added to the configuration file."
fi
fi
done
}
function install_neo4j_plugins
{
# Validates the requested NEO4J_PLUGINS list against /startup/neo4j-plugins.json,
# then loads each plugin — from inside the container when it ships with the image,
# otherwise by download — and applies each plugin's default configuration.
# Reads globals: NEO4J_PLUGINS (JSON array of plugin names), NEO4J_HOME.
# first verify that the requested plugins are valid.
debug_msg "One or more NEO4J_PLUGINS have been requested."
local _known_plugins=($(jq --raw-output "keys[]" /startup/neo4j-plugins.json))
debug_msg "Checking requested plugins are known and can be installed."
for plugin_name in $(echo "${NEO4J_PLUGINS}" | jq --raw-output '.[]'); do
if ! containsElement "${plugin_name}" "${_known_plugins[@]}"; then
printf >&2 "\"%s\" is not a known Neo4j plugin. Options are:\n%s" "${plugin_name}" "$(jq --raw-output "keys[1:][]" /startup/neo4j-plugins.json)"
exit 1
fi
done
# We store a copy of the config before we modify it for the plugins to allow us to see if there are user-set values in the input config that we shouldn't override
local _old_config="$(mktemp)"
if [ -e "${NEO4J_HOME}"/conf/neo4j.conf ]; then
cp "${NEO4J_HOME}"/conf/neo4j.conf "${_old_config}"
else
touch "${NEO4J_HOME}"/conf/neo4j.conf
touch "${_old_config}"
fi
for plugin_name in $(echo "${NEO4J_PLUGINS}" | jq --raw-output '.[]'); do
debug_msg "Plugin ${plugin_name} has been requested"
local _location="$(jq --raw-output "with_entries( select(.key==\"${plugin_name}\") ) | to_entries[] | .value.location" /startup/neo4j-plugins.json )"
# If the plugin's declared location glob resolves to an existing file inside the
# image, load it from there; otherwise fall back to downloading it.
if [ "${_location}" != "null" -a -n "$(shopt -s nullglob; echo ${_location})" ]; then
debug_msg "$plugin_name is already in the container at ${_location}"
load_plugin_from_location "${plugin_name}" "${_location}"
debug_msg "Applying plugin specific configurations."
apply_plugin_default_configuration "${plugin_name}" "${_old_config}"
else
debug_msg "$plugin_name must be downloaded."
# Only apply the plugin's configuration if the download actually succeeded.
if load_plugin_from_url "${plugin_name}"; then
debug_msg "Applying plugin specific configurations."
apply_plugin_default_configuration "${plugin_name}" "${_old_config}"
fi
fi
done
rm "${_old_config}"
}
function add_docker_default_to_conf
{
    # Appends a docker-specific default setting to neo4j.conf, but only when the
    # setting is not already present in the file. Docker defaults must NOT
    # overwrite values already in the conf file.
    # Arguments:
    #   $1 - setting name (e.g. server.memory.pagecache.size)
    #   $2 - default value
    local _setting="${1}"
    local _value="${2}"
    if [ ! -e "${NEO4J_HOME}"/conf/neo4j.conf ] || ! grep -q "^${_setting}=" "${NEO4J_HOME}"/conf/neo4j.conf
    then
        debug_msg "Appended ${_setting}=${_value} to ${NEO4J_HOME}/conf/neo4j.conf"
        # Quote the whole argument: the previous unquoted form word-split the
        # value, producing a leading space on the written line so the
        # "^${_setting}=" grep guard above never matched on later calls.
        echo -e "\n${_setting}=${_value}" >> "${NEO4J_HOME}"/conf/neo4j.conf
    fi
}
function add_env_setting_to_conf
{
    # Writes a setting derived from an environment variable into the appropriate
    # conf file. Unlike docker defaults, environment-derived settings DO
    # overwrite values already present in the conf file.
    # Arguments:
    #   $1 - setting name (dot-separated)
    #   $2 - value
    local _setting=${1}
    local _value=${2}
    local _conf_file
    # Settings in this list accumulate values instead of being replaced.
    local _append_not_replace_configs=("server.jvm.additional")
    # different settings need to go in different files now: apoc.* settings live
    # in conf/apoc.conf, everything else goes into conf/neo4j.conf.
    case "$(echo ${_setting} | cut -d . -f 1)" in
        apoc)
            _conf_file="${NEO4J_HOME}"/conf/apoc.conf
            ;;
        *)
            _conf_file="${NEO4J_HOME}"/conf/neo4j.conf
            ;;
    esac
    if [ -e "${_conf_file}" ] && grep -q -F "${_setting}=" "${_conf_file}"; then
        if containsElement "${_setting}" "${_append_not_replace_configs[@]}"; then
            debug_msg "${_setting} will be appended to ${_conf_file} without replacing existing settings."
        else
            # Remove any lines containing the setting already.
            # Escape regex metacharacters (notably the dots in setting names) so
            # the sed address matches the setting name literally; an unescaped
            # dot matches any character and could delete unrelated lines.
            debug_msg "Removing existing setting for ${_setting} in ${_conf_file}"
            local _escaped_setting
            _escaped_setting="$(printf '%s' "${_setting}" | sed -e 's/[]\/$*.^[]/\\&/g')"
            sed --in-place "/^${_escaped_setting}=.*/d" "${_conf_file}"
        fi
    fi
    # Then always append setting to file
    debug_msg "Appended ${_setting}=${_value} to ${_conf_file}"
    echo "${_setting}=${_value}" >> "${_conf_file}"
}
function set_initial_password
{
# Sets the initial admin password via `neo4j-admin dbms set-initial-password`.
# Accepted auth forms: "none" (disables authentication) or
# "<user>/<password>[/true]" where the trailing "true" forces a password change
# on first login. Only takes effect when the container command is the database
# server itself (reads global `cmd`); also reads neo4j_admin_cmd, userid, groupid.
# Arguments:
#   $1 - the NEO4J_AUTH value (may be empty)
local _neo4j_auth="${1}"
# set the neo4j initial password only if you run the database server
if [ "${cmd}" == "neo4j" ]; then
if [ "${_neo4j_auth:-}" == "none" ]; then
debug_msg "Authentication is requested to be unset"
add_env_setting_to_conf "dbms.security.auth_enabled" "false"
elif [[ "${_neo4j_auth:-}" =~ ^([^/]+)\/([^/]+)/?([tT][rR][uU][eE])?$ ]]; then
admin_user="${BASH_REMATCH[1]}"
password="${BASH_REMATCH[2]}"
do_reset="${BASH_REMATCH[3]}"
if [ "${password}" == "neo4j" ]; then
echo >&2 "Invalid value for password. It cannot be 'neo4j', which is the default."
exit 1
fi
if [ "${admin_user}" != "neo4j" ]; then
echo >&2 "Invalid admin username, it must be neo4j."
exit 1
fi
# this line has an inbuilt assumption that any configuration settings from the environment have already been applied to neo4j.conf
local _min_password_length=$(cat "${NEO4J_HOME}"/conf/neo4j.conf | grep dbms.security.auth_minimum_password_length | sed -E 's/.*=(.*)/\1/')
# Note: a too-short password only warns here (no exit) — Neo4j itself enforces
# the minimum length at startup, as the message below explains.
if [ "${#password}" -lt "${_min_password_length:-"8"}" ]; then
echo >&2 "Invalid value for password. The minimum password length is 8 characters.
If Neo4j fails to start, you can:
  1) Use a stronger password.
  2) Set configuration dbms.security.auth_minimum_password_length to override the minimum password length requirement.
  3) Set environment variable NEO4J_dbms_security_auth__minimum__password__length to override the minimum password length requirement."
fi
if running_as_root; then
# running set-initial-password as root will create subfolders to /data as root, causing startup fail when neo4j can't read or write the /data/dbms folder
# creating the folder first will avoid that
mkdir -p /data/dbms
debug_msg "Making sure /data/dbms is owned by ${userid}:${groupid}"
chown "${userid}":"${groupid}" /data/dbms
fi
local extra_args=()
# NOTE(review): the regex above accepts the trailing flag case-insensitively
# (e.g. "TRUE"), but only lowercase "true" triggers --require-password-change
# here — confirm whether that is intended.
if [ "${do_reset}" == "true" ]; then
extra_args+=("--require-password-change")
fi
if [ "${EXTENDED_CONF+"yes"}" == "yes" ]; then
extra_args+=("--expand-commands")
fi
if debugging_enabled; then
extra_args+=("--verbose")
fi
debug_msg "Setting initial password"
# Mask the actual password in debug output.
debug_msg "${neo4j_admin_cmd} dbms set-initial-password ***** ${extra_args[*]}"
${neo4j_admin_cmd} dbms set-initial-password "${password}" "${extra_args[@]}"
elif [ -n "${_neo4j_auth:-}" ]; then
echo "$_neo4j_auth is invalid"
echo >&2 "Invalid value for NEO4J_AUTH: '${_neo4j_auth}'"
exit 1
fi
fi
}
# ==== CODE STARTS ====
debug_msg "DEBUGGING ENABLED"
# If we're running as root, then run as the neo4j user. Otherwise
# docker is running with --user and we simply use that user. Note
# that su-exec, despite its name, does not replicate the functionality
# of exec, so we need to use both
if running_as_root; then
userid="neo4j"
groupid="neo4j"
groups=($(id -G neo4j))
exec_cmd="exec su-exec neo4j:neo4j"
neo4j_admin_cmd="su-exec neo4j:neo4j neo4j-admin"
debug_msg "Running as root user inside neo4j image"
else
userid="$(id -u)"
groupid="$(id -g)"
groups=($(id -G))
exec_cmd="exec"
neo4j_admin_cmd="neo4j-admin"
debug_msg "Running as user ${userid}:${groupid} inside neo4j image"
fi
# Freeze the identity and command variables for the remainder of the script.
readonly userid
readonly groupid
readonly groups
readonly exec_cmd
readonly neo4j_admin_cmd
# Need to chown the home directory
if running_as_root; then
debug_msg "chowning ${NEO4J_HOME} recursively to ${userid}":"${groupid}"
chown -R "${userid}":"${groupid}" "${NEO4J_HOME}"
chmod 700 "${NEO4J_HOME}"
# Restrict top-level directories to the neo4j user, and config files to 600,
# so no other container user can read configuration (which may hold secrets).
find "${NEO4J_HOME}" -mindepth 1 -maxdepth 1 -type d -exec chmod -R 700 {} \;
debug_msg "Setting all files in ${NEO4J_HOME}/conf to permissions 600"
find "${NEO4J_HOME}"/conf -type f -exec chmod -R 600 {} \;
fi
## == EXTRACT SECRETS FROM FILES ===
# These environment variables are set by using docker secrets and they override their equivalent env vars
# They are suffixed with _FILE and prefixed by the name of the env var they should override
# e.g. NEO4J_AUTH_FILE will override the value of the NEO4J_AUTH
# It's best to do this first so that the secrets are available for the rest of the script
for variable_name in $(printenv | awk -F= '{print $1}'); do
# Check if the variable ends with "_FILE" and starts with "NEO4J_"
if [[ $variable_name == *"_FILE" &&
$variable_name == "NEO4J_"* ]]; then
# Create a new variable name by removing the "_FILE" suffix
base_variable_name=${variable_name%_FILE}
# Get the value of the _FILE variable (indirect expansion: the file path)
secret_file_path="${!variable_name}"
if is_readable "${secret_file_path}"; then
# Read the secret value from the file ($(<file) avoids spawning cat)
secret_value=$(<"$secret_file_path")
else
# File not readable: fail fast rather than start with missing credentials.
echo >&2 "The secret file '$secret_file_path' does not exist or is not readable. Make sure you have correctly configured docker secrets."
exit 1
fi
# Assign the value to the new variable
export "$base_variable_name"="$secret_value"
fi
done
# ==== CHECK LICENSE AGREEMENT ====
# Only prompt for license agreement if command contains "neo4j" in it
# Enterprise Edition refuses to start unless NEO4J_ACCEPT_LICENSE_AGREEMENT is
# "yes" (commercial) or "eval" (evaluation).
if [[ "${cmd}" == *"neo4j"* ]]; then
if [ "${NEO4J_EDITION}" == "enterprise" ]; then
: ${NEO4J_ACCEPT_LICENSE_AGREEMENT:="not accepted"}
if [[ "$NEO4J_ACCEPT_LICENSE_AGREEMENT" != "yes" && "$NEO4J_ACCEPT_LICENSE_AGREEMENT" != "eval" ]]; then
echo >&2 "
In order to use Neo4j Enterprise Edition you must accept the license agreement.
The license agreement is available at https://neo4j.com/terms/licensing/
If you have a support contract the following terms apply https://neo4j.com/terms/support-terms/
If you do not have a commercial license and want to evaluate the Software
please read the terms of the evaluation agreement before you accept.
https://neo4j.com/terms/enterprise_us/
(c) Neo4j Sweden AB. All Rights Reserved.
Use of this Software without a proper commercial license, or evaluation license
with Neo4j, Inc. or its affiliates is prohibited.
Neo4j has the right to terminate your usage if you are not compliant.
More information is also available at: https://neo4j.com/licensing/
If you have further inquiries about licensing, please contact us via https://neo4j.com/contact-us/
To accept the commercial license agreement set the environment variable
NEO4J_ACCEPT_LICENSE_AGREEMENT=yes
To accept the terms of the evaluation agreement set the environment variable
NEO4J_ACCEPT_LICENSE_AGREEMENT=eval
To do this you can use the following docker argument:
        --env=NEO4J_ACCEPT_LICENSE_AGREEMENT=<yes|eval>
"
exit 1
fi
fi
fi
# NEO4JLABS_PLUGINS has been renamed to NEO4J_PLUGINS, but we want the old name to work for now.
if [ -n "${NEO4JLABS_PLUGINS:-}" ];
then
echo >&2 "NEO4JLABS_PLUGINS has been renamed to NEO4J_PLUGINS since Neo4j 5.0.0.
The old name will still work, but is likely to be deprecated in future releases."
# Only adopt the legacy value when NEO4J_PLUGINS itself is unset.
: ${NEO4J_PLUGINS:=${NEO4JLABS_PLUGINS:-}}
fi
# ==== CHECK FILE PERMISSIONS ON MOUNTED FOLDERS ====
# For each conventional mount point, verify permissions and point the matching
# Neo4j directory setting at it. The `: ${VAR:=default}` idiom only sets the
# variable when unset, so explicit environment values win; these NEO4J_* vars are
# later converted into conf settings by the environment-variable loop below.
if [ -d /conf ]; then
check_mounted_folder_readable "/conf"
# A mounted /conf fully replaces the image's bundled configuration.
rm -rf "${NEO4J_HOME}"/conf/*
debug_msg "Copying contents of /conf to ${NEO4J_HOME}/conf/*"
find /conf -type f -exec cp --preserve=ownership,mode {} "${NEO4J_HOME}"/conf \;
fi
if [ -d /ssl ]; then
check_mounted_folder_readable "/ssl"
# Replace the certificates directory with a symlink to the mounted /ssl.
rm -rf "${NEO4J_HOME}"/certificates
ln -s /ssl "${NEO4J_HOME}"/certificates
fi
if [ -d /plugins ]; then
if [[ -n "${NEO4J_PLUGINS:-}" ]]; then
# We need write permissions to write the required plugins to /plugins
debug_msg "Extra plugins were requested. Ensuring the mounted /plugins folder has the required write permissions."
check_mounted_folder_writable_with_chown "/plugins"
fi
check_mounted_folder_readable "/plugins"
: ${NEO4J_server_directories_plugins:="/plugins"}
fi
if [ -d /import ]; then
check_mounted_folder_readable "/import"
: ${NEO4J_server_directories_import:="/import"}
fi
if [ -d /metrics ]; then
# metrics is enterprise only
if [ "${NEO4J_EDITION}" == "enterprise" ];
then
check_mounted_folder_writable_with_chown "/metrics"
: ${NEO4J_server_directories_metrics:="/metrics"}
fi
fi
if [ -d /logs ]; then
check_mounted_folder_writable_with_chown "/logs"
: ${NEO4J_server_directories_logs:="/logs"}
fi
if [ -d /data ]; then
check_mounted_folder_writable_with_chown "/data"
# Subdirectories may be separate mounts, so each is checked individually.
if [ -d /data/databases ]; then
check_mounted_folder_writable_with_chown "/data/databases"
fi
if [ -d /data/dbms ]; then
check_mounted_folder_writable_with_chown "/data/dbms"
fi
if [ -d /data/transactions ]; then
check_mounted_folder_writable_with_chown "/data/transactions"
fi
fi
if [ -d /licenses ]; then
check_mounted_folder_readable "/licenses"
: ${NEO4J_server_directories_licenses:="/licenses"}
fi
# ==== LOAD PLUGINS ====
if [[ -n "${NEO4J_PLUGINS:-}" ]]; then
# NEO4J_PLUGINS should be a json array of plugins like '["graph-algorithms", "apoc", "streams", "graphql"]'
install_neo4j_plugins
fi
# ==== RENAME LEGACY ENVIRONMENT CONF VARIABLES ====
# Env variable naming convention:
# - prefix NEO4J_
# - double underscore char '__' instead of single underscore '_' char in the setting name
# - underscore char '_' instead of dot '.' char in the setting name
# Example:
# NEO4J_server_tx__log_rotation_retention__policy env variable to set
# server.tx_log.rotation.retention_policy setting
# we only need to override the configurations with a docker specific override.
# The other config renames will be taken care of inside Neo4j.
# Each `:=` below maps a pre-5.0 variable name onto its new name, only when the
# new-style variable was not set explicitly.
: ${NEO4J_db_tx__log_rotation_retention__policy:=${NEO4J_dbms_tx__log_rotation_retention__policy:-}}
: ${NEO4J_server_memory_pagecache_size:=${NEO4J_dbms_memory_pagecache_size:-}}
: ${NEO4J_server_default__listen__address:=${NEO4J_dbms_default__listen__address:-}}
if [ "${NEO4J_EDITION}" == "enterprise" ];
then
: ${NEO4J_server_cluster_advertised__address:=${NEO4J_causal__clustering_transaction__advertised__address:-}}
: ${NEO4J_server_cluster_raft_advertised__address:=${NEO4J_causal__clustering_raft__advertised__address:-}}
fi
# ==== SET CONFIGURATIONS ====
## == DOCKER SPECIFIC DEFAULT CONFIGURATIONS ===
## these should not override *any* configurations set by the user
debug_msg "Setting docker specific configuration overrides"
add_docker_default_to_conf "server.memory.pagecache.size" "512M"
add_docker_default_to_conf "server.default_listen_address" "0.0.0.0"
# set enterprise only docker defaults
if [ "${NEO4J_EDITION}" == "enterprise" ];
then
debug_msg "Setting docker specific Enterprise Edition overrides"
add_docker_default_to_conf "server.cluster.advertised_address" "$(hostname):6000"
add_docker_default_to_conf "server.cluster.raft.advertised_address" "$(hostname):7000"
add_docker_default_to_conf "server.routing.advertised_address" "$(hostname):7688"
fi
## == ENVIRONMENT VARIABLE CONFIGURATIONS ===
## these override BOTH defaults and any existing values in the neo4j.conf file
# these are docker control envs that have the NEO4J_ prefix but we don't want to add to the config.
not_configs=("NEO4J_ACCEPT_LICENSE_AGREEMENT" "NEO4J_AUTH" "NEO4J_AUTH_PATH" "NEO4J_DEBUG" "NEO4J_EDITION" \
"NEO4J_HOME" "NEO4J_PLUGINS" "NEO4J_SHA256" "NEO4J_TARBALL" "NEO4J_DEPRECATION_WARNING")
debug_msg "Applying configuration settings that have been set using environment variables."
# list env variables with prefix NEO4J_ and create settings from them
for i in $( set | grep ^NEO4J_ | awk -F'=' '{print $1}' | sort -rn ); do
if containsElement "$i" "${not_configs[@]}"; then
continue
fi
# Skip env variables with suffix _FILE, these are docker secrets
if [[ "$i" == *"_FILE" ]]; then
continue
fi
# Reverse the naming convention: strip NEO4J_, turn '_' into '.', then the
# resulting '..' (from original '__') back into '_'.
setting=$(echo "${i}" | sed 's|^NEO4J_||' | sed 's|_|.|g' | sed 's|\.\.|_|g')
value=$(echo "${!i}")
# Don't allow settings with no value or settings that start with a number (neo4j converts settings to env variables and you cannot have an env variable that starts with a number)
if [[ -n ${value} ]]; then
if [[ ! "${setting}" =~ ^[0-9]+.*$ ]]; then
add_env_setting_to_conf "${setting}" "${value}"
else
echo >&2 "WARNING: ${setting} not written to conf file. Settings that start with a number are not permitted."
fi
fi
done
# ==== SET PASSWORD ====
# NEO4J_AUTH_PATH (e.g. a docker secret file) takes precedence over NEO4J_AUTH.
if [[ -n "${NEO4J_AUTH_PATH:-}" ]]; then
    # Validate the existence of the password file
    if [ ! -f "${NEO4J_AUTH_PATH}" ]; then
        echo >&2 "The password file '${NEO4J_AUTH_PATH}' does not exist"
        exit 1
    fi
    # validate the password file is readable
    check_mounted_folder_readable "${NEO4J_AUTH_PATH}"
    debug_msg "Setting initial password from file ${NEO4J_AUTH_PATH}"
    # Quote the path inside the command substitution so auth files at paths
    # containing spaces or glob characters are read correctly.
    set_initial_password "$(cat "${NEO4J_AUTH_PATH}")"
else
    debug_msg "Setting initial password from environment"
    set_initial_password "${NEO4J_AUTH:-}"
fi
# ==== CLEANUP RUN FILE ====
# Remove any stale pid file left behind by a previous container run.
if [ -f "${NEO4J_HOME}"/run/neo4j.pid ];
then
    rm "${NEO4J_HOME}"/run/neo4j.pid
fi
# ==== INVOKE NEO4J STARTUP ====
# Source the optional extension-script hook before starting Neo4j.
# The path is quoted so that paths containing spaces do not word-split.
[ -f "${EXTENSION_SCRIPT:-}" ] && . "${EXTENSION_SCRIPT}"
if [ "${cmd}" == "dump-config" ]; then
    # dump-config copies the effective configuration out to the mounted /conf
    # folder and exits without starting the database.
    if [ ! -d "/conf" ]; then
        echo >&2 "You must mount a folder to /conf so that the configuration file(s) can be dumped to there."
        exit 1
    fi
    check_mounted_folder_writable_with_chown "/conf"
    cp --recursive "${NEO4J_HOME}"/conf/* /conf
    echo "Config Dumped"
    exit 0
fi
# this prints out a command for us to run.
# the command is something like: `java ...[lots of java options]... neo4j.mainClass ...[some neo4j options]...`
# putting debug messages here causes the function to break
function get_neo4j_run_cmd {
    # Build the dry-run argument list, then invoke `neo4j console --dry-run`
    # (via su-exec when root) so it prints the full java command line on stdout.
    local _dry_run_args=("console" "--dry-run")
    if [ "${EXTENDED_CONF+"yes"}" == "yes" ]; then
        _dry_run_args+=("--expand-commands")
    fi
    if running_as_root; then
        su-exec neo4j:neo4j neo4j "${_dry_run_args[@]}"
    else
        neo4j "${_dry_run_args[@]}"
    fi
}
if [ "${cmd}" == "neo4j" ]; then
# separate declaration and use of get_neo4j_run_cmd so that error codes are correctly surfaced
debug_msg "getting full neo4j run command"
neo4j_console_cmd="$(get_neo4j_run_cmd)"
debug_msg "${exec_cmd} ${neo4j_console_cmd}"
#%%DEPRECATION_WARNING_PLACEHOLDER%%
# eval is needed so that exec_cmd ("exec" or "exec su-exec neo4j:neo4j") and the
# generated java command line both undergo word splitting; the parameter
# expansion aborts if neo4j_console_cmd is somehow unset.
eval ${exec_cmd} ${neo4j_console_cmd?:No Neo4j command was generated}
else
# Any other command is executed verbatim (as the neo4j user when root).
debug_msg "${exec_cmd}" "$@"
${exec_cmd} "$@"
fi
```