From b91c7cd80deaf79b0a18e674d53bf7660d33e924 Mon Sep 17 00:00:00 2001
From: Alain Takoudjou <alain.takoudjou@nexedi.com>
Date: Mon, 27 Apr 2020 18:05:44 +0200
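Subject: [PATCH] repman: check start and stop needed for all proxysql and
 mariadb services

Poll repman from cron every minute and act on its answer:

* each mariadb partition queries the need-start endpoint for its own
  host:port and, on HTTP 200, starts mariadb through its controller
  (new template mysqld-need-start.sh.in);
* the repman partition queries need-start and need-stop for the
  proxysql of every cluster and starts/stops it accordingly
  (new template proxy-need-start-stop.sh.in).

To make that possible, proxysql wrappers move from etc/service to
etc/controller and are run as programs of a dedicated
"proxysql-controller" supervisord, so they can be started and stopped
on demand.

Related changes:

* proxy port and proxy admin port are now allocated per cluster
  instead of being shared by all clusters;
* mariadb instances receive the cluster name and the repman user, and
  the hard-coded "cluster1" is dropped from install-mysql-config;
* config-repman-secure-url now uses nginx-parameter:backend-ssl-url;
* the default failover-mode changes from "automatic" to "manual"
  (schema and template);
* monitoring-wait-retry is set to 40 in config.toml;
* mysqld_restart prints the controller status before restarting.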

---
 software/repman/buildout.hash.cfg             | 16 +++-
 .../repman/instance-mariadb.cfg.jinja2.in     | 26 +++++-
 .../repman/instance-repman-input-schema.json  |  4 +-
 software/repman/instance-repman.cfg.jinja2.in | 83 ++++++++++++++-----
 software/repman/instance.cfg.in               |  2 +
 software/repman/software.cfg                  |  8 ++
 software/repman/templates/config.toml.in      |  1 +
 .../repman/templates/mysqld-need-start.sh.in  | 21 +++++
 .../templates/proxy-need-start-stop.sh.in     | 42 ++++++++++
 9 files changed, 172 insertions(+), 31 deletions(-)
 create mode 100644 software/repman/templates/mysqld-need-start.sh.in
 create mode 100644 software/repman/templates/proxy-need-start-stop.sh.in

diff --git a/software/repman/buildout.hash.cfg b/software/repman/buildout.hash.cfg
index 5965862bb..3afbba113 100644
--- a/software/repman/buildout.hash.cfg
+++ b/software/repman/buildout.hash.cfg
@@ -14,15 +14,15 @@
 # not need these here).
 [instance.cfg]
 filename = instance.cfg.in
-md5sum = 8f3e5d9c0de16006cb36f0b8121fc11a
+md5sum = da50540b1c0fc69ffbf8f6e345a3baad
 
 [instance-repman.cfg]
 _update_hash_filename_ = instance-repman.cfg.jinja2.in
-md5sum = 2688b8aa946d4878d3f1c4fa08375cfe
+md5sum = 3b858324e8940a08379956bf76fb88bd
 
 [config-toml.in]
 _update_hash_filename_ = templates/config.toml.in
-md5sum = 1efa65af63b14ca2dd1b8c692b2af80e
+md5sum = 3adc842de5c0e8c94e90ac5f41ecab6c
 
 [config-cluster-toml.in]
 _update_hash_filename_ = templates/cluster-config.toml.in
@@ -34,7 +34,7 @@ md5sum = 0eeb24c6aa0760f0d33c4cc2828ddf30
 
 [template-mariadb.cfg]
 _update_hash_filename_ = instance-mariadb.cfg.jinja2.in
-md5sum = 10dea9ec48ce889315202d112a66f121
+md5sum = 0c6e9a3220cf6e8b096e523d0d1f407a
 
 [template-my-cnf]
 _update_hash_filename_ = templates/my.cnf.in
@@ -59,3 +59,11 @@ md5sum = 99324b56192003254081ef336dcee94c
 [dbjobs-in]
 _update_hash_filename_ = templates/dbjobs.in
 md5sum = d623a4c684578602b9d8ee49034aebfa
+
+[mysqld-need-start.sh.in]
+_update_hash_filename_ = templates/mysqld-need-start.sh.in
+md5sum = 76b984ef5e37248bc3a8010a74b24de5
+
+[proxy-need-start-stop.sh.in]
+_update_hash_filename_ = templates/proxy-need-start-stop.sh.in
+md5sum = 1ee3d69f0866605b33eac7ea596b21c3
diff --git a/software/repman/instance-mariadb.cfg.jinja2.in b/software/repman/instance-mariadb.cfg.jinja2.in
index 783974210..31aa69afa 100644
--- a/software/repman/instance-mariadb.cfg.jinja2.in
+++ b/software/repman/instance-mariadb.cfg.jinja2.in
@@ -275,16 +275,38 @@ command = ${dbjobs-executable:rendered}
 < = jinja2-template-executable
 rendered = ${directory:bin}/mysqld_restart
 template = inline:#!/bin/sh
+  ${mariadb-controller-bin:wrapper-path} status
   ${mariadb-controller-bin:wrapper-path} restart mariadb
   sleep 1
   ${mariadb-controller-bin:wrapper-path} status
 
+[{{ section('mariadb-need-start') }}]
+recipe = slapos.cookbook:cron.d
+cron-entries = ${cron:cron-entries}
+name = mariadb-need-start
+frequency = * * * * *
+command = ${template-mysqld-need-start:rendered}
+
+[template-mysqld-need-start]
+< = jinja2-template-executable
+rendered = ${directory:bin}/mysqld_need_start
+template = {{ parameter_dict['template-mysqld-need-start'] }}
+context =
+  key mariadb_controller mariadb-controller-bin:wrapper-path
+  raw username           {{ slapparameter_dict['repman-user'] }}
+  raw repman_url         {{ slapparameter_dict['repman-url'] }}
+  raw jq_bin             {{ jq_bin }}
+  raw cluster            {{ slapparameter_dict['cluster'] }}
+  raw db_host            {{ host }}
+  raw db_port            {{ port }}
+  raw bash_bin           {{ bash_bin }}
+  raw curl_bin           {{ curl_bin }}
+
 # Donwnload mariadb configuration from repman
 [{{ section('install-mysql-config') }}]
 recipe = plone.recipe.command
 stop-on-error = true
-# XXX - for now, we make only one cluster
-cluster = cluster1
+cluster = {{ slapparameter_dict['cluster'] }}
 config = ${directory:etc}/mysql/my.cnf
 command =
   cd ${directory:config-tmp} &&
diff --git a/software/repman/instance-repman-input-schema.json b/software/repman/instance-repman-input-schema.json
index 025a857bb..5ebd1fd9b 100644
--- a/software/repman/instance-repman-input-schema.json
+++ b/software/repman/instance-repman-input-schema.json
@@ -237,9 +237,9 @@
 						},
 						"failover-mode": {
 							"title": "Failover mode",
-							"description": "Failover is manual or automatic (default \"automatic\").",
+							"description": "Failover is manual or automatic (default \"manual\").",
 							"type": "string",
-							"default": "automatic",
+							"default": "manual",
               "enum": [
                 "manual",
                 "automatic"
diff --git a/software/repman/instance-repman.cfg.jinja2.in b/software/repman/instance-repman.cfg.jinja2.in
index 8d706124b..905500c60 100644
--- a/software/repman/instance-repman.cfg.jinja2.in
+++ b/software/repman/instance-repman.cfg.jinja2.in
@@ -6,6 +6,7 @@
 {% set mariadb_path_list = [] -%}
 {% set ip = (ipv6_set | list)[0] -%}
 {% set ipv4 = (ipv4_set | list)[0] -%}
+{% set cluster_list = [] -%}
 {% set tags = "gtidstrict,bind,pkg,innodb,noquerycache,slow,pfs,linux,readonly,diskmonitor,sqlerror,compressbinlog" -%}
 {% set frontend_parameter_dict = slapparameter_dict.get('slave-frontend', {}) -%}
 
@@ -17,12 +18,17 @@ var = ${:home}/var
 run = ${:var}/run
 scripts = ${:etc}/run
 service = ${:etc}/service
+controller = ${:etc}/controller
 promise = ${:etc}/promise
 log = ${:var}/log
 data = ${:var}/lib
 nginx-prefix = ${:var}/nginx
 tmp = ${:home}/tmp
 
+{% import "supervisord_lib" as supervisord_lib with context %}
+{% set proxysql_controller = "proxysql-controller" -%}
+{{ supervisord_lib.supervisord(proxysql_controller, buildout_bin_directory, supervisord_conf, use_service_hash=False) }}
+{% do part_list.append("proxysql-controller-service") -%}
 
 [request-common]
 recipe = slapos.cookbook:request.serialised
@@ -33,18 +39,6 @@ cert-file = ${slap-connection:cert-file}
 computer-id = ${slap-connection:computer-id}
 partition-id = ${slap-connection:partition-id}
 
-[proxy-admin-port]
-recipe = slapos.cookbook:free_port
-ip = {{ ipv4 }}
-minimum = 6032
-maximum = 6132
-
-[proxy-port]
-recipe = slapos.cookbook:free_port
-ip = {{ ipv4 }}
-minimum = 7032
-maximum = 7132
-
 
 {% do mariadb_dict.__setitem__('computer-memory-percent-threshold', 80) -%}
 {% set default_parameter_dict = {"cluster1": {"name": "cluster1", "db-prefered-master": "",
@@ -64,21 +58,24 @@ maximum = 7132
 {% for i in range(0, db_amount) -%}
 {% do mariadb_dict.__setitem__('tcp-port', 2099 + (i * 100)) -%}
 {% set section = 'request-mariadb-' ~ i -%}
-{% set name = 'Mariadb-' ~ i -%}
+{% set dbname = 'Mariadb-' ~ i -%}
 
 [{{ section }}]
 <= request-common
 software-type = mariadb
-name = {{ name }}
+name = {{ dbname }}
 sla-computer_guid = {{ dumps(parameter_dict.get('-sla-' ~ i ~'-computer_guid', '')) }}
 {% for key, value in mariadb_dict.items() -%}
 config-{{ key }} = {{ dumps(value) }}
 {% endfor -%}
 config-monitor-passwd = ${publish-early:monitor-password}
 config-root-password = ${publish-early:db-root-password}
+config-repman-user = ${repman-parameter:username}
+#config-repman-passwd =  ${repman-parameter:password}
 config-repman-url = ${nginx-parameter:backend-url}
-config-repman-secure-url = ${nginx-parameter:backend-url}
-config-name = {{ name }}
+config-repman-secure-url = ${nginx-parameter:backend-ssl-url}
+config-cluster = {{ name }}
+config-name = {{ dbname }}
 return = 
   database-host
   monitor-base-url
@@ -92,14 +89,26 @@ return =
 {% endfor -%}
 
 # Manage Replication Manager clusters
+[{{name}}-admin-port]
+recipe = slapos.cookbook:free_port
+ip = {{ ipv4 }}
+minimum = 6032
+maximum = 6132
+
+[{{name}}-port]
+recipe = slapos.cookbook:free_port
+ip = {{ ipv4 }}
+minimum = 7032
+maximum = 7132
+
 {% set prefered_master = parameter_dict.get("db-prefered-master") -%}
 [{{ name ~ '-cluster-parameter' }}]
 {% for key, value in parameter_dict.items() -%}
 {{ key }} = {{ dumps(value) }}
 {% endfor -%}
 proxysql-user = {{ parameter_dict.get("proxysql-user", "external") }}
-proxy-port = ${proxy-port:port}
-proxy-admin-port = ${proxy-admin-port:port}
+proxy-port = {{ '${' ~ name ~ '-port:port}' }}
+proxy-admin-port = {{ '${' ~ name ~ '-admin-port:port}' }}
 db-credential = repman:${publish-early:db-root-password}
 db-list = {{ mariadb_server_list | join(',') }}
 partition-list = {{ mariadb_path_list | join(',') }}
@@ -123,7 +132,7 @@ db-memory = {{ parameter_dict.get("db-memory", 256) }}
 db-memory-shared-pct = {{ parameter_dict.get("db-memory-shared-pct", "threads:16,innodb:60,myisam:10,aria:10,rocksdb:1,tokudb:1,s3:1,archive:1,querycache:0") }}
 db-memory-threaded-pct = {{ parameter_dict.get("db-memory-threaded-pct", "tmp:70,join:20,sort:10") }}
 # failover
-failover-mode = {{ parameter_dict.get('failover-mode', 'automatic') }}
+failover-mode = {{ parameter_dict.get('failover-mode', 'manual') }}
 failover-limit = {{ parameter_dict.get('failover-limit', 5) }}
 failover-falsepositive-heartbeat = {{ parameter_dict.get('failover-falsepositive-heartbeat', True) }}
 failover-falsepositive-heartbeat-timeout = {{ parameter_dict.get('failover-falsepositive-heartbeat-timeout', 3) }}
@@ -158,12 +167,12 @@ command =
   mkdir -p ${repman:config-tmp}/proxies &&
   mkdir -p ${:data} &&
   cd ${repman:config-tmp}/proxies &&
-  {{ curl_bin }} -o proxies.tar.gz ${nginx-parameter:repman-url}/api/clusters/{{ name }}/servers/{{ ipv4 }}/${proxy-admin-port:port}/config &&
+  {{ curl_bin }} -o proxies.tar.gz ${nginx-parameter:repman-url}/api/clusters/{{ name }}/servers/{{ ipv4 }}/{{ '${' ~ name ~ '-cluster-parameter:proxy-admin-port}' }}/config &&
   tar -xzf proxies.tar.gz &&
   cp conf/proxysql.cnf  ${:config}
 update-command = ${:command}
 
-[service-proxysql-{{ name }}]
+[proxysql-{{ name }}-wrapper]
 recipe = slapos.cookbook:wrapper
 command-line =
   {{ proxysql_location }}/bin/proxysql -f
@@ -171,12 +180,19 @@ command-line =
   -D ${config-proxysql-{{ name }}:data}
   --reload
 #  -S /tmp/proxysql_admin.sock
-wrapper-path = ${directory:service}/proxysql-{{ name }}
+wrapper-path = ${directory:controller}/proxysql-{{ name }}
 wait-for-files =
   ${repman:bootstrap}/{{ name }}_bootstrapped
 
+{% set service_name = "proxysql-" ~ name -%}
+{% set proxysql_dict = {"name": service_name, "command": "${" ~ service_name ~ "-wrapper:wrapper-path}",
+  "stopwaitsecs": 60, "environment": []} %}
+
+{{ supervisord_lib.supervisord_program(proxysql_controller, service_name, proxysql_dict) }}
+{% do part_list.append(proxysql_controller ~ "-" ~ service_name) %}
+
 {% do part_list.append('config-' ~ name) -%}
-{% do part_list.append('service-proxysql-' ~ name) -%}
+{% do cluster_list.append("{'name': '" ~ name ~ "', 'host': '" ~ ipv4 ~ "', 'port': '${" ~ name ~ "-cluster-parameter:proxy-admin-port}'}") -%}
 {% endfor -%}
 
 [slap-configuration]
@@ -430,6 +446,26 @@ name = check_repman_frontend.py
 config-url = ${nginx-parameter:backend-ssl-url}
 config-check-secure = 1
 
+[template-proxysql-need-stop-start]
+recipe = slapos.recipe.template:jinja2
+rendered = ${directory:bin}/proxysql_check_stop_start
+template = {{ template_proxy_need_stopstart }}
+mode = 755
+cluster-list = !py![{{ cluster_list | join(', ') }}]
+context =
+  key proxysql_controller {{proxysql_controller}}-bin:wrapper-path
+  key repman_url          nginx-parameter:backend-url
+  key cluster_list        :cluster-list
+  raw jq_bin              {{ jq_bin }}
+  raw bash_bin           {{ bash_bin }}
+  raw curl_bin            {{ curl_bin }}
+
+[proxy-need-stop-start]
+recipe = slapos.cookbook:cron.d
+cron-entries = ${cron:cron-entries}
+name = proxysql-need-stop-start
+frequency = * * * * *
+command = ${template-proxysql-need-stop-start:rendered}
 
 #############################
 #
@@ -447,6 +483,7 @@ parts =
   publish-connection-parameter
   repman-frontend-promise
   repman-backend-promise
+  proxy-need-stop-start
 
 # Complete parts with sections
   {{ part_list | join('\n  ') }}
diff --git a/software/repman/instance.cfg.in b/software/repman/instance.cfg.in
index 3f4fc836b..ffc1cf0bd 100644
--- a/software/repman/instance.cfg.in
+++ b/software/repman/instance.cfg.in
@@ -70,6 +70,7 @@ extra-context =
     raw sysbench_location          {{ sysbench_location }}
     raw proxysql_location          {{ proxysql_location }}
     raw template_repman_manager_sh {{ template_repman_manager_sh }}
+    raw template_proxy_need_stopstart {{ proxy_need_stop_start_template }}
 
 [template-mariadb-parameters]
 bash = {{ bash_location }}
@@ -81,6 +82,7 @@ template-mariadb-initial-setup = {{ template_mariadb_initial_setup }}
 template-mariadb-init-root = {{ template_init_root_sql }}
 template-init-root-wrapper = {{ template_init_root_wrapper }}
 template-mysqld-wrapper = {{ template_mysqld_wrapper }}
+template-mysqld-need-start = {{ mysqld_start_template }}
 link-binary = {{ dumps(mariadb_link_binary) }}
 check-computer-memory-binary = {{ bin_directory }}/check-computer-memory
 bin-directory = {{ bin_directory }}
diff --git a/software/repman/software.cfg b/software/repman/software.cfg
index c18052eca..42102f1e3 100644
--- a/software/repman/software.cfg
+++ b/software/repman/software.cfg
@@ -62,9 +62,11 @@ context =
     key template_monitor monitor2-template:rendered
     key mariadb_link_binary template-mariadb.cfg:link-binary
     key mariadb_location mariadb:location
+    key mysqld_start_template mysqld-need-start.sh.in:target
     key nginx_conf_in nginx.conf.in:target
     key nginx_location nginx:location
     key percona_toolkit_location percona-toolkit:location
+    key proxy_need_stop_start_template proxy-need-start-stop.sh.in:target
     key repman_src_location git.signal18.io_signal18_repman:location
     key rsync_location rsync:location
     key restic_bin_location restic:location
@@ -142,6 +144,12 @@ link-binary =
 [dbjobs-in]
 <= download-file
 
+[mysqld-need-start.sh.in]
+<= download-file
+
+[proxy-need-start-stop.sh.in]
+<= download-file
+
 # Pin versions of eggs used that are not already pinned by stack/slapos.cfg
 [versions]
 slapos.recipe.template = 4.3
diff --git a/software/repman/templates/config.toml.in b/software/repman/templates/config.toml.in
index 1598e7a33..fe9ce7df3 100644
--- a/software/repman/templates/config.toml.in
+++ b/software/repman/templates/config.toml.in
@@ -14,6 +14,7 @@ db-servers-binary-path = "{{ parameter_dict['mysql-bin-dir'] }}"
 #db-servers-ignored-hosts =
 # Database hosts list to monitor, IP and port (optional), specified in the host:[port] format and separated by commas
 monitoring-address = "{{ parameter_dict['ipv4'] }}"
+monitoring-wait-retry = 40
 
 #haproxy = true
 #haproxy-binary-path = "{{ parameter_dict['haproxy-bin'] }}"
diff --git a/software/repman/templates/mysqld-need-start.sh.in b/software/repman/templates/mysqld-need-start.sh.in
new file mode 100644
index 000000000..06718a395
--- /dev/null
+++ b/software/repman/templates/mysqld-need-start.sh.in
@@ -0,0 +1,21 @@
+#!{{ bash_bin }}
+
+curl () {
+  {{ curl_bin }} -k --silent -H "Accept: application/json" "$@"
+}
+
+# TOKEN=$(curl -s -X POST --data '{"username":"{{ username }}","password":"XXXXX"}' {{ repman_url }}/api/login | {{ jq_bin }} -r '.token')
+
+# Checking if mariadb start is needed
+#CODE=$(curl -H "Authorization: Bearer ${TOKEN}" -o /dev/null -w "%{http_code}" {{ repman_url }}/api/clusters/{{ cluster }}/servers/{{ db_host }}/{{ db_port }}/need-start)
+CODE=$(curl -o /dev/null -w "%{http_code}" {{ repman_url }}/api/clusters/{{ cluster }}/servers/{{ db_host }}/{{ db_port }}/need-start)
+
+if [ $CODE -eq 200 ]; then
+  echo "$CODE: Starting mariadb service..."
+  # print current status, can be useful for debug...
+  {{ mariadb_controller }} status mariadb
+  {{ mariadb_controller }} start mariadb
+  sleep 5
+  # check again if the service is still up...
+  {{ mariadb_controller }} status mariadb
+fi
diff --git a/software/repman/templates/proxy-need-start-stop.sh.in b/software/repman/templates/proxy-need-start-stop.sh.in
new file mode 100644
index 000000000..3a8ae3c69
--- /dev/null
+++ b/software/repman/templates/proxy-need-start-stop.sh.in
@@ -0,0 +1,42 @@
+#!{{ bash_bin }}
+
+curl () {
+  {{ curl_bin }} -k --silent "$@"
+}
+
+check_start_cluster () {
+  NAME=$1
+  HOST=$2
+  PORT=$3
+
+  CODE=$(curl -o /dev/null -w "%{http_code}" {{ repman_url }}/api/clusters/$NAME/servers/$HOST/$PORT/need-start)
+
+  if [ $CODE -eq 200 ]; then
+    echo "$CODE: Starting proxysql $HOST:$PORT..."
+    {{ proxysql_controller }} start proxysql-$NAME
+    sleep 1
+    # check again if the service is still up...
+    {{ proxysql_controller }} status proxysql-$NAME
+  fi
+}
+
+check_stop_cluster () {
+  NAME=$1
+  HOST=$2
+  PORT=$3
+
+  CODE=$(curl -o /dev/null -w "%{http_code}" {{ repman_url }}/api/clusters/$NAME/servers/$HOST/$PORT/need-stop)
+
+  if [ $CODE -eq 200 ]; then
+    echo "$CODE: Stoping proxysql $HOST:$PORT..."
+    {{ proxysql_controller }} stop proxysql-$NAME
+    sleep 1
+    # check again if the service is stopped...
+    {{ proxysql_controller }} status proxysql-$NAME
+  fi
+}
+
+{% for cluster_dict in cluster_list -%}
+check_start_cluster {{ cluster_dict['name'] }} {{ cluster_dict['host'] }} {{ cluster_dict['port'] }}
+check_stop_cluster {{ cluster_dict['name'] }} {{ cluster_dict['host'] }} {{ cluster_dict['port'] }}
+{% endfor -%}
-- 
2.30.9