Multi process haproxy stats with grafana / 20 Aug 2016 / Author: Haim Ari

    Estimated read time: 6 minutes

    Multi Process Haproxy stats with Python, Zabbix & Grafana

    Running Haproxy with more than one process means your stats are no longer valid. If you set “nbproc” to > 1, you need a way to aggregate your stats in order to monitor them.

    In my work environment we have a cluster of 3 haproxy servers, each running with 12 processes. To work around the stats issue I wrote this Python script, which runs multiple threads to query the stats of each process at the same time.

    I figured that since all accesses to the sockets happen at the same time using threads, this is as close as I can get to viewing the correct stats.

    What does this script do?

    First, let me say that there is a better way to access the stats than parsing them by reading the sockets as I did here. But for now it works.

    1. Get the number of haproxy processes
    2. Return the stats value by the requested option (arguments)

    Once this was working, it was easy to graph using Zabbix & Grafana with the Zabbix plugin.

    
    #!/usr/bin/python
    
    from subprocess import Popen, PIPE, check_output
    import threading
    import sys
    
    __author__ = "Haim Ari"
    __license__ = "GPL"
    __version__ = "0.0.3"
    __maintainer__ = ""
    __email__ = "[email protected]"
    __status__ = "Production"
    
    
    # Column order of haproxy's "show stat" CSV output: the position of a
    # name in this list is the field index within each CSV line, so
    # metric_list.index(option) maps a metric name to its CSV column.
    metric_list = ['pxname', 'svname', 'qcur', 'qmax', 'scur', 'smax', 'slim', 'stot', 'bin', 'bout', 'dreq', 'dresp',
                   'ereq', 'econ', 'eresp', 'wretr', 'wredis', 'status', 'weight', 'act', 'bck', 'chkfail', 'chkdown',
                   'lastchg', 'downtime', 'qlimit', 'pid', 'iid', 'sid', 'throttle', 'lbtot', 'tracked', 'type', 'rate',
                   'rate_lim', 'rate_max', 'check_status', 'check_code', 'check_duration', 'hrsp_1xx', 'hrsp_2xx',
                   'hrsp_3xx', 'hrsp_4xx', 'hrsp_5xx', 'hrsp_other', 'hanafail', 'req_rate', 'req_rate_max', 'req_tot',
                   'cli_abrt', 'srv_abrt', 'comp_in', 'comp_out', 'comp_byp', 'comp_rsp', 'lastsess', 'last_chk',
                   'last_agt', 'qtime', 'ctime', 'rtime', 'ttime']
    
    hostname = "localhost"
    # Stats socket of process 1; used only to read Nbproc in get_nbroc().
    proc_socket = "/var/lib/haproxy/haproxy.1.sock"
    # Socket path prefix: the process number and ".sock" are appended per query.
    socket = "/var/lib/haproxy/haproxy."
    
    # Shared module-level accumulators: worker threads append results here
    # and the main block sums them after the workers finish.
    threads = []
    scur_sum = []
    scur_util = []
    scur_percent = []
    result_sum = []
    front_back = ""
    metric = ""
    
    
    def get_nbroc():
        """Return 1 + the number of haproxy processes (Nbproc).
    
        Queries the first process' stats socket with "show info" via socat.
        The +1 makes the value directly usable as the exclusive upper bound
        of range(1, procs) loops over process numbers 1..Nbproc.
    
        Raises:
            RuntimeError: if no "Nbproc" line is found in the output.
        """
        p1 = Popen(["echo", "show info"], stdout=PIPE)
        info = check_output(["socat", "unix-connect:" + proc_socket, "stdio"],
                            stdin=p1.stdout).splitlines()
        p1.stdout.close()  # release the pipe fd held by the parent
        for line in info:
            if "Nbproc" in line:
                return 1 + int(line.split()[1])
        # Previously fell through and returned None implicitly, which later
        # crashed xrange(1, None) with an unhelpful TypeError.
        raise RuntimeError("'Nbproc' not found in haproxy 'show info' output")
    
    
    def hit(n, fn, option):
        """Append frontend session utilization (%) of proxy `fn` on process `n`
        to the shared `scur_percent` list.
    
        Utilization is scur/slim * 100, read from the "show stat" CSV of the
        process' stats socket. `option` is unused; it is kept so all thread
        targets share the same (n, fn, option) signature.
        """
        field = metric_list.index("scur")
        limit = metric_list.index("slim")
        p1 = Popen(["echo", "show stat"], stdout=PIPE)
        stats = check_output(["socat", "unix-connect:" + socket + str(n) + ".sock", "stdio"],
                             stdin=p1.stdout).splitlines()
        for line in stats:
            if fn + ",FRONTEND" in line:
                fields = line.split(",")  # split once, not once per column
                scur = fields[field]
                slim = fields[limit]
                # Guard: slim is empty (no session limit configured) or "0"
                # for some frontends; that previously raised ValueError /
                # ZeroDivisionError and killed the worker.
                if slim and float(slim) != 0:
                    scur_percent.append(float(scur) / float(slim) * 100)
    
    
    def get_stats(n, fn, option):
        """Append the integer value of stat `option` for proxy `fn` on
        process `n` to the shared `scur_sum` list.
    
        fn == "--ALL--" collects the column from every FRONTEND row
        (skipping the "#" header line); otherwise the FRONTEND and BACKEND
        rows of the named proxy are matched.
        """
        field = metric_list.index(option)  # CSV column of the requested metric
        p1 = Popen(["echo", "show stat"], stdout=PIPE)
        stats = check_output(["socat", "unix-connect:" + socket + str(n) + ".sock", "stdio"],
                             stdin=p1.stdout).splitlines()
        for line in stats:
            if "--ALL--" in fn:
                if "FRONTEND" in line and "#" not in line:
                    scur_sum.append(int(line.split(",")[field]))
            elif fn + ",FRONTEND" in line or fn + ",BACKEND" in line:
                scur_sum.append(int(line.split(",")[field]))
    
    
    def get_info_stats(n, metric_name):
        """Read the "show info" value named `metric_name` from haproxy
        process `n` and append it to the shared `result_sum` list."""
        echo = Popen(["echo", "show info"], stdout=PIPE)
        info_lines = check_output(
            ["socat", "unix-connect:" + socket + str(n) + ".sock", "stdio"],
            stdin=echo.stdout).splitlines()
        for info_line in info_lines:
            if metric_name in info_line:
                result_sum.append(int(info_line.split()[1]))
    
    
    usage = "Usage: $" + (sys.argv[0] + " stats Frontend/Backend metric")
    
    if __name__ == "__main__":
        # Two call shapes:
        #   <script> info <metric>                      (len(sys.argv) == 3)
        #   <script> stats <frontend/backend> <metric>  (len(sys.argv) == 4)
        if len(sys.argv) == 3:
            c_type = sys.argv[1]
            metric = sys.argv[2]
        elif len(sys.argv) == 4:
            c_type = sys.argv[1]
            front_back = sys.argv[2]
            metric = sys.argv[3]
        else:
            print(usage)
            sys.exit(1)
        procs = get_nbroc()  # 1 + number of processes (exclusive range bound)
        nbproc = procs - 1   # actual number of samples the workers collect
        if len(sys.argv) == 3:
            if "info" in c_type:
                for i in range(1, procs):
                    # BUG FIX: target=f(args) ran f() immediately in the main
                    # thread and handed Thread its return value (None), so no
                    # work was ever threaded. Pass the callable and its args.
                    t = threading.Thread(target=get_info_stats, args=(i, metric))
                    threads.append(t)
                    t.start()
                for t in threads:
                    t.join()  # wait for every worker before summing results
                total_s = sum(result_sum)
                if "Idle_pct" in metric:
                    # Idle_pct is a percentage: average it over the number of
                    # processes actually sampled (was divided by procs, an
                    # off-by-one since procs == nbproc + 1).
                    print(str(int(total_s / nbproc)))
                else:
                    print(str(total_s))
        elif len(sys.argv) == 4:
            if "stats" in c_type:
                if "hit_limit" in metric:
                    for i in range(1, procs):
                        t = threading.Thread(target=hit, args=(i, front_back, metric))
                        threads.append(t)
                        t.start()
                    for t in threads:
                        t.join()
                    total_s = sum(scur_percent)
                    # Same off-by-one fix: average over nbproc samples.
                    print(str(round(total_s / nbproc)))
                else:
                    for i in range(1, procs):
                        t = threading.Thread(target=get_stats, args=(i, front_back, metric))
                        threads.append(t)
                        t.start()
                    for t in threads:
                        t.join()
                    total_s = sum(scur_sum)
                    print(str(total_s))