Next Previous Contents

27. Patches

These are patches not in the standard ipvs distribution, but which I expect to be useful to some/many people.

Note the original sgml requires & to be represented as &amp;. If you extract the patch from the sgml file you will get &amp; rather than & in your patch. To get the correct patch, save the html representation as txt.

27.1 machine readable error codes from ipvsadm

Computers can talk to each other and read from and write to other programs. You shouldn't have to get a person to sit at the console to parse the output of a program. Here's a patch that makes ipvsadm report failures through a machine-readable error code.

Padraig Brady padraig@antefacto.com 07 Nov 2001

This 1 line patch is useful for me and I don't think it will break anything. It's against ipvsadm-0.8.2 and returns a specific error code.


--Boundary_(ID_nuebet+LsBGYFsmRPljqqA)
Content-type: text/plain; name="ipvsadm-0.8.2-returncode.diff"
Content-disposition: inline; filename="ipvsadm-0.8.2-returncode.diff"
Content-transfer-encoding: 7bit

--- //ipvs-0.8.2/ipvs/ipvsadm/ipvsadm.c Fri Jun 22 16:03:08 2001
+++ ipvsadm.c   Wed Nov  7 16:29:11 2001
@@ -938,6 +938,7 @@
         result = setsockopt(sockfd, IPPROTO_IP, op,
                             (char *)&urule, sizeof(urule));
         if (result) {
+                result = errno; /* return to caller */
                 perror("setsockopt failed");
 
                 /*

--Boundary_(ID_nuebet+LsBGYFsmRPljqqA)--

27.2 machine compatible ipvsadm entries

With commands like ifconfig, you can repeat the same valid command without errors. With ipvsadm, if a VIP:port entry already exists, then you will get an error on re-entering it. If no entry exists, then you put it into the ipvsadm table by adding (-a) it; if the entry exists, then you edit (-e) it. You will get an error if you use the wrong command.

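This is a problem for automated control of an LVS: a script cannot simply re-issue a valid command, but must first find out whether the entry already exists in order to choose between add (-a) and edit (-e).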

What is needed is a version of ipvs that accepts valid entries without error. Here's a patch by Horms against ipvs-0.9.0. It modifies several ipvs files, including ipvsadm.


diff -ruN ipvs-0.9.0/ipvs/ip_vs.h ipvs-0.9.0.new/ipvs/ip_vs.h
--- ipvs-0.9.0/ipvs/ip_vs.h     Wed May  9 08:01:14 2001
+++ ipvs-0.9.0.new/ipvs/ip_vs.h Fri May 11 14:54:35 2001
@@ -69,13 +69,13 @@
 #define IP_VS_SO_SET_NONE      IP_VS_BASE_CTL          /* just peek */
 #define IP_VS_SO_SET_INSERT    (IP_VS_BASE_CTL+1)
 #define IP_VS_SO_SET_ADD       (IP_VS_BASE_CTL+2)
-#define IP_VS_SO_SET_EDIT      (IP_VS_BASE_CTL+3)
+#define IP_VS_SO_SET_EDIT      (IP_VS_BASE_CTL+3)      /* Depreciated */
 #define IP_VS_SO_SET_DEL       (IP_VS_BASE_CTL+4)
 #define IP_VS_SO_SET_FLUSH     (IP_VS_BASE_CTL+5)
 #define IP_VS_SO_SET_LIST      (IP_VS_BASE_CTL+6)
 #define IP_VS_SO_SET_ADDDEST   (IP_VS_BASE_CTL+7)
 #define IP_VS_SO_SET_DELDEST   (IP_VS_BASE_CTL+8)
-#define IP_VS_SO_SET_EDITDEST  (IP_VS_BASE_CTL+9)
+#define IP_VS_SO_SET_EDITDEST  (IP_VS_BASE_CTL+9)      /* Depreciated */
 #define IP_VS_SO_SET_TIMEOUTS  (IP_VS_BASE_CTL+10)
 #define IP_VS_SO_SET_MAX       IP_VS_SO_SET_TIMEOUTS
 
diff -ruN ipvs-0.9.0/ipvs/ip_vs_ctl.c ipvs-0.9.0.new/ipvs/ip_vs_ctl.c
--- ipvs-0.9.0/ipvs/ip_vs_ctl.c Wed May  9 08:01:15 2001
+++ ipvs-0.9.0.new/ipvs/ip_vs_ctl.c     Fri May 11 14:46:26 2001
@@ -739,32 +739,21 @@
 
 
 /*
- *  Add a destination into an existing service
+ * Actually add a new destination to a service
+ * Note: You should call ip_vs_add_dest() which will add or delete
+ * a destination as appropriate.
  */
-static int ip_vs_add_dest(struct ip_vs_service *svc,
+
+static int __ip_vs_add_dest(struct ip_vs_service *svc,
                          struct ip_vs_rule_user *ur)
 {
-       struct ip_vs_dest *dest;
        __u32 daddr = ur->daddr;
        __u16 dport = ur->dport;
        int ret;
+       struct ip_vs_dest *dest;
 
        EnterFunction(2);
 
-       if (ur->weight < 0) {
-               IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
-               return -ERANGE;
-       }
-
-       /*
-        * Check if the dest already exists in the list
-        */
-       dest = ip_vs_lookup_dest(svc, daddr, dport);
-       if (dest != NULL) {
-               IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
-               return -EEXIST;
-       }
-
        /*
         * Check if the dest already exists in the trash and
         * is from the same service
@@ -838,10 +827,12 @@
 
 
 /*
- *  Edit a destination in the given service
+ * Edit a destination in the given service
+ * Note: You should call ip_vs_add_dest() which will add or delete
+ * a destination as appropriate.
  */
-static int ip_vs_edit_dest(struct ip_vs_service *svc,
-                          struct ip_vs_rule_user *ur)
+static int __ip_vs_edit_dest(struct ip_vs_service *svc,
+                          struct ip_vs_rule_user *ur)
 {
        struct ip_vs_dest *dest;
        __u32 daddr = ur->daddr;
@@ -850,11 +841,6 @@
 
        EnterFunction(2);
 
-       if (ur->weight < 0) {
-               IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
-               return -ERANGE;
-       }
-
        /*
         *  Lookup the destination list
         */
@@ -873,6 +859,42 @@
 
 
 /*
+ *  Add a destination into an existing service
+ *  If the destination alrady exists it will be edited
+ *  using __ip_vs_edit_dest()
+ *  Else add the destination using __ip_vs_add_dest()
+ */
+static int ip_vs_add_dest(struct ip_vs_service *svc,
+                         struct ip_vs_rule_user *ur)
+{
+       __u32 daddr = ur->daddr;
+       __u16 dport = ur->dport;
+       struct ip_vs_dest *dest;
+
+       EnterFunction(2);
+
+       if (ur->weight < 0) {
+               IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
+               return -ERANGE;
+       }
+
+       /*
+        * Check if the dest already exists in the list
+        * If it does, edit the existing entry
+        * Else add a new one
+        */
+
+       dest = ip_vs_lookup_dest(svc, daddr, dport);
+       if (dest != NULL)
+               return __ip_vs_edit_dest(svc, ur);
+       else
+               return __ip_vs_add_dest(svc, ur);
+       
+       LeaveFunction(2);
+}
+
+
+/*
  *  Delete a destination (must be already unlinked from the service)
  */
 
@@ -1788,14 +1810,11 @@
 
        switch (cmd) {
        case IP_VS_SO_SET_ADD:
-               if (svc != NULL)
-                       ret = -EEXIST;
-               else
-                       ret = ip_vs_add_service(urule, &svc);
-               break;
        case IP_VS_SO_SET_EDIT:
-               if (svc == NULL || svc->protocol != urule->protocol)
-                       ret = -ESRCH;
+               if (svc != NULL && svc->protocol != urule->protocol)
+                       svc = NULL;
+               if (svc == NULL)
+                       ret = ip_vs_add_service(urule, &svc);
                else
                        ret = ip_vs_edit_service(svc, urule);
                break;
@@ -1809,16 +1828,11 @@
                }
                break;
        case IP_VS_SO_SET_ADDDEST:
-               if (svc == NULL || svc->protocol != urule->protocol)
-                       ret = -ESRCH;
-               else
-                       ret = ip_vs_add_dest(svc, urule);
-               break;
        case IP_VS_SO_SET_EDITDEST:
                if (svc == NULL || svc->protocol != urule->protocol)
                        ret = -ESRCH;
                else
-                       ret = ip_vs_edit_dest(svc, urule);
+                       ret = ip_vs_add_dest(svc, urule);
                break;
        case IP_VS_SO_SET_DELDEST:
                if (svc == NULL || svc->protocol != urule->protocol)
diff -ruN ipvs-0.9.0/ipvs/ipvsadm/VERSION ipvs-0.9.0.new/ipvs/ipvsadm/VERSION
--- ipvs-0.9.0/ipvs/ipvsadm/VERSION     Thu Mar 22 04:57:46 2001
+++ ipvs-0.9.0.new/ipvs/ipvsadm/VERSION Sat May 12 11:38:19 2001
@@ -1 +1 @@
-1.17
+1.18
diff -ruN ipvs-0.9.0/ipvs/ipvsadm/debian/changelog ipvs-0.9.0.new/ipvs/ipvsadm/debian/changelog
--- ipvs-0.9.0/ipvs/ipvsadm/debian/changelog    Thu Jan 11 06:14:59 2001
+++ ipvs-0.9.0.new/ipvs/ipvsadm/debian/changelog        Sat May 12 11:39:08 2001
@@ -1,8 +1,8 @@
-ipvsadm (1.13-1) nstable; urgency=low
+ipvsadm (1.18-1) nstable; urgency=low
 
   * A release
 
- -- Horms <horms@vergenet.net>  Thu, 14 Dec 2000 17:00:00 -0800
+ -- Simon Horman <horms@vergenet.net>  Sat, 12 May 2001 11:39:05 -0700
 
 Local variables:
 mode: debian-changelog
diff -ruN ipvs-0.9.0/ipvs/ipvsadm/ipvsadm.8 ipvs-0.9.0.new/ipvs/ipvsadm/ipvsadm.8
--- ipvs-0.9.0/ipvs/ipvsadm/ipvsadm.8   Thu Mar 22 04:57:46 2001
+++ ipvs-0.9.0.new/ipvs/ipvsadm/ipvsadm.8       Sat May 12 11:41:31 2001
@@ -14,6 +14,8 @@
 .\"       Horms            :  Tidy up some of the description and the
 .\"                           grammar in the -f and sysctl sections
 .\"       Wensong Zhang    :  -s option description taken from ipchains(8)
+.\"       Horms            :  Depreciated Edit options, they are now
+.\"                           handled by add.
 .\"
 .\"     This program is free software; you can redistribute it and/or modify
 .\"     it under the terms of the GNU General Public License as published by
@@ -30,12 +32,12 @@
 .\"     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 .\"
 .\"
-.TH IPVSADM 8 "11th January 2001" "LVS Administration" "Linux Administrator's Guide"
+.TH IPVSADM 8 "11th May 2001" "LVS Administration" "Linux Administrator's Guide"
 .UC 4
 .SH NAME
 ipvsadm \- Linux Virtual Server administration
 .SH SYNOPSIS
-.B ipvsadm -A|E -t|u|f \fIservice-address\fP [-s \fIscheduler\fP]
+.B ipvsadm -A -t|u|f \fIservice-address\fP [-s \fIscheduler\fP]
 .ti 15
 .B [-p [\fItimeout\fP]] [-M \fInetmask\fP]
 .br
@@ -47,7 +49,7 @@
 .br
 .B ipvsadm -S [-n]
 .br
-.B ipvsadm -a|e -t|u|f \fIservice-address\fP
+.B ipvsadm -a -t|u|f \fIservice-address\fP
 .ti 15
 .B -r|R \fIserver-address\fP [-g|i|m] [-w \fIweight\fP]
 .br
@@ -99,10 +101,8 @@
 .B -A, --add-service
 Add a virtual service. A service address is uniquely defined by a
 triplet: IP address, port number, and protocol. Alternatively, a
-virtual service may be defined by a firewall-mark.
-.TP
-.B -E, --edit-service
-Edit a virtual service.
+virtual service may be defined by a firewall-mark. If the service
+already exists then it will be modified according to the values given.
 .TP
 .B -D, --delete-service
 Delete a virtual service, along with any associated real servers.
@@ -127,10 +127,8 @@
 This option only works if \fIipvsadm\fP is compiled against \fBpopt\fR(3).  
 .TP
 .B -a, --add-server
-Add a real server to a virtual service.
-.TP
-.B -e, --edit-server
-Edit a real server in a virtual service.
+Add a real server to a virtual service. If the real server already exists
+then it will be modified according to the values given.
 .TP
 .B -d, --delete-server
 Remove a real server from a virtual service.
diff -ruN ipvs-0.9.0/ipvs/ipvsadm/ipvsadm.c ipvs-0.9.0.new/ipvs/ipvsadm/ipvsadm.c
--- ipvs-0.9.0/ipvs/ipvsadm/ipvsadm.c   Wed Apr 11 19:59:31 2001
+++ ipvs-0.9.0.new/ipvs/ipvsadm/ipvsadm.c       Sat May 12 11:41:56 2001
@@ -50,6 +50,9 @@
  *                               in an informative error message rather
  *                               than the usage information
  *        Horms               :   added -v option
+ *        Horms               :   Merged funtionality of add and edit options
+ *                                Depreciated edit option
+ *        
  *
  *
  *      ippfvsadm - Port Fowarding & Virtual Server ADMinistration program
@@ -119,7 +122,7 @@
 #endif
 
 #define IPVSADM_VERSION_NO              "v" VERSION
-#define IPVSADM_VERSION_DATE            "2001/03/18"
+#define IPVSADM_VERSION_DATE            "2001/05/12"
 #define IPVSADM_VERSION         IPVSADM_VERSION_NO " " IPVSADM_VERSION_DATE
 
 #define MINIMUM_IPVS_VERSION_MAJOR      0
@@ -218,7 +221,7 @@
         struct poptOption add_service_option =
         {"add-service", 'A', POPT_ARG_NONE, NULL, 'A'};
         struct poptOption edit_service_option =
-        {"edit-service", 'E', POPT_ARG_NONE, NULL, 'E'};
+        {"edit-service", 'E', POPT_ARG_NONE, NULL, 'E'};     /* Depreciated */
         struct poptOption delete_service_option =
         {"delete-service", 'D', POPT_ARG_NONE, NULL, 'D'};
         struct poptOption clear_option =
@@ -229,8 +232,8 @@
         {"list", 'l', POPT_ARG_NONE, NULL, 'l'};
         struct poptOption add_server_option =
         {"add-server", 'a', POPT_ARG_NONE, NULL, 'a'};
-        struct poptOption edit_server_option =
-        {"edit-server", 'e', POPT_ARG_NONE, NULL, 'e'};
+        struct poptOption edit_server_option =         
+        {"edit-server", 'e', POPT_ARG_NONE, NULL, 'e'};      /* Depreciated */
         struct poptOption delete_server_option =
         {"delete-server", 'd', POPT_ARG_NONE, NULL, 'd'};
         struct poptOption set_option =
@@ -359,11 +362,8 @@
 
         switch (cmd) {
         case 'A':      
-                *op = IP_VS_SO_SET_ADD;
-               options_sub = options_service;
-                break;
         case 'E':      
-                *op = IP_VS_SO_SET_EDIT;
+                *op = IP_VS_SO_SET_ADD;
                options_sub = options_service;
                 break;
         case 'D':
@@ -371,11 +371,8 @@
                options_sub = options_delete_service;
                 break;
         case 'a':
-                *op = IP_VS_SO_SET_ADDDEST;
-               options_sub = options_server;
-                break;
         case 'e':
-                *op = IP_VS_SO_SET_EDITDEST;
+                *op = IP_VS_SO_SET_ADDDEST;
                options_sub = options_server;
                 break;
         case 'd':
@@ -621,12 +618,12 @@
        struct option long_options[] =
        {
                {"add-service", 0, 0, 'A'},
-               {"edit-service", 0, 0, 'E'},
+               {"edit-service", 0, 0, 'E'},                 /* Depreciated */
                {"delete-service", 0, 0, 'D'},
                {"clear", 0, 0, 'C'},
                {"list", 0, 0, 'L'},
                {"add-server", 0, 0, 'a'},
-               {"edit-server", 0, 0, 'e'},
+               {"edit-server", 0, 0, 'e'},                  /* Depreciated */
                {"delete-server", 0, 0, 'd'},
                {"help", 0, 0, 'h'},
                {"version", 0, 0, 'v'},
@@ -664,11 +661,8 @@
 
         switch (cmd) {
         case 'A':      
-                *op = IP_VS_SO_SET_ADD;
-                optstr = "t:u:f:s:M:p::";
-                break;
         case 'E':      
-                *op = IP_VS_SO_SET_EDIT;
+                *op = IP_VS_SO_SET_ADD;
                 optstr = "t:u:f:s:M:p::";
                 break;
         case 'D':
@@ -676,11 +670,8 @@
                 optstr = "t:u:f:";
                 break;
         case 'a':
-                *op = IP_VS_SO_SET_ADDDEST;
-                optstr = "t:u:f:w:r:R:gmi";
-                break;
         case 'e':
-                *op = IP_VS_SO_SET_EDITDEST;
+                *op = IP_VS_SO_SET_ADDDEST;
                 optstr = "t:u:f:w:r:R:gmi";
                 break;
         case 'd':
@@ -886,7 +877,7 @@
                return 0;
        }
 
-        if (op == IP_VS_SO_SET_ADD || op == IP_VS_SO_SET_EDIT) {
+        if (op == IP_VS_SO_SET_ADD) {
                 /*
                  * Make sure that port zero service is persistent
                  */
@@ -907,13 +898,12 @@
          * i.e. make sure that a -r accompanies a -[t|u|f]
          */
         if ((op == IP_VS_SO_SET_ADDDEST
-             || op == IP_VS_SO_SET_EDITDEST
              || op == IP_VS_SO_SET_DELDEST)
             && !urule.daddr) {
                 fail(2, "No destination specified");
         }
 
-        if (op == IP_VS_SO_SET_ADDDEST || op == IP_VS_SO_SET_EDITDEST) {
+        if (op == IP_VS_SO_SET_ADDDEST) {
                 /*
                  * Set the default weight 1 if not specified
                  */
@@ -947,16 +937,7 @@
                  */
                 switch (op) {
                 case IP_VS_SO_SET_ADD: 
-                        if (errno == EEXIST)
-                                printf("Service already exists\n");
-                        else if (errno == ENOENT)
-                                printf("Scheduler not found: ip_vs_%s.o\n",
-                                       urule.sched_name);
-                        break;
-                case IP_VS_SO_SET_EDIT:
-                        if (errno==ESRCH)
-                                printf("No such service\n");
-                        else if (errno == ENOENT)
+                        if (errno == ENOENT)
                                 printf("Scheduler not found: ip_vs_%s.o\n",
                                        urule.sched_name);
                         break;
@@ -967,10 +948,6 @@
                 case IP_VS_SO_SET_ADDDEST:
                         if (errno == ESRCH)
                                 printf("Service not defined\n");
-                        else if (errno == EEXIST)
-                                printf("Destination already exists\n");
-                        break;
-                case IP_VS_SO_SET_EDITDEST:
                 case IP_VS_SO_SET_DELDEST:
                         if (errno==ESRCH)
                                 printf("Service not defined\n");
@@ -1124,14 +1101,14 @@
        version(stream);
         fprintf(stream,
                 "Usage:\n"
-                "  %s -A|E -t|u|f service-address [-s scheduler] [-p [timeout]] [-M netmask]\n"
+                "  %s -A -t|u|f service-address [-s scheduler] [-p [timeout]] [-M netmask]\n"
                 "  %s -D -t|u|f service-address\n"
                 "  %s -C\n"
 #ifdef HAVE_POPT
                 "  %s -R\n"
                 "  %s -S [-n]\n"
 #endif
-                "  %s -a|e -t|u|f service-address -r|R server-address [-g|i|m] [-w weight]\n"
+                "  %s -a -t|u|f service-address -r|R server-address [-g|i|m] [-w weight]\n"
                 "  %s -d -t|u|f service-address -r|R server-address\n"
                 "  %s -L|l [-c] [-n]\n"
                 "  %s -s tcp tcpfin udp\n"
@@ -1145,16 +1122,14 @@
         fprintf(stream,
                 "Commands:\n"
                 "Either long or short options are allowed.\n"
-                "  --add-service     -A        add virtual service with options\n"
-                "  --edit-service    -E        edit virtual service with options\n"
+                "  --add-service     -A        add/edit virtual service with options\n"
                 "  --delete-service  -D        delete virtual service\n"
                 "  --clear           -C        clear the whole table\n"
 #ifdef HAVE_POPT
                 "  --restore         -R        restore rules from stdin\n"
                 "  --save            -S        save rules to stdout\n"
 #endif
-                "  --add-server      -a        add real server with options\n"
-                "  --edit-server     -e        edit real server with options\n"
+                "  --add-server      -a        add/edit real server with options\n"
                 "  --delete-server   -d        delete real server\n"
                 "  --list            -L|-l     list the table\n"
                 "  --set|-s tcp tcpfin udp     set connection timeout values\n"

27.3 Threshold patch

ratz ratz@tac.ch 2001-01-29 16:16:27

This patch on top of ipvs-1.0.3-2.2.18 adds support for threshold settings per realserver for all schedulers that have the -w option.

Description/Purpose

I had long been wondering how a kernel-based implementation of per-realserver connection limiting would work and how it could be implemented, so while waiting in the hospital for an x-ray I had enough time to write up a little dirty hack as a proof of concept. It works as follows. I added three new entries to the ip_vs_dest struct (u_thresh, l_thresh and old_weight) in ip_vs.*, and I modified ipvsadm to add the two new options -x (upper threshold) and -y (lower threshold). A typical setup would be:

ipvsadm -A -t 192.168.100.100:80 -s wlc
ipvsadm -a -t 192.168.100.100:80 -r 192.168.100.3:80 -w 3 -x 1145 -y 923
ipvsadm -a -t 192.168.100.100:80 -r 192.168.100.3:80 -w 2 -x 982 -y 677
ipvsadm -a -t 192.168.100.100:80 -r 127.0.0.1:80 -w 1 -x 100 -y 50

This means that as soon as (dest->inactconns + dest->activeconns) exceeds the x value, the weight of this server is set to zero. As soon as the connections drop below the lower threshold (y), the weight is set back to its initial value. What is it good for? Well, I don't know exactly (use your imagination), but first of all this is a proposal, and I wanted to start a discussion about the possible inclusion of such a feature, or a derived one, into the main code (of course after fixing the race conditions and bugs and cleaning up the code). Second, I found out from tons of talks with customers that such a feature is needed, because commercial load balancers also have it and managers always like to have a nice comparison of all features when deciding which product to take. Doing all this in user space is unfortunately just not atomic enough.

Anyway, if anybody else thinks that such a feature might be vital for inclusion we can talk about it. If you look at the code, it wouldn't break anything and would just add two lousy CPU cycles for checking whether u_thresh is non-zero. This feature can easily be disabled by just setting u_thresh to zero or by not specifying it at all.

Well, I'm open to discussion and flames. I have it running in production :) but with a special SLA. I implemented a server of last resort, which works like this: if all realservers of a service are down (the healthcheck took them out or the threshold check set their weight to zero), my userspace tool automagically invokes the server of last resort, a tiny httpd with a static page saying that the service is currently unavailable. This is also useful if you want to do maintenance on the realservers.

I have already implemented a dozen such setups and they all work pretty well.


--- ipvsadm.c-old       Mon Jan 29 08:39:45 2001
+++ ipvsadm.c   Mon Jan 29 08:56:24 2001
@@ -265,6 +265,10 @@
         {"gatewaying",'g', POPT_ARG_NONE, NULL, 'g'};
         struct poptOption weight_option =
         {"weight", 'w', POPT_ARG_STRING, &optarg, 'w'};
+        struct poptOption u_thresh_option =
+        {"u_thresh", 'x', POPT_ARG_STRING, &optarg, 'x'};
+        struct poptOption l_thresh_option =
+        {"l_thresh", 'y', POPT_ARG_STRING, &optarg, 'y'};
         struct poptOption numeric_option =
         {"numeric", 'n', POPT_ARG_NONE, NULL, 'n'};
         struct poptOption NULL_option =
@@ -326,6 +330,8 @@
                udp_service_option,
                fwmark_service_option,
                weight_option,
+               u_thresh_option,
+               l_thresh_option,
                real_server_option,
                real_server2_option,
                gatewaying_option,
@@ -517,6 +523,17 @@
                              string_to_number(optarg,0,65535)) == -1)
                                 fail(2, "illegal weight specified");
                         break;
+               case 'x':
+                       if ((mc->u.vs_user.u_thresh=
+                            string_to_number(optarg, 0, 65535)) == -1)
+                               fail(2, "illegal u_thresh specified");
+                       break;
+               case 'y':
+                       if ((mc->u.vs_user.l_thresh=
+                            string_to_number(optarg, 0, 65535)) == -1)
+                               fail(2, "illegal l_thresh specified");
+                       break;
+
                 case 'n':
                         *format |= FMT_NUMERIC;
                         break;
@@ -611,6 +628,8 @@
                {"ipip", 0, 0, 'i'},
                {"gatewaying", 0, 0, 'g'},
                {"weight", 1, 0, 'w'},
+               {"u_thresh", 1, 0, 'x'},
+               {"l_thresh", 1, 0, 'y'},
                {"numeric", 0, 0, 'n'},
                {"help", 0, 0, 'h'},
                {0, 0, 0, 0}
@@ -624,7 +643,7 @@
        /* Re-process the arguments each time options is called*/
        optind = 1;

-       if ((cmd = getopt_long(argc, argv, "AEDCSRaedlLhv",
+       if ((cmd = getopt_long(argc, argv, "AEDCSRaedlLhvxy",
                                long_options, NULL)) == EOF)
                usage_exit(argv[0], -1);

@@ -643,11 +662,11 @@
                 break;
         case 'a':
                 mc->m_cmd = IP_MASQ_CMD_ADD_DEST;
-                optstr = "t:u:f:w:r:R:gmi";
+                optstr = "t:u:f:w:r:R:gmi:x:y";
                 break;
         case 'e':
                 mc->m_cmd = IP_MASQ_CMD_SET_DEST;
-                optstr = "t:u:f:w:r:R:gmi";
+                optstr = "t:u:f:w:r:R:gmi:x:y";
                 break;
         case 'd':
                 mc->m_cmd = IP_MASQ_CMD_DEL_DEST;
@@ -787,6 +806,20 @@
                              string_to_number(optarg,0,65535)) == -1)
                                 fail(2, "illegal weight specified");
                         break;
+               case 'x':
+                       if (mc->u.vs_user.u_thresh != -1)
+                                fail(2, "multiple server u_thresh specified");
+                       if ((mc->u.vs_user.u_thresh=
+                            string_to_number(optarg, 0, 65535)) == -1)
+                               fail(2, "illegal u_thresh specified");
+                       break;
+               case 'y':
+                       if (mc->u.vs_user.l_thresh != -1)
+                                fail(2, "multiple server l_thresh specified");
+                       if ((mc->u.vs_user.l_thresh=
+                            string_to_number(optarg, 0, 65535)) == -1)
+                               fail(2, "illegal l_thresh specified");
+                       break;
                 case 'n':
                         *format |= FMT_NUMERIC;
                         break;
@@ -814,6 +847,9 @@
         ctl.m_target = IP_MASQ_TARGET_VS;
         /* weight=0 is allowed, which means that server is quiesced */
         ctl.u.vs_user.weight = -1;
+       /* set u_thresh and l_thresh to zero -> disabled */
+       ctl.u.vs_user.u_thresh = 0;
+       ctl.u.vs_user.l_thresh = 0;
         /* Set direct routing as default forwarding method */
         ctl.u.vs_user.masq_flags = IP_MASQ_F_VS_DROUTE;
         /* Set the default persistent granularity to /32 masking */
@@ -1078,7 +1114,7 @@
                 "  %s -R\n"
                 "  %s -S [-n]\n"
 #endif
-                "  %s -[a|e] -[t|u|f] service-address -[r|R] server-address [-g|-i|-m] [-w weight]\n"
+                "  %s -[a|e] -[t|u|f] service-address -[r|R] server-address [-g|-i|-m] [-w weight] [-x u_thresh] [-y l_thresh]\n"
                 "  %s -d -[t|u|f] service-address -[r|R] server-address\n"
                 "  %s -[L|l] [-n]\n"
                 "  %s -h\n\n",
@@ -1128,6 +1164,8 @@
         fprintf(stream,
                 "  --ipip         -i                   ipip encapsulation (tunneling)\n"
                 "  --masquerading -m                   masquerading (NAT)\n"
+                "  --u_thresh     -x <u_thresh>        max. connections\n"
+                "  --l_thresh     -y <l_thresh>        weight fallback connections\n"
                 "  --weight       -w <weight>          capacity of real server\n"
                 "  --numeric      -n                   numeric output of addresses and ports\n"
                 );
@@ -1230,7 +1268,7 @@
         }
         if (fgets(buffer, sizeof(buffer), handle) && !(format & FMT_RULE))
                 printf("  -> RemoteAddress:Port          "
-                       "Forward Weight ActiveConn InActConn\n");
+                       "Forward Weight ActiveConn InActConn u_thresh l_thresh\n");

         /*
          * Print the VS information according to the format
@@ -1280,6 +1318,8 @@
         int weight;
         int activeconns;
         int inactconns;
+       unsigned int u_thresh;
+       unsigned int l_thresh;

         int n;
         unsigned long temp;
@@ -1289,11 +1329,11 @@

         if (buf[0] == ' ') {
                 /* destination entry */
-                if ((n = sscanf(buf, " %s %lX:%hX %s %d %d %d",
+                if ((n = sscanf(buf, " %s %lX:%hX %s %d %d %d %d %d",
                                 arrow, &temp, &dport, fwd, &weight,
-                                &activeconns, &inactconns)) == -1)
+                                &activeconns, &inactconns, &u_thresh, &l_thresh)) == -1)
                         exit(1);
-                if (n != 7)
+                if (n != 9)
                         fail(2, "unexpected input data");

                 daddr.s_addr = (__u32) htonl(temp);
@@ -1315,8 +1355,9 @@
                                        dname, get_fwd_switch(fwd), weight);
                         }
                 } else {
-                        printf("  -> %-27s %-7s %-6d %-10d %-10d\n",
-                               dname , fwd, weight, activeconns, inactconns);
+                        printf("  -> %-27s %-7s %-6d %-10d %-10d %-10d %-10d\n",
+                               dname , fwd, weight, activeconns, inactconns,
+                               u_thresh, l_thresh);
                 }
                 free(dname);
         } else if (buf[0] == 'F') {

["per_rs_thresh-kernel_2.2.18.diff" (text/plain)]

Only in linux-2.2.18.vanilla/include/linux: coda_opstats.h
Only in linux-2.2.18.vanilla/include/linux: dasd.h
diff -ur linux-2.2.18.vanilla/include/linux/ip_masq.h linux-2.2.18/include/linux/ip_masq.h
--- linux-2.2.18.vanilla/include/linux/ip_masq.h    Fri Jan 26 22:28:59 2001
+++ linux-2.2.18/include/linux/ip_masq.h    Thu Jan 25 08:54:06 2001
@@ -121,6 +121,9 @@
        u_int16_t       dport;
        unsigned        masq_flags;     /* destination flags */
        int             weight;         /* destination weight */
+       int             old_weight;     /* old destination weight */
+       u_int16_t       u_thresh;       /* upper threshold */
+       u_int16_t       l_thresh;       /* lower threshold */
 };


diff -ur linux-2.2.18.vanilla/include/net/ip_vs.h linux-2.2.18/include/net/ip_vs.h
--- linux-2.2.18.vanilla/include/net/ip_vs.h    Fri Jan 26 22:28:59 2001
+++ linux-2.2.18/include/net/ip_vs.h    Sun Jan 28 10:10:28 2001
@@ -110,7 +110,10 @@
         atomic_t               activeconns;    /* active connections */
         atomic_t               inactconns;     /* inactive connections */
         atomic_t               refcnt;         /* reference counter */
+       __u16                   u_thresh;       /* upper threshold */
+       __u16                   l_thresh;       /* lower threshold */
         int                    weight;         /* server weight */
+        int                    old_weight;     /* old server weight */
        struct list_head        d_list;   /* table with all dests */

         /* for virtual service */
@@ -215,6 +218,8 @@
 extern int ip_vs_wrr_init(void);
 extern int ip_vs_lc_init(void);
 extern int ip_vs_wlc_init(void);
+extern int ip_vs_lblc_init(void);
+extern int ip_vs_lblcr_init(void);


 /*
diff -ur linux-2.2.18.vanilla/net/ipv4/ip_vs.c linux-2.2.18/net/ipv4/ip_vs.c
--- linux-2.2.18.vanilla/net/ipv4/ip_vs.c       Fri Jan 26 22:28:59 2001
+++ linux-2.2.18/net/ipv4/ip_vs.c       Sat Jan 27 14:07:42 2001
@@ -69,6 +69,7 @@
  *     Wensong Zhang           :    changed to two service hash tables
  *     Julian Anastasov        :    corrected trash_dest lookup for both
  *                                  normal service and fwmark service
+ *     Roberto Nibali         :    added per realserver threshhold (hospital version)
  *
  */

@@ -1274,6 +1275,8 @@
          *    Set the weight and the flags
          */
         dest->weight = mm->weight;
+       dest->u_thresh = mm->u_thresh;
+       dest->l_thresh = mm->l_thresh;
         dest->masq_flags = mm->masq_flags;

         dest->masq_flags |= IP_MASQ_F_VS;
@@ -1817,9 +1820,21 @@
         ms->dest = dest;

         /*
-         *    Increase the refcnt counter of the dest.
+         *    Increase the refcnt counter of the dest and set the weight
+        *    accordingly. I don't why dest->refcnt is conns+1?
          */
         atomic_inc(&dest->refcnt);
+       if ( dest->u_thresh != 0) {
+               if (( (atomic_read(&dest->inactconns) + atomic_read(&dest->activeconns)+1) >= dest->u_thresh) && (dest->weight > 0)){
+                       IP_VS_DBG(7, "Bind-masq [changing weight] conns:%d "
+                            "weight=%d oldweight=%d\n",
+                               atomic_read(&dest->inactconns) +
+                               atomic_read(&dest->activeconns),
+                               dest->weight, dest->old_weight);
+                       dest->old_weight=dest->weight;
+                       dest->weight=0;
+               }
+       }

         IP_VS_DBG(9, "Bind-masq fwd:%c s:%s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
                   "d:%u.%u.%u.%u:%d flg:%X cnt:%d destcnt:%d\n",
@@ -1862,6 +1877,21 @@
                         }
                 }

+               /*
+                  if all connections are smaller then lower threshhold and the
+                  old weight isn't zero.
+               */
+               if (dest->u_thresh != 0) {
+                       if (((atomic_read(&dest->inactconns) + atomic_read(&dest->activeconns)) <= dest->l_thresh) && (dest->old_weight > 0)){
+                           IP_VS_DBG(7, "Unbind-masq conns:%d weight=%d "
+                                "oldweight=%d\n",
+                                    atomic_read(&dest->inactconns) +
+                                    atomic_read(&dest->activeconns),
+                               dest->weight, dest->old_weight);
+                               dest->weight=dest->old_weight;
+                               dest->old_weight=0;
+                       }
+               }
                 /*
                  *  Decrease the refcnt of the dest, and free the dest
                  *  if nobody refers to it (refcnt=0).
@@ -2415,7 +2445,7 @@
         size = sprintf(buf+len,
                        "IP Virtual Server version %d.%d.%d (size=%d)\n"
                        "Prot LocalAddress:Port Scheduler Flags\n"
-                       "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n",
+                       "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn u_thresh l_thresh\n",
                        NVERSION(IP_VS_VERSION_CODE), IP_VS_TAB_SIZE);
         pos += size;
         len += size;
@@ -2456,13 +2486,15 @@
                                 dest = list_entry(q,struct ip_vs_dest,n_list);
                                 size = sprintf(buf+len,
                                                "  -> %08X:%04X      %-7s "
-                                               "%-6d %-10d %-10d\n",
+                                               "%-6d %-10d %-10d %-10d %-10d\n",
                                                ntohl(dest->addr),
                                                ntohs(dest->port),
                                                ip_vs_fwd_name(dest->masq_flags),
                                                dest->weight,
                                                atomic_read(&dest->activeconns),
-                                               atomic_read(&dest->inactconns));
+                                               atomic_read(&dest->inactconns),
+                                              dest->u_thresh,
+                                              dest->l_thresh);
                                 len += size;
                                 pos += size;

@@ -2505,13 +2537,15 @@
                                 dest = list_entry(q,struct ip_vs_dest,n_list);
                                 size = sprintf(buf+len,
                                                "  -> %08X:%04X      %-7s "
-                                               "%-6d %-10d %-10d\n",
+                                               "%-6d %-10d %-10d %-10d %-10d\n",
                                                ntohl(dest->addr),
                                                ntohs(dest->port),
                                                ip_vs_fwd_name(dest->masq_flags),
                                                dest->weight,
                                                atomic_read(&dest->activeconns),
-                                               atomic_read(&dest->inactconns));
+                                               atomic_read(&dest->inactconns),
+                                              dest->u_thresh,
+                                              dest->l_thresh);
                                 len += size;
                                 pos += size;

@@ -2565,6 +2599,7 @@
                        atomic_read(&ip_vs_concurrentconns),
                        atomic_read(&ip_vs_connshandled),
                        atomic_read(&ip_vs_packetshandled));
+       /* Here we should add a per svc and per rs statistics */
         pos += size;
         len += size;

How will we defend against DDoS (distributed DoS)?

I'm using a packetfilter and in special zones a firewall after the packetfilter ;) No, seriously, I personally don't think the LVS should take too much part in securing the realservers. It's just another part of the firewall setup.

The problem is that LVS has another view of the real server load. The director sees one number of connections; the real server sees another one. And under attack we observe a big gap between the active/inactive counters and the used threshold values. In this case we just exclude all real servers. This is the reason I prefer the more informed approach of using agents.

Using the number of active or inactive connections to assign a new weight is _very_ dangerous.

I know, but OTOH, if you set a threshold and my code takes the server out because of a well formatted DDoS attack, I think it is even better than if you would allow the DDoS and maybe kill the realserver's http-listener.
No, we have two choices:

- use SYN cookies and much memory for open requests, accept more valid requests

- don't use SYN cookies, drop the requests exceeding the backlog length, drop many valid requests but the real servers are not overloaded

In both cases the listeners don't see requests until the handshake is completed (Linux).

BTW, what if you enable the defense strategies of the loadbalancer? I've done some tests and I was able to flood the realservers by sending forged SYNs and timeshifted SYN-ACKs with the expected seq-nr. It was impossible to work on the realservers unless of course I enabled the TCP_SYNCOOKIES.
Yes, nobody claims the defense strategies guard the real servers. This is not their goal. They keep the director with more free memory and nothing more :) Only drop_packet can control the request rate but only for the new requests.
I then enabled my patch and after the connections exceeded the threshold, the kernel took the server out temporarily by setting the weight to 0. In that way the server was usable and I could work on the server.
Yes but the clients can't work, you exclude all servers in this case because the LVS spreads the requests to all servers and the rain becomes deluge :)

In theory, the number of connections is related to the load but this is true when the world is ideal. The inactive counter can be set with very high values when we are under attack. Even the WLC method loads the real servers proportionately but they are never excluded from operation.

True, but as I already said, I think LVS shouldn't replace a fw. I normally have a router configured properly, then a packetfilter, then a firewall or even another but stateful packetfilter. See, the patch itself is not even mandatory. In a normal setup, my code is not even touched (except the ``if'':).
I have some thoughts about limiting the traffic per connection but this idea must be analyzed.
Hmm, I just want to limit the amount of concurrent connections per realserver and in the future maybe per service. This saved me quite some lines of code in my userspace healthchecking daemon.

Yes, you vote for moving some features from user to the kernel space. We must find the right balance: what can be done in LVS and what must be implemented in the user space tools.

The other alternatives are to use the Netfilter's "limit" target or QoS to limit the traffic to the real servers.

But then you have to add quite some code. The limit target has no idea about LVS tables. How should this work, f.e. if you would like to rate limit the amount of connections to a realserver?

Maybe we can limit the SYN rate. Of course, that does not cover all cases, so my thought was to limit the packet rate for all states or per connection, not sure, this is an open topic. It is easy to open a connection through the director (especially in LVS-DR) and then to flood this connection with packets. This is one of the cases where LVS can really guard the real servers from packet floods. If we combine this with the other kind of attacks, the distributed ones, we have better control. Of course, some QoS implementations can cover such problems, not sure. And this can be a simple implementation, of course, nobody wants to reinvent the wheel :)

Let's analyze the problem. If we move new connections from an "overloaded" real server and redirect them to the other real servers we will overload them too.

No, unless you use an old machine. This is maybe a requirement of an e-commerce application. They have some servers and if the servers are overloaded (taken out by my user-space healthchecking daemon because the response time is too high or the application daemon is not listening anymore on the port) they will be taken out. Now I have found out that by setting thresholds I could reduce the downtime of a flooded server significantly. In case all servers were taken out or their weights were set to 0 the userspace application sets up a temporary (either local route or another server) new realserver that has nothing else to do than pushing a static webpage saying that the service is currently unavailable due to high server load or DDoS attack or whatever. Put this page behind a TUX 2.0 and try to overflow it. If you can, apply the zero-copy patches of DaveM. No way you will find such a fast (88MBit/s requests!!) link to saturate the server.

Yes, I know that this is a working solution. But see, you exclude all real servers :) You are giving up. My idea is for us to find a state where we can drop some of the requests and keep the real servers busy but responsive. This can be a difficult task but not when we have the help from our agents. We expect that many valid requests can be dropped but if we keep the real server in good health we can handle some valid requests because nobody knows when the flood will stop. The link is busy but it contains valid requests. And the service does not see the invalid ones.

IMO, the problem is that there are more connection requests than the cluster can handle. The solutions that try to move the traffic between the real servers can only cause more problems. If at the same time we set the weights to 0 this leads to more delay in the processing. Maybe it is more useful to start by reducing the weights first, but this again returns us to the theory of the smart cluster software.

So, we can't exit from this situation without dropping requests. There is more traffic than can be served by the cluster.

The requests are not meaningful, we care how much load they introduce and we report this load to the director. It can look, for example, as one value (weight) for the real host that can be set for all real services running on this host. We don't need to generate 10 weights for the 10 real services running in our real host. And we change the weight every 2 seconds for example. We need two syscalls (lseek and read) to get most of the values from /proc fs. But maybe from 2-3 files. This is in Linux, of course. Not sure how this behaves under attack. We will see it :)

Obviously yes, but if you also include the practical problem of SLA with customers and guaranteed downtime per month I still have to say that for my deploition (is this the correct noun?) I go better with my patch in case of a DDoS and enabled LVS defense strategies than without.

If there is no cluster software to keep the real servers equally loaded, some of them can go offline too early.

The scheduler should keep them equally loaded IMO even in case of let's say 70% forged packets. Again, if you don't like to set a threshold, leave it. The patch is open enough. If you like to set it, set it, maybe set it very high. It's up to you.

The only problem we have with this scheme is the ipvsadm binary. It must be changed (the user structure in the kernel :)) The last change is dated from 0.9.10 and this is a big period :) But you know what means a change in the user structures :)

The cluster software can take the role to monitor the load instead of relying on the connection counters. I agree, changing the weights and deciding how much traffic to drop can be explained with a complex formula. But I can see it only as a complete solution: to balance the load and to drop the exceeding requests, serve as many requests as possible. Even the drop_packet strategy can help here, we can explicitly enable it specifying the proper drop rate. We don't need to use it only to defend the LVS box but to drop the exceeding traffic. But someone has to control the drop rate :) If there is no exceeding traffic what problems can we expect? Only from the bad load balancing :)

The easiest way to control the LVS is from user space and to leave in LVS only the basic needed support. This allows us to have more ways to control LVS.

27.4 Martian modification patches

Martian modification patch for 2.2.x



--- linux/include/net/ip_fib.h.orig     Wed Feb 23 16:54:27 2000
+++ linux/include/net/ip_fib.h  Wed Mar 15 13:46:22 2000
@@ -200,7 +200,7 @@
 extern int inet_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
 extern int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb);
 extern int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
-                              struct device *dev, u32 *spec_dst, u32 *itag);
+                       struct device *dev, u32 *spec_dst, u32 *itag, int our);
 extern void fib_select_multipath(const struct rt_key *key, struct fib_result *res);

 /* Exported by fib_semantics.c */
--- linux/net/ipv4/fib_frontend.c.orig  Wed Feb 23 16:54:27 2000
+++ linux/net/ipv4/fib_frontend.c       Wed Mar 15 14:44:45 2000
@@ -189,7 +189,7 @@
  */

 int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
-                       struct device *dev, u32 *spec_dst, u32 *itag)
+                       struct device *dev, u32 *spec_dst, u32 *itag, int our)
 {
        struct in_device *in_dev = dev->ip_ptr;
        struct rt_key key;
@@ -206,7 +206,8 @@
                return -EINVAL;
        if (fib_lookup(&key, &res))
                goto last_resort;
-       if (res.type != RTN_UNICAST)
+       if ((res.type != RTN_UNICAST) &&
+               ((res.type != RTN_LOCAL) || our))
                return -EINVAL;
        *spec_dst = FIB_RES_PREFSRC(res);
        if (itag)
@@ -216,13 +217,20 @@
 #else
        if (FIB_RES_DEV(res) == dev)
 #endif
+       {
+               if (res.type == RTN_LOCAL) {
+                       *itag = 0;
+                       return -EINVAL;
+               }
                return FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
+       }

        if (in_dev->ifa_list == NULL)
                goto last_resort;
        if (IN_DEV_RPFILTER(in_dev))
                return -EINVAL;
        key.oif = dev->ifindex;
+       if (res.type == RTN_LOCAL) key.iif = loopback_dev.ifindex;
        if (fib_lookup(&key, &res) == 0 && res.type == RTN_UNICAST) {
                *spec_dst = FIB_RES_PREFSRC(res);
                return FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
--- linux/net/ipv4/route.c.orig Wed Feb 23 17:00:07 2000
+++ linux/net/ipv4/route.c      Wed Mar 15 13:07:28 2000
@@ -1037,7 +1037,7 @@
                if (!LOCAL_MCAST(daddr))
                        return -EINVAL;
                spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
-       } else if (fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, &itag) < 0)
+       } else if (fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, &itag, our) < 0)
                return -EINVAL;

        rth = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops);
@@ -1181,7 +1181,7 @@
        if (res.type == RTN_LOCAL) {
                int result;
                result = fib_validate_source(saddr, daddr, tos, loopback_dev.ifindex,
-                                            dev, &spec_dst, &itag);
+                                            dev, &spec_dst, &itag, 1);
                if (result < 0)
                        goto martian_source;
                if (result)
@@ -1206,7 +1206,7 @@
                return -EINVAL;
        }

-       err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(res), dev, &spec_dst, &itag);
+       err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(res), dev, &spec_dst, &itag, 0);
        if (err < 0)
                goto martian_source;

@@ -1279,7 +1279,7 @@
        if (ZERONET(saddr)) {
                spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
        } else {
-               err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, &itag);
+               err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, &itag, 1);
                if (err < 0)
                        goto martian_source;
                if (err)

Martian modification patch for 2.4.0


--- linux-2.4.0/include/net/ip_fib.h~   Sat Jan 20 13:16:50 2001
+++ linux/include/net/ip_fib.h  Sun Mar 11 11:07:22 2001
@@ -203,7 +203,7 @@
 extern int inet_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
 extern int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb);
 extern int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
-                              struct net_device *dev, u32 *spec_dst, u32 *itag);
+                              struct net_device *dev, u32 *spec_dst, u32 *itag, int our);
 extern void fib_select_multipath(const struct rt_key *key, struct fib_result *res);
 
 /* Exported by fib_semantics.c */
--- linux-2.4.0/net/ipv4/fib_frontend.c~        Fri Jun  2 07:25:21 2000
+++ linux/net/ipv4/fib_frontend.c       Sun Mar 11 11:07:22 2001
@@ -204,7 +204,8 @@
  */
 
 int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
-                       struct net_device *dev, u32 *spec_dst, u32 *itag)
+                       struct net_device *dev, u32 *spec_dst, u32 *itag,
+                       int our)
 {
        struct in_device *in_dev;
        struct rt_key key;
@@ -233,7 +234,8 @@
 
        if (fib_lookup(&key, &res))
                goto last_resort;
-       if (res.type != RTN_UNICAST)
+       if ((res.type != RTN_UNICAST) &&
+               ((res.type != RTN_LOCAL) || our))
                goto e_inval_res;
        *spec_dst = FIB_RES_PREFSRC(res);
        if (itag)
@@ -244,6 +246,10 @@
        if (FIB_RES_DEV(res) == dev)
 #endif
        {
+               if (res.type == RTN_LOCAL) {
+                       *itag = 0;
+                       goto e_inval_res;
+               }
                ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
                fib_res_put(&res);
                return ret;
@@ -254,6 +260,7 @@
        if (rpf)
                goto e_inval;
        key.oif = dev->ifindex;
+       if (res.type == RTN_LOCAL) key.iif = loopback_dev.ifindex;
 
        ret = 0;
        if (fib_lookup(&key, &res) == 0) {
--- linux-2.4.0/net/ipv4/route.c~       Sun Nov  5 23:10:48 2000
+++ linux/net/ipv4/route.c      Sun Mar 11 11:07:22 2001
@@ -1177,7 +1177,7 @@
                if (!LOCAL_MCAST(daddr))
                        goto e_inval;
                spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
-       } else if (fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, &itag) < 0)
+       } else if (fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, &itag, our) < 0)
                goto e_inval;
 
        rth = dst_alloc(&ipv4_dst_ops);
@@ -1339,7 +1339,7 @@
        if (res.type == RTN_LOCAL) {
                int result;
                result = fib_validate_source(saddr, daddr, tos, loopback_dev.ifindex,
-                                            dev, &spec_dst, &itag);
+                                            dev, &spec_dst, &itag, 1);
                if (result < 0)
                        goto martian_source;
                if (result)
@@ -1364,7 +1364,7 @@
                goto e_inval;
        }
 
-       err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(res), dev, &spec_dst, &itag);
+       err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(res), dev, &spec_dst, &itag, 0);
        if (err < 0)
                goto martian_source;
 
@@ -1447,7 +1447,7 @@
        if (ZERONET(saddr)) {
                spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
        } else {
-               err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, &itag);
+               err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, &itag, 1);
                if (err < 0)
                        goto martian_source;
                if (err)

27.5 fwmark name-number translation table

ipvsadm allows entry of fwmark as numbers. In some cases, it would be more convenient to enter/display the fwmark as a name; e.g. an e-commerce site, serving multiple customers (i.e. VIPs) and which is linking http and https by a fwmark. The output of ipvsadm then would list the fwmark as "bills_business", "fred_inc" rather than "14","15"...

Horms has written a patch which allows the use of fwmarks as text as well as the default method of numbers, using a table in /etc that looks like the /etc/hosts table.

Horms horms@vergenet.net Nov 14 2001

While we were at OLS in June, Joe suggested that we have a file to associate names with firewall marks. I have attached a patch that enables ipvsadm to read names for firewall marks from /etc/fwmarks. This file is intended to be analogous to /etc/hosts.

The patch to the man page explains the format more fully, but briefly the format is "fwmark name...", newline delimited.

e.g.

1 a_name
2 another_name yet_another_name

Which leads to

ipvsadm -A -f a_name

--Boundary_(ID_7gtcNW35aSCKH29xS00dLw)
Content-type: text/plain; charset=us-ascii
Content-disposition: attachment; filename="ipvs-0.9.5.fwmarks-file.patch"

diff -ruN ipvs-0.9.5/ipvs/ipvsadm/Makefile ipvs-0.9.5.new/ipvs/ipvsadm/Makefile
--- ipvs-0.9.5/ipvs/ipvsadm/Makefile    Sat Oct 20 04:14:00 2001
+++ ipvs-0.9.5.new/ipvs/ipvsadm/Makefile        Sat Oct 27 20:31:58 2001
@@ -63,7 +63,7 @@
 POPT_DEFINE = -DHAVE_POPT
 endif
 
-OBJS = ipvsadm.o config_stream.o dynamic_array.o
+OBJS = ipvsadm.o config_stream.o dynamic_array.o fwmark_lookup.o
 LIBS = $(POPT_LIB)
 DEFINES = -DVERSION=\"$(VERSION)\" -DSCHEDULERS=\"$(SCHEDULERS)\" \
          $(POPT_DEFINE) $(IP_VS_H_DEFINE)
diff -ruN ipvs-0.9.5/ipvs/ipvsadm/VERSION ipvs-0.9.5.new/ipvs/ipvsadm/VERSION
--- ipvs-0.9.5/ipvs/ipvsadm/VERSION     Wed Sep 19 03:42:54 2001
+++ ipvs-0.9.5.new/ipvs/ipvsadm/VERSION Sat Oct 27 20:31:56 2001
@@ -1 +1 @@
-1.20
+1.21
diff -ruN ipvs-0.9.5/ipvs/ipvsadm/config_stream.c ipvs-0.9.5.new/ipvs/ipvsadm/config_stream.c
--- ipvs-0.9.5/ipvs/ipvsadm/config_stream.c     Fri Mar 23 00:57:46 2001
+++ ipvs-0.9.5.new/ipvs/ipvsadm/config_stream.c Mon Oct 29 10:49:29 2001
@@ -1,12 +1,29 @@
 /*
- *      Code to convert a stream input into a dynamic array
- *      that can be parsed as argc and argv.
+ *      dynamic_array.c -  Code to convert a stream input into a
+ *                         that can be parsed as argc and argv.
  *
  *      Authors: Horms <horms@vergenet.net>
  *
- *      Released under the terms of the GNU GPL
+ *      Version: $Id: config_stream.c,v Exp $
  *
- *      ChangeLog
+ *      Copyright (c) 2001 Horms
+ *      All rights reserved.
+ *
+ *      This program is free software; you can redistribute it and/or modify
+ *      it under the terms of the GNU General Public License as published by
+ *      the Free Software Foundation; either version 2 of the License, or
+ *      (at your option) any later version.
+ *
+ *      This program is distributed in the hope that it will be useful,
+ *      but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *      GNU General Public License for more details.
+ *
+ *      You should have received a copy of the GNU General Public License
+ *      along with this program; if not, write to the Free Software
+ *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *     Changes:
  *      Horms         :   scanf Glibc under Red Hat 7 does not appear
  *                        to return EOF when input ends. Fall through
  *                        code has been added to handle this case correctly
diff -ruN ipvs-0.9.5/ipvs/ipvsadm/config_stream.h ipvs-0.9.5.new/ipvs/ipvsadm/config_stream.h
--- ipvs-0.9.5/ipvs/ipvsadm/config_stream.h     Wed May 31 14:36:21 2000
+++ ipvs-0.9.5.new/ipvs/ipvsadm/config_stream.h Mon Oct 29 10:50:10 2001
@@ -1,14 +1,33 @@
 /*
- *      Code to convert a stream input into a dynamic array
- *      that can be parsed as argc and argv.
+ *      dynamic_array.h -  Code to convert a stream input into a
+ *                         that can be parsed as argc and argv.
  *
  *      Authors: Horms <horms@vergenet.net>
  *
- *      Released under the terms of the GNU GPL
+ *      Version: $Id: config_stream.h,v Exp $
+ *
+ *      Copyright (c) 2001 Horms
+ *      All rights reserved.
+ *
+ *      This program is free software; you can redistribute it and/or modify
+ *      it under the terms of the GNU General Public License as published by
+ *      the Free Software Foundation; either version 2 of the License, or
+ *      (at your option) any later version.
+ *
+ *      This program is distributed in the hope that it will be useful,
+ *      but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *      GNU General Public License for more details.
+ *
+ *      You should have received a copy of the GNU General Public License
+ *      along with this program; if not, write to the Free Software
+ *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *     Changes:
  */
 
-#ifndef CONFIG_STREAM_FLIM
-#define CONFIG_STREAM_FLIM
+#ifndef _CONFIG_STREAM_H
+#define _CONFIG_STREAM_H
 
 #include "dynamic_array.h"
 
@@ -16,4 +35,4 @@
 
 dynamic_array_t *config_stream_read (FILE *stream, const char *first_element);
 
-#endif
+#endif /* _CONFIG_STREAM_H */
diff -ruN ipvs-0.9.5/ipvs/ipvsadm/del ipvs-0.9.5.new/ipvs/ipvsadm/del
--- ipvs-0.9.5/ipvs/ipvsadm/del Thu Jan  1 12:00:00 1970
+++ ipvs-0.9.5.new/ipvs/ipvsadm/del     Mon Oct 29 10:42:13 2001
@@ -0,0 +1,30 @@
+/*
+ *      dynamic_array.c -  Code to convert a stream input into a
+ *                         that can be parsed as argc and argv.
+ *
+ *      Author: Horms <horms@vergenet.net>
+ *
+ *      Version: $Id: config_stream.c,v Exp $
+ *
+ *      Copyright (c) 2001 Horms
+ *      All rights reserved.
+ *
+ *      This program is free software; you can redistribute it and/or modify
+ *      it under the terms of the GNU General Public License as published by
+ *      the Free Software Foundation; either version 2 of the License, or
+ *      (at your option) any later version.
+ *
+ *      This program is distributed in the hope that it will be useful,
+ *      but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *      GNU General Public License for more details.
+ *
+ *      You should have received a copy of the GNU General Public License
+ *      along with this program; if not, write to the Free Software
+ *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *     Changes:
+ *      Horms         :   scanf Glibc under Red Hat 7 does not appear
+ *                        to return EOF when input ends. Fall through
+ *                        code has been added to handle this case correctly
+ */
diff -ruN ipvs-0.9.5/ipvs/ipvsadm/dynamic_array.c ipvs-0.9.5.new/ipvs/ipvsadm/dynamic_array.c
--- ipvs-0.9.5/ipvs/ipvsadm/dynamic_array.c     Fri Mar 23 00:57:46 2001
+++ ipvs-0.9.5.new/ipvs/ipvsadm/dynamic_array.c Mon Oct 29 10:49:33 2001
@@ -1,4 +1,6 @@
 /*
+ *      dynamic_array.c 
+ *      
  *      Dynamic array, to store all your flims in Includes macros required
  *      to create an array of strings but as the primitive type for the
  *      array is void * providing your own duplicate_primitive and
@@ -7,8 +9,29 @@
  *
  *      Authors: Horms <horms@vergenet.net>
  *
- *      Released under the terms of the GNU GPL
+ *      Version: $Id: dynamic_array.c,v Exp $
  *
+ *      Copyright (c) 2001 Horms
+ *      All rights reserved.
+ *
+ *      This program is free software; you can redistribute it and/or modify
+ *      it under the terms of the GNU General Public License as published by
+ *      the Free Software Foundation; either version 2 of the License, or
+ *      (at your option) any later version.
+ *
+ *      This program is distributed in the hope that it will be useful,
+ *      but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *      GNU General Public License for more details.
+ *
+ *      You should have received a copy of the GNU General Public License
+ *      along with this program; if not, write to the Free Software
+ *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *     Changes:
+ *      Horms         :   scanf Glibc under Red Hat 7 does not appear
+ *                        to return EOF when input ends. Fall through
+ *                        code has been added to handle this case correctly
  */
 
 #include "dynamic_array.h"
diff -ruN ipvs-0.9.5/ipvs/ipvsadm/dynamic_array.h ipvs-0.9.5.new/ipvs/ipvsadm/dynamic_array.h
--- ipvs-0.9.5/ipvs/ipvsadm/dynamic_array.h     Fri Mar 23 00:57:46 2001
+++ ipvs-0.9.5.new/ipvs/ipvsadm/dynamic_array.h Mon Oct 29 10:50:11 2001
@@ -1,4 +1,7 @@
+
 /*
+ *      dynamic_array.h 
+ *      
  *      Dynamic array, to store all your flims in Includes macros required
  *      to create an array of strings but as the primitive type for the
  *      array is void * providing your own duplicate_primitive and
@@ -7,12 +10,33 @@
  *
  *      Authors: Horms <horms@vergenet.net>
  *
- *      Released under the terms of the GNU GPL
+ *      Version: $Id: dynamic_array.h,v Exp $
+ *
+ *      Copyright (c) 2001 Horms
+ *      All rights reserved.
+ *
+ *      This program is free software; you can redistribute it and/or modify
+ *      it under the terms of the GNU General Public License as published by
+ *      the Free Software Foundation; either version 2 of the License, or
+ *      (at your option) any later version.
+ *
+ *      This program is distributed in the hope that it will be useful,
+ *      but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *      GNU General Public License for more details.
+ *
+ *      You should have received a copy of the GNU General Public License
+ *      along with this program; if not, write to the Free Software
+ *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
+ *     Changes:
+ *      Horms         :   scanf Glibc under Red Hat 7 does not appear
+ *                        to return EOF when input ends. Fall through
+ *                        code has been added to handle this case correctly
  */
 
-#ifndef DYNAMIC_ARRAY_FLIM
-#define DYNAMIC_ARRAY_FLIM
+#ifndef _DYNAMIC_ARRAY_H
+#define _DYNAMIC_ARRAY_H
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -174,4 +198,4 @@
 
 dynamic_array_t *dynamic_array_split_str(char *string, const char delimiter);
 
-#endif
+#endif /* _DYNAMIC_ARRAY_H */
diff -ruN ipvs-0.9.5/ipvs/ipvsadm/fwmark_lookup.c ipvs-0.9.5.new/ipvs/ipvsadm/fwmark_lookup.c
--- ipvs-0.9.5/ipvs/ipvsadm/fwmark_lookup.c     Thu Jan  1 12:00:00 1970
+++ ipvs-0.9.5.new/ipvs/ipvsadm/fwmark_lookup.c Mon Oct 29 10:49:36 2001
@@ -0,0 +1,786 @@
+/*
+ *      fwmark_lookup.c
+ *
+ *      Look up firewall marks in /etc/fwmarks. This is intended to be
+ *      analogous to /etc/hosts
+ *
+ *      Authors: Horms <horms@vergenet.net>
+ *
+ *      Version: $Id: fwmark_lookup.c,v Exp $
+ *
+ *      Copyright (c) 2001 Horms
+ *      All rights reserved.
+ *
+ *      This program is free software; you can redistribute it and/or modify
+ *      it under the terms of the GNU General Public License as published by
+ *      the Free Software Foundation; either version 2 of the License, or
+ *      (at your option) any later version.
+ *
+ *      This program is distributed in the hope that it will be useful,
+ *      but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *      GNU General Public License for more details.
+ *
+ *      You should have received a copy of the GNU General Public License
+ *      along with this program; if not, write to the Free Software
+ *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *     Changes:
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <string.h>
+#include <errno.h>
+#include <netdb.h>
+
+#include "fwmark_lookup.h"
+
+#define __FWM_LOOKUP_MAX_LINE_LENGTH 4096
+#define __FWM_BLOCKING ((size_t) 7)
+
+static struct hostent __fwm_hostent;
+static char **__fwm_h_aliases = NULL;
+static size_t __fwm_no_h_aliases = 0;
+static char **__fwm_h_addr_list = NULL;
+static size_t __fwm_no_h_addr_list = 0;
+static int __fwm_fd = -1;
+
+static struct hostent *__fwm_simple_hostent(const fwm_t fwm, const char *name);
+static int __fwm_beginfwment(void);
+
+static unsigned char __fwm_getc_buffer[__FWM_LOOKUP_MAX_LINE_LENGTH];
+static ssize_t __fwm_getc_nread = 0;
+static ssize_t __fwm_getc_offset = 0;
+
+static int __fwm_getc(void);
+
+static int __fwm_add_fwmark(fwm_t key);
+static int __fwm_add_fwmark_str(char *key);
+static int __fwm_add_name(const char *name);
+
+static void __fwm_reset_global(void);
+static struct hostent *__fwm_reset_hostent(struct hostent *hostent);
+static void __fwm_array_destroy(void **a, const size_t count,
+                               const size_t fixed_size);
+static void **__fwm_array_add_element(void **a, size_t * count,
+                                     void *new_elem, size_t fixed_size);
+
+
+/**********************************************************************
+ * __fwm_reset_global
+ * Reset the global hostent structure and associated arrays.
+ * pre: none
+ * post: __fwm_hostent is reset, see __fwm_reset_hostent
+ *       __fwm_h_aliases, each alias it holds and __fwm_h_addr_list are
+ *       freed and set to NULL, both element counts are set to 0
+ * return: none
+ **********************************************************************/
+
+static void __fwm_reset_global(void)
+{
+       __fwm_reset_hostent(&__fwm_hostent);
+
+       __fwm_array_destroy((void **) __fwm_h_aliases, __fwm_no_h_aliases,
+                           0);
+       __fwm_h_aliases = NULL;
+       __fwm_no_h_aliases = 0;
+
+       __fwm_array_destroy((void **) __fwm_h_addr_list,
+                           __fwm_no_h_addr_list, sizeof(fwm_t));
+       __fwm_h_addr_list = NULL;
+       __fwm_no_h_addr_list = 0;
+}
+
+
+/**********************************************************************
+ * __fwm_reset_hostent
+ * Reset a hostent structure to an empty fwmark entry
+ * pre: hostent: hostent to reset
+ * post: h_name, h_aliases and h_addr_list are NULL, h_addrtype is
+ *       AF_INET and h_length is sizeof(fwm_t)
+ * return: hostent, or NULL if hostent is NULL
+ * Note: Any memory associated with elements of hostent is not freed.
+ **********************************************************************/
+
+static struct hostent *__fwm_reset_hostent(struct hostent *hostent)
+{
+       if (hostent == NULL) {
+               return (NULL);
+       }
+
+       hostent->h_name = NULL;
+       hostent->h_aliases = NULL;
+       hostent->h_addrtype = AF_INET;
+       hostent->h_length = sizeof(fwm_t);
+       hostent->h_addr_list = NULL;
+
+       return (hostent);
+}
+
+
+/**********************************************************************
+ * __fwm_array_destroy
+ * Destroy an array created by __fwm_array_add_element
+ * pre: a: array to free, may be NULL
+ *      count: number of elements allocated in the array
+ *      fixed_size: Size of elements if they are a fixed size
+ *                  0 if they are variable size
+ * post: If fixed_size is zero free each element in the array
+ *       Free the array.
+ *       Note: If elements are fixed size they are allocated as part
+ *             of the array itself and hence are freed anyway.
+ * return: none
+ **********************************************************************/
+
+static void __fwm_array_destroy(void **a, const size_t count,
+                               const size_t fixed_size)
+{
+       size_t i;
+
+       if (a == NULL) {
+               return;
+       }
+
+       if (!fixed_size) {
+               /* Variable size elements are owned by the array.
+                * free(NULL) is a no-op so unused slots are harmless */
+               for (i = 0; i < count; i++) {
+                       free(a[i]);
+               }
+       }
+
+       free(a);
+}
+
+
+/**********************************************************************
+ * __fwm_array_add_element
+ * Add an element to a NULL terminated array.
+ * If the array is empty create it, if there is not enough space
+ * allocate more space.
+ * pre: a: array to add element to
+ *      count: number of elements allocated in the array
+ *      new_elem: element to add to the array
+ *      fixed_size: Size of elements if they are a fixed size
+ *                  0 if they are variable size
+ * post: If a is NULL
+ *           allocate a block of elements in the array and
+ *           make new_elem the first element
+ *       Else
+ *           Find the first unused (NULL) element
+ *           If this is the last allocated element in the array
+ *               Allocate another block of elements
+ *           Make that unused element in the array new_elem
+ *       Note: Fixed size elements are copied into the pointer sized
+ *             slots of the array using memcpy. Otherwise only the
+ *             pointer to the element is stored and it will be freed
+ *             by __fwm_array_destroy.
+ * return: the array, which may have been moved
+ *         NULL on error, an existing array is left for the caller to free
+ **********************************************************************/
+
+static void **__fwm_array_add_element(void **a, size_t * count,
+                                     void *new_elem, size_t fixed_size)
+{
+       void **new_a;
+       size_t elem;
+       size_t elem_alloc;
+
+       /* Slots are indexed as void *, so they must be at least that big */
+       elem_alloc = sizeof(void *);
+       if (fixed_size > elem_alloc) {
+               elem_alloc = fixed_size;
+       }
+
+       if (a == NULL || *count == 0) {
+               a = (void **) malloc(__FWM_BLOCKING * elem_alloc);
+               if (a == NULL) {
+                       *count = 0;
+                       return (NULL);
+               }
+               memset(a, '\0', __FWM_BLOCKING * elem_alloc);
+               *count = __FWM_BLOCKING;
+               elem = 0;
+       } else {
+               for (elem = 0; elem < *count; elem++) {
+                       if (a[elem] == NULL) {
+                               break;
+                       }
+               }
+
+               if (elem + 2 > *count) {
+                       new_a = (void **) realloc((void *) a,
+                                                 (*count + __FWM_BLOCKING)
+                                                 * elem_alloc);
+                       if (new_a == NULL) {
+                               /* a is untouched and still the caller's */
+                               return (NULL);
+                       }
+                       a = new_a;
+                       memset((char *) a + *count * elem_alloc, '\0',
+                              __FWM_BLOCKING * elem_alloc);
+                       *count += __FWM_BLOCKING;
+               }
+       }
+
+       if (fixed_size) {
+               memcpy(&(a[elem]), new_elem, fixed_size);
+       } else {
+               a[elem] = new_elem;
+       }
+
+       return (a);
+}
+
+
+/**********************************************************************
+ * __fwm_str_to_fwm
+ * Convert a string to a fwmark
+ * pre: str: ASCII representation of a fwmark, a decimal integer >= 0
+ *      fwm: pointer to fwm_t to store result in
+ * post: str is converted to a fwm, errno is cleared before conversion
+ * return: 0 on success
+ *         -1 on error
+ **********************************************************************/
+
+static int __fwm_str_to_fwm(const char *str, fwm_t *fwm)
+{
+       long l;
+       char *end;
+
+       if(str == NULL || *str == '\0' || fwm == NULL) {
+               return(-1);
+       }
+       errno = 0;
+       l = strtol(str, &end, 10);
+       if(*end != '\0' || errno == ERANGE || l < 0
+          || (unsigned long) l > (fwm_t) -1) {
+               return(-1);
+       }
+       *fwm = (fwm_t) l;
+       return(0);
+}
+
+
+/**********************************************************************
+ * __fwm_getc
+ * read a single character from the fwmarks file
+ * Note: This is buffered internally and hence should
+ * be reasonably efficient.
+ * pre: fwmarks file is open.
+ * post: If __fwm_getc_buffer is empty or has been exhausted
+ *       then it is filled by reading up to
+ *       __FWM_LOOKUP_MAX_LINE_LENGTH bytes from __fwm_fd.
+ *       One character is returned from __fwm_getc_buffer and
+ *       the offset is advanced.
+ * return: One character from the fwmarks file.
+ *         EOF on error or end of file.
+ **********************************************************************/
+
+static int __fwm_getc(void)
+{
+       ssize_t nread;
+
+       if (__fwm_getc_offset < __fwm_getc_nread) {
+               return ((int) (__fwm_getc_buffer[__fwm_getc_offset++]));
+       }
+
+       while (1) {
+               nread = read(__fwm_fd, __fwm_getc_buffer,
+                            __FWM_LOOKUP_MAX_LINE_LENGTH);
+               if (nread < 0) {
+                       if (errno == EINTR) {
+                               continue;
+                       }
+                       return (EOF);
+               }
+               if (nread == 0) {
+                       return (EOF);
+               }
+
+               __fwm_getc_offset = 0;
+               __fwm_getc_nread = nread;
+               break;
+       }
+
+       return ((int) (__fwm_getc_buffer[__fwm_getc_offset++]));
+}
+
+
+/**********************************************************************
+ * __fwm_add_fwmark
+ * Add a fwmark, converted to network byte order, to __fwm_hostent
+ * pre: fwm: fwmark to add to __fwm_hostent, in host byte order
+ * post: fwm is added to h_addr_list
+ * return: 0 on success
+ *         -1 on error, __fwm_hostent is reset and the file is closed
+ **********************************************************************/
+
+static int __fwm_add_fwmark(fwm_t fwm)
+{
+       void **a;
+
+       fwm = htonl(fwm);
+
+       a = __fwm_array_add_element((void **) __fwm_h_addr_list,
+                                   &__fwm_no_h_addr_list, (void *) &fwm,
+                                   sizeof(fwm_t));
+       if (a == NULL) {
+               __fwm_reset_global();
+               endfwment();
+               return (-1);
+       }
+
+       __fwm_h_addr_list = (char **) a;
+       __fwm_hostent.h_addr_list = __fwm_h_addr_list;
+
+       return (0);
+}
+
+
+/**********************************************************************
+ * __fwm_add_fwmark_str
+ * Add a fwmark to __fwm_hostent
+ * pre: fwm_str: fwm to add to the __fwm_hostent, as a string.
+ *      The string must be a non-negative decimal integer in ASCII.
+ * post: fwm is added to h_addr_list
+ * return: 0 on success
+ *         -1 on error
+ **********************************************************************/
+
+static int __fwm_add_fwmark_str(char *fwm_str)
+{
+       fwm_t fwm;
+
+       if(__fwm_str_to_fwm(fwm_str, &fwm) < 0) {
+               return(-1);
+       }
+
+       return (__fwm_add_fwmark(fwm));
+}
+
+
+/**********************************************************************
+ * __fwm_add_name
+ * Add a name to __fwm_hostent
+ * pre: name: name to add to __fwm_hostent
+ * post: name is copied using strdup and the copy is added to
+ *       h_aliases. If h_name is null, it is set to the first alias.
+ *       On error the copy is freed.
+ * return: 0 on success
+ *         -1 on error
+ **********************************************************************/
+
+static int __fwm_add_name(const char *name)
+{
+       void **a;
+       char *name_cpy;
+
+       name_cpy = strdup(name);
+       if (name_cpy == NULL) {
+               return (-1);
+       }
+
+       a = __fwm_array_add_element((void **) __fwm_h_aliases,
+                                   &__fwm_no_h_aliases, (void *) name_cpy, 0);
+       if (a == NULL) {
+               free(name_cpy);
+               __fwm_reset_global();
+               endfwment();
+               return (-1);
+       }
+
+       __fwm_h_aliases = (char **) a;
+       __fwm_hostent.h_aliases = __fwm_h_aliases;
+       if (__fwm_hostent.h_name == NULL) {
+               __fwm_hostent.h_name = __fwm_hostent.h_aliases[0];
+       }
+
+       return (0);
+}
+
+
+/**********************************************************************
+ * __fwm_simple_hostent
+ * Build a hostent with fwm as the only entry in h_addr_list and
+ * name as the h_name and only entry in h_aliases
+ * pre: fwm: fwm to be the "address" in the hostent
+ *      name: "name" for the hostent
+ * post: The global __fwm_hostent is reset and set with fwm and name
+ * return: pointer to __fwm_hostent
+ *         NULL on error, the fwmarks file is closed
+ **********************************************************************/
+
+static struct hostent *__fwm_simple_hostent(const fwm_t fwm, const char *name) 
+{
+       __fwm_reset_global();
+       if (__fwm_add_fwmark(fwm) < 0) {
+               endfwment();
+               return (NULL);
+       }
+       if (__fwm_add_name(name) < 0) {
+               endfwment();
+               return (NULL);
+       }
+       return(&__fwm_hostent);
+}
+
+/**********************************************************************
+ * __fwm_beginfwment
+ * Open the fwmarks file and reset the read buffers.
+ * pre: FWMARKS_FILE is defined
+ * post: __fwm_getc_* globals are zeroed, any open fwmarks file is closed
+ *       __fwm_fd is an open, read-only file descriptor to FWMARKS_FILE.
+ * return: __fwm_fd, which is -1 if FWMARKS_FILE could not be opened
+ **********************************************************************/
+
+static int __fwm_beginfwment(void)
+{
+       __fwm_getc_nread = 0;
+       __fwm_getc_offset = 0;
+       memset(__fwm_getc_buffer, '\0', __FWM_LOOKUP_MAX_LINE_LENGTH);
+       /* 0 is a valid descriptor, only a negative value means closed */
+       if (__fwm_fd > -1) {
+               endfwment();
+       }
+
+       if ((__fwm_fd = open(FWMARKS_FILE, O_RDONLY)) < 0) {
+               endfwment();
+       }
+
+       return (__fwm_fd);
+}
+
+
+/**********************************************************************
+ * setfwment
+ * Rewind the fwmarks file
+ * pre: FWMARKS_FILE is defined
+ * post: If fwmarks file is already open it is rewound to the beginning
+ *       else it is opened. Buffered input is discarded.
+ * return: none
+ **********************************************************************/
+
+void setfwment(void)
+{
+       if (__fwm_fd < 0) {
+               __fwm_beginfwment();
+       } else if (lseek(__fwm_fd, 0, SEEK_SET) < 0) {
+               endfwment();
+       }
+       __fwm_getc_nread = 0;
+       __fwm_getc_offset = 0;
+}
+
+
+/**********************************************************************
+ * endfwment
+ * Close the fwmarks file
+ * pre: none
+ * post: fwmarks file is closed if it was open, __fwm_fd is set to -1
+ * return: none
+ **********************************************************************/
+
+void endfwment(void)
+{
+       if (__fwm_fd > -1) {
+               close(__fwm_fd);
+       }
+       __fwm_fd = -1;
+}
+
+
+/**********************************************************************
+ * getfwment
+ * Read the next valid line from the fwmarks file
+ * pre: none
+ * return: hostent for the line of the fwmarks file if a valid line is found
+ *         NULL if the end of the file is reached without finding a new
+ *         line, or on error.
+ *
+ * Note: Values are stored in internally allocated memory which is not
+ *       persistent across calls. If you wish to keep values you should
+ *       copy them.  Do not free the memory returned by this function.
+ **********************************************************************/
+
+
+/* Mark the beginning of a key */
+#define BEGIN_KEY \
+       if(!in_escape && !in_comment && !in_quote){ \
+               in_key=1; \
+               valid_key = 0; \
+       }
+
+/* Mark the end of a key. 
+ * If a valid key has been read then store it */
+#define END_KEY \
+       if(!in_escape && in_key && !in_quote){ \
+               if(in_key && token_pos){ \
+                       *(token_buffer+token_pos)='\0'; \
+                       if(__fwm_add_fwmark_str(token_buffer) < 0) { \
+                               __fwm_reset_global(); \
+                               endfwment(); \
+                               return(NULL); \
+                       } \
+                       else { \
+                               valid_key = 1; \
+                       } \
+               } \
+               token_pos=0; \
+               in_key=0; \
+       }
+
+/* Mark the beginning of a value */
+#define BEGIN_VALUE \
+       if(!in_key && !in_comment && !in_quote){ \
+               in_value=1; \
+       } \
+
+/* Mark the end of a value.
+ * If a valid value and a valid key have been read then store the value */
+
+#define END_VALUE \
+       if(!in_escape && in_value && !in_quote){ \
+               if(in_value && valid_key){ \
+                       *(token_buffer+token_pos)='\0'; \
+                       if(__fwm_add_name(token_buffer) < 0 ) { \
+                               __fwm_reset_global(); \
+                               endfwment(); \
+                               return(NULL); \
+                       } \
+               } \
+               token_pos=0; \
+               in_value=0; \
+       }
+
+/* Mark the end of a comment if it is not escaped*/
+#define END_COMMENT \
+       if(!in_escape){ \
+               in_comment=0; \
+       }
+
+/* Mark the beginning of a comment if it is not escaped or in a quote */
+#define BEGIN_COMMENT \
+       if(!in_escape && !in_quote){ \
+               in_comment=1; \
+       }
+
+/* Mark the beginning of an escape */
+#define BEGIN_ESCAPE \
+       in_escape=1;
+
+/* Mark the end of an escape */
+#define END_ESCAPE \
+       in_escape=0;
+
+#define __FWM_SINGLE_QUOTE 1
+#define __FWM_DOUBLE_QUOTE 2
+
+struct hostent *getfwment(void)
+{
+       ssize_t token_pos;
+       char token_buffer[__FWM_LOOKUP_MAX_LINE_LENGTH];
+       char c;
+       int max_token_pos = __FWM_LOOKUP_MAX_LINE_LENGTH - 3;
+       int i;
+
+       int in_escape = 0;
+       int in_comment = 0;
+       int skip_char = 0;
+       int in_value = 0;
+       int in_quote = 0;
+       int in_key = 0;
+       int valid_key = 0;
+
+       if (__fwm_fd < 0 && __fwm_beginfwment() < 0) {
+               return (NULL);
+       }
+
+       token_pos = 0;
+
+       __fwm_reset_global();
+
+       BEGIN_KEY;
+
+       while ((i = __fwm_getc()) != EOF) {
+               c = (char) i;
+
+               switch (c) {
+               case ' ':
+               case '\t':
+                       END_KEY;
+                       END_VALUE;
+                       if (in_escape) {
+                               BEGIN_VALUE;
+                       }
+                       END_ESCAPE;
+                       break;
+               case '\n':
+               case '\r':
+                       END_KEY;
+                       END_COMMENT;
+                       END_VALUE;
+                       if (!in_escape && !in_quote && valid_key) {
+                               if (__fwm_hostent.h_aliases == NULL) {
+                                       return (NULL);
+                               }
+                               return (&__fwm_hostent);
+                       }
+                       BEGIN_KEY;
+                       END_ESCAPE;
+                       break;
+               case '\\':
+                       if (in_escape || in_quote) {
+                               END_ESCAPE;
+                       } else {
+                               BEGIN_ESCAPE;
+                       }
+                       BEGIN_VALUE;
+                       break;
+               case '#':
+                       BEGIN_COMMENT;
+                       END_KEY;
+                       END_VALUE;
+                       BEGIN_VALUE;
+                       END_ESCAPE;
+                       break;
+               case '"':
+                       BEGIN_VALUE;
+                       if (!in_escape && !in_comment
+                           && !(in_quote & __FWM_SINGLE_QUOTE)) {
+                               if (in_quote & __FWM_DOUBLE_QUOTE) {
+                                       in_quote ^=
+                                           in_quote & __FWM_DOUBLE_QUOTE;
+                               } else {
+                                       in_quote |= __FWM_DOUBLE_QUOTE;
+                               }
+                               skip_char = 1;
+                       }
+                       END_ESCAPE;
+                       break;
+               case '\'':
+                       BEGIN_VALUE;
+                       if (!in_escape && !in_comment) {
+                               if (in_quote & __FWM_SINGLE_QUOTE) {
+                                       in_quote ^= __FWM_SINGLE_QUOTE;
+                               } else {
+                                       in_quote |= __FWM_SINGLE_QUOTE;
+                               }
+                               skip_char = 1;
+                       }
+                       END_ESCAPE;
+                       break;
+               default:
+                       BEGIN_VALUE;
+                       END_ESCAPE;
+                       break;
+               }
+
+               if (in_key | in_value &&
+                   c != '\n' &&
+                   c != '\r' &&
+                   !in_escape && !skip_char
+                   && token_pos < max_token_pos) {
+                       *(token_buffer + token_pos) = c;
+                       token_pos++;
+               }
+               skip_char = 0;
+
+       }
+
+       __fwm_reset_global();
+       return (NULL);
+}
+
+
+/**********************************************************************
+ * getfwmbyfwm
+ * Find the names for a firewall mark
+ * pre: fwm: firewall mark to find names of
+ * post: fwmarks file is searched for the first line corresponding
+ *       to fwm, if there is none fwm is used as its own name
+ * return: hostent corresponding to fwm, NULL on error
+ *
+ * Note: Values are stored in internally allocated memory which is not
+ *       persistent across calls. If you wish to keep values you should
+ *       copy them.  Do not free the memory returned by this function.
+ **********************************************************************/
+
+struct hostent *getfwmbyfwm(fwm_t fwm)
+{
+       struct hostent *hostent;
+       char **elem;
+       fwm_t entry;
+       char buf[11];
+
+       setfwment();
+       while ((hostent = getfwment()) != NULL) {
+               for (elem = hostent->h_addr_list; *elem != NULL; elem++) {
+                       /* The mark is stored in the slot itself */
+                       memcpy(&entry, elem, sizeof(entry));
+                       if (ntohl(entry) == fwm) {
+                               endfwment();
+                               return (hostent);
+                       }
+               }
+       }
+       endfwment();
+
+       /* No entry was found in the fwmarks file, so return
+        * the fwm as its own name */
+       if (snprintf(buf, sizeof(buf), "%u", fwm) < 0) {
+               return (NULL);
+       }
+       return(__fwm_simple_hostent(fwm, buf));
+}
+
+
+/**********************************************************************
+ * getfwmbyname
+ * Find the fwmark and aliases for a name
+ * pre: name: name to find the firewall mark and aliases of
+ * post: If name is the ASCII representation of a fwm
+ *           A hostent with that as the name and fwm is returned.
+ *       Else
+ *          fwmarks file is searched for the first line corresponding
+ *          to name
+ * return: hostent corresponding to name as per the fwmarks file.
+ *         NULL if name is NULL, is not found, or on error.
+ * Note: Values are stored in internally allocated memory which is not
+ *       persistent across calls. If you wish to keep values you should
+ *       copy them.  Do not free the memory returned by this function.
+ **********************************************************************/
+
+struct hostent *getfwmbyname(const char *name)
+{
+       struct hostent *hostent;
+       char **elem;
+       fwm_t fwm;
+
+       if (name == NULL) {
+               return (NULL);
+       }
+       /* If name is just the ASCII representation of an fwm then
+        * use that as the fwm */
+       if(__fwm_str_to_fwm(name, &fwm) > -1) {
+               return(__fwm_simple_hostent(fwm, name));
+       }
+
+       setfwment();
+       while ((hostent = getfwment()) != NULL) {
+               for (elem = hostent->h_aliases; *elem != NULL; elem++) {
+                       if (strcmp(name, *elem) == 0) {
+                               endfwment();
+                               return (hostent);
+                       }
+               }
+       }
+       endfwment();
+       return (NULL);
+}
diff -ruN ipvs-0.9.5/ipvs/ipvsadm/fwmark_lookup.h ipvs-0.9.5.new/ipvs/ipvsadm/fwmark_lookup.h
--- ipvs-0.9.5/ipvs/ipvsadm/fwmark_lookup.h     Thu Jan  1 12:00:00 1970
+++ ipvs-0.9.5.new/ipvs/ipvsadm/fwmark_lookup.h Mon Oct 29 10:50:23 2001
@@ -0,0 +1,144 @@
+/*
+ *      fwmark_lookup.h
+ *
+ *      Look up firewall marks in /etc/fwmarks. This is intended to be
+ *      analogous to /etc/hosts
+ *
+ *      Authors: Horms <horms@vergenet.net>
+ *
+ *      Version: $Id: fwmark_lookup.h,v Exp $
+ *
+ *      Copyright (c) 2001 Horms
+ *      All rights reserved.
+ *
+ *      This program is free software; you can redistribute it and/or modify
+ *      it under the terms of the GNU General Public License as published by
+ *      the Free Software Foundation; either version 2 of the License, or
+ *      (at your option) any later version.
+ *
+ *      This program is distributed in the hope that it will be useful,
+ *      but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *      GNU General Public License for more details.
+ *
+ *      You should have received a copy of the GNU General Public License
+ *      along with this program; if not, write to the Free Software
+ *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *     Changes:
+ */
+
+/*
+ * /etc/fwmarks
+ *
+ * The format for this file is
+ *
+ * fwmark name ...
+ *
+ * Blank lines are ignored, as is anything including and after a # (hash)
+ * on a line. If a \ precedes a new line then the lines will be
+ * concatenated.  If a \ precedes any other character, including a #
+ * (hash) it will be treated as a literal. Anything inside single quotes
+ * (') will be treated as a literal. Anything other than a (') inside
+ * double quotes (") will be treated as a literal. Whitespace in names must
+ * be escaped or quoted.
+ *
+ * e.g.
+ *
+ * 1 jimmy james
+ * 2 john
+ *
+ * This associates the names jimmy and james with fwmark 1
+ * and the name john with fwmark 2
+ *
+ */
+
+#ifndef _FWMARK_LOOKUP_H
+#define _FWMARK_LOOKUP_H
+
+
+/* This should be set by a configure check */
+typedef unsigned int uint32;
+
+typedef uint32 fwm_t;
+
+#ifndef FWMARKS_FILE
+#define FWMARKS_FILE "/etc/fwmarks"
+#endif
+
+
+/**********************************************************************
+ * setfwment
+ * Rewind the fwmarks file
+ * pre: FWMARKS_FILE is defined
+ * post: If fwmarks file is already open it is rewound to the beginning
+ *       else it is opened.
+ * return: none
+ **********************************************************************/
+
+void setfwment(void);
+
+
+/**********************************************************************
+ * endfwment
+ * Close the fwmarks file
+ * pre: none
+ * post: fwmarks file is closed if it was open.
+ * return: none
+ **********************************************************************/
+
+void endfwment(void);
+
+
+/**********************************************************************
+ * getfwment
+ * Read the next valid line from the fwmarks file
+ * pre: none
+ * return: hostent for the line of the fwmarks file if a valid line is found
+ *         NULL if the end of the file is reached without finding a new
+ *         line, or on error.
+ *
+ * Note: Values are stored in internally allocated memory which is not
+ *       persistent across calls. If you wish to keep values you should
+ *       copy them.  Do not free the memory returned by this function.
+ **********************************************************************/
+
+
+struct hostent *getfwment(void);
+
+
+/********************************************************************** 
+ * getfwmbyfwm
+ * Find the names for a firewall mark
+ * pre: fwm: firewall mark to find names of
+ * post: fwmarks file is searched for the first line corresponding
+ *       to fwm
+ * return: hostent corresponding to fwm as per the fwmarks file.
+ *
+ * Note: Values are stored in internally allocated memory which is not
+ *       persistent across calls. If you wish to keep values you should
+ *       copy them.  Do not free the memory returned by this function.
+ **********************************************************************/
+
+struct hostent *getfwmbyfwm(fwm_t fwm);
+
+
+/********************************************************************** 
+ * getfwmbyname
+ * Find the fwmark and aliases for a name
+ * pre: name: name to find the firewall mark and aliases of
+ * post: If name is the ASCII representation of a fwm
+ *           A hostent with that as the name and fwm is returned.
+ *       Else
+ *          fwmarks file is searched for the first line corresponding
+ *          to name
+ * return: hostent corresponding to name as per the fwmarks file.
+ *
+ * Note: Values are stored in internally allocated memory which is not
+ *       persistent across calls. If you wish to keep values you should
+ *       copy them.  Do not free the memory returned by this function.
+ **********************************************************************/
+
+struct hostent *getfwmbyname(const char *name);
+
+#endif /* _FWMARK_LOOKUP_H */
diff -ruN ipvs-0.9.5/ipvs/ipvsadm/header ipvs-0.9.5.new/ipvs/ipvsadm/header
--- ipvs-0.9.5/ipvs/ipvsadm/header      Thu Jan  1 12:00:00 1970
+++ ipvs-0.9.5.new/ipvs/ipvsadm/header  Mon Oct 29 10:42:16 2001
@@ -0,0 +1,30 @@
+/*
+ *      dynamic_array.c -  Code to convert a stream input into a
+ *                         that can be parsed as argc and argv.
+ *
+ *      Author: Horms <horms@vergenet.net>
+ *
+ *      Version: $Id: config_stream.c,v Exp $
+ *
+ *      Copyright (c) 2001 Horms
+ *      All rights reserved.
+ *
+ *      This program is free software; you can redistribute it and/or modify
+ *      it under the terms of the GNU General Public License as published by
+ *      the Free Software Foundation; either version 2 of the License, or
+ *      (at your option) any later version.
+ *
+ *      This program is distributed in the hope that it will be useful,
+ *      but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *      GNU General Public License for more details.
+ *
+ *      You should have received a copy of the GNU General Public License
+ *      along with this program; if not, write to the Free Software
+ *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *     Changes:
+ *      Horms         :   scanf Glibc under Red Hat 7 does not appear
+ *                        to return EOF when input ends. Fall through
+ *                        code has been added to handle this case correctly
+ */
diff -ruN ipvs-0.9.5/ipvs/ipvsadm/ipvsadm.8 ipvs-0.9.5.new/ipvs/ipvsadm/ipvsadm.8
--- ipvs-0.9.5/ipvs/ipvsadm/ipvsadm.8   Wed Sep 19 03:42:54 2001
+++ ipvs-0.9.5.new/ipvs/ipvsadm/ipvsadm.8       Mon Oct 29 11:05:22 2001
@@ -4,16 +4,18 @@
 .\"    $Id: ipvsadm.8,v 1.8 2001/09/18 15:42:54 wensong Exp $
 .\"
 .\"     Authors: Mike Wangsmo <wanger@redhat.com>
-.\"              Wensong Zhang <wensong@linux.com>
+.\"              Wensong Zhang <wensong@linuxvirtualserver.org>
+.\"              Horms <horms@vergenet.net>
 .\"
 .\"     Changes:
 .\"       Horms            :  Updated to reflect recent change of ipvsadm
 .\"                        :  Style guidance taken from ipchains(8)
 .\"                           where appropriate.
-.\"       Wensong Zhang    :  Added a short note about the defense strategies
+.\"       Wensong Zhang    :  Added a short note about the defence strategies
 .\"       Horms            :  Tidy up some of the description and the
 .\"                           grammar in the -f and sysctl sections
 .\"       Wensong Zhang    :  -s option description taken from ipchains(8)
+.\"       Horms            :  documented /etc/fwmarks support
 .\"
 .\"     This program is free software; you can redistribute it and/or modify
 .\"     it under the terms of the GNU General Public License as published by
@@ -30,7 +32,7 @@
 .\"     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 .\"
 .\"
-.TH IPVSADM 8 "18th September 2001" "LVS Administration" "Linux Administrator's Guide"
+.TH IPVSADM 8 "29th October 2001" "LVS Administration" "Linux Administrator's Guide"
 .UC 4
 .SH NAME
 ipvsadm \- Linux Virtual Server administration
@@ -69,7 +71,7 @@
 nodes. The active node of the cluster redirects service requests to a
 collection of server hosts that will actually perform the
 services. Supported features include two protocols (TCP and UDP),
-three packet-forwarding methods (NAT, tunneling, and direct routing),
+three packet-forwarding methods (NAT, tunnelling, and direct routing),
 and six load balancing algorithms (round robin, weighted round robin,
 least-connection, weighted least-connection, locality-based
 least-connection, and locality-based least-connection with
@@ -149,9 +151,9 @@
 the current timeout value of the  corresponding  entry  is preserved.
 .TP
 .B --start-daemon \fIstate\fP
-Start the connection synchronization daemon. The \fIstate\fP is to
+Start the connection synchronisation daemon. The \fIstate\fP is to
 indicate that the daemon is started as master or backup. The
-connection synchronization daemon is implemented inside the Linux
+connection synchronisation daemon is implemented inside the Linux
 kernel. The master daemon running on the primary load balancer
 multicasts changes of connections periodically, and the backup daemon
 running the backup load balancers receives multicast message and
@@ -161,7 +163,7 @@
 can continue to access the service.
 .TP
 .B --stop-daemon
-Stop the connection synchronization daemon.
+Stop the connection synchronisation daemon.
 .TP
 \fB-h, --help\fR
 Display a description of the command syntax.
@@ -187,8 +189,8 @@
 virtual service instead of an address, port and protocol (UDP or
 TCP). The marking of packets with a firewall-mark is configured using
 the -m|--mark option to \fBiptables\fR(8). It can be used to build a
-virtual service assoicated with the same real servers, covering
-multiple IP addresss, port and protocol tripplets.
+virtual service associated with the same real servers, covering
+multiple IP addresses, port and protocol triplets.
 .sp
 Using firewall-mark virtual services provides a convenient method of
 grouping together different IP addresses, ports and protocols into a
@@ -270,10 +272,10 @@
 service name of port.  In the case of the masquerading method, the
 host address is usually an RFC 1918 private IP address, and the port
 can be different from that of the associated service. With the
-tunneling and direct routing methods, \fIport\fP must be equal to that
+tunnelling and direct routing methods, \fIport\fP must be equal to that
 of the service address. For normal services, the port specified  in
 the service address will be used if \fIport\fP is not specified. For
-fwmark services, \fIport\fP may be ommitted, in which case  the
+fwmark services, \fIport\fP may be omitted, in which case  the
 destination port on the real server will be the destination port of
 the request sent to the virtual service.
 .TP
@@ -281,7 +283,7 @@
 .sp
 \fB-g, --gatewaying\fR  Use gatewaying (direct routing). This is the default.
 .sp
-\fB-i, --ipip\fR  Use ipip encapsulation (tunneling).
+\fB-i, --ipip\fR  Use ipip encapsulation (tunnelling).
 .sp
 \fB-m, --masquerading\fR  Use masquerading (network access translation, or NAT).
 .sp
@@ -405,13 +407,14 @@
 modprobe ip_vs_ftp
 .fi
 .SH NOTES
-The Linux Virtual Server implements three defense strategies against
+.SS DENIAL OF SERVICE DEFENCE STRATEGIES
+The Linux Virtual Server implements three defence strategies against
 some types of denial of service (DoS) attacks. The Linux Director
 creates an entry for each connection in order to keep its state, and
 each entry occupies 128 bytes effective memory. LVS's vulnerability to
 a DoS attack lies in the potential to increase the number entries as
 much as possible until the linux director runs out of memory. The
-three defense strategies against the attack are: Randomly drop some
+three defence strategies against the attack are: Randomly drop some
 entries in the table. Drop 1/rate packets before forwarding them. And
 use secure tcp state transition table and short timeouts. The
 strategies are controlled by sysctl variables and corresponding
@@ -422,7 +425,7 @@
 /proc/sys/net/ipv4/vs/secure_tcp
 .PP
 Valid values for each variable are 0 through to 3. The default value
-is 0, which disables the respective defense strategy. 1 and 2 are
+is 0, which disables the respective defence strategy. 1 and 2 are
 automatic modes - when there is no enough available memory, the
 respective strategy will be enabled and the variable is automatically
 set to 2, otherwise the strategy is disabled and the variable is set
@@ -433,6 +436,36 @@
 .sp
 /proc/sys/net/ipv4/vs/amemthresh
 /proc/sys/net/ipv4/vs/timeout_*
+.SS NAMING FIREWALL MARKS
+ipvsadm understands names for firewall marks. These are assigned
+by adding entries to /etc/fwmarks which is intended to
+be analogous to /etc/hosts.
+.PP
+The format of the file is "fwmark name ...".
+Blank lines are ignored, as is anything including and after a # (hash) on a
+line. If a \\ precedes a new line then the lines will be concatenated.  If a
+\\ precedes any other character, including a # (hash) it will be treated as
+a literal. Anything inside single quotes (') will be treated as a literal.
+Anything other than a (') inside double quotes (") will be treated as a
+literal. Whitespace in names must be escaped or quoted.
+.sp
+.nf
+e.g.
+# /etc/fwmarks
+1 a-name
+2 another-name yet-another-name
+.fi
+.PP
+This associates a-name with fwmark 1 and associates another-name
+and yet-another-name with fwmark 2.
+In this way names may be used instead of numeric values when
+defining fwmark virtual services.
+.sp
+.nf
+e.g.
+ipvsadm -A -f a-name
+ipvsadm -a -f a-name -r 127.0.0.1
+.fi
 .SH FILES
 .I /proc/net/ip_masq/vs
 .br
@@ -469,6 +502,8 @@
 .I /proc/sys/net/ipv4/vs/timeout_timewait
 .br
 .I /proc/sys/net/ipv4/vs/timeout_udp
+.br
+.I /etc/fwmarks
 .SH SEE ALSO
 \fBiptables\fP(8), \fBinsmod\fP(8), \fBmodprobe\fP(8)
 .SH AUTHORS
@@ -477,5 +512,5 @@
          Peter Kese <peter.kese@ijs.si>
 man page - Mike Wangsmo <wanger@redhat.com>
           Wensong Zhang <wensong@linuxvirtualserver.org>
-          Horms <horms@valinux.com>
+          Horms <horms@vergenet.net>
 .fi
diff -ruN ipvs-0.9.5/ipvs/ipvsadm/ipvsadm.c ipvs-0.9.5.new/ipvs/ipvsadm/ipvsadm.c
--- ipvs-0.9.5/ipvs/ipvsadm/ipvsadm.c   Sat Oct 20 04:05:17 2001
+++ ipvs-0.9.5.new/ipvs/ipvsadm/ipvsadm.c       Mon Oct 29 10:49:39 2001
@@ -52,6 +52,7 @@
  *        Horms               :   added -v option
  *        Wensong Zhang       :   rewrite most code of parsing options and
  *                                processing options.
+ *        Horms               :   added /etc/fwmarks support
  *
  *
  *      ippfvsadm - Port Fowarding & Virtual Server ADMinistration program
@@ -122,6 +123,7 @@
 #endif
 
 #include "config_stream.h"
+#include "fwmark_lookup.h"
 #include "libipvs/libipvs.h"
 
 #define IPVSADM_VERSION_NO              "v" VERSION
@@ -241,13 +243,15 @@
 int str_is_digit(const char *str);
 int string_to_number(const char *s, int min, int max);
 int host_to_addr(const char *name, struct in_addr *addr);
+int name_to_fwm(const char *name, fwm_t *fwm);
 char * addr_to_host(struct in_addr *addr);
 char * addr_to_anyname(struct in_addr *addr);
 int service_to_port(const char *name, unsigned short proto);
-char * port_to_service(int port, unsigned short proto);
-char * port_to_anyname(int port, unsigned short proto);
-char * addrport_to_anyname(struct in_addr *addr, int port,
+char * port_to_service(unsigned int port, unsigned short proto);
+char * port_to_anyname(unsigned int port, unsigned short proto);
+char * addrport_to_anyname(struct in_addr *addr, unsigned int port,
                           unsigned short proto, unsigned int format);
+char * fwmark_to_anyname(fwm_t fwm, unsigned int format);
 int parse_service(char *buf, u_int16_t proto,
                  u_int32_t *addr, u_int16_t *port);
 int parse_netmask(char *buf, u_int32_t *addr);
@@ -932,20 +936,26 @@
 
 
 /*
- * Parse IP fwmark from the argument.
+ * Parse fwmark from the argument.
  */
 unsigned int parse_fwmark(char *buf)
 {
        unsigned long l;
+       unsigned int i;
        char *end;
 
        errno = 0;
        l = strtol(buf, &end, 10);
-       if (*end != '\0' || end == buf ||
-           errno == ERANGE || l <= 0 || l > UINT_MAX)
-               fail(2, "invalid fwmark value `%s' specified", buf);
+       if (*end == '\0' && end != buf &&
+                       errno != ERANGE && l > 0 && l < UINT_MAX) {
+               return(l);
+       }
+       else if(name_to_fwm(buf, &i) > -1) {
+               return(ntohl(i));
+       }
 
-       return l;
+       fail(2, "invalid fwmark value `%s' specified", buf);
+       return 0;
 }
 
 
@@ -1401,19 +1411,22 @@
 {
        struct ip_vs_get_dests *d;
        char svc_name[64];
+       char *vname;
        int i;
 
        if (!(d = ipvs_get_dests(svc)))
                exit(1);
 
        if (svc->fwmark) {
+               if(!(vname = fwmark_to_anyname(svc->fwmark, format)))
+                       fail(2, "fwmark_to_anyname");
                if (format & FMT_RULE)
-                       sprintf(svc_name, "-f %d", svc->fwmark);
+                       sprintf(svc_name, "-f %s", vname);
                else
-                       sprintf(svc_name, "FWM  %d", svc->fwmark);
+                       sprintf(svc_name, "FWM  %s", vname);
+               free(vname);
        } else {
                struct in_addr vaddr;
-               char *vname;
                vaddr.s_addr = svc->addr;
 
                if (!(vname = addrport_to_anyname(&vaddr, ntohs(svc->port),
@@ -1597,6 +1610,20 @@
 }
 
 
+int name_to_fwm(const char *name, fwm_t *fwm)
+{
+       struct hostent *hostent;
+
+       if ((hostent = getfwmbyname(name)) == NULL) {
+               return -1;
+       }
+
+       /* warning: we just handle h_addr_list[0] here */
+       *fwm = (fwm_t) hostent->h_addr_list[0];
+       return 0;
+}
+
+
 char * addr_to_host(struct in_addr *addr)
 {
        struct hostent *host;
@@ -1635,7 +1662,7 @@
 }
 
 
-char * port_to_service(int port, unsigned short proto)
+char * port_to_service(unsigned int port, unsigned short proto)
 {
        struct servent *service;
 
@@ -1650,7 +1677,7 @@
 }
 
 
-char * port_to_anyname(int port, unsigned short proto)
+char * port_to_anyname(unsigned int port, unsigned short proto)
 {
        char *name;
        static char buf[10];
@@ -1658,13 +1685,13 @@
        if ((name = port_to_service(port, proto)) != NULL)
                return name;
        else {
-               sprintf(buf, "%d", port);
+               sprintf(buf, "%u", port);
                return buf;
        }
 }
 
 
-char * addrport_to_anyname(struct in_addr *addr, int port,
+char * addrport_to_anyname(struct in_addr *addr, unsigned int port,
                           unsigned short proto, unsigned int format)
 {
        char *buf;
@@ -1678,6 +1705,28 @@
        } else {
                snprintf(buf, 60, "%s:%s", addr_to_anyname(addr),
                         port_to_anyname(port, proto));
+       }
+
+       return buf;
+}
+
+
+char * fwmark_to_anyname(fwm_t fwm, unsigned int format)
+{
+       char *buf;
+       struct hostent *h;
+
+       if (!(buf=malloc(60)))
+               return NULL;
+
+       if (format & FMT_NUMERIC) {
+               snprintf(buf, 60, "%u", fwm);
+       } else {
+               if((h = getfwmbyfwm(fwm)) == NULL) {
+                       free(buf);
+                       return NULL;
+               }
+               snprintf(buf, 60, "%s", h->h_name);
        }
 
        return buf;

--Boundary_(ID_7gtcNW35aSCKH29xS00dLw)--


Next Previous Contents