diff --git a/Gopkg.lock b/Gopkg.lock index bb895973e0..0252481b14 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -2687,6 +2687,7 @@ "github.com/terraform-providers/terraform-provider-template/template", "github.com/terraform-providers/terraform-provider-tls/tls", "golang.org/x/crypto/ssh", + "golang.org/x/crypto/ssh/terminal", "golang.org/x/net/context", "gopkg.in/src-d/go-git.v4", "gopkg.in/src-d/go-git.v4/config", diff --git a/cmd/tarmak/cmd/cluster.go b/cmd/tarmak/cmd/cluster.go index e237b3b788..efc4e2f55a 100644 --- a/cmd/tarmak/cmd/cluster.go +++ b/cmd/tarmak/cmd/cluster.go @@ -115,6 +115,31 @@ func clusterKubeconfigFlags(fs *flag.FlagSet) { ) } +func clusterSnapshotEtcdRestoreFlags(fs *flag.FlagSet) { + store := &globalFlags.Cluster.Snapshot.Etcd.Restore + + fs.StringVar( + &store.K8sMain, + consts.RestoreK8sMainFlagName, + "", + "location of k8s-main snapshot backup", + ) + + fs.StringVar( + &store.K8sEvents, + consts.RestoreK8sEventsFlagName, + "", + "location of k8s-events snapshot backup", + ) + + fs.StringVar( + &store.Overlay, + consts.RestoreOverlayFlagName, + "", + "location of overlay snapshot backup", + ) +} + func init() { RootCmd.AddCommand(clusterCmd) } diff --git a/cmd/tarmak/cmd/cluster_apply.go b/cmd/tarmak/cmd/cluster_apply.go index 955b499532..8a68a7d92d 100644 --- a/cmd/tarmak/cmd/cluster_apply.go +++ b/cmd/tarmak/cmd/cluster_apply.go @@ -27,9 +27,7 @@ var clusterApplyCmd = &cobra.Command{ }, Run: func(cmd *cobra.Command, args []string) { t := tarmak.New(globalFlags) - applyCmd := t.NewCmdTarmak(cmd.Flags(), args) - t.CancellationContext().WaitOrCancel(applyCmd.Apply) }, } diff --git a/cmd/tarmak/cmd/cluster_instances_ssh.go b/cmd/tarmak/cmd/cluster_instances_ssh.go index 77597bebd8..030a7c8536 100644 --- a/cmd/tarmak/cmd/cluster_instances_ssh.go +++ b/cmd/tarmak/cmd/cluster_instances_ssh.go @@ -1,22 +1,6 @@ // Copyright Jetstack Ltd. See LICENSE for details. 
package cmd -import ( - "github.com/spf13/cobra" - - "github.com/jetstack/tarmak/pkg/tarmak" -) - -var clusterInstancesSshCmd = &cobra.Command{ - Use: "ssh [instance alias]", - Short: "Log into an instance with SSH", - Run: func(cmd *cobra.Command, args []string) { - t := tarmak.New(globalFlags) - defer t.Cleanup() - t.SSHPassThrough(args) - }, -} - func init() { - clusterInstancesCmd.AddCommand(clusterInstancesSshCmd) + clusterInstancesCmd.AddCommand(clusterSshCmd) } diff --git a/cmd/tarmak/cmd/cluster_snapshot.go b/cmd/tarmak/cmd/cluster_snapshot.go new file mode 100644 index 0000000000..d1faa1aefe --- /dev/null +++ b/cmd/tarmak/cmd/cluster_snapshot.go @@ -0,0 +1,15 @@ +// Copyright Jetstack Ltd. See LICENSE for details. +package cmd + +import ( + "github.com/spf13/cobra" +) + +var clusterSnapshotCmd = &cobra.Command{ + Use: "snapshot", + Short: "Manage snapshots of remote consul and etcd clusters", +} + +func init() { + clusterCmd.AddCommand(clusterSnapshotCmd) +} diff --git a/cmd/tarmak/cmd/cluster_snapshot_consul.go b/cmd/tarmak/cmd/cluster_snapshot_consul.go new file mode 100644 index 0000000000..13b067c926 --- /dev/null +++ b/cmd/tarmak/cmd/cluster_snapshot_consul.go @@ -0,0 +1,15 @@ +// Copyright Jetstack Ltd. See LICENSE for details. +package cmd + +import ( + "github.com/spf13/cobra" +) + +var clusterSnapshotConsulCmd = &cobra.Command{ + Use: "consul", + Short: "Manage snapshots on remote consul clusters", +} + +func init() { + clusterSnapshotCmd.AddCommand(clusterSnapshotConsulCmd) +} diff --git a/cmd/tarmak/cmd/cluster_snapshot_consul_restore.go b/cmd/tarmak/cmd/cluster_snapshot_consul_restore.go new file mode 100644 index 0000000000..756d73c951 --- /dev/null +++ b/cmd/tarmak/cmd/cluster_snapshot_consul_restore.go @@ -0,0 +1,32 @@ +// Copyright Jetstack Ltd. See LICENSE for details. 
+package cmd + +import ( + "fmt" + + "github.com/spf13/cobra" + + "github.com/jetstack/tarmak/pkg/tarmak" + "github.com/jetstack/tarmak/pkg/tarmak/snapshot/consul" +) + +var clusterSnapshotConsulRestoreCmd = &cobra.Command{ + Use: "restore [source path]", + Short: "restore consul cluster with source snapshot", + PreRunE: func(cmd *cobra.Command, args []string) error { + if len(args) != 1 { + return fmt.Errorf("expecting single source path, got=%d", len(args)) + } + + return nil + }, + Run: func(cmd *cobra.Command, args []string) { + t := tarmak.New(globalFlags) + s := consul.New(t, args[0]) + t.CancellationContext().WaitOrCancel(t.NewCmdSnapshot(cmd.Flags(), args, s).Restore) + }, +} + +func init() { + clusterSnapshotConsulCmd.AddCommand(clusterSnapshotConsulRestoreCmd) +} diff --git a/cmd/tarmak/cmd/cluster_snapshot_consul_save.go b/cmd/tarmak/cmd/cluster_snapshot_consul_save.go new file mode 100644 index 0000000000..84c99aea75 --- /dev/null +++ b/cmd/tarmak/cmd/cluster_snapshot_consul_save.go @@ -0,0 +1,32 @@ +// Copyright Jetstack Ltd. See LICENSE for details. 
+package cmd + +import ( + "fmt" + + "github.com/spf13/cobra" + + "github.com/jetstack/tarmak/pkg/tarmak" + "github.com/jetstack/tarmak/pkg/tarmak/snapshot/consul" +) + +var clusterSnapshotConsulSaveCmd = &cobra.Command{ + Use: "save [target path]", + Short: "save consul cluster snapshot to target path", + PreRunE: func(cmd *cobra.Command, args []string) error { + if len(args) != 1 { + return fmt.Errorf("expecting single target path, got=%d", len(args)) + } + + return nil + }, + Run: func(cmd *cobra.Command, args []string) { + t := tarmak.New(globalFlags) + s := consul.New(t, args[0]) + t.CancellationContext().WaitOrCancel(t.NewCmdSnapshot(cmd.Flags(), args, s).Save) + }, +} + +func init() { + clusterSnapshotConsulCmd.AddCommand(clusterSnapshotConsulSaveCmd) +} diff --git a/cmd/tarmak/cmd/cluster_snapshot_etcd.go b/cmd/tarmak/cmd/cluster_snapshot_etcd.go new file mode 100644 index 0000000000..f14853cb62 --- /dev/null +++ b/cmd/tarmak/cmd/cluster_snapshot_etcd.go @@ -0,0 +1,15 @@ +// Copyright Jetstack Ltd. See LICENSE for details. +package cmd + +import ( + "github.com/spf13/cobra" +) + +var clusterSnapshotEtcdCmd = &cobra.Command{ + Use: "etcd", + Short: "Manage snapshots on remote etcd clusters", +} + +func init() { + clusterSnapshotCmd.AddCommand(clusterSnapshotEtcdCmd) +} diff --git a/cmd/tarmak/cmd/cluster_snapshot_etcd_restore.go b/cmd/tarmak/cmd/cluster_snapshot_etcd_restore.go new file mode 100644 index 0000000000..6454fca411 --- /dev/null +++ b/cmd/tarmak/cmd/cluster_snapshot_etcd_restore.go @@ -0,0 +1,42 @@ +// Copyright Jetstack Ltd. See LICENSE for details. 
+package cmd + +import ( + "fmt" + + "github.com/spf13/cobra" + + "github.com/jetstack/tarmak/pkg/tarmak" + "github.com/jetstack/tarmak/pkg/tarmak/snapshot/etcd" + "github.com/jetstack/tarmak/pkg/tarmak/utils/consts" +) + +var clusterSnapshotEtcdRestoreCmd = &cobra.Command{ + Use: "restore", + Short: "restore etcd cluster with source snapshots", + PreRunE: func(cmd *cobra.Command, args []string) error { + if !cmd.Flags().Changed(consts.RestoreK8sMainFlagName) && + !cmd.Flags().Changed(consts.RestoreK8sEventsFlagName) && + !cmd.Flags().Changed(consts.RestoreOverlayFlagName) { + + return fmt.Errorf("expecting at least one set flag of [%s %s %s]", + consts.RestoreK8sMainFlagName, + consts.RestoreK8sEventsFlagName, + consts.RestoreOverlayFlagName, + ) + } + return nil + }, + Run: func(cmd *cobra.Command, args []string) { + t := tarmak.New(globalFlags) + s := etcd.New(t, "") + t.CancellationContext().WaitOrCancel(t.NewCmdSnapshot(cmd.Flags(), args, s).Restore) + }, +} + +func init() { + clusterSnapshotEtcdRestoreFlags( + clusterSnapshotEtcdRestoreCmd.PersistentFlags(), + ) + clusterSnapshotEtcdCmd.AddCommand(clusterSnapshotEtcdRestoreCmd) +} diff --git a/cmd/tarmak/cmd/cluster_snapshot_etcd_save.go b/cmd/tarmak/cmd/cluster_snapshot_etcd_save.go new file mode 100644 index 0000000000..62f1ffdc79 --- /dev/null +++ b/cmd/tarmak/cmd/cluster_snapshot_etcd_save.go @@ -0,0 +1,26 @@ +// Copyright Jetstack Ltd. See LICENSE for details. 
+package cmd + +import ( + "github.com/spf13/cobra" + + "github.com/jetstack/tarmak/pkg/tarmak" + "github.com/jetstack/tarmak/pkg/tarmak/snapshot/etcd" +) + +var clusterSnapshotEtcdSaveCmd = &cobra.Command{ + Use: "save [target path prefix]", + Short: "save etcd snapshot to target path prefix, i.e 'backup-'", + Run: func(cmd *cobra.Command, args []string) { + if len(args) == 0 { + args = []string{""} + } + t := tarmak.New(globalFlags) + s := etcd.New(t, args[0]) + t.CancellationContext().WaitOrCancel(t.NewCmdSnapshot(cmd.Flags(), args, s).Save) + }, +} + +func init() { + clusterSnapshotEtcdCmd.AddCommand(clusterSnapshotEtcdSaveCmd) +} diff --git a/cmd/tarmak/cmd/cluster_ssh.go b/cmd/tarmak/cmd/cluster_ssh.go index debfc5efea..a2a7087389 100644 --- a/cmd/tarmak/cmd/cluster_ssh.go +++ b/cmd/tarmak/cmd/cluster_ssh.go @@ -2,18 +2,26 @@ package cmd import ( + "errors" + "strings" + "github.com/spf13/cobra" "github.com/jetstack/tarmak/pkg/tarmak" ) var clusterSshCmd = &cobra.Command{ - Use: "ssh [instance alias]", + Use: "ssh [instance alias] [optional ssh arguments]", Short: "Log into an instance with SSH", + PreRunE: func(cmd *cobra.Command, args []string) error { + if len(args) < 1 { + return errors.New("expecting an instance aliases argument") + } + return nil + }, Run: func(cmd *cobra.Command, args []string) { t := tarmak.New(globalFlags) - defer t.Cleanup() - t.SSHPassThrough(args) + t.Perform(t.SSHPassThrough(args[0], strings.Join(args[1:], " "))) }, } diff --git a/cmd/tarmak/cmd/tunnel.go b/cmd/tarmak/cmd/tunnel.go new file mode 100644 index 0000000000..d0e8d5eafc --- /dev/null +++ b/cmd/tarmak/cmd/tunnel.go @@ -0,0 +1,56 @@ +// Copyright Jetstack Ltd. See LICENSE for details. 
+package cmd + +import ( + "fmt" + "os" + "time" + + "github.com/spf13/cobra" + + "github.com/jetstack/tarmak/pkg/tarmak" +) + +var tunnelCmd = &cobra.Command{ + Use: "tunnel [destination] [destination port] [local port]", + PreRunE: func(cmd *cobra.Command, args []string) error { + if len(args) != 3 { + return fmt.Errorf( + "expecting only a destination, destination and local port argument, got=%s", args) + } + return nil + }, + Run: func(cmd *cobra.Command, args []string) { + t := tarmak.New(globalFlags) + tunnel := t.SSH().Tunnel(args[0], args[1], args[2], false) + + retries := 5 + for { + err := tunnel.Start() + if err == nil { + t.Log().Infof("tunnel started: %s", args) + break + } + + t.Log().Errorf("failed to start tunnel: %s", err) + retries-- + if retries == 0 { + t.Log().Error("failed to start tunnel after 5 attempts") + t.Cleanup() + os.Exit(1) + } + + time.Sleep(time.Second * 2) + } + + time.Sleep(time.Minute * 10) + t.Cleanup() + os.Exit(0) + }, + Hidden: true, + DisableFlagParsing: true, +} + +func init() { + RootCmd.AddCommand(tunnelCmd) +} diff --git a/docs/cmd-docs.rst b/docs/cmd-docs.rst index bd1d9ef050..0a8e272ac9 100644 --- a/docs/cmd-docs.rst +++ b/docs/cmd-docs.rst @@ -82,22 +82,52 @@ Command line documentation for both tarmak and wing commands .. toctree:: :maxdepth: 1 - generated/cmd/tarmak/tarmak_clusters_instances_ssh + generated/cmd/tarmak/tarmak_clusters_list .. toctree:: :maxdepth: 1 - generated/cmd/tarmak/tarmak_clusters_list + generated/cmd/tarmak/tarmak_clusters_plan .. toctree:: :maxdepth: 1 - generated/cmd/tarmak/tarmak_clusters_plan + generated/cmd/tarmak/tarmak_clusters_set-current .. toctree:: :maxdepth: 1 - generated/cmd/tarmak/tarmak_clusters_set-current + generated/cmd/tarmak/tarmak_clusters_snapshot + +.. toctree:: + :maxdepth: 1 + + generated/cmd/tarmak/tarmak_clusters_snapshot_consul + +.. toctree:: + :maxdepth: 1 + + generated/cmd/tarmak/tarmak_clusters_snapshot_consul_restore + +.. 
toctree:: + :maxdepth: 1 + + generated/cmd/tarmak/tarmak_clusters_snapshot_consul_save + +.. toctree:: + :maxdepth: 1 + + generated/cmd/tarmak/tarmak_clusters_snapshot_etcd + +.. toctree:: + :maxdepth: 1 + + generated/cmd/tarmak/tarmak_clusters_snapshot_etcd_restore + +.. toctree:: + :maxdepth: 1 + + generated/cmd/tarmak/tarmak_clusters_snapshot_etcd_save .. toctree:: :maxdepth: 1 diff --git a/docs/generated/cmd/tarmak/tarmak_clusters.rst b/docs/generated/cmd/tarmak/tarmak_clusters.rst index 3c392b100f..ba9f8e6954 100644 --- a/docs/generated/cmd/tarmak/tarmak_clusters.rst +++ b/docs/generated/cmd/tarmak/tarmak_clusters.rst @@ -47,5 +47,6 @@ SEE ALSO * `tarmak clusters list `_ - Print a list of clusters * `tarmak clusters plan `_ - Plan changes on the currently configured cluster * `tarmak clusters set-current `_ - Set current cluster in config +* `tarmak clusters snapshot `_ - Manage snapshots of remote consul and etcd clusters * `tarmak clusters ssh `_ - Log into an instance with SSH diff --git a/docs/generated/cmd/tarmak/tarmak_clusters_snapshot.rst b/docs/generated/cmd/tarmak/tarmak_clusters_snapshot.rst new file mode 100644 index 0000000000..c224d2166b --- /dev/null +++ b/docs/generated/cmd/tarmak/tarmak_clusters_snapshot.rst @@ -0,0 +1,40 @@ +.. 
_tarmak_clusters_snapshot: + +tarmak clusters snapshot +------------------------ + +Manage snapshots of remote consul and etcd clusters + +Synopsis +~~~~~~~~ + + +Manage snapshots of remote consul and etcd clusters + +Options +~~~~~~~ + +:: + + -h, --help help for snapshot + +Options inherited from parent commands +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + -c, --config-directory string config directory for tarmak's configuration (default "~/.tarmak") + --current-cluster string override the current cluster set in the config + --keep-containers do not clean-up terraform/packer containers after running them + --log-flush-frequency duration Maximum number of seconds between log flushes (default 5s) + --public-api-endpoint Override kubeconfig to point to cluster's public API endpoint + -v, --verbose enable verbose logging + --wing-dev-mode use a bundled wing version rather than a tagged release from GitHub + +SEE ALSO +~~~~~~~~ + +* `tarmak clusters `_ - Operations on clusters +* `tarmak clusters snapshot consul `_ - Manage snapshots on remote consul clusters +* `tarmak clusters snapshot etcd `_ - Manage snapshots on remote etcd clusters + diff --git a/docs/generated/cmd/tarmak/tarmak_clusters_snapshot_consul.rst b/docs/generated/cmd/tarmak/tarmak_clusters_snapshot_consul.rst new file mode 100644 index 0000000000..fee8445b27 --- /dev/null +++ b/docs/generated/cmd/tarmak/tarmak_clusters_snapshot_consul.rst @@ -0,0 +1,40 @@ +.. 
_tarmak_clusters_snapshot_consul: + +tarmak clusters snapshot consul +------------------------------- + +Manage snapshots on remote consul clusters + +Synopsis +~~~~~~~~ + + +Manage snapshots on remote consul clusters + +Options +~~~~~~~ + +:: + + -h, --help help for consul + +Options inherited from parent commands +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + -c, --config-directory string config directory for tarmak's configuration (default "~/.tarmak") + --current-cluster string override the current cluster set in the config + --keep-containers do not clean-up terraform/packer containers after running them + --log-flush-frequency duration Maximum number of seconds between log flushes (default 5s) + --public-api-endpoint Override kubeconfig to point to cluster's public API endpoint + -v, --verbose enable verbose logging + --wing-dev-mode use a bundled wing version rather than a tagged release from GitHub + +SEE ALSO +~~~~~~~~ + +* `tarmak clusters snapshot `_ - Manage snapshots of remote consul and etcd clusters +* `tarmak clusters snapshot consul restore `_ - restore consul cluster with source snapshot +* `tarmak clusters snapshot consul save `_ - save consul cluster snapshot to target path + diff --git a/docs/generated/cmd/tarmak/tarmak_clusters_snapshot_consul_restore.rst b/docs/generated/cmd/tarmak/tarmak_clusters_snapshot_consul_restore.rst new file mode 100644 index 0000000000..fa50a7867b --- /dev/null +++ b/docs/generated/cmd/tarmak/tarmak_clusters_snapshot_consul_restore.rst @@ -0,0 +1,42 @@ +.. 
_tarmak_clusters_snapshot_consul_restore: + +tarmak clusters snapshot consul restore +--------------------------------------- + +restore consul cluster with source snapshot + +Synopsis +~~~~~~~~ + + +restore consul cluster with source snapshot + +:: + + tarmak clusters snapshot consul restore [source path] [flags] + +Options +~~~~~~~ + +:: + + -h, --help help for restore + +Options inherited from parent commands +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + -c, --config-directory string config directory for tarmak's configuration (default "~/.tarmak") + --current-cluster string override the current cluster set in the config + --keep-containers do not clean-up terraform/packer containers after running them + --log-flush-frequency duration Maximum number of seconds between log flushes (default 5s) + --public-api-endpoint Override kubeconfig to point to cluster's public API endpoint + -v, --verbose enable verbose logging + --wing-dev-mode use a bundled wing version rather than a tagged release from GitHub + +SEE ALSO +~~~~~~~~ + +* `tarmak clusters snapshot consul `_ - Manage snapshots on remote consul clusters + diff --git a/docs/generated/cmd/tarmak/tarmak_clusters_instances_ssh.rst b/docs/generated/cmd/tarmak/tarmak_clusters_snapshot_consul_save.rst similarity index 66% rename from docs/generated/cmd/tarmak/tarmak_clusters_instances_ssh.rst rename to docs/generated/cmd/tarmak/tarmak_clusters_snapshot_consul_save.rst index 017820f6cf..fa51af35e6 100644 --- a/docs/generated/cmd/tarmak/tarmak_clusters_instances_ssh.rst +++ b/docs/generated/cmd/tarmak/tarmak_clusters_snapshot_consul_save.rst @@ -1,26 +1,26 @@ -.. _tarmak_clusters_instances_ssh: +.. 
_tarmak_clusters_snapshot_consul_save: -tarmak clusters instances ssh ------------------------------ +tarmak clusters snapshot consul save +------------------------------------ -Log into an instance with SSH +save consul cluster snapshot to target path Synopsis ~~~~~~~~ -Log into an instance with SSH +save consul cluster snapshot to target path :: - tarmak clusters instances ssh [instance alias] [flags] + tarmak clusters snapshot consul save [target path] [flags] Options ~~~~~~~ :: - -h, --help help for ssh + -h, --help help for save Options inherited from parent commands ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -38,5 +38,5 @@ Options inherited from parent commands SEE ALSO ~~~~~~~~ -* `tarmak clusters instances `_ - Operations on instances +* `tarmak clusters snapshot consul `_ - Manage snapshots on remote consul clusters diff --git a/docs/generated/cmd/tarmak/tarmak_clusters_snapshot_etcd.rst b/docs/generated/cmd/tarmak/tarmak_clusters_snapshot_etcd.rst new file mode 100644 index 0000000000..44c53e8353 --- /dev/null +++ b/docs/generated/cmd/tarmak/tarmak_clusters_snapshot_etcd.rst @@ -0,0 +1,40 @@ +.. 
_tarmak_clusters_snapshot_etcd: + +tarmak clusters snapshot etcd +----------------------------- + +Manage snapshots on remote etcd clusters + +Synopsis +~~~~~~~~ + + +Manage snapshots on remote etcd clusters + +Options +~~~~~~~ + +:: + + -h, --help help for etcd + +Options inherited from parent commands +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + -c, --config-directory string config directory for tarmak's configuration (default "~/.tarmak") + --current-cluster string override the current cluster set in the config + --keep-containers do not clean-up terraform/packer containers after running them + --log-flush-frequency duration Maximum number of seconds between log flushes (default 5s) + --public-api-endpoint Override kubeconfig to point to cluster's public API endpoint + -v, --verbose enable verbose logging + --wing-dev-mode use a bundled wing version rather than a tagged release from GitHub + +SEE ALSO +~~~~~~~~ + +* `tarmak clusters snapshot `_ - Manage snapshots of remote consul and etcd clusters +* `tarmak clusters snapshot etcd restore `_ - restore etcd cluster with source snapshots +* `tarmak clusters snapshot etcd save `_ - save etcd snapshot to target path prefix, i.e 'backup-' + diff --git a/docs/generated/cmd/tarmak/tarmak_clusters_snapshot_etcd_restore.rst b/docs/generated/cmd/tarmak/tarmak_clusters_snapshot_etcd_restore.rst new file mode 100644 index 0000000000..c476c97b3e --- /dev/null +++ b/docs/generated/cmd/tarmak/tarmak_clusters_snapshot_etcd_restore.rst @@ -0,0 +1,45 @@ +.. 
_tarmak_clusters_snapshot_etcd_restore: + +tarmak clusters snapshot etcd restore +------------------------------------- + +restore etcd cluster with source snapshots + +Synopsis +~~~~~~~~ + + +restore etcd cluster with source snapshots + +:: + + tarmak clusters snapshot etcd restore [flags] + +Options +~~~~~~~ + +:: + + -h, --help help for restore + --k8s-events string location of k8s-events snapshot backup + --k8s-main string location of k8s-main snapshot backup + --overlay string location of overlay snapshot backup + +Options inherited from parent commands +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + -c, --config-directory string config directory for tarmak's configuration (default "~/.tarmak") + --current-cluster string override the current cluster set in the config + --keep-containers do not clean-up terraform/packer containers after running them + --log-flush-frequency duration Maximum number of seconds between log flushes (default 5s) + --public-api-endpoint Override kubeconfig to point to cluster's public API endpoint + -v, --verbose enable verbose logging + --wing-dev-mode use a bundled wing version rather than a tagged release from GitHub + +SEE ALSO +~~~~~~~~ + +* `tarmak clusters snapshot etcd `_ - Manage snapshots on remote etcd clusters + diff --git a/docs/generated/cmd/tarmak/tarmak_clusters_snapshot_etcd_save.rst b/docs/generated/cmd/tarmak/tarmak_clusters_snapshot_etcd_save.rst new file mode 100644 index 0000000000..fa549a2af6 --- /dev/null +++ b/docs/generated/cmd/tarmak/tarmak_clusters_snapshot_etcd_save.rst @@ -0,0 +1,42 @@ +.. 
_tarmak_clusters_snapshot_etcd_save: + +tarmak clusters snapshot etcd save +---------------------------------- + +save etcd snapshot to target path prefix, i.e 'backup-' + +Synopsis +~~~~~~~~ + + +save etcd snapshot to target path prefix, i.e 'backup-' + +:: + + tarmak clusters snapshot etcd save [target path prefix] [flags] + +Options +~~~~~~~ + +:: + + -h, --help help for save + +Options inherited from parent commands +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + -c, --config-directory string config directory for tarmak's configuration (default "~/.tarmak") + --current-cluster string override the current cluster set in the config + --keep-containers do not clean-up terraform/packer containers after running them + --log-flush-frequency duration Maximum number of seconds between log flushes (default 5s) + --public-api-endpoint Override kubeconfig to point to cluster's public API endpoint + -v, --verbose enable verbose logging + --wing-dev-mode use a bundled wing version rather than a tagged release from GitHub + +SEE ALSO +~~~~~~~~ + +* `tarmak clusters snapshot etcd `_ - Manage snapshots on remote etcd clusters + diff --git a/docs/generated/cmd/tarmak/tarmak_clusters_ssh.rst b/docs/generated/cmd/tarmak/tarmak_clusters_ssh.rst index a0a1525712..0208b5f168 100644 --- a/docs/generated/cmd/tarmak/tarmak_clusters_ssh.rst +++ b/docs/generated/cmd/tarmak/tarmak_clusters_ssh.rst @@ -13,7 +13,7 @@ Log into an instance with SSH :: - tarmak clusters ssh [instance alias] [flags] + tarmak clusters ssh [instance alias] [optional ssh arguments] [flags] Options ~~~~~~~ diff --git a/docs/generated/reference/output/api-docs.html b/docs/generated/reference/output/api-docs.html index 1ba5b8dc0f..c78a0ada7d 100644 --- a/docs/generated/reference/output/api-docs.html +++ b/docs/generated/reference/output/api-docs.html @@ -11,7 +11,7 @@ - +

    Tarmak

    @@ -640,6 +640,10 @@

    ClusterFlags v1alpha1

    plan
    ClusterPlanFlags flags for handling images + +snapshot
    ClusterSnapshotFlags +flags for snapshots of clusters +

    ClusterImagesBuildFlags v1alpha1

    @@ -1377,6 +1381,131 @@

    ClusterPodSecurityPolicy v1alpha1

    +

    ClusterSnapshotEtcdFlags v1alpha1

    + + + + + + + + + + + + + + + +
    GroupVersionKind
    tarmakv1alpha1ClusterSnapshotEtcdFlags
    +

    Contains the cluster snapshot etcd flags

    + + + + + + + + + + + + + + + +
    FieldDescription
    restore
    ClusterSnapshotEtcdRestoreFlags
    +

    ClusterSnapshotEtcdRestoreFlags v1alpha1

    + + + + + + + + + + + + + + + +
    GroupVersionKind
    tarmakv1alpha1ClusterSnapshotEtcdRestoreFlags
    +

    Contains the cluster snapshot etcd restore flags

    + + + + + + + + + + + + + + + + + + + + + + + +
    FieldDescription
    k8sEvents
    string
    Path to k8s-main snapshot backup
    k8sMain
    string
    overlay
    string
    Path to overlay snapshot backup
    +

    ClusterSnapshotFlags v1alpha1

    + + + + + + + + + + + + + + + +
    GroupVersionKind
    tarmakv1alpha1ClusterSnapshotFlags
    +

    Contains the cluster snapshot flags

    + + + + + + + + + + + + + + + +
    FieldDescription
    etcd
    ClusterSnapshotEtcdFlags

    EgressRule v1alpha1

    diff --git a/docs/generated/reference/output/navData.js b/docs/generated/reference/output/navData.js index 3d0d480511..662c0de380 100644 --- a/docs/generated/reference/output/navData.js +++ b/docs/generated/reference/output/navData.js @@ -1 +1 @@ -(function(){navData = {"toc":[{"section":"-strong-field-definitions-strong-","subsections":[{"section":"volume-v1alpha1"},{"section":"values-v1alpha1"},{"section":"taint-v1alpha1"},{"section":"subnet-v1alpha1"},{"section":"ssh-v1alpha1"},{"section":"providergcp-v1alpha1"},{"section":"providerazure-v1alpha1"},{"section":"provideramazon-v1alpha1"},{"section":"provider-v1alpha1"},{"section":"network-v1alpha1"},{"section":"loggingsinkelasticsearch-v1alpha1"},{"section":"loggingsink-v1alpha1"},{"section":"label-v1alpha1"},{"section":"kubernetesapi-v1alpha1"},{"section":"internetgw-v1alpha1"},{"section":"instancestatusmanifest-v1alpha1"},{"section":"instancespecmanifest-v1alpha1"},{"section":"instancepoolkubernetes-v1alpha1"},{"section":"instancepoolamazon-v1alpha1"},{"section":"instancepool-v1alpha1"},{"section":"ingressrule-v1alpha1"},{"section":"httpbasicauth-v1alpha1"},{"section":"firewall-v1alpha1"},{"section":"environment-v1alpha1"},{"section":"egressrule-v1alpha1"},{"section":"clusterpodsecuritypolicy-v1alpha1"},{"section":"clusterplanflags-v1alpha1"},{"section":"clusterkubernetestiller-v1alpha1"},{"section":"clusterkubernetesprometheus-v1alpha1"},{"section":"clusterkubernetesdashboard-v1alpha1"},{"section":"clusterkubernetesclusterautoscaleroverprovisioning-v1alpha1"},{"section":"clusterkubernetesclusterautoscaler-v1alpha1"},{"section":"clusterkubernetesapiserveroidc-v1alpha1"},{"section":"clusterkubernetesapiserveramazonaccesslogs-v1alpha1"},{"section":"clusterkubernetesapiserveramazon-v1alpha1"},{"section":"clusterkubernetesapiserver-v1alpha1"},{"section":"clusterkubernetes-v1alpha1"},{"section":"clusterkubeconfigflags-v1alpha1"},{"section":"clusterimagesflags-v1alpha1"},{"section":"clusterimagesbuildflags-v1alpha1
"},{"section":"clusterflags-v1alpha1"},{"section":"clusterdestroyflags-v1alpha1"},{"section":"clusterapplyflags-v1alpha1"},{"section":"clusteramazon-v1alpha1"},{"section":"amazonesproxy-v1alpha1"}]},{"section":"-strong-old-api-versions-strong-","subsections":[]},{"section":"instance-v1alpha1","subsections":[]},{"section":"cluster-v1alpha1","subsections":[]},{"section":"flags-v1alpha1","subsections":[]},{"section":"image-v1alpha1","subsections":[]},{"section":"config-v1alpha1","subsections":[]},{"section":"-strong-tarmak-strong-","subsections":[]}],"flatToc":["volume-v1alpha1","values-v1alpha1","taint-v1alpha1","subnet-v1alpha1","ssh-v1alpha1","providergcp-v1alpha1","providerazure-v1alpha1","provideramazon-v1alpha1","provider-v1alpha1","network-v1alpha1","loggingsinkelasticsearch-v1alpha1","loggingsink-v1alpha1","label-v1alpha1","kubernetesapi-v1alpha1","internetgw-v1alpha1","instancestatusmanifest-v1alpha1","instancespecmanifest-v1alpha1","instancepoolkubernetes-v1alpha1","instancepoolamazon-v1alpha1","instancepool-v1alpha1","ingressrule-v1alpha1","httpbasicauth-v1alpha1","firewall-v1alpha1","environment-v1alpha1","egressrule-v1alpha1","clusterpodsecuritypolicy-v1alpha1","clusterplanflags-v1alpha1","clusterkubernetestiller-v1alpha1","clusterkubernetesprometheus-v1alpha1","clusterkubernetesdashboard-v1alpha1","clusterkubernetesclusterautoscaleroverprovisioning-v1alpha1","clusterkubernetesclusterautoscaler-v1alpha1","clusterkubernetesapiserveroidc-v1alpha1","clusterkubernetesapiserveramazonaccesslogs-v1alpha1","clusterkubernetesapiserveramazon-v1alpha1","clusterkubernetesapiserver-v1alpha1","clusterkubernetes-v1alpha1","clusterkubeconfigflags-v1alpha1","clusterimagesflags-v1alpha1","clusterimagesbuildflags-v1alpha1","clusterflags-v1alpha1","clusterdestroyflags-v1alpha1","clusterapplyflags-v1alpha1","clusteramazon-v1alpha1","amazonesproxy-v1alpha1","-strong-field-definitions-strong-","-strong-old-api-versions-strong-","instance-v1alpha1","cluster-v1alpha1","flags-v1alp
ha1","image-v1alpha1","config-v1alpha1","-strong-tarmak-strong-"]};})(); \ No newline at end of file +(function(){navData = {"toc":[{"section":"-strong-field-definitions-strong-","subsections":[{"section":"volume-v1alpha1"},{"section":"values-v1alpha1"},{"section":"taint-v1alpha1"},{"section":"subnet-v1alpha1"},{"section":"ssh-v1alpha1"},{"section":"providergcp-v1alpha1"},{"section":"providerazure-v1alpha1"},{"section":"provideramazon-v1alpha1"},{"section":"provider-v1alpha1"},{"section":"network-v1alpha1"},{"section":"loggingsinkelasticsearch-v1alpha1"},{"section":"loggingsink-v1alpha1"},{"section":"label-v1alpha1"},{"section":"kubernetesapi-v1alpha1"},{"section":"internetgw-v1alpha1"},{"section":"instancestatusmanifest-v1alpha1"},{"section":"instancespecmanifest-v1alpha1"},{"section":"instancepoolkubernetes-v1alpha1"},{"section":"instancepoolamazon-v1alpha1"},{"section":"instancepool-v1alpha1"},{"section":"ingressrule-v1alpha1"},{"section":"httpbasicauth-v1alpha1"},{"section":"firewall-v1alpha1"},{"section":"environment-v1alpha1"},{"section":"egressrule-v1alpha1"},{"section":"clustersnapshotflags-v1alpha1"},{"section":"clustersnapshotetcdrestoreflags-v1alpha1"},{"section":"clustersnapshotetcdflags-v1alpha1"},{"section":"clusterpodsecuritypolicy-v1alpha1"},{"section":"clusterplanflags-v1alpha1"},{"section":"clusterkubernetestiller-v1alpha1"},{"section":"clusterkubernetesprometheus-v1alpha1"},{"section":"clusterkubernetesdashboard-v1alpha1"},{"section":"clusterkubernetesclusterautoscaleroverprovisioning-v1alpha1"},{"section":"clusterkubernetesclusterautoscaler-v1alpha1"},{"section":"clusterkubernetesapiserveroidc-v1alpha1"},{"section":"clusterkubernetesapiserveramazonaccesslogs-v1alpha1"},{"section":"clusterkubernetesapiserveramazon-v1alpha1"},{"section":"clusterkubernetesapiserver-v1alpha1"},{"section":"clusterkubernetes-v1alpha1"},{"section":"clusterkubeconfigflags-v1alpha1"},{"section":"clusterimagesflags-v1alpha1"},{"section":"clusterimagesbuildflags-v1alpha1"},
{"section":"clusterflags-v1alpha1"},{"section":"clusterdestroyflags-v1alpha1"},{"section":"clusterapplyflags-v1alpha1"},{"section":"clusteramazon-v1alpha1"},{"section":"amazonesproxy-v1alpha1"}]},{"section":"-strong-old-api-versions-strong-","subsections":[]},{"section":"instance-v1alpha1","subsections":[]},{"section":"cluster-v1alpha1","subsections":[]},{"section":"flags-v1alpha1","subsections":[]},{"section":"image-v1alpha1","subsections":[]},{"section":"config-v1alpha1","subsections":[]},{"section":"-strong-tarmak-strong-","subsections":[]}],"flatToc":["volume-v1alpha1","values-v1alpha1","taint-v1alpha1","subnet-v1alpha1","ssh-v1alpha1","providergcp-v1alpha1","providerazure-v1alpha1","provideramazon-v1alpha1","provider-v1alpha1","network-v1alpha1","loggingsinkelasticsearch-v1alpha1","loggingsink-v1alpha1","label-v1alpha1","kubernetesapi-v1alpha1","internetgw-v1alpha1","instancestatusmanifest-v1alpha1","instancespecmanifest-v1alpha1","instancepoolkubernetes-v1alpha1","instancepoolamazon-v1alpha1","instancepool-v1alpha1","ingressrule-v1alpha1","httpbasicauth-v1alpha1","firewall-v1alpha1","environment-v1alpha1","egressrule-v1alpha1","clustersnapshotflags-v1alpha1","clustersnapshotetcdrestoreflags-v1alpha1","clustersnapshotetcdflags-v1alpha1","clusterpodsecuritypolicy-v1alpha1","clusterplanflags-v1alpha1","clusterkubernetestiller-v1alpha1","clusterkubernetesprometheus-v1alpha1","clusterkubernetesdashboard-v1alpha1","clusterkubernetesclusterautoscaleroverprovisioning-v1alpha1","clusterkubernetesclusterautoscaler-v1alpha1","clusterkubernetesapiserveroidc-v1alpha1","clusterkubernetesapiserveramazonaccesslogs-v1alpha1","clusterkubernetesapiserveramazon-v1alpha1","clusterkubernetesapiserver-v1alpha1","clusterkubernetes-v1alpha1","clusterkubeconfigflags-v1alpha1","clusterimagesflags-v1alpha1","clusterimagesbuildflags-v1alpha1","clusterflags-v1alpha1","clusterdestroyflags-v1alpha1","clusterapplyflags-v1alpha1","clusteramazon-v1alpha1","amazonesproxy-v1alpha1","-strong-field
-definitions-strong-","-strong-old-api-versions-strong-","instance-v1alpha1","cluster-v1alpha1","flags-v1alpha1","image-v1alpha1","config-v1alpha1","-strong-tarmak-strong-"]};})(); \ No newline at end of file diff --git a/pkg/apis/tarmak/v1alpha1/types.go b/pkg/apis/tarmak/v1alpha1/types.go index 7bc0683d78..fa769351c2 100644 --- a/pkg/apis/tarmak/v1alpha1/types.go +++ b/pkg/apis/tarmak/v1alpha1/types.go @@ -146,6 +146,7 @@ type ClusterFlags struct { Images ClusterImagesFlags `json:"images,omitempty"` // flags for handling images Plan ClusterPlanFlags `json:"plan,omitempty"` // flags for planning clusters Kubeconfig ClusterKubeconfigFlags `json:"kubeconfig,omitempty"` // flags for kubeconfig of clusters + Snapshot ClusterSnapshotFlags `json:"snapshot,omitempty"` // flags for snapshots of clusters } // Contains the cluster plan flags @@ -187,3 +188,20 @@ type ClusterImagesBuildFlags struct { type ClusterKubeconfigFlags struct { Path string `json:"path,omitempty"` // Path to save kubeconfig to } + +// Contains the cluster snapshot flags +type ClusterSnapshotFlags struct { + Etcd ClusterSnapshotEtcdFlags `json:"etcd,omitempty"` // flags for handling etcd snapshots +} + +// Contains the cluster snapshot etcd flags +type ClusterSnapshotEtcdFlags struct { + Restore ClusterSnapshotEtcdRestoreFlags `json:"restore,omitempty"` // flags for handling etcd snapshot restore +} + +// Contains the cluster snapshot etcd restore flags +type ClusterSnapshotEtcdRestoreFlags struct { + K8sMain string `json:"k8sMain,omitempty"` // Path to k8s-main snapshot backup + K8sEvents string `json:"k8sEvents,omitempty"` // Path to k8s-events snapshot backup + Overlay string `json:"overlay,omitempty"` // Path to overlay snapshot backup +} diff --git a/pkg/apis/tarmak/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/tarmak/v1alpha1/zz_generated.deepcopy.go index aee63e81cd..7e2bfc809a 100644 --- a/pkg/apis/tarmak/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/tarmak/v1alpha1/zz_generated.deepcopy.go 
@@ -51,6 +51,7 @@ func (in *ClusterFlags) DeepCopyInto(out *ClusterFlags) { out.Images = in.Images out.Plan = in.Plan out.Kubeconfig = in.Kubeconfig + out.Snapshot = in.Snapshot return } @@ -129,6 +130,56 @@ func (in *ClusterPlanFlags) DeepCopy() *ClusterPlanFlags { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ClusterSnapshotEtcdFlags) DeepCopyInto(out *ClusterSnapshotEtcdFlags) { + *out = *in + out.Restore = in.Restore + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterSnapshotEtcdFlags. +func (in *ClusterSnapshotEtcdFlags) DeepCopy() *ClusterSnapshotEtcdFlags { + if in == nil { + return nil + } + out := new(ClusterSnapshotEtcdFlags) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ClusterSnapshotEtcdRestoreFlags) DeepCopyInto(out *ClusterSnapshotEtcdRestoreFlags) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterSnapshotEtcdRestoreFlags. +func (in *ClusterSnapshotEtcdRestoreFlags) DeepCopy() *ClusterSnapshotEtcdRestoreFlags { + if in == nil { + return nil + } + out := new(ClusterSnapshotEtcdRestoreFlags) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ClusterSnapshotFlags) DeepCopyInto(out *ClusterSnapshotFlags) { + *out = *in + out.Etcd = in.Etcd + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterSnapshotFlags. 
+func (in *ClusterSnapshotFlags) DeepCopy() *ClusterSnapshotFlags { + if in == nil { + return nil + } + out := new(ClusterSnapshotFlags) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Config) DeepCopyInto(out *Config) { *out = *in diff --git a/pkg/tarmak/cluster/cluster.go b/pkg/tarmak/cluster/cluster.go index c66347e32d..473fa928f1 100644 --- a/pkg/tarmak/cluster/cluster.go +++ b/pkg/tarmak/cluster/cluster.go @@ -6,6 +6,7 @@ import ( "fmt" "net" "path/filepath" + "strconv" "github.com/hashicorp/go-multierror" "github.com/hashicorp/go-version" @@ -16,6 +17,7 @@ import ( "github.com/jetstack/tarmak/pkg/tarmak/instance_pool" "github.com/jetstack/tarmak/pkg/tarmak/interfaces" "github.com/jetstack/tarmak/pkg/tarmak/role" + "github.com/jetstack/tarmak/pkg/tarmak/utils" wingclient "github.com/jetstack/tarmak/pkg/wing/client/clientset/versioned" ) @@ -660,9 +662,10 @@ func (c *Cluster) NetworkCIDR() *net.IPNet { func (c *Cluster) APITunnel() interfaces.Tunnel { return c.Environment().Tarmak().SSH().Tunnel( - "bastion", fmt.Sprintf("api.%s.%s", c.ClusterName(), c.Environment().Config().PrivateZone), - 6443, + "6443", + strconv.Itoa(utils.UnusedPort()), + true, ) } diff --git a/pkg/tarmak/cmd.go b/pkg/tarmak/cmd.go index 78d6399324..19ba4d03ff 100644 --- a/pkg/tarmak/cmd.go +++ b/pkg/tarmak/cmd.go @@ -30,6 +30,11 @@ type CmdTarmak struct { ctx interfaces.CancellationContext } +type CmdSnapshot struct { + *CmdTarmak + snapshot interfaces.Snapshot +} + func (t *Tarmak) NewCmdTarmak(pflags *pflag.FlagSet, args []string) *CmdTarmak { return &CmdTarmak{ Tarmak: t, @@ -40,6 +45,13 @@ func (t *Tarmak) NewCmdTarmak(pflags *pflag.FlagSet, args []string) *CmdTarmak { } } +func (t *Tarmak) NewCmdSnapshot(pflags *pflag.FlagSet, args []string, sh interfaces.Snapshot) *CmdSnapshot { + return &CmdSnapshot{ + CmdTarmak: t.NewCmdTarmak(pflags, args), + snapshot: sh, + } +} 
+ func (c *CmdTarmak) Plan() (returnCode int, err error) { if err := c.setup(); err != nil { return 1, err @@ -286,6 +298,14 @@ func (c *CmdTarmak) kubePublicAPIEndpoint() bool { return publicEndpoint } +func (c *CmdSnapshot) Save() error { + return c.snapshot.Save() +} + +func (c *CmdSnapshot) Restore() error { + return c.snapshot.Restore() +} + func (c *CmdTarmak) verifyTerraformBinaryVersion() error { cmd := exec.Command("terraform", "version") cmd.Env = os.Environ() diff --git a/pkg/tarmak/environment/bastion.go b/pkg/tarmak/environment/bastion.go index a718d52a03..6700de3b19 100644 --- a/pkg/tarmak/environment/bastion.go +++ b/pkg/tarmak/environment/bastion.go @@ -2,8 +2,10 @@ package environment import ( + "bufio" "context" "fmt" + "io" "os" "os/signal" "sync" @@ -48,11 +50,19 @@ func (e *Environment) VerifyBastionAvailable() error { expBackoff.MaxElapsedTime = time.Minute * 2 b := backoff.WithContext(expBackoff, ctx) + stderrR, stderrW := io.Pipe() + stderrScanner := bufio.NewScanner(stderrR) + go func() { + for stderrScanner.Scan() { + e.log.WithField("std", "err").Debug(stderrScanner.Text()) + } + }() + executeSSH := func() error { retCode, err := ssh.Execute( "bastion", "/bin/true", - []string{}, + nil, nil, stderrW, ) msg := "error while connecting to bastion host" diff --git a/pkg/tarmak/environment/environment.go b/pkg/tarmak/environment/environment.go index 3e07a56010..54327ab6cb 100644 --- a/pkg/tarmak/environment/environment.go +++ b/pkg/tarmak/environment/environment.go @@ -10,6 +10,7 @@ import ( "io/ioutil" "os" "path/filepath" + "strconv" "github.com/hashicorp/go-multierror" "github.com/sirupsen/logrus" @@ -297,9 +298,10 @@ func (e *Environment) Verify() error { func (e *Environment) WingTunnel() interfaces.Tunnel { return e.Tarmak().SSH().Tunnel( - "bastion", "localhost", - 9443, + "9443", + strconv.Itoa(utils.UnusedPort()), + false, ) } @@ -311,7 +313,7 @@ func (e *Environment) WingClientset() (*wingclient.Clientset, interfaces.Tunnel, // TODO: 
Do proper TLS here restConfig := &rest.Config{ - Host: fmt.Sprintf("https://127.0.0.1:%d", tunnel.Port()), + Host: fmt.Sprintf("https://127.0.0.1:%s", tunnel.Port()), TLSClientConfig: rest.TLSClientConfig{ Insecure: true, }, diff --git a/pkg/tarmak/interfaces/interfaces.go b/pkg/tarmak/interfaces/interfaces.go index 13a9a1d370..0b4e5e1a97 100644 --- a/pkg/tarmak/interfaces/interfaces.go +++ b/pkg/tarmak/interfaces/interfaces.go @@ -208,17 +208,17 @@ type Terraform interface { type SSH interface { WriteConfig(Cluster) error - PassThrough([]string) - Tunnel(hostname string, destination string, destinationPort int) Tunnel - Execute(host string, cmd string, args []string) (returnCode int, err error) + PassThrough(host string, additionalArguments string) error + Tunnel(destination, destinationPort, localPort string, daemonize bool) Tunnel + Execute(host string, cmd string, stdin io.Reader, stdout, stderr io.Writer) (returnCode int, err error) Validate() error - Cleanup() error + Cleanup() } type Tunnel interface { Start() error - Stop() error - Port() int + Stop() + Port() string BindAddress() string } @@ -234,7 +234,7 @@ type Host interface { Roles() []string SSHConfig() string Parameters() map[string]string - SSHControlPath() string + Aliases() []string } type Puppet interface { @@ -292,3 +292,10 @@ type CancellationContext interface { WaitOrCancel(f func() error) WaitOrCancelReturnCode(f func() (int, error)) } + +type Snapshot interface { + Save() error + Restore() error + Log() *logrus.Entry + SSH() SSH +} diff --git a/pkg/tarmak/kubectl/kubectl.go b/pkg/tarmak/kubectl/kubectl.go index 3e44adbc95..09145b4424 100644 --- a/pkg/tarmak/kubectl/kubectl.go +++ b/pkg/tarmak/kubectl/kubectl.go @@ -21,7 +21,6 @@ import ( clusterv1alpha1 "github.com/jetstack/tarmak/pkg/apis/cluster/v1alpha1" "github.com/jetstack/tarmak/pkg/tarmak/interfaces" - //"github.com/jetstack/tarmak/pkg/tarmak/ssh" "github.com/jetstack/tarmak/pkg/tarmak/utils" ) @@ -30,6 +29,7 @@ var _ 
interfaces.Kubectl = &Kubectl{} type Kubectl struct { tarmak interfaces.Tarmak log *logrus.Entry + tunnel interfaces.Tunnel } func New(tarmak interfaces.Tarmak) *Kubectl { @@ -200,10 +200,6 @@ func (k *Kubectl) ensureWorkingKubeconfig(configPath string, publicAPIEndpoint b } } - // If we are using a public endpoint then we don't need to set up a tunnel - // but we need to keep a tunnel var around so we can close is later on if is - // being used. Use k.stopTunnel(tunnel) to ensure no panics. - var tunnel interfaces.Tunnel if publicAPIEndpoint { cluster.Server = fmt.Sprintf("https://api.%s-%s.%s", k.tarmak.Environment().Name(), @@ -211,14 +207,14 @@ func (k *Kubectl) ensureWorkingKubeconfig(configPath string, publicAPIEndpoint b k.tarmak.Provider().PublicZone()) } else { - tunnel = k.tarmak.Cluster().APITunnel() - if err := tunnel.Start(); err != nil { - k.stopTunnel(tunnel) + k.tunnel = k.tarmak.Cluster().APITunnel() + if err := k.tunnel.Start(); err != nil { + k.stopTunnel() return err } - cluster.Server = fmt.Sprintf("https://%s:%d", - tunnel.BindAddress(), tunnel.Port()) + cluster.Server = fmt.Sprintf("https://%s:%s", + k.tunnel.BindAddress(), k.tunnel.Port()) k.log.Warnf("ssh tunnel connecting to Kubernetes API server will close after 10 minutes: %s", cluster.Server) } @@ -226,7 +222,7 @@ func (k *Kubectl) ensureWorkingKubeconfig(configPath string, publicAPIEndpoint b var err error retries := 5 for { - k.log.Debugf("trying to connect to %+v", cluster.Server) + k.log.Debugf("trying to connect to %s", cluster.Server) var version string version, err = k.verifyAPIVersion(*c) @@ -254,9 +250,9 @@ func (k *Kubectl) ensureWorkingKubeconfig(configPath string, publicAPIEndpoint b } if !publicAPIEndpoint { - k.stopTunnel(tunnel) - tunnel = k.tarmak.Cluster().APITunnel() - err = tunnel.Start() + k.stopTunnel() + k.tunnel = k.tarmak.Cluster().APITunnel() + err = k.tunnel.Start() if err != nil { break } @@ -265,17 +261,17 @@ func (k *Kubectl) 
ensureWorkingKubeconfig(configPath string, publicAPIEndpoint b // ensure we close the tunnel on error if err != nil { - k.stopTunnel(tunnel) + k.stopTunnel() return err } if err := utils.EnsureDirectory(filepath.Dir(configPath), 0700); err != nil { - k.stopTunnel(tunnel) + k.stopTunnel() return err } if err := clientcmd.WriteToFile(*c, configPath); err != nil { - k.stopTunnel(tunnel) + k.stopTunnel() return err } @@ -349,8 +345,8 @@ func (k *Kubectl) Kubeconfig(path string, publicAPIEndpoint bool) (string, error return fmt.Sprintf("KUBECONFIG=%s", path), nil } -func (k *Kubectl) stopTunnel(tunnel interfaces.Tunnel) { - if tunnel != nil { - tunnel.Stop() +func (k *Kubectl) stopTunnel() { + if k.tunnel != nil { + k.tunnel.Stop() } } diff --git a/pkg/tarmak/provider/amazon/hosts.go b/pkg/tarmak/provider/amazon/hosts.go index 5391fe76e7..9c3c206e17 100644 --- a/pkg/tarmak/provider/amazon/hosts.go +++ b/pkg/tarmak/provider/amazon/hosts.go @@ -4,7 +4,6 @@ package amazon import ( "fmt" "os" - "path/filepath" "strings" "github.com/aws/aws-sdk-go/aws" @@ -194,8 +193,3 @@ func (a *Amazon) ListHosts(c interfaces.Cluster) ([]interfaces.Host, error) { return hostsInterfaces, nil } - -func (h *host) SSHControlPath() string { - return filepath.Join(os.TempDir(), fmt.Sprintf( - "ssh-control-%s@%s:22", h.user, h.hostname)) -} diff --git a/pkg/tarmak/snapshot/consul/consul.go b/pkg/tarmak/snapshot/consul/consul.go new file mode 100644 index 0000000000..dffdf3b52d --- /dev/null +++ b/pkg/tarmak/snapshot/consul/consul.go @@ -0,0 +1,129 @@ +// Copyright Jetstack Ltd. See LICENSE for details. 
+package consul + +import ( + "fmt" + "io" + "time" + + "github.com/sirupsen/logrus" + + clusterv1alpha1 "github.com/jetstack/tarmak/pkg/apis/cluster/v1alpha1" + "github.com/jetstack/tarmak/pkg/tarmak/interfaces" + "github.com/jetstack/tarmak/pkg/tarmak/snapshot" +) + +var _ interfaces.Snapshot = &Consul{} + +const ( + consulCmd = "consul snapshot %s %s > /dev/null;" +) + +var ( + envCmd = []string{"CONSUL_HTTP_TOKEN=$(sudo cat /etc/consul/consul.json | jq -r '.acl_master_token')"} +) + +type Consul struct { + tarmak interfaces.Tarmak + ssh interfaces.SSH + log *logrus.Entry + + path string + aliases []string +} + +func New(tarmak interfaces.Tarmak, path string) *Consul { + return &Consul{ + tarmak: tarmak, + ssh: tarmak.SSH(), + log: tarmak.Log(), + path: path, + } +} + +func (c *Consul) Save() error { + aliases, err := snapshot.Prepare(c.tarmak, clusterv1alpha1.InstancePoolTypeVault) + if err != nil { + return err + } + c.aliases = aliases + + c.log.Infof("saving snapshot from instance %s", aliases[0]) + + hostPath := fmt.Sprintf("/tmp/consul-snapshot-%s.snap", + time.Now().Format(snapshot.TimeLayout)) + cmdArgs := fmt.Sprintf(`set -e; +export CONSUL_HTTP_TOKEN=$(sudo cat /etc/consul/consul.json | jq -r '.acl_master_token'); +export DATACENTER=$(sudo cat /etc/consul/consul.json | jq -r '.datacenter'); +/usr/local/bin/consul snapshot save -datacenter $DATACENTER %s; +/usr/local/bin/consul snapshot inspect %s`, hostPath, hostPath) + + err = snapshot.SSHCmd(c, aliases[0], cmdArgs, nil, nil, nil) + if err != nil { + return err + } + + reader, writer := io.Pipe() + err = snapshot.TarFromStream(func() error { + err := snapshot.SSHCmd(c, aliases[0], fmt.Sprintf(snapshot.GZipCCmd, hostPath), + nil, writer, nil) + writer.Close() + return err + }, reader, c.path) + if err != nil { + return err + } + + c.log.Infof("consul snapshot saved to %s", c.path) + + return nil +} + +func (c *Consul) Restore() error { + aliases, err := snapshot.Prepare(c.tarmak, 
clusterv1alpha1.InstancePoolTypeVault) + if err != nil { + return err + } + c.aliases = aliases + + for _, a := range aliases { + c.log.Infof("restoring snapshot to instance %s", a) + + reader, writer := io.Pipe() + hostPath := fmt.Sprintf("/tmp/consul-snapshot-%s.snap", + time.Now().Format(snapshot.TimeLayout)) + + err = snapshot.TarToStream(func() error { + err := snapshot.SSHCmd(c, a, fmt.Sprintf(snapshot.GZipDCmd, hostPath), reader, nil, nil) + return err + }, writer, c.path) + if err != nil { + return err + } + + cmdArgs := fmt.Sprintf(`set -e; +export CONSUL_HTTP_TOKEN=$(sudo cat /etc/consul/consul.json | jq -r '.acl_master_token'); +export DATACENTER=$(sudo cat /etc/consul/consul.json | jq -r '.datacenter'); +/usr/local/bin/consul snapshot inspect %s; +/usr/local/bin/consul snapshot restore -datacenter $DATACENTER %s; +echo number of keys: $(curl --header "X-Consul-Token: $CONSUL_HTTP_TOKEN" -s 'http://127.0.0.1:8500/v1/kv/?keys' | jq '. | length'); +`, hostPath, hostPath) + + err = snapshot.SSHCmd(c, a, cmdArgs, nil, nil, nil) + if err != nil { + return err + } + } + + c.log.Infof("consul snapshot restored from %s", c.path) + + return nil +} + +func (c *Consul) Log() *logrus.Entry { + return c.log +} + +func (c *Consul) SSH() interfaces.SSH { + return c.ssh +} diff --git a/pkg/tarmak/snapshot/etcd/etcd.go b/pkg/tarmak/snapshot/etcd/etcd.go new file mode 100644 index 0000000000..54b6e35779 --- /dev/null +++ b/pkg/tarmak/snapshot/etcd/etcd.go @@ -0,0 +1,348 @@ +// Copyright Jetstack Ltd. See LICENSE for details. 
+package etcd + +import ( + "crypto/rand" + "encoding/base64" + "fmt" + "io" + "strings" + "sync" + "time" + + "github.com/hashicorp/go-multierror" + "github.com/sirupsen/logrus" + + clusterv1alpha1 "github.com/jetstack/tarmak/pkg/apis/cluster/v1alpha1" + "github.com/jetstack/tarmak/pkg/tarmak/interfaces" + "github.com/jetstack/tarmak/pkg/tarmak/snapshot" + "github.com/jetstack/tarmak/pkg/tarmak/utils/consts" +) + +var _ interfaces.Snapshot = &Etcd{} + +const ( + etcdctlCmd = `/opt/bin/etcdctl snapshot %s %s` +) + +var ( + stores = []map[string]string{ + {"cluster": consts.RestoreK8sMainFlagName, "file": "k8s", "client_port": "2379", "peer_port": "2380"}, + {"cluster": consts.RestoreK8sEventsFlagName, "file": "k8s", "client_port": "2369", "peer_port": "2370"}, + {"cluster": consts.RestoreOverlayFlagName, "file": "overlay", "client_port": "2359", "peer_port": "2360"}, + } + + envCmd = ` +set -e; +export ETCDCTL_CERT=/etc/etcd/ssl/etcd-{{file}}.pem; +export ETCDCTL_KEY=/etc/etcd/ssl/etcd-{{file}}-key.pem; +export ETCDCTL_CACERT=/etc/etcd/ssl/etcd-{{file}}-ca.pem; +export ETCDCTL_API=3; +export ETCDCTL_ENDPOINTS=https://127.0.0.1:{{client_port}}; +` +) + +type Etcd struct { + tarmak interfaces.Tarmak + ssh interfaces.SSH + log *logrus.Entry + ctx interfaces.CancellationContext + + path string + aliases []string +} + +func New(tarmak interfaces.Tarmak, path string) *Etcd { + return &Etcd{ + tarmak: tarmak, + ssh: tarmak.SSH(), + ctx: tarmak.CancellationContext(), + log: tarmak.Log(), + path: path, + } +} + +func (e *Etcd) Save() error { + aliases, err := snapshot.Prepare(e.tarmak, clusterv1alpha1.InstancePoolTypeEtcd) + if err != nil { + return err + } + e.aliases = aliases + + e.log.Infof("saving snapshots from instance %s", aliases[0]) + + var wg sync.WaitGroup + var result *multierror.Error + var errLock sync.Mutex + + saveFunc := func(store map[string]string) { + defer wg.Done() + + hostPath := fmt.Sprintf("/tmp/etcd-snapshot-%s-%s.db", + store["cluster"], 
time.Now().Format(snapshot.TimeLayout)) + + cmdArgs := fmt.Sprintf(`sudo /bin/bash -c "%s %s"`, e.template(envCmd, store), + fmt.Sprintf(etcdctlCmd, "save", hostPath)) + err = snapshot.SSHCmd(e, aliases[0], cmdArgs, nil, nil, nil) + if err != nil { + + errLock.Lock() + result = multierror.Append(result, err) + errLock.Unlock() + + return + } + + targetPath := fmt.Sprintf("%s%s.db", e.path, store["cluster"]) + reader, writer := io.Pipe() + err = snapshot.TarFromStream(func() error { + err := snapshot.SSHCmd(e, aliases[0], fmt.Sprintf(snapshot.GZipCCmd, hostPath), + nil, writer, nil) + writer.Close() + return err + }, reader, targetPath) + if err != nil { + errLock.Lock() + result = multierror.Append(result, err) + errLock.Unlock() + + return + } + + e.log.Infof("etcd %s snapshot saved to %s", store["cluster"], targetPath) + + select { + case <-e.ctx.Done(): + return + default: + } + } + + wg.Add(len(stores)) + for _, store := range stores { + go saveFunc(store) + } + + wg.Wait() + + select { + case <-e.ctx.Done(): + return e.ctx.Err() + default: + } + + return result.ErrorOrNil() +} + +func (e *Etcd) Restore() error { + aliases, err := snapshot.Prepare(e.tarmak, clusterv1alpha1.InstancePoolTypeEtcd) + if err != nil { + return err + } + e.aliases = aliases + + restoreFunc := func(host, path, token string, store map[string]string) error { + reader, writer := io.Pipe() + hostPath := fmt.Sprintf("/tmp/etcd-snapshot-%s-%s.db", + store["cluster"], time.Now().Format(snapshot.TimeLayout)) + + err = snapshot.TarToStream(func() error { + err := snapshot.SSHCmd(e, host, fmt.Sprintf(snapshot.GZipDCmd, hostPath), reader, nil, nil) + return err + }, writer, path) + if err != nil { + return err + } + + cmdArgs := fmt.Sprintf(`set -e; +sudo systemctl stop etcd-%s +`, store["cluster"]) + err = snapshot.SSHCmd(e, host, cmdArgs, nil, nil, nil) + if err != nil { + return err + } + + cmdArgs = e.template(`set -e; +sudo mkdir -p /var/lib/etcd_backup; +sudo rsync -a --delete 
--ignore-missing-args /var/lib/etcd/{{cluster}} /var/lib/etcd_backup/; +sudo rm -rf /var/lib/etcd/{{cluster}}; +`, store) + err = snapshot.SSHCmd(e, host, cmdArgs, nil, nil, nil) + if err != nil { + return err + } + + initialCluster := e.initialClusterString(host, store) + for _, a := range aliases[1:] { + initialCluster = strings.Join( + []string{initialCluster, e.initialClusterString(a, store)}, ",", + ) + } + + cmdArgs = e.template(fmt.Sprintf(`set -e; +sudo ETCDCTL_API=3 /opt/bin/etcdctl snapshot restore %s \ +--name=%s.%s.%s \ +--data-dir=/var/lib/etcd/{{cluster}} \ +--initial-advertise-peer-urls=https://%s.%s.%s:{{peer_port}} \ +--initial-cluster=%s \ +--initial-cluster-token=etcd-{{cluster}}-%s +`, + hostPath, + host, e.clusterName(), e.privateZone(), + host, e.clusterName(), e.privateZone(), + initialCluster, + token, + ), store) + err = snapshot.SSHCmd(e, host, cmdArgs, nil, nil, nil) + if err != nil { + return err + } + + cmdArgs = e.template(`set -e; + sudo chown -R etcd:etcd /var/lib/etcd/{{cluster}} + `, store) + err = snapshot.SSHCmd(e, host, cmdArgs, nil, nil, nil) + if err != nil { + return err + } + + return nil + } + + startEtcdFunc := func(host string, store map[string]string) error { + cmdArgs := e.template(`set -e; +sudo systemctl start etcd-{{cluster}} + `, store) + err = snapshot.SSHCmd(e, host, cmdArgs, nil, nil, nil) + if err != nil { + return err + } + + return nil + } + + healthCheckFunc := func(host string, store map[string]string) error { + endpoints := e.endpointsString(host, store) + for _, a := range aliases[1:] { + endpoints = strings.Join( + []string{endpoints, e.endpointsString(a, store)}, ",", + ) + } + + cmdArgs := e.template(fmt.Sprint(`%s +sudo /opt/bin/etcdctl endpoint health +--endpoints=%s + `, envCmd, endpoints), store) + err = snapshot.SSHCmd(e, host, cmdArgs, nil, nil, nil) + if err != nil { + return err + } + + return nil + } + + for _, store := range stores { + value := e.restoreFlagValue(store["cluster"]) + if value 
== "" { + continue + } + + b := make([]byte, 32) + _, err := rand.Read(b) + if err != nil { + return fmt.Errorf("failed to create random etcd initial token: %s", err) + } + token := base64.URLEncoding.EncodeToString(b) + + for _, a := range aliases { + e.log.Infof("restoring etcd %s on host %s", store["cluster"], a) + err := restoreFunc(a, value, token, store) + if err != nil { + return err + } + } + + for _, a := range aliases { + e.log.Infof("starting etcd %s on host %s", store["cluster"], a) + err := startEtcdFunc(a, store) + if err != nil { + return err + } + } + + for _, a := range aliases { + e.log.Infof("checking health of etcd %s on host %s", store["cluster"], a) + err := healthCheckFunc(a, store) + if err != nil { + return err + } + } + + e.log.Infof("successfully restored etcd cluster %s with snapshot %s", store["cluster"], value) + } + + e.log.Info("restarting API servers on master hosts") + masters, err := snapshot.Prepare(e.tarmak, clusterv1alpha1.InstancePoolTypeMaster) + for _, master := range masters { + cmdArgs := " sudo systemctl restart kube-apiserver" + err = snapshot.SSHCmd(e, master, cmdArgs, nil, nil, nil) + if err != nil { + return err + } + } + + return nil +} + +func (e *Etcd) initialClusterString(host string, store map[string]string) string { + return fmt.Sprintf("%s.%s.%s=https://%s.%s.%s:%s", + host, e.clusterName(), e.privateZone(), + host, e.clusterName(), e.privateZone(), store["peer_port"]) +} + +func (e *Etcd) endpointsString(host string, store map[string]string) string { + return fmt.Sprintf("%s.%s.%s=https://%s.%s.%s:%s", + host, e.clusterName(), e.privateZone(), + host, e.clusterName(), e.privateZone(), store["client_port"]) +} + +func (e *Etcd) template(args string, vars map[string]string) string { + for k, v := range vars { + args = strings.Replace(args, fmt.Sprintf("{{%s}}", k), v, -1) + } + + return args +} + +func (e *Etcd) Log() *logrus.Entry { + return e.log +} + +func (e *Etcd) SSH() interfaces.SSH { + return e.ssh +} + 
+func (e *Etcd) clusterName() string { + return e.tarmak.Cluster().ClusterName() +} + +func (e *Etcd) privateZone() string { + return e.tarmak.Environment().Config().PrivateZone +} + +func (e *Etcd) restoreFlagValue(flag string) string { + rf := e.tarmak.ClusterFlags().Snapshot.Etcd.Restore + for _, db := range []struct { + name, value string + }{ + {consts.RestoreK8sMainFlagName, rf.K8sMain}, + {consts.RestoreK8sEventsFlagName, rf.K8sEvents}, + {consts.RestoreOverlayFlagName, rf.Overlay}, + } { + if db.name == flag { + return db.value + } + } + + return "" +} diff --git a/pkg/tarmak/snapshot/snapshot.go b/pkg/tarmak/snapshot/snapshot.go new file mode 100644 index 0000000000..c0a72c30a1 --- /dev/null +++ b/pkg/tarmak/snapshot/snapshot.go @@ -0,0 +1,191 @@ +// Copyright Jetstack Ltd. See LICENSE for details. +package snapshot + +import ( + "bufio" + "compress/gzip" + "fmt" + "io" + "os" + "sync" + + "github.com/hashicorp/go-multierror" + + "github.com/jetstack/tarmak/pkg/tarmak/interfaces" + "github.com/jetstack/tarmak/pkg/tarmak/utils" +) + +const ( + TimeLayout = "2006-01-02_15-04-05" + GZipCCmd = "gzip -c %s" + GZipDCmd = "gzip -d > %s" +) + +func Prepare(tarmak interfaces.Tarmak, role string) (aliases []string, err error) { + if err := tarmak.SSH().WriteConfig(tarmak.Cluster()); err != nil { + return nil, err + } + + hosts, err := tarmak.Cluster().ListHosts() + if err != nil { + return nil, err + } + + var result *multierror.Error + for _, host := range hosts { + if utils.SliceContainsPrefix(host.Roles(), role) { + if len(host.Aliases()) == 0 { + err := fmt.Errorf( + "host with correct role '%v' found without alias: %v", + host.Roles(), + host.ID(), + ) + result = multierror.Append(result, err) + continue + } + + aliases = append(aliases, host.Aliases()[0]) + } + } + + if result != nil { + return nil, result + } + + if len(aliases) == 0 { + return nil, fmt.Errorf("no host aliases were found with role %s", role) + } + + return aliases, result.ErrorOrNil() +} + 
+func TarFromStream(sshCmd func() error, stream io.Reader, path string) error { + var result *multierror.Error + var errLock sync.Mutex + var wg sync.WaitGroup + + f, err := os.Create(path) + if err != nil { + return err + } + defer f.Close() + + wg.Add(1) + go func() { + defer wg.Done() + err = sshCmd() + if err != nil { + + errLock.Lock() + result = multierror.Append(result, err) + errLock.Unlock() + + } + return + }() + + gzr, err := gzip.NewReader(stream) + if err != nil { + + errLock.Lock() + result = multierror.Append(result, err) + errLock.Unlock() + + } + defer gzr.Close() + + if _, err := io.Copy(f, gzr); err != nil { + + errLock.Lock() + result = multierror.Append(result, err) + errLock.Unlock() + + } + + wg.Wait() + + if result != nil { + return result + } + + return nil +} + +func TarToStream(sshCmd func() error, stream io.WriteCloser, src string) error { + var result *multierror.Error + var errLock sync.Mutex + var wg sync.WaitGroup + + f, err := os.Open(src) + if err != nil { + return err + } + defer f.Close() + + wg.Add(1) + go func() { + defer wg.Done() + err = sshCmd() + if err != nil { + + errLock.Lock() + result = multierror.Append(result, err) + errLock.Unlock() + + } + return + }() + + gzw := gzip.NewWriter(stream) + defer gzw.Close() + if _, err := io.Copy(gzw, f); err != nil { + + errLock.Lock() + result = multierror.Append(result, err) + errLock.Unlock() + + } + + f.Close() + gzw.Close() + stream.Close() + wg.Wait() + + if result != nil { + return result + } + + return nil +} + +func SSHCmd(s interfaces.Snapshot, host, cmd string, stdin io.Reader, stdout, stderr io.Writer) error { + fmt.Printf("$ %s\n", cmd) + + for _, w := range []struct { + writer io.Writer + out string + }{ + {stdout, "out"}, + {stderr, "err"}, + } { + + if w.writer == nil { + var reader *io.PipeReader + reader, w.writer = io.Pipe() + scanner := bufio.NewScanner(reader) + + go func() { + for scanner.Scan() { + s.Log().WithField("std", w.out).Warn(scanner.Text()) + } + }() 
+ } + } + + ret, err := s.SSH().Execute(host, cmd, stdin, stdout, stderr) + if ret != 0 { + return fmt.Errorf("command [%s] returned non-zero (%d): %s", cmd, ret, err) + } + + return err +} diff --git a/pkg/tarmak/ssh.go b/pkg/tarmak/ssh.go index 8711ed76b0..789dcd2e5b 100644 --- a/pkg/tarmak/ssh.go +++ b/pkg/tarmak/ssh.go @@ -9,14 +9,18 @@ func (t *Tarmak) SSH() interfaces.SSH { return t.ssh } -func (t *Tarmak) SSHPassThrough(argsAdditional []string) { +func (t *Tarmak) SSHPassThrough(host string, argsAdditional string) error { if err := t.ssh.WriteConfig(t.Cluster()); err != nil { - t.log.Fatal(err) + return err } if err := t.ssh.Validate(); err != nil { - t.log.Fatal(err) + return err } - t.ssh.PassThrough(argsAdditional) + if err := t.ssh.PassThrough(host, argsAdditional); err != nil { + return err + } + + return nil } diff --git a/pkg/tarmak/ssh/ssh.go b/pkg/tarmak/ssh/ssh.go index 4c82dd8618..0c9d4af651 100644 --- a/pkg/tarmak/ssh/ssh.go +++ b/pkg/tarmak/ssh/ssh.go @@ -6,15 +6,18 @@ import ( "encoding/pem" "errors" "fmt" + "io" "io/ioutil" + "net" "os" - "os/exec" "path/filepath" - "syscall" + "time" - "github.com/hashicorp/go-multierror" "github.com/sirupsen/logrus" + "golang.org/x/crypto/ssh" + "golang.org/x/crypto/ssh/terminal" + clusterv1alpha1 "github.com/jetstack/tarmak/pkg/apis/cluster/v1alpha1" "github.com/jetstack/tarmak/pkg/tarmak/interfaces" "github.com/jetstack/tarmak/pkg/tarmak/utils" ) @@ -25,7 +28,8 @@ type SSH struct { tarmak interfaces.Tarmak log *logrus.Entry - controlPaths []string + hosts map[string]interfaces.Host + tunnels []interfaces.Tunnel } func New(tarmak interfaces.Tarmak) *SSH { @@ -38,7 +42,6 @@ func New(tarmak interfaces.Tarmak) *SSH { } func (s *SSH) WriteConfig(c interfaces.Cluster) error { - hosts, err := c.ListHosts() if err != nil { return err @@ -47,13 +50,18 @@ func (s *SSH) WriteConfig(c interfaces.Cluster) error { var sshConfig bytes.Buffer sshConfig.WriteString(fmt.Sprintf("# ssh config for tarmak cluster %s\n", 
c.ClusterName())) + s.hosts = make(map[string]interfaces.Host) for _, host := range hosts { _, err = sshConfig.WriteString(host.SSHConfig()) if err != nil { return err } - s.controlPaths = append(s.controlPaths, host.SSHControlPath()) + if len(host.Aliases()) == 0 { + return fmt.Errorf("found host with no aliases: %s", host.Hostname()) + } + + s.hosts[host.Aliases()[0]] = host } err = utils.EnsureDirectory(filepath.Dir(c.SSHConfigPath()), 0700) @@ -69,58 +77,110 @@ func (s *SSH) WriteConfig(c interfaces.Cluster) error { return nil } -func (s *SSH) args() []string { - return []string{ - "ssh", - "-F", - s.tarmak.Cluster().SSHConfigPath(), +// Pass through a local CLI session +func (s *SSH) PassThrough(hostName string, argsAdditional string) error { + if argsAdditional != "" { + _, err := s.Execute(hostName, argsAdditional, nil, nil, nil) + return err } -} -// Pass through a local CLI session -func (s *SSH) PassThrough(argsAdditional []string) { - args := append(s.args(), argsAdditional...) + client, err := s.client(hostName) + if err != nil { + return err + } + defer client.Close() + + sess, err := client.NewSession() + if err != nil { + return err + } + defer sess.Close() + + sess.Stderr = os.Stderr + sess.Stdout = os.Stdout + sess.Stdin = os.Stdin + + modes := ssh.TerminalModes{ + ssh.ECHO: 1, + ssh.TTY_OP_ISPEED: 14400, // input speed = 14.4kbaud + ssh.TTY_OP_OSPEED: 14400, // output speed = 14.4kbaud + } + + fileDescriptor := int(os.Stdin.Fd()) + if terminal.IsTerminal(fileDescriptor) { + originalState, err := terminal.MakeRaw(fileDescriptor) + if err != nil { + return err + } + defer terminal.Restore(fileDescriptor, originalState) - cmd := exec.Command(args[0], args[1:len(args)]...) 
- cmd.Stderr = os.Stderr - cmd.Stdout = os.Stdout - cmd.Stdin = os.Stdin + termWidth, termHeight, err := terminal.GetSize(fileDescriptor) + if err != nil { + return err + } - err := cmd.Start() + err = sess.RequestPty("xterm-256color", termHeight, termWidth, modes) + if err != nil { + return err + } + } + + if err := sess.Shell(); err != nil { + return err + } + + if err := sess.Wait(); err != nil { + return err + } + + return nil +} + +func (s *SSH) Execute(host string, cmd string, stdin io.Reader, stdout, stderr io.Writer) (int, error) { + client, err := s.client(host) if err != nil { - s.log.Fatal(err) + return -1, err } + defer client.Close() - err = cmd.Wait() + sess, err := client.NewSession() if err != nil { - s.log.Fatal(err) + return -1, err } -} + defer sess.Close() -func (s *SSH) Execute(host string, command string, argsAdditional []string) (returnCode int, err error) { - args := append(s.args(), host, "--", command) - args = append(args, argsAdditional...) + if stderr == nil { + sess.Stderr = os.Stderr + } else { + sess.Stderr = stderr + } - cmd := exec.Command(args[0], args[1:len(args)]...) 
+ if stdout == nil { + sess.Stdout = os.Stdout + } else { + sess.Stdout = stdout + } + + if stdin == nil { + sess.Stdin = os.Stdin + } else { + sess.Stdin = stdin + } - err = cmd.Start() + err = sess.Start(cmd) if err != nil { return -1, err } - err = cmd.Wait() - if err != nil { - perr, ok := err.(*exec.ExitError) - if ok { - if status, ok := perr.Sys().(syscall.WaitStatus); ok { - return status.ExitStatus(), nil - } + if err := sess.Wait(); err != nil { + if e, ok := err.(*ssh.ExitError); ok { + return e.ExitStatus(), e } + return -1, err } return 0, nil - } func (s *SSH) Validate() error { @@ -163,14 +223,103 @@ func (s *SSH) Validate() error { return nil } -func (s *SSH) Cleanup() error { - var result *multierror.Error +func (s *SSH) client(hostName string) (*ssh.Client, error) { + conf, err := s.config() + if err != nil { + return nil, err + } - for _, c := range utils.RemoveDuplicateStrings(s.controlPaths) { - if err := os.RemoveAll(c); err != nil { - result = multierror.Append(result, err) - } + bastion, err := s.host(clusterv1alpha1.InstancePoolTypeBastion) + if err != nil { + return nil, err + } + + proxyClient, err := ssh.Dial("tcp", net.JoinHostPort(bastion.Hostname(), "22"), conf) + if err != nil { + return nil, fmt.Errorf("failed to set up connection to bastion: %s", err) + } + + // ssh into bastion so no need to set up proxy hop + if hostName == clusterv1alpha1.InstancePoolTypeBastion { + return proxyClient, nil + } + + host, err := s.host(hostName) + if err != nil { + return nil, err + } + + conn, err := proxyClient.Dial("tcp", net.JoinHostPort(host.Hostname(), "22")) + if err != nil { + return nil, fmt.Errorf("failed to set up connection to %s from basiton: %s", host.Hostname(), err) + } + + ncc, chans, reqs, err := ssh.NewClientConn(conn, net.JoinHostPort(host.Hostname(), "22"), conf) + if err != nil { + return nil, fmt.Errorf("failed to set up ssh client: %s", err) + } + + return ssh.NewClient(ncc, chans, reqs), nil +} + +func (s *SSH) config() 
(*ssh.ClientConfig, error) { + bastion, err := s.host(clusterv1alpha1.InstancePoolTypeBastion) + if err != nil { + return nil, err + } + + b, err := ioutil.ReadFile(s.tarmak.Environment().SSHPrivateKeyPath()) + if err != nil { + return nil, fmt.Errorf("failed to read ssh private key: %s", err) + } + + signer, err := ssh.ParsePrivateKey(b) + if err != nil { + return nil, fmt.Errorf("failed to parse ssh private key: %s", err) + } + + return &ssh.ClientConfig{ + Timeout: time.Minute * 10, + User: bastion.User(), + Auth: []ssh.AuthMethod{ssh.PublicKeys(signer)}, + HostKeyCallback: ssh.InsecureIgnoreHostKey(), + }, nil +} + +func (s *SSH) host(name string) (interfaces.Host, error) { + host, ok := s.hosts[name] + if ok { + return host, nil } - return result.ErrorOrNil() + // we already have all hosts, we can't find it + if len(s.hosts) > 0 { + return nil, fmt.Errorf("failed to resolve host: %s", name) + } + + err := s.WriteConfig(s.tarmak.Cluster()) + if err != nil { + return nil, err + } + + _, bok := s.hosts[clusterv1alpha1.InstancePoolTypeBastion] + err = fmt.Errorf("failed to resolve target hosts for ssh: found %s=%v", + clusterv1alpha1.InstancePoolTypeBastion, + bok) + if !bok && name == clusterv1alpha1.InstancePoolTypeBastion { + return nil, err + } + + host, hok := s.hosts[name] + if !hok { + return nil, fmt.Errorf("%s %s=%v", err, name, hok) + } + + return host, nil +} + +func (s *SSH) Cleanup() { + for _, tunnel := range s.tunnels { + tunnel.Stop() + } } diff --git a/pkg/tarmak/ssh/tunnel.go b/pkg/tarmak/ssh/tunnel.go index f5bb6ab7aa..8ec1669633 100644 --- a/pkg/tarmak/ssh/tunnel.go +++ b/pkg/tarmak/ssh/tunnel.go @@ -2,124 +2,215 @@ package ssh import ( + "bufio" "fmt" "io" "net" "os/exec" + "syscall" "time" + "github.com/kardianos/osext" "github.com/sirupsen/logrus" + "golang.org/x/crypto/ssh" + clusterv1alpha1 "github.com/jetstack/tarmak/pkg/apis/cluster/v1alpha1" "github.com/jetstack/tarmak/pkg/tarmak/interfaces" - 
"github.com/jetstack/tarmak/pkg/tarmak/utils" ) type Tunnel struct { - localPort int - log *logrus.Entry - stdin io.WriteCloser + log *logrus.Entry + ssh *SSH + stopCh chan struct{} - retryCount int - retryWait time.Duration + dest string + destPort string + localPort string + daemonize bool - forwardSpec string - sshCommand []string + serverConn *ssh.Client + listener net.Listener + + remoteConns, localConns []net.Conn } var _ interfaces.Tunnel = &Tunnel{} // This opens a local tunnel through a SSH connection -func (s *SSH) Tunnel(hostname string, destination string, destinationPort int) interfaces.Tunnel { - t := &Tunnel{ - localPort: utils.UnusedPort(), - log: s.log.WithField("destination", destination), - retryCount: 30, - retryWait: 500 * time.Millisecond, - sshCommand: s.args(), +func (s *SSH) Tunnel(dest, destPort, localPort string, daemonize bool) interfaces.Tunnel { + tunnel := &Tunnel{ + log: s.log.WithField("destination", dest), + ssh: s, + dest: dest, + destPort: destPort, + daemonize: daemonize, + localPort: localPort, + stopCh: make(chan struct{}), } - t.forwardSpec = fmt.Sprintf("-L%s:%d:%s:%d", t.BindAddress(), t.localPort, destination, destinationPort) - return t + s.tunnels = append(s.tunnels, tunnel) + return tunnel } // Start tunnel and wait till a tcp socket is reachable func (t *Tunnel) Start() error { - var err error - // ensure there is connectivity to the bastion - args := append(t.sshCommand, "bastion", "/bin/true") - cmd := exec.Command(args[0], args[1:len(args)]...) 
+ ret, err := t.ssh.Execute("bastion", "/bin/true", nil, nil, nil) + if err != nil || ret != 0 { + return fmt.Errorf("error checking SSH connection to bastion (%d): %s", ret, err) + } + + if t.daemonize { + err := t.startDaemon() + if err != nil { + return err + } + + // allow for some warm up time + time.Sleep(time.Second * 2) + return nil + } - t.log.Debugf("check SSH connection to bastion cmd=%s", cmd.Args) - err = cmd.Start() + conf, err := t.ssh.config() if err != nil { return err } - // check for errors - err = cmd.Wait() + bastion, err := t.ssh.host(clusterv1alpha1.InstancePoolTypeBastion) if err != nil { - return fmt.Errorf("error checking SSH connecting to bastion: %s", err) + return err } - args = append(t.sshCommand, "-O", "forward", t.forwardSpec, "bastion") - cmd = exec.Command(args[0], args[1:len(args)]...) - - t.log.Debugf("start tunnel cmd=%s", cmd.Args) - err = cmd.Start() + serverConn, err := ssh.Dial("tcp", net.JoinHostPort(bastion.Hostname(), "22"), conf) if err != nil { return err } + t.serverConn = serverConn - // check for errors - err = cmd.Wait() + listener, err := net.Listen("tcp", net.JoinHostPort(t.BindAddress(), t.Port())) if err != nil { - return fmt.Errorf("error starting SSH tunnel via bastion: %s", err) + return err } + t.listener = listener + + go t.handle() + + return nil +} - // wait for TCP socket to be reachable - tries := t.retryCount +func (t *Tunnel) handle() { + tries := 5 for { - if conn, err := net.DialTimeout("tcp", fmt.Sprintf("127.0.0.1:%d", t.Port()), t.retryWait); err != nil { - t.log.Debug("error connecting to tunnel: ", err) - } else { - conn.Close() - return nil + remoteConn, err := t.serverConn.Dial("tcp", + net.JoinHostPort(t.dest, t.destPort)) + if err != nil { + t.log.Warnf("failed to create tunnel to remote connection: %s", err) + tries-- + if tries == 0 { + t.log.Error("failed to create tunnel after 5 tries") + return + } + + time.Sleep(time.Second * 3) + continue } - - tries -= 1 - if tries == 0 { - break + 
t.remoteConns = append(t.remoteConns, remoteConn) + + conn, err := t.listener.Accept() + if err != nil { + select { + case <-t.stopCh: + return + default: + } + + t.log.Warnf("error accepting ssh tunnel connection: %s", err) + continue } - time.Sleep(t.retryWait) - } + t.localConns = append(t.localConns, conn) - return fmt.Errorf("could not establish a connection to destination via tunnel after %d tries", t.retryCount) -} + go func() { + io.Copy(remoteConn, conn) + remoteConn.Close() + }() -func (t *Tunnel) Stop() error { - args := append(t.sshCommand, "-O", "cancel", t.forwardSpec, "bastion") - cmd := exec.Command(args[0], args[1:len(args)]...) + go func() { + io.Copy(conn, remoteConn) + conn.Close() + }() + } +} - t.log.Debugf("stop tunnel cmd=%s", cmd.Args) - err := cmd.Start() - if err != nil { - return err +func (t *Tunnel) Stop() { + select { + case <-t.stopCh: + default: + close(t.stopCh) } - // check for errors - err = cmd.Wait() - if err != nil { - t.log.Warn("stopping ssh tunnel failed with error: ", err) + for _, l := range t.localConns { + if l != nil { + l.Close() + } + } + for _, r := range t.remoteConns { + if r != nil { + r.Close() + } } - return nil + if t.listener != nil { + t.listener.Close() + } + if t.serverConn != nil { + t.serverConn.Close() + } } -func (t *Tunnel) Port() int { +func (t *Tunnel) Port() string { return t.localPort } func (t *Tunnel) BindAddress() string { return "127.0.0.1" } + +func (t *Tunnel) startDaemon() error { + binaryPath, err := osext.Executable() + if err != nil { + return fmt.Errorf("error finding tarmak executable: %s", err) + } + + cmd := exec.Command(binaryPath, "tunnel", t.dest, t.destPort, t.localPort) + + outR, outW := io.Pipe() + errR, errW := io.Pipe() + outS := bufio.NewScanner(outR) + errS := bufio.NewScanner(errR) + + cmd.Stdin = nil + cmd.Stdout = outW + cmd.Stderr = errW + + go func() { + for outS.Scan() { + t.log.WithField("tunnel", t.dest).Debug(outS.Text()) + } + }() + go func() { + for errS.Scan() { 
+ t.log.WithField("tunnel", t.dest).Debug(errS.Text()) + } + }() + + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, + Foreground: false, + } + + if err := cmd.Start(); err != nil { + return err + } + + return nil +} diff --git a/pkg/tarmak/tarmak.go b/pkg/tarmak/tarmak.go index e0faa5585c..3a092f75dd 100644 --- a/pkg/tarmak/tarmak.go +++ b/pkg/tarmak/tarmak.go @@ -361,13 +361,11 @@ func (t *Tarmak) Cleanup() { t.rootPath = nil } - if err := t.SSH().Cleanup(); err != nil { - t.log.Warnf("error cleaning up ssh run time assets: %s", err) - } - if err := t.terraform.Cleanup(); err != nil { t.log.Warnf("error cleaning up terraform run time assets: %s", err) } + + t.ssh.Cleanup() } func (t *Tarmak) Variables() map[string]interface{} { diff --git a/pkg/tarmak/utils/consts/consts.go b/pkg/tarmak/utils/consts/consts.go index 98c07253f4..4ca16fb8d1 100644 --- a/pkg/tarmak/utils/consts/consts.go +++ b/pkg/tarmak/utils/consts/consts.go @@ -9,4 +9,8 @@ const ( DefaultKubeconfigPath = "${TARMAK_CONFIG}/${CURRENT_CLUSTER}/kubeconfig" KubeconfigFlagName = "public-api-endpoint" + + RestoreK8sMainFlagName = "k8s-main" + RestoreK8sEventsFlagName = "k8s-events" + RestoreOverlayFlagName = "overlay" ) diff --git a/pkg/tarmak/utils/context.go b/pkg/tarmak/utils/context.go index 8be5a9f9eb..968a315542 100644 --- a/pkg/tarmak/utils/context.go +++ b/pkg/tarmak/utils/context.go @@ -110,7 +110,7 @@ func (c *CancellationContext) WaitOrCancelReturnCode(f func() (int, error)) { wg.Done() return case <-time.After(time.Second * 3): - log.Warn("tarmak is shutting down") + log.Warn("tarmak is shutting down...") log.Warn("* tarmak will attempt to kill the current task") log.Warn("* send another SIGTERM or SIGINT (ctrl-c) to exit immediately") } diff --git a/pkg/tarmak/utils/slices.go b/pkg/tarmak/utils/slices.go index 292a1c2bee..938979232e 100644 --- a/pkg/tarmak/utils/slices.go +++ b/pkg/tarmak/utils/slices.go @@ -1,6 +1,10 @@ // Copyright Jetstack Ltd. See LICENSE for details. 
package utils +import ( + "strings" +) + func RemoveDuplicateStrings(slice []string) (result []string) { seen := make(map[string]bool) @@ -36,3 +40,12 @@ func SliceContains(slice []string, str string) bool { return false } + +func SliceContainsPrefix(slice []string, prefix string) bool { + for _, s := range slice { + if strings.HasPrefix(s, prefix) { + return true + } + } + return false +} diff --git a/pkg/tarmak/vault/tunnel.go b/pkg/tarmak/vault/tunnel.go index 58d8c000cd..4b9ac2fc3a 100644 --- a/pkg/tarmak/vault/tunnel.go +++ b/pkg/tarmak/vault/tunnel.go @@ -38,7 +38,7 @@ func NewTunnel( } err = vaultClient.SetAddress( fmt.Sprintf( - "https://%s:%d", tunnel.BindAddress(), tunnel.Port(), + "https://%s:%s", tunnel.BindAddress(), tunnel.Port(), ), ) if err != nil { @@ -64,11 +64,11 @@ func (v *vaultTunnel) Start() error { return nil } -func (v *vaultTunnel) Stop() error { - return v.tunnel.Stop() +func (v *vaultTunnel) Stop() { + v.tunnel.Stop() } -func (v *vaultTunnel) Port() int { +func (v *vaultTunnel) Port() string { return v.tunnel.Port() } diff --git a/pkg/tarmak/vault/tunnel_test.go b/pkg/tarmak/vault/tunnel_test.go index f4a4d04721..54775f2a28 100644 --- a/pkg/tarmak/vault/tunnel_test.go +++ b/pkg/tarmak/vault/tunnel_test.go @@ -7,7 +7,6 @@ import ( "net/http" "net/http/httptest" "net/url" - "strconv" "testing" "github.com/jetstack/tarmak/pkg/tarmak/interfaces" @@ -16,10 +15,10 @@ import ( type FakeTunnel struct { bindAddress string - port int + port string } -func (ft *FakeTunnel) Port() int { +func (ft *FakeTunnel) Port() string { return ft.port } @@ -31,8 +30,8 @@ func (ft *FakeTunnel) Start() error { return nil } -func (ft *FakeTunnel) Stop() error { - return nil +func (ft *FakeTunnel) Stop() { + return } var _ interfaces.Tunnel = &FakeTunnel{} @@ -60,13 +59,9 @@ func TestVaultTunnel(t *testing.T) { if err != nil { t.Fatal(err) } - port, err := strconv.Atoi(u.Port()) - if err != nil { - t.Fatal(err) - } tunnel := &FakeTunnel{ bindAddress: u.Hostname(), 
- port: port, + port: u.Port(), } fqdn := "host1.example.com" vaultCA := x509.NewCertPool() diff --git a/pkg/tarmak/vault/vault.go b/pkg/tarmak/vault/vault.go index f39a8d799c..910a26a727 100644 --- a/pkg/tarmak/vault/vault.go +++ b/pkg/tarmak/vault/vault.go @@ -8,6 +8,7 @@ import ( "io/ioutil" "os" "path/filepath" + "strconv" "strings" "sync" "time" @@ -154,7 +155,7 @@ func (v *Vault) createTunnelsWithCA(instances []string, vaultCA string) ([]*vaul for pos := range instances { fqdn := instances[pos] sshTunnel := v.cluster.Environment().Tarmak().SSH().Tunnel( - "bastion", fqdn, 8200, + fqdn, "8200", strconv.Itoa(utils.UnusedPort()), false, ) vaultTunnel, err := NewTunnel( sshTunnel, @@ -198,10 +199,7 @@ func (v *Vault) VerifyInitFromFQDNs(instances []string, vaultCA, vaultKMSKeyID, wg.Add(1) go func(pos int) { defer wg.Done() - err := tunnels[pos].Stop() - if err != nil { - v.log.Warn(err) - } + tunnels[pos].Stop() }(pos) } wg.Wait() diff --git a/puppet/modules/etcd/manifests/init.pp b/puppet/modules/etcd/manifests/init.pp index 1dc08d0d7f..fd4b83f35a 100644 --- a/puppet/modules/etcd/manifests/init.pp +++ b/puppet/modules/etcd/manifests/init.pp @@ -13,6 +13,7 @@ $gid = $::etcd::params::gid, $user = $::etcd::params::user, $group = $::etcd::params::group, + $bin_dir = $::etcd::params::bin_dir, Boolean $backup_enabled = false, Enum['aws:kms',''] $backup_sse = '', String $backup_bucket_prefix = '', diff --git a/puppet/modules/etcd/manifests/install.pp b/puppet/modules/etcd/manifests/install.pp index ccd25240e7..871e70fca1 100644 --- a/puppet/modules/etcd/manifests/install.pp +++ b/puppet/modules/etcd/manifests/install.pp @@ -33,5 +33,8 @@ creates => "${dest_dir}/etcd", path => ['/usr/bin/', '/bin'], } - + -> file { "${::etcd::bin_dir}/etcdctl": + ensure => 'link', + target => "${dest_dir}/etcdctl", + } }