# In [67]:
from sklearn.base import BaseEstimator, TransformerMixin


class DataFrameSelector(BaseEstimator, TransformerMixin):
    """Select a fixed set of DataFrame columns and return them as an array.

    Columns named in ``attribute_names`` that are absent from the input are
    added as all-zero columns, so the output always has one column per
    requested name. The output columns follow the order of
    ``attribute_names`` (duplicates removed, first occurrence kept).

    Parameters
    ----------
    attribute_names : sequence of column labels, optional
        Columns to keep. If ``None`` or empty, every column of the input is
        passed through unchanged.
    """

    def __init__(self, attribute_names=None):
        self.attribute_names = attribute_names

    def fit(self, X, y=None):
        """Do nothing and return ``self``; this transformer has no state."""
        return self

    def transform(self, X):
        """Return the selected columns of ``X`` as ``X[...].values``.

        ``X`` is expected to be a pandas DataFrame with unique column labels
        and is never modified.
        """
        names = self.attribute_names
        # ``len`` instead of truthiness so that arrays / pandas Index objects
        # are accepted as well as lists and tuples.
        if names is None or len(names) == 0:
            return X.values

        # De-duplicate while keeping the caller's order; a plain ``set`` would
        # scramble the column order of the output.
        wanted = list(dict.fromkeys(names))

        # ``reindex`` returns a new frame: missing columns are created filled
        # with 0.0 and the caller's DataFrame is left untouched.
        return X.reindex(columns=wanted, fill_value=0.0).values
                

class Dummifier(BaseEstimator, TransformerMixin):
    """Stateless transformer that passes its input through ``pd.get_dummies``."""

    def __init__(self, cat=True):
        # ``cat`` is stored but is not consulted by ``fit`` or ``transform``.
        self.cat = cat

    def fit(self, X, y=None):
        """Do nothing and return ``self``."""
        return self

    def transform(self, X, y=None):
        """Return ``pd.get_dummies(X)``; ``y`` is ignored."""
        return pd.get_dummies(X)
# In [85]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline


# Preprocessing chain: dummy-encode the frame, keep the columns listed in
# ``columns`` (zero-filling any that are absent), then standardize.
pipeline = Pipeline(
    steps=[
        ('dummies', Dummifier()),
        ('selector', DataFrameSelector(attribute_names=columns)),
        ('std_scaler', StandardScaler()),
    ],
)

# Fit on the training data only; the test data is pushed through the
# already-fitted steps.
train_prepared = pipeline.fit_transform(X_train)
test_prepared = pipeline.transform(X_test)