FTRL(Follow The Regularized Leader)是一种优化方法,就如同SGD(Stochastic Gradient Descent)一样。这里直接给出用FTRL优化LR(Logistic Regression)的步骤:
其中 $p_t=\sigma(X_t\cdot w)$ 是LR的预测函数,求出 $p_t$ 的唯一目的是为了求出目标函数(在LR中采用交叉熵损失函数作为目标函数)对参数 $w$ 的一阶导数 $g$,即 $g_i=(p_t-y_t)x_i$。上面的步骤同样适用于FTRL优化其他目标函数,唯一的不同就是求次梯度 $g$ 的方法不同(次梯度是左导数和右导数之间的集合;当函数可导、即左导数等于右导数时,次梯度就等于一阶梯度)。
下面的python代码把FTRL和LR进行了解耦:
# coding=utf-8
__author__ = "orisun"

import numpy as np


class LR(object):
    """Logistic regression: decision function, loss, and gradient.

    All members are static so FTRL can use the class itself as a pluggable
    objective (see FTRL.decisionFunc).
    """

    @staticmethod
    def fn(w, x):
        """Sigmoid decision function: sigma(w . x)."""
        return 1.0 / (1.0 + np.exp(-w.dot(x)))

    @staticmethod
    def loss(y, y_hat):
        """Cross-entropy loss; nan_to_num guards log(0) when y_hat hits 0 or 1."""
        return np.sum(np.nan_to_num(-y * np.log(y_hat) - (1 - y) * np.log(1 - y_hat)))

    @staticmethod
    def grad(y, y_hat, x):
        """First derivative of the cross-entropy loss w.r.t. the weights w."""
        return (y_hat - y) * x


class FTRL(object):
    """FTRL-Proximal online optimizer, decoupled from the model.

    decisionFunc supplies fn/loss/grad (defaults to LR); z and n are the
    per-coordinate FTRL accumulators, w the current weight vector.
    """

    def __init__(self, dim, l1, l2, alpha, beta, decisionFunc=LR):
        self.dim = dim
        self.decisionFunc = decisionFunc
        self.z = np.zeros(dim)
        self.n = np.zeros(dim)
        self.w = np.zeros(dim)
        self.l1 = l1
        self.l2 = l2
        self.alpha = alpha
        self.beta = beta

    def predict(self, x):
        """Model prediction with the current weights."""
        return self.decisionFunc.fn(self.w, x)

    def update(self, x, y):
        """One FTRL step on a single sample (x, y); returns the sample loss.

        The weight update is the FTRL-Proximal closed form: coordinates with
        |z_i| <= l1 are clamped to exactly 0 (this is where sparsity comes from).
        """
        self.w = np.array([0 if np.abs(self.z[i]) <= self.l1 else
                           (np.sign(self.z[i]) * self.l1 - self.z[i]) /
                           (self.l2 + (self.beta + np.sqrt(self.n[i])) / self.alpha)
                           for i in range(self.dim)])
        y_hat = self.predict(x)
        g = self.decisionFunc.grad(y, y_hat, x)
        # sigma implements the per-coordinate learning-rate schedule
        # eta_i = alpha / (beta + sqrt(n_i)).
        sigma = (np.sqrt(self.n + g * g) - np.sqrt(self.n)) / self.alpha
        self.z += g - sigma * self.w
        self.n += g * g
        return self.decisionFunc.loss(y, y_hat)

    def train(self, trainSet, verbos=False, max_itr=100000000, eta=0.01, epochs=100):
        """Run online updates over trainSet (re-iterated indefinitely).

        Stops when the loss has stayed below eta for `epochs` consecutive
        updates, or after max_itr updates in total.
        """
        itr = 0  # consecutive updates with loss < eta
        n = 0    # total updates performed
        while True:
            for x, y in trainSet:
                loss = self.update(x, y)
                if verbos:
                    print("itr=" + str(n) + "\tloss=" + str(loss))
                if loss < eta:
                    itr += 1
                else:
                    itr = 0
                if itr >= epochs:  # loss below eta for `epochs` consecutive updates
                    print("loss have less than", eta, " continuously for ", itr, "iterations")
                    return
                n += 1
                if n >= max_itr:
                    print("reach max iteration", max_itr)
                    return


class Corpus(object):
    """Iterable over a whitespace-separated text file.

    Each line yields (features, label): the first d columns parsed as a float
    feature vector, column d+1 as the label. Short lines are skipped.
    """

    def __init__(self, file, d):
        self.d = d
        self.file = file

    def __iter__(self):
        with open(self.file, 'r') as f_in:
            for line in f_in:
                arr = line.strip().split()
                if len(arr) >= (self.d + 1):
                    yield (np.array([float(x) for x in arr[0:self.d]]),
                           float(arr[self.d]))


if __name__ == '__main__':
    d = 4
    corpus = Corpus("train.txt", d)
    ftrl = FTRL(dim=d, l1=1.0, l2=1.0, alpha=0.1, beta=1.0)
    ftrl.train(corpus, verbos=False, max_itr=100000, eta=0.01, epochs=100)
    w = ftrl.w
    print(w)
    correct = 0
    wrong = 0
    for x, y in corpus:
        y_hat = 1.0 if ftrl.predict(x) > 0.5 else 0.0
        if y == y_hat:
            correct += 1
        else:
            wrong += 1
    print("correct ratio", 1.0 * correct / (correct + wrong))
输出:
reach max iteration 100000
w= [ 4.08813934 1.84596245 10.83446088 3.12315268]
correct ratio 0.9946
当把参数调为 $\lambda_1=0,\ \lambda_2=0,\ \alpha=0.5,\ \beta=1$ 时,准确率能达到0.9976。
train.txt文件前4列是特征,第5列是标签。内容形如:
-0.567811945258 0.899305436215 0.501926599477 -0.222973905568 1.0
-0.993964260114 0.261988294216 -0.349167046026 -0.923759536056 0.0
0.300707261785 -0.90855090557 -0.248270600228 0.879134142054 0.0
-0.311566995194 -0.698903141283 0.369841040784 0.175901270771 1.0
0.0245841670644 0.782128080056 0.542680482068 0.44897929707 1.0
0.344387543846 0.297686731698 0.338210312887 0.175049733038 1.0
转自https://www.cnblogs.com/zhangchaoyang/articles/6854175.html