仅列出核心代码:
1.sigmoidGradient.m
% Gradient of the logistic sigmoid, evaluated element-wise at z:
%   g = s .* (1 - s), where s = 1 ./ (1 + exp(-z)).
% NOTE(review): z (input) and g (output) are presumably the argument and
% return value declared by sigmoidGradient.m's function header, which is
% not shown in this snippet -- confirm.
sig = 1.0 ./ (1.0 + exp(-z));   % sigmoid of every element of z
g = sig .* (1 - sig);           % derivative expressed through the sigmoid value
2.randInitializeWeights.m
% Fill W with values drawn uniformly from (-init_range, init_range).
% W has L_out rows and 1 + L_in columns.
% NOTE(review): L_in, L_out and W are presumably the arguments and return
% value declared by randInitializeWeights.m's function header, which is not
% shown in this snippet; the extra column is presumably for a bias term --
% confirm.
init_range = 0.12;
unit_samples = rand(L_out, 1 + L_in);             % uniform samples in (0, 1)
W = unit_samples * 2 * init_range - init_range;   % rescale to (-init_range, init_range)
3.nnCostFunction.m
% Regularized cost J and unrolled gradient grad of a network with one
% hidden layer, computed with fully vectorized forward/back propagation.
% NOTE(review): X, y, m, lambda, num_labels, Theta1, Theta2 and sigmoid are
% defined outside this snippet (function header / other files); m is
% presumably the number of rows of X -- confirm against the header.

% ---- Forward propagation ----
A1 = [ones(m, 1), X];             % inputs with a leading column of ones
Z2 = A1 * Theta1.';
A2 = [ones(m, 1), sigmoid(Z2)];   % hidden activations with a leading column of ones
Z3 = A2 * Theta2.';
A3 = sigmoid(Z3);                 % network output, one column per label

% ---- One-hot encode the labels ----
% Row i of Y is all zeros except for a 1 in column y(i). Indexing the rows
% of an identity matrix replaces the per-example loop; a label outside
% 1..num_labels raises an index error instead of silently resizing Y.
label_rows = eye(num_labels);
Y = label_rows(y(1:m), :);

% ---- Regularized cross-entropy cost ----
% Summed over every example and every output unit in one expression.
data_cost = -sum(sum(Y .* log(A3) + (1 - Y) .* log(1 - A3))) / m;
% The first column of each Theta is excluded from the penalty.
weight_penalty = (lambda / (2 * m)) * ...
    (sum(sum(Theta1(:, 2:end) .^ 2)) + sum(sum(Theta2(:, 2:end) .^ 2)));
J = data_cost + weight_penalty;

% ---- Backpropagation ----
delta3 = A3 - Y;
% A2 .* (1 - A2) is the sigmoid derivative written in terms of A2; its
% first column (the appended ones) evaluates to zero and is dropped next.
delta2 = (delta3 * Theta2) .* (A2 .* (1 - A2));
delta2 = delta2(:, 2:end);

% A single matrix product accumulates the outer products
% delta(i, :).' * A(i, :) over all m examples.
Delta2 = delta3.' * A2;
Delta1 = delta2.' * A1;

% ---- Regularized gradients (first column left unregularized) ----
Theta1_grad = Delta1 / m;
Theta1_grad(:, 2:end) = Theta1_grad(:, 2:end) + Theta1(:, 2:end) * (lambda / m);
Theta2_grad = Delta2 / m;
Theta2_grad(:, 2:end) = Theta2_grad(:, 2:end) + Theta2(:, 2:end) * (lambda / m);

% ---- Unroll gradients into a single column vector ----
grad = [Theta1_grad(:) ; Theta2_grad(:)];
end
没有评论:
发表评论