## Background

I have recently been studying neural networks with my advisor, and to get a deeper feel for this black box I decided to implement one myself in C/C++. After a good bit of tinkering today I finally have a working BP (backpropagation) network; I plan to implement a CNN and others later and will share those here too. Since I have been rather busy lately, I am just posting the code for now; I will write up the theory and some of my own observations when I find the time.

## Code

### main.cpp

```cpp
#include <iostream>
#include <vector>
#include "BPUtils.h"
using namespace std;

// Global data containers (declared extern in BPUtils.h)
vector<vector<double>> dataTest;    // test-set features
vector<double> dataTestY;           // test-set labels
vector<vector<double>> trainDataX;  // training-set features
vector<double> trainDataY;          // training-set labels

int main() {
    createTrainSet();
    createTestSet();
    // Min-max normalize both feature sets
    guiYiHua(dataTest);
    guiYiHua(trainDataX);
    // 2 input units, 44 hidden units, 2 output units
    NeuralNetwork nn(2, 44, 2);
    nn.train(trainDataX, trainDataY);
    return 0;
}
```

### BPUtils.h

```cpp
#ifndef BP_UTILS
#define BP_UTILS
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <vector>
#include <ctime>
#include <cstring>
#include <cstdio>
#include <fstream>
#define random(x) (rand()%x)
using namespace std;
#define MAXSIZE 99

// Globals: test set and training set (defined in main.cpp)
extern vector<vector<double>> dataTest;
extern vector<double> dataTestY;
extern vector<vector<double>> trainDataX;
extern vector<double> trainDataY;

// Split a string on a delimiter
vector<string> split(const string& str, const string& delim) {
    vector<string> res;
    if ("" == str) return res;
    // strtok needs writable C strings, so copy both arguments first
    char* strs = new char[str.length() + 1];
    strcpy(strs, str.c_str());
    char* d = new char[delim.length() + 1];
    strcpy(d, delim.c_str());
    char* p = strtok(strs, d);
    while (p) {
        res.push_back(string(p));  // collect each token
        p = strtok(NULL, d);
    }
    delete[] strs;  // don't leak the temporary buffers
    delete[] d;
    return res;
}

// Largest entry in the whole data set
double getMax(vector<vector<double>> dataSet) {
    double MYMAX = -999;
    for (int i = 0; i < dataSet.size(); i++)
        for (int j = 0; j < dataSet[i].size(); j++)
            if (MYMAX < dataSet[i][j]) MYMAX = dataSet[i][j];
    return MYMAX;
}

// Smallest entry in the whole data set
double getMin(vector<vector<double>> dataSet) {
    double MYMIN = 999;
    for (int i = 0; i < dataSet.size(); i++)
        for (int j = 0; j < dataSet[i].size(); j++)
            if (MYMIN > dataSet[i][j]) MYMIN = dataSet[i][j];
    return MYMIN;
}

// Min-max normalization: x = (x - x.min) / (x.max - x.min)
void guiYiHua(vector<vector<double>>& dataSet) {
    double MYMAX = getMax(dataSet);
    double MYMIN = getMin(dataSet);
    for (int i = 0; i < dataSet.size(); i++)
        for (int j = 0; j < dataSet[i].size(); j++)
            dataSet[i][j] = (dataSet[i][j] - MYMIN) / (MYMAX - MYMIN);
}

// Load the training set: each line of train.txt holds two
// tab-separated features followed by the class label
void createTrainSet() {
    fstream f("train.txt");
    string line;  // current line of the file
    vector<string> res;
    while (getline(f, line)) {
        res = split(line, "\t");
        vector<double> vec1;
        for (int i = 0; i < res.size(); i++) {
            char ch[MAXSIZE];
            strcpy(ch, res[i].c_str());
            if (i != 2) {
                vec1.push_back(atof(ch));        // feature column
            } else {
                trainDataY.push_back(atof(ch));  // label column
            }
        }
        trainDataX.push_back(vec1);
    }
}
```
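The post doesn't ship the data files, but `createTrainSet`/`createTestSet` tell us what they must look like: tab-separated lines with two feature columns and the class label in the third column (a line such as `0.53<TAB>1.71<TAB>0` would parse; that sample line is my own illustration, not from the original data). The normalization step is the usual min-max rescale. Here is a minimal standalone sketch of the same formula `guiYiHua` applies, on a made-up 2×2 matrix:

```cpp
#include <cstdio>
#include <vector>
using namespace std;

int main() {
    // Toy feature matrix; its global min is 1 and max is 9
    vector<vector<double>> x = {{1, 3}, {5, 9}};
    double mn = 1, mx = 9;
    for (auto& row : x)
        for (auto& v : row)
            v = (v - mn) / (mx - mn);  // same min-max formula as guiYiHua
    for (auto& row : x)
        printf("%.2f %.2f\n", row[0], row[1]);  // prints 0.00 0.25 / 0.50 1.00
    return 0;
}
```

One design choice worth noting: `main` normalizes `dataTest` with the test set's own min/max rather than reusing the training set's statistics, so train and test inputs can end up on slightly different scales if the two files cover different ranges.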
Continuing `BPUtils.h`: the test-set loader, the activation function and its derivative, and the first half of the `NeuralNetwork` class (its data members and private matrix helpers; the constructor and `train` follow below):

```cpp
// Load the test set: same format as train.txt
void createTestSet() {
    fstream f("test.txt");
    string line;  // current line of the file
    vector<string> res;
    while (getline(f, line)) {
        res = split(line, "\t");
        vector<double> vec1;
        for (int i = 0; i < res.size(); i++) {
            char ch[MAXSIZE];
            strcpy(ch, res[i].c_str());
            if (i != 2) {
                vec1.push_back(atof(ch));       // feature column
            } else {
                dataTestY.push_back(atof(ch));  // label column
            }
        }
        dataTest.push_back(vec1);
    }
}

// Sigmoid activation
double sigmoid(double x) {
    return 1 / (1 + exp(-x));
}

// Derivative of sigmoid, expressed in terms of the sigmoid *output*:
// sigma'(t) = sigma(t) * (1 - sigma(t))
double dsigmoid(double x) {
    return x * (1 - x);
}

class NeuralNetwork {
public:
    int inputLayers;   // number of input units
    int hidenLayers;   // number of hidden units
    int outputLayers;  // number of output units
    // Input-to-hidden weights:
    // (inputLayers + 1) rows (one extra for the bias) x hidenLayers columns
    vector<vector<double>> VArr;
    // Hidden-to-output weights: hidenLayers rows x outputLayers columns
    vector<vector<double>> WArr;

private:
    // Row-major matrix product: m1 is m x n, m2 is n x p, m3 receives m x p
    void dot(const double* m1, const double* m2, double* m3, int m, int n, int p) {
        for (int i = 0; i < m; ++i) {
            for (int j = 0; j < p; ++j) {
                *(m3 + i * p + j) = 0;
                for (int k = 0; k < n; ++k) {
                    *(m3 + i * p + j) += *(m1 + i * n + k) * *(m2 + k * p + j);
                }
            }
        }
    }
    // Copy a 2-D vector into a raw row-major array with n columns
    void vectorToArr1(vector<vector<double>> vec, double* arr, int n) {
        for (int i = 0; i < vec.size(); i++)
            for (int j = 0; j < vec[i].size(); j++)
                *(arr + i * n + j) = vec[i][j];
    }
    // Copy a 1-D vector into a raw array
    void vectorToArr2(vector<double> vec, double* arr) {
        for (int i = 0; i < vec.size(); i++)
            *(arr + i) = vec[i];
    }
    // Copy the first m entries of a raw array into a vector
    void arrToVector1(double* arr, vector<double>& vec, int m) {
        for (int i = 0; i < m; i++)
            vec.push_back(*(arr + i));
    }
    // Matrix transpose ("ZhuanZhi"): m1 is n1 x n2, m2 becomes n2 x n1
    void ZhuanZhi(const double* m1, double* m2, int n1, int n2) {
        for (int i = 0; i < n1; i++)
            for (int j = 0; j < n2; j++)
                *(m2 + j * n1 + i) = *(m1 + i * n2 + j);
    }
    // Forward pass for one test row, used when measuring accuracy.
    // ArrL2 receives the output-layer activations: e.g. with 10 classes we
    // get 10 numbers, roughly the "probability" of each class, and the
    // largest one is taken as the predicted class.
    void predict(vector<double> test, double* ArrL2) {
        test.push_back(1);  // append the bias term
        double testArr[1][inputLayers + 1];
        vectorToArr2(test, &testArr[0][0]);
        double dotL1[1][hidenLayers];
        double VArr_temp[inputLayers + 1][hidenLayers];
        vectorToArr1(VArr, &VArr_temp[0][0], hidenLayers);
        // testArr[1][inputLayers+1] dot VArr[inputLayers+1][hidenLayers]
        dot(&testArr[0][0], &VArr_temp[0][0], &dotL1[0][0], 1, inputLayers + 1, hidenLayers);
        // Hidden-layer output
        double ArrL1[1][hidenLayers];
        for (int i = 0; i < hidenLayers; i++) {
            ArrL1[0][i] = sigmoid(dotL1[0][i]);
        }
        double dotL2[1][outputLayers];
        double WArr_temp[hidenLayers][outputLayers];
        vectorToArr1(WArr, &WArr_temp[0][0], outputLayers);
        // ArrL1[1][hidenLayers] dot WArr[hidenLayers][outputLayers]
        dot(&ArrL1[0][0], &WArr_temp[0][0], &dotL2[0][0], 1, hidenLayers, outputLayers);
        // Output-layer activations
        for (int i = 0; i < outputLayers; i++) {
            *(ArrL2 + i) = sigmoid(dotL2[0][i]);
        }
    }
    // Index of the largest entry (the predicted class)
    int getMaxIndex(vector<double> vec) {
        int index = -1;
        double MYMAX = -999;
        for (int i = 0; i < vec.size(); i++) {
            if (MYMAX < vec[i]) {
                MYMAX = vec[i];
                index = i;
            }
        }
        return index;
    }
```
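Everything below leans on that pointer-based `dot` routine, so it is worth checking in isolation. The original `main.cpp` carries exactly such a test in commented-out form: multiplying a 3×1 matrix by a 1×4 matrix. Here it is extracted into a standalone program (the free-standing `dot` is a copy of the class method):

```cpp
#include <iostream>
using namespace std;

// Row-major matrix product, same as NeuralNetwork::dot:
// m1 is m x n, m2 is n x p, m3 receives the m x p result
void dot(const double* m1, const double* m2, double* m3, int m, int n, int p) {
    for (int i = 0; i < m; ++i)
        for (int j = 0; j < p; ++j) {
            *(m3 + i * p + j) = 0;
            for (int k = 0; k < n; ++k)
                *(m3 + i * p + j) += *(m1 + i * n + k) * *(m2 + k * p + j);
        }
}

int main() {
    double m1[3][1] = {{1}, {2}, {3}};
    double m2[1][4] = {{1, 2, 3, 4}};
    double m3[3][4];
    dot(&m1[0][0], &m2[0][0], &m3[0][0], 3, 1, 4);
    // Expect the outer product: row i holds (i+1) * {1, 2, 3, 4}
    for (int i = 0; i < 3; i++) {
        for (int j = 0; j < 4; j++) cout << m3[i][j] << " ";
        cout << endl;
    }
    return 0;
}
```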
The public interface: the constructor sizes and randomly initializes the two weight matrices, and `train` runs the stochastic update loop.

```cpp
public:
    // Constructor: takes the number of input, hidden and output units
    // and builds the two weight matrices
    NeuralNetwork(int _inputLayers, int _hidenLayers, int _outputLayers) {
        this->inputLayers = _inputLayers;
        hidenLayers = _hidenLayers;
        outputLayers = _outputLayers;
        // V: (inputLayers + 1) x hidenLayers, random weights in [-1, 1]
        for (int i = 0; i < inputLayers + 1; i++) {
            vector<double> vec;
            for (int j = 0; j < hidenLayers; j++) {
                vec.push_back((double)rand() / RAND_MAX * 2 - 1);
            }
            VArr.push_back(vec);
        }
        // W: hidenLayers x outputLayers, random weights in [-1, 1]
        for (int i = 0; i < hidenLayers; i++) {
            vector<double> vec;
            for (int j = 0; j < outputLayers; j++) {
                vec.push_back((double)rand() / RAND_MAX * 2 - 1);
            }
            WArr.push_back(vec);
        }
    }

    // Train on dataX/dataY with the given learning rate and number of
    // iterations; the input data is assumed to have two feature columns
    void train(vector<vector<double>> dataX, vector<double> dataY,
               double lr = 0.03, int epochs = 1000000) {
        double arrL1[1][hidenLayers];
        double VArr_temp[inputLayers + 1][hidenLayers];
        double hangx_temp[1][inputLayers + 1];
        double hangxT[inputLayers + 1][1];
        double arrL2[1][outputLayers];
        double WArr_temp[hidenLayers][outputLayers];
        double arrL2_delta[1][outputLayers];
        double arrL1_delta[1][hidenLayers];
        double E;
        double dao;
        double dotTemp[hidenLayers][outputLayers];
        double WArr_tempT[outputLayers][hidenLayers];
        double arrL1T[hidenLayers][1];
        double dotTempp[inputLayers + 1][hidenLayers];
        srand((int)time(0));
        // Append the bias to the data set: e.g. a 4x2 input becomes a 4x3
        // matrix whose last column is all ones
        for (int i = 0; i < dataX.size(); i++) {
            dataX[i].push_back(1);
        }
        // Weight-update loop
        for (int n = 0; n < epochs; n++) {
            // Pick one random training row ("hangx") for this update
            int iii = random(dataX.size());
            vector<double> hangx = dataX[iii];
            // Refresh the raw-array copies of the current weights
            vectorToArr1(VArr, &VArr_temp[0][0], hidenLayers);
            vectorToArr1(WArr, &WArr_temp[0][0], outputLayers);
            vectorToArr2(hangx, &hangx_temp[0][0]);
            // Hidden-layer output: dot the input row with V, then feed the
            // result through sigmoid.
            // hangx[1][inputLayers+1] dot VArr[inputLayers+1][hidenLayers]
            dot(&hangx_temp[0][0], &VArr_temp[0][0], &arrL1[0][0], 1, inputLayers + 1, hidenLayers);
            for (int k1 = 0; k1 < hidenLayers; k1++) {
                arrL1[0][k1] = sigmoid(arrL1[0][k1]);
            }
            // Output layer:
            // arrL1[1][hidenLayers] dot WArr_temp[hidenLayers][outputLayers]
            dot(&arrL1[0][0], &WArr_temp[0][0], &arrL2[0][0], 1, hidenLayers, outputLayers);
            // Output-layer delta: error times the sigmoid derivative
            for (int k1 = 0; k1 < outputLayers; k1++) {
                arrL2[0][k1] = sigmoid(arrL2[0][k1]);
                E = dataY[iii] - arrL2[0][k1];
                dao = dsigmoid(arrL2[0][k1]);
                arrL2_delta[0][k1] = E * dao;
            }
            // Transpose W so the delta can be backpropagated
            ZhuanZhi(&WArr_temp[0][0], &WArr_tempT[0][0], hidenLayers, outputLayers);
            // train() continues below: backpropagation and the weight updates
```
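Written out in symbols (my transcription of the code, with learning rate $\eta$ = `lr`, input row $x$ including the bias, hidden output $h = \sigma(xV)$, network output $o = \sigma(hW)$ and target $y$), the loop implements the standard BP updates:

$$\delta^{(2)} = (y - o) \odot o \odot (1 - o), \qquad \delta^{(1)} = \left(\delta^{(2)} W^{\top}\right) \odot h \odot (1 - h)$$

$$W \leftarrow W + \eta\, h^{\top} \delta^{(2)}, \qquad V \leftarrow V + \eta\, x^{\top} \delta^{(1)}$$

One quirk faithfully reflected here: the code uses the scalar label `dataY[iii]` as the target for every output unit rather than a one-hot vector, so all output deltas share the same target value.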
The rest of `train`: backpropagate the delta to the hidden layer, apply the weight updates, and periodically score the test set.

```cpp
            // Hidden-layer delta:
            // arrL2_delta[1][outputLayers] dot WArr_tempT[outputLayers][hidenLayers]
            dot(&arrL2_delta[0][0], &WArr_tempT[0][0], &arrL1_delta[0][0], 1, outputLayers, hidenLayers);
            // ...then multiply by the derivative of the hidden output
            for (int k1 = 0; k1 < hidenLayers; k1++) {
                double ii = arrL1_delta[0][k1];
                arrL1_delta[0][k1] = ii * dsigmoid(arrL1[0][k1]);
            }
            // Update W from the deltas above:
            // arrL1T[hidenLayers][1] dot arrL2_delta[1][outputLayers]
            ZhuanZhi(&arrL1[0][0], &arrL1T[0][0], 1, hidenLayers);
            dot(&arrL1T[0][0], &arrL2_delta[0][0], &dotTemp[0][0], hidenLayers, 1, outputLayers);
            for (int k1 = 0; k1 < hidenLayers; k1++) {
                for (int k2 = 0; k2 < outputLayers; k2++) {
                    // Scale the update by the learning rate
                    WArr[k1][k2] += (lr * dotTemp[k1][k2]);
                }
            }
            // Update V likewise:
            // hangxT[inputLayers+1][1] dot arrL1_delta[1][hidenLayers]
            ZhuanZhi(&hangx_temp[0][0], &hangxT[0][0], 1, inputLayers + 1);
            dot(&hangxT[0][0], &arrL1_delta[0][0], &dotTempp[0][0], inputLayers + 1, 1, hidenLayers);
            for (int k1 = 0; k1 < inputLayers + 1; k1++) {
                for (int k2 = 0; k2 < hidenLayers; k2++) {
                    VArr[k1][k2] += (lr * dotTempp[k1][k2]);
                }
            }
            // Every 10000 iterations, measure accuracy on the test set
            if (n % 10000 == 0) {
                double resultArr[1][outputLayers];  // one prediction
                int index;
                int num = 0;           // correctly classified samples
                double accuracy = 0;
                // Run every test row through the network
                for (int k1 = 0; k1 < dataTest.size(); k1++) {
                    vector<double> result;
                    // Predict row k1; the result lands in resultArr
                    predict(dataTest[k1], &resultArr[0][0]);
                    arrToVector1(&resultArr[0][0], result, outputLayers);
                    // Index of the largest (highest-probability) output
                    index = getMaxIndex(result);
                    if (index == dataTestY[k1]) {
                        num++;
                    }
                }
                accuracy = (double)num / dataTestY.size();
                cout << "epoch: " << n << ", " << "accuracy: " << accuracy << endl;
            }
        }
    }
};
#endif
```

## Training results
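The accuracy printed every 10000 epochs comes from the evaluation block at the end of `train`: each test row goes through `predict`, the index of the largest output activation becomes the predicted class, and it is compared against the label. A toy standalone version of that scoring rule (the sample outputs and labels are invented for illustration; `getMaxIndex` is lightly adapted from the class method):

```cpp
#include <cstdio>
#include <vector>
using namespace std;

// Argmax rule, as in NeuralNetwork::getMaxIndex
int getMaxIndex(const vector<double>& vec) {
    int index = -1;
    double best = -999;
    for (int i = 0; i < (int)vec.size(); i++)
        if (vec[i] > best) { best = vec[i]; index = i; }
    return index;
}

int main() {
    // Pretend output activations for three 2-class test samples, plus labels
    vector<vector<double>> outputs = {{0.9, 0.2}, {0.3, 0.8}, {0.6, 0.4}};
    vector<double> labels = {0, 1, 1};
    int num = 0;
    for (int k = 0; k < (int)outputs.size(); k++)
        if (getMaxIndex(outputs[k]) == (int)labels[k]) num++;  // prediction correct?
    printf("accuracy: %.2f\n", (double)num / labels.size());   // 2 of 3 -> 0.67
    return 0;
}
```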