C++ program to perform data cleaning with the binning method

A simple C++ program that performs data cleaning operations on a data set, which include the treatment of missing values and the smoothing of noisy data by the binning method.

 

Code:


#include<math.h>

#include<algorithm>
#include<iostream>
#include<vector>



using namespace std;



// Prints every bin on its own line as "BIN <k>: v1 v2 ... ".
static void printBins(const std::vector<std::vector<int>>& bins)
{
    for (std::size_t i = 0; i < bins.size(); ++i)
    {
        std::cout << "BIN " << (i + 1) << ": ";
        for (int value : bins[i])
        {
            std::cout << value << " ";
        }
        std::cout << std::endl;
    }
}

// Equal-depth (equal-frequency) binning followed by two smoothing passes.
//
// `values` must be sorted in ascending order and non-empty; `depth` must be
// positive. Consecutive runs of `depth` values form one bin each. When the
// number of values is not a multiple of `depth`, the leftover values form a
// final, shorter bin instead of being dropped.
//
// Output: the raw bins, the bins smoothed by bin mean, and the bins smoothed
// by bin boundaries.
static void equalDepthBinning(const std::vector<int>& values, int depth)
{
    const int total = static_cast<int>(values.size());

    // Build the bins. The offset advances by the actual bin length, so the
    // partition is correct for any depth.
    std::vector<std::vector<int>> bins;
    int start = 0;
    while (start < total)
    {
        const int length = std::min(depth, total - start);
        bins.emplace_back(values.begin() + start, values.begin() + start + length);
        start += length;
    }
    printBins(bins);

    // Smoothing by bin means: every value is replaced by its bin's mean.
    // The mean is an integer (truncated), since the data set is integral.
    std::vector<std::vector<int>> smoothedByMean;
    smoothedByMean.reserve(bins.size());
    for (const std::vector<int>& bin : bins)
    {
        long long sum = 0;  // wide accumulator so large inputs cannot overflow
        for (int value : bin)
        {
            sum += value;
        }
        const int mean = static_cast<int>(sum / static_cast<long long>(bin.size()));
        smoothedByMean.emplace_back(bin.size(), mean);
    }
    std::cout << "After smoothing of bin using mean method: " << std::endl;
    printBins(smoothedByMean);

    // Smoothing by bin boundaries: every value is replaced by the closer of
    // the bin's smallest and largest value; ties go to the lower boundary.
    // The bin is sorted, so low <= value <= high and both distances are
    // non-negative without needing abs().
    for (std::vector<int>& bin : bins)
    {
        const int low = bin.front();
        const int high = bin.back();
        for (int& value : bin)
        {
            const long long toLow = static_cast<long long>(value) - low;
            const long long toHigh = static_cast<long long>(high) - value;
            value = (toHigh < toLow) ? high : low;
        }
    }
    std::cout << "After smoothing of bin using boundary method: " << std::endl;
    printBins(bins);
}

// Equal-width binning.
//
// `values` must be sorted in ascending order and non-empty; `binCount` must be
// positive. With width = (max - min) / binCount (integer division), bin k
// covers the closed integer interval
//     [min + k * (width + 1), min + k * (width + 1) + width].
// binCount such intervals always reach the maximum, so every value lands in
// exactly one bin. Bins that receive no value are printed empty.
static void equalWidthBinning(const std::vector<int>& values, int binCount)
{
    const long long lowest = values.front();
    const long long range = static_cast<long long>(values.back()) - lowest;
    const long long width = range / binCount;

    long long low = lowest;
    std::size_t pos = 0;  // next value not yet assigned to a bin
    for (int bin = 0; bin < binCount; ++bin)
    {
        const long long high = low + width;
        std::cout << "BIN " << (bin + 1) << ": ";
        // The data is sorted and the intervals are contiguous, so one forward
        // scan distributes all values.
        while (pos < values.size() && values[pos] <= high)
        {
            std::cout << values[pos] << " ";
            ++pos;
        }
        std::cout << std::endl;
        low = high + 1;
    }
}

// Reads an integer data set from standard input, sorts it, and then serves an
// interactive menu offering equal-depth binning (with mean and boundary
// smoothing) and equal-width binning until the user chooses to exit.
//
// Returns 0 on a normal exit (menu option 3 or end of input at the menu) and
// 1 when the data set or a numeric parameter cannot be read.
int main()
{
    int num = 0;
    std::cout << "Enter the total number of data: " << std::endl;
    if (!(std::cin >> num) || num <= 0)
    {
        std::cout << "The total number of data must be a positive integer." << std::endl;
        return 1;
    }

    // std::vector replaces the variable-length array, which is not standard C++.
    std::vector<int> values(static_cast<std::size_t>(num));
    std::cout << "Enter the values: " << std::endl;
    for (int& value : values)
    {
        if (!(std::cin >> value))
        {
            std::cout << "Invalid data value." << std::endl;
            return 1;
        }
    }
    std::sort(values.begin(), values.end());

    while (true)
    {
        std::cout << "Enter your choice: " << std::endl;
        std::cout << "1. Equal-Depth Binning" << std::endl;
        std::cout << "2. Equal-Width Binning" << std::endl;
        std::cout << "3. Exit" << std::endl;

        int choice = 0;
        if (!(std::cin >> choice))
        {
            // End of input or non-numeric input: stop instead of looping
            // forever on a failed stream.
            return 0;
        }

        if (choice == 1)
        {
            int depth = 0;
            std::cout << "Enter the depth of bin: " << std::endl;
            if (!(std::cin >> depth))
            {
                return 1;
            }
            std::cout << std::endl;
            if (depth <= 0)
            {
                std::cout << "The depth of bin must be a positive integer." << std::endl;
                continue;
            }
            equalDepthBinning(values, depth);
        }
        else if (choice == 2)
        {
            std::cout << "Sorted Array: ";
            for (int value : values)
            {
                std::cout << value << " ";
            }
            std::cout << std::endl;

            int binCount = 0;
            std::cout << "Enter the number of bins: " << std::endl;
            if (!(std::cin >> binCount))
            {
                return 1;
            }
            if (binCount <= 0)
            {
                std::cout << "The number of bins must be a positive integer." << std::endl;
                continue;
            }
            equalWidthBinning(values, binCount);
        }
        else if (choice == 3)
        {
            return 0;
        }
    }
}

Comments

Popular posts from this blog

C program to evaluate Prefix Expression using Stack data structure

Java Program to Implement sorting algorithm using TCP on Server application

C++ program to perform data transformation Min-max and Z score Normalization