-
Notifications
You must be signed in to change notification settings - Fork 0
Training Data Cleaner
Calcitem edited this page Nov 28, 2022
·
1 revision
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>
using namespace std;
// Training-data cleaner: copies the input file to the output file, dropping
// every non-blank line that has already appeared earlier in the input.
//
// - The first occurrence of each non-blank line is kept, in input order.
// - Blank lines are always kept and are never treated as duplicates.
// - Progress is echoed to stdout: '*' for a kept non-blank line, '#' for a
//   dropped duplicate, then '>' for every line written to the output file.
//
// Returns 0 on success and -1 if the input cannot be opened, the output
// cannot be opened, or writing the output fails.
int main()
{
    const std::string inputFile = "D:\\repos\\merger\\all.txt";
    const std::string outputFile = "D:\\repos\\merger\\clean.txt";

    std::ifstream file(inputFile);
    if (!file.is_open()) {
        std::cerr << "Cannot open input file: " << inputFile << std::endl;
        return -1;
    }

    std::vector<std::string> vec;          // lines to write, in input order
    std::unordered_set<std::string> seen;  // non-blank lines already kept

    std::string line;
    while (std::getline(file, line)) {
        if (line.empty()) {
            // Blank lines bypass deduplication and produce no progress mark.
            vec.push_back(line);
        } else if (seen.insert(line).second) {
            // insert() reports whether the line was new; a hash lookup
            // replaces rescanning every previously kept line.
            vec.push_back(line);
            std::cout << "*";
        } else {
            std::cout << "#";
        }
    }
    file.close();
    std::cout << std::endl;

    std::ofstream ofile(outputFile);
    if (!ofile.is_open()) {
        std::cerr << "Cannot open output file: " << outputFile << std::endl;
        return -1;
    }

    for (const std::string& kept : vec) {
        ofile << kept << "\n";
        std::cout << ">";
    }
    ofile.close();
    std::cout << std::endl;

    // close() flushes the buffer; a failure here means the output file is
    // incomplete, so do not report success.
    if (ofile.fail()) {
        std::cerr << "Failed while writing output file: " << outputFile << std::endl;
        return -1;
    }

    std::cout << "Done." << std::endl;
    // Keeps the console window open when launched by double-click (Windows
    // shell command; on other platforms the command simply fails).
    std::system("pause");
    return 0;
}