c++ - Parallel program no speed increase vs linear program -


i have created model program of more complex program utilize multithreading , multiple harddrives increase performance. data size large reading data memory not feasible data read, processed, , written out in chunks. test program uses pipeline design able read, process , write out @ same time on 3 different threads. because read , write different harddrive, there no problem read , write @ same time. however, program utilizing multithread seems run 2x slower linear version(also in code). have tried have read , write thread not destoryed after running chunk synchronization seem have slowed down more current version. wondering if doing wrong or how can improve this. thank you.

tested using i3-2100 @ 3.1ghz , 16gb ram.

#include <iostream> #include <fstream> #include <ctime> #include <thread>  #define chunksize 8192    //size of each chunk process #define datasize 2097152  //total size of data  using namespace std;  int data[3][chunksize]; int run = 0; int totalrun = datasize/chunksize;  bool finishread = false, finishwrite = false;  ifstream infile; ofstream outfile;  clock_t starttime, endtime;  /*     process chunk of data(simulate only, not require sort data) */ void quicksort(int arr[], int left, int right) {      int = left, j = right;     int tmp;     int pivot = arr[(left + right) / 2];      while (i <= j) {         while (arr[i] < pivot) i++;         while (arr[j] > pivot) j--;         if (i <= j) {             tmp = arr[i];             arr[i] = arr[j];             arr[j] = tmp;             i++;             j--;         }     };      if (left < j) quicksort(arr, left, j);     if (i < right) quicksort(arr, i, right); }  /*     find runtime */ void diffclock(){     double diff = (endtime - starttime)/(clocks_per_sec/1000);     cout<<"total run time: "<<diff<<"ms"<<endl; }  /*     read chunk of data */ void readdata(){      for(int = 0; < chunksize; i++){         infile>>data[run%3][i];     }     finishread = true;  }  /*     write chunk of data */ void writedata(){      for(int = 0; < chunksize; i++){         outfile<<data[(run-2)%3][i]<<endl;     }     finishwrite = true; }  /*     pipelines read, process, write using multithread */ void threadtransfer(){      starttime = clock();      infile.open("/home/pcg/test/iothread/source.txt");     outfile.open("/media/pcg/data/test/iothread/threadduplicate.txt");      thread read, write;      run = 0;     readdata();      run = 1;     readdata();     quicksort(data[(run-1)%3], 0, chunksize - 1);      run = 2;     while(run < totalrun){         //cout<<run<<endl;         finishread = finishwrite = false;         read = thread(readdata);         write = thread(writedata);         read.detach();         write.detach();         quicksort(data[(run-1)%3], 0, chunksize - 1);         while(!finishread||!finishwrite){}  //check if next cycle ready.         run++;     }       quicksort(data[(run-1)%3], 0, chunksize - 1);     writedata();      run++;     writedata();      infile.close();     outfile.close();      endtime = clock();     diffclock(); }  /*     linearly read, sort, , write chunk , repeat. */ void lineartransfer(){      int totalrun = datasize/chunksize;     int holder[chunksize];     starttime = clock();      infile.open("/home/pcg/test/iothread/source.txt");     outfile.open("/media/pcg/data/test/iothread/linearduplicate.txt");      run = 0;      while(run < totalrun){          for(int = 0; < chunksize; i++) infile>>holder[i];         quicksort(holder, 0, chunksize - 1);         for(int = 0; < chunksize; i++) outfile<<holder[i]<<endl;         run++;     }      endtime = clock();     diffclock(); }  /*     create large amount of data testing */ void createdata(){     outfile.open("/home/pcg/test/iothread/source.txt");      for(int = 0; < datasize; i++){         outfile<<rand()<<endl;      }     outfile.close(); }    int main(){      int mode=0;     cout<<"number of threads: "<<thread::hardware_concurrency()<<endl;     cout<<"enter mode\n1.create data\n2.thread copy\n3.linear copy\ninput mode:";     cin>>mode;      if(mode == 1) createdata();     else if(mode == 2) threadtransfer();     else if(mode == 3) lineartransfer();      return 0; } 

since measuing time using clock on linux machine, expect total cpu time (roughly) same whether run 1 thread or multiple threads.

maybe want use time myprog instead? or use gettimeofday fetch time (which give time in seconds + nanoseconds [although nanoseconds may not "accurate" down last digit].

edit: next, don't use endl when writing file. slows things down lot, because c++ runtime goes , flushes file, operating system call. somehow protected against multiple threads, have 3 threads doing write-data, single line, synchronously, @ time. going take 3x long running single thread. also, don't write same file 3 different threads - that's going bad in 1 way or another.


Comments

Popular posts from this blog

basic authentication with http post params android -

vb.net - Virtual Keyboard commands -

css - Firefox for ubuntu renders wrong colors -