/*
  Code to embarrassingly parallelize a set of system commands.

  If there is a file with independent system commands - and you would like
  to use multiple CPUs on the same physical machine, then use this code.

  Written : Nov 17, 2013
  Author  : Manodeep Sinha
  Email   : manodeep@gmail.com

  Usage   : ./run_system_commands <commands_file> <numthreads>

            <commands_file> holds one shell command per line. Lines whose
            first character is '#' and empty lines are skipped.

  Compilation:
     Has no dependencies - you need a system() command available - all
     *nix systems should work.

     gcc : gcc -std=c99 run_system_commands.c -Wall -Wextra -Wshadow -o run_system_commands -fopenmp -g -lm
     icc : icc -std=c99 run_system_commands.c -Wall -Wextra -Wshadow -o run_system_commands -openmp -g

     Without OpenMP support the code still compiles, but the commands are
     then executed one after the other.

  Bugs    : This is not very well-tested - please let me know if you
            encounter any bugs/issues. Valgrind claimed there were a few
            memory leaks/uninitialized memory access from openmp (when
            compiled with gcc). Not really sure how to fix those. Also,
            stackoverflow claims that those memory leaks might not even be
            considered as leaks.

  Changelog :
     Dec 9, 2013: Explicitly null-terminating the system command. Was
                  getting errors on STAMPEDE
*/

#include <assert.h>
#include <inttypes.h> /* defines PRId64 for printing int64_t */
#include <limits.h>
#include <math.h>
#include <stdint.h> /* defines int64_t datatype -> *exactly* 8 bytes int */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>

#ifdef _OPENMP
#include <omp.h>
#endif

/* Size of the per-command buffer; a command must be shorter than this. */
#define MAXLEN 1000

void run_system_call(const char *execstring);
FILE * my_fopen(const char *fname,const char *mode);
size_t my_fread(void *ptr, size_t size, size_t nmemb, FILE *stream);
int my_fseek(FILE *stream, long offset, int whence);
void print_time(struct timeval t0,struct timeval t1,const char *s);
void* my_calloc(size_t size,int64_t N);
int64_t getnumlines(const char *fname,const char comment);

/*
  Runs `execstring' through system() and terminates the whole program with
  EXIT_FAILURE if the returned status differs from EXIT_SUCCESS.
*/
void run_system_call(const char *execstring)
{
    const int status = system(execstring);
    if (status != EXIT_SUCCESS) {
        fprintf(stderr,"ERROR: executing system command: \n`%s'\n...exiting\n",execstring);
        exit(EXIT_FAILURE);
    }
}

/*
  fopen() wrapper: returns the opened stream, or prints a message and exits
  if the file could not be opened.
*/
FILE * my_fopen(const char *fname,const char *mode)
{
    FILE *fp = fopen(fname, mode);
    if (fp == NULL) {
        fprintf(stderr,"Could not open file `%s'\n",fname);
        exit(EXIT_FAILURE);
    }
    return fp;
}

/*
  fread() wrapper: exits unless exactly `nmemb' items were read.
  Returns the number of items read (always equal to `nmemb').
*/
size_t my_fread(void *ptr, size_t size, size_t nmemb, FILE *stream)
{
    const size_t nread = fread(ptr, size, nmemb, stream);
    if (nread != nmemb) {
        fprintf(stderr,"I/O error (fread) has occured.\n");
        fprintf(stderr,"Instead of reading nmemb=%zu, I got nread = %zu ..exiting\n",nmemb,nread);
        exit(EXIT_FAILURE);
    }
    return nread;
}

/*
  fseek() wrapper: exits if the seek fails, otherwise returns fseek()'s
  return value (0).
*/
int my_fseek(FILE *stream, long offset, int whence)
{
    const int err = fseek(stream, offset, whence);
    if (err != 0) {
        fprintf(stderr,"ERROR: Could not seek `%ld' bytes into the file..exiting\n",offset);
        exit(EXIT_FAILURE);
    }
    return err;
}

/*
  I like this particular function. Generic replacement for printing (in
  meaningful units) the actual execution time of a code/code segment.

  The function call should be like this:

  ---------------------------
  struct timeval t_start,t_end;
  gettimeofday(&t_start,NULL);
  do_something();
  gettimeofday(&t_end,NULL);
  print_time(t_start,t_end,"do something");
  ---------------------------

  Output goes to stderr. Intervals shorter than one minute are printed as
  fractional seconds (microsecond part included). Longer intervals are
  broken down into whole days/hrs/mins/secs, e.g. 220 mins 30 secs is
  printed as 3 hrs, 40 mins, 30 secs; units whose count is zero are left
  out and the sub-second part is not shown.

  (code can be easily extended to include `weeks' as a system of time unit.
  left to the reader)
*/
void print_time(struct timeval t0,struct timeval t1,const char *s)
{
    const double timediff = difftime(t1.tv_sec, t0.tv_sec);
    const double ratios[] = {24*3600.0, 3600.0, 60.0, 1.0};
    const char *units[] = {"days", "hrs", "mins", "secs"};
    const size_t nunits = sizeof(ratios) / sizeof(ratios[0]);

    fprintf(stderr,"Time taken to execute '%s' = ",s);
    if (timediff < ratios[2]) {
        fprintf(stderr,"%6.3lf secs",1e-6*(t1.tv_usec-t0.tv_usec) + timediff);
    } else {
        double timeleft = timediff;
        for (size_t which = 0; which < nunits; which++) {
            const double time_to_print = floor(timeleft / ratios[which]);
            /* ">= 1" (not "> 1"): a count of exactly one unit must be
               printed too, otherwise e.g. a trailing "1 secs" is lost. */
            if (time_to_print >= 1) {
                timeleft -= time_to_print * ratios[which];
                fprintf(stderr,"%5d %s",(int)time_to_print,units[which]);
            }
        }
    }
    fprintf(stderr,"\n");
}

/*
  Allocates zero-initialised storage for `N' elements of `size' bytes each.
  Never returns NULL: an invalid count or an allocation failure terminates
  the program. A request for zero bytes yields a valid pointer that can be
  passed to free().
*/
void* my_calloc(size_t size,int64_t N)
{
    if (N < 0 || (uint64_t) N > SIZE_MAX) {
        fprintf(stderr,"ERROR: my_calloc called with invalid element count %"PRId64"..aborting\n",N);
        exit(EXIT_FAILURE);
    }

    /* calloc() is allowed to return NULL for a zero-byte request, so always
       ask for at least one byte; NULL then unambiguously means failure. */
    const size_t nelem = (N > 0) ? (size_t) N : 1;
    const size_t elsize = (size > 0) ? size : 1;

    void *x = calloc(nelem, elsize);
    if (x == NULL) {
        fprintf(stderr,"malloc for %"PRId64" elements with %zu size failed..aborting\n",N,size);
        exit(EXIT_FAILURE);
    }
    return x;
}

/*
  Returns the number of lines in `fname' whose first character is not
  `comment'. Exits (via my_fopen) if the file can not be opened.
*/
int64_t getnumlines(const char *fname,const char comment)
{
    enum { MAXLINESIZE = 10000 };
    char str_line[MAXLINESIZE];
    int64_t nlines = 0;
    int at_line_start = 1;

    FILE *fp = my_fopen(fname, "rt");
    while (fgets(str_line, MAXLINESIZE, fp) != NULL) {
        /* WARNING: this does not remove white-space. You might want to
           implement that (was never an issue for me) */
        if (at_line_start && str_line[0] != comment) {
            nlines++;
        }
        /* A chunk without '\n' means the line was longer than the buffer;
           the next chunk continues the same line and must not be counted. */
        at_line_start = (strchr(str_line, '\n') != NULL);
    }
    fclose(fp);

    return nlines;
}

/*
  Reads the command file named by argv[1], and runs every command in it
  through system(), using the number of threads given in argv[2] when
  compiled with OpenMP. Any failing command terminates the program.
*/
int main(int argc,char **argv)
{
    if (argc < 3) {
        fprintf(stderr,"ERROR: Usage `%s' <commands_file> <numthreads>\n",argv[0]);
        exit(EXIT_FAILURE);
    }

    const char comment = '#';
    struct timeval t0, t1;
    gettimeofday(&t0, NULL);

    /* strtol instead of atoi so that garbage / non-positive values are
       rejected rather than silently passed on to OpenMP. */
    char *endptr = NULL;
    const long requested = strtol(argv[2], &endptr, 10);
    if (endptr == argv[2] || *endptr != '\0' || requested < 1 || requested > INT_MAX) {
        fprintf(stderr,"ERROR: numthreads must be a positive integer (got `%s')\n",argv[2]);
        exit(EXIT_FAILURE);
    }
    const int numthreads = (int) requested;
    fprintf(stderr,"numthreads = %d\n",numthreads);
#ifdef _OPENMP
    omp_set_num_threads(numthreads);
#endif

    const int64_t nlines = getnumlines(argv[1], comment);

    /* Slurp the entire command file into memory. */
    FILE *fp = my_fopen(argv[1], "r");
    my_fseek(fp, 0, SEEK_END);
    const long fileSize = ftell(fp);
    if (fileSize < 0) {
        fprintf(stderr,"ERROR: Could not determine the size of file `%s'..exiting\n",argv[1]);
        exit(EXIT_FAILURE);
    }
    my_fseek(fp, 0, SEEK_SET);
    char *buffer = my_calloc(sizeof(char), fileSize);
    my_fread(buffer, sizeof(char), (size_t) fileSize, fp);
    fclose(fp);
    fprintf(stderr,"filesize = %ld nlines=%"PRId64"\n",fileSize,nlines);

    /* Index the commands: start offset and length (newline excluded) of
       every line that is neither empty nor a comment. `nlines' is an upper
       bound on the number of such lines. */
    int64_t *begining_line_pos = my_calloc(sizeof(*begining_line_pos), nlines);
    int64_t *length = my_calloc(sizeof(*length), nlines);
    int64_t ncommands = 0;
    int64_t lineno = 0;
    int64_t curr_pos = 0;
    while (curr_pos < fileSize) {
        const int64_t start = curr_pos;
        while (curr_pos < fileSize && buffer[curr_pos] != '\n') {
            curr_pos++;
        }
        const int64_t len = curr_pos - start;
        curr_pos++; /* step over the newline */
        lineno++;

        if (len == 0 || buffer[start] == comment) {
            continue;
        }
        /* Checked here, before anything is executed, so that an oversized
           command can never overflow the per-thread buffer. */
        if (len >= MAXLEN) {
            fprintf(stderr,"ERROR: command on line %"PRId64" has %"PRId64" characters; at most %d are supported..exiting\n",
                    lineno,len,MAXLEN-1);
            exit(EXIT_FAILURE);
        }
        if (ncommands >= nlines) {
            fprintf(stderr,"ERROR: found more commands than the %"PRId64" lines counted in `%s'..exiting\n",nlines,argv[1]);
            exit(EXIT_FAILURE);
        }
        begining_line_pos[ncommands] = start;
        length[ncommands] = len;
        ncommands++;
    }

    /* Commands can take wildly different times, hence dynamic scheduling. */
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic)
#endif
    for (int64_t i = 0; i < ncommands; i++) {
        char execstring[MAXLEN]; /* private to each iteration/thread */
        memcpy(execstring, &buffer[begining_line_pos[i]], (size_t) length[i]);
        execstring[length[i]] = '\0';
        /* fprintf(stderr,"%"PRId64" %"PRId64" %"PRId64" %s\n",i,begining_line_pos[i],length[i],execstring); */
        run_system_call(execstring);
    }

    free(length);
    free(begining_line_pos);
    free(buffer);

    gettimeofday(&t1, NULL);
    print_time(t0, t1, "Entire code");
    return EXIT_SUCCESS;
}