many.c

Go to the documentation of this file.
00001 /*                          M A N Y . C
00002  * BRL-CAD
00003  *
00004  * Copyright (c) 1999-2006 United States Government as represented by
00005  * the U.S. Army Research Laboratory.
00006  *
00007  * This library is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public License
00009  * as published by the Free Software Foundation; either version 2 of
00010  * the License, or (at your option) any later version.
00011  *
00012  * This library is distributed in the hope that it will be useful, but
00013  * WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Library General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with this file; see the file named COPYING for more
00019  * information.
00020  */
00021 
00022 /** @addtogroup librt */
00023 
00024 /*@{*/
00025 /** @file many.c
00026  *  Wrapper routines to help fire multiple rays in parallel,
00027  *  without exposing the caller to the details of running in parallel.
00028  *
00029  *  Authors -
00030  *      Michael John Muuss
00031  *      Christopher T. Johnson
00032  *
00033  *  Source -
00034  *      The U. S. Army Research Laboratory
00035  *      Aberdeen Proving Ground, Maryland  21005-5068  USA
00036  */
00037 /*@}*/
00038 
00039 #ifndef lint
00040 static const char RCSid[] = "@(#)$Header: /cvsroot/brlcad/brlcad/src/librt/many.c,v 14.12 2006/09/16 02:04:25 lbutler Exp $ (ARL)";
00041 #endif
00042 
00043 #include "common.h"
00044 
00045 
00046 
00047 #include <stdio.h>
00048 #include <math.h>
00049 #include "machine.h"
00050 #include "bu.h"
00051 #include "vmath.h"
00052 #include "bn.h"
00053 #include "raytrace.h"
00054 
00055 /* For communication between interface routine and each of the threads */
00056 struct rt_many_internal  {
00057         long                    magic;
00058         long                    cur_index;              /* semaphored */
00059         long                    max_index;
00060         const struct application *proto_ap;
00061         struct resource         *resources;
00062         int                     (*callback) BU_ARGS((struct application *, int index));
00063         int                     stop_worker;
00064         int                     sem_chunk;
00065 };
00066 #define RT_MANY_INTERNAL_MAGIC  0x526d6970      /* Rmip */
00067 #define RT_CK_RMI(_p)   BU_CKMAG(_p, RT_MANY_INTERNAL_MAGIC, "rt_many_internal")
00068 
00069 /*
00070  *                      R T _ S H O O T _ M A N Y _ R A Y S _ W O R K E R
00071  *
00072  *  Internal helper routine for rt_shoot_many_rays().
00073  *  Runs in PARALLEL, one instance per thread.
00074  *
00075  *  In order to reduce the traffic through the critical section,
00076  *  a multiple pixel block may be removed from the work queue at once.
00077  */
00078 void
00079 rt_shoot_many_rays_worker(int cpu, genptr_t arg)
00080 {
00081         LOCAL struct application app;
00082         struct rt_many_internal *rmip = (struct rt_many_internal *)arg;
00083 
00084         if( cpu >= MAX_PSW )  {
00085                 bu_log("rt_shoot_many_rays_worker() cpu %d > MAX_PSW %d, array overrun\n", cpu, MAX_PSW);
00086                 rt_bomb("rt_shoot_many_rays_worker() cpu > MAX_PSW, array overrun\n");
00087         }
00088 
00089         RT_CK_RMI(rmip);
00090         RT_CK_RESOURCE( &rmip->resources[cpu] );
00091         RT_CK_APPLICATION( rmip->proto_ap );
00092 
00093         app = *rmip->proto_ap;                  /* struct copy */
00094         app.a_resource = &rmip->resources[cpu];
00095 
00096         while(1)  {
00097                 register long   index;
00098                 register long   lim;
00099 
00100                 if( rmip->stop_worker )  break;
00101 
00102                 bu_semaphore_acquire( RT_SEM_WORKER );
00103                 index = rmip->cur_index;
00104                 rmip->cur_index += rmip->sem_chunk;
00105                 bu_semaphore_release( RT_SEM_WORKER );
00106 
00107                 lim = index + rmip->sem_chunk;
00108                 for( ; index < lim; index++ )  {
00109                         if( index >= rmip->max_index )  return;
00110 
00111                         /*
00112                          * a_x is set here to get differentiated LIBRT
00113                          * debugging messages even from a trivial callback.
00114                          * The callback may, of course, override it.
00115                          */
00116                         app.a_x = index;
00117 
00118                         /* Allow our user to do per-ray init of application struct */
00119                         if( (*rmip->callback)( &app, index ) < 0 )  {
00120                                 rmip->stop_worker = 1;
00121                                 break;
00122                         }
00123 
00124                         (void)rt_shootray( &app );
00125                 }
00126         }
00127 }
00128 
00129 /*
00130  *                      R T _ S H O O T _ M A N Y _ R A Y S
00131  *
00132  *  A convenience routine for application developers who wish to fire a
00133  *  large but fixed number of rays in parallel,
00134  *  without wanting to create a parallel "self dispatcher"
00135  *  routine of their own.
00136  *
00137  *  Basic setup of the application structure is done by the caller,
00138  *  and provided via the proto_ap pointer.
00139  *
00140  *  Per-ray setup of the application structure is done by the callback
00141  *  routine, which takes an index in the range 0..(nrays-1) and uses that
00142  *  to fill in each specific instance of application structure as required.
00143  *
00144  *  The a_hit() and a_miss() routines must save any results;
00145  *  their formal return codes, and the return code from rt_shootray(),
00146  *  are ignored.
00147  *
00148  *  a_x is changed by this wrapper, and may be overridden by the callback.
00149  *
00150  *  Note that the cost of spawning threads is sufficiently expensive
00151  *  that 'nrays' should be at least dozens or hundreds to get
00152  *  a real benefit from parallelism.
00153  *
00154  *  Return codes expected from the callback() -
00155  *      -1      End processing before all nrays have been fired.
00156  *       0      Normal return, proceed with firing the ray.
00157  *
00158  *  Note that bu_parallel() is not re-entrant, so you can't have an
00159  *  a_hit() routine which is already running in parallel call into
00160  *  this routine and expect to get even more parallelism.
00161  *  This is not a limitation, as you usually can't construct more CPUs.
00162  */
00163 void
00164 rt_shoot_many_rays(const struct application *proto_ap, int (*callback) (struct application *, int), int ncpus, long int nrays, struct resource *resources)
00165 
00166 
00167 
00168 
00169                                         /* resources[ncpus] */
00170 {
00171         struct rt_many_internal rmi;
00172         int     i;
00173 
00174         RT_CK_APPLICATION(proto_ap);
00175         for( i=0; i < ncpus; i++ )  {
00176                 RT_CK_RESOURCE( &resources[i] );
00177         }
00178         rmi.resources = resources;
00179 
00180         rmi.magic = RT_MANY_INTERNAL_MAGIC;
00181         rmi.stop_worker = 0;
00182         rmi.cur_index = 0;
00183         rmi.max_index = nrays;
00184         rmi.proto_ap = proto_ap;
00185         rmi.callback = callback;
00186         rmi.sem_chunk = ncpus;
00187 
00188         if( !rt_g.rtg_parallel || ncpus <= 1 )  {
00189                 /* The 1-cpu case is supported for testing & generality. */
00190                 rt_shoot_many_rays_worker( 0, (genptr_t)&rmi );
00191         } else {
00192                 bu_parallel( rt_shoot_many_rays_worker, ncpus, (genptr_t)&rmi );
00193         }
00194 }
00195 
00196 /*
00197  * Local Variables:
00198  * mode: C
00199  * tab-width: 8
00200  * c-basic-offset: 4
00201  * indent-tabs-mode: t
00202  * End:
00203  * ex: shiftwidth=4 tabstop=8
00204  */

Generated on Mon Sep 18 01:24:52 2006 for BRL-CAD by  doxygen 1.4.6