AMD GCN: Implement circular buffering.
The GCN port outputs stdout and stderr via a shared-memory interface. Previously the buffer was limited to 1000 write operations, which was enough for testing purposes, but was easy to exhaust.

This patch implements a new circular buffering system allowing a greater amount of output. The interface must allow hundreds of hardware threads to output simultaneously. The new limit is UINT32_MAX write operations.

Unfortunately, there's no way to tell if the host side has also been updated. This code will misbehave unless the gcn-run from GCC is also updated (although it's fine the other way around), but that patch has already been committed.

OK?

Andrew Stubbs
Mentor Graphics / CodeSourcery
This commit is contained in:
		
				
					committed by
					
						 Corinna Vinschen
						Corinna Vinschen
					
				
			
			
				
	
			
			
			
						parent
						
							38322b9bf6
						
					
				
				
					commit
					62c66a39bd
				
			| @@ -26,10 +26,14 @@ | |||||||
|   |   | ||||||
|    The next_output counter must be atomically incremented for each |    The next_output counter must be atomically incremented for each | ||||||
|    print output.  Only when the print data is fully written can the |    print output.  Only when the print data is fully written can the | ||||||
|    "written" flag be set.  */ |    "written" flag be set. | ||||||
|  |  | ||||||
|  |    The buffer is circular; the host increments the consumed counter | ||||||
|  |    and clears the written flag as it goes, opening up slots for reuse. | ||||||
|  |    The counters always use absolute numbers.  */ | ||||||
| struct output { | struct output { | ||||||
|   int return_value; |   int return_value; | ||||||
|   int next_output; |   unsigned int next_output; | ||||||
|   struct printf_data { |   struct printf_data { | ||||||
|     int written; |     int written; | ||||||
|     char msg[128]; |     char msg[128]; | ||||||
| @@ -39,7 +43,8 @@ struct output { | |||||||
|       double dvalue; |       double dvalue; | ||||||
|       char text[128]; |       char text[128]; | ||||||
|     }; |     }; | ||||||
|   } queue[1000]; |   } queue[1024]; | ||||||
|  |   unsigned int consumed; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| _READ_WRITE_RETURN_TYPE write (int fd, const void *buf, size_t count) | _READ_WRITE_RETURN_TYPE write (int fd, const void *buf, size_t count) | ||||||
| @@ -55,33 +60,49 @@ _READ_WRITE_RETURN_TYPE write (int fd, const void *buf, size_t count) | |||||||
|   struct output *data = (struct output *)kernargs[2]; |   struct output *data = (struct output *)kernargs[2]; | ||||||
|  |  | ||||||
|   /* Each output slot allows 256 bytes, so reserve as many as we need. */ |   /* Each output slot allows 256 bytes, so reserve as many as we need. */ | ||||||
|   int slot_count = ((count+1)/256)+1; |   unsigned int slot_count = ((count+1)/256)+1; | ||||||
|   int index = __atomic_fetch_add (&data->next_output, slot_count, |   unsigned int index = __atomic_fetch_add (&data->next_output, slot_count, | ||||||
| 				  __ATOMIC_ACQUIRE); | 					   __ATOMIC_ACQUIRE); | ||||||
|  |  | ||||||
|  |   if ((unsigned int)(index + slot_count) < data->consumed) | ||||||
|  |     { | ||||||
|  |       /* Overflow.  */ | ||||||
|  |       errno = EFBIG; | ||||||
|  |       return 0; | ||||||
|  |     } | ||||||
|  |  | ||||||
|   for (int c = count; |   for (int c = count; | ||||||
|        c >= 0 && index < 1000; |        c >= 0; | ||||||
|        buf += 256, c -= 256, index++) |        buf += 256, c -= 256, index++) | ||||||
|     { |     { | ||||||
|  |       unsigned int slot = index % 1024; | ||||||
|  |  | ||||||
|  |       /* Spinlock while the host catches up.  */ | ||||||
|  |       if (index >= 1024) | ||||||
|  | 	while (__atomic_load_n (&data->consumed, __ATOMIC_ACQUIRE) | ||||||
|  | 	       <= (index - 1024)) | ||||||
|  | 	  asm ("s_sleep 64"); | ||||||
|  |  | ||||||
|       if (c < 128) |       if (c < 128) | ||||||
| 	{ | 	{ | ||||||
| 	  memcpy (data->queue[index].msg, buf, c); | 	  memcpy (data->queue[slot].msg, buf, c); | ||||||
| 	  data->queue[index].msg[c] = '\0'; | 	  data->queue[slot].msg[c] = '\0'; | ||||||
| 	  data->queue[index].text[0] = '\0'; | 	  data->queue[slot].text[0] = '\0'; | ||||||
| 	} | 	} | ||||||
|       else if (c < 256) |       else if (c < 256) | ||||||
| 	{ | 	{ | ||||||
| 	  memcpy (data->queue[index].msg, buf, 128); | 	  memcpy (data->queue[slot].msg, buf, 128); | ||||||
| 	  memcpy (data->queue[index].text, buf+128, c-128); | 	  memcpy (data->queue[slot].text, buf+128, c-128); | ||||||
| 	  data->queue[index].text[c-128] = '\0'; | 	  data->queue[slot].text[c-128] = '\0'; | ||||||
| 	} | 	} | ||||||
|       else |       else | ||||||
| 	{ | 	{ | ||||||
| 	  memcpy (data->queue[index].msg, buf, 128); | 	  memcpy (data->queue[slot].msg, buf, 128); | ||||||
| 	  memcpy (data->queue[index].text, buf+128, 128); | 	  memcpy (data->queue[slot].text, buf+128, 128); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|       data->queue[index].type = 3; /* Raw.  */ |       data->queue[slot].type = 3; /* Raw.  */ | ||||||
|       __atomic_store_n (&data->queue[index].written, 1, __ATOMIC_RELEASE); |       __atomic_store_n (&data->queue[slot].written, 1, __ATOMIC_RELEASE); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   return count; |   return count; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user