mirror of
				https://github.com/deepset-ai/haystack.git
				synced 2025-10-31 09:49:48 +00:00 
			
		
		
		
	Fix for allocate memory exception by specifing max_processes (#910)
This commit is contained in:
		
							parent
							
								
									f954f0db38
								
							
						
					
					
						commit
						47dc069afe
					
				| @ -270,6 +270,7 @@ class DensePassageRetriever(BaseRetriever): | |||||||
|               train_filename: str, |               train_filename: str, | ||||||
|               dev_filename: str = None, |               dev_filename: str = None, | ||||||
|               test_filename: str = None, |               test_filename: str = None, | ||||||
|  |               max_processes: int = 128, | ||||||
|               dev_split: float = 0, |               dev_split: float = 0, | ||||||
|               batch_size: int = 2, |               batch_size: int = 2, | ||||||
|               embed_title: bool = True, |               embed_title: bool = True, | ||||||
| @ -295,6 +296,8 @@ class DensePassageRetriever(BaseRetriever): | |||||||
|         :param train_filename: training filename |         :param train_filename: training filename | ||||||
|         :param dev_filename: development set filename, file to be used by model in eval step of training |         :param dev_filename: development set filename, file to be used by model in eval step of training | ||||||
|         :param test_filename: test set filename, file to be used by model in test step after training |         :param test_filename: test set filename, file to be used by model in test step after training | ||||||
|  |         :param max_processes: the maximum number of processes to spawn in the multiprocessing.Pool used in DataSilo. | ||||||
|  |                               It can be set to 1 to disable the use of multiprocessing or make debugging easier. | ||||||
|         :param dev_split: The proportion of the train set that will sliced. Only works if dev_filename is set to None |         :param dev_split: The proportion of the train set that will sliced. Only works if dev_filename is set to None | ||||||
|         :param batch_size: total number of samples in 1 batch of data |         :param batch_size: total number of samples in 1 batch of data | ||||||
|         :param embed_title: whether to concatenate passage title with each passage. The default setting in official DPR embeds passage title with the corresponding passage |         :param embed_title: whether to concatenate passage title with each passage. The default setting in official DPR embeds passage title with the corresponding passage | ||||||
| @ -333,7 +336,7 @@ class DensePassageRetriever(BaseRetriever): | |||||||
| 
 | 
 | ||||||
|         self.model.connect_heads_with_processor(self.processor.tasks, require_labels=True) |         self.model.connect_heads_with_processor(self.processor.tasks, require_labels=True) | ||||||
| 
 | 
 | ||||||
|         data_silo = DataSilo(processor=self.processor, batch_size=batch_size, distributed=False) |         data_silo = DataSilo(processor=self.processor, batch_size=batch_size, distributed=False, max_processes=max_processes) | ||||||
| 
 | 
 | ||||||
|         # 5. Create an optimizer |         # 5. Create an optimizer | ||||||
|         self.model, optimizer, lr_schedule = initialize_optimizer( |         self.model, optimizer, lr_schedule = initialize_optimizer( | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Moshe Berchansky
						Moshe Berchansky