| 
									
										
										
										
											2023-10-03 10:39:33 -04:00
										 |  |  | #!/usr/bin/env python3 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-01 18:45:28 +01:00
import os

import pandas as pd
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from es_cluster_config import (
    CLUSTER_URL,
    DATA_PATH,
    INDEX_NAME,
    MAPPINGS,
    form_elasticsearch_doc_dict,
)
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | print("Connecting to the Elasticsearch cluster.") | 
					
						
							| 
									
										
										
										
											2023-12-05 15:55:19 -05:00
										 |  |  | es = Elasticsearch(CLUSTER_URL, basic_auth=("elastic", "DkIedPPSCb"), request_timeout=30) | 
					
						
							| 
									
										
										
										
											2023-07-01 18:45:28 +01:00
										 |  |  | print(es.info()) | 
					
						
							|  |  |  | df = pd.read_csv(DATA_PATH).dropna().reset_index() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | print("Creating an Elasticsearch index for testing elasticsearch ingest.") | 
					
						
							| 
									
										
										
										
											2023-09-29 13:42:21 -05:00
										 |  |  | response = es.options(max_retries=5).indices.create(index=INDEX_NAME, mappings=MAPPINGS) | 
					
						
							|  |  |  | if response.meta.status != 200: | 
					
						
							|  |  |  |     raise RuntimeError("failed to create index") | 
					
						
							| 
									
										
										
										
											2023-07-01 18:45:28 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | print("Loading data into the index.") | 
					
						
							|  |  |  | bulk_data = [] | 
					
						
							|  |  |  | for i, row in df.iterrows(): | 
					
						
							|  |  |  |     bulk_data.append(form_elasticsearch_doc_dict(i, row)) | 
					
						
							|  |  |  | bulk(es, bulk_data) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | es.indices.refresh(index=INDEX_NAME) | 
					
						
							|  |  |  | response = es.cat.count(index=INDEX_NAME, format="json") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-05 15:55:19 -05:00
										 |  |  | print("Successfully created and filled an Elasticsearch index for testing elasticsearch ingest.") |