| 
									
										
										
										
											2024-01-02 15:29:18 +08:00
										 |  |  | import datetime | 
					
						
							|  |  |  | import time | 
					
						
							| 
									
										
										
										
											2024-01-12 12:34:01 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-02 15:29:18 +08:00
										 |  |  | import click | 
					
						
							| 
									
										
										
										
											2024-01-12 12:34:01 +08:00
										 |  |  | from flask import current_app | 
					
						
							|  |  |  | from werkzeug.exceptions import NotFound | 
					
						
							| 
									
										
										
										
											2024-01-02 15:29:18 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-06 13:21:13 +08:00
										 |  |  | import app | 
					
						
							|  |  |  | from extensions.ext_database import db | 
					
						
							|  |  |  | from models.dataset import Embedding | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-02 15:29:18 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | @app.celery.task(queue='dataset') | 
					
						
							|  |  |  | def clean_embedding_cache_task(): | 
					
						
							|  |  |  |     click.echo(click.style('Start clean embedding cache.', fg='green')) | 
					
						
							|  |  |  |     clean_days = int(current_app.config.get('CLEAN_DAY_SETTING')) | 
					
						
							|  |  |  |     start_at = time.perf_counter() | 
					
						
							|  |  |  |     thirty_days_ago = datetime.datetime.now() - datetime.timedelta(days=clean_days) | 
					
						
							|  |  |  |     page = 1 | 
					
						
							|  |  |  |     while True: | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             embeddings = db.session.query(Embedding).filter(Embedding.created_at < thirty_days_ago) \ | 
					
						
							|  |  |  |                 .order_by(Embedding.created_at.desc()).paginate(page=page, per_page=100) | 
					
						
							|  |  |  |         except NotFound: | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |         for embedding in embeddings: | 
					
						
							|  |  |  |             db.session.delete(embedding) | 
					
						
							|  |  |  |         db.session.commit() | 
					
						
							|  |  |  |         page += 1 | 
					
						
							|  |  |  |     end_at = time.perf_counter() | 
					
						
							|  |  |  |     click.echo(click.style('Cleaned embedding cache from db success latency: {}'.format(end_at - start_at), fg='green')) |