| 
									
										
										
										
											2024-01-02 15:29:18 +08:00
										 |  |  | import datetime | 
					
						
							|  |  |  | import time | 
					
						
							| 
									
										
										
										
											2024-01-12 12:34:01 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-02 15:29:18 +08:00
										 |  |  | import click | 
					
						
							| 
									
										
										
										
											2024-07-20 01:29:25 +08:00
										 |  |  | from sqlalchemy import text | 
					
						
							| 
									
										
										
										
											2024-01-12 12:34:01 +08:00
										 |  |  | from werkzeug.exceptions import NotFound | 
					
						
							| 
									
										
										
										
											2024-01-02 15:29:18 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-06 13:21:13 +08:00
										 |  |  | import app | 
					
						
							| 
									
										
										
										
											2024-07-12 16:51:43 +08:00
										 |  |  | from configs import dify_config | 
					
						
							| 
									
										
										
										
											2024-02-06 13:21:13 +08:00
										 |  |  | from extensions.ext_database import db | 
					
						
							|  |  |  | from models.dataset import Embedding | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-02 15:29:18 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-15 12:54:05 +08:00
										 |  |  | @app.celery.task(queue="dataset") | 
					
						
							| 
									
										
										
										
											2024-01-02 15:29:18 +08:00
										 |  |  | def clean_embedding_cache_task(): | 
					
						
							| 
									
										
										
										
											2024-08-15 12:54:05 +08:00
										 |  |  |     click.echo(click.style("Start clean embedding cache.", fg="green")) | 
					
						
							| 
									
										
										
										
											2024-11-01 17:25:31 +08:00
										 |  |  |     clean_days = int(dify_config.PLAN_SANDBOX_CLEAN_DAY_SETTING) | 
					
						
							| 
									
										
										
										
											2024-01-02 15:29:18 +08:00
										 |  |  |     start_at = time.perf_counter() | 
					
						
							|  |  |  |     thirty_days_ago = datetime.datetime.now() - datetime.timedelta(days=clean_days) | 
					
						
							|  |  |  |     while True: | 
					
						
							|  |  |  |         try: | 
					
						
							| 
									
										
										
										
											2024-08-15 12:54:05 +08:00
										 |  |  |             embedding_ids = ( | 
					
						
							|  |  |  |                 db.session.query(Embedding.id) | 
					
						
							|  |  |  |                 .filter(Embedding.created_at < thirty_days_ago) | 
					
						
							|  |  |  |                 .order_by(Embedding.created_at.desc()) | 
					
						
							|  |  |  |                 .limit(100) | 
					
						
							|  |  |  |                 .all() | 
					
						
							|  |  |  |             ) | 
					
						
							| 
									
										
										
										
											2024-07-20 01:29:25 +08:00
										 |  |  |             embedding_ids = [embedding_id[0] for embedding_id in embedding_ids] | 
					
						
							| 
									
										
										
										
											2024-01-02 15:29:18 +08:00
										 |  |  |         except NotFound: | 
					
						
							|  |  |  |             break | 
					
						
							| 
									
										
										
										
											2024-07-20 01:29:25 +08:00
										 |  |  |         if embedding_ids: | 
					
						
							| 
									
										
										
										
											2024-07-20 09:04:21 +08:00
										 |  |  |             for embedding_id in embedding_ids: | 
					
						
							| 
									
										
										
										
											2024-08-15 12:54:05 +08:00
										 |  |  |                 db.session.execute( | 
					
						
							|  |  |  |                     text("DELETE FROM embeddings WHERE id = :embedding_id"), {"embedding_id": embedding_id} | 
					
						
							|  |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2024-07-20 01:29:25 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |             db.session.commit() | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             break | 
					
						
							| 
									
										
										
										
											2024-01-02 15:29:18 +08:00
										 |  |  |     end_at = time.perf_counter() | 
					
						
							| 
									
										
										
										
											2024-08-15 12:54:05 +08:00
										 |  |  |     click.echo(click.style("Cleaned embedding cache from db success latency: {}".format(end_at - start_at), fg="green")) |