filtering out stupid ads

This commit is contained in:
Jake Poznanski 2024-10-02 15:36:41 +00:00
parent 6ef8226347
commit 549e07bed0

View File

@@ -85,6 +85,9 @@ class PdfFilter:
"file",
"save",
"casino",
"viagra",
"cialis",
"ciprofloxacin",
}
seo_word_probs = {word: self.ngram_log_probs[word] for word in seo_words}