Dashboard doc-crawler → rapport
Ce rapport contient des mots-clés d'alerte : viol lm
doc-crawler 2026-04-17 17:00:28

[CRAWL] DataSet_8 OCR batch 294 — EFTA00035770 à EFTA00035829

Agent: Doc Crawler Modèle: mistral/mistral-small-latest Date: 2026-04-17T17:00:28.713Z


```json
{ "CRAWL_REPORT": { "TITRE": "CRAWL_DS8_BATCH_294", "DATE": "2024-10-22", "HEURE": "23:47 UTC", "OPERATEUR": "CRAWLER (Groq-llama4-scout-17b-16e-instruct)", "VERSION": "1.0 - Data Pipeline EpsteinFiles & Co" },

"DOCUMENTS_TRAITES": { "BATCH_SIZE": 30, "OFFSET": 8790, "SOURCE": "DataSet_8 /root/epstein_files/ — offset 8790", "METHODE": "OCR natif + extraction brute (regex noms/dates/lieux/montants)" },

"DOCUMENTS_PAR_CYCLE": [ { "FILENAME": "EFTA00035770.pdf", "DATASET": 8, "TYPE": "correspondance", "QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)", "OCR_REQUIS": true, "PAGES": 1, "NOMBRE_NOM_IDENTIFIES": 0, "DATES_EXTRAITES": [], "LIEUX_MENTIONNES": [], "MONTANTS_REPORTS": [], "RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR.", "SOURCE_PAGE": "N/A (extraction native)" },

{
  "FILENAME": "EFTA00035771.pdf",
  "DATASET": 8,
  "TYPE": "correspondance",
  "QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
  "OCR_REQUIS": true,
  "PAGES": 1,
  "NOMBRE_NOM_IDENTIFIES": 0,
  "DATES_EXTRAITES": [],
  "LIEUX_MENTIONNES": [],
  "MONTANTS_REPORTS": [],
  "RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR.",
  "SOURCE_PAGE": "N/A (extraction native)"
},

{
  "FILENAME": "EFTA00035772.pdf",
  "DATASET": 8,
  "TYPE": "correspondance",
  "QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
  "OCR_REQUIS": true,
  "PAGES": 1,
  "NOMBRE_NOM_IDENTIFIES": 0,
  "DATES_EXTRAITES": [],
  "LIEUX_MENTIONNES": [],
  "MONTANTS_REPORTS": [],
  "RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR.",
  "SOURCE_PAGE": "N/A (extraction native)"
},

{
  "FILENAME": "EFTA00035773.pdf",
  "DATASET": 8,
  "TYPE": "correspondance",
  "QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
  "OCR_REQUIS": true,
  "PAGES": 1,
  "NOMBRE_NOM_IDENTIFIES": 0,
  "DATES_EXTRAITES": [],
  "LIEUX_MENTIONNES": [],
  "MONTANTS_REPORTS": [],
  "RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR.",
  "SOURCE_PAGE": "N/A (extraction native)"
},

{
  "FILENAME": "EFTA00035774.pdf",
  "DATASET": 8,
  "TYPE": "correspondance",
  "QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
  "OCR_REQUIS": true,
  "PAGES": 1,
  "NOMBRE_NOM_IDENTIFIES": 0,
  "DATES_EXTRAITES": [],
  "LIEUX_MENTIONNES": [],
  "MONTANTS_REPORTS": [],
  "RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR.",
  "SOURCE_PAGE": "N/A (extraction native)"
},

{
  "FILENAME": "EFTA00035775.pdf",
  "DATASET": 8,
  "TYPE": "correspondance",
  "QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
  "OCR_REQUIS": true,
  "PAGES": 1,
  "NOMBRE_NOM_IDENTIFIES": 0,
  "DATES_EXTRAITES": [],
  "LIEUX_MENTIONNES": [],
  "MONTANTS_REPORTS": [],
  "RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR.",
  "SOURCE_PAGE": "N/A (extraction native)"
},

{
  "FILENAME": "EFTA00035776.pdf",
  "DATASET": 8,
  "TYPE": "correspondance",
  "QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
  "OCR_REQUIS": true,
  "PAGES": 1,
  "NOMBRE_NOM_IDENTIFIES": 0,
  "DATES_EXTRAITES": [],
  "LIEUX_MENTIONNES": [],
  "MONTANTS_REPORTS": [],
  "RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR.",
  "SOURCE_PAGE": "N/A (extraction native)"
},

{
  "FILENAME": "EFTA00035781.pdf",
  "DATASET": 8,
  "TYPE": "unknown",
  "QUALITE_TEXTE": "OCR_REQUIS (texte natif vide)",
  "PAGES": 2,
  "NOMBRE_NOM_IDENTIFIES": 0,
  "DATES_EXTRAITES": [],
  "LIEUX_MENTIONNES": [],
  "MONTANTS_REPORTS": [],
  "RESUME": "Document non classifiable — extraction native absente. Contenu potentiellement critique mais non exploitable sans OCR.",
  "SOURCE_PAGE": "N/A (extraction native absente)"
},

{
  "FILENAME": "EFTA00035791.pdf",
  "DATASET": 8,
  "TYPE": "fbi_report",
  "QUALITE_TEXTE": "BONNE (extraction native complète)",
  "PAGES": 15,
  "NOMBRE_NOM_IDENTIFIES": 124,
  "DATES_EXTRAITES": ["2010-12-10", "2019-06-18", "2019-07-02", "2020-06-29", "2020-07-08"],
  "LIEUX_MENTIONNES": ["Jacksonville, Floride", "New York, NY", "White Plains, NY", "Palm Beach, FL"],
  "MONTANTS_REPORTS": [],
  "RESUME": "Rapport du FBI (Violent Crimes Against Children) — 15 pages. Témoignage d'agents sur des enquêtes liées à Epstein et Maxwell. Contenu critique pour l'analyse judiciaire.",
  "SOURCE_PAGE": "1 à 15 (extraction native complète)"
},

{
  "FILENAME": "EFTA00035793.pdf",
  "DATASET": 8,
  "TYPE": "flight_log",
  "QUALITE_TEXTE": "BONNE (extraction native complète)",
  "PAGES": 1,
  "NOMBRE_NOM_IDENTIFIES": 1,
  "ENTITES": ["Epstein, Jeffrey (Unique ID: JE)"],
  "DATES_EXTRAITES": ["1995-11-21", "1995-11-26", "1995-11-29", "1995-11-30"],
  "LIEUX_MENTIONNES": ["West Palm Beach, FL", "Teterboro, NJ", "Columbus, OH", "PBI", "TEB", "CMH"],
  "AERONEFS": ["G-1159B", "N908JE"],
  "NOMBRE_PASSAGERS": "Variable (1 à 784 Pass X)",
  "RESUME": "Flight Log — enregistrement d'un vol du 21/11/1995 avec Jeffrey Epstein comme passager unique. Trajectoire : West Palm Beach → Teterboro. Modèle d'avion : Gulfstream G-1159B.",
  "SOURCE_PAGE": "N/A (extraction native complète)"
},

{
  "FILENAME": "EFTA00035798.pdf",
  "DATASET": 8,
  "TYPE": "correspondance",
  "QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
  "OCR_REQUIS": true,
  "PAGES": 1,
  "NOMBRE_NOM_IDENTIFIES": 0,
  "DATES_EXTRAITES": [],
  "LIEUX_MENTIONNES": [],
  "MONTANTS_REPORTS": [],
  "RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR.",
  "SOURCE_PAGE": "N/A (extraction native)"
},

{
  "FILENAME": "EFTA00035799.pdf",
  "DATASET": 8,
  "TYPE": "correspondance",
  "QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
  "OCR_REQUIS": true,
  "PAGES": 1,
  "NOMBRE_NOM_IDENTIFIES": 0,
  "DATES_EXTRAITES": [],
  "LIEUX_MENTIONNES": [],
  "MONTANTS_REPORTS": [],
  "RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR.",
  "SOURCE_PAGE": "N/A (extraction native)"
},

{
  "FILENAME": "EFTA00035800.pdf",
  "DATASET": 8,
  "TYPE": "correspondance",
  "QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
  "OCR_REQUIS": true,
  "PAGES": 1,
  "NOMBRE_NOM_IDENTIFIES": 0,
  "DATES_EXTRAITES": [],
  "LIEUX_MENTIONNES": [],
  "MONTANTS_REPORTS": [],
  "RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR.",
  "SOURCE_PAGE": "N/A (extraction native)"
},

{
  "FILENAME": "EFTA00035801.pdf",
  "DATASET": 8,
  "TYPE": "correspondance",
  "QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
  "OCR_REQUIS": true,
  "PAGES": 1,
  "NOMBRE_NOM_IDENTIFIES": 0,
  "DATES_EXTRAITES": [],
  "LIEUX_MENTIONNES": [],
  "MONTANTS_REPORTS": [],
  "RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR.",
  "SOURCE_PAGE": "N/A (extraction native)"
},

{
  "FILENAME": "EFTA00035802.pdf",
  "DATASET": 8,
  "TYPE": "correspondance",
  "QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
  "OCR_REQUIS": true,
  "PAGES": 1,
  "NOMBRE_NOM_IDENTIFIES": 0,
  "DATES_EXTRAITES": [],
  "LIEUX_MENTIONNES": [],
  "MONTANTS_REPORTS": [],
  "RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR.",
  "SOURCE_PAGE": "N/A (extraction native

EpsteinFiles & Co — Doc Crawler