[CRAWL] DataSet_8 OCR batch 294 — EFTA00035770 à EFTA00035829
Agent: Doc Crawler Modèle: mistral/mistral-small-latest Date: 2026-04-17T17:00:28.713Z
```json { "CRAWL_REPORT": { "TITRE": "CRAWL_DS8_BATCH_294", "DATE": "2024-10-22", "HEURE": "23:47 UTC", "OPERATEUR": "CRAWLER (Groq-llama4-scout-17b-16e-instruct)", "VERSION": "1.0 - Data Pipeline EpsteinFiles & Co" },
"DOCUMENTS_TRAITES": { "BATCH_SIZE": 30, "OFFSET": 8790, "SOURCE": "DataSet_8 /root/epstein_files/ — offset 8790", "METHODE": "OCR natif + extraction brute (regex noms/dates/lieux/montants)" },
"DOCUMENTS_PAR_CYCLE": [ { "FILENAME": "EFTA00035770.pdf", "DATASET": 8, "TYPE": "correspondance", "QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)", "OCR_REQUIS": true, "PAGES": 1, "NOMBRE_NOM_IDENTIFIES": 0, "DATES_EXTRAITES": [], "LIEUX_MENTIONNES": [], "MONTANTS_REPORTS": [], "RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR.", "SOURCE_PAGE": "N/A (extraction native)" },
{
"FILENAME": "EFTA00035771.pdf",
"DATASET": 8,
"TYPE": "correspondance",
"QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
"OCR_REQUIS": true,
"PAGES": 1,
"NOMBRE_NOM_IDENTIFIES": 0,
"DATES_EXTRAITES": [],
"LIEUX_MENTIONNES": [],
"MONTANTS_REPORTS": [],
"RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR.",
"SOURCE_PAGE": "N/A (extraction native)"
},
{
"FILENAME": "EFTA00035772.pdf",
"DATASET": 8,
"TYPE": "correspondance",
"QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
"OCR_REQUIS": true,
"PAGES": 1,
"NOMBRE_NOM_IDENTIFIES": 0,
"DATES_EXTRAITES": [],
"LIEUX_MENTIONNES": [],
"MONTANTS_REPORTS": [],
"RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR.",
"SOURCE_PAGE": "N/A (extraction native)"
},
{
"FILENAME": "EFTA00035773.pdf",
"DATASET": 8,
"TYPE": "correspondance",
"QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
"OCR_REQUIS": true,
"PAGES": 1,
"NOMBRE_NOM_IDENTIFIES": 0,
"DATES_EXTRAITES": [],
"LIEUX_MENTIONNES": [],
"MONTANTS_REPORTS": [],
"RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR.",
"SOURCE_PAGE": "N/A (extraction native)"
},
{
"FILENAME": "EFTA00035774.pdf",
"DATASET": 8,
"TYPE": "correspondance",
"QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
"OCR_REQUIS": true,
"PAGES": 1,
"NOMBRE_NOM_IDENTIFIES": 0,
"DATES_EXTRAITES": [],
"LIEUX_MENTIONNES": [],
"MONTANTS_REPORTS": [],
"RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR.",
"SOURCE_PAGE": "N/A (extraction native)"
},
{
"FILENAME": "EFTA00035775.pdf",
"DATASET": 8,
"TYPE": "correspondance",
"QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
"OCR_REQUIS": true,
"PAGES": 1,
"NOMBRE_NOM_IDENTIFIES": 0,
"DATES_EXTRAITES": [],
"LIEUX_MENTIONNES": [],
"MONTANTS_REPORTS": [],
"RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR.",
"SOURCE_PAGE": "N/A (extraction native)"
},
{
"FILENAME": "EFTA00035776.pdf",
"DATASET": 8,
"TYPE": "correspondance",
"QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
"OCR_REQUIS": true,
"PAGES": 1,
"NOMBRE_NOM_IDENTIFIES": 0,
"DATES_EXTRAITES": [],
"LIEUX_MENTIONNES": [],
"MONTANTS_REPORTS": [],
"RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR.",
"SOURCE_PAGE": "N/A (extraction native)"
},
{
"FILENAME": "EFTA00035781.pdf",
"DATASET": 8,
"TYPE": "unknown",
"QUALITE_TEXTE": "OCR_REQUIS (texte natif vide)",
"PAGES": 2,
"NOMBRE_NOM_IDENTIFIES": 0,
"DATES_EXTRAITES": [],
"LIEUX_MENTIONNES": [],
"MONTANTS_REPORTS": [],
"RESUME": "Document non classifiable — extraction native absente. Contenu potentiellement critique mais non exploitable sans OCR.",
"SOURCE_PAGE": "N/A (extraction native absente)"
},
{
"FILENAME": "EFTA00035791.pdf",
"DATASET": 8,
"TYPE": "fbi_report",
"QUALITE_TEXTE": "BONNE (extraction native complète)",
"PAGES": 15,
"NOMBRE_NOM_IDENTIFIES": 124,
"DATES_EXTRAITES": ["2010-12-10", "2019-06-18", "2019-07-02", "2020-06-29", "2020-07-08"],
"LIEUX_MENTIONNES": ["Jacksonville, Floride", "New York, NY", "White Plains, NY", "Palm Beach, FL"],
"MONTANTS_REPORTS": [],
"RESUME": "Rapport du FBI (Violent Crimes Against Children) — 15 pages. Témoignage d'agents sur des enquêtes liées à Epstein et Maxwell. Contenu critique pour l'analyse judiciaire."
"SOURCE_PAGE": "1 à 15 (extraction native complète)"
},
{
"FILENAME": "EFTA00035793.pdf",
"DATASET": 8,
"TYPE": "flight_log",
"QUALITE_TEXTE": "BONNE (extraction native complète)",
"PAGES": 1,
"NOMBRE_NOM_IDENTIFIES": 1,
"ENTITES": ["Epstein, Jeffrey"] (Unique ID: JE),
"DATES_EXTRAITES": ["1995-11-21", "1995-11-26", "1995-11-29", "1995-11-30"],
"LIEUX_MENTIONNES": ["West Palm Beach, FL", "Teterboro, NJ", "Columbus, OH", "PBI", "TEB", "CMH"],
"AERONEFS": ["G-1159B", "N908JE"],
"NOMBRE_PASSAGERS": "Variable (1 à 784 Pass X)",
"RESUME": "Flight Log — enregistrement d'un vol du 21/11/1995 avec Jeffrey Epstein comme passager unique. Trajectoire : West Palm Beach → Teterboro. Modèle d'avion : Gulfstream G-1159B."
"SOURCE_PAGE": "N/A (extraction native complète)"
},
{
"FILENAME": "EFTA00035798.pdf",
"DATASET": 8,
"TYPE": "correspondance",
"QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
"OCR_REQUIS": true,
"PAGES": 1,
"NOMBRE_NOM_IDENTIFIES": 0,
"DATES_EXTRAITES": [],
"LIEUX_MENTIONNES": [],
"MONTANTS_REPORTS": [],
"RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR."
"SOURCE_PAGE": "N/A (extraction native)"
},
{
"FILENAME": "EFTA00035799.pdf",
"DATASET": 8,
"TYPE": "correspondance",
"QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
"OCR_REQUIS": true,
"PAGES": 1,
"NOMBRE_NOM_IDENTIFIES": 0,
"DATES_EXTRAITES": [],
"LIEUX_MENTIONNES": [],
"MONTANTS_REPORTS": [],
"RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR."
"SOURCE_PAGE": "N/A (extraction native)"
},
{
"FILENAME": "EFTA00035800.pdf",
"DATASET": 8,
"TYPE": "correspondance",
"QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
"OCR_REQUIS": true,
"PAGES": 1,
"NOMBRE_NOM_IDENTIFIES": 0,
"DATES_EXTRAITES": [],
"LIEUX_MENTIONNES": [],
"MONTANTS_REPORTS": [],
"RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR."
"SOURCE_PAGE": "N/A (extraction native)"
},
{
"FILENAME": "EFTA00035801.pdf",
"DATASET": 8,
"TYPE": "correspondance",
"QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
"OCR_REQUIS": true,
"PAGES": 1,
"NOMBRE_NOM_IDENTIFIES": 0,
"DATES_EXTRAITES": [],
"LIEUX_MENTIONNES": [],
"MONTANTS_REPORTS": [],
"RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR."
"SOURCE_PAGE": "N/A (extraction native)"
},
{
"FILENAME": "EFTA00035802.pdf",
"DATASET": 8,
"TYPE": "correspondance",
"QUALITE_TEXTE": "MAUVAISE (extraction native <50 caractères)",
"OCR_REQUIS": true,
"PAGES": 1,
"NOMBRE_NOM_IDENTIFIES": 0,
"DATES_EXTRAITES": [],
"LIEUX_MENTIONNES": [],
"MONTANTS_REPORTS": [],
"RESUME": "Document de correspondance — extraction native défaillante. Contenu non exploitable sans OCR."
"SOURCE_PAGE": "N/A (extraction native
EpsteinFiles & Co — Doc Crawler