<?php
/**
 * Prunes unusable OCR result files from the ./ocr directory next to this script.
 *
 * Every regular file in ./ocr is read and decoded as JSON, then deleted when:
 *   - it has no usable `data` object (counted in $nodata; its top-level
 *     `status`, when present, is tallied in $status);
 *   - `data.error` is set (counted in $counter for the known 15-second
 *     execution timeout message, otherwise in $other and the
 *     `workflow_run_id` is appended to log/ocr.log);
 *   - both `data.outputs.markdown` and `data.outputs.stem_desc` are empty
 *     (counted in $nomarkdown, `workflow_run_id` appended to
 *     log/nomarkdown.log).
 *
 * Files that cannot be read are left untouched. A one-line summary of the
 * counters is printed at the end.
 */
date_default_timezone_set('Asia/Shanghai');
require_once("tools.php");

$dir = dirname(__FILE__) . '/ocr';

$logger = dirname(__FILE__) . '/log/ocr.log';
$logger_nomarkdown = dirname(__FILE__) . '/log/nomarkdown.log';

// Start every run with fresh log files; failures (already exists / not there) are expected.
@mkdir(dirname($logger), 0777, true);
@unlink($logger);
@unlink($logger_nomarkdown);

// scandir() returns false for a missing/unreadable directory; treat that as
// "nothing to do" instead of feeding false into foreach.
$entries = is_dir($dir) ? scandir($dir) : false;
if ($entries === false) {
    echo "cannot read directory {$dir}\n";
    $entries = [];
}

$counter = 0; $other = 0;
$status = [];
$otherinfo = [];
$nodata = 0;
$nomarkdown = 0;
// Presumably a dump written by an earlier version of this script; removed on every run.
@unlink(dirname(__FILE__) . '/orc_error_status.json');

foreach ($entries as $entry) {
    // Skip the "." and ".." pseudo entries.
    if ($entry === '.' || $entry === '..') {
        continue;
    }

    $filename = $dir . DIRECTORY_SEPARATOR . $entry;
    // Only regular files are inspected; sub-directories are ignored.
    if (!is_file($filename)) {
        continue;
    }

    $raw = file_get_contents($filename);
    if ($raw === false) {
        // A read failure says nothing about the content, so never delete on it.
        echo "skip unreadable file {$filename}\n";
        continue;
    }

    $json = json_decode($raw, true);
    // Scalar values are written to the log as-is; anything else becomes an empty id.
    $runId = (is_array($json) && isset($json['workflow_run_id']) && is_scalar($json['workflow_run_id']))
        ? $json['workflow_run_id']
        : '';

    // Invalid JSON, a missing `data` key or a non-array `data` all mean "no usable data".
    if (!is_array($json) || !isset($json['data']) || !is_array($json['data'])) {
        echo "delete file {$filename}\n";
        $nodata++;
        // Only int/string values are valid array keys for the tally.
        if (is_array($json) && isset($json['status'])
            && (is_int($json['status']) || is_string($json['status']))) {
            $key = $json['status'];
            $status[$key] = isset($status[$key]) ? $status[$key] + 1 : 1;
        }
        @unlink($filename);
        continue;
    }

    $data = $json['data'];
    // isset() is false for both a missing key and an explicit null: "no error".
    $error = isset($data['error']) ? $data['error'] : null;

    if (!is_null($error)) {
        echo "delete file {$filename}\n";
        // The upstream timeout message is matched literally ("code execution timed out (>15s)").
        if (is_string($error) && trim($error) === "代码执行超时 (>15秒)") {
            $counter++;
        } else {
            file_put_contents($logger, $runId . "\n", FILE_APPEND);
            $other++;
        }
        @unlink($filename);
        continue;
    }

    // No error reported: the result is still useless when both text outputs are empty.
    $outputs = (isset($data['outputs']) && is_array($data['outputs'])) ? $data['outputs'] : [];
    $markdown = isset($outputs['markdown']) ? $outputs['markdown'] : '';
    $stemDesc = isset($outputs['stem_desc']) ? $outputs['stem_desc'] : '';

    if ($markdown === '' && $stemDesc === '') {
        echo "delete ocr fail file {$filename}\n";
        file_put_contents($logger_nomarkdown, $runId . "\n", FILE_APPEND);
        $nomarkdown++;
        @unlink($filename);
    }
}

echo "counter: {$counter}, other: {$other}, nodata: {$nodata}, nomarkdown: {$nomarkdown}\n";
