<?php
// All timestamps (log directory and file names below) use Beijing time.
date_default_timezone_set('Asia/Shanghai');
require_once("tools.php");

// Base URL that relative image paths from the data file are appended to.
$cdn = "https://cdn-resource.ekwing.com";

// Log locations, all relative to the directory containing this script.
$log = __DIR__ . "/word_num.log";
$logDir = __DIR__ . "/log";

// Captured once at start-up so that every record of this run lands in the same files.
$date = date("Ymd");
$time = date("His");

// One log file per outcome category: <logDir>/<Ymd>/<category>/<His>.log
$logs = [];
$logs['success'] = "{$logDir}/{$date}/success/{$time}.log";
$logs['failure'] = "{$logDir}/{$date}/failure/{$time}.log";
$logs['ocr_error'] = "{$logDir}/{$date}/error/{$time}.log";


// Second column of the row currently being processed; handelImg() reads it
// through `global` to print progress when a new OCR request is made.
$qid = 0;

$handle = fopen("337_338.txt", "r");
if ($handle === false) {
    // Without the input file there is nothing to do; stop instead of looping on a bad handle.
    echo "Cannot open input file 337_338.txt\n";
    exit(1);
}

$i = 0;
while (($line = fgets($handle)) !== false) {
    // The first line is the column header.
    if ($i++ == 0) {
        continue;
    }

    // Tab-separated row; trim() removes the line break and surrounding whitespace.
    $data = explode("\t", trim($line));
    if (count($data) < 4) {
        // Blank or truncated row: columns 0, 1 and 3 are all required below.
        continue;
    }

    $id = $data[0];
    $qid = $data[1];

    // Column 3 holds a backslash-escaped JSON list of question items.
    $items = json_decode(stripslashes(trim($data[3])), true);
    if (!is_array($items)) {
        echo "line {$i}: column 4 is not valid JSON, skipped.\n";
        continue;
    }

    foreach ($items as $item) {
        // Skip items that carry no type or no first content record to hand to worker().
        if (!isset($item['tixing'], $item['content'][0]) || !is_array($item['content'][0])) {
            continue;
        }
        // Loose comparison on purpose: the type may be decoded as int or string.
        if (in_array($item['tixing'], ['337', '338'])) {
            worker($item['content'][0], $id);
        }
    }
}

fclose($handle);

/**
 * Build one essay question from a raw content record and submit it with addEssay().
 *
 * The prompt text is assembled from `instruct_text`, `material` and, when the
 * record has a `material_pic`, from the OCR result returned by handelImg().
 * Word limits come from the OCR result and from getWorkNum() applied to the
 * assembled prompt; the larger value wins. The outcome is appended to the
 * success or failure log listed in the global $logs.
 *
 * @param array $data Raw content record. Keys read here: id, instruct_text,
 *                    material, material_pic, sample_text, keywords.
 * @param mixed $id   Value sent to addEssay() as `questionId`.
 * @return void
 */
function worker($data, $id)
{
    global $cdn, $logs;

    // Null-safe array read: a missing key (or a non-array source) yields null
    // instead of an "undefined index" / "offset on null" warning.
    $field = function ($source, $key) {
        return isset($source[$key]) ? $source[$key] : null;
    };

    // Append to a log file, creating its directory first if it does not exist yet.
    $appendLog = function ($file, $text) {
        $dir = dirname($file);
        if (!is_dir($dir)) {
            mkdir($dir, 0777, true);
        }
        file_put_contents($file, $text, FILE_APPEND);
    };

    $rid = $field($data, 'id');
    $img = $field($data, 'material_pic');

    // Prompt text fragments, joined with newlines further down.
    $askParts = [];
    foreach (['instruct_text', 'material'] as $key) {
        $text = $field($data, $key);
        if ($text) {
            $askParts[] = $text;
        }
    }

    $article = "";
    if (isset($data['sample_text'][0]['sample_text'])) {
        $article = $data['sample_text'][0]['sample_text'];
    }

    $keys = !empty($data['keywords']) ? explode("#", $data['keywords']) : [];

    $imgUrls = [];
    $imgDescs = [];
    $minWords = 0;
    $maxWords = 0;
    // Required opening/closing sentences are never extracted in this script, so both stay null.
    $themeStart = null;
    $themeEnd = null;
    // Stays null when the record has no picture; only read in branches guarded by $img.
    $response = null;

    if ($img) {
        $response = handelImg($img);
        // handelImg() returns null when the OCR payload could not be decoded;
        // treat that (and a missing data.outputs) as an empty result.
        $outputs = $field($field($response, 'data'), 'outputs');
        $rs = is_array($outputs) ? $outputs : [];
        $url = "{$cdn}{$img}";

        $markdown = $field($rs, 'markdown');
        $stemDesc = $field($rs, 'stem_desc');
        $hasImage = $field($rs, 'has_image');

        if ($field($rs, 'is_essay') == IS_ESSAY) {
            // Recognised as an essay prompt: the recognised text is appended as-is.
            $askParts[] = trim((string) $markdown);

            if ($field($rs, 'word_rule')) {
                $minWords = $field($rs, 'word_min_count');
                $maxWords = $field($rs, 'word_max_count');
            }

            // An explicit word_num overrides the minimum taken from word_rule.
            $wordNum = $field($rs, 'word_num');
            if (!is_null($wordNum)) {
                $minWords = intval($wordNum);
            }

            // Forward the picture description whenever one exists and has_image is '0' or '1'.
            // (A special case for picture-only markdown was left unimplemented in the original.)
            if ($stemDesc && ($hasImage == '0' || $hasImage == '1')) {
                $imgUrls[] = $url;
                $imgDescs[] = $stemDesc;
            }
        } elseif ((is_null($markdown) || $markdown === '') && $stemDesc != '') {
            // Empty markdown with a description is treated as a pure picture.
            $imgUrls[] = $url;
            $imgDescs[] = $stemDesc;
        } else {
            if ($markdown) {
                $askParts[] = trim($markdown);
            } else {
                // A picture without any recognised text is most likely an OCR problem; record it.
                ocrErrLog('E1', $rid, $url, $response);
            }
            if ($hasImage == '1' && $stemDesc) {
                $imgUrls[] = $url;
                $imgDescs[] = $stemDesc;
            }
        }
    }

    $ask = implode("\n", $askParts);
    $num = getWorkNum($ask);

    if (is_array($num)) {
        // Array result is read as [min, max].
        $minWords = max($minWords, $num[0]);
        $maxWords = max($maxWords, $num[1]);
    } elseif ($num == 0 && $img) {
        // No word count found although a picture was present: treat as a recognition problem.
        ocrErrLog('E_337_338_4', $rid, "{$cdn}{$img}", $response);
    } else {
        $minWords = max($minWords, $num);
    }

    $essayData = [
        'questionId' => "{$id}",
        'name' => null,
        'stemUrls' => $imgUrls,
        'stemDesc' => implode("\n---\n", $imgDescs),
        'stemText' => $ask,
        'minWords' => $minWords,
        'score' => 100,
        'answer' => $article,
        'keyWords' => $keys,
    ];

    if ($maxWords) {
        $essayData['maxWords'] = $maxWords;
    }
    if (!is_null($themeStart)) {
        $essayData['themStart'] = $themeStart;
    }
    if (!is_null($themeEnd)) {
        $essayData['themEnd'] = $themeEnd;
    }

    $result = addEssay($essayData);

    if ($result && isset($result['code']) && $result['code'] == 0) {
        // Success: only the record id is logged.
        $appendLog($logs['success'], "{$rid},");
    } else {
        // Failure: log the id and message (plus the picture hash, which is the
        // OCR cache file name) so that the record can be resent later.
        $msg = (is_array($result) && isset($result['msg'])) ? $result['msg'] : '';
        $logLine = "{$rid}, {$msg}";
        if ($img) {
            $logLine = "{$logLine}, " . md5($img);
        }
        $appendLog($logs['failure'], "{$logLine}\n");
    }

    echo "\n$rid, deal finished.\n";
}

/**
 * Return the decoded OCR result for a picture, using an on-disk cache.
 *
 * The cache lives in the `ocr_new` directory next to this script; each entry
 * is named after the md5 of the relative picture path and holds the raw
 * response string returned by ocr(). Only responses that decode to an array
 * are cached, so a failed request is retried on the next run.
 *
 * @param string $img Picture path relative to the global $cdn base URL.
 * @return array|null Decoded OCR response, or null when the response is not valid JSON.
 */
function handelImg($img) {
    global $cdn, $qid;

    // DIRECTORY_SEPARATOR equals the original hard-coded "\\" on Windows and keeps the path valid elsewhere.
    $cacheDir = dirname(__FILE__) . DIRECTORY_SEPARATOR . "ocr_new";
    $filename = $cacheDir . DIRECTORY_SEPARATOR . md5($img);
    $img = "{$cdn}{$img}";

    if (file_exists($filename)) {
        $cached = json_decode(file_get_contents($filename), true);
        if (is_array($cached)) {
            return $cached;
        }
        // Unreadable cache entry: fall through and request the OCR again.
    }

    $rs = ocr($img);
    $rtn = is_string($rs) ? json_decode($rs, true) : null;

    // Progress output for freshly requested pictures.
    echo $qid, "\n";
    var_dump($rtn);

    if (is_array($rtn)) {
        if (!is_dir($cacheDir)) {
            mkdir($cacheDir, 0777, true);
        }
        file_put_contents($filename, $rs);
    }

    return $rtn;
}

/**
 * Append one line describing an OCR problem to the `ocr_error` log in the global $logs.
 *
 * @param string     $errCode     Short code identifying where the problem was detected.
 * @param mixed      $id          Record id, written as `ekw_id`.
 * @param string     $url         Full URL of the picture that was processed.
 * @param array|null $ocrResponse Decoded OCR response; `data.status` and
 *                                `workflow_run_id` are logged when present,
 *                                empty strings otherwise.
 * @return void
 */
function ocrErrLog($errCode, $id, $url, $ocrResponse) {
    global $logs;

    // The response may be null or partial when the OCR call itself failed.
    $status = isset($ocrResponse['data']['status']) ? $ocrResponse['data']['status'] : '';
    $runId = isset($ocrResponse['workflow_run_id']) ? $ocrResponse['workflow_run_id'] : '';

    $strLog = "{$errCode}, status:{$status}, ekw_id:{$id}, url: {$url}, workflow_run_id:{$runId}\n";

    $logger = $logs['ocr_error'];
    // The per-date log directory may not exist yet on the first error of a run.
    $dir = dirname($logger);
    if (!is_dir($dir)) {
        mkdir($dir, 0777, true);
    }
    file_put_contents($logger, $strLog, FILE_APPEND);
}
