finish

等支持翻译再优化吧
10 months ago · 932410f619
1 changed files with 153 additions and 0 deletions
--- a/core.py
+++ b/core.py
@@ -0,0 +1,153 @@
+import json
+import time,datetime
+import requests as rq
+import os
+
+# Root of the doc2x v2 HTTP API; the request helpers below build their URLs from it.
+base_url = "https://v2.doc2x.noedgeai.com"
+
+# The API secret is read from a text file: the copy on E: is preferred and the
+# D: path is used when the E: path does not exist.
+key_file = "E:/华为云盘/doc2x_key.txt"
+key_file = key_file if os.path.exists(key_file) else "D:/华为云盘/doc2x_key.txt"
+with open(key_file, "r") as f:
+    # Surrounding whitespace is dropped so the value can be placed directly
+    # into the "Bearer ..." Authorization header.
+    secret = f.read().strip()
+
+def preupload():
+    """Ask the API for an upload slot.
+
+    Sends an authenticated POST to ``{base_url}/api/v2/parse/preupload``.
+
+    Returns:
+        The ``data`` field of the JSON response. ``process_pdf`` reads its
+        ``"url"`` and ``"uid"`` entries.
+
+    Raises:
+        Exception: If the HTTP status is not 200, or the response's ``code``
+            field is not ``"success"``.
+    """
+    response = rq.post(
+        f"{base_url}/api/v2/parse/preupload",
+        headers={"Authorization": f"Bearer {secret}"},
+    )
+    # Transport-level failure: report the raw response text.
+    if response.status_code != 200:
+        raise Exception(f"get preupload url failed: {response.text}")
+
+    payload = response.json()
+    # Application-level failure: report the decoded payload.
+    if payload["code"] != "success":
+        raise Exception(f"get preupload url failed: {payload}")
+    return payload["data"]
+
+def put_file(pdf_path: str, url: str):
+    """Upload the file at ``pdf_path`` to ``url`` with an HTTP PUT.
+
+    Args:
+        pdf_path: Path of the local file to send.
+        url: Destination URL (``process_pdf`` passes the ``"url"`` value
+            obtained from ``preupload``).
+
+    Raises:
+        Exception: If the PUT response status is not 200.
+    """
+    with open(pdf_path, "rb") as pdf_stream:
+        # The request body is the file's binary stream.
+        upload = rq.put(url, data=pdf_stream)
+    if upload.status_code == 200:
+        return
+    raise Exception(f"put file failed: {upload.text}")
+
+def get_status(uid: str):
+    """Fetch the current parse status for ``uid``.
+
+    Sends an authenticated GET to ``{base_url}/api/v2/parse/status`` with the
+    uid as a query parameter.
+
+    Args:
+        uid: Task identifier to look up.
+
+    Returns:
+        The ``data`` field of the JSON response. ``process_pdf`` reads its
+        ``"status"`` entry and, depending on it, ``"result"``, ``"detail"``
+        or ``"progress"``.
+
+    Raises:
+        Exception: If the HTTP status is not 200, or the response's ``code``
+            field is not ``"success"``.
+    """
+    auth_headers = {"Authorization": f"Bearer {secret}"}
+    reply = rq.get(f"{base_url}/api/v2/parse/status?uid={uid}", headers=auth_headers)
+    if reply.status_code != 200:
+        raise Exception(f"get status failed: {reply.text}")
+
+    body = reply.json()
+    if body["code"] == "success":
+        return body["data"]
+    raise Exception(f"get status failed: {body}")
+
+def parse_result(uid: str, download_path: str, to: str):
+    """Request an export of a parsed document, wait for it and save it to disk.
+
+    Posts a conversion request for ``uid`` to the convert endpoint, polls the
+    result endpoint every 3 seconds until the export is ready, downloads it
+    and writes it as ``result<YYYYmmdd_HHMMSS>.<ext>`` inside
+    ``download_path``. If the file cannot be written there, it is written to
+    the current working directory instead. Finally ``result.json`` (the
+    temporary file ``process_pdf`` writes) is removed from the current
+    directory when it exists.
+
+    Args:
+        uid: Task identifier to export.
+        download_path: Directory to save into. Surrounding whitespace and
+            quote characters are ignored; the directory is created if needed.
+        to: Target format sent to the API. ``"docx"`` is saved with a
+            ``.docx`` extension; any other value is saved as ``.zip``.
+
+    Raises:
+        Exception: If polling returns a non-200 status or a status other than
+            ``"success"``/``"processing"``, or if the download itself does
+            not return HTTP 200.
+    """
+    auth = {"Authorization": f"Bearer {secret}"}
+    request_body = {
+        "uid": uid,
+        "to": to,
+        "formula_mode": "normal",
+        "filename": "output." + to,
+    }
+
+    response = rq.post(
+        f"{base_url}/api/v2/convert/parse",
+        headers={**auth, "Content-Type": "application/json"},
+        data=json.dumps(request_body),
+    )
+    print(response.text)
+
+    url_result = f"{base_url}/api/v2/convert/parse/result?uid={uid}"
+    while True:
+        result_status = rq.get(url_result, headers=auth)
+        if result_status.status_code != 200:
+            raise Exception(f"get result failed: {result_status.text}")
+        print(result_status.text)
+
+        # Decode the body once per poll instead of once per comparison.
+        state = result_status.json()['data']
+        if state['status'] == 'success':
+            download_url = state['url']
+            break
+        if state['status'] != 'processing':
+            raise Exception(f"get result failed: {result_status.text}")
+        time.sleep(3)
+
+    download_res = rq.get(download_url)
+    # Do not save an error page under a .docx/.zip name.
+    if download_res.status_code != 200:
+        raise Exception(f"download result failed: {download_res.text}")
+
+    dt = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
+    kzm = 'docx' if to == 'docx' else 'zip'
+    filename = 'result' + dt + '.' + kzm
+
+    # Clean the path BEFORE touching the filesystem, and only strip quotes
+    # that actually wrap the path (a quote in the middle is left alone).
+    target_dir = (download_path or '').strip().strip('"\'')
+    try:
+        if target_dir:
+            os.makedirs(target_dir, exist_ok=True)
+        with open(os.path.join(target_dir, filename), 'wb') as f:
+            f.write(download_res.content)
+    except (OSError, ValueError) as e:
+        # Best effort: an unusable target directory must not lose the download.
+        print(e)
+        print('将文件保存在当前目录')
+        with open(filename, 'wb') as f:
+            f.write(download_res.content)
+
+    # The temporary file only exists when the caller went through
+    # process_pdf; its absence is not an error and must not trigger the
+    # fallback save above.
+    try:
+        os.remove('result.json')
+    except FileNotFoundError:
+        pass
+
+def process_pdf(pdf_path: str, download_path: str, to: str="docx"):
+    """Upload a PDF, wait for it to be parsed, then export and save the result.
+
+    Steps: validate the local file, obtain an upload slot via ``preupload``,
+    upload the file with ``put_file``, poll ``get_status`` every 3 seconds
+    until parsing finishes, dump the parse result to ``result.json`` in the
+    current directory, and hand over to ``parse_result`` for the export.
+
+    Args:
+        pdf_path: Path of the PDF to convert. Surrounding whitespace and
+            quote characters are ignored.
+        download_path: Directory passed on to ``parse_result``.
+        to: Export format passed on to ``parse_result`` (default ``"docx"``).
+
+    Raises:
+        Exception: ``"file not exists"`` if the path does not exist,
+            ``"file type not supported"`` if it does not end in ``.pdf``
+            (case-insensitive), ``"parse failed: ..."`` if the service
+            reports a failed parse; errors from the helper calls propagate.
+    """
+    # Only strip quotes that wrap the path; a quote inside the name is kept.
+    pdf_path = pdf_path.strip().strip('"\'')
+
+    # Validate locally first so no upload slot is requested for a bad path.
+    if not os.path.exists(pdf_path):
+        raise Exception("file not exists")
+    if not pdf_path.lower().endswith(".pdf"):
+        raise Exception("file type not supported")
+
+    upload_data = preupload()
+    print(upload_data)
+    url = upload_data["url"]
+    uid = upload_data["uid"]
+    print(f"uid: {uid}")
+
+    put_file(pdf_path, url)
+
+    while True:
+        status_data = get_status(uid)
+        print(status_data)
+        status = status_data["status"]
+        if status == "success":
+            with open("result.json", "w") as f:
+                json.dump(status_data["result"], f)
+            break
+        if status == "failed":
+            detail = status_data["detail"]
+            raise Exception(f"parse failed: {detail}")
+        if status == "processing":
+            progress = status_data["progress"]
+            print(f"progress: {progress}")
+        # Wait before the next poll for every non-terminal status, so an
+        # unrecognised status cannot turn this loop into a busy spin.
+        time.sleep(3)
+
+    parse_result(uid, download_path, to)
+
+if __name__ == "__main__":
+    # Interactive entry point, currently disabled:
+    # pdf_path = input("pdf path: ")
+    # download_path = input("download path: ")
+    # process_pdf(pdf_path, download_path)
+
+    # NOTE(review): the script currently skips the upload/parse steps and only
+    # exports a fixed task id to E:/ as docx -- presumably a debugging
+    # shortcut; confirm before relying on it.
+    uid = "0194413e-d82e-707c-b3ba-dd87e94a1d7f"
+    parse_result(uid, download_path='E:/', to='docx')
+
+
+