1 changed files with 153 additions and 0 deletions
@ -0,0 +1,153 @@ |
|||
import json |
|||
import time,datetime |
|||
import requests as rq |
|||
import os |
|||
|
|||
# Root of the Doc2X v2 HTTP API; the functions below append endpoint paths to it.
base_url = "https://v2.doc2x.noedgeai.com"

# The API key lives in a plain-text file. The copy on drive E: is used when it
# exists; otherwise the path on drive D: is assumed (without checking it).
key_file = next(
    (candidate for candidate in ("E:/华为云盘/doc2x_key.txt",) if os.path.exists(candidate)),
    "D:/华为云盘/doc2x_key.txt",
)

# Read the key once at import time; surrounding whitespace/newlines are dropped.
with open(key_file, "r") as key_handle:
    secret = key_handle.read().strip()
|||
|
|||
def preupload(timeout: float = 30):
    """Ask the API for an upload target for a new document.

    Sends an authenticated POST to ``/api/v2/parse/preupload`` and returns the
    ``data`` member of the JSON reply. ``process_pdf`` reads the ``url`` and
    ``uid`` keys from that value.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The ``data`` value of the JSON reply.

    Raises:
        Exception: If the HTTP status is not 200 or the reply's ``code`` field
            is not ``"success"``.
    """
    url = f"{base_url}/api/v2/parse/preupload"
    headers = {"Authorization": f"Bearer {secret}"}

    res = rq.post(url, headers=headers, timeout=timeout)
    if res.status_code != 200:
        raise Exception(f"get preupload url failed: {res.text}")

    data = res.json()
    # .get() so that a reply without a "code" field is reported with the full
    # payload instead of surfacing as a bare KeyError.
    if data.get("code") != "success":
        raise Exception(f"get preupload url failed: {data}")
    return data["data"]
|||
|
|||
def put_file(pdf_path: str, url: str, timeout: float = 300):
    """Upload a local file to ``url`` with an HTTP PUT.

    Args:
        pdf_path: Path of the file to upload; it is opened in binary mode.
        url: Destination URL (``process_pdf`` passes the ``url`` value
            returned by ``preupload``).
        timeout: Seconds ``requests`` waits on the connection before giving
            up, so a stalled upload cannot hang the script forever.

    Raises:
        Exception: If the server answers with a status other than 200.
        OSError: If ``pdf_path`` cannot be opened.
    """
    with open(pdf_path, "rb") as f:
        # The request body is the raw binary stream of the file.
        res = rq.put(url, data=f, timeout=timeout)

    # The response is fully received at this point, so the file handle can be
    # released before the status is inspected.
    if res.status_code != 200:
        raise Exception(f"put file failed: {res.text}")
|||
|
|||
def get_status(uid: str, timeout: float = 30):
    """Fetch the current parse status of an uploaded document.

    Sends an authenticated GET to ``/api/v2/parse/status`` with ``uid`` as a
    query parameter and returns the ``data`` member of the JSON reply.
    ``process_pdf`` reads the ``status``, ``result``, ``detail`` and
    ``progress`` keys from that value.

    Args:
        uid: Identifier of the document, as returned by ``preupload``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``data`` value of the JSON reply.

    Raises:
        Exception: If the HTTP status is not 200 or the reply's ``code`` field
            is not ``"success"``.
    """
    url = f"{base_url}/api/v2/parse/status"
    headers = {"Authorization": f"Bearer {secret}"}

    # Passing the uid through ``params`` lets requests URL-encode it instead of
    # splicing it into the query string verbatim.
    res = rq.get(url, headers=headers, params={"uid": uid}, timeout=timeout)
    if res.status_code != 200:
        raise Exception(f"get status failed: {res.text}")

    data = res.json()
    if data.get("code") != "success":
        raise Exception(f"get status failed: {data}")
    return data["data"]
|||
|
|||
def parse_result(uid: str, download_path: str, to: str,
                 timeout: float = 30, poll_interval: float = 3):
    """Convert a parsed document to ``to`` and save the result locally.

    Steps:
      1. POST a convert request to ``/api/v2/convert/parse``.
      2. Poll ``/api/v2/convert/parse/result`` until its status is
         ``"success"`` (any status other than ``"processing"`` is an error).
      3. Download the file from the URL in the reply.
      4. Write it to ``download_path`` as ``result<YYYYmmdd_HHMMSS>.<ext>``,
         where ``<ext>`` is ``docx`` when ``to == 'docx'`` and ``zip``
         otherwise. If that directory cannot be created or written, the file
         is saved in the current working directory instead.
      5. Remove ``result.json`` from the current directory if it exists
         (``process_pdf`` writes that file before calling this function).

    Args:
        uid: Identifier of the parsed document.
        download_path: Target directory. Surrounding whitespace and quote
            characters are stripped and backslashes are turned into ``/``.
        to: Target format, sent to the API as-is.
        timeout: Seconds to wait on each HTTP request.
        poll_interval: Seconds to sleep between two status polls.

    Returns:
        The path the file was written to.

    Raises:
        Exception: If the convert request, a status poll or the download does
            not return HTTP 200, or if the conversion reports a status other
            than ``"success"`` / ``"processing"``.
    """
    headers = {"Authorization": f"Bearer {secret}"}

    # --- 1. start the conversion -------------------------------------------
    payload = {
        "uid": uid,
        "to": to,
        "formula_mode": "normal",
        "filename": "output." + to,
    }
    response = rq.post(
        f"{base_url}/api/v2/convert/parse",
        headers={**headers, "Content-Type": "application/json"},
        data=json.dumps(payload),
        timeout=timeout,
    )
    print(response.text)
    # Fail fast: polling for the result of a request that was rejected is
    # pointless.
    if response.status_code != 200:
        raise Exception(f"convert request failed: {response.text}")

    # --- 2. poll until the conversion is finished --------------------------
    while True:
        result_status = rq.get(
            f"{base_url}/api/v2/convert/parse/result",
            headers=headers,
            params={"uid": uid},
            timeout=timeout,
        )
        if result_status.status_code != 200:
            raise Exception(f"get result failed: {result_status.text}")
        print(result_status.text)

        # Decode the body once per poll instead of re-parsing it per access.
        result_data = result_status.json()['data']
        status = result_data['status']
        if status == 'success':
            download_url = result_data['url']
            break
        if status != 'processing':
            raise Exception(f"get result failed: {result_status.text}")
        time.sleep(poll_interval)

    # --- 3. download -------------------------------------------------------
    download_res = rq.get(download_url, timeout=timeout)
    # Do not write an error page to disk as if it were the converted document.
    if download_res.status_code != 200:
        raise Exception(f"download result failed: HTTP {download_res.status_code}")

    # --- 4. save -----------------------------------------------------------
    stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    extension = 'docx' if to == 'docx' else 'zip'
    file_name = f"result{stamp}.{extension}"

    # Clean the path BEFORE touching the file system, so a quoted path (e.g.
    # pasted from a file manager) does not end up being the directory created.
    target_dir = (download_path or "").strip().strip('"\'').replace('\\', '/')
    saved_path = os.path.join(target_dir, file_name)
    try:
        os.makedirs(target_dir, exist_ok=True)
        with open(saved_path, 'wb') as f:
            f.write(download_res.content)
    except OSError as e:
        # Best-effort fallback: keep the downloaded data by saving it in the
        # current working directory.
        print(e)
        print('将文件保存在当前目录')
        saved_path = file_name
        with open(saved_path, 'wb') as f:
            f.write(download_res.content)

    # --- 5. clean up -------------------------------------------------------
    # result.json only exists when process_pdf ran first; its absence must not
    # be treated as a failure to save the download.
    try:
        os.remove('result.json')
    except FileNotFoundError:
        pass

    return saved_path
|||
|
|||
def process_pdf(pdf_path: str, download_path: str, to: str = "docx"):
    """Upload a PDF, wait until it is parsed, then export and save the result.

    The local file is validated first so that no upload slot is requested for
    a path that cannot be uploaded. Then the file is uploaded with
    ``preupload`` + ``put_file``, ``get_status`` is polled every 3 seconds
    until the status is ``"success"`` or ``"failed"``, the parse result is
    dumped to ``result.json`` in the current directory, and ``parse_result``
    is called to convert and download the document.

    Args:
        pdf_path: Path of the PDF. Surrounding whitespace and quote characters
            are stripped; the ``.pdf`` extension check is case-insensitive.
        download_path: Directory handed to ``parse_result``.
        to: Target format handed to ``parse_result``.

    Raises:
        Exception: ``"file not exists"`` if the path does not exist,
            ``"file type not supported"`` if it does not end in ``.pdf``,
            ``"parse failed: ..."`` if the server reports status ``"failed"``;
            plus anything raised by the helper functions.
    """
    # Strip only quotes that actually surround the path; removing the first
    # and last characters blindly would mangle paths with a single stray quote.
    pdf_path = pdf_path.strip().strip('"\'')

    if not os.path.exists(pdf_path):
        raise Exception("file not exists")
    if not pdf_path.lower().endswith(".pdf"):
        raise Exception("file type not supported")

    upload_data = preupload()
    print(upload_data)
    url = upload_data["url"]
    uid = upload_data["uid"]
    print(f"uid: {uid}")

    put_file(pdf_path, url)

    while True:
        status_data = get_status(uid)
        print(status_data)
        status = status_data["status"]

        if status == "success":
            # parse_result deletes this file again once the download is saved.
            with open("result.json", "w") as f:
                json.dump(status_data["result"], f)
            break
        if status == "failed":
            raise Exception(f"parse failed: {status_data.get('detail')}")
        if status == "processing":
            print(f"progress: {status_data.get('progress')}")

        # Sleep for every non-terminal status, including ones this code does
        # not know about, so the loop can never hammer the API without pause.
        time.sleep(3)

    parse_result(uid, download_path, to)
|||
|
|||
if __name__ == "__main__":
    # The interactive end-to-end flow is currently disabled:
    #     pdf_path = input("pdf path: ")
    #     download_path = input("download path: ")
    #     process_pdf(pdf_path, download_path)
    #
    # Instead, the script exports one fixed, hard-coded document uid as docx
    # into E:/ without uploading anything.
    parse_result("0194413e-d82e-707c-b3ba-dd87e94a1d7f", 'E:/', 'docx')
|||
|
|||
|
|||
|
Loading…
Reference in new issue