<output id="qn6qe"></output>

    1. <output id="qn6qe"><tt id="qn6qe"></tt></output>
    2. <strike id="qn6qe"></strike>

      亚洲 日本 欧洲 欧美 视频,日韩中文字幕有码av,一本一道av中文字幕无码,国产线播放免费人成视频播放,人妻少妇偷人无码视频,日夜啪啪一区二区三区,国产尤物精品自在拍视频首页,久热这里只有精品12

      Python 百度飛槳PaddlePaddle OCR文字識別和表格識別

      Python開發(fā)環(huán)境準備

      先安裝python和miniconda

      conda create -n ocr-test python=3.12.8
      conda env list
      conda activate ocr-test
      
      
      pip install paddlepaddle==3.1.0
      pip install paddleocr==3.1.0
      pip install paddlex==3.1.3
      pip install Flask
      pip install flask_cors
      

      Python代碼

      from flask import Flask, request, jsonify, send_file
      import os
      from flask_cors import cross_origin
      import time
      from PIL import Image
      from werkzeug.utils import secure_filename
      from paddleocr import PPStructureV3, TableRecognitionPipelineV2
      from bs4 import BeautifulSoup
      from openpyxl import Workbook
      import json
      
      # Layout-analysis + OCR pipeline (PP-StructureV3), CPU inference.
      # Mobile-sized detection/recognition models are chosen to keep CPU
      # latency low; all auxiliary sub-modules (orientation classify,
      # unwarping, seal/formula/chart/region) are disabled to cut load
      # time and memory.
      pipeline = PPStructureV3(
          device="cpu",
          use_doc_orientation_classify=False,
          text_recognition_model_name='PP-OCRv5_mobile_rec',
          text_detection_model_name='PP-OCRv5_mobile_det',
          use_doc_unwarping=False,
          use_seal_recognition=False,
          use_formula_recognition=False,
          use_chart_recognition=False,
          use_region_detection=False,
      )
      
      # Table-only recognition pipeline (no layout analysis), also on CPU,
      # sharing the same mobile OCR models as the full pipeline above.
      pipelineTable = TableRecognitionPipelineV2(
          device="cpu",
          use_doc_unwarping=False,
          use_doc_orientation_classify=False,
          text_recognition_model_name='PP-OCRv5_mobile_rec',
          text_detection_model_name='PP-OCRv5_mobile_det')
      
      # Flask application initialization.
      app = Flask(__name__)
      # Limit Paddle CPU worker threads — NOTE(review): this is set after the
      # pipelines above are constructed; confirm it still takes effect.
      os.environ["CPU_NUM"] = "6"
      UPLOAD_FOLDER = 'uploads'      # incoming images
      OUTPUT_FOLDER = 'output'       # JSON / TXT / XLSX results
      os.makedirs(UPLOAD_FOLDER, exist_ok=True)
      os.makedirs(OUTPUT_FOLDER, exist_ok=True)
      
      
      def resize_image(image_path,
                       save_path=None,
                       max_len=1280,
                       max_side_limit=4000):
          """Shrink an image so width <= max_len and height <= max_side_limit.

          The aspect ratio is preserved (a single scale factor, the larger of
          the two overshoots, is applied to both sides). Images already within
          both limits are left untouched.

          Args:
              image_path: path of the image to inspect.
              save_path: where to write the resized image; may equal
                  image_path to resize in place.
              max_len: maximum allowed width in pixels.
              max_side_limit: maximum allowed height in pixels.

          Returns:
              The path of the usable image: save_path if a resized copy was
              actually written there, otherwise image_path. (Fixes the
              original, which returned save_path even when nothing had been
              written to it.)
          """
          saved = False
          # Context manager ensures the underlying file handle is closed
          # (the original relied on GC to do so).
          with Image.open(image_path) as img:
              w, h = img.size
              scale_w = w / max_len if w > max_len else 1
              scale_h = h / max_side_limit if h > max_side_limit else 1
              scale = max(scale_w, scale_h)
              if scale > 1:
                  img = img.resize((int(w / scale), int(h / scale)), Image.LANCZOS)
                  if save_path:
                      img.save(save_path)
                      saved = True
          return save_path if saved else image_path
      
      
      # OCR API
      @app.route('/ocr', methods=['POST'])
      @cross_origin(origins='*')
      def ocr_image():
          """Run the full PP-StructureV3 pipeline on an uploaded image.

          Expects a multipart form field named 'image'. The upload is saved
          to UPLOAD_FOLDER, predicted, and each result page is written both
          as JSON and as a plain-text dump (one recognized block per line)
          under OUTPUT_FOLDER. Responds with the JSON results and the
          prediction time in seconds.
          """
          if 'image' not in request.files:
              return jsonify({'error': 'No image uploaded'}), 400

          file = request.files['image']
          if file.filename == '':
              return jsonify({'error': 'Empty filename'}), 400
          filename = secure_filename(file.filename)
          raw_path = os.path.join(UPLOAD_FOLDER, filename)

          file.save(raw_path)

          # Prediction
          start_time = time.time()
          results = pipeline.predict(raw_path)
          duration = round(time.time() - start_time, 2)

          all_json_results = []

          for idx, res in enumerate(results):
              json_path = os.path.join(OUTPUT_FOLDER, f'(unknown)_{idx}.json')
              res.save_to_json(save_path=json_path)
              with open(json_path, 'r', encoding='utf-8') as f:
                  json_data = f.read()

              all_json_results.append({
                  'index': idx,
                  'json': json_data,
              })

              # Also dump recognized text blocks, one per line.
              txt_path = os.path.join(OUTPUT_FOLDER, f'(unknown)_{idx}.txt')
              data = json.loads(json_data)
              with open(txt_path, 'w', encoding='utf-8') as txt_file:
                  # Fix: the original wrapped this in enumerate() but never
                  # used the index.
                  for item in data['parsing_res_list']:
                      txt_file.write(f"{item['block_content']}\n")

          return jsonify({
              'message': 'success',
              'time': duration,
              'results': all_json_results
          })
      
      
      # Table OCR API
      @app.route('/ocr_table', methods=['POST'])
      @cross_origin(origins='*')
      def ocr_image_table():
          """Run the table-only recognition pipeline on an uploaded image.

          Expects a multipart form field named 'image'. The upload is saved,
          downscaled in place via resize_image, then predicted. Each result
          page is saved as JSON, converted to a cell matrix and exported to
          an .xlsx file under OUTPUT_FOLDER. Responds with the JSON results
          and the prediction time in seconds.
          """
          if 'image' not in request.files:
              return jsonify({'error': 'No image uploaded'}), 400

          file = request.files['image']
          if file.filename == '':
              return jsonify({'error': 'Empty filename'}), 400
          filename = secure_filename(file.filename)
          raw_path = os.path.join(UPLOAD_FOLDER, filename)

          file.save(raw_path)
          # Resize in place to keep prediction fast on large images.
          resize_image(raw_path, save_path=raw_path)
          # Prediction
          start_time = time.time()
          results = pipelineTable.predict(raw_path)
          duration = round(time.time() - start_time, 2)

          all_json_results = []

          for idx, res in enumerate(results):
              json_path = os.path.join(OUTPUT_FOLDER, f'(unknown)_{idx}.json')
              res.save_to_json(save_path=json_path)
              with open(json_path, 'r', encoding='utf-8') as f:
                  json_data = f.read()

              all_json_results.append({
                  'index': idx,
                  'json': json_data,
              })

              # NOTE(review): the raw JSON string is fed to the HTML parser;
              # it relies on the embedded <table> markup surviving JSON
              # escaping — confirm against the pipeline's output format.
              output_path = os.path.join(OUTPUT_FOLDER, f'(unknown)_{idx}.xlsx')
              table_matrix = html_table_to_matrix(json_data)
              export_html_table_to_excel(table_matrix, output_path)

          print("耗時:", duration)
          return jsonify({
              'message': 'success',
              'time': duration,
              'results': all_json_results
          })
      
      
      def html_table_to_matrix(html_str):
          """Parse the first <table> in an HTML string into a 2-D cell matrix.

          Args:
              html_str: HTML (or text containing HTML) with a <table> element.

          Returns:
              A list of rows, each a list of whitespace-normalized cell
              strings. Returns [] when no <table> is present — the original
              raised AttributeError on `table.find_all` in that case.
          """
          soup = BeautifulSoup(html_str, 'html.parser')
          table = soup.find('table')
          if table is None:
              # Robustness: no table in the input.
              return []

          matrix = []
          for row in table.find_all('tr'):
              row_data = []
              for cell in row.find_all(['td', 'th']):
                  # Join the cell's text fragments, then collapse runs of
                  # whitespace to single spaces.
                  text = " ".join(cell.stripped_strings)
                  text = ' '.join(text.split())
                  row_data.append(text)
              matrix.append(row_data)

          return matrix
      
      
      def export_html_table_to_excel(html_table_data, output_path):
          """Export a 2-D matrix of cell strings to an Excel (.xlsx) file.

          Args:
              html_table_data: list of rows, each a list of cell values
                  (as produced by html_table_to_matrix).
              output_path: destination .xlsx path.
          """
          wb = Workbook()
          ws = wb.active
          ws.title = "提取的表格"

          # Write the matrix into the worksheet (openpyxl is 1-indexed).
          for row_idx, row in enumerate(html_table_data, 1):
              for col_idx, value in enumerate(row, 1):
                  ws.cell(row=row_idx, column=col_idx, value=value)

          # Bold the header row. Guard against an empty matrix, which
          # previously raised IndexError on html_table_data[0]. (The original
          # also bound ws.row_dimensions[1] to an unused variable.)
          if html_table_data:
              for col in range(1, len(html_table_data[0]) + 1):
                  header_cell = ws.cell(row=1, column=col)
                  header_cell.font = header_cell.font.copy(bold=True)

          # Auto-fit column widths, capped at 50 characters.
          for col in ws.columns:
              cells = list(col)
              max_length = 0
              for cell in cells:
                  # Fix: the original compared len(str(cell.value)) but then
                  # assigned len(cell.value) — a non-string value raised
                  # TypeError that a bare `except: pass` silently swallowed.
                  if cell.value is not None:
                      length = len(str(cell.value))
                      if length > max_length:
                          max_length = length
              adjusted_width = min(max_length + 2, 50)
              ws.column_dimensions[cells[0].column_letter].width = adjusted_width

          # Save the workbook.
          wb.save(output_path)
          print(f"成功導出Excel文件: {output_path}")
      
      
      @app.route('/download/<filename>')
      @cross_origin(origins='*')
      def download_file(filename):
          """Download endpoint: serve a file from OUTPUT_FOLDER as an attachment.

          The filename is sanitized before use; returns 404 if the file does
          not exist and 500 on any unexpected error.
          """
          try:
              safe_name = secure_filename(filename)
              target = os.path.join(OUTPUT_FOLDER, safe_name)
              if not os.path.exists(target):
                  return jsonify({"error": "文件不存在"}), 404
              return send_file(target, as_attachment=True, download_name=safe_name)
          except Exception as e:
              return jsonify({"error": "下載文件時出錯: " + str(e)}), 500
      
      
      # Entry point: start the server, reachable from any interface on port 5000.
      if __name__ == '__main__':
          print("服務器啟動中,CORS 已啟用")
          # NOTE(review): this is Flask's built-in development server; use a
          # production WSGI server (e.g. gunicorn) for real deployments.
          app.run(host='0.0.0.0', port=5000)
      
      posted @ 2025-08-23 10:45  0611163  閱讀(173)  評論(0)    收藏  舉報
      主站蜘蛛池模板: 麻豆蜜桃av蜜臀av色欲av| 欧美老少配性行为| 看全色黄大黄大色免费久久| 天天做天天躁天天躁| 夜夜影院未满十八勿进| 日本道不卡一二三区视频| 久久精品国产99久久无毒不卡 | 人成午夜免费大片| 日韩av中文字幕有码| 少妇熟女高潮流白浆| 无码日韩精品一区二区三区免费 | 在线精品国产中文字幕| 漂亮人妻被强中文字幕久久| 日本一区二区在线高清观看| 久久精品国产99久久久古代| 国产自产对白一区| 7777精品久久久大香线蕉| 中国女人熟毛茸茸A毛片| 久久久久无码中| 欧美经典人人爽人人爽人人片| 被拉到野外强要好爽| 亚洲av成人无网码天堂| 国产亚洲一二三区精品| 国产乱码精品一区二区三| 色综合久久综合香蕉色老大| 国产精品国产精品偷麻豆| 成人性生交片无码免费看| 日韩av在线一卡二卡三卡| 97精品伊人久久久大香线蕉| 国产果冻豆传媒麻婆精东 | 青青国产揄拍视频| 农村欧美丰满熟妇xxxx| 日本内射精品一区二区视频| 白丝乳交内射一二三区| 中国熟妇牲交视频| 人妻少妇精品视频三区二区| 五月天天天综合精品无码| 横峰县| 深夜av在线免费观看| 国产免费无遮挡吃奶视频| 日韩人妻无码一区二区三区综合部 |