GitHub Copilot 實戰: 從零開始用AI寫一個OCR工具 (3)
源碼
https://github.com/densen2014/Blazor100/tree/master/AI/MiOcr
添加一個屏幕截圖功能,顯示截圖起始點,結束點,截圖區域,按鍵ESC取消截圖
這裡AI就比較中規中矩,很快就能得到我要的功能了.下面只簡單貼一下代碼

ScreenCaptureWindow.xaml
<!-- Borderless, maximized, always-on-top overlay used for region selection.
     The background alpha is 0x01 (almost fully transparent); presumably this keeps
     the window hit-testable for mouse input - TODO confirm. -->
<Window x:Class="MiOcr.ScreenCaptureWindow"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
WindowStyle="None" AllowsTransparency="True" Background="#01000000"
Topmost="True" ShowInTaskbar="False" WindowState="Maximized">
<!-- Drawing surface: the code-behind adds the selection rectangle here and positions the labels. -->
<Canvas x:Name="CaptureCanvas">
<!-- Label for the drag start point; made visible by the code-behind on mouse-down. -->
<TextBlock x:Name="StartCoordText"
Foreground="Yellow"
Background="#80000000"
FontSize="14"
Visibility="Collapsed"/>
<!-- Label for the current pointer position; follows the cursor while dragging. -->
<TextBlock x:Name="CurrentCoordText"
Foreground="Yellow"
Background="#80000000"
FontSize="14"
Visibility="Collapsed"/>
<!-- Label for the width x height of the current selection. -->
<TextBlock x:Name="SizeText"
Foreground="Yellow"
Background="#80000000"
FontSize="14"
Visibility="Collapsed"/>
</Canvas>
</Window>
ScreenCaptureWindow.xaml.cs
using System.Windows;
using System.Windows.Controls;
using System.Windows.Input;
using System.Windows.Interop;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Shapes;
namespace MiOcr;
/// <summary>
/// Overlay window that lets the user drag out a rectangle and captures that part of the screen.
/// While dragging it shows the start point, the current pointer position and the selection size.
/// Pressing Esc cancels the capture.
/// </summary>
public partial class ScreenCaptureWindow : Window
{
    /// <summary>Distance kept between the selection rectangle (or canvas edge) and the labels.</summary>
    private const double LabelMargin = 8;

    /// <summary>Vertical gap between the start-point label and the size label below it.</summary>
    private const double LabelGap = 4;

    /// <summary>SRCCOPY raster operation code passed to BitBlt.</summary>
    private const int SrcCopy = 0x00CC0020;

    /// <summary>
    /// The selected region in this window's coordinate space (scaled by the DPI factor
    /// before the capture). Set when a selection is completed.
    /// </summary>
    public Rect SelectedRect { get; private set; }

    /// <summary>The captured bitmap, or <c>null</c> when the capture was cancelled or failed.</summary>
    public BitmapSource? CapturedImage { get; private set; }

    // Drag start point; null while no drag is in progress.
    private System.Windows.Point? _start;

    // Rectangle drawn on the canvas for the current drag; null while no drag is in progress.
    private Rectangle? _rectShape;

    /// <summary>Wires up the mouse and keyboard handlers and requests keyboard focus once loaded.</summary>
    public ScreenCaptureWindow()
    {
        InitializeComponent();
        MouseLeftButtonDown += OnMouseDown;
        MouseMove += OnMouseMove;
        MouseLeftButtonUp += OnMouseUp;
        Cursor = Cursors.Cross;
        PreviewKeyDown += ScreenCaptureWindow_PreviewKeyDown;
        Focusable = true;
        // Take keyboard focus so that Esc reaches PreviewKeyDown.
        Loaded += (s, e) => Keyboard.Focus(this);
    }

    /// <summary>Cancels the capture when Esc is pressed.</summary>
    private void ScreenCaptureWindow_PreviewKeyDown(object sender, KeyEventArgs e)
    {
        if (e.Key != Key.Escape)
        {
            return;
        }

        CapturedImage = null;
        DialogResult = false;
        Close();
    }

    /// <summary>
    /// Positions the start-point label and the size label next to the selection rectangle.
    /// Candidate spots are tried in order: top-left, top-right, bottom-left, bottom-right of
    /// the rectangle. If none fits, both labels are clamped inside the canvas.
    /// </summary>
    /// <param name="x">Left edge of the selection.</param>
    /// <param name="y">Top edge of the selection.</param>
    /// <param name="w">Width of the selection.</param>
    /// <param name="h">Height of the selection.</param>
    private void PositionTextBlocks(double x, double y, double w, double h)
    {
        double canvasWidth = CaptureCanvas.ActualWidth;
        double canvasHeight = CaptureCanvas.ActualHeight;

        // Measure the labels first so their desired sizes reflect the current text.
        var unbounded = new Size(double.PositiveInfinity, double.PositiveInfinity);
        StartCoordText.Measure(unbounded);
        SizeText.Measure(unbounded);
        double startW = StartCoordText.DesiredSize.Width;
        double startH = StartCoordText.DesiredSize.Height;
        double sizeW = SizeText.DesiredSize.Width;
        double sizeH = SizeText.DesiredSize.Height;

        double leftX = x - startW - LabelMargin;
        double rightX = x + w + LabelMargin;
        double aboveY = y - startH - LabelMargin;
        double belowY = y + h + LabelMargin;

        bool fitsLeft = leftX >= 0;
        bool fitsRight = rightX + startW <= canvasWidth;
        bool fitsAbove = aboveY >= 0;
        bool fitsBelow = belowY + startH + sizeH + LabelGap <= canvasHeight;

        // Above the rectangle is preferred over below; left is preferred over right.
        if (fitsAbove && (fitsLeft || fitsRight))
        {
            PlaceLabelStack(fitsLeft ? leftX : rightX, aboveY, startH);
            return;
        }

        if (fitsBelow && (fitsLeft || fitsRight))
        {
            PlaceLabelStack(fitsLeft ? leftX : rightX, belowY, startH);
            return;
        }

        // Fallback: keep both labels inside the canvas.
        Canvas.SetLeft(StartCoordText, Math.Max(LabelMargin, Math.Min(canvasWidth - startW - LabelMargin, x)));
        Canvas.SetTop(StartCoordText, Math.Max(LabelMargin, Math.Min(canvasHeight - startH - LabelMargin, y)));
        Canvas.SetLeft(SizeText, Math.Max(LabelMargin, Math.Min(canvasWidth - sizeW - LabelMargin, x)));
        Canvas.SetTop(SizeText, Math.Max(LabelMargin, Math.Min(canvasHeight - sizeH - LabelMargin, y + startH + LabelGap)));
    }

    /// <summary>Places the start-point label at (left, top) and the size label directly below it.</summary>
    private void PlaceLabelStack(double left, double top, double startLabelHeight)
    {
        Canvas.SetLeft(StartCoordText, left);
        Canvas.SetTop(StartCoordText, top);
        Canvas.SetLeft(SizeText, left);
        Canvas.SetTop(SizeText, top + startLabelHeight + LabelGap);
    }

    /// <summary>Hides the three coordinate/size labels.</summary>
    private void HideLabels()
    {
        StartCoordText.Visibility = Visibility.Collapsed;
        CurrentCoordText.Visibility = Visibility.Collapsed;
        SizeText.Visibility = Visibility.Collapsed;
    }

    /// <summary>Discards the current drag: removes the rectangle, hides the labels, clears the start point.</summary>
    private void ResetSelection()
    {
        if (_rectShape != null)
        {
            CaptureCanvas.Children.Remove(_rectShape);
            _rectShape = null;
        }

        _start = null;
        HideLabels();
    }

    /// <summary>Starts a new selection at the pointer position and shows the labels.</summary>
    private void OnMouseDown(object sender, MouseButtonEventArgs e)
    {
        // A previous drag may still be pending (e.g. its mouse-up never reached this window);
        // drop it so that no orphan rectangle stays on the canvas.
        ResetSelection();

        var origin = e.GetPosition(this);
        _start = origin;
        _rectShape = new Rectangle
        {
            Stroke = Brushes.Red,
            StrokeThickness = 2,
            Fill = new SolidColorBrush(Color.FromArgb(40, 0, 0, 255))
        };
        CaptureCanvas.Children.Add(_rectShape);
        Canvas.SetLeft(_rectShape, origin.X);
        Canvas.SetTop(_rectShape, origin.Y);

        StartCoordText.Text = $"起點: ({(int)origin.X}, {(int)origin.Y})";
        StartCoordText.Visibility = Visibility.Visible;
        CurrentCoordText.Text = $"當前: ({(int)origin.X}, {(int)origin.Y})";
        CurrentCoordText.Visibility = Visibility.Visible;
        SizeText.Text = $"大小: 0 x 0";
        SizeText.Visibility = Visibility.Visible;

        // Initial label placement for an empty selection.
        PositionTextBlocks(origin.X, origin.Y, 0, 0);
    }

    /// <summary>Resizes the selection rectangle and refreshes the labels while dragging.</summary>
    private void OnMouseMove(object sender, MouseEventArgs e)
    {
        if (!_start.HasValue || _rectShape == null)
        {
            return;
        }

        var pos = e.GetPosition(this);
        double x = Math.Min(_start.Value.X, pos.X);
        double y = Math.Min(_start.Value.Y, pos.Y);
        double w = Math.Abs(_start.Value.X - pos.X);
        double h = Math.Abs(_start.Value.Y - pos.Y);

        Canvas.SetLeft(_rectShape, x);
        Canvas.SetTop(_rectShape, y);
        _rectShape.Width = w;
        _rectShape.Height = h;

        // The current-point label follows the cursor.
        CurrentCoordText.Text = $"當前: ({(int)pos.X}, {(int)pos.Y})";
        Canvas.SetLeft(CurrentCoordText, pos.X + 2);
        Canvas.SetTop(CurrentCoordText, pos.Y + 2);

        SizeText.Text = $"大小: {(int)w} x {(int)h}";
        PositionTextBlocks(x, y, w, h);
    }

    /// <summary>
    /// Completes the selection: captures the selected region and closes the window.
    /// A selection narrower or shorter than one unit (a click without a drag) is discarded
    /// and the window stays open so the user can drag again.
    /// </summary>
    private void OnMouseUp(object sender, MouseButtonEventArgs e)
    {
        if (!_start.HasValue || _rectShape == null)
        {
            return;
        }

        var end = e.GetPosition(this);
        double x = Math.Min(_start.Value.X, end.X);
        double y = Math.Min(_start.Value.Y, end.Y);
        double w = Math.Abs(_start.Value.X - end.X);
        double h = Math.Abs(_start.Value.Y - end.Y);

        if (w < 1 || h < 1)
        {
            ResetSelection();
            return;
        }

        SelectedRect = new Rect(x, y, w, h);
        HideLabels();

        CapturedImage = CaptureScreenArea(SelectedRect);
        // A failed capture is reported to the caller like a cancelled one.
        DialogResult = CapturedImage is not null;
        Close();
    }

    /// <summary>
    /// Copies the given region of the screen into a frozen <see cref="BitmapSource"/>.
    /// </summary>
    /// <param name="rect">Region in window coordinates; it is multiplied by the window's DPI scale.</param>
    /// <returns>The captured bitmap, or <c>null</c> if a GDI call failed.</returns>
    private BitmapSource? CaptureScreenArea(Rect rect)
    {
        double dpiScale = NativeMethods.GetDpiScale(this);
        int x = (int)(rect.X * dpiScale);
        int y = (int)(rect.Y * dpiScale);
        // Never request a zero-sized bitmap.
        int w = Math.Max(1, (int)(rect.Width * dpiScale));
        int h = Math.Max(1, (int)(rect.Height * dpiScale));

        IntPtr hdcSrc = NativeMethods.GetDC(IntPtr.Zero);
        if (hdcSrc == IntPtr.Zero)
        {
            return null;
        }

        IntPtr hdcDest = IntPtr.Zero;
        IntPtr hBitmap = IntPtr.Zero;
        try
        {
            hdcDest = NativeMethods.CreateCompatibleDC(hdcSrc);
            hBitmap = NativeMethods.CreateCompatibleBitmap(hdcSrc, w, h);
            if (hdcDest == IntPtr.Zero || hBitmap == IntPtr.Zero)
            {
                return null;
            }

            IntPtr hOld = NativeMethods.SelectObject(hdcDest, hBitmap);
            bool copied = NativeMethods.BitBlt(hdcDest, 0, 0, w, h, hdcSrc, x, y, SrcCopy);
            // Deselect the bitmap before it is converted and deleted.
            NativeMethods.SelectObject(hdcDest, hOld);
            if (!copied)
            {
                return null;
            }

            var source = Imaging.CreateBitmapSourceFromHBitmap(
                hBitmap, IntPtr.Zero, Int32Rect.Empty, BitmapSizeOptions.FromEmptyOptions());
            source.Freeze();
            return source;
        }
        finally
        {
            // Release every native handle on all paths, including failures.
            if (hBitmap != IntPtr.Zero)
            {
                NativeMethods.DeleteObject(hBitmap);
            }

            if (hdcDest != IntPtr.Zero)
            {
                NativeMethods.DeleteDC(hdcDest);
            }

            NativeMethods.ReleaseDC(IntPtr.Zero, hdcSrc);
        }
    }
}
截圖 API,不走 System.Drawing
NativeMethods.cs
using System.Windows;
namespace MiOcr;
/// <summary>
/// P/Invoke declarations (user32.dll / gdi32.dll) used for screen capture, plus a DPI helper.
/// </summary>
public static class NativeMethods
{
    // ---- user32.dll ----

    [System.Runtime.InteropServices.DllImport("user32.dll")]
    public static extern IntPtr GetDC(IntPtr hWnd);

    [System.Runtime.InteropServices.DllImport("user32.dll")]
    public static extern int ReleaseDC(IntPtr hWnd, IntPtr hDC);

    // ---- gdi32.dll ----

    [System.Runtime.InteropServices.DllImport("gdi32.dll")]
    public static extern IntPtr CreateCompatibleDC(IntPtr hdc);

    [System.Runtime.InteropServices.DllImport("gdi32.dll")]
    public static extern IntPtr CreateCompatibleBitmap(IntPtr hdc, int nWidth, int nHeight);

    [System.Runtime.InteropServices.DllImport("gdi32.dll")]
    public static extern IntPtr SelectObject(IntPtr hdc, IntPtr hgdiobj);

    [System.Runtime.InteropServices.DllImport("gdi32.dll")]
    public static extern bool BitBlt(
        IntPtr hdcDest, int nXDest, int nYDest, int nWidth, int nHeight,
        IntPtr hdcSrc, int nXSrc, int nYSrc, int dwRop);

    [System.Runtime.InteropServices.DllImport("gdi32.dll")]
    public static extern bool DeleteDC(IntPtr hdc);

    [System.Runtime.InteropServices.DllImport("gdi32.dll")]
    public static extern bool DeleteObject(IntPtr hObject);

    /// <summary>
    /// Gets the DPI scale factor of the given window.
    /// </summary>
    /// <param name="window">Window whose presentation source is queried.</param>
    /// <returns>
    /// The horizontal (M11) component of the window's device transform, or 1.0 when the
    /// window has no presentation source or composition target.
    /// </returns>
    public static double GetDpiScale(Window window)
    {
        var target = PresentationSource.FromVisual(window)?.CompositionTarget;
        return target is null
            ? 1.0
            : target.TransformToDevice.M11; // scale along the X axis
    }
}
最后主界面添加按鈕
<!-- "Capture screen" button on the main window; Click is handled by CaptureScreenButton_Click. -->
<Button Width="140" Height="44"
        Background="#4361EE"
        Foreground="White"
        FontWeight="SemiBold"
        FontSize="16"
        HorizontalAlignment="Left"
        VerticalAlignment="Top"
        Margin="180,20,20,20"
        Click="CaptureScreenButton_Click">
    <Button.Effect>
        <DropShadowEffect ShadowDepth="1" BlurRadius="8" Opacity="0.25" Color="#4361EE"/>
    </Button.Effect>
    <Button.Content>
        <StackPanel Orientation="Horizontal" VerticalAlignment="Center">
            <!-- Icon glyph from the Segoe MDL2 Assets font.
                 NOTE(review): the glyph was empty in the listing (an icon-font TextBlock with no
                 text renders nothing); U+E722 (Camera) is assumed here - confirm against the project. -->
            <TextBlock Text="&#xE722;"
                       FontFamily="Segoe MDL2 Assets"
                       FontSize="20"
                       Margin="0,0,8,0"
                       VerticalAlignment="Center"/>
            <TextBlock Text="截屏"
                       FontSize="16"
                       VerticalAlignment="Center"/>
        </StackPanel>
    </Button.Content>
</Button>
執行OCR初始化檢查模型是否存在, 不存在則回調一個正在初始化提示給UI, 免得用戶等待不耐煩
可以通過先檢查模型文件是否已存在本地,如果不存在則通知UI“正在初始化/下載模型”,再進行下載。你可以這樣實現:
- 檢查模型本地路徑是否存在。
- 如果不存在,調用UI回調(如事件、Action、委托等)提示“正在初始化”。
- 然后異步下載模型。
假設你用 Action<string>? onStatusChanged 作為回調參數,代碼如下:
// Look up the model's local directory and check whether the model file is already there.
var modelInfo = OnlineFullModels.ChineseV3;
string modelDir = modelInfo.LocalDir;
bool modelReady = Directory.Exists(modelDir)
    && File.Exists(Path.Combine(modelDir, "model.pdmodel"));
if (!modelReady)
{
    // Tell the UI that the model still has to be initialized.
    onStatusChanged?.Invoke("正在初始化OCR模型,請稍候...");
}
A: modelInfo.LocalDir; 不存在
A: CreateLocalAsync 方法不存在, 改為異步超時回調UI
這個步驟花了一點時間,因為原本的庫就不存在這個功能,所以最終走到這樣的代碼
明白。你可以通過以下方式實現“異步超時回調UI”:
• 在下載模型前,先啟動一個定時任務(如 Task.Delay),如果模型下載還未完成且超時,則回調 UI 提示“正在初始化”。
• 如果模型下載提前完成,則取消提示。
下面是推薦實現方式:
/// <summary>
/// Downloads the OCR model if needed, runs OCR on <paramref name="src"/> and returns the
/// recognized texts together with the full result.
/// </summary>
/// <param name="src">Image to recognize. It is disposed by this method, also when an exception is thrown.</param>
/// <param name="onStatusChanged">
/// Optional callback invoked with a status message when obtaining the model takes longer than
/// <paramref name="timeoutMs"/>. It is invoked from a continuation scheduled on
/// <see cref="TaskScheduler.Default"/>, so UI callers must marshal to the UI thread themselves.
/// </param>
/// <param name="timeoutMs">Delay in milliseconds before the status message is sent.</param>
/// <returns>The text of every recognized region and the complete OCR result.</returns>
public async Task<(List<string> strings, PaddleOcrResult result)> StartOCR(
    Mat src,
    Action<string>? onStatusChanged = null,
    int timeoutMs = 1500)
{
    ArgumentNullException.ThrowIfNull(src);

    try
    {
        var modelInfo = OnlineFullModels.ChineseV3;
        FullOcrModel model;

        using (var cts = new CancellationTokenSource())
        {
            // Timeout notice: fires only if the delay elapses before it is cancelled.
            _ = Task.Delay(timeoutMs, cts.Token).ContinueWith(
                t =>
                {
                    if (!t.IsCanceled)
                    {
                        onStatusChanged?.Invoke("正在初始化OCR模型,請稍候...");
                    }
                },
                TaskScheduler.Default);

            try
            {
                // Obtain the model (the download is skipped when it already exists).
                model = await modelInfo.DownloadAsync();
            }
            finally
            {
                // Cancel the pending notice on success AND on failure, so the callback
                // can never fire after this method has already finished or thrown.
                cts.Cancel();
            }
        }

        var resultText = new List<string>();
        PaddleOcrResult result;
        using (PaddleOcrAll all = new(model)
        {
            AllowRotateDetection = true,
            Enable180Classification = true,
        })
        {
            // NOTE(review): Run is a synchronous call; if this method is awaited on a UI
            // thread the UI is blocked for its duration - consider offloading at the call site.
            Stopwatch sw = Stopwatch.StartNew();
            result = all.Run(src);
            Console.WriteLine($"elapsed={sw.ElapsedMilliseconds} ms");
            Console.WriteLine("Detected all texts: \n" + result.Text);
            foreach (PaddleOcrResultRegion region in result.Regions)
            {
                Console.WriteLine($"Text: {region.Text}, Score: {region.Score}, RectCenter: {region.Rect.Center}, RectSize: {region.Rect.Size}, Angle: {region.Rect.Angle}");
                resultText.Add(region.Text);
            }
        }

        return (resultText, result);
    }
    finally
    {
        // The input image is released on every path, not only on success.
        src.Dispose();
    }
}
說明:
• 增加了 timeoutMs 參數(默認1.5秒),可根據需要調整。
• 如果模型下載超過指定時間,UI 會收到“正在初始化OCR模型,請稍候...”提示。
• 如果模型提前下載完成,則不會回調提示。
這樣可以優雅地實現異步超時回調UI,無需依賴不存在的 CreateLocalAsync 方法。
StartOCR 處理一下 onStatusChanged 回調, 下載初始化模型反饋友好提示
可以將 PaddleOCRService.StartOCR 的 onStatusChanged 回調參數傳遞到 UI 層,并在模型下載耗時時友好提示用戶。
下面是推薦做法:
- PaddleOCRService.cs 已實現超時回調(見前述建議)。
- MainWindow.xaml.cs 需要在調用 StartOCR 時傳遞回調,并在 UI 上顯示提示。
步驟一:修改 RunOcrAndDraw,傳遞 onStatusChanged
將 RunOcrAndDraw 方法修改如下,利用 Dispatcher.Invoke 保證 UI 線程安全:
/// <summary>
/// Runs OCR on the given image, shows progress/status text while it runs, then displays the
/// recognized text and the annotated image.
/// </summary>
/// <param name="imageBytes">Image data handed to <c>PaddleOCRService.StartOCR</c>.</param>
private async Task RunOcrAndDraw(byte[] imageBytes)
{
    // Set once OCR has finished. It is only touched on the UI thread (here and inside
    // Dispatcher.Invoke), so no extra synchronization is used.
    bool ocrFinished = false;

    void HideProgress()
    {
        ocrFinished = true;
        ProcessingText.Visibility = Visibility.Collapsed;
    }

    ProcessingText.Visibility = Visibility.Visible;
    ProcessingText.Text = "正在識別...";

    try
    {
        var ocr = new PaddleOCRService();
        var results = await ocr.StartOCR(imageBytes, msg =>
        {
            // The status callback may arrive on another thread; marshal it to the UI thread.
            Dispatcher.Invoke(() =>
            {
                // A status message that arrives after OCR has completed must not
                // bring the progress text back.
                if (ocrFinished)
                {
                    return;
                }

                ProcessingText.Text = msg;
                ProcessingText.Visibility = Visibility.Visible;
            });
        });

        HideProgress();

        if (results.strings == null || results.strings.Count == 0)
        {
            OcrTextBox.Text = "未識別到文本";
            return;
        }

        OcrTextBox.Text = string.Join(Environment.NewLine, results.result.Regions.Select(r => r.Text));
        _lastOcrResult = results.result;
        var drawed = DrawOcrResultsOnImage(_currentImage, results.result);
        MainImage.Source = drawed;
    }
    finally
    {
        // Also hide the progress text when OCR or drawing throws.
        HideProgress();
    }
}
步驟二:確保 PaddleOCRService.cs 的 StartOCR 支持 onStatusChanged
如前所述,PaddleOCRService.cs 需如下實現:
/// <summary>
/// Downloads the OCR model if needed, runs OCR on <paramref name="src"/> and returns the
/// recognized texts together with the full result.
/// </summary>
/// <param name="src">Image to recognize. It is disposed by this method, also when an exception is thrown.</param>
/// <param name="onStatusChanged">
/// Optional callback invoked with a status message when obtaining the model takes longer than
/// <paramref name="timeoutMs"/>. It is invoked from a continuation scheduled on
/// <see cref="TaskScheduler.Default"/>, so UI callers must marshal to the UI thread themselves.
/// </param>
/// <param name="timeoutMs">Delay in milliseconds before the status message is sent.</param>
/// <returns>The text of every recognized region and the complete OCR result.</returns>
public async Task<(List<string> strings, PaddleOcrResult result)> StartOCR(
    Mat src,
    Action<string>? onStatusChanged = null,
    int timeoutMs = 1500)
{
    ArgumentNullException.ThrowIfNull(src);

    try
    {
        var modelInfo = OnlineFullModels.ChineseV3;
        FullOcrModel model;

        using (var cts = new CancellationTokenSource())
        {
            // Timeout notice: fires only if the delay elapses before it is cancelled.
            _ = Task.Delay(timeoutMs, cts.Token).ContinueWith(
                t =>
                {
                    if (!t.IsCanceled)
                    {
                        onStatusChanged?.Invoke("正在初始化OCR模型,請稍候...");
                    }
                },
                TaskScheduler.Default);

            try
            {
                model = await modelInfo.DownloadAsync();
            }
            finally
            {
                // Cancel the pending notice on success AND on failure, so the callback
                // can never fire after this method has already finished or thrown.
                cts.Cancel();
            }
        }

        var resultText = new List<string>();
        PaddleOcrResult result;
        using (PaddleOcrAll all = new(model)
        {
            AllowRotateDetection = true,
            Enable180Classification = true,
        })
        {
            result = all.Run(src);
            foreach (PaddleOcrResultRegion region in result.Regions)
            {
                resultText.Add(region.Text);
            }
        }

        return (resultText, result);
    }
    finally
    {
        // The input image is released on every path, not only on success.
        src.Dispose();
    }
}
這樣,用戶在模型首次下載或初始化時會看到“正在初始化OCR模型,請稍候...”,其余時間顯示“正在識別...”,體驗更友好。
回顧
項目目標
本項目旨在利用 GitHub Copilot 輔助開發,從零實現一個基于 AI 的 OCR(光學字符識別)工具。項目采用 .NET 9 和 WPF 技術棧,集成了 PaddleOCR 作為核心識別引擎,實現了圖片文字識別、區域選擇、結果高亮與復制等實用功能。
主要技術與依賴
• 開發語言與平臺:C#,.NET 9,WPF
• OCR引擎:Sdcb.OpenVINO.PaddleOCR
• 圖像處理:OpenCvSharp
• 界面交互:WPF,支持拖拽、粘貼、截圖等多種圖片輸入方式
• AI輔助開發:GitHub Copilot 提供代碼建議與自動補全
核心功能
- 圖片輸入
• 支持文件選擇、拖拽、粘貼、屏幕截圖等多種方式加載圖片。
- OCR識別
• 調用 PaddleOCR 進行文字識別,支持中文、英文等多語種。
• 識別結果實時顯示,支持區域高亮和文字復制。
- 用戶體驗優化
• 首次模型下載時,異步回調 UI,友好提示“正在初始化OCR模型,請稍候...”,避免用戶等待時無響應。
• 識別過程有進度提示,提升交互體驗。
- 結果交互
• 支持鼠標框選圖片區域,提取并復制選中區域的文字。
• 右鍵點擊可復制單個識別文本。
關鍵實現思路
• AI驅動開發:通過 Copilot 自動生成代碼骨架、方法實現和注釋,大幅提升開發效率。
• 異步與回調:模型下載和識別過程均為異步,UI 通過回調及時反饋進度和狀態。
• 圖像與坐標映射:實現了圖片與控件坐標的精準映射,保證高亮和選區準確。
總結
本項目充分發揮了 GitHub Copilot 在 AI 輔助開發中的優勢,實現了一個功能完善、交互友好的 OCR 工具。開發過程中 Copilot 提供了大量代碼建議,極大提升了開發效率和代碼質量。項目結構清晰,易于擴展,適合 AI+開發實戰學習與參考。
關聯項目
FreeSql QQ群:4336577
BA & Blazor QQ群:795206915
Maui Blazor 中文社區(qū) QQ群:645660665
知識共享許可協議
本作品采用 知識共享署名-非商業性使用-相同方式共享 4.0 國際許可協議 進行許可。歡迎轉載、使用、重新發布,但務必保留文章署名AlexChow(包含鏈接: https://github.com/densen2014 ),不得用于商業目的,基于本文修改后的作品務必以相同的許可發布。如有任何疑問,請與我聯系。
轉載聲明
本文來自博客園,作者:周創琳 AlexChow,轉載請注明原文鏈接:http://www.rzrgm.cn/densen2014/p/18899403
AlexChow
今日頭條 | 博客園 | 知乎 | Gitee | GitHub


浙公網安備 33010602011771號