扫一扫
分享文章到微信
扫一扫
关注官方公众号
至顶头条
在本页阅读全文(共2页)
Imports System.IO
Imports System.Text.RegularExpressions
Namespace Businness.PinYin
Public Class PYService
' Backing typed data set; its PinYin table holds the loaded entries.
Private gDataSet As New dsPinYin
''' <summary>
''' Gets the Chinese character table, i.e. the PinYin table of the backing data set.
''' </summary>
Public ReadOnly Property PinYinTable() As dsPinYin.PinYinDataTable
    Get
        Return Me.gDataSet.PinYin
    End Get
End Property
' Path of the text source file "pinyin.txt", formed by appending the name to the application base path.
Private gTxtFile As String = AppDomain.CurrentDomain.SetupInformation.ApplicationBase & "pinyin.txt"
' Path of the XML store "pinyin.xml", formed by appending the name to the application base path.
Private gxmlFile As String = AppDomain.CurrentDomain.SetupInformation.ApplicationBase & "pinyin.xml"
' Parses one line: group "Word" is the leading run of characters in U+4E00..U+9FA5,
' group "PingYin" is the remainder of the line.
Private gRegex As New Regex("(?<Word>^[\u4e00-\u9fa5]+)(?<PingYin>.*)")
''' <summary>
''' Loads the character library from pinyin.xml in the application base directory,
''' discarding whatever the data set held before.
''' </summary>
''' <exception cref="System.IO.FileNotFoundException">The XML file does not exist.</exception>
Public Sub Load()
    If Not IO.File.Exists(gxmlFile) Then
        ' FileNotFoundException still derives from Exception, so existing
        ' "Catch ex As Exception" handlers keep working; the message is unchanged.
        Throw New IO.FileNotFoundException(String.Format("文件{0}不存在", gxmlFile), gxmlFile)
    End If
    ' Clear first so a reload does not merge with previously loaded rows.
    DataSetInitialize()
    gDataSet.ReadXml(gxmlFile)
End Sub
''' <summary>
''' Rebuilds the character library from the text file pinyin.txt in the
''' application base directory.
''' </summary>
''' <exception cref="System.IO.FileNotFoundException">The text file does not exist.</exception>
Public Sub Update()
    If Not IO.File.Exists(gTxtFile) Then
        ' FileNotFoundException still derives from Exception, so existing
        ' "Catch ex As Exception" handlers keep working; the message is unchanged.
        Throw New IO.FileNotFoundException(String.Format("文件{0}不存在", gTxtFile), gTxtFile)
    End If
    UpdateFromTxt(gTxtFile)
End Sub
''' <summary>
''' Saves the character library to pinyin.xml in the application base directory.
''' </summary>
Public Sub Save()
    Me.gDataSet.WriteXml(Me.gxmlFile)
End Sub
''' <summary>
''' Empties the data set before an update or a load, then accepts the changes.
''' </summary>
Private Sub DataSetInitialize()
    With Me.gDataSet
        .Clear()
        .AcceptChanges()
    End With
End Sub
''' <summary>
''' Clears the data set and refills it from a text file, passing every line to Add.
''' </summary>
''' <param name="file">Path of the text file to read (decoded with Encoding.Default).</param>
Private Sub UpdateFromTxt(ByVal file As String)
    DataSetInitialize()
    ' Using releases the reader even when Add throws part-way through the file.
    Using mReader As New IO.StreamReader(file, System.Text.Encoding.Default)
        Dim mLine As String = mReader.ReadLine()
        ' ReadLine returns Nothing only at end of stream. A blank line is just
        ' an entry that fails to match, so it must not terminate the import.
        While mLine IsNot Nothing
            Add(mLine)
            mLine = mReader.ReadLine()
        End While
    End Using
    Me.gDataSet.PinYin.AcceptChanges()
End Sub
''' <summary>
''' Parses one line of the text file and adds it when it describes a single character.
''' </summary>
''' <param name="line">Raw line; Nothing is ignored.</param>
Private Sub Add(ByVal line As String)
    If line Is Nothing Then Return

    Dim mMatch As Match = gRegex.Match(line)
    If Not mMatch.Success Then Return

    ' Keep single characters only; multi-character phrases are skipped.
    Dim mWord As String = mMatch.Groups("Word").Value
    If mWord.Length <> 1 Then Return

    Add(mWord, mMatch.Groups("PingYin").Value)
End Sub
''' <summary>
''' Adds a character with its pinyin readings, or merges new readings into the
''' existing row for that character.
''' </summary>
''' <param name="word">The character to store.</param>
''' <param name="py">One or more pinyin readings separated by white space.</param>
Private Sub Add(ByVal word As String, ByVal py As String)
    ' Multiple readings are stored separated by exactly one space. Splitting on
    ' white space and dropping empty entries collapses any run of separators.
    Dim mNewReadings() As String = py.Split(CType(Nothing, Char()), StringSplitOptions.RemoveEmptyEntries)

    Dim mCode As String = ChineseCode(word)
    Dim mRow As dsPinYin.PinYinRow = Me.gDataSet.PinYin.FindBy代码(mCode)

    ' Readings already stored for this character (none when the row is new).
    Dim mReadings As New System.Collections.Generic.List(Of String)
    If mRow IsNot Nothing Then
        mReadings.AddRange(mRow.拼音.Split(CType(Nothing, Char()), StringSplitOptions.RemoveEmptyEntries))
    End If

    ' Compare whole readings: a substring test would treat "an" as already
    ' present when the row holds "ang" and silently drop it.
    Dim mChanged As Boolean = False
    For Each s As String In mNewReadings
        If Not mReadings.Contains(s) Then
            mReadings.Add(s)
            mChanged = True
        End If
    Next

    If mRow Is Nothing Then
        Me.gDataSet.PinYin.AddPinYinRow(word, mCode, String.Join(" ", mReadings.ToArray()))
    ElseIf mChanged Then
        ' Leave the row untouched when there is nothing new to record.
        mRow.拼音 = String.Join(" ", mReadings.ToArray())
    End If
End Sub
''' <summary>
''' 将字符串转为拼音
''' </summary>
''' <param name="line">字符串</param>
''' <param name="isgetfirst">如是多音字,取第一个拼音</param>
Public Function ToPinyin(ByVal line As String, ByVal isgetfirst As Boolean) As String
Dim mBuilder As New Text.StringBuilder
如果您非常迫切的想了解IT领域最新产品与技术信息,那么订阅至顶网技术邮件将是您的最佳途径之一。
现场直击|2021世界人工智能大会
直击5G创新地带,就在2021MWC上海
5G已至 转型当时——服务提供商如何把握转型的绝佳时机
寻找自己的Flag
华为开发者大会2020(Cloud)- 科技行者