科技行者

行者学院 转型私董会 科技行者专题报道 网红大战科技行者

知识库

知识库 安全导航



ZDNet>软件频道>中间件-zhiding>汉字拼音的一个完整解决方法!

  • 扫一扫
    分享文章到微信

  • 扫一扫
    关注官方公众号
    至顶头条

   Imports System.IO Imports System.Text.RegularExpressions Namespace Businness.PinYin Public Class PYService Private gDataSe

来源:中国IT实验室 2007年10月02日

关键字:编程 汉字 拼音

  

Imports System.IO
Imports System.Text.RegularExpressions

Namespace Businness.PinYin
Public Class PYService
Private gDataSet As New dsPinYin

''' <summary>
''' The in-memory character table held by this service.
''' </summary>
''' <value>The PinYin table of the backing typed data set.</value>
Public ReadOnly Property PinYinTable() As dsPinYin.PinYinDataTable
    Get
        Return Me.gDataSet.PinYin
    End Get
End Property

' Path of the plain-text source list: "pinyin.txt" appended to the application base directory.
' NOTE(review): plain concatenation assumes ApplicationBase ends with a directory separator - confirm.
Private gTxtFile As String = AppDomain.CurrentDomain.SetupInformation.ApplicationBase & "pinyin.txt"
' Path of the XML store: "pinyin.xml" appended to the application base directory in the same way.
Private gxmlFile As String = AppDomain.CurrentDomain.SetupInformation.ApplicationBase & "pinyin.xml"

' Splits a line into a leading run of characters in U+4E00..U+9FA5 (group "Word")
' and the rest of the line (group "PingYin").
Private gRegex As New Regex("(?<Word>^[\u4e00-\u9fa5]+)(?<PingYin>.*)")

''' <summary>
''' Loads the character table from pinyin.xml in the application base directory,
''' discarding any rows currently held in memory.
''' </summary>
''' <exception cref="System.IO.FileNotFoundException">pinyin.xml does not exist.</exception>
Public Sub Load()
    If Not IO.File.Exists(gxmlFile) Then
        ' A specific exception type (still an Exception for existing callers) that also carries the file name.
        Throw New IO.FileNotFoundException(String.Format("文件{0}不存在", gxmlFile), gxmlFile)
    End If

    ' Clear first so the XML content replaces, rather than merges with, the current rows.
    DataSetInitialize()
    gDataSet.ReadXml(gxmlFile)
End Sub

''' <summary>
''' Rebuilds the character table from pinyin.txt in the application base directory.
''' </summary>
''' <exception cref="System.IO.FileNotFoundException">pinyin.txt does not exist.</exception>
Public Sub Update()
    If Not IO.File.Exists(gTxtFile) Then
        ' A specific exception type (still an Exception for existing callers) that also carries the file name.
        Throw New IO.FileNotFoundException(String.Format("文件{0}不存在", gTxtFile), gTxtFile)
    End If

    UpdateFromTxt(gTxtFile)
End Sub

''' <summary>
''' Writes the character table to pinyin.xml in the application base directory.
''' </summary>
Public Sub Save()
    Me.gDataSet.WriteXml(Me.gxmlFile)
End Sub

' Empties the data set and commits that state; called before every load or rebuild.
Private Sub DataSetInitialize()
    With Me.gDataSet
        .Clear()
        .AcceptChanges()
    End With
End Sub

' Rebuilds the table from a text file, feeding every line of the file to Add.
' file: path of the text file; it is read with the system default encoding.
Private Sub UpdateFromTxt(ByVal file As String)
    DataSetInitialize()

    ' Using guarantees the reader is closed even if Add throws part-way through.
    Using mReader As New IO.StreamReader(file, System.Text.Encoding.Default)
        Dim mLine As String = mReader.ReadLine()

        ' ReadLine returns Nothing only at end of file. A blank line is an empty string,
        ' so it must not end the loop; Add ignores lines that do not match.
        Do While mLine IsNot Nothing
            Add(mLine)
            mLine = mReader.ReadLine()
        Loop
    End Using

    Me.gDataSet.PinYin.AcceptChanges()
End Sub

' Parses one line of the text source and stores it when it describes a single character.
' Lines that are Nothing, that do not match gRegex, or whose "Word" group is longer
' than one character (phrases) are ignored.
Private Sub Add(ByVal line As String)
    If line Is Nothing Then Return

    Dim parsed As Match = gRegex.Match(line)
    If Not parsed.Success Then Return

    Dim character As String = parsed.Groups("Word").Value

    ' Only single characters are kept; phrases are skipped.
    If character.Length <> 1 Then Return

    Add(character, parsed.Groups("PingYin").Value)
End Sub

' Stores the readings of one character, merging them into the existing row if there is one.
' word: a single Chinese character.
' py:   its readings, separated by spaces.
' Readings are always stored separated by exactly one space.
Private Sub Add(ByVal word As String, ByVal py As String)
    ' Splitting with RemoveEmptyEntries performs the space normalisation the original
    ' Replace(" ", " ") could not: it replaced a space with the same space.
    ' NOTE(review): presumably the first argument was once two spaces - confirm.
    Dim mReadings As String() = py.Trim().Split(New Char() {" "c}, StringSplitOptions.RemoveEmptyEntries)

    Dim mCode As String = ChineseCode(word)
    Dim mRow As dsPinYin.PinYinRow = Me.gDataSet.PinYin.FindBy代码(mCode)
    If mRow Is Nothing Then
        Me.gDataSet.PinYin.AddPinYinRow(word, mCode, String.Join(" ", mReadings))
        Return
    End If

    ' Compare whole readings. A substring test would treat "an" as already present
    ' when only "ang" is stored.
    Dim mKnown As New System.Collections.Generic.List(Of String)( _
        mRow.拼音.Split(New Char() {" "c}, StringSplitOptions.RemoveEmptyEntries))
    Dim mChanged As Boolean = False

    For Each s As String In mReadings
        If Not mKnown.Contains(s) Then
            mKnown.Add(s)
            mChanged = True
        End If
    Next

    ' Leave the row untouched when nothing new was found.
    If mChanged Then
        mRow.拼音 = String.Join(" ", mKnown.ToArray())
    End If
End Sub

''' <summary>
''' Converts a string to pinyin. Each single Chinese character is replaced by its
''' reading(s); every other character is copied unchanged.
''' </summary>
''' <param name="line">The text to convert. Nothing or an empty string yields an empty string.</param>
''' <param name="isgetfirst">If True, only the first reading of a character with several readings is used.</param>
''' <returns>The converted text.</returns>
Public Function ToPinyin(ByVal line As String, ByVal isgetfirst As Boolean) As String
    ' Guard: enumerating Nothing would raise a NullReferenceException.
    If String.IsNullOrEmpty(line) Then Return String.Empty

    Dim mBuilder As New System.Text.StringBuilder(line.Length)

    For Each s As Char In line
        If IsTrue(s) Then
            mBuilder.Append(GetPinyin(s, isgetfirst))
        Else
            mBuilder.Append(s)
        End If
    Next

    Return mBuilder.ToString()
End Function

' Returns the pinyin for one character.
' - No table entry: the character itself is returned.
' - One reading, or isgetfirst is True: the first reading.
' - Otherwise: all readings joined by commas and wrapped in parentheses.
Private Function GetPinyin(ByVal word As String, ByVal isgetfirst As Boolean) As String
    Dim readings As String() = PinYinArray(ChineseCode(word))

    If readings Is Nothing Then Return word

    If isgetfirst OrElse readings.Length = 1 Then Return readings(0)

    Return String.Format("({0})", String.Join(",", readings))
End Function

' Looks up the row for a character code and returns its readings split on spaces,
' or Nothing when the table has no row for that code.
Private Function PinYinArray(ByVal code As String) As String()
    Dim hit As dsPinYin.PinYinRow = Me.gDataSet.PinYin.FindBy代码(code)

    If hit IsNot Nothing Then
        Return hit.拼音.Split(" "c)
    End If

    Return Nothing
End Function

''' <summary>
''' Finds the characters that have the given reading.
''' </summary>
''' <param name="pinyin">The reading to look for; it must equal one stored reading exactly.</param>
''' <returns>The matching characters; an empty array when there are none.</returns>
Public Function WordArray(ByVal pinyin As String) As String()
    Dim mResult As New System.Collections.Generic.List(Of String)

    ' The rows are scanned directly instead of building a LIKE filter from the argument.
    ' Quotes or wildcard characters in it would break or distort the filter expression,
    ' and the exact comparison below decides the result anyway.
    For Each mRow As dsPinYin.PinYinRow In Me.gDataSet.PinYin.Select()
        ' The LIKE filter never returned rows with a null reading; skip them here too.
        If mRow.IsNull("拼音") Then Continue For

        If Array.IndexOf(mRow.拼音.Split(" "c), pinyin) <> -1 Then
            mResult.Add(mRow.汉字)
        End If
    Next

    Return mResult.ToArray()
End Function

''' <summary>
''' Finds the characters that have the given reading and returns them as one string.
''' </summary>
''' <param name="pinyin">The reading to look for.</param>
''' <returns>The matching characters concatenated without a separator.</returns>
Public Function Words(ByVal pinyin As String) As String
    Dim found As String() = WordArray(pinyin)
    Return String.Join(String.Empty, found)
End Function


''' <summary>
''' Computes the lookup code of a character: the hexadecimal form of the first two
''' bytes of the character in the system default encoding.
''' </summary>
''' <param name="word">A single Chinese character.</param>
''' <returns>The code, or Nothing when <paramref name="word"/> is not a single Chinese character.</returns>
''' <exception cref="System.InvalidOperationException">
''' The default encoding encodes the character in fewer than two bytes.
''' </exception>
''' <remarks>
''' NOTE(review): the code depends on the machine's default encoding. Only the first
''' two bytes are used, so an encoding that produces more than two bytes per character
''' can give different characters the same code - confirm the deployment code page.
''' </remarks>
Public Shared Function ChineseCode(ByVal word As String) As String
    If Not IsTrue(word) Then Return Nothing

    Dim bytes() As Byte = System.Text.Encoding.Default.GetBytes(word)

    ' Fail with a clear message instead of an IndexOutOfRangeException on bytes(1).
    If bytes.Length < 2 Then
        Throw New System.InvalidOperationException(String.Format( _
            "The default encoding '{0}' does not encode '{1}' as at least two bytes.", _
            System.Text.Encoding.Default.EncodingName, word))
    End If

    Return String.Concat(Hex(bytes(0)), Hex(bytes(1)))
End Function

''' <summary>
''' Tells whether the text is exactly one Chinese character (U+4E00 to U+9FA5).
''' </summary>
''' <param name="word">The text to test; Nothing yields False.</param>
''' <returns>True when the text matches the single-character pattern.</returns>
Public Shared Function IsTrue(ByVal word As String) As Boolean
    Return word IsNot Nothing AndAlso _
        System.Text.RegularExpressions.Regex.IsMatch(word, "^[\u4e00-\u9fa5]$")
End Function

' Grows the array by one element and stores value in the new last slot.
Private Sub Append(ByRef collection As String(), ByVal value As String)
    Dim newLength As Integer = collection.Length + 1
    Array.Resize(collection, newLength)
    collection(newLength - 1) = value
End Sub
End Class
End Namespace

查看本文来源