侯体宗的博客
  • 首页
  • Hyperf版
  • beego仿版
  • 人生(杂谈)
  • 技术
  • 关于我
  • 更多分类
    • 文件下载
    • 文字修仙
    • 中国象棋ai
    • 群聊
    • 九宫格抽奖
    • 拼图
    • 消消乐
    • 相册

python3+PyQt5实现支持多线程的页面索引器应用程序

Python  /  管理员 发布于 7年前   264

本文使用 Python3 + PyQt5 实现了《Python Qt GUI 快速编程》第 19 章中的页面索引器(Page Indexer)应用程序示例,该示例通过多个工作线程并行为 HTML 文件建立单词索引。

/home/yrd/eric_workspace/chap19/walker_ans.py

#!/usr/bin/env python3
"""Worker thread that indexes the words found in a batch of HTML files."""

import codecs
import html.entities
import re
import sys
from PyQt5.QtCore import (QMutex, QThread, pyqtSignal, Qt)


class Walker(QThread):
    """Index the words of a list of files in a background thread.

    The word index (``filenamesForWords``) and the set of common words
    (``commonWords``) are shared with the creator of the thread and are
    only touched while holding ``lock``.

    Signals:
        finished(bool, int): emitted at the end of ``run()`` with the
            ``completed`` flag and this walker's ``index``.
        indexed(str, int): emitted after each file has been indexed, with
            the file name and this walker's ``index``.
    """

    # NOTE(review): this class attribute shadows the argument-less
    # QThread.finished signal; callers connect to this (bool, int) version.
    finished = pyqtSignal(bool, int)
    indexed = pyqtSignal(str, int)

    # A word found in more than this many files is treated as "common".
    COMMON_WORDS_THRESHOLD = 250
    MIN_WORD_LEN = 3
    MAX_WORD_LEN = 25
    INVALID_FIRST_OR_LAST = frozenset("0123456789_")
    STRIPHTML_RE = re.compile(r"<[^>]*?>", re.IGNORECASE|re.MULTILINE)
    ENTITY_RE = re.compile(r"&(\w+?);|&#(\d+?);")
    SPLIT_RE = re.compile(r"\W+", re.IGNORECASE|re.MULTILINE)

    def __init__(self, index, lock, files, filenamesForWords,
                 commonWords, parent=None):
        """Store the shared state; no work is done until the thread runs.

        Args:
            index: identifier echoed back in both signals.
            lock: read/write lock guarding the two shared containers
                (it must provide lockForRead/lockForWrite/unlock).
            files: iterable of file names to index.
            filenamesForWords: shared mapping word -> set of file names;
                indexed with ``[word]``, so it is expected to create
                missing entries (the caller passes a defaultdict(set)).
            commonWords: shared set of words excluded from the index.
            parent: optional parent QObject.
        """
        super(Walker, self).__init__(parent)
        self.index = index
        self.lock = lock
        self.files = files
        self.filenamesForWords = filenamesForWords
        self.commonWords = commonWords
        self.stopped = False
        self.mutex = QMutex()  # guards self.stopped
        self.completed = False

    def stop(self):
        """Ask the thread to stop at its next checkpoint."""
        # Acquire before the try block so that a failed acquire can never
        # lead to unlocking a mutex that is not held.
        self.mutex.lock()
        try:
            self.stopped = True
        finally:
            self.mutex.unlock()

    def isStopped(self):
        """Return True once stop() has been called."""
        self.mutex.lock()
        try:
            return self.stopped
        finally:
            self.mutex.unlock()

    def run(self):
        """Thread entry point: index the files, then emit ``finished``."""
        self.processFiles()
        self.stop()
        self.finished.emit(self.completed, self.index)

    def processFiles(self):
        """Index every file in ``self.files``.

        Returns early (leaving ``self.completed`` False) if a stop was
        requested; sets ``self.completed`` to True after the last file.
        Files that cannot be read are reported on stderr and skipped.
        """

        def unichrFromEntity(match):
            """Return the character for a matched HTML entity, or ""."""
            text = match.group(match.lastindex)
            if text.isdigit():
                try:
                    return chr(int(text))
                except (ValueError, OverflowError):
                    # Code point outside the Unicode range, or a
                    # digit-like character int() cannot parse: drop the
                    # entity, as is done for unknown named entities.
                    return ""
            u = html.entities.name2codepoint.get(text)
            return chr(u) if u is not None else ""

        for fname in self.files:
            if self.isStopped():
                return
            try:
                with codecs.open(fname, "r", "UTF8", "ignore") as fh:
                    text = fh.read()
            except OSError as e:
                sys.stderr.write("Error: {0}\n".format(e))
                continue
            if self.isStopped():
                return

            text = self.STRIPHTML_RE.sub("", text)
            text = self.ENTITY_RE.sub(unichrFromEntity, text)
            text = text.lower()

            # Collect this file's candidate words first so that the write
            # lock below is taken once per distinct word.
            words = set()
            for word in self.SPLIT_RE.split(text):
                if (self.MIN_WORD_LEN <= len(word) <= self.MAX_WORD_LEN
                        and word[0] not in self.INVALID_FIRST_OR_LAST
                        and word[-1] not in self.INVALID_FIRST_OR_LAST):
                    self.lock.lockForRead()
                    try:
                        new = word not in self.commonWords
                    finally:
                        self.lock.unlock()
                    if new:
                        words.add(word)
            if self.isStopped():
                return

            for word in words:
                self.lock.lockForWrite()
                try:
                    # Another walker may have made this word common since
                    # the read-locked check above; looking it up again
                    # would re-create its index entry.
                    if word in self.commonWords:
                        continue
                    files = self.filenamesForWords[word]
                    if len(files) > self.COMMON_WORDS_THRESHOLD:
                        del self.filenamesForWords[word]
                        self.commonWords.add(word)
                    else:
                        files.add(str(fname))
                finally:
                    self.lock.unlock()
            self.indexed.emit(fname, self.index)
        self.completed = True


# ---------------------------------------------------------------------------
# /home/yrd/eric_workspace/chap19/pageindexer_ans.pyw
# ---------------------------------------------------------------------------
#!/usr/bin/env python3
# Page Indexer dialog: lets the user pick a directory, indexes its HTML
# files with Walker threads and looks up which files contain a word.

import collections
import os
import sys
from PyQt5.QtCore import (QDir, QReadWriteLock, QMutex, Qt)
from PyQt5.QtWidgets import (QApplication, QDialog, QFileDialog, QFrame,
        QHBoxLayout, QLCDNumber, QLabel, QLineEdit, QListWidget,
        QPushButton, QVBoxLayout)
import walker_ans as walker


def isAlive(qobj):
    """Return False if sip reports that *qobj*'s wrapped object is gone."""
    try:
        # PyQt5 >= 5.11 ships its own private copy of the sip module.
        from PyQt5 import sip
    except ImportError:
        import sip
    try:
        sip.unwrapinstance(qobj)
    except RuntimeError:
        return False
    return True


class Form(QDialog):
    """Dialog that drives the indexing threads and shows their progress."""

    def __init__(self, parent=None):
        """Build the widgets and layouts and connect the signals."""
        super(Form, self).__init__(parent)

        self.mutex = QMutex()
        self.fileCount = 0
        self.filenamesForWords = collections.defaultdict(set)
        self.commonWords = set()
        self.lock = QReadWriteLock()  # guards the two containers above
        self.path = QDir.homePath()

        pathLabel = QLabel("Indexing path:")
        self.pathLabel = QLabel()
        self.pathLabel.setFrameStyle(QFrame.StyledPanel|QFrame.Sunken)
        self.pathButton = QPushButton("Set &Path...")
        self.pathButton.setAutoDefault(False)
        findLabel = QLabel("&Find word:")
        self.findEdit = QLineEdit()
        findLabel.setBuddy(self.findEdit)
        commonWordsLabel = QLabel("&Common words:")
        self.commonWordsListWidget = QListWidget()
        commonWordsLabel.setBuddy(self.commonWordsListWidget)
        filesLabel = QLabel("Files containing the &word:")
        self.filesListWidget = QListWidget()
        filesLabel.setBuddy(self.filesListWidget)
        filesIndexedLabel = QLabel("Files indexed")
        self.filesIndexedLCD = QLCDNumber()
        self.filesIndexedLCD.setSegmentStyle(QLCDNumber.Flat)
        wordsIndexedLabel = QLabel("Words indexed")
        self.wordsIndexedLCD = QLCDNumber()
        self.wordsIndexedLCD.setSegmentStyle(QLCDNumber.Flat)
        commonWordsLCDLabel = QLabel("Common words")
        self.commonWordsLCD = QLCDNumber()
        self.commonWordsLCD.setSegmentStyle(QLCDNumber.Flat)
        self.statusLabel = QLabel("Click the 'Set Path' "
                                  "button to start indexing")
        self.statusLabel.setFrameStyle(QFrame.StyledPanel|QFrame.Sunken)

        topLayout = QHBoxLayout()
        topLayout.addWidget(pathLabel)
        topLayout.addWidget(self.pathLabel, 1)
        topLayout.addWidget(self.pathButton)
        topLayout.addWidget(findLabel)
        topLayout.addWidget(self.findEdit, 1)
        leftLayout = QVBoxLayout()
        leftLayout.addWidget(filesLabel)
        leftLayout.addWidget(self.filesListWidget)
        rightLayout = QVBoxLayout()
        rightLayout.addWidget(commonWordsLabel)
        rightLayout.addWidget(self.commonWordsListWidget)
        middleLayout = QHBoxLayout()
        middleLayout.addLayout(leftLayout, 1)
        middleLayout.addLayout(rightLayout)
        bottomLayout = QHBoxLayout()
        bottomLayout.addWidget(filesIndexedLabel)
        bottomLayout.addWidget(self.filesIndexedLCD)
        bottomLayout.addWidget(wordsIndexedLabel)
        bottomLayout.addWidget(self.wordsIndexedLCD)
        bottomLayout.addWidget(commonWordsLCDLabel)
        bottomLayout.addWidget(self.commonWordsLCD)
        bottomLayout.addStretch()
        layout = QVBoxLayout()
        layout.addLayout(topLayout)
        layout.addLayout(middleLayout)
        layout.addLayout(bottomLayout)
        layout.addWidget(self.statusLabel)
        self.setLayout(layout)

        self.walkers = []    # Walker threads of the current run
        self.completed = []  # one flag per walker, set in finished()
        self.pathButton.clicked.connect(self.setPath)
        self.findEdit.returnPressed.connect(self.find)
        self.setWindowTitle("Page Indexer")

    def stopWalkers(self):
        """Stop every running walker, wait for it, and forget them all."""
        # Signal all walkers first so they wind down in parallel, then
        # wait for each of them.
        for thread in self.walkers:
            if isAlive(thread) and thread.isRunning():
                thread.stop()
        for thread in self.walkers:
            if isAlive(thread) and thread.isRunning():
                thread.wait()
        self.walkers = []
        self.completed = []

    def setPath(self):
        """Ask for a directory and start indexing its .htm/.html files.

        Files are handed to walker threads in batches of 1000.
        """
        self.stopWalkers()
        self.pathButton.setEnabled(False)
        path = QFileDialog.getExistingDirectory(self,
                "Choose a Path to Index", self.path)
        if not path:
            self.statusLabel.setText("Click the 'Set Path' "
                                     "button to start indexing")
            self.pathButton.setEnabled(True)
            return
        self.statusLabel.setText("Scanning directories...")
        QApplication.processEvents() # Needed for Windows
        self.path = QDir.toNativeSeparators(path)
        self.findEdit.setFocus()
        self.pathLabel.setText(self.path)
        self.statusLabel.clear()
        self.filesListWidget.clear()
        self.fileCount = 0
        self.filenamesForWords = collections.defaultdict(set)
        self.commonWords = set()

        nofilesfound = True
        files = []
        index = 0
        for root, dirs, fnames in os.walk(str(self.path)):
            for name in fnames:
                if not name.endswith((".htm", ".html")):
                    continue
                files.append(os.path.join(root, name))
                if len(files) == 1000:
                    self.processFiles(index, files)
                    files = []  # rebind: the walker keeps the old list
                    index += 1
                    nofilesfound = False
        if files:
            self.processFiles(index, files)
            nofilesfound = False
        if nofilesfound:
            self.finishedIndexing()
            self.statusLabel.setText(
                    "No HTML files found in the given path")

    def processFiles(self, index, files):
        """Create, register and start a walker thread for *files*."""
        thread = walker.Walker(index, self.lock, files,
                self.filenamesForWords, self.commonWords, self)
        thread.indexed[str, int].connect(self.indexed)
        thread.finished[bool, int].connect(self.finished)
        thread.finished.connect(thread.deleteLater)
        self.walkers.append(thread)
        self.completed.append(False)
        thread.start()
        thread.wait(300) # Needed for Windows

    def find(self):
        """List the indexed files that contain the word in the find box.

        Only the first whitespace-separated word is used, lower-cased.
        """
        word = str(self.findEdit.text())
        if not word:
            self.mutex.lock()
            try:
                self.statusLabel.setText("Enter a word to find in files")
            finally:
                self.mutex.unlock()
            return
        self.mutex.lock()
        try:
            self.statusLabel.clear()
            self.filesListWidget.clear()
        finally:
            self.mutex.unlock()
        word = word.lower()
        if " " in word:
            word = word.split()[0]

        self.lock.lockForRead()
        try:
            found = word in self.commonWords
        finally:
            self.lock.unlock()
        if found:
            self.mutex.lock()
            try:
                self.statusLabel.setText("Common words like '{0}' "
                        "are not indexed".format(word))
            finally:
                self.mutex.unlock()
            return

        self.lock.lockForRead()
        try:
            # .get() avoids creating an empty defaultdict entry; copy so
            # the set can be used after the lock is released.
            files = self.filenamesForWords.get(word, set()).copy()
        finally:
            self.lock.unlock()
        if not files:
            self.mutex.lock()
            try:
                self.statusLabel.setText("No indexed file contains "
                        "the word '{0}'".format(word))
            finally:
                self.mutex.unlock()
            return

        files = [QDir.toNativeSeparators(name) for name in
                 sorted(files, key=str.lower)]
        self.mutex.lock()
        try:
            self.filesListWidget.addItems(files)
            self.statusLabel.setText(
                    "{0} indexed files contain the word '{1}'".format(
                    len(files), word))
        finally:
            self.mutex.unlock()

    def indexed(self, fname, index):
        """Slot for Walker.indexed: count the file and refresh the display.

        The LCD counters are refreshed every 25th file; otherwise the
        common-words list is refreshed every 101st file.
        """
        self.mutex.lock()
        try:
            self.statusLabel.setText(fname)
            self.fileCount += 1
            count = self.fileCount
        finally:
            self.mutex.unlock()

        if count % 25 == 0:
            self.lock.lockForRead()
            try:
                indexedWordCount = len(self.filenamesForWords)
                commonWordCount = len(self.commonWords)
            finally:
                self.lock.unlock()
            self.mutex.lock()
            try:
                self.filesIndexedLCD.display(count)
                self.wordsIndexedLCD.display(indexedWordCount)
                self.commonWordsLCD.display(commonWordCount)
            finally:
                self.mutex.unlock()
        elif count % 101 == 0:
            self.lock.lockForRead()
            try:
                words = self.commonWords.copy()
            finally:
                self.lock.unlock()
            self.mutex.lock()
            try:
                self.commonWordsListWidget.clear()
                self.commonWordsListWidget.addItems(sorted(words))
            finally:
                self.mutex.unlock()

    def finished(self, completed, index):
        """Slot for Walker.finished: record it and detect the end of a run.

        Args:
            completed: the walker's completion flag (not used here).
            index: the walker's position in ``self.completed``.
        """
        if self.walkers:
            # Ignore an index that does not belong to the current run,
            # e.g. a signal from a walker of a previous run that is
            # delivered after stopWalkers() reset the lists.
            if 0 <= index < len(self.completed):
                self.completed[index] = True
            done = all(self.completed)
        else:
            done = True
        if done:
            self.mutex.lock()
            try:
                self.statusLabel.setText("Finished")
            finally:
                self.mutex.unlock()
            self.finishedIndexing()

    def reject(self):
        """Stop an indexing run in progress; otherwise accept the dialog."""
        if not all(self.completed):
            self.stopWalkers()
            self.finishedIndexing()
        else:
            self.accept()

    def closeEvent(self, event=None):
        """Stop the walker threads when the dialog is closed."""
        self.stopWalkers()

    def finishedIndexing(self):
        """Show the final counts and re-enable the path button."""
        self.filesIndexedLCD.display(self.fileCount)
        self.wordsIndexedLCD.display(len(self.filenamesForWords))
        self.commonWordsLCD.display(len(self.commonWords))
        self.pathButton.setEnabled(True)
        QApplication.processEvents() # Needed for Windows


if __name__ == "__main__":
    app = QApplication(sys.argv)
    form = Form()
    form.show()
    app.exec_()

运行结果:

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持。


  • 上一条:
    Python cookbook(字符串与文本)在字符串的开头或结尾处进行文本匹配操作
    下一条:
    python3+PyQt5+Qt Designer实现扩展对话框
  • 昵称:

    邮箱:

    0条评论 (评论内容有缓存机制,请悉知!)
    最新最热
    • 分类目录
    • 人生(杂谈)
    • 技术
    • linux
    • Java
    • php
    • 框架(架构)
    • 前端
    • ThinkPHP
    • 数据库
    • 微信(小程序)
    • Laravel
    • Redis
    • Docker
    • Go
    • swoole
    • Windows
    • Python
    • 苹果(mac/ios)
    • 相关文章
    • 在python语言中Flask框架的学习及简单功能示例(0个评论)
    • 在Python语言中实现GUI全屏倒计时代码示例(0个评论)
    • Python + zipfile库实现zip文件解压自动化脚本示例(0个评论)
    • python爬虫BeautifulSoup快速抓取网站图片(1个评论)
    • vscode 配置 python3开发环境的方法(0个评论)
    • 近期文章
    • 在go中实现一个常用的先进先出的缓存淘汰算法示例代码(0个评论)
    • 在go+gin中使用"github.com/skip2/go-qrcode"实现url转二维码功能(0个评论)
    • 在go语言中使用api.geonames.org接口实现根据国际邮政编码获取地址信息功能(1个评论)
    • 在go语言中使用github.com/signintech/gopdf实现生成pdf分页文件功能(0个评论)
    • gmail发邮件报错:534 5.7.9 Application-specific password required...解决方案(0个评论)
    • 欧盟关于强迫劳动的规定的官方举报渠道及官方举报网站(0个评论)
    • 在go语言中使用github.com/signintech/gopdf实现生成pdf文件功能(0个评论)
    • Laravel从Accel获得5700万美元A轮融资(0个评论)
    • 在go + gin中gorm实现指定搜索/区间搜索分页列表功能接口实例(0个评论)
    • 在go语言中实现IP/CIDR的ip和netmask互转及IP段形式互转及ip是否存在IP/CIDR(0个评论)
    • 近期评论
    • 122 在

      学历:一种延缓就业设计,生活需求下的权衡之选中评论 工作几年后,报名考研了,到现在还没认真学习备考,迷茫中。作为一名北漂互联网打工人..
    • 123 在

      Clash for Windows作者删库跑路了,github已404中评论 按理说只要你在国内,所有的流量进出都在监控范围内,不管你怎么隐藏也没用,想搞你分..
    • 原梓番博客 在

      在Laravel框架中使用模型Model分表最简单的方法中评论 好久好久都没看友情链接申请了,今天刚看,已经添加。..
    • 博主 在

      佛跳墙vpn软件不会用?上不了网?佛跳墙vpn常见问题以及解决办法中评论 @1111老铁这个不行了,可以看看近期评论的其他文章..
    • 1111 在

      佛跳墙vpn软件不会用?上不了网?佛跳墙vpn常见问题以及解决办法中评论 网站不能打开,博主百忙中能否发个APP下载链接,佛跳墙或极光..
    • 2016-10
    • 2016-11
    • 2018-04
    • 2020-03
    • 2020-04
    • 2020-05
    • 2020-06
    • 2022-01
    • 2023-07
    • 2023-10
    Top

    Copyright·© 2019 侯体宗版权所有· 粤ICP备20027696号 PHP交流群

    侯体宗的博客