您现在的位置是:主页 > news > 做网站送邮箱/使用网站模板快速建站

做网站送邮箱/使用网站模板快速建站

admin2025/5/20 13:20:20news

简介做网站送邮箱,使用网站模板快速建站,苏州外贸网站制作,限制个人做网站本文整理汇总了Python中jieba.posseg.lcut方法的典型用法代码示例。如果您正苦于以下问题:Python posseg.lcut方法的具体用法?Python posseg.lcut怎么用?Python posseg.lcut使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以…

做网站送邮箱,使用网站模板快速建站,苏州外贸网站制作,限制个人做网站本文整理汇总了Python中jieba.posseg.lcut方法的典型用法代码示例。如果您正苦于以下问题:Python posseg.lcut方法的具体用法?Python posseg.lcut怎么用?Python posseg.lcut使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以…

本文整理汇总了Python中jieba.posseg.lcut方法的典型用法代码示例。如果您正苦于以下问题:Python posseg.lcut方法的具体用法?Python posseg.lcut怎么用?Python posseg.lcut使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块jieba.posseg的用法示例。

在下文中一共展示了posseg.lcut方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: extract_dictionary_feature

​点赞 3

# 需要导入模块: from jieba import posseg [as 别名]

# 或者: from jieba.posseg import lcut [as 别名]

def extract_dictionary_feature(file_name, col_tag=0, col_content=1):
    """Build sentiment feature vectors from an Excel file of labelled reviews.

    Loads four sentiment vocabularies (adverbs, inverse/negation words,
    negative and positive dictionaries), POS-tags each review with
    jieba.posseg, drops stopword tokens, and converts the tagged reviews
    into feature vectors via ``reviews2matrix`` / ``matrix2vec``.

    :param file_name: path to the Excel file (read with no header row)
    :param col_tag: column index holding the label
    :param col_content: column index holding the review text
    :return: (x, y) -- feature vectors and the label list
    """
    def _read_vocab(path):
        # Close the handle deterministically (the original leaked all four
        # file objects). codecs.open always opens binary internally, so the
        # original 'rb' + encoding combination decoded the same way.
        with codecs.open(path, 'rb', encoding='utf-8') as fh:
            return fh.read().split('\n')

    adv = _read_vocab('./data/vocabulary/adv.txt')
    inverse = _read_vocab('./data/vocabulary/inverse.txt')
    negdict = _read_vocab('./data/vocabulary/negdict.txt')
    posdict = _read_vocab('./data/vocabulary/posdict.txt')

    contents = pd.read_excel(file_name, header=None)

    # Parenthesized single-argument print works identically under Python 2.
    print('cut words...')
    # POS-tag each review and keep only non-stopword (word, flag) pairs.
    # NOTE(review): `stopwords` is a module-level name defined elsewhere.
    cw = lambda x: [pair for pair in psg.lcut(x) if pair.word not in stopwords]
    contents['pairs'] = contents[col_content].apply(cw)

    matrix = reviews2matrix(list(contents['pairs']), posdict, negdict, inverse, adv)
    x = matrix2vec(matrix)
    y = list(contents[col_tag])
    return x, y

开发者ID:wac81,项目名称:Book_DeepLearning_Practice,代码行数:18,

示例2: delNOTNeedWords

​点赞 3

# 需要导入模块: from jieba import posseg [as 别名]

# 或者: from jieba.posseg import lcut [as 别名]

def delNOTNeedWords(content, customstopwords=None):
    """Filter *content* down to content-bearing words.

    POS-tags the text with jieba.posseg and keeps only tokens whose flag is
    in a whitelist of noun/time/verb/adjective/numeral/onomatopoeia/unknown
    tags and that are not stopwords.

    :param content: text to segment (unicode)
    :param customstopwords: iterable of stopwords; when None, the default
        file ``stopwords.txt`` is loaded if present.
    :return: (concatenated_words, word_list) -- both hold UTF-8 encoded tokens
    """
    if customstopwords is None:  # BUG FIX: was `== None`
        import os
        default_file = "stopwords.txt"
        if os.path.exists(default_file):
            # Close the handle deterministically (the original leaked it).
            with codecs.open(default_file, encoding='UTF-8') as fh:
                customstopwords = fh.read().split(u'\n')
        else:
            # BUG FIX: the original left the file-name *string* in
            # customstopwords here, so `word not in customstopwords`
            # became a substring test against "stopwords.txt".
            customstopwords = []

    # POS tags worth keeping (jieba/ICTCLAS-style): nouns (n*), time (t*),
    # localizers (f), verbs (v*), adjectives (a*), numerals (m*),
    # onomatopoeia (o) and unknown (x).
    keep_flags = [u'n', u'nr', u'ns', u'nt', u'nz', u'ng', u't', u'tg',
                  u'f', u'v', u'vd', u'vn', u'vf', u'vx', u'vi', u'vl',
                  u'vg', u'a', u'an', u'ag', u'al', u'm', u'mq', u'o', u'x']

    result = ''
    return_words = []
    for word, flag in pseg.lcut(content):
        # Python 2: encode to UTF-8 bytes and drop surrounding spaces.
        tempword = word.encode('utf-8').strip(' ')
        if word not in customstopwords and len(tempword) > 0 and flag in keep_flags:
            result += tempword
            return_words.append(tempword)
    return result, return_words

开发者ID:wac81,项目名称:Book_DeepLearning_Practice,代码行数:27,

示例3: delNOTNeedWords

​点赞 3

# 需要导入模块: from jieba import posseg [as 别名]

# 或者: from jieba.posseg import lcut [as 别名]

def delNOTNeedWords(content, stopwords):
    """Remove stopwords and function-word POS tags from *content*.

    :param content: text to segment
    :param stopwords: iterable of stopwords to drop
    :return: remaining words concatenated into one UTF-8 byte string
    """
    # POS tags to discard: unknown (x), whitespace (zg), auxiliaries
    # (uj/ul/uz), interjections (e), adverbs (d) and modal particles (y).
    # BUG FIX: jieba.posseg flags carry no leading '/' -- they are 'x',
    # 'zg', ... -- so the original comparison against "/x", "/zg", ...
    # never matched and the POS filter was a silent no-op.
    drop_flags = ("x", "zg", "uj", "ul", "e", "d", "uz", "y")

    result = ''
    for word, flag in pseg.lcut(content):
        if word not in stopwords and flag not in drop_flags:
            # Python 2: accumulate UTF-8 encoded bytes.
            result += word.encode('utf-8')
    return result

# return ''.join(text_list)

开发者ID:wac81,项目名称:recommended_system,代码行数:20,代码来源:ar.py

示例4: jieba_example

​点赞 2

# 需要导入模块: from jieba import posseg [as 别名]

# 或者: from jieba.posseg import lcut [as 别名]

def jieba_example():

raw = "????S5????,123,?,?"

raw_seq = jieba.cut(raw)

raw_seq_list = jieba.lcut(raw)

raw_keyword = jieba.analyse.extract_tags(raw, topK=3, withWeight=False, allowPOS=())

raw_with_ictclas = pseg.cut(raw)

for word, flag in raw_with_ictclas:

print word, flag

开发者ID:roliygu,项目名称:CNKICrawler,代码行数:10,

示例5: cut_with_flag

​点赞 2

# 需要导入模块: from jieba import posseg [as 别名]

# 或者: from jieba.posseg import lcut [as 别名]

def cut_with_flag(raw_str, filter_invalid_word_flag=True):
    """POS-tag *raw_str* with jieba.posseg.

    :param raw_str: str -- text to segment
    :param filter_invalid_word_flag: when True, pass the pairs through
        ``filter_invalid_word`` before returning
    :return: list[(str, str)] -- (word, flag) tuples
    """
    # Materialise the posseg pairs as plain tuples.
    pairs = [(word, flag) for word, flag in pseg.lcut(raw_str)]
    return filter_invalid_word(pairs) if filter_invalid_word_flag else pairs

开发者ID:roliygu,项目名称:CNKICrawler,代码行数:14,

示例6: maxSimTxt

​点赞 2

# 需要导入模块: from jieba import posseg [as 别名]

# 或者: from jieba.posseg import lcut [as 别名]

def maxSimTxt(self, intxt, simCondision=0.1, simType='simple'):
    """Return the knowledge-base answer whose question best matches *intxt*.

    Scores every entry in ``self.zhishiku`` against the input (storing the
    score on ``t.sim`` as a side effect) and returns the answer of the
    highest-scoring entry, or a fallback message when the best similarity
    is below *simCondision*.

    :param intxt: the input question text (also appended to self.lastTxt)
    :param simCondision: minimum similarity required to return an answer
    :param simType: one of 'simple', 'simple_pos' or 'vec'
    :return: the best answer string, or an error/fallback message
    """
    self.lastTxt.append(intxt)
    if simType not in ('simple', 'simple_pos', 'vec'):
        # NOTE(review): message text was mojibake in the source; preserved
        # byte-for-byte.
        return 'error: maxSimTxt?simType?????: {}'.format(simType)

    # Without a loaded embedding model, 'vec' degrades to 'simple_pos'.
    embedding = self.vecModel
    if simType == 'vec' and not embedding:
        simType = 'simple_pos'

    for t in self.zhishiku:
        # 'vec' compares against precomputed question vectors; the other
        # modes compare against tokenised question words.
        questions = t.q_vec if simType == 'vec' else t.q_word
        # 'simple' uses plain tokens; the POS-aware modes use (word, flag)
        # pairs from jieba.posseg.
        in_vec = jieba.lcut(intxt) if simType == 'simple' else pseg.lcut(intxt)

        # Best similarity over all phrasings of this entry's question.
        t.sim = max(
            similarity(in_vec, question, method=simType, embedding=embedding)
            for question in questions
        )

    maxSim = max(self.zhishiku, key=lambda x: x.sim)
    logger.info('maxSim=' + format(maxSim.sim, '.0%'))
    if maxSim.sim < simCondision:
        # Fallback "no confident answer" message (mojibake preserved).
        return '?????????????????????????'

    return maxSim.a

开发者ID:ofooo,项目名称:FAQrobot,代码行数:31,

示例7: __init__

​点赞 2

# 需要导入模块: from jieba import posseg [as 别名]

# 或者: from jieba.posseg import lcut [as 别名]

def __init__(self, rtepair, stop=True, lemmatize=False):
    """Extract word-overlap features from an RTE (text, hypothesis) pair.

    POS-tags both sides with jieba.posseg, optionally drops stopwords,
    and precomputes the overlap / hypothesis-only / text-only word sets.

    :param rtepair: a ``RTEPair`` from which features should be extracted, (txt, hyp)
    :param stop: if ``True``, stopwords are thrown away.
    :type stop: bool
    :param lemmatize: reserved; the wordnet lemmatization branch is a no-op.
    """
    global stop_word_path
    self.stop = stop
    # NOTE(review): the file handle is never closed; membership tests below
    # run against the raw file *string*, i.e. substring containment.
    self.stopwords = codecs.open(stop_word_path + 'stopwords.txt', encoding='UTF-8').read()
    # Negation-word set. NOTE(review): the literals were mojibake in the
    # source and are preserved byte-for-byte.
    self.negwords = set([u"?", u"??", u"??", u"?", u"??", u"??", u"??", u"??", u"??"])
    # POS-tagged tokens of the text side and the hypothesis side.
    text_words = pseg.lcut(rtepair[0])
    hyp_words = pseg.lcut(rtepair[1])
    self.text_words = set()
    self.hyp_words = set()

    # Placeholder: additional preprocessing was planned here but never
    # implemented.
    pass

    # Placeholder: wordnet-based lemmatization is not implemented.
    if lemmatize:
        pass

    # Keep non-stopword (word, flag) pairs from each side.
    for word, flag in text_words:
        if word not in self.stopwords:
            self.text_words.add((word, flag))

    for word, flag in hyp_words:
        if word not in self.stopwords:
            self.hyp_words.add((word, flag))

    # Set algebra over the tagged word sets.
    self._overlap = self.hyp_words & self.text_words    # shared by hyp and text
    self._hyp_extra = self.hyp_words - self.text_words  # only in hyp
    self._txt_extra = self.text_words - self.hyp_words  # only in text

开发者ID:wac81,项目名称:Book_DeepLearning_Practice,代码行数:38,

示例8: delstopwords

​点赞 2

# 需要导入模块: from jieba import posseg [as 别名]

# 或者: from jieba.posseg import lcut [as 别名]

def delstopwords(content):
    """Strip whitespace, stopwords and function-word POS tags from *content*.

    Relies on a module-level ``stopwords`` collection.

    :param content: text to clean
    :return: remaining words concatenated into one UTF-8 byte string
    """
    # POS tags to discard -- same set as delNOTNeedWords above.
    # BUG FIX: jieba.posseg flags carry no leading '/' ('x', 'zg', ...),
    # so the original "/x"-style comparison never matched and the POS
    # filter was a silent no-op.
    drop_flags = ("x", "zg", "uj", "ul", "e", "d", "uz", "y")

    result = ''
    # Collapse all whitespace before segmenting.
    for word, flag in pseg.lcut("".join(content.split())):
        if word not in stopwords and flag not in drop_flags:
            # Python 2: accumulate UTF-8 encoded bytes.
            result += word.encode('utf-8')
    return result

开发者ID:wac81,项目名称:recommended_system,代码行数:10,

示例9: prefix_process

​点赞 2

# 需要导入模块: from jieba import posseg [as 别名]

# 或者: from jieba.posseg import lcut [as 别名]

def prefix_process(curr_index, sentence, score):
    """Adjust a sentiment *score* based on prefixes before the current word.

    Inspects up to the five characters preceding *curr_index* in *sentence*:

    - a double-negation prefix dampens the score (x0.8);
    - a negation prefix flips the score (x-1.3), unless an earlier second
      negation before a pronoun/noun/numeral cancels the flip (x1.3);
    - with no explicit negation prefix, a nearby negation before a
      pronoun/noun/numeral in the window weakens and flips (x-0.6);
    - a degree-adverb ("very"-type) prefix amplifies the score (x1.3).

    :param curr_index: index of the current word inside *sentence*
    :param sentence: the sentence being scored
    :param score: base sentiment score for the current word
    :return: the adjusted score
    """
    # Window of up to five characters immediately before the current word.
    num_cnt = 5
    if curr_index - num_cnt > 0:
        seg = sentence[curr_index - num_cnt:curr_index]
    else:
        seg = sentence[0:curr_index]

    # Double negation: weaken the score.
    for curr_neg_prefix in double_none_prefix:
        if seg.endswith(curr_neg_prefix):
            return 0.8 * score

    # Single negation prefix: usually flips the polarity.
    for curr_neg_prefix in set_neg_prefix:
        if seg.endswith(curr_neg_prefix):
            temp_pair = pseg.lcut(sentence[0:curr_index])
            # Walk backwards until punctuation (flag 'x') or a pronoun/
            # noun/numeral (flags 'r'/'n'/'m').
            for i, (w, f) in enumerate(reversed(temp_pair)):
                if f.startswith(u"x"):
                    break
                elif f.startswith(u"r") or f.startswith(u"n") or f.startswith(u"m"):
                    # A second negation right before the r/n/m token
                    # cancels the flip.
                    if (len(temp_pair) - i - 2) > 0 and temp_pair[len(temp_pair) - i - 2].word in set_neg_prefix:
                        return 1.3 * score
                    return -1.3 * score

    # No explicit negation prefix: check the local window for a negation
    # preceding a pronoun/noun/numeral.
    temp_pair = pseg.lcut(seg)
    for i, (w, f) in enumerate(reversed(temp_pair)):
        if f.startswith(u"x"):
            break
        elif f.startswith(u"r") or f.startswith(u"n") or f.startswith(u"m"):
            # BUG FIX: guard the index like the loop above does. When the
            # r/n/m token is the first element, len(temp_pair)-i-2 is -1
            # and Python's negative indexing silently read the *last*
            # element of temp_pair instead.
            prev_idx = len(temp_pair) - i - 2
            if prev_idx >= 0 and temp_pair[prev_idx].word in set_neg_prefix:
                return -0.6 * score

    # Degree-adverb ("very"-type) prefix amplifies the score.
    for curr_very_prefix in set_very_prefix:
        if seg.endswith(curr_very_prefix):
            return 1.3 * score

    return score

开发者ID:wac81,项目名称:Book_DeepLearning_Practice,代码行数:46,

注:本文中的jieba.posseg.lcut方法示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。