"""Jieba command line interface.

Reads text line by line from a file (or from STDIN when no filename is
given), segments each line with jieba and prints the resulting tokens joined
by a delimiter.  With ``--pos`` every token is printed together with its
part-of-speech flag.
"""
import sys
import jieba
from argparse import ArgumentParser
# Supplies text_type, PY2 and default_encoding used below.
from ._compat import *

parser = ArgumentParser(
    usage="%s -m jieba [options] filename" % sys.executable,
    description="Jieba command line interface.",
    epilog="If no filename specified, use STDIN instead.")
parser.add_argument(
    "-d", "--delimiter", metavar="DELIM", default=' / ',
    nargs='?', const=' ',
    help="use DELIM instead of ' / ' for word delimiter; or a space if it is used without DELIM")
parser.add_argument(
    "-p", "--pos", metavar="DELIM", nargs='?', const='_',
    help="enable POS tagging; if DELIM is specified, use DELIM instead of '_' for POS delimiter")
parser.add_argument("-D", "--dict", help="use DICT as dictionary")
parser.add_argument(
    "-u", "--user-dict",
    help="use USER_DICT together with the default dictionary or DICT (if specified)")
parser.add_argument(
    "-a", "--cut-all", action="store_true", dest="cutall", default=False,
    help="full pattern cutting (ignored with POS tagging)")
parser.add_argument(
    "-n", "--no-hmm", dest="hmm", action="store_false", default=True,
    help="don't use the Hidden Markov Model")
parser.add_argument(
    "-q", "--quiet", action="store_true", default=False,
    help="don't print loading messages to stderr")
parser.add_argument(
    "-V", '--version', action='version',
    version="Jieba " + jieba.__version__)
parser.add_argument("filename", nargs='?', help="input file")

args = parser.parse_args()

if args.quiet:
    jieba.setLogLevel(60)

if args.pos:
    import jieba.posseg
    posdelim = args.pos

    def cutfunc(sentence, _, HMM=True):
        """Yield ``word + posdelim + flag`` for each token of *sentence*.

        The second positional argument (the cut-all flag passed by the main
        loop) is accepted only so the signature lines up with ``jieba.cut``;
        it is ignored here.
        """
        for w, f in jieba.posseg.cut(sentence, HMM):
            yield w + posdelim + f
else:
    cutfunc = jieba.cut

delim = text_type(args.delimiter)
cutall = args.cutall
hmm = args.hmm

# Open the input before initialising jieba so that a bad filename fails
# before the dictionary is loaded.
fp = open(args.filename, 'r') if args.filename else sys.stdin
try:
    if args.dict:
        jieba.initialize(args.dict)
    else:
        jieba.initialize()
    if args.user_dict:
        jieba.load_userdict(args.user_dict)

    # readline() is kept from the original rather than iterating over the
    # file object; presumably so that STDIN is handled one line at a time.
    ln = fp.readline()
    while ln:
        # Strip only the line terminator; other whitespace is part of the text.
        sentence = ln.rstrip('\r\n')
        result = delim.join(cutfunc(sentence, cutall, hmm))
        if PY2:
            # On Python 2 the joined result is encoded to bytes before printing.
            result = result.encode(default_encoding)
        print(result)
        ln = fp.readline()
finally:
    # Close the input even if segmentation fails, but only when this script
    # opened it; sys.stdin is not ours to close.
    if fp is not sys.stdin:
        fp.close()