5 回答
TA贡献1827条经验 获得超4个赞
您遇到两个问题:清理数据和创建字典。
在清除不属于单词的字符后,使用 defaultdict(list) 按单词长度对单词进行分组:
from collections import defaultdict

# Maps a word length to the list of distinct words having that length.
d = defaultdict(list)
text = """A text file (sometimes spelled textfile; an old alternative name is flatfile) is a kind of computer file that is structured as a sequence of lines of electronic text. A text file exists stored as data within a computer file system. In operating systems such as CP/M and MS-DOS, where the operating system does not keep track of the file size in bytes, the end of a text file is denoted by placing one or more special characters, known as an end-of-file marker, as padding after the last line in a text file. On modern operating systems such as Microsoft Windows and Unix-like systems, text files do not contain any special EOF character, because file systems on those operating systems keep track of the file size in bytes. There are for most text files a need to have end-of-line delimiters, which are done in a few different ways depending on operating system. Some operating systems with record-orientated file systems may not use new line delimiters and will primarily store text files with lines separated as fixed or variable length records.
'Text file' refers to a type of container, while plain text refers to a type of content.
At a generic level of description, there are two kinds of computer files: text files and binary files"
"""

# Strip punctuation from the beginning/end of every whitespace-separated word.
# Brackets are part of the strip set so that "(sometimes" and "flatfile)" are
# filed under their real lengths (9 and 8) rather than 10 and 9.
# Characters inside a word (e.g. the "/" in "CP/M" or inner hyphens) are kept.
words = [w.strip(",.!;:- \"'()[]{}") for w in text.split()]

# set() removes repeated words; defaultdict creates each list on first use.
# A token made up only of punctuation strips down to "" and is skipped so
# that no spurious 0-length group appears.
for w in set(words):
    if w:
        d[len(w)].append(w)

# output, ordered by word length
for k in sorted(d):
    print(k, d[k])
输出:
1 ['A', 'a']
2 ['to', 'an', 'At', 'do', 'on', 'In', 'On', 'as', 'by', 'or', 'of', 'in', 'is']
3 ['use', 'the', 'one', 'and', 'few', 'not', 'EOF', 'may', 'any', 'for', 'are', 'two', 'end', 'new', 'old']
4 ['have', 'that', 'such', 'type', 'need', 'text', 'more', 'done', 'kind', 'Some', 'does', 'most', 'file', 'with', 'line', 'ways', 'keep', 'CP/M', 'name', 'will', 'Text', 'data', 'last', 'size']
5 ['track', 'those', 'bytes', 'fixed', 'known', 'where', 'which', 'there', 'while', 'There', 'lines', 'kinds', 'store', 'files', 'plain', 'after', 'level']
6 ['exists', 'modern', 'MS-DOS', 'system', 'within', 'refers', 'length', 'marker', 'stored', 'binary']
7 ['because', 'placing', 'content', 'Windows', 'padding', 'systems', 'records', 'contain', 'special', 'generic', 'denoted', 'spelled']
8 ['computer', 'sequence', 'textfile', 'variable']
9 ['Microsoft', 'depending', 'different', 'Unix-like', 'flatfile)', 'primarily', 'container', 'character', 'separated', 'operating']
10 ['delimiters', 'characters', 'electronic', '(sometimes', 'structured']
11 ['end-of-file', 'alternative', 'end-of-line', 'description']
17 ['record-orientated']
TA贡献1796条经验 获得超7个赞
str_files_txt = "A text file (sometimes spelled textfile; an old alternative name is flatfile) is a kind of computer file that is structured as a sequence of lines of electronic text. A text file exists stored as data within a computer file system. In operating systems such as CP/M and MS-DOS, where the operating system does not keep track of the file size in bytes, the end of a text file is denoted by placing one or more special characters, known as an end-of-file marker, as padding after the last line in a text file. On modern operating systems such as Microsoft Windows and Unix-like systems, text files do not contain any special EOF character, because file systems on those operating systems keep track of the file size in bytes. There are for most text files a need to have end-of-line delimiters, which are done in a few different ways depending on operating system. Some operating systems with record-orientated file systems may not use new line delimiters and will primarily store text files with lines separated as fixed or variable length records. 'Text file' refers to a type of container, while plain text refers to a type of content. At a generic level of description, there are two kinds of computer files: text files and binary files"
# Group every space-separated token by the number of letters it contains.
lengthWordDict = {}
for word in str_files_txt.split(' '):
    # Only alphabetic characters count toward the length, so punctuation
    # and hyphens are ignored when choosing the key.
    wordWithoutSpecialChars = ''.join(filter(str.isalpha, word))
    # The token is stored as it appears in the text (punctuation included);
    # setdefault creates the list the first time a length is seen.
    lengthWordDict.setdefault(len(wordWithoutSpecialChars), []).append(word)
print(lengthWordDict)
这是我的解决方案,它获取单词的长度(没有特殊字符,例如标点符号)
要获取单词的绝对长度(包含标点符号),请将 len() 中的 wordWithoutSpecialChars 替换为 word。
输出:
{1: ['A', 'a', 'a', 'A', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a'], 4: ['text', 'file', 'name', 'kind', 'file', 'that', 'text.', 'text', 'file', 'data', 'file', 'such', 'does', 'keep', 'file', 'size', 'text', 'file', 'more', 'last', 'line', 'text', 'file.', 'such', 'text', 'file', 'keep', 'file', 'size', 'most', 'text', 'need', 'have', 'done', 'ways', 'Some', 'with', 'file', 'line', 'will', 'text', 'with', "'Text", "file'", 'type', 'text', 'type', 'text'], 9: ['(sometimes', 'operating', 'operating', 'end-of-file', 'operating', 'Microsoft', 'character,', 'operating', 'end-of-line', 'different', 'depending', 'operating', 'operating', 'primarily', 'separated', 'container,'], 7: ['spelled', 'systems', 'denoted', 'placing', 'special', 'padding', 'systems', 'Windows', 'systems,', 'contain', 'special', 'because', 'systems', 'systems', 'systems', 'systems', 'records.', 'content.', 'generic'], 8: ['textfile;', 'flatfile)', 'computer', 'sequence', 'computer', 'Unix-like', 'variable', 'computer'], 2: ['an', 'is', 'is', 'of', 'is', 'as', 'of', 'of', 'as', 'In', 'as', 'of', 'in', 'of', 'is', 'by', 'or', 'as', 'an', 'as', 'in', 'On', 'as', 'do', 'on', 'of', 'in', 'to', 'in', 'on', 'as', 'or', 'to', 'of', 'to', 'of', 'At', 'of', 'of'], 3: ['old', 'CP/M', 'and', 'the', 'not', 'the', 'the', 'end', 'one', 'the', 'and', 'not', 'any', 'EOF', 'the', 'are', 'for', 'are', 'few', 'may', 'not', 'use', 'new', 'and', 'are', 'two', 'and'], 11: ['alternative', 'description,'], 10: ['structured', 'electronic', 'characters,', 'delimiters,', 'delimiters'], 5: ['lines', 'MS-DOS,', 'where', 'track', 'bytes,', 'known', 'after', 'files', 'those', 'track', 'bytes.', 'There', 'files', 'which', 'store', 'files', 'lines', 'fixed', 'while', 'plain', 'level', 'there', 'kinds', 'files:', 'files', 'files'], 6: ['exists', 'stored', 'within', 'system.', 'system', 'marker,', 'modern', 'system.', 'length', 'refers', 'refers', 'binary'], 16: ['record-orientated']}
TA贡献1784条经验 获得超9个赞
# Map each word length to the distinct words of that length.
answer = {}
# The input variable is str_files_txt, the name used by the other snippets.
for word in str_files_txt.split():  # loop over all the words
    # setdefault creates an empty set the first time a length is seen;
    # adding to a set takes care of de-duplication
    answer.setdefault(len(word), set()).add(word)

# turn those sets into lists
answer = {length: list(found) for length, found in answer.items()}
TA贡献2012条经验 获得超12个赞
您可以直接将字符串添加到字典的正确位置,如下所示:
# File each distinct whitespace-separated token under its length.
res = {}
# Building a set first removes repeated tokens before they are grouped.
for ele in set(str_files_txt.split()):
    # setdefault starts a new list the first time a length shows up
    res.setdefault(len(ele), []).append(ele)
print(res)
TA贡献1864条经验 获得超6个赞
不妨使用循环,让字典在遇到新的长度时自行创建对应的键:
str_files_txt = "A text file (sometimes spelled textfile; an old alternative name is flatfile) is a kind of computer file that is structured as a sequence of lines of electronic text. A text file exists stored as data within a computer file system. In operating systems such as CP/M and MS-DOS, where the operating system does not keep track of the file size in bytes, the end of a text file is denoted by placing one or more special characters, known as an end-of-file marker, as padding after the last line in a text file. On modern operating systems such as Microsoft Windows and Unix-like systems, text files do not contain any special EOF character, because file systems on those operating systems keep track of the file size in bytes. There are for most text files a need to have end-of-line delimiters, which are done in a few different ways depending on operating system. Some operating systems with record-orientated file systems may not use new line delimiters and will primarily store text files with lines separated as fixed or variable length records. 'Text file' refers to a type of container, while plain text refers to a type of content. At a generic level of description, there are two kinds of computer files: text files and binary files"
# First pass: collect every token under its length, repeats included.
op = {}
for items in str_files_txt.split():
    op.setdefault(len(items), []).append(items)
# Second pass: round-trip each list through a set to drop the repeats.
# Only values of existing keys are reassigned, so the dict keeps its size.
for items, found in op.items():
    op[items] = list(set(found))
添加回答
举报