七弈智慧• 工作室

表格数据清洗

2025-05-22 12:12:36 阅读(320)

处理表格数据时,数据清洗是确保数据准确性和一致性的关键步骤。以下是对今天讨论的数据清洗逻辑的教程式总结:

步骤 1:数据导入

首先,将原始数据导入到你的应用程序或脚本中。原始数据可以是以字符串形式存在的 JSON 数据,也可以是通过其他方法获取的数据。如果是 JSON 字符串,需要先用 JSON.parse 解析成数组(如下例所示),再进行后续处理。

javascript
// 示例原始数据
let rawData = '[["1","d4","O","d5","21","Mgh","","96","41","",""],["2","Bt\'t","","Nf6","22","Q94","","Qf6","42","",""],["3","eo","","5","23","h5","","/sh7","43","",""],["4","3","","c6","24","Bq4","","Bc6","44","O",""],["5","Ad3","","Ncb","25","Bu2","","e5","45","O",""],["6","Nf3","","b6","26","de","","Ne5","46","0",""],["7","Nd2","","Bb7","27","Be5","","Rey","47","C",""],["8","N5","","Ne5","28","Qcy","","Ref","48","",""],["9","Be5","","3RC8","29","Q94","","d4","49","",""],["10","BNf3","","P1f","30","Rg","C","ofo","50","C",""],["11","B930","","00","31","Ho","","K98","51","O",""],["12","Qe2","","Reg","32","9f","","+f7","52","0",""],["13","93 O","c4","","33","Qgb","0","Ke7","53","O",""],["14","Bc2","","h6","34","","","K+6","54","O",""],["15","Ney","","Be5","35","C","","h7","55","",""],["16","Be5","","Nd7","36","D0","","","56","",""],["17","Bfy","","Nf6","37","","","","57","O",""],["18","h4","","Nq7","38","","","","58","O",""],["19","94","","Qf6","O 39","","C","","59","","O"],["20","95","","Qoy","40","","","","60","O",""]]';
let parsedData = JSON.parse(rawData);

步骤 2:预测标准列数

在开始清洗数据之前,先预测表格的标准列数(即每一行应有的列数)。以它为基准,有助于找出数据中列数不符的异常行。

javascript
function predictStandardColumns(data) {
    // ...(详见上文的函数)
}

let standardColumns = predictStandardColumns(parsedData);

步骤 3:标识并清理数字列

使用函数来标识并清理数字列。在这个过程中,可以根据设定的阈值(示例中默认为 0.8)判断某一列是否为数字列,并清除该列中的非数字字符。

javascript
function identifyAndCleanNumberColumn(data, threshold = 0.8) {
    // ...(详见上文的函数)
}

const result = identifyAndCleanNumberColumn(parsedData);

步骤 4:补全数据

对标记为数字列的数据进行补全,确保数据的连续性。

javascript
function autocompleteNumbers(data, numericColumnIndices) {
    // ...(详见上文的函数)
}

let newData = autocompleteNumbers(result.data, result.numericColumnIndices);

步骤 5:删除多余列

最后,删除多余的列,包括空白列和内容长度仅为 1 的列。同时保留最后两列,并确保每个数字列后面都有两列数据。

javascript
function removeExcessColumns(data, numericColumnIndices) {
    // ...(详见上文的函数)
}

let cleanedData = removeExcessColumns(newData, result.numericColumnIndices);

步骤 6:可视化或导出结果

完成数据清洗后,可以将结果可视化、插入表格,或导出为其他格式。

javascript
// 示例:将数据插入表格
insertDataIntoTable(cleanedData);

这些步骤构成了一个简单的数据清洗流程,适用于表格数据。具体的阈值和规则可以根据数据的特点进行调整。这个流程可以在不同的项目中重复使用,以确保数据质量和一致性。

 
 
 
 

接口页面--全局数据:

site-> {"id":1,"name":"七弈智慧","domain":"doc.7yi.link","email":"13346163791@qq.com","wx":null,"icp":"浙ICP备2023022652号-1","code":"","json":{"siteSubtitle":"• 工作室"},"title":"七弈国象-连接七弈 智慧人生","keywords":"国际象棋,棋谱,pgn,","description":"国际象棋站","createdAt":null,"updatedAt":"2025-04-14T01:07:55.000Z","template":null,"appid":null,"secret":null}

111-->• 工作室

nav-> [{"id":13,"pid":0,"name":"日常工作杂记","pinyin":"richanggongzuozaji","path":"/richanggongzuozaji","sort":4,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0","children":[{"id":14,"pid":13,"name":"前端","pinyin":"qianduan","path":"/richanggongzuozaji/qianduan","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":15,"pid":13,"name":"后端","pinyin":"houduan","path":"/cmsgaijin/houduan","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":16,"pid":13,"name":"总结","pinyin":"zongjie","path":"/kaifariji/zongjie","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":17,"pid":13,"name":"工作计划","pinyin":"gongzuojihua","path":"/cmsgaijin/gongzuojihua","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":22,"pid":13,"name":"炒股养棋","pinyin":"chaoguyangqi","path":"/chaoguyangqi","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":23,"pid":13,"name":"cx13","pinyin":"cx13","path":"/kaifariji/cx13","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":24,"pid":13,"name":"时限学堂","pinyin":"shixianxuetang","path":"/richanggongzuozaji/shixianxuetang","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":25,"pid":13,"name":"运营","pinyin":"yunying","path":"/richang
gongzuozaji/yunying","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"}]},{"id":18,"pid":0,"name":"国象练习","pinyin":"guoxianglianxi","path":"/guoxianglianxi","sort":3,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":19,"pid":0,"name":"信息公布","pinyin":"xinxigongbu","path":"/xinxigongbu","sort":6,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0","children":[{"id":20,"pid":19,"name":"业界消息","pinyin":"yejiexiaoxi","path":"/qiyiguoxiang/yejiexiaoxi","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":21,"pid":19,"name":"七弈动态","pinyin":"qiyidongtai","path":"/xinxigongbu/qiyidongtai","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"}]}]

category-> [{"id":13,"pid":0,"name":"日常工作杂记","pinyin":"richanggongzuozaji","path":"/richanggongzuozaji","sort":4,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0","children":[{"id":14,"pid":13,"name":"前端","pinyin":"qianduan","path":"/richanggongzuozaji/qianduan","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":15,"pid":13,"name":"后端","pinyin":"houduan","path":"/cmsgaijin/houduan","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":16,"pid":13,"name":"总结","pinyin":"zongjie","path":"/kaifariji/zongjie","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":17,"pid":13,"name":"工作计划","pinyin":"gongzuojihua","path":"/cmsgaijin/gongzuojihua","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":22,"pid":13,"name":"炒股养棋","pinyin":"chaoguyangqi","path":"/chaoguyangqi","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":23,"pid":13,"name":"cx13","pinyin":"cx13","path":"/kaifariji/cx13","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":24,"pid":13,"name":"时限学堂","pinyin":"shixianxuetang","path":"/richanggongzuozaji/shixianxuetang","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":25,"pid":13,"name":"运营","pinyin":"yunying","path":"/ri
changgongzuozaji/yunying","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"}]},{"id":14,"pid":13,"name":"前端","pinyin":"qianduan","path":"/richanggongzuozaji/qianduan","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":15,"pid":13,"name":"后端","pinyin":"houduan","path":"/cmsgaijin/houduan","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":16,"pid":13,"name":"总结","pinyin":"zongjie","path":"/kaifariji/zongjie","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":17,"pid":13,"name":"工作计划","pinyin":"gongzuojihua","path":"/cmsgaijin/gongzuojihua","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":18,"pid":0,"name":"国象练习","pinyin":"guoxianglianxi","path":"/guoxianglianxi","sort":3,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":19,"pid":0,"name":"信息公布","pinyin":"xinxigongbu","path":"/xinxigongbu","sort":6,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0","children":[{"id":20,"pid":19,"name":"业界消息","pinyin":"yejiexiaoxi","path":"/qiyiguoxiang/yejiexiaoxi","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":21,"pid":19,"name":"七弈动态","pinyin":"qiyidongtai","path":"/xinxigongbu/qiyidongtai","sort":0,"target":"0","status":"0","list_view":"list.
html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"}]},{"id":20,"pid":19,"name":"业界消息","pinyin":"yejiexiaoxi","path":"/qiyiguoxiang/yejiexiaoxi","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":21,"pid":19,"name":"七弈动态","pinyin":"qiyidongtai","path":"/xinxigongbu/qiyidongtai","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":22,"pid":13,"name":"炒股养棋","pinyin":"chaoguyangqi","path":"/chaoguyangqi","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":23,"pid":13,"name":"cx13","pinyin":"cx13","path":"/kaifariji/cx13","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":24,"pid":13,"name":"时限学堂","pinyin":"shixianxuetang","path":"/richanggongzuozaji/shixianxuetang","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":25,"pid":13,"name":"运营","pinyin":"yunying","path":"/richanggongzuozaji/yunying","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"}]

friendlink-> [{"id":1,"title":"七弈国象首页","link":"https://www.7yi.link","sort":0,"createdAt":"2023-07-22T14:59:55.000Z"}]

base_url-> /public/template/default

frag--->{"record":"<p style=\"text-align: center;\"><a href=\"http://beian.miit.gov.cn/\" target=\"_blank\" rel=\"noopener\">浙ICP备2023022652号-1</a></p>","footer-guanyu":"","footer-7yi":"","footer-chess":"","footer-fe":"","chanyue-introduce":"<p>七弈国象:专注于国际象棋开局与战术学习的网站</p>\n<ul>\n<li>国际象棋相关工具开发:<br>\n<ul>\n<li>记谱训练</li>\n<li>识谱工具</li>\n</ul>\n</li>\n<li>国象相关产品\n<ul>\n<li>记录本</li>\n</ul>\n</li>\n<li>电商网站\n<ul>\n<li>畅享一三</li>\n</ul>\n</li>\n<li>通用后台<br>\n<ul>\n<li>7Link通用后台开发系统&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;</li>\n</ul>\n</li>\n</ul>","copyright":"<p style=\"text-align: center;\">杭州七弈智慧科技有限公司版权所有</p>","ad":"<p style=\"text-align: center;\"><a href=\"https://7yi.link/stock/chess/web-mobile/\" target=\"_blank\" rel=\"noopener\"> <img style=\"display: block; margin-left: auto; margin-right: auto; max-width: 100%; height: auto;\" src=\"/public/uploads/default/2023/09/21/1695286791855_mceu_83563456911695286788809.png.png\"> </a></p>\n<p style=\"text-align: center;\"><a href=\"https://7yi.link/stock/chess/web-mobile/\" target=\"_blank\" rel=\"noopener\">开局树学习</a></p>"}

tag--->[{"id":8,"name":"双马防御","path":"shuangmafangyu"},{"id":7,"name":"开发","path":"kaifa"},{"id":6,"name":"伦敦体系","path":"lunduntixi"},{"id":5,"name":"俄罗斯防御","path":"eluosifangyu"},{"id":1,"name":"cms","path":"cms"}]

-----------

position------>[{"id":13,"pid":0,"name":"日常工作杂记","pinyin":"richanggongzuozaji","path":"/richanggongzuozaji","sort":4,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0","children":[{"id":14,"pid":13,"name":"前端","pinyin":"qianduan","path":"/richanggongzuozaji/qianduan","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":15,"pid":13,"name":"后端","pinyin":"houduan","path":"/cmsgaijin/houduan","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":16,"pid":13,"name":"总结","pinyin":"zongjie","path":"/kaifariji/zongjie","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":17,"pid":13,"name":"工作计划","pinyin":"gongzuojihua","path":"/cmsgaijin/gongzuojihua","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":22,"pid":13,"name":"炒股养棋","pinyin":"chaoguyangqi","path":"/chaoguyangqi","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":23,"pid":13,"name":"cx13","pinyin":"cx13","path":"/kaifariji/cx13","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":24,"pid":13,"name":"时限学堂","pinyin":"shixianxuetang","path":"/richanggongzuozaji/shixianxuetang","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},{"id":25,"pid":13,"name":"运营","pinyin":"yunying","path":
"/richanggongzuozaji/yunying","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"}]},{"id":14,"pid":13,"name":"前端","pinyin":"qianduan","path":"/richanggongzuozaji/qianduan","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"}]

navSub------>{"cate":{"id":14,"pid":13,"name":"前端","pinyin":"qianduan","path":"/richanggongzuozaji/qianduan","sort":0,"target":"0","status":"0","list_view":"list.html","article_view":"article.html","seo_title":"","seo_keywords":"","seo_description":"","type":"0"},"id":14}

article------>{"id":121,"cid":14,"sub_cid":"","title":"表格数据清洗","short_title":"","tag_id":"","attr":"","seo_title":"","seo_keywords":"","seo_description":"","source":"","author":"","description":"","img":"","content":"<div class=\"flex-1 overflow-hidden\">\n<div class=\"react-scroll-to-bottom--css-drnmd-79elbk h-full\">\n<div class=\"react-scroll-to-bottom--css-drnmd-1n7m0yu\">\n<div class=\"flex flex-col pb-9 text-sm\">\n<div class=\"w-full text-token-text-primary\" data-testid=\"conversation-turn-199\">\n<div class=\"px-4 py-2 justify-center text-base md:gap-6 m-auto\">\n<div class=\"flex flex-1 text-base mx-auto gap-3 md:px-5 lg:px-1 xl:px-5 md:max-w-3xl lg:max-w-[40rem] xl:max-w-[48rem] } group final-completion\">\n<div class=\"relative flex w-full flex-col lg:w-[calc(100%-115px)] agent-turn\">\n<div class=\"flex-col gap-1 md:gap-3\">\n<div class=\"flex flex-grow flex-col max-w-full\">\n<div class=\"min-h-[20px] text-message flex flex-col items-start gap-3 whitespace-pre-wrap break-words [.text-message+&amp;]:mt-5 overflow-x-auto\" data-message-author-role=\"assistant\" data-message-id=\"ce855fdf-eddb-45bd-9f8b-8d4d2955cce6\">\n<div class=\"markdown prose w-full break-words dark:prose-invert light\">\n<p>当处理表格数据时,数据清洗是一个关键步骤,以确保数据的准确性和一致性。以下是今天讨论的数据清洗逻辑的教程总结:</p>\n<h3>步骤 1:数据导入</h3>\n<p>首先,将原始数据导入到你的应用程序或脚本中。这可以是以字符串形式存在的 JSON 数据,也可以是通过其他方法获取的数据。</p>\n<div class=\"bg-black rounded-md\">\n<div class=\"flex items-center relative text-gray-200 bg-gray-800 dark:bg-token-surface-primary px-4 py-2 text-xs font-sans justify-between rounded-t-md\">javascript<button class=\"flex gap-1 items-center\">Copy code</button></div>\n<pre><code class=\"language-javascript\"><span class=\"hljs-comment\">// 示例原始数据</span>\n<span class=\"hljs-keyword\">let</span> rawData = <span 
class=\"hljs-string\">'[[\"1\",\"d4\",\"O\",\"d5\",\"21\",\"Mgh\",\"\",\"96\",\"41\",\"\",\"\"],[\"2\",\"Bt\\'t\",\"\",\"Nf6\",\"22\",\"Q94\",\"\",\"Qf6\",\"42\",\"\",\"\"],[\"3\",\"eo\",\"\",\"5\",\"23\",\"h5\",\"\",\"/sh7\",\"43\",\"\",\"\"],[\"4\",\"3\",\"\",\"c6\",\"24\",\"Bq4\",\"\",\"Bc6\",\"44\",\"O\",\"\"],[\"5\",\"Ad3\",\"\",\"Ncb\",\"25\",\"Bu2\",\"\",\"e5\",\"45\",\"O\",\"\"],[\"6\",\"Nf3\",\"\",\"b6\",\"26\",\"de\",\"\",\"Ne5\",\"46\",\"0\",\"\"],[\"7\",\"Nd2\",\"\",\"Bb7\",\"27\",\"Be5\",\"\",\"Rey\",\"47\",\"C\",\"\"],[\"8\",\"N5\",\"\",\"Ne5\",\"28\",\"Qcy\",\"\",\"Ref\",\"48\",\"\",\"\"],[\"9\",\"Be5\",\"\",\"3RC8\",\"29\",\"Q94\",\"\",\"d4\",\"49\",\"\",\"\"],[\"10\",\"BNf3\",\"\",\"P1f\",\"30\",\"Rg\",\"C\",\"ofo\",\"50\",\"C\",\"\"],[\"11\",\"B930\",\"\",\"00\",\"31\",\"Ho\",\"\",\"K98\",\"51\",\"O\",\"\"],[\"12\",\"Qe2\",\"\",\"Reg\",\"32\",\"9f\",\"\",\"+f7\",\"52\",\"0\",\"\"],[\"13\",\"93 O\",\"c4\",\"\",\"33\",\"Qgb\",\"0\",\"Ke7\",\"53\",\"O\",\"\"],[\"14\",\"Bc2\",\"\",\"h6\",\"34\",\"\",\"\",\"K+6\",\"54\",\"O\",\"\"],[\"15\",\"Ney\",\"\",\"Be5\",\"35\",\"C\",\"\",\"h7\",\"55\",\"\",\"\"],[\"16\",\"Be5\",\"\",\"Nd7\",\"36\",\"D0\",\"\",\"\",\"56\",\"\",\"\"],[\"17\",\"Bfy\",\"\",\"Nf6\",\"37\",\"\",\"\",\"\",\"57\",\"O\",\"\"],[\"18\",\"h4\",\"\",\"Nq7\",\"38\",\"\",\"\",\"\",\"58\",\"O\",\"\"],[\"19\",\"94\",\"\",\"Qf6\",\"O 39\",\"\",\"C\",\"\",\"59\",\"\",\"O\"],[\"20\",\"95\",\"\",\"Qoy\",\"40\",\"\",\"\",\"\",\"60\",\"O\",\"\"]]'</span>;\n<span class=\"hljs-keyword\">let</span> parsedData = <span class=\"hljs-title class_\">JSON</span>.<span class=\"hljs-title function_\">parse</span>(rawData);\n</code></pre>\n</div>\n<h3>步骤 2:预测标准列数</h3>\n<p>在开始清洗数据之前,预测标准列数。这有助于找出数据中的异常。</p>\n<div class=\"bg-black rounded-md\">\n<div class=\"flex items-center relative text-gray-200 bg-gray-800 dark:bg-token-surface-primary px-4 py-2 text-xs font-sans justify-between rounded-t-md\">javascript<button class=\"flex gap-1 items-center\">Copy 
code</button></div>\n<pre><code class=\"language-javascript\"><span class=\"hljs-keyword\">function</span> <span class=\"hljs-title function_\">predictStandardColumns</span>(<span class=\"hljs-params\">data</span>) {\n <span class=\"hljs-comment\">// ...(详见上文的函数)</span>\n}\n\n<span class=\"hljs-keyword\">let</span> standardColumns = <span class=\"hljs-title function_\">predictStandardColumns</span>(parsedData);\n</code></pre>\n</div>\n<h3>步骤 3:标识并清理数字列</h3>\n<p>使用函数来标识并清理数字列。在这个过程中,可以根据设定的阈值清空非数字字符。</p>\n<div class=\"bg-black rounded-md\">\n<div class=\"flex items-center relative text-gray-200 bg-gray-800 dark:bg-token-surface-primary px-4 py-2 text-xs font-sans justify-between rounded-t-md\">javascript<button class=\"flex gap-1 items-center\">Copy code</button></div>\n<pre><code class=\"language-javascript\"><span class=\"hljs-keyword\">function</span> <span class=\"hljs-title function_\">identifyAndCleanNumberColumn</span>(<span class=\"hljs-params\">data, threshold = <span class=\"hljs-number\">0.8</span></span>) {\n <span class=\"hljs-comment\">// ...(详见上文的函数)</span>\n}\n\n<span class=\"hljs-keyword\">const</span> result = <span class=\"hljs-title function_\">identifyAndCleanNumberColumn</span>(parsedData);\n</code></pre>\n</div>\n<h3>步骤 4:补全数据</h3>\n<p>对标记为数字列的数据进行补全,确保数据的连续性。</p>\n<div class=\"bg-black rounded-md\">\n<div class=\"flex items-center relative text-gray-200 bg-gray-800 dark:bg-token-surface-primary px-4 py-2 text-xs font-sans justify-between rounded-t-md\">javascript<button class=\"flex gap-1 items-center\">Copy code</button></div>\n<pre><code class=\"language-javascript\"><span class=\"hljs-keyword\">function</span> <span class=\"hljs-title function_\">autocompleteNumbers</span>(<span class=\"hljs-params\">data, numericColumnIndices</span>) {\n <span class=\"hljs-comment\">// ...(详见上文的函数)</span>\n}\n\n<span class=\"hljs-keyword\">let</span> newData = <span class=\"hljs-title function_\">autocompleteNumbers</span>(result.<span 
class=\"hljs-property\">data</span>, result.<span class=\"hljs-property\">numericColumnIndices</span>);\n</code></pre>\n</div>\n<h3>步骤 5:删除多余列</h3>\n<p>最后,删除多余的列,包括空白列和长度为 1 的列。保留最后两列,并确保数字列后面有两列。</p>\n<div class=\"bg-black rounded-md\">\n<div class=\"flex items-center relative text-gray-200 bg-gray-800 dark:bg-token-surface-primary px-4 py-2 text-xs font-sans justify-between rounded-t-md\">javascript<button class=\"flex gap-1 items-center\">Copy code</button></div>\n<pre><code class=\"language-javascript\"><span class=\"hljs-keyword\">function</span> <span class=\"hljs-title function_\">removeExcessColumns</span>(<span class=\"hljs-params\">data, numericColumnIndices</span>) {\n <span class=\"hljs-comment\">// ...(详见上文的函数)</span>\n}\n\n<span class=\"hljs-keyword\">let</span> cleanedData = <span class=\"hljs-title function_\">removeExcessColumns</span>(newData, result.<span class=\"hljs-property\">numericColumnIndices</span>);\n</code></pre>\n</div>\n<h3>步骤 6:可视化或导出结果</h3>\n<p>完成数据清洗后,可以将结果可视化,插入表格,或导出到其他格式。</p>\n<div class=\"bg-black rounded-md\">\n<div class=\"flex items-center relative text-gray-200 bg-gray-800 dark:bg-token-surface-primary px-4 py-2 text-xs font-sans justify-between rounded-t-md\">javascript<button class=\"flex gap-1 items-center\">Copy code</button></div>\n<pre><code class=\"language-javascript\"><span class=\"hljs-comment\">// 示例:将数据插入表格</span>\n<span class=\"hljs-title function_\">insertDataIntoTable</span>(cleanedData);\n</code></pre>\n</div>\n<p>这些步骤构成了一个简单的数据清洗流程,适用于表格数据。具体的阈值和规则可以根据数据的特点进行调整。这个流程可以在不同的项目中重复使用,以确保数据质量和一致性。</p>\n</div>\n</div>\n</div>\n<div class=\"mt-1 flex justify-start gap-3 empty:hidden\">\n<div class=\"text-gray-400 flex self-end lg:self-center justify-center lg:justify-start mt-0 gap-1 visible\"><button class=\"flex items-center gap-1.5 rounded-md p-1 pl-0 text-xs hover:text-gray-950 dark:text-gray-400 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400 md:invisible md:group-hover:visible 
md:group-[.final-completion]:visible\"></button>\n<div class=\"flex gap-1\">&nbsp;</div>\n<div class=\"flex items-center gap-1.5 text-xs\">&nbsp;</div>\n</div>\n</div>\n</div>\n<div class=\"absolute\">\n<div class=\"flex w-full gap-2 items-center justify-center\">&nbsp;</div>\n</div>\n</div>\n</div>\n</div>\n</div>\n</div>\n</div>\n</div>\n</div>\n<div class=\"w-full pt-2 md:pt-0 dark:border-white/20 md:border-transparent md:dark:border-transparent md:w-[calc(100%-.5rem)]\"><form class=\"stretch mx-2 flex flex-row gap-3 last:mb-2 md:mx-4 md:last:mb-6 lg:mx-auto lg:max-w-2xl xl:max-w-3xl\">\n<div class=\"relative flex h-full flex-1 items-stretch md:flex-col\">\n<div class=\"flex w-full items-center\">&nbsp;</div>\n</div>\n</form></div>","status":0,"pv":320,"link":"","createdAt":"2023-12-07 10:46:21","updatedAt":"2025-05-22 12:12:36","field":{},"tags":[]}

article.tags------>[]

news------>[{"id":252,"title":"ollama运行命令","short_title":"","img":"","createdAt":"2024-11-23T07:55:32.000Z","description":"","pinyin":"qianduan","name":"前端","path":"/cmsgaijin/qianduan"},{"id":173,"title":"新网站上线的后续工作预告","short_title":"","img":"","createdAt":"2024-04-11T07:03:05.000Z","description":"","pinyin":"qianduan","name":"前端","path":"/cmsgaijin/qianduan"},{"id":154,"title":"Bootstrap框架的css定义说明 ","short_title":"","img":"","createdAt":"2024-01-28T05:11:47.000Z","description":"","pinyin":"qianduan","name":"前端","path":"/cmsgaijin/qianduan"},{"id":125,"title":"界面设计要求","short_title":"","img":"/public/uploads/default/2023/12/30/1703937436442_origin-mceclip0png","createdAt":"2023-12-30T08:21:24.000Z","description":"","pinyin":"qianduan","name":"前端","path":"/cmsgaijin/qianduan"},{"id":122,"title":"行棋记录验证正则表达式详解","short_title":"","img":"","createdAt":"2023-12-08T12:36:00.000Z","description":"","pinyin":"qianduan","name":"前端","path":"/cmsgaijin/qianduan"},{"id":121,"title":"表格数据清洗","short_title":"","img":"","createdAt":"2023-12-07T02:46:21.000Z","description":"","pinyin":"qianduan","name":"前端","path":"/cmsgaijin/qianduan"},{"id":114,"title":"开发依赖库说明以及项目 配置信息说明","short_title":"","img":"","createdAt":"2023-11-29T01:23:34.000Z","description":"","pinyin":"qianduan","name":"前端","path":"/cmsgaijin/qianduan"},{"id":113,"title":"编辑前端项目问题","short_title":"","img":"","createdAt":"2023-11-28T04:57:00.000Z","description":"","pinyin":"qianduan","name":"前端","path":"/cmsgaijin/qianduan"},{"id":112,"title":"在线编辑excel","short_title":"","img":"","createdAt":"2023-11-27T09:15:03.000Z","description":"","pinyin":"qianduan","name":"前端","path":"/cmsgaijin/qianduan"},{"id":103,"title":"表格结构识别结果说明文档","short_title":"","img":"","createdAt":"2023-11-10T10:00:26.000Z","description":"","pinyin":"qianduan","name":"前端","path":"/cmsgaijin/qianduan"}]

hot------>[{"id":46,"title":"Cocos Creator自适应宽度组件教程:ScaleWithParentWidth","path":"/cmsgaijin/qianduan"},{"id":100,"title":"CMS系统升级到最新版","path":"/cmsgaijin/qianduan"},{"id":74,"title":"微信小程序的一些问题","path":"/cmsgaijin/qianduan"},{"id":72,"title":"棋谱识别开发记录","path":"/cmsgaijin/qianduan"},{"id":112,"title":"在线编辑excel","path":"/cmsgaijin/qianduan"},{"id":50,"title":"cocos creator发布成微信小程序(非小游戏)","path":"/cmsgaijin/qianduan"},{"id":122,"title":"行棋记录验证正则表达式详解","path":"/cmsgaijin/qianduan"},{"id":17,"title":"七弈国象-研发记录(https://doc.7yi.link)上线","path":"/cmsgaijin/qianduan"},{"id":121,"title":"表格数据清洗","path":"/cmsgaijin/qianduan"},{"id":67,"title":"对禅悦CMS的代码显示功能进行个性化,让它变成自己习惯的风格","path":"/cmsgaijin/qianduan"}]

imgs------>[{"id":125,"title":"界面设计要求","short_title":"","img":"/public/uploads/default/2023/12/30/1703937436442_origin-mceclip0png","createdAt":"2023-12-30T08:21:24.000Z","description":"","pinyin":"qianduan","name":"前端","path":"/cmsgaijin/qianduan"},{"id":100,"title":"CMS系统升级到最新版","short_title":"","img":"/public/uploads/default/2023/11/09/1699532940524_origin-mceclip0png","createdAt":"2023-11-09T12:02:37.000Z","description":"明空的版本更新很快,所以抽时间把版本更新到最新版","pinyin":"qianduan","name":"前端","path":"/cmsgaijin/qianduan"},{"id":78,"title":"一次快乐的bug修改","short_title":"","img":"/public/uploads/default/2023/11/01/1698823694419_mceclip0.png.png","createdAt":"2023-10-31T23:26:39.000Z","description":"根据标识的位置直接进行了修正。非常感谢","pinyin":"qianduan","name":"前端","path":"/cmsgaijin/qianduan"},{"id":75,"title":"首个用户反馈-来自Mr.上官","short_title":"","img":"/public/uploads/default/2023/10/29/1698545436900_mceclip0.jpg.jpg","createdAt":"2023-10-28T18:09:35.000Z","description":"进来以后,并没有感到自己注册成功,","pinyin":"qianduan","name":"前端","path":"/cmsgaijin/qianduan"},{"id":74,"title":"微信小程序的一些问题","short_title":"","img":"/public/uploads/default/2023/10/27/1698441878047_mceclip0.jpg.jpg","createdAt":"2023-10-27T13:19:45.000Z","description":"\ndoc网站\n\ndoc网站中的导语不要展现出来了,对于现在快阅读的时代,已经不实用了\n广告宽度没有限制,影响页面内容页面的宽度\n\n\n小程序\n\n微信用户原有接口小程序不许再读取了,提供了让用户修改资料的方式来读取\n用户金币显示先取消,相关逻辑还没上线\n修改资料有问题,会错误判断资料没有修改\n按钮要配上声音\n\n上面这个只盖住了一半\n\n这个需要把动画加上\n完成开局以后在小程序中数据无法提交\nipad界面 我的 文字显示太小\n用户进入就注册,做到无感注册\n用户资料修改的web小程序内置版\n\n\n \n","pinyin":"qianduan","name":"前端","path":"/cmsgaijin/qianduan"},{"id":72,"title":"棋谱识别开发记录","short_title":"","img":"/public/uploads/default/2023/10/25/1698207506409_mceclip0.png.png","createdAt":"2023-10-24T18:22:22.000Z","description":"一 准备工作\n\n安装Python\n安装OpenCV\n//更新一下pip 在不翻的情况下,更新不成功\n\npython.exe -m pip install --upgrade pip\n\n//安装一下openCV python版本\npip install opencv-python\n\n安装Tesseract\n\n\n// 
安装地址\nhttps://github.com/UB-Mannheim/tesseract/wiki\n\n//安装的时候,注意要选择复选中文的相关库\n\n//在系统环境变量里添加路径\n\nC:\\Pr","pinyin":"qianduan","name":"前端","path":"/cmsgaijin/qianduan"},{"id":67,"title":"对禅悦CMS的代码显示功能进行个性化,让它变成自己习惯的风格","short_title":"","img":"/public/uploads/default/2023/10/13/1697179493540_mceclip1.png.png","createdAt":"2023-10-12T22:41:43.000Z","description":"1 要显示行号2 要显示代码复制按钮3 要背景黑色","pinyin":"qianduan","name":"前端","path":"/cmsgaijin/qianduan"},{"id":61,"title":"如何在文章里插入其它网站的信息","short_title":"","img":"/public/uploads/default/2023/10/11/1696985332888_mceclip0.png.png","createdAt":"2023-10-10T16:46:42.000Z","description":" ","pinyin":"qianduan","name":"前端","path":"/cmsgaijin/qianduan"},{"id":58,"title":"在微信开发工具调试小程序,一直拿不到用户信息","short_title":"","img":"https://res.wx.qq.com/op_res/rm8CjueDGoqfBrYaFIm5tNS8j1LciYO0i-XLEbXGAayNvR1VI_TVu62YeNPLSHICbMP6cmjX_dWagWQdRjXY8w","createdAt":"2023-10-09T19:21:44.000Z","description":"读取到的用户信息一直是 微信用户,女原因:2023年8月,在用户隐私协议中没有设置相关信息读取","pinyin":"qianduan","name":"前端","path":"/cmsgaijin/qianduan"},{"id":47,"title":"cocos creator使用免费的图片打包工具 free texture packer","short_title":"","img":"/public/uploads/default/2023/10/04/1696407725293_mceclip0.png.png","createdAt":"2023-10-04T00:16:52.000Z","description":"简介:\ncocos creator的图集功能很不错,打包的软件一般都是收费的,找到一个网页版的打包网站 free textrue packer网站地址:http://free-tex-packer.com/app/","pinyin":"qianduan","name":"前端","path":"/cmsgaijin/qianduan"}]

pre------>{"id":114,"title":"开发依赖库说明以及项目 配置信息说明","name":"前端","path":"/cmsgaijin/qianduan"}

next------>{"id":122,"title":"行棋记录验证正则表达式详解","name":"前端","path":"/cmsgaijin/qianduan"}

E