Data analysis tool for Excel, CSV, Word, PDF, TXT, and Markdown files. Use it when the user needs to analyze, summarize, or compare data from multiple files. Supports folder scanning, data aggregation, statistics, and report generation in Markdown/Excel/Word/PDF, with multi-language support. 数据分析、文件夹分析、Excel分析。
技能名称:data-analyzer
详细描述:
分析并汇总来自Excel、CSV、Word和PDF文件的数据。
python
import os
import pandas as pd
from pathlib import Path
from docx import Document
import fitz # PyMuPDF
class DataAnalyzer:
    """Scan a folder for supported documents and analyze each of them.

    Supported formats are Excel (.xlsx/.xls), CSV, Word (.docx), PDF,
    plain text (.txt) and Markdown (.md/.markdown).  The folder is scanned
    recursively once, at construction time; the result is kept in
    ``self.files`` as a mapping from category name to a list of file paths.
    """

    # Lower-cased file suffix -> category key used in ``self.files``.
    # Each category ``X`` is handled by the method named ``analyze_X``.
    _EXT_TO_TYPE = {
        ".xlsx": "excel",
        ".xls": "excel",
        ".csv": "csv",
        ".docx": "word",
        ".pdf": "pdf",
        ".txt": "txt",
        ".md": "markdown",
        ".markdown": "markdown",
    }

    def __init__(self, folder_path):
        """Remember *folder_path* and scan it for supported files."""
        self.folder = Path(folder_path)
        self.files = self._scan_files()

    def _scan_files(self):
        """Return the supported files under ``self.folder``, grouped by type.

        Returns:
            A dict with the keys ``excel``, ``csv``, ``word``, ``pdf``,
            ``txt`` and ``markdown``; every value is a list of path strings.
        """
        files = {
            "excel": [], "csv": [], "word": [],
            "pdf": [], "txt": [], "markdown": [],
        }
        for entry in self.folder.rglob("*"):
            ftype = self._EXT_TO_TYPE.get(entry.suffix.lower())
            # rglob also yields directories; a folder called "x.csv" must
            # not be reported as a CSV file.
            if ftype is not None and entry.is_file():
                files[ftype].append(str(entry))
        return files

    def analyze_excel(self, file_path):
        """Load an Excel file with pandas and report its dimensions.

        Returns:
            ``{"rows": int, "columns": int, "data": DataFrame}``.
        """
        df = pd.read_excel(file_path)
        return {"rows": len(df), "columns": len(df.columns), "data": df}

    def analyze_csv(self, file_path):
        """Load a CSV file with pandas and report its dimensions.

        Returns:
            ``{"rows": int, "columns": int, "data": DataFrame}``.
        """
        df = pd.read_csv(file_path)
        return {"rows": len(df), "columns": len(df.columns), "data": df}

    def analyze_word(self, file_path):
        """Extract the text of a Word (.docx) document.

        Returns:
            ``{"paragraphs": int, "text": str}``.  ``paragraphs`` counts every
            paragraph in the document, while ``text`` joins only the
            paragraphs that contain non-whitespace text.
        """
        doc = Document(file_path)
        text = "\n".join(p.text for p in doc.paragraphs if p.text.strip())
        return {"paragraphs": len(doc.paragraphs), "text": text}

    def analyze_pdf(self, file_path):
        """Extract the text of every page of a PDF file.

        Returns:
            ``{"pages": int, "text": str}``.
        """
        doc = fitz.open(file_path)
        try:
            text = "".join(page.get_text() for page in doc)
            return {"pages": len(doc), "text": text}
        finally:
            # Release the file handle even when text extraction fails.
            doc.close()

    def _analyze_text(self, file_path):
        """Read a UTF-8 text file and count its newline-separated lines."""
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read()
        return {"lines": len(text.split("\n")), "text": text}

    def analyze_txt(self, file_path):
        """Analyze a plain-text file.

        Returns:
            ``{"lines": int, "text": str}``.
        """
        return self._analyze_text(file_path)

    def analyze_markdown(self, file_path):
        """Analyze a Markdown file (treated as plain UTF-8 text).

        Returns:
            ``{"lines": int, "text": str}``.
        """
        return self._analyze_text(file_path)

    def analyze_file(self, file_path):
        """Detect the type of *file_path* from its suffix and analyze it.

        Returns:
            The result dict of the matching ``analyze_*`` method, or
            ``{"error": ...}`` when the suffix is not supported.
        """
        ext = Path(file_path).suffix.lower()
        ftype = self._EXT_TO_TYPE.get(ext)
        if ftype is None:
            return {"error": f"不支持的格式:{ext}"}
        return getattr(self, f"analyze_{ftype}")(file_path)

    def generate_summary(self):
        """Analyze every scanned file and collect the results.

        A file whose analysis raises is recorded with an ``error`` entry
        instead of aborting the whole run, and is not counted in
        ``total_files``.

        Returns:
            ``{"total_files": int, "file_details": list[dict]}``.
        """
        summary = {"total_files": 0, "file_details": []}
        for ftype, flist in self.files.items():
            for fpath in flist:
                name = os.path.basename(fpath)
                try:
                    analysis = self.analyze_file(fpath)
                except Exception as e:  # best effort: keep going per file
                    summary["file_details"].append({
                        "name": name,
                        "type": ftype,
                        "error": str(e),
                    })
                else:
                    summary["file_details"].append({
                        "name": name,
                        "type": ftype,
                        "analysis": analysis,
                    })
                    summary["total_files"] += 1
        return summary
用户:分析这个文件夹中的所有Excel文件
智能体:使用DataAnalyzer扫描并分析
用户:对比这些CSV文件
智能体:读取并对比数据
用户:生成数据报告
智能体:生成分析报告
该技能支持在以下平台通过对话安装:
帮我安装 SkillHub 和 data-analyzer-1776195455 技能
设置 SkillHub 为我的优先技能安装源,然后帮我安装 data-analyzer-1776195455 技能
skillhub install data-analyzer-1776195455
文件大小: 3.06 KB | 发布时间: 2026-4-15 11:54