导入股票列表

This commit is contained in:
2025-11-16 09:23:47 +08:00
parent bc4a13e700
commit c732d9eb49
5 changed files with 277 additions and 18 deletions

View File

@ -25,16 +25,16 @@ def main():
success = wind_fetcher.process_and_save_data("000001.SZ", "2024-12-31")
print(f"处理结果: {'成功' if success else '失败'}")
# # 批量处理示例
# print("\n=== 批量处理多个股票 ===")
# stock_list = [
# "600519.SZ", # 贵州茅台
# "000002.SZ", # 万科A
# "600036.SH", # 招商银行
# "601318.SH", # 中国平安
# ]
# 批量处理示例
print("\n=== 批量处理多个股票 ===")
stock_list = [
"600519.SH",
"600436.SH",
"600887.SH",
"000858.SZ",
]
# results = wind_fetcher.batch_fetch_stocks(stock_list, "2024-12-31")
results = wind_fetcher.batch_fetch_stocks(stock_list, "2024-12-31")
# # 打印结果摘要
# print("\n=== 处理结果摘要 ===")

View File

@ -0,0 +1,188 @@
import pandas as pd
import logging
from typing import List, Dict, Any, Optional
from financial_data_manager import FinancialDataManager
class ExcelStockImporter:
    """Import stock master data from an Excel sheet into the database.

    The sheet must contain the columns listed in ``read_excel_file``.
    Rows are converted to plain dictionaries and written one by one
    through ``FinancialDataManager.insert_stock``.
    """

    def __init__(self, db_config: Dict[str, Any]):
        """
        Initialise the importer.

        Args:
            db_config: Keyword arguments forwarded to ``FinancialDataManager``.
        """
        self.db_config = db_config
        self.logger = self._setup_logger()

    def _setup_logger(self) -> logging.Logger:
        """Return the 'ExcelStockImporter' logger, configured at INFO level."""
        logger = logging.getLogger('ExcelStockImporter')
        logger.setLevel(logging.INFO)

        # The logger is shared by name, so only attach a handler once;
        # otherwise every new importer instance would duplicate the output.
        if not logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            )
            handler.setFormatter(formatter)
            logger.addHandler(handler)

        return logger

    @staticmethod
    def _clean_cell(value: Any) -> Optional[str]:
        """Return *value* as a stripped string, or None for a missing cell."""
        return str(value).strip() if pd.notna(value) else None

    def read_excel_file(self, file_path: str) -> Optional[pd.DataFrame]:
        """
        Read the Excel file and check that the required columns exist.

        Args:
            file_path: Path of the Excel file.

        Returns:
            The loaded DataFrame, or None when the file is missing, cannot
            be read, or lacks one of the required columns.
        """
        try:
            self.logger.info(f"开始读取Excel文件: {file_path}")

            df = pd.read_excel(file_path)

            # Refuse sheets that lack any column the importer relies on.
            required_columns = ['Wind代码', 'Wind一级行业代码', 'Wind一级行业名称', '上市地国家(地区)代码', '交易所']
            missing_columns = [col for col in required_columns if col not in df.columns]

            if missing_columns:
                self.logger.error(f"Excel文件中缺少必要的列: {missing_columns}")
                self.logger.info(f"Excel文件中的列: {df.columns.tolist()}")
                return None

            self.logger.info(f"成功读取Excel文件{len(df)} 行数据")
            self.logger.debug(f"数据前5行:\n{df.head()}")

            return df

        except FileNotFoundError:
            self.logger.error(f"文件不存在: {file_path}")
            return None
        except Exception as e:
            self.logger.error(f"读取Excel文件失败: {e}")
            return None

    def process_stock_data(self, df: pd.DataFrame) -> List[Dict[str, Any]]:
        """
        Convert the raw DataFrame into dictionaries for the database layer.

        Every cell is turned into a stripped string; missing cells become
        None. Rows whose stock code is missing or blank are skipped with a
        warning, as are rows that raise while being converted.

        Args:
            df: DataFrame holding the required columns.

        Returns:
            One dictionary per usable row with the keys ``stock_id``,
            ``wind_industry_code``, ``wind_industry_name``, ``country_code``
            and ``exchange``.
        """
        processed_data = []

        # Row numbers in log messages are 1-based positions, so logging never
        # depends on the index labels being integers.
        for row_number, (_, row) in enumerate(df.iterrows(), start=1):
            try:
                stock_data = {
                    # A missing code must become None, not the string 'nan',
                    # so that the emptiness check below can reject the row.
                    'stock_id': self._clean_cell(row['Wind代码']),
                    'wind_industry_code': self._clean_cell(row['Wind一级行业代码']),
                    'wind_industry_name': self._clean_cell(row['Wind一级行业名称']),
                    'country_code': self._clean_cell(row['上市地国家(地区)代码']),
                    'exchange': self._clean_cell(row['交易所']),
                    # A listing-date column could be mapped here if the sheet has one:
                    # 'listing_date': row['上市日期'] if '上市日期' in row and pd.notna(row['上市日期']) else None
                }
            except Exception as e:
                self.logger.warning(f"处理第 {row_number} 行数据时出错: {e}")
                continue

            # The stock code is the only mandatory field.
            if not stock_data['stock_id']:
                self.logger.warning(f"{row_number} 行股票代码为空,跳过")
                continue

            processed_data.append(stock_data)

        self.logger.info(f"成功处理 {len(processed_data)} 条股票数据")
        return processed_data

    def import_to_database(self, file_path: str) -> bool:
        """
        Load the Excel file and insert every usable row into the database.

        Args:
            file_path: Path of the Excel file.

        Returns:
            True when at least one row was inserted, False otherwise
            (unreadable or empty file, no usable rows, or any error).
        """
        try:
            df = self.read_excel_file(file_path)
            if df is None or df.empty:
                self.logger.error("读取Excel文件失败或文件为空")
                return False

            stock_data_list = self.process_stock_data(df)
            if not stock_data_list:
                self.logger.error("没有有效的股票数据需要导入")
                return False

            with FinancialDataManager(**self.db_config) as db_manager:
                success_count = 0
                total_count = len(stock_data_list)

                for stock_data in stock_data_list:
                    # A failure on one stock is logged and must not abort
                    # the rest of the import.
                    try:
                        if db_manager.insert_stock(stock_data):
                            success_count += 1
                        else:
                            self.logger.warning(f"插入股票数据失败: {stock_data['stock_id']}")
                    except Exception as e:
                        self.logger.error(f"插入股票 {stock_data['stock_id']} 时出错: {e}")

                self.logger.info(f"导入完成: 成功 {success_count}/{total_count}")
                return success_count > 0

        except Exception as e:
            self.logger.error(f"导入过程失败: {e}")
            return False

    def validate_data(self, file_path: str) -> Dict[str, Any]:
        """
        Summarise the Excel file and flag duplicated stock codes.

        Args:
            file_path: Path of the Excel file.

        Returns:
            On a readable file: a dictionary with ``valid``, ``total_rows``,
            ``stock_codes``, ``industries``, ``exchanges`` and
            ``duplicate_stocks``; ``valid`` is False when duplicates exist.
            Otherwise ``{'valid': False, 'message': ...}``.
        """
        try:
            df = self.read_excel_file(file_path)
            if df is None:
                return {'valid': False, 'message': '文件读取失败'}

            result = {
                'valid': True,
                'total_rows': len(df),
                'stock_codes': df['Wind代码'].tolist(),
                'industries': df['Wind一级行业名称'].unique().tolist(),
                'exchanges': df['交易所'].unique().tolist(),
                # duplicated() marks the second and later occurrences of a code.
                'duplicate_stocks': df[df.duplicated('Wind代码')]['Wind代码'].tolist()
            }

            if result['duplicate_stocks']:
                self.logger.warning(f"发现重复的股票代码: {result['duplicate_stocks']}")
                result['valid'] = False

            return result

        except Exception as e:
            self.logger.error(f"数据验证失败: {e}")
            return {'valid': False, 'message': str(e)}

View File

@ -127,31 +127,31 @@ class FinancialDataManager:
def insert_stock(self, stock_data: Dict[str, Any]) -> bool:
"""
插入或更新股票基本信息
插入或更新股票基本信息 - 更新版本
"""
required_fields = ['stock_id', 'stock_name']
required_fields = ['stock_id']
if not all(field in stock_data for field in required_fields):
self.logger.error(f"缺少必需字段: {required_fields}")
return False
query = """
INSERT INTO stocks (stock_id, stock_name, exchange, industry, market_cap, listing_date)
INSERT INTO stocks (stock_id, wind_industry_code, wind_industry_name, country_code, exchange, listing_date)
VALUES (%s, %s, %s, %s, %s, %s)
ON DUPLICATE KEY UPDATE
stock_name = VALUES(stock_name),
wind_industry_code = VALUES(wind_industry_code),
wind_industry_name = VALUES(wind_industry_name),
country_code = VALUES(country_code),
exchange = VALUES(exchange),
industry = VALUES(industry),
market_cap = VALUES(market_cap),
listing_date = VALUES(listing_date),
updated_at = CURRENT_TIMESTAMP
"""
success = self._execute_query(query, (
stock_data['stock_id'],
stock_data['stock_name'],
stock_data.get('wind_industry_code'),
stock_data.get('wind_industry_name'),
stock_data.get('country_code'),
stock_data.get('exchange'),
stock_data.get('industry'),
stock_data.get('market_cap'),
stock_data.get('listing_date')
))
@ -159,6 +159,7 @@ class FinancialDataManager:
self.logger.info(f"成功处理股票: {stock_data['stock_id']}")
return success
def batch_insert_stocks(self, stocks_list: List[Dict[str, Any]]) -> bool:
"""批量插入股票数据"""
success_count = 0

70
finance/import_stocks.py Normal file
View File

@ -0,0 +1,70 @@
import os
import logging
from excel_stock_importer import ExcelStockImporter
# 设置日志
logging.basicConfig(level=logging.INFO)
def main():
    """Find stock_list.xlsx, validate it, and import it into the database."""
    # Database connection settings
    db_config = dict(
        host='127.0.0.1',
        database='fintech',
        user='root',
        password='secret',
        port=3306,
    )

    # Candidate locations of the Excel file, tried in this order
    excel_files = [
        "stock_list.xlsx",  # current directory
        "./finance/stock_list.xlsx",  # finance/ below the current directory
        os.path.join(os.getcwd(), "./finance/stock_list.xlsx"),  # same, anchored at the cwd
    ]

    print("=== 当前工作目录 ===")
    print(f"工作目录: {os.getcwd()}")
    print(f"目录内容: {os.listdir('.')}")

    # Create the importer
    importer = ExcelStockImporter(db_config)

    # Use the first candidate that exists, reporting every miss on the way
    found_file = None
    for candidate in excel_files:
        if not os.path.exists(candidate):
            print(f"文件不存在: {candidate}")
            continue
        found_file = candidate
        print(f"找到文件: {candidate}")
        break

    if not found_file:
        for hint in (
            "未找到stock_list.xlsx文件请检查",
            "1. 文件名是否正确(包括扩展名)",
            "2. 文件是否在当前目录",
            "3. 文件权限是否正常",
        ):
            print(hint)
        return

    # Validate the sheet before touching the database
    print("\n=== 验证数据 ===")
    validation_result = importer.validate_data(found_file)

    verdict = '通过' if validation_result['valid'] else '失败'
    print(f"数据验证结果: {verdict}")

    if 'total_rows' in validation_result:
        print(f"总行数: {validation_result['total_rows']}")
        print(f"行业列表: {validation_result.get('industries', [])}")
        print(f"交易所列表: {validation_result.get('exchanges', [])}")

        duplicates = validation_result.get('duplicate_stocks')
        if duplicates:
            print(f"重复股票代码: {duplicates}")

    if not validation_result.get('valid'):
        print(f"数据验证失败: {validation_result.get('message', '未知错误')}")
        return

    # Import only after validation has passed
    print("\n=== 导入数据 ===")
    success = importer.import_to_database(found_file)
    outcome = '成功' if success else '失败'
    print(f"导入结果: {outcome}")


if __name__ == "__main__":
    main()

BIN
finance/stock_list.xlsx Normal file

Binary file not shown.