class ExcelStockImporter:
    """Import stock master data from an Excel workbook into the database.

    The workbook must contain the columns listed in ``REQUIRED_COLUMNS``.
    Each row is converted into a dict and handed to
    ``FinancialDataManager.insert_stock``.
    """

    # Columns that must be present in the workbook for it to be accepted.
    REQUIRED_COLUMNS = ['Wind代码', 'Wind一级行业代码', 'Wind一级行业名称', '上市地国家(地区)代码', '交易所']

    def __init__(self, db_config: Dict[str, Any]):
        """
        Create an importer.

        Args:
            db_config: Keyword arguments forwarded to ``FinancialDataManager``.
        """
        self.db_config = db_config
        self.logger = self._setup_logger()

    def _setup_logger(self) -> logging.Logger:
        """Return the shared 'ExcelStockImporter' logger, configured once."""
        logger = logging.getLogger('ExcelStockImporter')
        logger.setLevel(logging.INFO)

        # The logger is process-global; only attach a handler the first time
        # so repeated instantiation does not duplicate every log line.
        if not logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            )
            handler.setFormatter(formatter)
            logger.addHandler(handler)

        return logger

    @staticmethod
    def _clean_cell(value: Any) -> Optional[str]:
        """Return the cell as a stripped string, or None for a missing cell."""
        if pd.isna(value):
            return None
        return str(value).strip()

    def read_excel_file(self, file_path: str) -> Optional[pd.DataFrame]:
        """
        Read the workbook and check that the required columns exist.

        Args:
            file_path: Path of the Excel file.

        Returns:
            The loaded DataFrame, or None if the file is missing, cannot be
            read, or lacks one of the required columns.
        """
        try:
            self.logger.info(f"开始读取Excel文件: {file_path}")

            df = pd.read_excel(file_path)

            missing_columns = [col for col in self.REQUIRED_COLUMNS if col not in df.columns]

            if missing_columns:
                self.logger.error(f"Excel文件中缺少必要的列: {missing_columns}")
                self.logger.info(f"Excel文件中的列: {df.columns.tolist()}")
                return None

            self.logger.info(f"成功读取Excel文件,共 {len(df)} 行数据")
            self.logger.debug(f"数据前5行:\n{df.head()}")

            return df

        except FileNotFoundError:
            self.logger.error(f"文件不存在: {file_path}")
            return None
        except Exception as e:
            self.logger.error(f"读取Excel文件失败: {e}")
            return None

    def process_stock_data(self, df: pd.DataFrame) -> List[Dict[str, Any]]:
        """
        Convert DataFrame rows into the dict format used for insertion.

        Rows whose stock code is missing (NaN/None) or blank are skipped with
        a warning. Optional columns become None when the cell is missing.

        Args:
            df: DataFrame containing the required columns.

        Returns:
            One dict per accepted row, with keys ``stock_id``,
            ``wind_industry_code``, ``wind_industry_name``, ``country_code``
            and ``exchange``.
        """
        processed_data = []

        # Number rows by position rather than by index label so the warning
        # text works even when the DataFrame index is not 0..n-1 integers.
        for row_number, (_, row) in enumerate(df.iterrows(), start=1):
            try:
                # A missing code must be rejected *before* str(): str(nan)
                # is the non-empty text 'nan' and would otherwise be stored
                # as a real stock id.
                stock_id = self._clean_cell(row['Wind代码'])
                if not stock_id:
                    self.logger.warning(f"第 {row_number} 行股票代码为空,跳过")
                    continue

                processed_data.append({
                    'stock_id': stock_id,
                    'wind_industry_code': self._clean_cell(row['Wind一级行业代码']),
                    'wind_industry_name': self._clean_cell(row['Wind一级行业名称']),
                    'country_code': self._clean_cell(row['上市地国家(地区)代码']),
                    'exchange': self._clean_cell(row['交易所']),
                })

            except Exception as e:
                # Best effort: one malformed row must not abort the import.
                self.logger.warning(f"处理第 {row_number} 行数据时出错: {e}")
                continue

        self.logger.info(f"成功处理 {len(processed_data)} 条股票数据")
        return processed_data

    def import_to_database(self, file_path: str) -> bool:
        """
        Read the workbook and insert every valid row into the database.

        Args:
            file_path: Path of the Excel file.

        Returns:
            True if at least one row was inserted; False if the file could
            not be read, held no usable rows, or every insert failed.
        """
        try:
            df = self.read_excel_file(file_path)
            if df is None or df.empty:
                self.logger.error("读取Excel文件失败或文件为空")
                return False

            stock_data_list = self.process_stock_data(df)
            if not stock_data_list:
                self.logger.error("没有有效的股票数据需要导入")
                return False

            with FinancialDataManager(**self.db_config) as db_manager:
                success_count = 0
                total_count = len(stock_data_list)

                for stock_data in stock_data_list:
                    try:
                        if db_manager.insert_stock(stock_data):
                            success_count += 1
                        else:
                            self.logger.warning(f"插入股票数据失败: {stock_data['stock_id']}")
                    except Exception as e:
                        # Keep going: a single failing row should not stop
                        # the remaining inserts.
                        self.logger.error(f"插入股票 {stock_data['stock_id']} 时出错: {e}")

                self.logger.info(f"导入完成: 成功 {success_count}/{total_count}")
                return success_count > 0

        except Exception as e:
            self.logger.error(f"导入过程失败: {e}")
            return False

    def validate_data(self, file_path: str) -> Dict[str, Any]:
        """
        Summarise the workbook and flag duplicate stock codes.

        Args:
            file_path: Path of the Excel file.

        Returns:
            A dict that always has ``valid``. On a readable file it also has
            ``total_rows``, ``stock_codes``, ``industries``, ``exchanges``
            and ``duplicate_stocks``. Whenever ``valid`` is False a
            ``message`` key explains why.
        """
        try:
            df = self.read_excel_file(file_path)
            if df is None:
                return {'valid': False, 'message': '文件读取失败'}

            result = {
                'valid': True,
                'total_rows': len(df),
                'stock_codes': df['Wind代码'].tolist(),
                'industries': df['Wind一级行业名称'].unique().tolist(),
                'exchanges': df['交易所'].unique().tolist(),
                'duplicate_stocks': df[df.duplicated('Wind代码')]['Wind代码'].tolist()
            }

            if result['duplicate_stocks']:
                self.logger.warning(f"发现重复的股票代码: {result['duplicate_stocks']}")
                result['valid'] = False
                # Give callers a reason to display; without it they can only
                # report an unknown error.
                result['message'] = f"发现重复的股票代码: {result['duplicate_stocks']}"

            return result

        except Exception as e:
            self.logger.error(f"数据验证失败: {e}")
            return {'valid': False, 'message': str(e)}
def main():
    """Locate stock_list.xlsx, validate it, and import it into the database.

    Database settings default to the local development values and can be
    overridden with the environment variables FINTECH_DB_HOST,
    FINTECH_DB_NAME, FINTECH_DB_USER, FINTECH_DB_PASSWORD and
    FINTECH_DB_PORT, so real credentials need not be committed.
    """
    env = os.environ
    db_config = {
        'host': env.get('FINTECH_DB_HOST', '127.0.0.1'),
        'database': env.get('FINTECH_DB_NAME', 'fintech'),
        'user': env.get('FINTECH_DB_USER', 'root'),
        'password': env.get('FINTECH_DB_PASSWORD', 'secret'),
        'port': int(env.get('FINTECH_DB_PORT', '3306')),
    }

    # Candidate locations, tried in order.
    excel_files = [
        "stock_list.xlsx",  # working directory
        "./finance/stock_list.xlsx",  # finance/ under the working directory
        os.path.join(os.getcwd(), "./finance/stock_list.xlsx"),  # same, absolute
        # Next to this script, so the import works from any working directory.
        os.path.join(os.path.dirname(os.path.abspath(__file__)), "stock_list.xlsx"),
    ]

    print("=== 当前工作目录 ===")
    print(f"工作目录: {os.getcwd()}")
    print(f"目录内容: {os.listdir('.')}")

    importer = ExcelStockImporter(db_config)

    found_file = None
    for excel_file in excel_files:
        if os.path.exists(excel_file):
            found_file = excel_file
            print(f"找到文件: {excel_file}")
            break
        print(f"文件不存在: {excel_file}")

    if not found_file:
        print("未找到stock_list.xlsx文件,请检查:")
        print("1. 文件名是否正确(包括扩展名)")
        print("2. 文件是否在当前目录")
        print("3. 文件权限是否正常")
        return

    print("\n=== 验证数据 ===")
    validation_result = importer.validate_data(found_file)
    is_valid = bool(validation_result.get('valid'))
    print(f"数据验证结果: {'通过' if is_valid else '失败'}")

    if 'total_rows' in validation_result:
        print(f"总行数: {validation_result['total_rows']}")
        print(f"行业列表: {validation_result.get('industries', [])}")
        print(f"交易所列表: {validation_result.get('exchanges', [])}")

    duplicates = validation_result.get('duplicate_stocks')
    if duplicates:
        print(f"重复股票代码: {duplicates}")

    if not is_valid:
        # A duplicate-code failure may arrive without a 'message'; name the
        # real cause instead of reporting an unknown error.
        fallback = f"发现重复的股票代码: {duplicates}" if duplicates else '未知错误'
        print(f"数据验证失败: {validation_result.get('message', fallback)}")
        return

    print("\n=== 导入数据 ===")
    success = importer.import_to_database(found_file)
    print(f"导入结果: {'成功' if success else '失败'}")


if __name__ == "__main__":
    main()