Issue
My database
USE [master]
GO
/****** Object: Database [company] Script Date: 06/07/2021 17:01:26 ******/
CREATE DATABASE [company]
CONTAINMENT = NONE
ON PRIMARY
( NAME = N'company', FILENAME = N'D:\Program Files\Microsoft SQL Server\MSSQL13.SQLEXPRESS\MSSQL\DATA\company.mdf' , SIZE = 8192KB , MAXSIZE = UNLIMITED, FILEGROWTH = 65536KB )
LOG ON
( NAME = N'company_log', FILENAME = N'D:\Program Files\Microsoft SQL Server\MSSQL13.SQLEXPRESS\MSSQL\DATA\company_log.ldf' , SIZE = 8192KB , MAXSIZE = 2048GB , FILEGROWTH = 65536KB )
GO
IF (1 = FULLTEXTSERVICEPROPERTY('IsFullTextInstalled'))
begin
EXEC [company].[dbo].[sp_fulltext_database] @action = 'enable'
end
GO
ALTER DATABASE [company] SET ANSI_NULL_DEFAULT OFF
GO
ALTER DATABASE [company] SET ANSI_NULLS OFF
GO
ALTER DATABASE [company] SET ANSI_PADDING OFF
GO
ALTER DATABASE [company] SET ANSI_WARNINGS OFF
GO
ALTER DATABASE [company] SET ARITHABORT OFF
GO
ALTER DATABASE [company] SET AUTO_CLOSE OFF
GO
ALTER DATABASE [company] SET AUTO_SHRINK OFF
GO
ALTER DATABASE [company] SET AUTO_UPDATE_STATISTICS ON
GO
ALTER DATABASE [company] SET CURSOR_CLOSE_ON_COMMIT OFF
GO
ALTER DATABASE [company] SET CURSOR_DEFAULT GLOBAL
GO
ALTER DATABASE [company] SET CONCAT_NULL_YIELDS_NULL OFF
GO
ALTER DATABASE [company] SET NUMERIC_ROUNDABORT OFF
GO
ALTER DATABASE [company] SET QUOTED_IDENTIFIER OFF
GO
ALTER DATABASE [company] SET RECURSIVE_TRIGGERS OFF
GO
ALTER DATABASE [company] SET DISABLE_BROKER
GO
ALTER DATABASE [company] SET AUTO_UPDATE_STATISTICS_ASYNC OFF
GO
ALTER DATABASE [company] SET DATE_CORRELATION_OPTIMIZATION OFF
GO
ALTER DATABASE [company] SET TRUSTWORTHY OFF
GO
ALTER DATABASE [company] SET ALLOW_SNAPSHOT_ISOLATION OFF
GO
ALTER DATABASE [company] SET PARAMETERIZATION SIMPLE
GO
ALTER DATABASE [company] SET READ_COMMITTED_SNAPSHOT OFF
GO
ALTER DATABASE [company] SET HONOR_BROKER_PRIORITY OFF
GO
ALTER DATABASE [company] SET RECOVERY SIMPLE
GO
ALTER DATABASE [company] SET MULTI_USER
GO
ALTER DATABASE [company] SET PAGE_VERIFY CHECKSUM
GO
ALTER DATABASE [company] SET DB_CHAINING OFF
GO
ALTER DATABASE [company] SET FILESTREAM( NON_TRANSACTED_ACCESS = OFF )
GO
ALTER DATABASE [company] SET TARGET_RECOVERY_TIME = 60 SECONDS
GO
ALTER DATABASE [company] SET DELAYED_DURABILITY = DISABLED
GO
ALTER DATABASE [company] SET QUERY_STORE = OFF
GO
USE [company]
GO
ALTER DATABASE SCOPED CONFIGURATION SET LEGACY_CARDINALITY_ESTIMATION = OFF;
GO
ALTER DATABASE SCOPED CONFIGURATION SET MAXDOP = 0;
GO
ALTER DATABASE SCOPED CONFIGURATION SET PARAMETER_SNIFFING = ON;
GO
ALTER DATABASE SCOPED CONFIGURATION SET QUERY_OPTIMIZER_HOTFIXES = OFF;
GO
ALTER DATABASE [company] SET READ_WRITE
GO
-------------------------------------------------------------------------------------------
USE [company]
GO
/****** Object: Table [dbo].[comp] Script Date: 06/07/2021 17:01:55 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE TABLE [dbo].[comp](
[mst] [nchar](14) NOT NULL,
[company_name] [nvarchar](max) NULL,
[address] [nvarchar](max) NULL,
[created] [datetime] NULL,
[modified] [datetime] NULL,
[legal_representative] [nvarchar](max) NULL,
CONSTRAINT [PK_comp] PRIMARY KEY CLUSTERED
(
[mst] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
GO
My program
import scrapy
import re
import pyodbc


class BlogSpider(scrapy.Spider):
    name = 'blogspider'
    start_urls = ['https://masothue.com/']

    # cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER=DESKTOP-23PIH3M;DATABASE=company;UID=sa;PWD=123456a@')
    # cursor = cnxn.cursor()
    # cursor.execute("SELECT comp.mst, comp.address, comp.company_name FROM comp")
    # for row in cursor.fetchall():
    #     print(row)

    def parse(self, response):
        cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER=DESKTOP-23PIH3M;DATABASE=company;UID=sa;PWD=123456a@')
        cursor = cnxn.cursor()
        cursor.execute("INSERT INTO [dbo].[comp] ([mst] ,[company_name] ,[address] ,[legal_representative]) VALUES (\"343\", \"565\", \"343\", \"343\")")
        cnxn.commit()
        for href in response.xpath("//div/h3/a/@href").extract():
            print('https://masothue.com' + href)
            print(re.search(r'(\d{10})', href).group(1))
Error
C:\Users\donhuvy\PycharmProjects\pythonProject>scrapy runspider sc.py
2021-07-06 19:08:33 [scrapy.utils.log] INFO: Scrapy 2.5.0 started (bot: scrapybot)
2021-07-06 19:08:33 [scrapy.utils.log] INFO: Versions: lxml 4.6.3.0, libxml2 2.9.5, cssselect 1.1.0, parsel 1.6.0, w3lib 1.22.0, Twisted 21.2.0, Python 3.9.6 (tags/v3.9.6:db3ff76, Jun 28 2021, 15:26:21) [MSC v.1929 64 bit (AMD64)], pyOpenSSL 20.0.1 (OpenSSL 1.1.1k 25 Mar 2021), cryptography 3.4.7, Platform Windows-10-10.0.19042-SP0
2021-07-06 19:08:33 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.selectreactor.SelectReactor
2021-07-06 19:08:33 [scrapy.crawler] INFO: Overridden settings:
{'SPIDER_LOADER_WARN_ONLY': True}
2021-07-06 19:08:33 [scrapy.extensions.telnet] INFO: Telnet Password: 5f64e686c90fdf8a
2021-07-06 19:08:33 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
'scrapy.extensions.telnet.TelnetConsole',
'scrapy.extensions.logstats.LogStats']
2021-07-06 19:08:33 [scrapy.middleware] INFO: Enabled downloader middlewares:
['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
'scrapy.downloadermiddlewares.retry.RetryMiddleware',
'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware',
'scrapy.downloadermiddlewares.stats.DownloaderStats']
2021-07-06 19:08:33 [scrapy.middleware] INFO: Enabled spider middlewares:
['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',
'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',
'scrapy.spidermiddlewares.referer.RefererMiddleware',
'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',
'scrapy.spidermiddlewares.depth.DepthMiddleware']
2021-07-06 19:08:33 [scrapy.middleware] INFO: Enabled item pipelines:
[]
2021-07-06 19:08:33 [scrapy.core.engine] INFO: Spider opened
2021-07-06 19:08:33 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
2021-07-06 19:08:33 [scrapy.extensions.telnet] INFO: Telnet console listening on 127.0.0.1:6023
2021-07-06 19:08:34 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://masothue.com/> (referer: None)
2021-07-06 19:08:34 [scrapy.core.scraper] ERROR: Spider error processing <GET https://masothue.com/> (referer: None)
Traceback (most recent call last):
File "C:\Users\donhuvy\AppData\Roaming\Python\Python39\site-packages\twisted\internet\defer.py", line 662, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "C:\Users\donhuvy\AppData\Roaming\Python\Python39\site-packages\scrapy\spiders\__init__.py", line 90, in _parse
return self.parse(response, **kwargs)
File "C:\Users\donhuvy\PycharmProjects\pythonProject\sc.py", line 19, in parse
cursor.execute("INSERT INTO [dbo].[comp] ([mst] ,[company_name] ,[address] ,[legal_representative]) VALUES (\"343\", \"565\", \"343\", \"343\")")
pyodbc.ProgrammingError: ('42S22', "[42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name '343'. (207) (SQLExecDirectW); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name '565'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name '343'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name '343'. (207)")
2021-07-06 19:08:34 [scrapy.core.engine] INFO: Closing spider (finished)
2021-07-06 19:08:34 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 212,
'downloader/request_count': 1,
'downloader/request_method_count/GET': 1,
'downloader/response_bytes': 17472,
'downloader/response_count': 1,
'downloader/response_status_count/200': 1,
'elapsed_time_seconds': 0.437639,
'finish_reason': 'finished',
'finish_time': datetime.datetime(2021, 7, 6, 12, 8, 34, 415274),
'httpcompression/response_bytes': 81335,
'httpcompression/response_count': 1,
'log_count/DEBUG': 1,
'log_count/ERROR': 1,
'log_count/INFO': 10,
'response_received_count': 1,
'scheduler/dequeued': 1,
'scheduler/dequeued/memory': 1,
'scheduler/enqueued': 1,
'scheduler/enqueued/memory': 1,
'spider_exceptions/ProgrammingError': 1,
'start_time': datetime.datetime(2021, 7, 6, 12, 8, 33, 977635)}
2021-07-06 19:08:34 [scrapy.core.engine] INFO: Spider closed (finished)
C:\Users\donhuvy\PycharmProjects\pythonProject>
What is wrong, and how do I fix it?
Solution
The relevant hint is the error message Invalid column name '343'. In
# WRONG, column names instead of string constants
" ... VALUES (\"343\", \"565\", \"343\", \"343\")"
You are using double quotes to delimit strings. In SQL Server (Transact-SQL), string literals must be enclosed in single quotes; double quotes delimit identifiers such as column names whenever QUOTED_IDENTIFIER is ON, which ODBC connections (including pyodbc's) enable by default. SQL Server therefore tries to resolve "343" and "565" as column names and fails with Invalid column name.
# CORRECT
" ... VALUES ('343', '565', '343', '343')"
This fixes the problem when the corresponding columns are of a text type, which is the case here: your table defines mst as nchar(14) and the remaining columns as nvarchar(max). For columns of a numeric type, omit the quotes. E.g., if the first column mst were an INT, you would write:
" ... VALUES (343, '565', '343', '343')"
Note that a VALUES clause may contain expressions, not just constants or literals, so the parser does not assume a double-quoted token is a string; it resolves it as an identifier. See: Table Value Constructor (Transact-SQL)
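Better still, let pyodbc parameterize the statement instead of splicing quoted values into the SQL string. Here is a minimal sketch reusing the connection string from your question; the ? placeholders are bound by the driver, which sidesteps quoting entirely and protects against SQL injection once you start inserting scraped values:
import pyodbc

# Reuses the connection string from the question.
cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER=DESKTOP-23PIH3M;DATABASE=company;UID=sa;PWD=123456a@')
cursor = cnxn.cursor()
# Each ? is replaced by the corresponding Python value; no manual quoting needed.
cursor.execute(
    "INSERT INTO [dbo].[comp] ([mst], [company_name], [address], [legal_representative]) "
    "VALUES (?, ?, ?, ?)",
    ('343', '565', '343', '343'))
cnxn.commit()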
See also (the single-quote vs. double-quote rule works the same way in Oracle, which these links discuss):
- https://stackoverflow.com/a/1162757/880990
- Difference between single quote and double quote in Oracle SQL
- User-Defined Identifiers (PL/SQL Language Fundamentals, Oracle help center)
Answered By - Olivier Jacot-Descombes