|
1 |
#!/usr/bin/python
|
|
2 |
|
|
3 |
# ******************************************************************************
|
|
4 |
# Copyright 2015 IAS - IDOC
|
|
5 |
#
|
|
6 |
# This program is free software: you can redistribute it and/or modify
|
|
7 |
# it under the terms of the GNU General Public License as published by
|
|
8 |
# the Free Software Foundation, either version 3 of the License, or
|
|
9 |
# (at your option) any later version.
|
|
10 |
#
|
|
11 |
# This program is distributed in the hope that it will be useful,
|
|
12 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14 |
# GNU General Public License for more details.
|
|
15 |
#
|
|
16 |
# You should have received a copy of the GNU General Public License
|
|
17 |
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
18 |
#
|
|
19 |
# ******************************************************************************
|
|
20 |
|
|
21 |
'''
|
|
22 |
This script writes/updates a table into a PostgreSQL Database
|
|
23 |
reading data and columns format directly from a FITS file.
|
|
24 |
|
|
25 |
The database can be local or on a remote server.
|
|
26 |
|
|
27 |
Its syntax is:
|
|
28 |
|
|
29 |
$ python ingest_dataset_from_FITS.py <file>.fits
|
|
30 |
|
|
31 |
@author: Alessandro NASTASI for IAS - IDOC
|
|
32 |
@date: 24/04/2015
|
|
33 |
'''
|
|
34 |
|
|
35 |
__author__ = "Alessandro Nastasi"
|
|
36 |
__credits__ = ["Alessandro Nastasi", "Karin Dassas"]
|
|
37 |
__license__ = "GPL"
|
|
38 |
__version__ = "1.0"
|
|
39 |
__date__ = "24/04/2015"
|
|
40 |
|
|
41 |
import psycopg2
|
|
42 |
import pyfits
|
|
43 |
|
|
44 |
import numpy as np
|
|
45 |
import os, sys, re, math
|
|
46 |
from time import time
|
|
47 |
|
|
48 |
class bcolors:
|
|
49 |
HEADER = '\033[95m'
|
|
50 |
OKBLUE = '\033[94m'
|
|
51 |
OKGREEN = '\033[92m'
|
|
52 |
WARNING = '\033[93m'
|
|
53 |
FAIL = '\033[91m'
|
|
54 |
ENDC = '\033[0m'
|
|
55 |
|
|
56 |
PSQL_FORMAT = {
|
|
57 |
'L' : 'boolean DEFAULT false',
|
|
58 |
#'X': 'TBD', #bit
|
|
59 |
#'B': 'TBD', #Unsigned byte
|
|
60 |
'I' : 'integer DEFAULT (-1) NOT NULL',
|
|
61 |
'J' : 'real DEFAULT (- (1.6375E+30::numeric)::real)',
|
|
62 |
'K' : 'real DEFAULT (- (1.6375E+30::numeric)::real)',
|
|
63 |
'E' : 'real DEFAULT (- (1.6375E+30::numeric)::real)',
|
|
64 |
'D' : 'real DEFAULT (- (1.6375E+30::numeric)::real)',
|
|
65 |
'A' : 'character varying(1027)'
|
|
66 |
#'C': 'TBD', #single precision complex
|
|
67 |
#'M': 'TBD', #double precision complex
|
|
68 |
#'P': 'TBD', #array descriptor
|
|
69 |
#'Q': 'TBD' #array descriptor
|
|
70 |
}
|
|
71 |
|
|
72 |
def convert_into_SQL_format(fits_format):
|
|
73 |
"""Convert from FITS to PSQL formats"""
|
|
74 |
formats = 'LXBIJKAEDCMPQ'
|
|
75 |
psql_format = ''
|
|
76 |
for char in formats:
|
|
77 |
if len(fits_format.split(char)) > 1:
|
|
78 |
psql_format = PSQL_FORMAT[char]
|
|
79 |
return psql_format
|
|
80 |
|
|
81 |
def RADECtoXYZ(RA,DEC):
|
|
82 |
"""Convert RA DEC pointing to X Y Z"""
|
|
83 |
#convert degrees to radians
|
|
84 |
RArad=math.radians(RA)
|
|
85 |
DECrad=math.radians(DEC)
|
|
86 |
X=math.cos(DECrad)*math.cos(RArad)
|
|
87 |
Y=math.cos(DECrad)*math.sin(RArad)
|
|
88 |
Z=math.sin(DECrad)
|
|
89 |
ResultXYZ=[X,Y,Z]
|
|
90 |
return ResultXYZ
|
|
91 |
|
|
92 |
dbname = "'<database_name>'" #<--- Customize here
|
|
93 |
|
|
94 |
if (len(sys.argv) > 1):
|
|
95 |
filename = sys.argv[1]
|
|
96 |
else:
|
|
97 |
print bcolors.WARNING + "\n\tSintax:\t$ python ingest_dataset_from_FITS.py <file>.fits\n" + bcolors.ENDC
|
|
98 |
os._exit(0)
|
|
99 |
|
|
100 |
host = raw_input("\n> Where is the dataset to update/create? (enter 0 to exit)\n\t- localhost [1]\n\t- remote server "+bcolors.WARNING+'<server_name>'+bcolors.ENDC+" [2]\n\t--> ")
|
|
101 |
choice = False
|
|
102 |
|
|
103 |
while not choice:
|
|
104 |
if host=='1':
|
|
105 |
#LOCALHOST
|
|
106 |
user = "'postgres'"
|
|
107 |
host = "'localhost'"
|
|
108 |
pwd = "''"
|
|
109 |
choice = True
|
|
110 |
|
|
111 |
elif host=='2':
|
|
112 |
#REMOTE SERVER
|
|
113 |
user = "'<username>'" #<--- Customize here
|
|
114 |
host = "'<server_name>'" #<--- Customize here
|
|
115 |
pwd = "''"
|
|
116 |
choice = True
|
|
117 |
|
|
118 |
elif host=='0':
|
|
119 |
print '\nExit.\n'; os._exit(0)
|
|
120 |
|
|
121 |
else:
|
|
122 |
print bcolors.WARNING+'\n!! Choice not valid !!'+ bcolors.ENDC
|
|
123 |
host = raw_input('\n\t> Please enter 1, 2 or 0: ')
|
|
124 |
|
|
125 |
fileInput = pyfits.open(filename)
|
|
126 |
fileExtension = filename.split('.')[1].strip()
|
|
127 |
|
|
128 |
input_mode = 'fits' #''None'
|
|
129 |
|
|
130 |
#CSV mode not yet implemented...
|
|
131 |
'''
|
|
132 |
if fileExtension == 'fits': input_mode = 'fits'
|
|
133 |
else: input_mode = 'csv'
|
|
134 |
'''
|
|
135 |
|
|
136 |
dataset = raw_input('\n> Please enter the name of the new dataset to create into the %s database: ' % dbname)
|
|
137 |
|
|
138 |
#Connection do the database
|
|
139 |
conn = psycopg2.connect("dbname="+dbname+" user="+user+" host="+host+" password="+pwd+"")
|
|
140 |
|
|
141 |
#Create the Psycopg object
|
|
142 |
cur = conn.cursor()
|
|
143 |
|
|
144 |
#Read the columns to INSERT INTO the table
|
|
145 |
if input_mode == 'fits':
|
|
146 |
print '\n- Reading/storing data from FITS table (it may take some time. Please wait...)'
|
|
147 |
data = fileInput[1].data
|
|
148 |
|
|
149 |
table_size = data.size # 100 joined = 210 extractednames
|
|
150 |
|
|
151 |
fields = (data.names)
|
|
152 |
fields_format = (data.formats)
|
|
153 |
data2D = [[data[field][i] for field in fields] for i in range(table_size)] #Storing all in a 2D array, but without the name line
|
|
154 |
|
|
155 |
#Create a TABLE
|
|
156 |
table = False
|
|
157 |
|
|
158 |
#First test if the table already exists:
|
|
159 |
cur.execute("select exists(select * from information_schema.tables where table_name=%s)", (dataset,))
|
|
160 |
table = cur.fetchone()[0]
|
|
161 |
drop_cascade = 'n'
|
|
162 |
#if yes, drop the table
|
|
163 |
if table:
|
|
164 |
print '\n- Dataset already exists. Dropping it...'
|
|
165 |
try:
|
|
166 |
cur.execute("DROP TABLE "+dataset+";")
|
|
167 |
except:
|
|
168 |
#If the dataset has one or more associated VIEWs, *everything* must be dropped
|
|
169 |
print bcolors.WARNING+"\n>> Impossible to drop the table, possibly because other elements (e.g. VIEWS) depend on it. <<"+bcolors.ENDC
|
|
170 |
drop_cascade = raw_input("\n\t> Do you want to use the 'DROP ... CASCADE' option to delete them too? [y/n]: ")
|
|
171 |
if drop_cascade in 'YESyes1':
|
|
172 |
#A re-connection is necessary if the DROP test failed
|
|
173 |
conn = psycopg2.connect("dbname="+dbname+" user="+user+" host="+host+" password="+pwd+"")
|
|
174 |
cur = conn.cursor()
|
|
175 |
cur.execute("DROP TABLE "+dataset+" CASCADE;")
|
|
176 |
print '\n\t- Dataset %s and all his dependencies successfully dropped.' % dataset
|
|
177 |
else:
|
|
178 |
print "\n- Exit.\n"; os._exit(0)
|
|
179 |
|
|
180 |
else:
|
|
181 |
#If not, just create a new one
|
|
182 |
print '\n- Table does not exist. A new one will be created...'
|
|
183 |
|
|
184 |
#If 'RA' and 'DEC' are found, the Cartesian coordinates x,y,z are computed and automatically appended at the end of the table
|
|
185 |
if ('RA' in fields) and ('DEC' in fields):
|
|
186 |
print '\n- Found RA and DEC. The Cartesian coordinates x,y,z will be computed and appended to the dataset as additional columns...'
|
|
187 |
fields.extend(['x','y','z'])
|
|
188 |
fields_format.extend(['E','E','E'])
|
|
189 |
xyz = [RADECtoXYZ(data['RA'][j], data['DEC'][j]) for j in range(len(data2D))]
|
|
190 |
data2D = np.column_stack( [ data2D , xyz ] )
|
|
191 |
|
|
192 |
#Store the info about which fields are string
|
|
193 |
fields_string_length = [len(field.split('A')) for field in fields_format]
|
|
194 |
|
|
195 |
print '\n- Creating/updating the table...\n'
|
|
196 |
createTable_cmd = "CREATE TABLE %s (id integer PRIMARY KEY" % dataset
|
|
197 |
for j, name in enumerate(fields):
|
|
198 |
|
|
199 |
createTable_cmd += ", %s %s" % (name, convert_into_SQL_format(fields_format[j]) )
|
|
200 |
|
|
201 |
createTable_cmd +=");"
|
|
202 |
cur.execute(createTable_cmd)
|
|
203 |
|
|
204 |
#Fill in the columns
|
|
205 |
for i in range(table_size):
|
|
206 |
sys.stdout.write("- Filling the %sth row of the table...\r" % i)
|
|
207 |
sys.stdout.flush()
|
|
208 |
toExecute = "INSERT INTO %s (id " % dataset
|
|
209 |
for field in fields: toExecute += ", %s" % field
|
|
210 |
toExecute += ") VALUES (%s " % i
|
|
211 |
for j, field in enumerate(fields):
|
|
212 |
|
|
213 |
if fields_string_length[j]>1:
|
|
214 |
toExecute += ", '%s'" % data2D[i][j]
|
|
215 |
else:
|
|
216 |
if str(data2D[i][j]) == 'nan': toExecute += ", NULL"
|
|
217 |
else: toExecute += ", %s" % data2D[i][j]
|
|
218 |
|
|
219 |
toExecute +=")"
|
|
220 |
cur.execute(toExecute)
|
|
221 |
if i == table_size-1: sys.stdout.write("- Filling the %sth row of the table..." % i +bcolors.OKGREEN+"\t[OK]"+bcolors.ENDC+"\r")
|
|
222 |
|
|
223 |
#Apply the changes -> create the actual database
|
|
224 |
conn.commit()
|
|
225 |
|
|
226 |
#Close the connections
|
|
227 |
cur.close()
|
|
228 |
conn.close()
|
|
229 |
|
|
230 |
print "\n--> The dataset "+bcolors.OKGREEN+"'%s'" % dataset+bcolors.ENDC+" has been updated/created into the "+bcolors.OKBLUE+dbname+bcolors.ENDC+" database in host: "+bcolors.OKBLUE+host+bcolors.ENDC+".\n"
|
|
231 |
|
Script to ingest a FITS table into a local/remote PostgreSQL database