Project

General

Profile

Revision 2c8888e0

Added by Alessandro_N over 9 years ago

Script to ingest a FITS table into a local/remote PostgreSQL database

View differences:

common/data_ingestion/README
1
# ingest_dataset_from_FITS README
2
#
3
# Alessandro Nastasi, April 2015
4
#
5

  
6
This script writes/updates a table into a PostgreSQL Database 
7
reading data and column formats directly from a FITS file. 
8

  
9
The database can be local or on a remote server.
10

  
11
Its syntax is:
12

  
13
$ python ingest_dataset_from_FITS.py <file>.fits
14

  
15
@author: Alessandro NASTASI for IAS - IDOC 
16
@date: 24/04/2015
17

  
common/data_ingestion/ingest_dataset_from_FITS.py
1
#!/usr/bin/python
2

  
3
# ******************************************************************************
4
#    Copyright 2015 IAS - IDOC
5
#
6
#    This program is free software: you can redistribute it and/or modify
7
#    it under the terms of the GNU General Public License as published by
8
#    the Free Software Foundation, either version 3 of the License, or
9
#    (at your option) any later version.
10
#
11
#    This program is distributed in the hope that it will be useful,
12
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
13
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
#    GNU General Public License for more details.
15
#
16
#    You should have received a copy of the GNU General Public License
17
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
#
19
# ******************************************************************************
20

  
21
'''
22
This script writes/updates a table into a PostgreSQL Database 
23
reading data and column formats directly from a FITS file. 
24

  
25
The database can be local or on a remote server.
26

  
27
Its syntax is:
28

  
29
$ python ingest_dataset_from_FITS.py <file>.fits
30

  
31
@author: Alessandro NASTASI for IAS - IDOC 
32
@date: 24/04/2015
33
'''
34

  
35
__author__ = "Alessandro Nastasi"
36
__credits__ = ["Alessandro Nastasi", "Karin Dassas"]
37
__license__ = "GPL"
38
__version__ = "1.0"
39
__date__ = "24/04/2015"
40

  
41
import psycopg2
42
import pyfits
43

  
44
import numpy as np
45
import os, sys, re, math
46
from time import time
47

  
48
class bcolors:
    """ANSI terminal escape sequences used to colorize console messages."""
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'  # reset: restores the terminal's default color
55

  
56
# Mapping from FITS binary-table column format codes (TFORM letters) to
# PostgreSQL column definitions. Codes without an entry are not yet supported.
PSQL_FORMAT = {
  'L' : 'boolean DEFAULT false',
  #'X':  'TBD',   #bit
  #'B':  'TBD',   #Unsigned byte
  'I' : 'integer DEFAULT (-1) NOT NULL',
  'J' : 'real DEFAULT (- (1.6375E+30::numeric)::real)',
  'K' : 'real DEFAULT (- (1.6375E+30::numeric)::real)',
  'E' : 'real DEFAULT (- (1.6375E+30::numeric)::real)',
  'D' : 'real DEFAULT (- (1.6375E+30::numeric)::real)',
  'A' : 'character varying(1027)'
  #'C': 'TBD', #single precision complex
  #'M': 'TBD', #double precision complex
  #'P': 'TBD', #array descriptor
  #'Q': 'TBD'  #array descriptor
  }

def convert_into_SQL_format(fits_format):
  """Convert a FITS column format code (e.g. 'E', '20A') to a PSQL type.

  Returns the PostgreSQL column definition for the last supported format
  letter found in *fits_format*, or '' if none is supported.
  """
  formats = 'LXBIJKAEDCMPQ'
  psql_format = ''
  for char in formats:
    # Bug fix: the original indexed PSQL_FORMAT[char] unconditionally, so a
    # format containing an unsupported code (X, B, C, M, P, Q) raised
    # KeyError. Skip codes that have no mapping instead.
    if char in fits_format and char in PSQL_FORMAT:
      psql_format = PSQL_FORMAT[char]
  return psql_format
80

  
81
def RADECtoXYZ(RA, DEC):
  """Convert an (RA, DEC) pointing in degrees to Cartesian [X, Y, Z] on the unit sphere."""
  ra_rad = math.radians(RA)
  dec_rad = math.radians(DEC)
  cos_dec = math.cos(dec_rad)
  return [cos_dec * math.cos(ra_rad),
          cos_dec * math.sin(ra_rad),
          math.sin(dec_rad)]
91

  
92
dbname = "'<database_name>'"	#<--- Customize here
93

  
94
if (len(sys.argv) > 1):
95
    filename = sys.argv[1] 
96
else:
97
    print bcolors.WARNING +  "\n\tSintax:\t$ python ingest_dataset_from_FITS.py <file>.fits\n" + bcolors.ENDC
98
    os._exit(0)
99

  
100
# Ask interactively whether the target PostgreSQL server is the local machine
# or the remote server; 0 exits.
host = raw_input("\n> Where is the dataset to update/create? (enter 0 to exit)\n\t- localhost [1]\n\t- remote server "+bcolors.WARNING+'<server_name>'+bcolors.ENDC+" [2]\n\t--> ")
choice = False

# Keep prompting until a valid option is chosen. Note that 'host' is first the
# user's menu answer ('1'/'2'/'0') and is then overwritten with the actual
# quoted hostname used in the connection string.
while not choice:
  if host=='1':
    #LOCALHOST
    user = "'postgres'"
    host = "'localhost'"
    pwd = "''"
    choice = True
      
  elif host=='2':
    #REMOTE SERVER
    user = "'<username>'" 	#<--- Customize here
    host = "'<server_name>'"	#<--- Customize here
    pwd = "''"
    choice = True
    
  elif host=='0':
    print '\nExit.\n'; os._exit(0)
    
  else: 
    print bcolors.WARNING+'\n!! Choice not valid !!'+ bcolors.ENDC
    host = raw_input('\n\t> Please enter 1, 2 or 0: ')
124
    
125
# Open the FITS file (pyfits validates the format here).
fileInput = pyfits.open(filename)
# Bug fix: take the text after the *last* dot. The original split('.')[1]
# picked the first dot, which is wrong for names such as 'a.b.fits' or for
# paths containing dots.
fileExtension = filename.split('.')[-1].strip()

input_mode = 'fits' #''None'

#CSV mode not yet implemented...
'''
if fileExtension == 'fits': input_mode = 'fits'
else: input_mode = 'csv'
'''

# Name of the table to create/replace in the database.
dataset = raw_input('\n> Please enter the name of the new dataset to create into the %s database: ' % dbname)
137

  
138
# Connect to the database.
conn = psycopg2.connect("dbname="+dbname+" user="+user+" host="+host+" password="+pwd+"")

# Create the Psycopg cursor object.
cur = conn.cursor()

# Read the columns to INSERT INTO the table.
if input_mode == 'fits':
  print '\n- Reading/storing data from FITS table (it may take some time. Please wait...)'
  data = fileInput[1].data
  
  table_size = data.size  # number of rows in the FITS binary table
  
  fields = (data.names)          # column names
  fields_format = (data.formats) # FITS format codes (TFORM), one per column
  # Store everything in a rows-by-columns 2D list, without the header line.
  data2D = [[data[field][i] for field in fields] for i in range(table_size)]
154

  
155
#Create a TABLE 
156
table = False
157

  
158
#First test if the table already exists:
159
cur.execute("select exists(select * from information_schema.tables where table_name=%s)", (dataset,))
160
table = cur.fetchone()[0]
161
drop_cascade = 'n'
162
#if yes, drop the table
163
if table:
164
  print '\n- Dataset already exists. Dropping it...'
165
  try:
166
    cur.execute("DROP TABLE "+dataset+";")
167
  except:
168
    #If the dataset has one or more associated VIEWs, *everything* must be dropped
169
    print bcolors.WARNING+"\n>> Impossible to drop the table, possibly because other elements (e.g. VIEWS) depend on it. <<"+bcolors.ENDC
170
    drop_cascade = raw_input("\n\t> Do you want to use the 'DROP ... CASCADE' option to delete them too? [y/n]: ")
171
    if drop_cascade in 'YESyes1':
172
      #A re-connection is necessary if the DROP test failed
173
      conn = psycopg2.connect("dbname="+dbname+" user="+user+" host="+host+" password="+pwd+"")
174
      cur = conn.cursor()
175
      cur.execute("DROP TABLE "+dataset+" CASCADE;")
176
      print '\n\t- Dataset %s and all his dependencies successfully dropped.' % dataset
177
    else:
178
      print "\n- Exit.\n"; os._exit(0)
179
      
180
else:
181
  #If not, just create a new one
182
  print '\n- Table does not exist. A new one will be created...'
183

  
184
# If 'RA' and 'DEC' columns are found, compute the Cartesian unit-sphere
# coordinates x, y, z and append them as three extra real ('E') columns.
if ('RA' in fields) and ('DEC' in fields):
  print '\n- Found RA and DEC. The Cartesian coordinates x,y,z will be computed and appended to the dataset as additional columns...'
  fields.extend(['x','y','z'])
  fields_format.extend(['E','E','E'])
  # One [x, y, z] triple per row, derived from that row's RA/DEC (degrees).
  xyz = [RADECtoXYZ(data['RA'][j], data['DEC'][j]) for j in range(len(data2D))]
  data2D = np.column_stack( [ data2D , xyz ] )
191

  
192
#Store the info about which fields are string
193
fields_string_length = [len(field.split('A')) for field in fields_format]
194

  
195
print '\n- Creating/updating the table...\n'
196
createTable_cmd = "CREATE TABLE %s (id integer PRIMARY KEY" % dataset
197
for j, name in enumerate(fields):
198
  
199
  createTable_cmd += ", %s %s" % (name, convert_into_SQL_format(fields_format[j]) )
200
  
201
createTable_cmd +=");"
202
cur.execute(createTable_cmd)
203

  
204
# Fill in the rows, one INSERT per FITS table row.
# NOTE(review): values are interpolated into the SQL text rather than passed
# as query parameters; parameterized queries would be the robust fix, but the
# minimal correction below at least makes embedded quotes safe.
for i in range(table_size):
  sys.stdout.write("- Filling the %sth row of the table...\r" % i)
  sys.stdout.flush()
  toExecute = "INSERT INTO %s (id " % dataset
  for field in fields: toExecute += ", %s" % field
  toExecute += ") VALUES (%s " % i
  for j, field in enumerate(fields):

    if fields_string_length[j]>1:
      # Bug fix: escape embedded single quotes by SQL-standard doubling so a
      # string value such as "d'Alembert" no longer breaks the statement.
      toExecute += ", '%s'" % str(data2D[i][j]).replace("'", "''")
    else:
      # Numeric NaN becomes SQL NULL; everything else is written verbatim.
      if str(data2D[i][j]) == 'nan': toExecute += ", NULL"
      else: toExecute += ", %s" % data2D[i][j]

  toExecute +=")"
  cur.execute(toExecute)
  if i == table_size-1: sys.stdout.write("- Filling the %sth row of the table..." % i +bcolors.OKGREEN+"\t[OK]"+bcolors.ENDC+"\r")
222

  
223
# Apply the changes -> make the table and its rows permanent.
conn.commit()

# Close the cursor and the connection.
cur.close()
conn.close()

print "\n--> The dataset "+bcolors.OKGREEN+"'%s'" % dataset+bcolors.ENDC+" has been updated/created into the "+bcolors.OKBLUE+dbname+bcolors.ENDC+" database in host: "+bcolors.OKBLUE+host+bcolors.ENDC+".\n"
231

  

Also available in: Unified diff