tuned_pandasql.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  1. from typing import Dict, Any
  2. from pandasql import PandaSQL
  3. class TunedPandaSQL(PandaSQL):
  4. def multiquery(self, queries: Dict[Any, str], env=None):
  5. """
  6. Execute the SQL query.
  7. Automatically creates tables mentioned in the query from dataframes before executing.
  8. :param query: SQL query string, which can reference pandas dataframes as SQL tables.
  9. :param env: Variables environment - a dict mapping table names to pandas dataframes.
  10. If not specified use local and global variables of the caller.
  11. :return: Pandas dataframe with the result of the SQL query.
  12. """
  13. if env is None:
  14. from pandasql.sqldf import get_outer_frame_variables
  15. env = get_outer_frame_variables()
  16. from pandasql.sqldf import extract_table_names
  17. from pandasql.sqldf import write_table, read_sql, DatabaseError, PandaSQLException, ResourceClosedError
  18. with self.conn as conn:
  19. table_names = set(name
  20. for query in queries.values()
  21. for name in extract_table_names(query))
  22. for table_name in table_names:
  23. if table_name not in env:
  24. # don't raise error because the table may be already in the database
  25. continue
  26. if self.persist and table_name in self.loaded_tables:
  27. # table was loaded before using the same instance, don't do it again
  28. continue
  29. self.loaded_tables.add(table_name)
  30. write_table(env[table_name], table_name, conn)
  31. results = {}
  32. for k, query in queries.items():
  33. try:
  34. results[k] = read_sql(query, conn)
  35. except DatabaseError as ex:
  36. raise PandaSQLException(ex)
  37. except ResourceClosedError:
  38. # query returns nothing
  39. results[k] = None
  40. return results