from datetime import datetime

- log_formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s',
-                                   '%m-%d-%Y %H:%M:%S')
+ log_formatter = logging.Formatter(
+     "%(asctime)s | %(levelname)s | %(message)s", "%m-%d-%Y %H:%M:%S"
+ )
- def retry(tries=5, delay=3, backoff=2):
-     """
-     Retries a function or method until it succeeds or the number of retries is exceeded.
-
-     :param tries: the maximum number of times to retry (default 4).
-     :param delay: the initial delay between retries in seconds (default 3).
-     :param backoff: the backoff multiplier (e.g. value of 2 will double the delay each retry) (default 2).
+ def retry(tries: int = 5, delay: int = 3, backoff: int = 2):
+     """Retry wrapper that retries a function on exceptions and errors.
+
+     Args:
+         tries (int, optional): Number of retries. Defaults to 5.
+         delay (int, optional): Time delay in seconds between retries. Defaults to 3.
+         backoff (int, optional): Multiplier applied to the delay after each retry. Defaults to 2.
    """

    def deco_retry(func):
@@ -30,7 +30,7 @@ def f_retry(*args, **kwargs):
            mtries, mdelay = tries, delay
            while mtries >= 0:
                try:
-                    #args[0].logger.info("Trying the the function.......")
+                    # args[0].logger.info("Trying the function.......")
                    return func(*args, **kwargs)
                except (
                    openai.error.APIError,
@@ -51,7 +51,19 @@ def f_retry(*args, **kwargs):
    return deco_retry


- def dot_product(list1, list2):
+ def dot_product(list1: list, list2: list):
+     """Computes the dot product of 2 vectors.
+
+     Args:
+         list1 (list): List 1 values
+         list2 (list): List 2 values
+
+     Raises:
+         ValueError: If the two lists do not have the same length.
+
+     Returns:
+         int: Dot product value of the 2 lists.
+     """
    # Check if the lengths of the two lists are equal
    if len(list1) != len(list2):
        raise ValueError("Lists must have same length")
@@ -76,7 +88,14 @@ class ChatGptSmartClient(object):

    CONTEXT_TOKEN_LIMIT = 4000

-     def __init__(self, api_key: str, model: str, log_info: bool=False):
+     def __init__(self, api_key: str, model: str, log_info: bool = False):
+         """Init method to instantiate the client.
+
+         Args:
+             api_key (str): OpenAI api key.
+             model (str): The model to be used for chat completion, e.g. "gpt-3.5-turbo".
+             log_info (bool, optional): Whether to log stats for each query. Defaults to False.
+         """
        openai.api_key = api_key

        self.instruction_msgs = {
@@ -94,7 +113,7 @@ def __init__(self, api_key: str, model: str, log_info: bool=False):

        self.rspid_vs_tottokens_dict = {}

-         self.total_token_cnt  = 0
+         self.total_token_cnt = 0

        self.log_info = log_info
        self.logger = logging.getLogger(f"chatgptlogger{time.time()}")
@@ -103,9 +122,19 @@ def __init__(self, api_key: str, model: str, log_info: bool=False):
        stdout_handler.setFormatter(log_formatter)
        self.logger.addHandler(stdout_handler)

-
    @retry()
-     def query(self, query: str, w_context=True, add_to_context=True):
+     def query(self, query: str, w_context: bool = True, add_to_context: bool = True):
+         """Wrapper method to make conversation with the ChatGPT APIs.
+
+         Args:
+             query (str): Query to send to the ChatGPT servers.
+             w_context (bool, optional): Whether to ask this query with the previous conversation context. Defaults to True.
+             add_to_context (bool, optional): Whether to add this query and its response to the conversation context. Defaults to True.
+
+         Returns:
+             OpenAIJson, int: Response to the query and the response id, for later rollback if necessary.
+         """
+
        # TODO: We could get the embeddings, cache and further use them to speed up the results.
        # self.get_embeddings(query=query)
@@ -120,14 +149,16 @@ def query(self, query: str, w_context=True, add_to_context=True):
            msgs.append(query)
        else:
            msgs = [self.instruction_msgs, query]
-
+
        start_time = time.time()
        response = openai.ChatCompletion.create(model=self.model, messages=msgs)
        end_time = time.time()

        if self.log_info:
-             self.logger.info(f"The time taken for this response is : {end_time - start_time} seconds")
-             #print(f"The time taken for this response is : {end_time - start_time} seconds")
+             self.logger.info(
+                 f"The time taken for this response is: {end_time - start_time} seconds"
+             )
+             # print(f"The time taken for this response is: {end_time - start_time} seconds")

        f_resp = response["choices"][0]["message"]
        tot_token_cnt = response["usage"]["total_tokens"]
@@ -138,9 +169,10 @@ def query(self, query: str, w_context=True, add_to_context=True):

        self.total_token_cnt += tot_token_cnt

-
        if self.log_info:
-             self.logger.info(f"The total token count currently is {self.total_token_cnt}")
+             self.logger.info(
+                 f"The total token count currently is {self.total_token_cnt}"
+             )

        if add_to_context:
            self.prev_msgs.append(query)
@@ -149,54 +181,62 @@ def query(self, query: str, w_context=True, add_to_context=True):
            rsp_id = self.rsp_id
            self.rspid_vs_tottokens_dict[self.rsp_id] = tot_token_cnt

-         #print(self.prev_msgs)
+         # print(self.prev_msgs)

        return f_resp, rsp_id
-     def erase_history(self):
+     def erase_history(self) -> None:
+         """Removes all previous context."""
        self.prev_msgs = [self.instruction_msgs]
        self.rsp_id = len(self.prev_msgs)
-
    # This function is used for getting embeddings and hence may be
    # used to speed up the system by caching.
-     def get_embeddings(self, query: str):
-         """_summary_
+     def get_embeddings(self, query: str) -> list:
+         """Calls the OpenAI embeddings API to get the generated embeddings for a query;
+         can be used later for caching and faster response times.

        Args:
-             query (str): _description_
+             query (str): Query to be sent to the OpenAI embeddings API.
        """
        response = openai.Embedding.create(input=query, model="text-embedding-ada-002")
        embeddings = response["data"][0]["embedding"]
        return embeddings
-     def rollback_conversation(self, rsp_id):
-         """Rollback conversation to the point of a particular response.
+     def rollback_conversation(self, rsp_id: int) -> None:
+         """Rolls back the conversation to the point of a particular response.

        Args:
-             rsp_id (int): Id number of previous tracked response to roll back to.
+             rsp_id (int): Id number of a previously tracked response to roll back to.
        """
        self.prev_msgs = self.prev_msgs[0:rsp_id]
        self.rsp_id = len(self.prev_msgs)
-
-     def print_metrics(self):
-         self.logger.info(f"The total tokens used up-till now is: {self.total_token_cnt}")
-         self.logger.info(f"The average response time is: {sum(self.rsp_time_list)/len(self.rsp_time_list)} sec")
-
-         self.plot_rsp_times()
-
-     def plot_rsp_times(self):
-         formatted_timestamps = [datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S') for ts in self.rsp_tstamp_list]
+     def print_metrics(self) -> None:
+         """Method to log the token usage and response time information."""
+         self.logger.info(
+             f"The total tokens used up till now is: {self.total_token_cnt}"
+         )
+         self.logger.info(
+             f"The average response time is: {sum(self.rsp_time_list)/len(self.rsp_time_list)} sec"
+         )
+
+         self.plot_rsp_times()
+
+     def plot_rsp_times(self) -> None:
+         """Method to plot the response times versus the timestamps."""
+         formatted_timestamps = [
+             datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S")
+             for ts in self.rsp_tstamp_list
+         ]

        # Plot the response times against the timestamps
        fig, ax = plt.subplots()
        ax.plot(formatted_timestamps, self.rsp_time_list)
        ax.set_xlabel("Timestamp")
        ax.set_ylabel("Response Time (s)")
        ax.set_title("ChatGPT API Response Time")
-         ax.tick_params(axis='x', rotation=45)
+         ax.tick_params(axis="x", rotation=45)
        ax.xaxis.labelpad = 85
        ax.yaxis.labelpad = 35
@@ -211,51 +251,65 @@ def plot_rsp_times(self):
        # Show the plot
        plt.show()

-     def dump_context_to_a_file(self, filename="context"):
-         self.dicts_to_jsonl(data_list=self.prev_msgs, filename=filename)
-
-
-     def dicts_to_jsonl(self, data_list: list, filename: str, compress: bool = True) -> None:
-         """
-         Method saves list of dicts into jsonl file.
-         :param data_list: (list) list of dicts to be stored,
-         :param filename: (str) path to the output file. If suffix .jsonl is not given then methods appends
-             .jsonl suffix into the file.
-         :param compress: (bool) should file be compressed into a gzip archive?
-         """
-         sjsonl = '.jsonl'
+     def dump_context_to_a_file(self, filename: str = "context"):
+         """Wrapper method to call to dump the context to a file.
+
+         Args:
+             filename (str, optional): Filename to dump to. Defaults to "context".
+         """
+         self.dicts_to_jsonl(data_list=self.prev_msgs, filename=filename)
+
+     def dicts_to_jsonl(self, data_list: list, filename: str) -> None:
+         """Saves the context list of dicts in .jsonl format.
+
+         Args:
+             data_list (list): List of dicts of previous conversations.
+             filename (str): Filename to save to.
+         """
+         sjsonl = ".jsonl"

        # Check filename
        if not filename.endswith(sjsonl):
            filename = filename + sjsonl

        # Save data
-         with open(filename, 'w') as out:
+         with open(filename, "w") as out:
            for ddict in data_list:
-                 jout = json.dumps(ddict) + '\n'
+                 jout = json.dumps(ddict) + "\n"
                out.write(jout)
-
-     def load_context_from_a_file(self, filename):
-         sjsonl = '.jsonl'
+     def load_context_from_a_file(self, filename: str):
+         """Fills up self.prev_msgs, i.e. the context, from a dumped file.
+
+         Args:
+             filename (str): Filename should be a .jsonl file.
+
+         Returns:
+             list: List containing the context information.
+         """
+         sjsonl = ".jsonl"

        # Check filename
        if not filename.endswith(sjsonl):
            filename = filename + sjsonl
-
+
        self.prev_msgs = []
-
-         with open(filename, encoding='utf-8') as json_file:
+
+         with open(filename, encoding="utf-8") as json_file:
            for line in json_file.readlines():
                # Parse each JSONL line back into a dict, matching how the context was dumped
                self.prev_msgs.append(json.loads(line))

        return self.prev_msgs

    def trim_conversation(self):
+         """Method to trim the context generated till now to below the token limit.
+         The queries are removed in FIFO manner until the token count goes below the limit.
+         """
        while sum(self.total_token_cnt_list) >= self.CONTEXT_TOKEN_LIMIT:
-
-             del self.total_token_cnt_list[0]
+             del self.total_token_cnt_list[0]
            # Remove the oldest query/response pair; after the first del,
            # the old index 2 shifts to index 1, so index 1 is deleted twice.
            del self.prev_msgs[1]
            del self.prev_msgs[1]
-
-         self.logger.info(f"Trimmed the context list to length: {sum(self.total_token_cnt_list)}")
+
+         self.logger.info(
+             f"Trimmed the context list to length: {sum(self.total_token_cnt_list)}"
+         )
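For quick reference, a minimal usage sketch of the client as it stands after this commit. The class and method names are taken from the diff above; the environment-variable key handling and the example prompt are illustrative assumptions, not part of the commit:

import os

# Illustrative only: assumes OPENAI_API_KEY is set in the environment.
client = ChatGptSmartClient(
    api_key=os.environ["OPENAI_API_KEY"],
    model="gpt-3.5-turbo",
    log_info=True,
)

resp, rsp_id = client.query("Explain the dot product in one sentence.")
client.dump_context_to_a_file("context")  # writes context.jsonl
client.rollback_conversation(rsp_id)      # drop everything after this response
client.print_metrics()                    # log token usage and plot response times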