Python - Extract Literals

I am working on a project where we execute Python code generated by an LLM on the backend, inside a container. LLMs often generate code containing literals that could instead be exposed as editable parameters in a web UI.

My first attempt was to do this in JavaScript, but it turned out to be unreliable, so I created a simple backend service to do it:

import ast
from typing import List, Dict, Union, Tuple

def extract_literals(code: str) -> List[Dict[str, Union[str, int, List, Tuple[int, int]]]]:
    """Collect the editable literals found in a piece of Python source.

    Two kinds of literal are reported:

    * scalar constants whose value is a ``str``, ``int`` or ``float``
      (``bool`` is a subclass of ``int`` and is reported with type
      ``'bool'``);
    * non-empty list displays whose elements are *all* such constants,
      reported once as a single literal of type ``'array'`` (the elements
      are not reported again individually).

    A list that contains anything else (names, calls, nested lists, negative
    numbers such as ``-1`` which parse as a unary operation, ...) is not
    reported as an array, because its value could not be reproduced from the
    constants alone. Its children are visited instead, so literals nested
    inside it are still found.

    Args:
        code: Python source text to analyse.

    Returns:
        One dict per literal, in traversal order, with the keys ``value``,
        ``type``, ``line_start``, ``line_end``, ``col_start`` and ``col_end``
        (positions as reported by the ``ast`` module).

    Raises:
        SyntaxError: if ``code`` is not valid Python (raised by ``ast.parse``).
    """
    scalar_types = (str, int, float)
    literals = []

    def record(node, value, type_name):
        # Single place that defines the shape of a result entry.
        literals.append({
            'value': value,
            'type': type_name,
            'line_start': node.lineno,
            'line_end': node.end_lineno,
            'col_start': node.col_offset,
            'col_end': node.end_col_offset
        })

    def is_scalar_constant(node):
        return isinstance(node, ast.Constant) and isinstance(node.value, scalar_types)

    class LiteralVisitor(ast.NodeVisitor):
        def visit_Constant(self, node):
            if is_scalar_constant(node):
                record(node, node.value, type(node.value).__name__)

        def visit_List(self, node):
            if node.elts and all(is_scalar_constant(elt) for elt in node.elts):
                record(node, [elt.value for elt in node.elts], 'array')
            else:
                # Not a pure literal list: keep walking so that literals
                # nested inside it (e.g. in calls or inner lists) are found.
                self.generic_visit(node)

    LiteralVisitor().visit(ast.parse(code))
    return literals

The code uses Python's `ast` module to walk the syntax tree and retrieve literals and lists of literals.

On the frontend, I have a dynamic form generator:

// Render the form control that matches the Python type of an extracted
// literal. `index` identifies the literal when reporting edits through
// `handleValueChange`. Returns null for types that have no editor.
const renderInput = (literal: Literal, index: number) => {
    switch (literal.type) {
      case "bool":
        return (
          <Select
            defaultValue={literal.value ? "True" : "False"} // Convert boolean to Python string format
            onValueChange={(value) =>
              handleValueChange(index, value === "True")
            }
          >
            <SelectTrigger className="h-8">
              <SelectValue />
            </SelectTrigger>
            <SelectContent>
              <SelectItem value="True">True</SelectItem>
              <SelectItem value="False">False</SelectItem>
            </SelectContent>
          </Select>
        );

      // The extractor reports Python floats with type "float"; they share the
      // numeric input with ints, but need step="any" so that the browser
      // accepts fractional values.
      case "int":
      case "float":
        return (
          <Input
            type="number"
            step={literal.type === "float" ? "any" : 1}
            defaultValue={String(literal.value)}
            onChange={(e) => handleValueChange(index, Number(e.target.value))}
            className="h-8"
          />
        );

      case "str":
        return (
          <Input
            type="text"
            defaultValue={literal.value as string}
            onChange={(e) => handleValueChange(index, e.target.value)}
            className="h-8"
          />
        );

      case "array":
        if (!isArrayValue(literal.value)) {
          console.error("Expected array value for array type literal");
          return null;
        }
        return (
          <ArrayInput
            value={literal.value}
            onChange={(newValue) => handleValueChange(index, newValue)}
          />
        );

      // Unknown literal type: render nothing rather than returning undefined.
      default:
        return null;
    }
  };

The result is shown below. The literals in LLM-generated code can be extracted and modified by the user, without asking the LLM to generate the code again.

LLM Code Gen Modifiers

I got additional feedback asking where the literals came from: whether they are assigned to variables or passed as arguments in function calls. Here is my modified code, which also captures these associations:

import ast
from typing import List, Dict, Union, Tuple

def extract_code_elements(code: str) -> Dict[str, List[Dict[str, Union[str, int, List, Tuple[int, int]]]]]:
    """Extract literals from Python source together with where they are used.

    Reported literals are:

    * scalar constants whose value is an ``int``, ``float`` or ``str``
      (``bool`` is a subclass of ``int`` and is reported with type
      ``'bool'``);
    * non-empty list displays whose elements are all such constants, reported
      once with type ``'array'`` (their elements are not reported again).

    Lists containing anything else are not reported as arrays; their children
    are visited instead so nested literals are still found.

    Every literal is reported exactly once. When a literal is the whole
    right-hand side of an assignment (``x = 5``, ``x: int = 5``,
    ``a = b = [1, 2]``), each plain-name target is recorded in ``variables``
    and the first one becomes the literal's ``associated_variable``. Targets
    that are not plain names (attributes, subscripts, tuples) are ignored.

    Args:
        code: Python source text to analyse.

    Returns:
        A dict with three lists:

        * ``literals``: one entry per literal with the keys ``id``,
          ``value``, ``type``, ``line_start``, ``line_end``, ``col_start``,
          ``col_end``, ``associated_variable``, ``is_kwarg``, ``kwarg_name``
          and ``context`` (``"Function call: <name>"`` for the innermost
          enclosing call to a plain name or attribute, otherwise ``None``);
        * ``variables``: ``{'name', 'associated_literal_id'}`` entries;
        * ``associations``: a flattened view of the literals that have an
          ``associated_variable``.

    Raises:
        SyntaxError: if ``code`` is not valid Python (raised by ``ast.parse``).
    """
    tree = ast.parse(code)
    scalar_types = (int, float, str)
    literals = []
    variables = []

    def is_scalar_constant(node):
        return isinstance(node, ast.Constant) and isinstance(node.value, scalar_types)

    class CodeElementVisitor(ast.NodeVisitor):
        def add_literal(self, node, value, type_name, associated_variable=None, is_kwarg=False, kwarg_name=None):
            """Append one literal entry and return it."""
            literal_info = {
                # IDs are sequential, so the current length is the next index.
                'id': f'lit_{len(literals)}',
                'value': value,
                'type': type_name,
                'line_start': node.lineno,
                'line_end': node.end_lineno,
                'col_start': node.col_offset,
                'col_end': node.end_col_offset,
                'associated_variable': associated_variable,
                'is_kwarg': is_kwarg,
                'kwarg_name': kwarg_name,
                'context': self.get_context(node)
            }
            literals.append(literal_info)
            return literal_info

        def get_context(self, node):
            """Describe the innermost enclosing call with a nameable callee."""
            current = node
            while current:
                if isinstance(current, ast.Call):
                    if isinstance(current.func, ast.Attribute):
                        return f"Function call: {current.func.attr}"
                    elif isinstance(current.func, ast.Name):
                        return f"Function call: {current.func.id}"
                current = getattr(current, 'parent', None)
            return None

        def assigned_names(self, node):
            """Return the plain-name targets `node` is directly assigned to."""
            parent = getattr(node, 'parent', None)
            if isinstance(parent, ast.Assign) and parent.value is node:
                targets = parent.targets
            elif isinstance(parent, ast.AnnAssign) and parent.value is node:
                targets = [parent.target]
            else:
                return []
            return [target.id for target in targets if isinstance(target, ast.Name)]

        def record(self, node, value, type_name):
            """Add a literal along with its keyword/assignment associations."""
            parent = getattr(node, 'parent', None)
            is_kwarg = isinstance(parent, ast.keyword)
            names = self.assigned_names(node)
            literal_info = self.add_literal(
                node,
                value,
                type_name,
                associated_variable=names[0] if names else None,
                is_kwarg=is_kwarg,
                kwarg_name=parent.arg if is_kwarg else None
            )
            # Variables are linked to the ID actually assigned to the
            # literal, never to a guessed "next" ID.
            for name in names:
                variables.append({
                    'name': name,
                    'associated_literal_id': literal_info['id']
                })

        def generic_visit(self, node):
            """Add parent references during traversal"""
            for child in ast.iter_child_nodes(node):
                child.parent = node
                self.visit(child)

        def visit_Constant(self, node):
            if is_scalar_constant(node):
                self.record(node, node.value, type(node.value).__name__)

        def visit_List(self, node):
            if node.elts and all(is_scalar_constant(elt) for elt in node.elts):
                self.record(node, [elt.value for elt in node.elts], 'array')
            else:
                # Not a pure literal list: descend so nested literals are
                # still reported individually.
                self.generic_visit(node)

    visitor = CodeElementVisitor()
    visitor.visit(tree)

    # Create associations list
    associations = [
        {
            'variable_name': lit['associated_variable'],
            'literal_id': lit['id'],
            'value': lit['value'],
            'type': lit['type']
        }
        for lit in literals if lit['associated_variable']
    ]

    return {
        'literals': literals,
        'variables': variables,
        'associations': associations
    }

# Demo: run the extractor on a small matplotlib script and print a
# human-readable report with one paragraph per literal found.
code = """
import matplotlib.pyplot as plt
import numpy as np

list = [1,2,3]
x = np.arange(5)
y1 = np.random.randn(5)
y2 = np.random.randn(5)

plt.bar(x, y1, label='Data 1')
plt.bar(x, y2, label='Data 2')
plt.legend()
plt.show()
"""

results = extract_code_elements(code)

print("\nAll Literals found:")
for lit in results['literals']:
    # Build the paragraph first, then emit it in a single print call.
    report = [
        f"ID: {lit['id']}",
        f"Value: {lit['value']} (Type: {lit['type']})",
    ]
    # Context and keyword-argument lines are only shown when present.
    if lit['context']:
        report.append(f"Context: {lit['context']}")
    if lit['is_kwarg']:
        report.append(f"Keyword Argument: {lit['kwarg_name']}")
    report.append(f"Associated Variable: {lit['associated_variable']}")
    report.append(f"Location: Line {lit['line_start']}, Col {lit['col_start']}")
    # The trailing empty entry produces the blank line between literals.
    report.append("")
    print("\n".join(report))

The UI was updated to include the additional information

Screenshot after UI Update

By: Gavi Narra on: